/*
 * arch/arm/kernel/topology.c
 *
 * Copyright (C) 2011 Linaro Limited.
 * Written by: Vincent Guittot
 *
 * based on arch/sh/kernel/topology.c
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 */

#include <linux/arch_topology.h>
#include <linux/cpu.h>
#include <linux/cpufreq.h>
#include <linux/cpumask.h>
#include <linux/export.h>
#include <linux/init.h>
#include <linux/percpu.h>
#include <linux/node.h>
#include <linux/nodemask.h>
#include <linux/of.h>
#include <linux/sched.h>
#include <linux/sched/topology.h>
#include <linux/slab.h>
#include <linux/string.h>

#include <asm/cpu.h>
#include <asm/cputype.h>
#include <asm/topology.h>

/*
 * cpu capacity scale management
 */

/*
 * cpu capacity table
 * This per-cpu data structure describes the relative capacity of each core.
 * On a heterogeneous system, cores don't have the same computation capacity
 * and we reflect that difference in the cpu_capacity field so the scheduler
 * can take this difference into account during load balancing. A per-cpu
 * structure is preferred because each CPU updates its own cpu_capacity field
 * during load balancing, except for idle cores. One idle core is selected
 * to run sched_balance_domains for all idle cores, and the cpu_capacity can
 * be updated during this sequence.
 */

#ifdef CONFIG_OF
struct cpu_efficiency {
	const char *compatible;
	unsigned long efficiency;
};

/*
 * Table of relative efficiency of each processor
 * The efficiency value must fit in 20 bits and the final
 * cpu_scale value must be in the range
 *   0 < cpu_scale < 3*SCHED_CAPACITY_SCALE/2
 * in order to return at most 1 when DIV_ROUND_CLOSEST
 * is used to compute the capacity of a CPU.
 * Processors that are not defined in the table
 * use the default SCHED_CAPACITY_SCALE value for cpu_scale.
 */
static const struct cpu_efficiency table_efficiency[] = {
	{"arm,cortex-a15", 3891},
	{"arm,cortex-a7",  2048},
	{NULL, },
};
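
/*
 * Illustrative example (not part of the original source): with the values
 * above, a Cortex-A15 and a Cortex-A7 running at the same clock rate end
 * up with a capacity ratio of 3891/2048 ~= 1.9, i.e. the scheduler rates
 * one A15 core as worth roughly two A7 cores.
 */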

static unsigned long *__cpu_capacity;
#define cpu_capacity(cpu)	__cpu_capacity[cpu]

static unsigned long middle_capacity = 1;
static bool cap_from_dt = true;

/*
 * Iterate over all CPUs' descriptors in the DT and compute the efficiency
 * (as per table_efficiency). Also calculate a middle efficiency,
 * as close as possible to (max{eff_i} + min{eff_i}) / 2.
 * This is later used to scale the cpu_capacity field such that an
 * 'average' CPU is of middle capacity. Also see the comments near
 * table_efficiency[] and update_cpu_capacity().
 */
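/*
 * Illustrative example (not part of the original source): a DT cpu node
 * that this parser consumes might look like the following. The node either
 * carries a capacity-dmips-mhz property, handled generically by
 * topology_parse_cpu_capacity(), or falls back to the compatible string
 * plus clock-frequency path below.
 *
 *	cpu0: cpu@0 {
 *		device_type = "cpu";
 *		compatible = "arm,cortex-a15";
 *		reg = <0>;
 *		clock-frequency = <1000000000>;
 *	};
 */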
static void __init parse_dt_topology(void)
{
	const struct cpu_efficiency *cpu_eff;
	struct device_node *cn = NULL;
	unsigned long min_capacity = ULONG_MAX;
	unsigned long max_capacity = 0;
	unsigned long capacity = 0;
	int cpu = 0;

	__cpu_capacity = kcalloc(nr_cpu_ids, sizeof(*__cpu_capacity),
				 GFP_NOWAIT);

	for_each_possible_cpu(cpu) {
		const __be32 *rate;
		int len;

		/* too early to use cpu->of_node */
		cn = of_get_cpu_node(cpu, NULL);
		if (!cn) {
			pr_err("missing device node for CPU %d\n", cpu);
			continue;
		}

		if (topology_parse_cpu_capacity(cn, cpu)) {
			of_node_put(cn);
			continue;
		}

		cap_from_dt = false;

		for (cpu_eff = table_efficiency; cpu_eff->compatible; cpu_eff++)
			if (of_device_is_compatible(cn, cpu_eff->compatible))
				break;

		if (cpu_eff->compatible == NULL)
			continue;

		rate = of_get_property(cn, "clock-frequency", &len);
		if (!rate || len != 4) {
			pr_err("%pOF missing clock-frequency property\n", cn);
			continue;
		}

		capacity = ((be32_to_cpup(rate)) >> 20) * cpu_eff->efficiency;
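
		/*
		 * Worked example (illustrative, not from the original
		 * source): for a Cortex-A15 at 1 GHz, rate >> 20 gives
		 * 1000000000 >> 20 = 953, so capacity = 953 * 3891
		 * = 3708123. The 20-bit shift keeps the product within
		 * an unsigned long on 32-bit ARM.
		 */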

		/* Save min capacity of the system */
		if (capacity < min_capacity)
			min_capacity = capacity;

		/* Save max capacity of the system */
		if (capacity > max_capacity)
			max_capacity = capacity;

		cpu_capacity(cpu) = capacity;
	}

	/* If min and max capacities are equal, we bypass the update of the
	 * cpu_scale because all CPUs have the same capacity. Otherwise, we
	 * compute a middle_capacity factor that will ensure that the capacity
	 * of an 'average' CPU of the system will be as close as possible to
	 * SCHED_CAPACITY_SCALE, which is the default value, but with the
	 * constraint explained near table_efficiency[].
	 */
	if (4*max_capacity < (3*(max_capacity + min_capacity)))
		middle_capacity = (min_capacity + max_capacity)
				>> (SCHED_CAPACITY_SHIFT+1);
	else
		middle_capacity = ((max_capacity / 3)
				>> (SCHED_CAPACITY_SHIFT-1)) + 1;
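
	/*
	 * Worked example (illustrative, not from the original source):
	 * with the table above and both clusters at 1 GHz,
	 * min_capacity = 953 * 2048 = 1951744 and
	 * max_capacity = 953 * 3891 = 3708123. Since 4*max < 3*(max+min)
	 * (i.e. max < 3*min), the first branch applies:
	 * middle_capacity = (1951744 + 3708123) >> 11 = 2763.
	 * update_cpu_capacity() then rates the A15 at 3708123/2763 ~= 1342
	 * and the A7 at 1951744/2763 ~= 706, averaging to
	 * SCHED_CAPACITY_SCALE (1024).
	 */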

	if (cap_from_dt)
		topology_normalize_cpu_scale();
}

/*
 * Look for a custom capacity for a CPU in the cpu_capacity table during
 * boot. The update of all CPUs is in O(n^2) for a heterogeneous system but
 * the function returns directly for an SMP system.
 */
static void update_cpu_capacity(unsigned int cpu)
{
	if (!cpu_capacity(cpu) || cap_from_dt)
		return;

	topology_set_cpu_scale(cpu, cpu_capacity(cpu) / middle_capacity);

	pr_info("CPU%u: update cpu_capacity %lu\n",
		cpu, topology_get_cpu_scale(cpu));
}
|  |  | 
|  | #else | 
|  | static inline void parse_dt_topology(void) {} | 
|  | static inline void update_cpu_capacity(unsigned int cpuid) {} | 
|  | #endif | 
|  |  | 
|  | /* | 
|  | * store_cpu_topology is called at boot when only one cpu is running | 
|  | * and with the mutex cpu_hotplug.lock locked, when several cpus have booted, | 
|  | * which prevents simultaneous write access to cpu_topology array | 
|  | */ | 
void store_cpu_topology(unsigned int cpuid)
{
	struct cpu_topology *cpuid_topo = &cpu_topology[cpuid];
	unsigned int mpidr;

	if (cpuid_topo->package_id != -1)
		goto topology_populated;

	mpidr = read_cpuid_mpidr();

	/* create cpu topology mapping */
	if ((mpidr & MPIDR_SMP_BITMASK) == MPIDR_SMP_VALUE) {
		/*
		 * This is a multiprocessor system
		 * multiprocessor format & multiprocessor mode field are set
		 */

		if (mpidr & MPIDR_MT_BITMASK) {
			/* core performance interdependency */
			cpuid_topo->thread_id = MPIDR_AFFINITY_LEVEL(mpidr, 0);
			cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL(mpidr, 1);
			cpuid_topo->package_id = MPIDR_AFFINITY_LEVEL(mpidr, 2);
		} else {
			/* largely independent cores */
			cpuid_topo->thread_id = -1;
			cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL(mpidr, 0);
			cpuid_topo->package_id = MPIDR_AFFINITY_LEVEL(mpidr, 1);
		}
	} else {
		/*
		 * This is a uniprocessor system:
		 * either we are in multiprocessor format but on a
		 * uniprocessor system, or in the old uniprocessor format
		 */
		cpuid_topo->thread_id = -1;
		cpuid_topo->core_id = 0;
		cpuid_topo->package_id = -1;
	}
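
	/*
	 * Illustrative example (not from the original source): on a
	 * big.LITTLE system without the MT bit set, MPIDR 0x80000101
	 * decodes as Aff0 = 1 and Aff1 = 1, i.e. core 1 of cluster
	 * (package) 1, with thread_id left at -1.
	 */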

	update_cpu_capacity(cpuid);

	pr_info("CPU%u: thread %d, cpu %d, socket %d, mpidr %x\n",
		cpuid, cpu_topology[cpuid].thread_id,
		cpu_topology[cpuid].core_id,
		cpu_topology[cpuid].package_id, mpidr);

topology_populated:
	update_siblings_masks(cpuid);
}

/*
 * init_cpu_topology is called at boot when only one CPU is running,
 * which prevents simultaneous write access to the cpu_topology array.
 */
void __init init_cpu_topology(void)
{
	reset_cpu_topology();
	smp_wmb();

	parse_dt_topology();
}