/*
 * arch/arm/kernel/topology.c
 *
 * Copyright (C) 2011 Linaro Limited.
 * Written by: Vincent Guittot
 *
 * based on arch/sh/kernel/topology.c
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file "COPYING" in the main directory of this archive
 * for more details.
 */

#include <linux/arch_topology.h>
#include <linux/cpu.h>
#include <linux/cpufreq.h>
#include <linux/cpumask.h>
#include <linux/export.h>
#include <linux/init.h>
#include <linux/percpu.h>
#include <linux/node.h>
#include <linux/nodemask.h>
#include <linux/of.h>
#include <linux/sched.h>
#include <linux/sched/topology.h>
#include <linux/slab.h>
#include <linux/string.h>

#include <asm/cpu.h>
#include <asm/cputype.h>
#include <asm/topology.h>

/*
 * cpu capacity scale management
 */

/*
 * cpu capacity table
 * This per cpu data structure describes the relative capacity of each core.
 * On a heterogeneous system, cores don't have the same computation capacity
 * and we reflect that difference in the cpu_capacity field so the scheduler
 * can take this difference into account during load balance. A per cpu
 * structure is preferred because each CPU updates its own cpu_capacity field
 * during load balance, except for idle cores. One idle core is selected to
 * run rebalance_domains for all idle cores, and the cpu_capacity can be
 * updated during this sequence.
 */

#ifdef CONFIG_OF
struct cpu_efficiency {
	const char *compatible;
	unsigned long efficiency;
};

/*
 * Table of the relative efficiency of each processor.
 * The efficiency value must fit in 20 bits and the final
 * cpu_scale value must be in the range
 *   0 < cpu_scale < 3*SCHED_CAPACITY_SCALE/2
 * in order to return at most 1 when DIV_ROUND_CLOSEST
 * is used to compute the capacity of a CPU.
 * Processors that are not listed in the table use the
 * default SCHED_CAPACITY_SCALE value for cpu_scale.
 */
static const struct cpu_efficiency table_efficiency[] = {
	{"arm,cortex-a15", 3891},
	{"arm,cortex-a7",  2048},
	{NULL, },
};

static unsigned long *__cpu_capacity;
#define cpu_capacity(cpu)	__cpu_capacity[cpu]

static unsigned long middle_capacity = 1;
static bool cap_from_dt = true;
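/*
 * Illustrative sketch only, kept out of the build with #if 0: how the raw,
 * pre-normalization capacity is derived from a CPU node's "clock-frequency"
 * property and the efficiency table above. The helper name
 * example_raw_capacity() and the clock frequencies are hypothetical, chosen
 * purely to show the arithmetic used by parse_dt_topology() below.
 */
#if 0
static unsigned long example_raw_capacity(unsigned long freq_hz,
					  unsigned long efficiency)
{
	/* Mirrors: capacity = (be32_to_cpup(rate) >> 20) * cpu_eff->efficiency */
	return (freq_hz >> 20) * efficiency;
}

/*
 * Hypothetical big.LITTLE pair:
 *   Cortex-A15 @ 2.0 GHz: (2000000000 >> 20) * 3891 = 1907 * 3891 = 7420137
 *   Cortex-A7  @ 1.0 GHz: (1000000000 >> 20) * 2048 =  953 * 2048 = 1951744
 * These raw values are later compressed into the 0 < cpu_scale < 1536 range
 * by parse_dt_topology() and update_cpu_capacity() below.
 */
#endif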
/*
 * Iterate over all CPUs' descriptors in the DT and compute the efficiency
 * (as per table_efficiency). Also calculate a middle efficiency, as close
 * as possible to (max{eff_i} + min{eff_i}) / 2. This is later used to scale
 * the cpu_capacity field such that an 'average' CPU is of middle capacity.
 * Also see the comments near table_efficiency[] and update_cpu_capacity().
 */
static void __init parse_dt_topology(void)
{
	const struct cpu_efficiency *cpu_eff;
	struct device_node *cn = NULL;
	unsigned long min_capacity = ULONG_MAX;
	unsigned long max_capacity = 0;
	unsigned long capacity = 0;
	int cpu = 0;

	__cpu_capacity = kcalloc(nr_cpu_ids, sizeof(*__cpu_capacity),
				 GFP_NOWAIT);

	for_each_possible_cpu(cpu) {
		const u32 *rate;
		int len;

		/* too early to use cpu->of_node */
		cn = of_get_cpu_node(cpu, NULL);
		if (!cn) {
			pr_err("missing device node for CPU %d\n", cpu);
			continue;
		}

		if (topology_parse_cpu_capacity(cn, cpu)) {
			of_node_put(cn);
			continue;
		}

		cap_from_dt = false;

		for (cpu_eff = table_efficiency; cpu_eff->compatible; cpu_eff++)
			if (of_device_is_compatible(cn, cpu_eff->compatible))
				break;

		if (cpu_eff->compatible == NULL)
			continue;

		rate = of_get_property(cn, "clock-frequency", &len);
		if (!rate || len != 4) {
			pr_err("%pOF missing clock-frequency property\n", cn);
			continue;
		}

		capacity = ((be32_to_cpup(rate)) >> 20) * cpu_eff->efficiency;

		/* Save min capacity of the system */
		if (capacity < min_capacity)
			min_capacity = capacity;

		/* Save max capacity of the system */
		if (capacity > max_capacity)
			max_capacity = capacity;

		cpu_capacity(cpu) = capacity;
	}

	/* If min and max capacities are equal, we bypass the update of the
	 * cpu_scale because all CPUs have the same capacity. Otherwise, we
	 * compute a middle_capacity factor that will ensure that the capacity
	 * of an 'average' CPU of the system will be as close as possible to
	 * SCHED_CAPACITY_SCALE, which is the default value, but with the
	 * constraint explained near table_efficiency[].
	 */
	if (4*max_capacity < (3*(max_capacity + min_capacity)))
		middle_capacity = (min_capacity + max_capacity)
				>> (SCHED_CAPACITY_SHIFT+1);
	else
		middle_capacity = ((max_capacity / 3)
				>> (SCHED_CAPACITY_SHIFT-1)) + 1;

	if (cap_from_dt)
		topology_normalize_cpu_scale();
}

/*
 * Look for a custom capacity of a CPU in the cpu_capacity table during boot.
 * The update of all CPUs is in O(n^2) for heterogeneous systems, but the
 * function returns directly for SMP systems.
 */
static void update_cpu_capacity(unsigned int cpu)
{
	if (!cpu_capacity(cpu) || cap_from_dt)
		return;

	topology_set_cpu_scale(cpu, cpu_capacity(cpu) / middle_capacity);

	pr_info("CPU%u: update cpu_capacity %lu\n",
		cpu, topology_get_cpu_scale(cpu));
}

#else
static inline void parse_dt_topology(void) {}
static inline void update_cpu_capacity(unsigned int cpuid) {}
#endif
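/*
 * Illustrative sketch only, kept out of the build with #if 0: continuing the
 * hypothetical A15/A7 example above through the normalization done by
 * parse_dt_topology() and update_cpu_capacity(). The helper name
 * example_middle_capacity() and all numbers are assumptions for illustration
 * (SCHED_CAPACITY_SHIFT is 10, so SCHED_CAPACITY_SCALE is 1024).
 */
#if 0
static unsigned long example_middle_capacity(unsigned long min_capacity,
					     unsigned long max_capacity)
{
	/* Same branch structure as the end of parse_dt_topology() */
	if (4 * max_capacity < 3 * (max_capacity + min_capacity))
		return (min_capacity + max_capacity)
			>> (SCHED_CAPACITY_SHIFT + 1);
	return ((max_capacity / 3) >> (SCHED_CAPACITY_SHIFT - 1)) + 1;
}

/*
 * With min = 1951744 (A7) and max = 7420137 (A15):
 *   4 * max = 29680548 >= 3 * (max + min) = 28115643, so the second
 *   branch is taken: middle_capacity = ((7420137 / 3) >> 9) + 1 = 4831.
 * update_cpu_capacity() then sets:
 *   A15: 7420137 / 4831 = 1535  (just under 3*SCHED_CAPACITY_SCALE/2)
 *   A7:  1951744 / 4831 =  404
 */
#endif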
/*
 * The current assumption is that we can power gate each core independently.
 * This will be superseded by a DT binding once available.
 */
const struct cpumask *cpu_corepower_mask(int cpu)
{
	return &cpu_topology[cpu].thread_sibling;
}

/*
 * store_cpu_topology is called at boot, when only one CPU is running, and,
 * once several CPUs have booted, with the cpu_hotplug.lock mutex held,
 * which prevents simultaneous write access to the cpu_topology array.
 */
void store_cpu_topology(unsigned int cpuid)
{
	struct cpu_topology *cpuid_topo = &cpu_topology[cpuid];
	unsigned int mpidr;

	if (cpuid_topo->package_id != -1)
		goto topology_populated;

	mpidr = read_cpuid_mpidr();

	/* create cpu topology mapping */
	if ((mpidr & MPIDR_SMP_BITMASK) == MPIDR_SMP_VALUE) {
		/*
		 * This is a multiprocessor system:
		 * the multiprocessor format and the multiprocessor
		 * mode field are set.
		 */

		if (mpidr & MPIDR_MT_BITMASK) {
			/* core performance interdependency */
			cpuid_topo->thread_id = MPIDR_AFFINITY_LEVEL(mpidr, 0);
			cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL(mpidr, 1);
			cpuid_topo->package_id = MPIDR_AFFINITY_LEVEL(mpidr, 2);
		} else {
			/* largely independent cores */
			cpuid_topo->thread_id = -1;
			cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL(mpidr, 0);
			cpuid_topo->package_id = MPIDR_AFFINITY_LEVEL(mpidr, 1);
		}
	} else {
		/*
		 * This is a uniprocessor system:
		 * either we are in multiprocessor format on a uniprocessor
		 * system, or in the old uniprocessor format.
		 */
		cpuid_topo->thread_id = -1;
		cpuid_topo->core_id = 0;
		cpuid_topo->package_id = -1;
	}

	update_cpu_capacity(cpuid);

	pr_info("CPU%u: thread %d, cpu %d, socket %d, mpidr %x\n",
		cpuid, cpu_topology[cpuid].thread_id,
		cpu_topology[cpuid].core_id,
		cpu_topology[cpuid].package_id, mpidr);

topology_populated:
	update_siblings_masks(cpuid);
}

static inline int cpu_corepower_flags(void)
{
	return SD_SHARE_PKG_RESOURCES | SD_SHARE_POWERDOMAIN;
}

static struct sched_domain_topology_level arm_topology[] = {
#ifdef CONFIG_SCHED_MC
	{ cpu_corepower_mask, cpu_corepower_flags, SD_INIT_NAME(GMC) },
	{ cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
#endif
	{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
	{ NULL, },
};

/*
 * init_cpu_topology is called at boot when only one CPU is running,
 * which prevents simultaneous write access to the cpu_topology array.
 */
void __init init_cpu_topology(void)
{
	reset_cpu_topology();
	smp_wmb();

	parse_dt_topology();

	/* Set scheduler topology descriptor */
	set_sched_topology(arm_topology);
}
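/*
 * Illustrative sketch only, kept out of the build with #if 0: how
 * store_cpu_topology() decodes a sample MPIDR value. The value 0x80000102
 * and the helper name example_decode_mpidr() are hypothetical;
 * MPIDR_AFFINITY_LEVEL() extracts one 8-bit affinity field per level,
 * as defined in <asm/cputype.h>.
 */
#if 0
static void example_decode_mpidr(unsigned int mpidr)
{
	/* One 8-bit affinity field per level, as MPIDR_AFFINITY_LEVEL() does */
	unsigned int aff0 = mpidr & 0xff;
	unsigned int aff1 = (mpidr >> 8) & 0xff;

	/*
	 * For the hypothetical mpidr 0x80000102:
	 *   bits [31:30] = 0b10 -> multiprocessor format (MPIDR_SMP_VALUE)
	 *   bit  [24]    = 0    -> MT clear, cores are largely independent
	 *   aff0 = 2, aff1 = 1
	 * so store_cpu_topology() would record thread_id = -1, core_id = 2,
	 * package_id = 1: core 2 in cluster 1.
	 */
	pr_info("core %u in cluster %u\n", aff0, aff1);
}
#endif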