// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * PowerNV cpuidle code
 *
 * Copyright 2015 IBM Corp.
 */

#include <linux/types.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/of.h>
#include <linux/device.h>
#include <linux/cpu.h>

#include <asm/asm-prototypes.h>
#include <asm/firmware.h>
#include <asm/machdep.h>
#include <asm/opal.h>
#include <asm/cputhreads.h>
#include <asm/cpuidle.h>
#include <asm/code-patching.h>
#include <asm/smp.h>
#include <asm/runlatch.h>
#include <asm/dbell.h>

#include "powernv.h"
#include "subcore.h"

/* Power ISA 3.0 allows for stop states 0x0 - 0xF */
#define MAX_STOP_STATE	0xF

#define P9_STOP_SPR_MSR 2000
#define P9_STOP_SPR_PSSCR 855

static u32 supported_cpuidle_states;
struct pnv_idle_states_t *pnv_idle_states;
int nr_pnv_idle_states;

/*
 * The default stop state that will be used by ppc_md.power_save
 * function on platforms that support the stop instruction.
 */
static u64 pnv_default_stop_val;
static u64 pnv_default_stop_mask;
static bool default_stop_found;

/*
 * First stop state levels when SPR and TB loss can occur.
 */
static u64 pnv_first_tb_loss_level = MAX_STOP_STATE + 1;
static u64 pnv_first_spr_loss_level = MAX_STOP_STATE + 1;

/*
 * PSSCR value and mask of the deepest stop idle state.
 * Used when a CPU is offlined.
 */
static u64 pnv_deepest_stop_psscr_val;
static u64 pnv_deepest_stop_psscr_mask;
static u64 pnv_deepest_stop_flag;
static bool deepest_stop_found;

static unsigned long power7_offline_type;

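/*
 * Program the SLW engine, via the OPAL stop-api, with the SPR values that
 * firmware must restore when a thread wakes from a deep idle state that
 * loses full hypervisor context. Called once at boot when such states are
 * supported.
 */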
static int pnv_save_sprs_for_deep_states(void)
{
	int cpu;
	int rc;

	/*
	 * The hid0, hid1, hid4, hid5, hmeer and lpcr values are symmetric
	 * across all cpus at boot, so read these registers on the current
	 * cpu and program the same values on all cpus.
	 */
	uint64_t lpcr_val = mfspr(SPRN_LPCR);
	uint64_t hid0_val = mfspr(SPRN_HID0);
	uint64_t hid1_val = mfspr(SPRN_HID1);
	uint64_t hid4_val = mfspr(SPRN_HID4);
	uint64_t hid5_val = mfspr(SPRN_HID5);
	uint64_t hmeer_val = mfspr(SPRN_HMEER);
	uint64_t msr_val = MSR_IDLE;
	uint64_t psscr_val = pnv_deepest_stop_psscr_val;

	for_each_present_cpu(cpu) {
		uint64_t pir = get_hard_smp_processor_id(cpu);
		uint64_t hsprg0_val = (uint64_t)paca_ptrs[cpu];

		rc = opal_slw_set_reg(pir, SPRN_HSPRG0, hsprg0_val);
		if (rc != 0)
			return rc;

		rc = opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val);
		if (rc != 0)
			return rc;

		if (cpu_has_feature(CPU_FTR_ARCH_300)) {
			rc = opal_slw_set_reg(pir, P9_STOP_SPR_MSR, msr_val);
			if (rc)
				return rc;

			rc = opal_slw_set_reg(pir,
					      P9_STOP_SPR_PSSCR, psscr_val);

			if (rc)
				return rc;
		}

		/* HIDs are per core registers */
		if (cpu_thread_in_core(cpu) == 0) {

			rc = opal_slw_set_reg(pir, SPRN_HMEER, hmeer_val);
			if (rc != 0)
				return rc;

			rc = opal_slw_set_reg(pir, SPRN_HID0, hid0_val);
			if (rc != 0)
				return rc;

			/* Only P8 needs to set extra HID registers */
			if (!cpu_has_feature(CPU_FTR_ARCH_300)) {

				rc = opal_slw_set_reg(pir, SPRN_HID1, hid1_val);
				if (rc != 0)
					return rc;

				rc = opal_slw_set_reg(pir, SPRN_HID4, hid4_val);
				if (rc != 0)
					return rc;

				rc = opal_slw_set_reg(pir, SPRN_HID5, hid5_val);
				if (rc != 0)
					return rc;
			}
		}
	}

	return 0;
}

u32 pnv_get_supported_cpuidle_states(void)
{
	return supported_cpuidle_states;
}
EXPORT_SYMBOL_GPL(pnv_get_supported_cpuidle_states);

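/*
 * On P8 parts with the OPAL_PM_SLEEP_ENABLED_ER1 erratum, an OPAL call must
 * apply a workaround before the last thread of a core enters fastsleep, and
 * undo it again when the core wakes. The sysfs attribute below lets the
 * workaround instead be applied once and left in place, avoiding the OPAL
 * calls on every subsequent entry/exit.
 */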
static void pnv_fastsleep_workaround_apply(void *info)
{
	int rc;
	int *err = info;

	rc = opal_config_cpu_idle_state(OPAL_CONFIG_IDLE_FASTSLEEP,
					OPAL_CONFIG_IDLE_APPLY);
	if (rc)
		*err = 1;
}

static bool power7_fastsleep_workaround_entry = true;
static bool power7_fastsleep_workaround_exit = true;

/*
 * Used to store fastsleep workaround state
 * 0 - Workaround applied/undone at fastsleep entry/exit path (Default)
 * 1 - Workaround applied once, never undone.
 */
static u8 fastsleep_workaround_applyonce;

static ssize_t show_fastsleep_workaround_applyonce(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	return sprintf(buf, "%u\n", fastsleep_workaround_applyonce);
}

static ssize_t store_fastsleep_workaround_applyonce(struct device *dev,
		struct device_attribute *attr, const char *buf,
		size_t count)
{
	cpumask_t primary_thread_mask;
	int err;
	u8 val;

	if (kstrtou8(buf, 0, &val) || val != 1)
		return -EINVAL;

	if (fastsleep_workaround_applyonce == 1)
		return count;

	/*
	 * fastsleep_workaround_applyonce = 1 implies that the fastsleep
	 * workaround needs to be left in 'applied' state on all the
	 * cores. Do this by:
	 * 1. Disabling the 'undo' workaround in the fastsleep exit path
	 * 2. Sending IPIs to all the cores which have at least one online
	 *    thread
	 * 3. Disabling the 'apply' workaround in the fastsleep entry path
	 *
	 * There is no need to send an IPI to cores which have all threads
	 * offlined, as the last thread of the core entering fastsleep or a
	 * deeper state will have applied the workaround.
	 */
	power7_fastsleep_workaround_exit = false;

	get_online_cpus();
	primary_thread_mask = cpu_online_cores_map();
	on_each_cpu_mask(&primary_thread_mask,
			 pnv_fastsleep_workaround_apply,
			 &err, 1);
	put_online_cpus();
	if (err) {
		pr_err("fastsleep_workaround_applyonce change failed while running pnv_fastsleep_workaround_apply\n");
		goto fail;
	}

	power7_fastsleep_workaround_entry = false;

	fastsleep_workaround_applyonce = 1;

	return count;
fail:
	return -EIO;
}

static DEVICE_ATTR(fastsleep_workaround_applyonce, 0600,
			show_fastsleep_workaround_applyonce,
			store_fastsleep_workaround_applyonce);

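/*
 * Each core's idle state is tracked in the idle_state word of the first
 * thread's paca: one bit per thread (cleared while that thread is in a
 * state-losing idle, set while it is running), a lock bit that serializes
 * updates and the restore of shared SPRs on wakeup, and a counter of
 * threads that have entered winkle. The helpers below manipulate that
 * word atomically.
 */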
static inline void atomic_start_thread_idle(void)
{
	int cpu = raw_smp_processor_id();
	int first = cpu_first_thread_sibling(cpu);
	int thread_nr = cpu_thread_in_core(cpu);
	unsigned long *state = &paca_ptrs[first]->idle_state;

	clear_bit(thread_nr, state);
}

static inline void atomic_stop_thread_idle(void)
{
	int cpu = raw_smp_processor_id();
	int first = cpu_first_thread_sibling(cpu);
	int thread_nr = cpu_thread_in_core(cpu);
	unsigned long *state = &paca_ptrs[first]->idle_state;

	set_bit(thread_nr, state);
}

static inline void atomic_lock_thread_idle(void)
{
	int cpu = raw_smp_processor_id();
	int first = cpu_first_thread_sibling(cpu);
	unsigned long *state = &paca_ptrs[first]->idle_state;

	while (unlikely(test_and_set_bit_lock(NR_PNV_CORE_IDLE_LOCK_BIT, state)))
		barrier();
}

static inline void atomic_unlock_and_stop_thread_idle(void)
{
	int cpu = raw_smp_processor_id();
	int first = cpu_first_thread_sibling(cpu);
	unsigned long thread = 1UL << cpu_thread_in_core(cpu);
	unsigned long *state = &paca_ptrs[first]->idle_state;
	u64 s = READ_ONCE(*state);
	u64 new, tmp;

	BUG_ON(!(s & PNV_CORE_IDLE_LOCK_BIT));
	BUG_ON(s & thread);

again:
	new = (s | thread) & ~PNV_CORE_IDLE_LOCK_BIT;
	tmp = cmpxchg(state, s, new);
	if (unlikely(tmp != s)) {
		s = tmp;
		goto again;
	}
}

static inline void atomic_unlock_thread_idle(void)
{
	int cpu = raw_smp_processor_id();
	int first = cpu_first_thread_sibling(cpu);
	unsigned long *state = &paca_ptrs[first]->idle_state;

	BUG_ON(!test_bit(NR_PNV_CORE_IDLE_LOCK_BIT, state));
	clear_bit_unlock(NR_PNV_CORE_IDLE_LOCK_BIT, state);
}

/* P7 and P8 */
struct p7_sprs {
	/* per core */
	u64 tscr;
	u64 worc;

	/* per subcore */
	u64 sdr1;
	u64 rpr;

	/* per thread */
	u64 lpcr;
	u64 hfscr;
	u64 fscr;
	u64 purr;
	u64 spurr;
	u64 dscr;
	u64 wort;

	/* per thread SPRs that get lost in shallow states */
	u64 amr;
	u64 iamr;
	u64 amor;
	u64 uamor;
};

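/*
 * Enter nap, fastsleep or winkle with the MMU off. For sleep and winkle
 * the thread first drops its bit in the core idle_state word under the
 * lock; for winkle the SPRs that the hardware may lose are saved first.
 * On wakeup, SRR1 reports how much state was lost, and the per-core,
 * per-subcore and per-thread SPRs are restored as needed, with the first
 * thread of the core to take the lock restoring the shared ones.
 */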
static unsigned long power7_idle_insn(unsigned long type)
{
	int cpu = raw_smp_processor_id();
	int first = cpu_first_thread_sibling(cpu);
	unsigned long *state = &paca_ptrs[first]->idle_state;
	unsigned long thread = 1UL << cpu_thread_in_core(cpu);
	unsigned long core_thread_mask = (1UL << threads_per_core) - 1;
	unsigned long srr1;
	bool full_winkle;
	struct p7_sprs sprs = {}; /* avoid false use-uninitialised */
	bool sprs_saved = false;
	int rc;

	if (unlikely(type != PNV_THREAD_NAP)) {
		atomic_lock_thread_idle();

		BUG_ON(!(*state & thread));
		*state &= ~thread;

		if (power7_fastsleep_workaround_entry) {
			if ((*state & core_thread_mask) == 0) {
				rc = opal_config_cpu_idle_state(
						OPAL_CONFIG_IDLE_FASTSLEEP,
						OPAL_CONFIG_IDLE_APPLY);
				BUG_ON(rc);
			}
		}

		if (type == PNV_THREAD_WINKLE) {
			sprs.tscr = mfspr(SPRN_TSCR);
			sprs.worc = mfspr(SPRN_WORC);

			sprs.sdr1 = mfspr(SPRN_SDR1);
			sprs.rpr = mfspr(SPRN_RPR);

			sprs.lpcr = mfspr(SPRN_LPCR);
			if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
				sprs.hfscr = mfspr(SPRN_HFSCR);
				sprs.fscr = mfspr(SPRN_FSCR);
			}
			sprs.purr = mfspr(SPRN_PURR);
			sprs.spurr = mfspr(SPRN_SPURR);
			sprs.dscr = mfspr(SPRN_DSCR);
			sprs.wort = mfspr(SPRN_WORT);

			sprs_saved = true;

			/*
			 * Increment winkle counter and set all winkle bits if
			 * all threads are winkling. This allows wakeup side to
			 * distinguish between fast sleep and winkle state
			 * loss. Fast sleep still has to resync the timebase so
			 * this may not be a really big win.
			 */
			*state += 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT;
			if ((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS)
					>> PNV_CORE_IDLE_WINKLE_COUNT_SHIFT
					== threads_per_core)
				*state |= PNV_CORE_IDLE_THREAD_WINKLE_BITS;
			WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0);
		}

		atomic_unlock_thread_idle();
	}

	if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
		sprs.amr = mfspr(SPRN_AMR);
		sprs.iamr = mfspr(SPRN_IAMR);
		sprs.amor = mfspr(SPRN_AMOR);
		sprs.uamor = mfspr(SPRN_UAMOR);
	}

	local_paca->thread_idle_state = type;
	srr1 = isa206_idle_insn_mayloss(type); /* go idle */
	local_paca->thread_idle_state = PNV_THREAD_RUNNING;

	WARN_ON_ONCE(!srr1);
	WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR));

	if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
		if ((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS) {
			/*
			 * We don't need an isync after the mtsprs here because
			 * the upcoming mtmsrd is execution synchronizing.
			 */
			mtspr(SPRN_AMR, sprs.amr);
			mtspr(SPRN_IAMR, sprs.iamr);
			mtspr(SPRN_AMOR, sprs.amor);
			mtspr(SPRN_UAMOR, sprs.uamor);
		}
	}

	if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI))
		hmi_exception_realmode(NULL);

	if (likely((srr1 & SRR1_WAKESTATE) != SRR1_WS_HVLOSS)) {
		if (unlikely(type != PNV_THREAD_NAP)) {
			atomic_lock_thread_idle();
			if (type == PNV_THREAD_WINKLE) {
				WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0);
				*state -= 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT;
				*state &= ~(thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT);
			}
			atomic_unlock_and_stop_thread_idle();
		}
		return srr1;
	}

	/* HV state loss */
	BUG_ON(type == PNV_THREAD_NAP);

	atomic_lock_thread_idle();

	full_winkle = false;
	if (type == PNV_THREAD_WINKLE) {
		WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0);
		*state -= 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT;
		if (*state & (thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT)) {
			*state &= ~(thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT);
			full_winkle = true;
			BUG_ON(!sprs_saved);
		}
	}

	WARN_ON(*state & thread);

	if ((*state & core_thread_mask) != 0)
		goto core_woken;

	/* Per-core SPRs */
	if (full_winkle) {
		mtspr(SPRN_TSCR, sprs.tscr);
		mtspr(SPRN_WORC, sprs.worc);
	}

	if (power7_fastsleep_workaround_exit) {
		rc = opal_config_cpu_idle_state(OPAL_CONFIG_IDLE_FASTSLEEP,
						OPAL_CONFIG_IDLE_UNDO);
		BUG_ON(rc);
	}

	/* TB */
	if (opal_resync_timebase() != OPAL_SUCCESS)
		BUG();

core_woken:
	if (!full_winkle)
		goto subcore_woken;

	if ((*state & local_paca->subcore_sibling_mask) != 0)
		goto subcore_woken;

	/* Per-subcore SPRs */
	mtspr(SPRN_SDR1, sprs.sdr1);
	mtspr(SPRN_RPR, sprs.rpr);

subcore_woken:
	/*
	 * isync after restoring shared SPRs and before unlocking. Unlock
	 * only contains hwsync which does not necessarily do the right
	 * thing for SPRs.
	 */
	isync();
	atomic_unlock_and_stop_thread_idle();

	/* Fast sleep does not lose SPRs */
	if (!full_winkle)
		return srr1;

	/* Per-thread SPRs */
	mtspr(SPRN_LPCR, sprs.lpcr);
	if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
		mtspr(SPRN_HFSCR, sprs.hfscr);
		mtspr(SPRN_FSCR, sprs.fscr);
	}
	mtspr(SPRN_PURR, sprs.purr);
	mtspr(SPRN_SPURR, sprs.spurr);
	mtspr(SPRN_DSCR, sprs.dscr);
	mtspr(SPRN_WORT, sprs.wort);

	mtspr(SPRN_SPRG3, local_paca->sprg_vdso);

	/*
	 * The SLB has to be restored here, but it sometimes still
	 * contains entries, so the __ variant must be used to prevent
	 * multi hits.
	 */
	__slb_restore_bolted_realmode();

	return srr1;
}

extern unsigned long idle_kvm_start_guest(unsigned long srr1);

#ifdef CONFIG_HOTPLUG_CPU
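/*
 * Offline a P7/P8 thread: enter the deepest supported nap/sleep/winkle
 * state with the MMU off, allowing KVM to claim the hardware thread for
 * a guest while it sits in idle.
 */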
static unsigned long power7_offline(void)
{
	unsigned long srr1;

	mtmsr(MSR_IDLE);

#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
	/* Tell KVM we're entering idle. */
	/******************************************************/
	/*  N O T E   W E L L    ! ! !    N O T E   W E L L   */
	/* The following store to HSTATE_HWTHREAD_STATE(r13)  */
	/* MUST occur in real mode, i.e. with the MMU off,    */
	/* and the MMU must stay off until we clear this flag */
	/* and test HSTATE_HWTHREAD_REQ(r13) in               */
	/* pnv_powersave_wakeup in this file.                 */
	/* The reason is that another thread can switch the   */
	/* MMU to a guest context whenever this flag is set   */
	/* to KVM_HWTHREAD_IN_IDLE, and if the MMU was on,    */
	/* that would potentially cause this thread to start  */
	/* executing instructions from guest memory in        */
	/* hypervisor mode, leading to a host crash or data   */
	/* corruption, or worse.                              */
	/******************************************************/
	local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_IDLE;
#endif

	__ppc64_runlatch_off();
	srr1 = power7_idle_insn(power7_offline_type);
	__ppc64_runlatch_on();

#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
	local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_KERNEL;
	/* Order setting hwthread_state vs. testing hwthread_req */
	smp_mb();
	if (local_paca->kvm_hstate.hwthread_req)
		srr1 = idle_kvm_start_guest(srr1);
#endif

	mtmsr(MSR_KERNEL);

	return srr1;
}
#endif

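/*
 * cpuidle entry point for P7/P8: idle with interrupts soft-disabled and
 * convert whatever wakeup reason SRR1 reports into a pending interrupt
 * to be replayed once interrupts are re-enabled.
 */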
void power7_idle_type(unsigned long type)
{
	unsigned long srr1;

	if (!prep_irq_for_idle_irqsoff())
		return;

	mtmsr(MSR_IDLE);
	__ppc64_runlatch_off();
	srr1 = power7_idle_insn(type);
	__ppc64_runlatch_on();
	mtmsr(MSR_KERNEL);

	fini_irq_for_idle_irqsoff();
	irq_set_pending_from_srr1(srr1);
}

void power7_idle(void)
{
	if (!powersave_nap)
		return;

	power7_idle_type(PNV_THREAD_NAP);
}

struct p9_sprs {
	/* per core */
	u64 ptcr;
	u64 rpr;
	u64 tscr;
	u64 ldbar;

	/* per thread */
	u64 lpcr;
	u64 hfscr;
	u64 fscr;
	u64 pid;
	u64 purr;
	u64 spurr;
	u64 dscr;
	u64 wort;

	u64 mmcra;
	u32 mmcr0;
	u32 mmcr1;
	u64 mmcr2;

	/* per thread SPRs that get lost in shallow states */
	u64 amr;
	u64 iamr;
	u64 amor;
	u64 uamor;
};

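/*
 * Execute the stop instruction with the requested PSSCR value. With
 * EC=ESL=0 the wakeup is synchronous and no state can be lost. With
 * EC=ESL=1, SPRs may be lost depending on the stop level reached, so
 * they are saved beforehand and restored according to PSSCR[PLS] on
 * wakeup, including a timebase resync if the level reached can stop
 * the TB.
 */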
static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on)
{
	int cpu = raw_smp_processor_id();
	int first = cpu_first_thread_sibling(cpu);
	unsigned long *state = &paca_ptrs[first]->idle_state;
	unsigned long core_thread_mask = (1UL << threads_per_core) - 1;
	unsigned long srr1;
	unsigned long pls;
	unsigned long mmcr0 = 0;
	struct p9_sprs sprs = {}; /* avoid false used-uninitialised */
	bool sprs_saved = false;

	if (!(psscr & (PSSCR_EC|PSSCR_ESL))) {
		/* EC=ESL=0 case */

		BUG_ON(!mmu_on);

		/*
		 * Wake synchronously. SRESET via xscom may still cause
		 * a 0x100 powersave wakeup with SRR1 reason!
		 */
		srr1 = isa300_idle_stop_noloss(psscr); /* go idle */
		if (likely(!srr1))
			return 0;

		/*
		 * Registers not saved, can't recover!
		 * This would be a hardware bug
		 */
		BUG_ON((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS);

		goto out;
	}

	/* EC=ESL=1 case */
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
	if (cpu_has_feature(CPU_FTR_P9_TM_XER_SO_BUG)) {
		local_paca->requested_psscr = psscr;
		/* order setting requested_psscr vs testing dont_stop */
		smp_mb();
		if (atomic_read(&local_paca->dont_stop)) {
			local_paca->requested_psscr = 0;
			return 0;
		}
	}
#endif

	if (!cpu_has_feature(CPU_FTR_POWER9_DD2_1)) {
		/*
		 * POWER9 DD2 can incorrectly set PMAO when waking up
		 * after a state-loss idle. Saving and restoring MMCR0
		 * over idle is a workaround.
		 */
		mmcr0 = mfspr(SPRN_MMCR0);
	}
	if ((psscr & PSSCR_RL_MASK) >= pnv_first_spr_loss_level) {
		sprs.lpcr = mfspr(SPRN_LPCR);
		sprs.hfscr = mfspr(SPRN_HFSCR);
		sprs.fscr = mfspr(SPRN_FSCR);
		sprs.pid = mfspr(SPRN_PID);
		sprs.purr = mfspr(SPRN_PURR);
		sprs.spurr = mfspr(SPRN_SPURR);
		sprs.dscr = mfspr(SPRN_DSCR);
		sprs.wort = mfspr(SPRN_WORT);

		sprs.mmcra = mfspr(SPRN_MMCRA);
		sprs.mmcr0 = mfspr(SPRN_MMCR0);
		sprs.mmcr1 = mfspr(SPRN_MMCR1);
		sprs.mmcr2 = mfspr(SPRN_MMCR2);

		sprs.ptcr = mfspr(SPRN_PTCR);
		sprs.rpr = mfspr(SPRN_RPR);
		sprs.tscr = mfspr(SPRN_TSCR);
		if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
			sprs.ldbar = mfspr(SPRN_LDBAR);

		sprs_saved = true;

		atomic_start_thread_idle();
	}

	sprs.amr = mfspr(SPRN_AMR);
	sprs.iamr = mfspr(SPRN_IAMR);
	sprs.amor = mfspr(SPRN_AMOR);
	sprs.uamor = mfspr(SPRN_UAMOR);

	srr1 = isa300_idle_stop_mayloss(psscr); /* go idle */

#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
	local_paca->requested_psscr = 0;
#endif

	psscr = mfspr(SPRN_PSSCR);

	WARN_ON_ONCE(!srr1);
	WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR));

	if ((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS) {
		unsigned long mmcra;

		/*
		 * We don't need an isync after the mtsprs here because the
		 * upcoming mtmsrd is execution synchronizing.
		 */
		mtspr(SPRN_AMR, sprs.amr);
		mtspr(SPRN_IAMR, sprs.iamr);
		mtspr(SPRN_AMOR, sprs.amor);
		mtspr(SPRN_UAMOR, sprs.uamor);

		/*
		 * Workaround for POWER9 DD2.0, if we lost resources, the ERAT
		 * might have been corrupted and needs flushing. We also need
		 * to reload MMCR0 (see mmcr0 comment above).
		 */
		if (!cpu_has_feature(CPU_FTR_POWER9_DD2_1)) {
			asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT);
			mtspr(SPRN_MMCR0, mmcr0);
		}

		/*
		 * DD2.2 and earlier need to set then clear bit 60 in MMCRA
		 * to ensure the PMU starts running.
		 */
		mmcra = mfspr(SPRN_MMCRA);
		mmcra |= PPC_BIT(60);
		mtspr(SPRN_MMCRA, mmcra);
		mmcra &= ~PPC_BIT(60);
		mtspr(SPRN_MMCRA, mmcra);
	}

	if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI))
		hmi_exception_realmode(NULL);

	/*
	 * On POWER9, SRR1 bits do not match exactly as expected.
	 * SRR1_WS_GPRLOSS (10b) can also result in SPR loss, so
	 * just always test PSSCR for SPR/TB state loss.
	 */
	pls = (psscr & PSSCR_PLS) >> PSSCR_PLS_SHIFT;
	if (likely(pls < pnv_first_spr_loss_level)) {
		if (sprs_saved)
			atomic_stop_thread_idle();
		goto out;
	}

	/* HV state loss */
	BUG_ON(!sprs_saved);

	atomic_lock_thread_idle();

	if ((*state & core_thread_mask) != 0)
		goto core_woken;

	/* Per-core SPRs */
	mtspr(SPRN_PTCR, sprs.ptcr);
	mtspr(SPRN_RPR, sprs.rpr);
	mtspr(SPRN_TSCR, sprs.tscr);

	if (pls >= pnv_first_tb_loss_level) {
		/* TB loss */
		if (opal_resync_timebase() != OPAL_SUCCESS)
			BUG();
	}

	/*
	 * isync after restoring shared SPRs and before unlocking. Unlock
	 * only contains hwsync which does not necessarily do the right
	 * thing for SPRs.
	 */
	isync();

core_woken:
	atomic_unlock_and_stop_thread_idle();

	/* Per-thread SPRs */
	mtspr(SPRN_LPCR, sprs.lpcr);
	mtspr(SPRN_HFSCR, sprs.hfscr);
	mtspr(SPRN_FSCR, sprs.fscr);
	mtspr(SPRN_PID, sprs.pid);
	mtspr(SPRN_PURR, sprs.purr);
	mtspr(SPRN_SPURR, sprs.spurr);
	mtspr(SPRN_DSCR, sprs.dscr);
	mtspr(SPRN_WORT, sprs.wort);

	mtspr(SPRN_MMCRA, sprs.mmcra);
	mtspr(SPRN_MMCR0, sprs.mmcr0);
	mtspr(SPRN_MMCR1, sprs.mmcr1);
	mtspr(SPRN_MMCR2, sprs.mmcr2);
	if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
		mtspr(SPRN_LDBAR, sprs.ldbar);

	mtspr(SPRN_SPRG3, local_paca->sprg_vdso);

	if (!radix_enabled())
		__slb_restore_bolted_realmode();

out:
	if (mmu_on)
		mtmsr(MSR_KERNEL);

	return srr1;
}

#ifdef CONFIG_HOTPLUG_CPU
static unsigned long power9_offline_stop(unsigned long psscr)
{
	unsigned long srr1;

#ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE
	__ppc64_runlatch_off();
	srr1 = power9_idle_stop(psscr, true);
	__ppc64_runlatch_on();
#else
	/*
	 * Tell KVM we're entering idle.
	 * This does not have to be done in real mode because the P9 MMU
	 * is independent per-thread. Some steppings share radix/hash mode
	 * between threads, but in that case KVM has a barrier sync in real
	 * mode before and after switching between radix and hash.
	 *
	 * kvm_start_guest must still be called in real mode though, hence
	 * the false argument.
	 */
	local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_IDLE;

	__ppc64_runlatch_off();
	srr1 = power9_idle_stop(psscr, false);
	__ppc64_runlatch_on();

	local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_KERNEL;
	/* Order setting hwthread_state vs. testing hwthread_req */
	smp_mb();
	if (local_paca->kvm_hstate.hwthread_req)
		srr1 = idle_kvm_start_guest(srr1);
	mtmsr(MSR_KERNEL);
#endif

	return srr1;
}
#endif

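/*
 * cpuidle entry point for P9: merge the requested PSSCR fields into the
 * current value, stop, and replay the wakeup reason as a pending
 * interrupt.
 */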
void power9_idle_type(unsigned long stop_psscr_val,
		      unsigned long stop_psscr_mask)
{
	unsigned long psscr;
	unsigned long srr1;

	if (!prep_irq_for_idle_irqsoff())
		return;

	psscr = mfspr(SPRN_PSSCR);
	psscr = (psscr & ~stop_psscr_mask) | stop_psscr_val;

	__ppc64_runlatch_off();
	srr1 = power9_idle_stop(psscr, true);
	__ppc64_runlatch_on();

	fini_irq_for_idle_irqsoff();

	irq_set_pending_from_srr1(srr1);
}

/*
 * Used for ppc_md.power_save which needs a function with no parameters
 */
void power9_idle(void)
{
	power9_idle_type(pnv_default_stop_val, pnv_default_stop_mask);
}

#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
/*
 * This is used in working around bugs in thread reconfiguration
 * on POWER9 (at least up to Nimbus DD2.2) relating to transactional
 * memory and the way that XER[SO] is checkpointed.
 * This function forces the core into SMT4 by asking
 * all other threads not to stop, and sending a message to any
 * that are in a stop state.
 * Must be called with preemption disabled.
 */
void pnv_power9_force_smt4_catch(void)
{
	int cpu, cpu0, thr;
	int awake_threads = 1;	/* this thread is awake */
	int poke_threads = 0;
	int need_awake = threads_per_core;

	cpu = smp_processor_id();
	cpu0 = cpu & ~(threads_per_core - 1);
	for (thr = 0; thr < threads_per_core; ++thr) {
		if (cpu != cpu0 + thr)
			atomic_inc(&paca_ptrs[cpu0+thr]->dont_stop);
	}
	/* order setting dont_stop vs testing requested_psscr */
	smp_mb();
	for (thr = 0; thr < threads_per_core; ++thr) {
		if (!paca_ptrs[cpu0+thr]->requested_psscr)
			++awake_threads;
		else
			poke_threads |= (1 << thr);
	}

	/* If at least 3 threads are awake, the core is in SMT4 already */
	if (awake_threads < need_awake) {
		/* We have to wake some threads; we'll use msgsnd */
		for (thr = 0; thr < threads_per_core; ++thr) {
			if (poke_threads & (1 << thr)) {
				ppc_msgsnd_sync();
				ppc_msgsnd(PPC_DBELL_MSGTYPE, 0,
					   paca_ptrs[cpu0+thr]->hw_cpu_id);
			}
		}
		/* now spin until at least 3 threads are awake */
		do {
			for (thr = 0; thr < threads_per_core; ++thr) {
				if ((poke_threads & (1 << thr)) &&
				    !paca_ptrs[cpu0+thr]->requested_psscr) {
					++awake_threads;
					poke_threads &= ~(1 << thr);
				}
			}
		} while (awake_threads < need_awake);
	}
}
EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_catch);

void pnv_power9_force_smt4_release(void)
{
	int cpu, cpu0, thr;

	cpu = smp_processor_id();
	cpu0 = cpu & ~(threads_per_core - 1);

	/* clear all the dont_stop flags */
	for (thr = 0; thr < threads_per_core; ++thr) {
		if (cpu != cpu0 + thr)
			atomic_dec(&paca_ptrs[cpu0+thr]->dont_stop);
	}
}
EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_release);
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */

#ifdef CONFIG_HOTPLUG_CPU

void pnv_program_cpu_hotplug_lpcr(unsigned int cpu, u64 lpcr_val)
{
	u64 pir = get_hard_smp_processor_id(cpu);

	mtspr(SPRN_LPCR, lpcr_val);

	/*
	 * Program the LPCR via stop-api only if the deepest stop state
	 * can lose hypervisor context.
	 */
	if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT)
		opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val);
}

/*
 * pnv_cpu_offline: Put the CPU into the deepest available platform
 * idle state on a CPU offline. Called with interrupts hard disabled
 * and no lazy irq pending.
 */
unsigned long pnv_cpu_offline(unsigned int cpu)
{
	unsigned long srr1;

	__ppc64_runlatch_off();

	if (cpu_has_feature(CPU_FTR_ARCH_300) && deepest_stop_found) {
		unsigned long psscr;

		psscr = mfspr(SPRN_PSSCR);
		psscr = (psscr & ~pnv_deepest_stop_psscr_mask) |
						pnv_deepest_stop_psscr_val;
		srr1 = power9_offline_stop(psscr);
	} else if (cpu_has_feature(CPU_FTR_ARCH_206) && power7_offline_type) {
		srr1 = power7_offline();
	} else {
		/* This is the fallback method. We emulate snooze */
		while (!generic_check_cpu_restart(cpu)) {
			HMT_low();
			HMT_very_low();
		}
		srr1 = 0;
		HMT_medium();
	}

	__ppc64_runlatch_on();

	return srr1;
}
#endif

/*
 * Power ISA 3.0 idle initialization.
 *
 * POWER ISA 3.0 defines a new SPR Processor stop Status and Control
 * Register (PSSCR) to control idle behavior.
 *
 * PSSCR layout:
 * ----------------------------------------------------------
 * | PLS | /// | SD | ESL | EC | PSLL | /// | TR | MTL | RL |
 * ----------------------------------------------------------
 * 0      4     41   42    43   44     48    54   56    60
 *
 * PSSCR key fields:
 * Bits 0:3  - Power-Saving Level Status (PLS). This field indicates the
 *	lowest power-saving state the thread entered since the stop
 *	instruction was last executed.
 *
 * Bit 41 - Status Disable(SD)
 *	0 - Shows PLS entries
 *	1 - PLS entries are all 0
 *
 * Bit 42 - Enable State Loss
 *	0 - No state is lost irrespective of other fields
 *	1 - Allows state loss
 *
 * Bit 43 - Exit Criterion
 *	0 - Exit from power-save mode on any interrupt
 *	1 - Exit from power-save mode controlled by LPCR's PECE bits
 *
 * Bits 44:47 - Power-Saving Level Limit
 *	This limits the power-saving level that can be entered into.
 *
 * Bits 60:63 - Requested Level
 *	Used to specify which power-saving level must be entered on executing
 *	stop instruction
 */

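/*
 * Validate a PSSCR value/mask pair read from the device tree against the
 * invariants the idle code relies on, filling in default fields for older
 * firmware that only reports the requested level. Returns 0 on success or
 * an ERR_* code describing the mismatch.
 */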
int validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags)
{
	int err = 0;

	/*
	 * psscr_mask == 0xf indicates an older firmware.
	 * Set remaining fields of psscr to the default values.
	 * See NOTE above definition of PSSCR_HV_DEFAULT_VAL
	 */
	if (*psscr_mask == 0xf) {
		*psscr_val = *psscr_val | PSSCR_HV_DEFAULT_VAL;
		*psscr_mask = PSSCR_HV_DEFAULT_MASK;
		return err;
	}

	/*
	 * New firmware is expected to set the psscr_val bits correctly.
	 * Validate that the following invariants are correctly maintained by
	 * the new firmware.
	 * - ESL bit value matches the EC bit value.
	 * - ESL bit is set for all the deep stop states.
	 */
	if (GET_PSSCR_ESL(*psscr_val) != GET_PSSCR_EC(*psscr_val)) {
		err = ERR_EC_ESL_MISMATCH;
	} else if ((flags & OPAL_PM_LOSE_FULL_CONTEXT) &&
		GET_PSSCR_ESL(*psscr_val) == 0) {
		err = ERR_DEEP_STATE_ESL_MISMATCH;
	}

	return err;
}

/*
 * pnv_power9_idle_init: Initializes the default idle state, first
 *                       deep idle state and deepest idle state on
 *                       ISA 3.0 CPUs.
 */
static void __init pnv_power9_idle_init(void)
{
	u64 max_residency_ns = 0;
	int i;

	/*
	 * pnv_deepest_stop_{val,mask} should be set to values corresponding to
	 * the deepest stop state.
	 *
	 * pnv_default_stop_{val,mask} should be set to values corresponding to
	 * the deepest loss-less (OPAL_PM_STOP_INST_FAST) stop state.
	 */
	pnv_first_tb_loss_level = MAX_STOP_STATE + 1;
	pnv_first_spr_loss_level = MAX_STOP_STATE + 1;
	for (i = 0; i < nr_pnv_idle_states; i++) {
		int err;
		struct pnv_idle_states_t *state = &pnv_idle_states[i];
		u64 psscr_rl = state->psscr_val & PSSCR_RL_MASK;

		if ((state->flags & OPAL_PM_TIMEBASE_STOP) &&
		     (pnv_first_tb_loss_level > psscr_rl))
			pnv_first_tb_loss_level = psscr_rl;

		if ((state->flags & OPAL_PM_LOSE_FULL_CONTEXT) &&
		     (pnv_first_spr_loss_level > psscr_rl))
			pnv_first_spr_loss_level = psscr_rl;

		/*
		 * The idle code does not deal with TB loss occurring
		 * in a shallower state than SPR loss, so force it to
		 * behave like SPRs are lost if TB is lost. POWER9 would
		 * never encounter this, but a POWER8 core would if it
		 * implemented the stop instruction. So this is for forward
		 * compatibility.
		 */
		if ((state->flags & OPAL_PM_TIMEBASE_STOP) &&
		     (pnv_first_spr_loss_level > psscr_rl))
			pnv_first_spr_loss_level = psscr_rl;

		err = validate_psscr_val_mask(&state->psscr_val,
					      &state->psscr_mask,
					      state->flags);
		if (err) {
			report_invalid_psscr_val(state->psscr_val, err);
			continue;
		}

		state->valid = true;

		if (max_residency_ns < state->residency_ns) {
			max_residency_ns = state->residency_ns;
			pnv_deepest_stop_psscr_val = state->psscr_val;
			pnv_deepest_stop_psscr_mask = state->psscr_mask;
			pnv_deepest_stop_flag = state->flags;
			deepest_stop_found = true;
		}

		if (!default_stop_found &&
		    (state->flags & OPAL_PM_STOP_INST_FAST)) {
			pnv_default_stop_val = state->psscr_val;
			pnv_default_stop_mask = state->psscr_mask;
			default_stop_found = true;
			WARN_ON(state->flags & OPAL_PM_LOSE_FULL_CONTEXT);
		}
	}

	if (unlikely(!default_stop_found)) {
		pr_warn("cpuidle-powernv: No suitable default stop state found. Disabling platform idle.\n");
	} else {
		ppc_md.power_save = power9_idle;
		pr_info("cpuidle-powernv: Default stop: psscr = 0x%016llx,mask=0x%016llx\n",
			pnv_default_stop_val, pnv_default_stop_mask);
	}

	if (unlikely(!deepest_stop_found)) {
		pr_warn("cpuidle-powernv: No suitable stop state for CPU-Hotplug. Offlined CPUs will busy wait\n");
	} else {
		pr_info("cpuidle-powernv: Deepest stop: psscr = 0x%016llx,mask=0x%016llx\n",
			pnv_deepest_stop_psscr_val,
			pnv_deepest_stop_psscr_mask);
	}

	pr_info("cpuidle-powernv: First stop level that may lose SPRs = 0x%llx\n",
		pnv_first_spr_loss_level);

	pr_info("cpuidle-powernv: First stop level that may lose timebase = 0x%llx\n",
		pnv_first_tb_loss_level);
}

static void __init pnv_disable_deep_states(void)
{
	/*
	 * The stop-api is unable to restore hypervisor
	 * resources on wakeup from platform idle states which
	 * lose full context. So disable such states.
	 */
	supported_cpuidle_states &= ~OPAL_PM_LOSE_FULL_CONTEXT;
	pr_warn("cpuidle-powernv: Disabling idle states that lose full context\n");
	pr_warn("cpuidle-powernv: Idle power-savings, CPU-Hotplug affected\n");

	if (cpu_has_feature(CPU_FTR_ARCH_300) &&
	    (pnv_deepest_stop_flag & OPAL_PM_LOSE_FULL_CONTEXT)) {
		/*
		 * Use the default stop state for CPU-Hotplug
		 * if available.
		 */
		if (default_stop_found) {
			pnv_deepest_stop_psscr_val = pnv_default_stop_val;
			pnv_deepest_stop_psscr_mask = pnv_default_stop_mask;
			pr_warn("cpuidle-powernv: Offlined CPUs will stop with psscr = 0x%016llx\n",
				pnv_deepest_stop_psscr_val);
		} else { /* Fallback to snooze loop for CPU-Hotplug */
			deepest_stop_found = false;
			pr_warn("cpuidle-powernv: Offlined CPUs will busy wait\n");
		}
	}
}

/*
 * Probe device tree for supported idle states
 */
static void __init pnv_probe_idle_states(void)
{
	int i;

	if (nr_pnv_idle_states < 0) {
		pr_warn("cpuidle-powernv: no idle states found in the DT\n");
		return;
	}

	if (cpu_has_feature(CPU_FTR_ARCH_300))
		pnv_power9_idle_init();

	for (i = 0; i < nr_pnv_idle_states; i++)
		supported_cpuidle_states |= pnv_idle_states[i].flags;
}

/*
 * This function parses the device tree and populates all the information
 * into the pnv_idle_states structure. It also sets up nr_pnv_idle_states,
 * which is the number of cpuidle states discovered through the device tree.
 */
static int pnv_parse_cpuidle_dt(void)
{
	struct device_node *np;
	int nr_idle_states, i;
	int rc = 0;
	u32 *temp_u32;
	u64 *temp_u64;
	const char **temp_string;

	np = of_find_node_by_path("/ibm,opal/power-mgt");
	if (!np) {
		pr_warn("opal: PowerMgmt Node not found\n");
		return -ENODEV;
	}
	nr_idle_states = of_property_count_u32_elems(np,
						"ibm,cpu-idle-state-flags");

	pnv_idle_states = kcalloc(nr_idle_states, sizeof(*pnv_idle_states),
				  GFP_KERNEL);
	temp_u32 = kcalloc(nr_idle_states, sizeof(u32), GFP_KERNEL);
	temp_u64 = kcalloc(nr_idle_states, sizeof(u64), GFP_KERNEL);
	temp_string = kcalloc(nr_idle_states, sizeof(char *), GFP_KERNEL);

	if (!(pnv_idle_states && temp_u32 && temp_u64 && temp_string)) {
		pr_err("Could not allocate memory for dt parsing\n");
		rc = -ENOMEM;
		goto out;
	}

	/* Read flags */
	if (of_property_read_u32_array(np, "ibm,cpu-idle-state-flags",
				       temp_u32, nr_idle_states)) {
		pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-flags in DT\n");
		rc = -EINVAL;
		goto out;
	}
	for (i = 0; i < nr_idle_states; i++)
		pnv_idle_states[i].flags = temp_u32[i];

	/* Read latencies */
	if (of_property_read_u32_array(np, "ibm,cpu-idle-state-latencies-ns",
				       temp_u32, nr_idle_states)) {
		pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-latencies-ns in DT\n");
		rc = -EINVAL;
		goto out;
	}
	for (i = 0; i < nr_idle_states; i++)
		pnv_idle_states[i].latency_ns = temp_u32[i];

	/* Read residencies */
	if (of_property_read_u32_array(np, "ibm,cpu-idle-state-residency-ns",
				       temp_u32, nr_idle_states)) {
		pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-residency-ns in DT\n");
		rc = -EINVAL;
		goto out;
	}
	for (i = 0; i < nr_idle_states; i++)
		pnv_idle_states[i].residency_ns = temp_u32[i];

	/* For power9 */
	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
		/* Read pm_crtl_val */
		if (of_property_read_u64_array(np, "ibm,cpu-idle-state-psscr",
					       temp_u64, nr_idle_states)) {
			pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr in DT\n");
			rc = -EINVAL;
			goto out;
		}
		for (i = 0; i < nr_idle_states; i++)
			pnv_idle_states[i].psscr_val = temp_u64[i];

		/* Read pm_crtl_mask */
		if (of_property_read_u64_array(np, "ibm,cpu-idle-state-psscr-mask",
					       temp_u64, nr_idle_states)) {
			pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr-mask in DT\n");
			rc = -EINVAL;
			goto out;
		}
		for (i = 0; i < nr_idle_states; i++)
			pnv_idle_states[i].psscr_mask = temp_u64[i];
	}

	/*
	 * power8 specific properties ibm,cpu-idle-state-pmicr-mask and
	 * ibm,cpu-idle-state-pmicr-val were never used and there is no
	 * plan to use it in near future. Hence, not parsing these properties
	 */

	if (of_property_read_string_array(np, "ibm,cpu-idle-state-names",
					  temp_string, nr_idle_states) < 0) {
		pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-names in DT\n");
		rc = -EINVAL;
		goto out;
	}
	for (i = 0; i < nr_idle_states; i++)
		strlcpy(pnv_idle_states[i].name, temp_string[i],
			PNV_IDLE_NAME_LEN);
	nr_pnv_idle_states = nr_idle_states;
	rc = 0;
out:
	kfree(temp_u32);
	kfree(temp_u64);
	kfree(temp_string);
	return rc;
}

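/*
 * Boot-time setup: initialize the per-core idle bookkeeping in the pacas,
 * parse the idle states from the device tree, pick the nap/sleep/winkle
 * offline type on P8 (the stop states cover P9), and program the SLW
 * engine if any supported state loses full context.
 */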
static int __init pnv_init_idle_states(void)
{
	int cpu;
	int rc = 0;

	/* Set up PACA fields */
	for_each_present_cpu(cpu) {
		struct paca_struct *p = paca_ptrs[cpu];

		p->idle_state = 0;
		if (cpu == cpu_first_thread_sibling(cpu))
			p->idle_state = (1 << threads_per_core) - 1;

		if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
			/* P7/P8 nap */
			p->thread_idle_state = PNV_THREAD_RUNNING;
		} else {
			/* P9 stop */
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
			p->requested_psscr = 0;
			atomic_set(&p->dont_stop, 0);
#endif
		}
	}

	/* In case we error out nr_pnv_idle_states will be zero */
	nr_pnv_idle_states = 0;
	supported_cpuidle_states = 0;

	if (cpuidle_disable != IDLE_NO_OVERRIDE)
		goto out;
	rc = pnv_parse_cpuidle_dt();
	if (rc)
		return rc;
	pnv_probe_idle_states();

	if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
		if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
			power7_fastsleep_workaround_entry = false;
			power7_fastsleep_workaround_exit = false;
		} else {
			/*
			 * OPAL_PM_SLEEP_ENABLED_ER1 is set. It indicates that
			 * workaround is needed to use fastsleep. Provide sysfs
			 * control to choose how this workaround has to be
			 * applied.
			 */
			device_create_file(cpu_subsys.dev_root,
				&dev_attr_fastsleep_workaround_applyonce);
		}

		update_subcore_sibling_mask();

		if (supported_cpuidle_states & OPAL_PM_NAP_ENABLED) {
			ppc_md.power_save = power7_idle;
			power7_offline_type = PNV_THREAD_NAP;
		}

		if ((supported_cpuidle_states & OPAL_PM_WINKLE_ENABLED) &&
		    (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT))
			power7_offline_type = PNV_THREAD_WINKLE;
		else if ((supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED) ||
			 (supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1))
			power7_offline_type = PNV_THREAD_SLEEP;
	}

	if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT) {
		if (pnv_save_sprs_for_deep_states())
			pnv_disable_deep_states();
	}

out:
	return 0;
}
machine_subsys_initcall(powernv, pnv_init_idle_states);