| xj | b04a402 | 2021-11-25 15:01:52 +0800 | [diff] [blame] | 1 | // SPDX-License-Identifier: GPL-2.0 | 
|  | 2 | /* | 
|  | 3 | *	Precise Delay Loops for i386 | 
|  | 4 | * | 
|  | 5 | *	Copyright (C) 1993 Linus Torvalds | 
|  | 6 | *	Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz> | 
|  | 7 | *	Copyright (C) 2008 Jiri Hladky <hladky _dot_ jiri _at_ gmail _dot_ com> | 
|  | 8 | * | 
|  | 9 | *	The __delay function must _NOT_ be inlined as its execution time | 
|  | 10 | *	depends wildly on alignment on many x86 processors. The additional | 
|  | 11 | *	jump magic is needed to get the timing stable on all the CPU's | 
|  | 12 | *	we have to worry about. | 
|  | 13 | */ | 
|  | 14 |  | 
|  | 15 | #include <linux/export.h> | 
|  | 16 | #include <linux/sched.h> | 
|  | 17 | #include <linux/timex.h> | 
|  | 18 | #include <linux/preempt.h> | 
|  | 19 | #include <linux/delay.h> | 
|  | 20 |  | 
|  | 21 | #include <asm/processor.h> | 
|  | 22 | #include <asm/delay.h> | 
|  | 23 | #include <asm/timer.h> | 
|  | 24 | #include <asm/mwait.h> | 
|  | 25 |  | 
|  | 26 | #ifdef CONFIG_SMP | 
|  | 27 | # include <asm/smp.h> | 
|  | 28 | #endif | 
|  | 29 |  | 
|  | 30 | /* simple loop based delay: */ | 
|  | 31 | static void delay_loop(unsigned long loops) | 
|  | 32 | { | 
|  | 33 | asm volatile( | 
|  | 34 | "	test %0,%0	\n" | 
|  | 35 | "	jz 3f		\n" | 
|  | 36 | "	jmp 1f		\n" | 
|  | 37 |  | 
|  | 38 | ".align 16		\n" | 
|  | 39 | "1:	jmp 2f		\n" | 
|  | 40 |  | 
|  | 41 | ".align 16		\n" | 
|  | 42 | "2:	dec %0		\n" | 
|  | 43 | "	jnz 2b		\n" | 
|  | 44 | "3:	dec %0		\n" | 
|  | 45 |  | 
|  | 46 | : /* we don't need output */ | 
|  | 47 | :"a" (loops) | 
|  | 48 | ); | 
|  | 49 | } | 
|  | 50 |  | 
|  | 51 | /* TSC based delay: */ | 
|  | 52 | static void delay_tsc(unsigned long __loops) | 
|  | 53 | { | 
|  | 54 | u64 bclock, now, loops = __loops; | 
|  | 55 | int cpu; | 
|  | 56 |  | 
|  | 57 | preempt_disable(); | 
|  | 58 | cpu = smp_processor_id(); | 
|  | 59 | bclock = rdtsc_ordered(); | 
|  | 60 | for (;;) { | 
|  | 61 | now = rdtsc_ordered(); | 
|  | 62 | if ((now - bclock) >= loops) | 
|  | 63 | break; | 
|  | 64 |  | 
|  | 65 | /* Allow RT tasks to run */ | 
|  | 66 | preempt_enable(); | 
|  | 67 | rep_nop(); | 
|  | 68 | preempt_disable(); | 
|  | 69 |  | 
|  | 70 | /* | 
|  | 71 | * It is possible that we moved to another CPU, and | 
|  | 72 | * since TSC's are per-cpu we need to calculate | 
|  | 73 | * that. The delay must guarantee that we wait "at | 
|  | 74 | * least" the amount of time. Being moved to another | 
|  | 75 | * CPU could make the wait longer but we just need to | 
|  | 76 | * make sure we waited long enough. Rebalance the | 
|  | 77 | * counter for this CPU. | 
|  | 78 | */ | 
|  | 79 | if (unlikely(cpu != smp_processor_id())) { | 
|  | 80 | loops -= (now - bclock); | 
|  | 81 | cpu = smp_processor_id(); | 
|  | 82 | bclock = rdtsc_ordered(); | 
|  | 83 | } | 
|  | 84 | } | 
|  | 85 | preempt_enable(); | 
|  | 86 | } | 
|  | 87 |  | 
|  | 88 | /* | 
|  | 89 | * On some AMD platforms, MWAITX has a configurable 32-bit timer, that | 
|  | 90 | * counts with TSC frequency. The input value is the loop of the | 
|  | 91 | * counter, it will exit when the timer expires. | 
|  | 92 | */ | 
|  | 93 | static void delay_mwaitx(unsigned long __loops) | 
|  | 94 | { | 
|  | 95 | u64 start, end, delay, loops = __loops; | 
|  | 96 |  | 
|  | 97 | /* | 
|  | 98 | * Timer value of 0 causes MWAITX to wait indefinitely, unless there | 
|  | 99 | * is a store on the memory monitored by MONITORX. | 
|  | 100 | */ | 
|  | 101 | if (loops == 0) | 
|  | 102 | return; | 
|  | 103 |  | 
|  | 104 | start = rdtsc_ordered(); | 
|  | 105 |  | 
|  | 106 | for (;;) { | 
|  | 107 | delay = min_t(u64, MWAITX_MAX_LOOPS, loops); | 
|  | 108 |  | 
|  | 109 | /* | 
|  | 110 | * Use cpu_tss_rw as a cacheline-aligned, seldomly | 
|  | 111 | * accessed per-cpu variable as the monitor target. | 
|  | 112 | */ | 
|  | 113 | __monitorx(raw_cpu_ptr(&cpu_tss_rw), 0, 0); | 
|  | 114 |  | 
|  | 115 | /* | 
|  | 116 | * AMD, like Intel's MWAIT version, supports the EAX hint and | 
|  | 117 | * EAX=0xf0 means, do not enter any deep C-state and we use it | 
|  | 118 | * here in delay() to minimize wakeup latency. | 
|  | 119 | */ | 
|  | 120 | __mwaitx(MWAITX_DISABLE_CSTATES, delay, MWAITX_ECX_TIMER_ENABLE); | 
|  | 121 |  | 
|  | 122 | end = rdtsc_ordered(); | 
|  | 123 |  | 
|  | 124 | if (loops <= end - start) | 
|  | 125 | break; | 
|  | 126 |  | 
|  | 127 | loops -= end - start; | 
|  | 128 |  | 
|  | 129 | start = end; | 
|  | 130 | } | 
|  | 131 | } | 
|  | 132 |  | 
/*
 * Delay implementation that __delay() dispatches to.  It starts out as
 * the plain counting loop; use_tsc_delay() and use_mwaitx_delay() switch
 * it to one of the other implementations.
 *
 * Since we calibrate only once at boot, this function should be set
 * once at boot and not changed afterwards.
 */
static void (*delay_fn)(unsigned long) = delay_loop;
|  | 138 |  | 
|  | 139 | void use_tsc_delay(void) | 
|  | 140 | { | 
|  | 141 | if (delay_fn == delay_loop) | 
|  | 142 | delay_fn = delay_tsc; | 
|  | 143 | } | 
|  | 144 |  | 
/*
 * Make __delay() use the MWAITX based implementation.  Unlike
 * use_tsc_delay(), this replaces whatever implementation is currently
 * selected.
 */
void use_mwaitx_delay(void)
{
	delay_fn = delay_mwaitx;
}
|  | 149 |  | 
|  | 150 | int read_current_timer(unsigned long *timer_val) | 
|  | 151 | { | 
|  | 152 | if (delay_fn == delay_tsc) { | 
|  | 153 | *timer_val = rdtsc(); | 
|  | 154 | return 0; | 
|  | 155 | } | 
|  | 156 | return -1; | 
|  | 157 | } | 
|  | 158 |  | 
/*
 * Delay for @loops units by calling whichever implementation delay_fn
 * currently points to (counting loop, TSC or MWAITX).
 */
void __delay(unsigned long loops)
{
	delay_fn(loops);
}
EXPORT_SYMBOL(__delay);
|  | 164 |  | 
|  | 165 | void __const_udelay(unsigned long xloops) | 
|  | 166 | { | 
|  | 167 | unsigned long lpj = this_cpu_read(cpu_info.loops_per_jiffy) ? : loops_per_jiffy; | 
|  | 168 | int d0; | 
|  | 169 |  | 
|  | 170 | xloops *= 4; | 
|  | 171 | asm("mull %%edx" | 
|  | 172 | :"=d" (xloops), "=&a" (d0) | 
|  | 173 | :"1" (xloops), "0" (lpj * (HZ / 4))); | 
|  | 174 |  | 
|  | 175 | __delay(++xloops); | 
|  | 176 | } | 
|  | 177 | EXPORT_SYMBOL(__const_udelay); | 
|  | 178 |  | 
/*
 * Delay for @usecs microseconds: scale to the 2**32-based units that
 * __const_udelay() takes and hand over.
 */
void __udelay(unsigned long usecs)
{
	/* 2**32 / 1000000 (rounded up) */
	const unsigned long xloops_per_usec = 0x000010c7;

	__const_udelay(usecs * xloops_per_usec);
}
EXPORT_SYMBOL(__udelay);
|  | 184 |  | 
/*
 * Delay for @nsecs nanoseconds: scale to the 2**32-based units that
 * __const_udelay() takes and hand over.
 */
void __ndelay(unsigned long nsecs)
{
	/* 2**32 / 1000000000 (rounded up) */
	const unsigned long xloops_per_nsec = 0x00005;

	__const_udelay(nsecs * xloops_per_nsec);
}
EXPORT_SYMBOL(__ndelay);