/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 *
 * Copyright (C) IBM Corporation, 2012
 *
 * Author: Anton Blanchard <anton@au.ibm.com>
 */
| 8 | #include <asm/page.h> |
| 9 | #include <asm/ppc_asm.h> |
| 10 | |
/*
 * copypage_power7 - copy one page, 128 bytes per loop iteration.
 *
 * In:	r3 = destination ("to") address
 *	r4 = source ("from") address
 *	Both are expected to be page aligned; the prefetch setup relies on it.
 *
 * With CONFIG_ALTIVEC, enter_vmx_ops is called first. A non-zero result
 * selects the vector (lvx/stvx) loop, which finishes by tail-calling
 * exit_vmx_ops. A zero result, or a build without CONFIG_ALTIVEC, selects
 * the GPR (ld/std) loop; that loop borrows r14-r20, so it saves and
 * restores them in a STACKFRAMESIZE stack frame.
 */
#define COPYPAGE_CHUNK	128	/* bytes moved by one pass of either loop */

_GLOBAL(copypage_power7)
	/*
	 * Both pages are prefetched with enhanced touch instructions:
	 * stream ID 0 describes the load side and stream ID 1 the store
	 * side. Source and destination are page aligned, so the bottom
	 * 7 bits of each address are already clear and need no masking.
	 */
#ifdef CONFIG_PPC_64K_PAGES
	lis	r7,0x0E01		/* depth=7, units/cachelines=512 */
#else
	lis	r7,0x0E00		/* depth=7 */
	ori	r7,r7,0x1000		/* units/cachelines=32 */
#endif
	ori	r10,r7,1		/* same length/depth word, stream=1 */

	lis	r8,0x8000		/* GO=1 */
	clrldi	r8,r8,32		/* keep only the low 32 bits */

	ori	r9,r3,1			/* "to" address tagged with stream=1 */

	/* Describe read stream 0. */
	dcbt	0,r4,0b01000		/* addr from */
	dcbt	0,r7,0b01010		/* length and depth from */
	/* Describe write stream 1. */
	dcbtst	0,r9,0b01000		/* addr to */
	dcbtst	0,r10,0b01010		/* length and depth to */
	eieio
	dcbt	0,r8,0b01010		/* all streams GO */

#ifdef CONFIG_ALTIVEC
	/*
	 * Call enter_vmx_ops. LR is stored at 16(r1) relative to the entry
	 * stack pointer; r3 and r4 are parked in the R31/R30 slots of the
	 * frame that the stdu below creates, and reloaded from there after
	 * the call.
	 */
	mflr	r0
	std	r0,16(r1)
	std	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
	std	r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
	stdu	r1,-STACKFRAMESIZE(r1)
	bl	enter_vmx_ops
	cmpwi	r3,0			/* test result before r3 is reloaded */
	ld	r0,STACKFRAMESIZE+16(r1)
	ld	r3,STK_REG(R31)(r1)
	ld	r4,STK_REG(R30)(r1)
	mtlr	r0

	/* Loop count for whichever copy loop runs next. */
	li	r0,(PAGE_SIZE/COPYPAGE_CHUNK)
	mtctr	r0

	/* Zero from enter_vmx_ops: use the GPR loop, frame still in place. */
	beq	.Lnonvmx_copy

	/* Vector path needs no frame of its own: pop it now. */
	addi	r1,r1,STACKFRAMESIZE

	/* Index registers for the 16-byte vectors within one chunk. */
	li	r6,16
	li	r7,32
	li	r8,48
	li	r9,64
	li	r10,80
	li	r11,96
	li	r12,112

	.align	5
.Lvmx_loop:
	/* Load eight 16-byte vectors from the source ... */
	lvx	v7,0,r4
	lvx	v6,r4,r6
	lvx	v5,r4,r7
	lvx	v4,r4,r8
	lvx	v3,r4,r9
	lvx	v2,r4,r10
	lvx	v1,r4,r11
	lvx	v0,r4,r12
	addi	r4,r4,COPYPAGE_CHUNK
	/* ... and store them to the destination. */
	stvx	v7,0,r3
	stvx	v6,r3,r6
	stvx	v5,r3,r7
	stvx	v4,r3,r8
	stvx	v3,r3,r9
	stvx	v2,r3,r10
	stvx	v1,r3,r11
	stvx	v0,r3,r12
	addi	r3,r3,COPYPAGE_CHUNK
	bdnz	.Lvmx_loop

	b	exit_vmx_ops		/* tail call optimise */

#else
	/* No VMX support built in: set the loop count and open the frame. */
	li	r0,(PAGE_SIZE/COPYPAGE_CHUNK)
	mtctr	r0

	stdu	r1,-STACKFRAMESIZE(r1)
#endif

.Lnonvmx_copy:
	/*
	 * GPR copy. A STACKFRAMESIZE frame is live on every way in here.
	 * r14-r20 are used as extra data registers, so save them first.
	 */
	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)
	std	r17,STK_REG(R17)(r1)
	std	r18,STK_REG(R18)(r1)
	std	r19,STK_REG(R19)(r1)
	std	r20,STK_REG(R20)(r1)

.Lgpr_loop:
	/* Load sixteen doublewords from the source ... */
	ld	r0,0(r4)
	ld	r5,8(r4)
	ld	r6,16(r4)
	ld	r7,24(r4)
	ld	r8,32(r4)
	ld	r9,40(r4)
	ld	r10,48(r4)
	ld	r11,56(r4)
	ld	r12,64(r4)
	ld	r14,72(r4)
	ld	r15,80(r4)
	ld	r16,88(r4)
	ld	r17,96(r4)
	ld	r18,104(r4)
	ld	r19,112(r4)
	ld	r20,120(r4)
	addi	r4,r4,COPYPAGE_CHUNK
	/* ... and store them to the destination. */
	std	r0,0(r3)
	std	r5,8(r3)
	std	r6,16(r3)
	std	r7,24(r3)
	std	r8,32(r3)
	std	r9,40(r3)
	std	r10,48(r3)
	std	r11,56(r3)
	std	r12,64(r3)
	std	r14,72(r3)
	std	r15,80(r3)
	std	r16,88(r3)
	std	r17,96(r3)
	std	r18,104(r3)
	std	r19,112(r3)
	std	r20,120(r3)
	addi	r3,r3,COPYPAGE_CHUNK
	bdnz	.Lgpr_loop

	/* Put r14-r20 back, drop the frame and return. */
	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)
	ld	r17,STK_REG(R17)(r1)
	ld	r18,STK_REG(R18)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r20,STK_REG(R20)(r1)
	addi	r1,r1,STACKFRAMESIZE
	blr