Text file
src/runtime/memmove_ppc64x.s
Documentation: runtime
1 // Copyright 2014 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 //go:build ppc64 || ppc64le
6 // +build ppc64 ppc64le
7
8 #include "textflag.h"
9
10 // See memmove Go doc for important implementation constraints.
11
12 // func memmove(to, from unsafe.Pointer, n uintptr)
13
14 // target (destination) address; receives the `to` argument
15 #define TGT R3
16 // source address; receives the `from` argument
17 #define SRC R4
18 // length to move, in bytes; receives the `n` argument
19 #define LEN R5
20 // number of whole doublewords to move (LEN >> 3)
21 #define DWORDS R6
22 // number of tail bytes left over after the doublewords (LEN & 7), always < 8
23 #define BYTES R7
24 // constant 16, used as the index of the second 16-byte half of a 32-byte chunk
25 #define IDX16 R8
26 // scratch: data in flight for scalar copies, and the dest - src distance
27 #define TMP R9
28 // number of 32-byte chunks (DWORDS >> 2)
29 #define QWORDS R10
30
// runtime·memmove copies n bytes from `from` to `to`. The two regions may
// overlap.
//
// Arguments are read from the caller's frame: to+0(FP), from+8(FP) and
// n+16(FP). There is no result.
//
// Strategy:
//   - If (to - from), taken as an unsigned value, is >= n, a front-to-back
//     copy cannot overwrite source bytes that are still to be read, so the
//     copy runs forward: 32-byte VSX chunks, then 16 and 8 bytes, then a
//     4/2/1 byte tail.
//   - Otherwise the copy runs back to front: the n&7 trailing bytes first,
//     then 32-byte VSX chunks when the regions are at least 32 bytes apart,
//     then any remaining doublewords.
//
// Condition register usage:
//   CR0  scratch; on entry to `backward` it still holds BYTES == 0
//   CR1  DWORDS compared with 0
//   CR2  (to - from) compared with n
//   CR3  copy of CR0 taken right after BYTES was computed
//
// Writes R3-R10, CTR, VS32, VS33 and CR0-CR3.
TEXT runtime·memmove(SB), NOSPLIT|NOFRAME, $0-24
	MOVD	to+0(FP), TGT
	MOVD	from+8(FP), SRC
	MOVD	n+16(FP), LEN

	// Determine if there are doublewords to
	// copy so a more efficient move can be done
check:
	ANDCC	$7, LEN, BYTES		// BYTES = LEN & 7; CR0[EQ] set if no tail bytes
	SRD	$3, LEN, DWORDS		// DWORDS = LEN >> 3
	MOVFL	CR0, CR3		// save CR0 from ANDCC; CR0 is reused below
	CMP	DWORDS, $0, CR1		// CR1[EQ] set if no doublewords to copy

	// Determine overlap by subtracting dest - src and comparing against the
	// length. This catches the cases where src and dest are in different types
	// of storage such as stack and static to avoid doing backward move when not
	// necessary.

	SUB	SRC, TGT, TMP		// TMP = dest - src
	CMPU	TMP, LEN, CR2		// unsigned: dest - src < len?
	BC	12, 8, backward		// blt cr2: overlapping, copy back to front

	// Copying forward if no overlap.

	BC	12, 6, checkbytes	// beq cr1: fewer than 8 bytes, only the tail
	SRDCC	$2, DWORDS, QWORDS	// QWORDS = number of 32-byte chunks
	BEQ	lt32gt8			// < 32 bytes

	// Prepare for moves of 32 bytes at a time.

forward32setup:
	DCBTST	(TGT)			// cache hint: destination is about to be stored
	DCBT	(SRC)			// cache hint: source is about to be loaded
	MOVD	QWORDS, CTR		// loop count: number of 32-byte chunks
	MOVD	$16, IDX16		// offset of the second 16-byte half

forward32:
	LXVD2X	(R0)(SRC), VS32		// load bytes 0-15
	LXVD2X	(IDX16)(SRC), VS33	// load bytes 16-31
	ADD	$32, SRC
	STXVD2X	VS32, (R0)(TGT)		// store bytes 0-15
	STXVD2X	VS33, (IDX16)(TGT)	// store bytes 16-31
	ADD	$32, TGT		// bump up for next set
	BC	16, 0, forward32	// bdnz
	ANDCC	$3, DWORDS		// doublewords left after the 32-byte chunks
	BEQ	checkbytes		// only bytes remain

lt32gt8:
	// At this point >= 8 and < 32 bytes of doublewords remain.
	// Move 16 bytes if possible
	CMP	DWORDS, $2
	BLT	lt16
	LXVD2X	(R0)(SRC), VS32
	ADD	$-2, DWORDS
	STXVD2X	VS32, (R0)(TGT)
	ADD	$16, SRC
	ADD	$16, TGT

lt16:	// Move 8 bytes if possible
	CMP	DWORDS, $1
	BLT	checkbytes
	MOVD	0(SRC), TMP
	ADD	$8, SRC
	MOVD	TMP, 0(TGT)
	ADD	$8, TGT

checkbytes:
	BC	12, 14, LR		// beq cr3 lr: no tail bytes, done

lt8:	// Move word if possible
	CMP	BYTES, $4
	BLT	lt4
	MOVWZ	0(SRC), TMP
	ADD	$-4, BYTES
	MOVW	TMP, 0(TGT)
	ADD	$4, SRC
	ADD	$4, TGT

lt4:	// Move halfword if possible
	CMP	BYTES, $2
	BLT	lt2
	MOVHZ	0(SRC), TMP
	ADD	$-2, BYTES
	MOVH	TMP, 0(TGT)
	ADD	$2, SRC
	ADD	$2, TGT

lt2:	// Move last byte if 1 left
	CMP	BYTES, $1
	BC	12, 0, LR		// blt lr: nothing left, done
	MOVBZ	0(SRC), TMP
	MOVBZ	TMP, 0(TGT)
	RET

backward:
	// Copying backwards proceeds by copying BYTES tail bytes, then DWORDS
	// doublewords. TGT and SRC are advanced to the end of the
	// destination/source buffers respectively and moved back as we copy.

	ADD	LEN, SRC, SRC		// end of source
	ADD	TGT, LEN, TGT		// end of dest

	BEQ	nobackwardtail		// CR0 still holds BYTES == 0 from check

	MOVD	BYTES, CTR		// tail bytes to move

backwardtailloop:
	MOVBZ	-1(SRC), TMP		// last byte not yet copied
	SUB	$1, SRC
	MOVBZ	TMP, -1(TGT)
	SUB	$1, TGT
	BC	16, 0, backwardtailloop	// bdnz

nobackwardtail:
	BC	4, 5, LR		// ble cr1 lr: no doublewords, done

backwardlarge:
	MOVD	DWORDS, CTR		// count for the doubleword loop
	// Use the 32-byte VSX loop only if dest is at least 32 bytes above src
	// and there is at least one 32-byte chunk. The distance must be
	// dest - src: on this path src - dest is never positive, so testing it
	// would always fall back to the doubleword loop.
	SUB	SRC, TGT, TMP		// TMP = dest - src
	CMP	TMP, $32
	BLT	backwardlargeloop	// regions closer than 32 bytes
	SRDCC	$2, DWORDS, QWORDS	// QWORDS = number of 32-byte chunks
	BNE	backward32setup

backwardlargeloop:
	MOVD	-8(SRC), TMP
	SUB	$8, SRC
	MOVD	TMP, -8(TGT)
	SUB	$8, TGT
	BC	16, 0, backwardlargeloop	// bdnz
	RET

backward32setup:
	MOVD	QWORDS, CTR		// loop count: number of 32-byte chunks
	MOVD	$16, IDX16		// offset of the second 16-byte half

backward32loop:
	SUB	$32, TGT
	SUB	$32, SRC
	// Both loads complete before either store, and the stores land at or
	// above the source bytes already read.
	LXVD2X	(R0)(SRC), VS32		// load bytes 0-15 from the source
	LXVD2X	(IDX16)(SRC), VS33	// load bytes 16-31 from the source
	STXVD2X	VS32, (R0)(TGT)		// store bytes 0-15 to the destination
	STXVD2X	VS33, (IDX16)(TGT)	// store bytes 16-31 to the destination
	BC	16, 0, backward32loop	// bdnz
	// Only DWORDS & 3 doublewords are left. Reloading the full DWORDS count
	// here would copy past the start of both buffers.
	ANDCC	$3, DWORDS		// doublewords left after the 32-byte chunks
	BC	12, 2, LR		// beq cr0 lr: nothing left, done
	MOVD	DWORDS, CTR
	BR	backwardlargeloop
174
View as plain text