// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build ppc64 || ppc64le
// +build ppc64 ppc64le

#include "textflag.h"

// For more details about how various memory models are
// enforced on POWER, the following paper provides more
// details about how they enforce C/C++ like models. This
// gives context about why the strange looking code
// sequences below work.
//
// http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html

// uint32 ·Load(uint32 volatile* ptr)
// Sequentially-consistent 32-bit load: SYNC orders all prior accesses
// before the load; the compare/branch/ISYNC sequence after it keeps
// later accesses from being reordered ahead of the load (see the
// paper linked above for why this idiom works).
TEXT ·Load(SB),NOSPLIT|NOFRAME,$-8-12
	MOVD	ptr+0(FP), R3
	SYNC
	MOVWZ	0(R3), R3
	CMPW	R3, R3, CR7	// always equal: creates a dependency on the loaded value
	BC	4, 30, 1(PC)	// bne- cr7,0x4 (never taken)
	ISYNC
	MOVW	R3, ret+8(FP)
	RET

// uint8 ·Load8(uint8 volatile* ptr)
// Sequentially-consistent 8-bit load; same SYNC / cmp-bne-ISYNC
// idiom as ·Load above.
TEXT ·Load8(SB),NOSPLIT|NOFRAME,$-8-9
	MOVD	ptr+0(FP), R3
	SYNC
	MOVBZ	0(R3), R3
	CMP	R3, R3, CR7	// always equal: dependency on the loaded value
	BC	4, 30, 1(PC)	// bne- cr7,0x4 (never taken)
	ISYNC
	MOVB	R3, ret+8(FP)
	RET

// uint64 ·Load64(uint64 volatile* ptr)
// Sequentially-consistent 64-bit load; same SYNC / cmp-bne-ISYNC
// idiom as ·Load above.
TEXT ·Load64(SB),NOSPLIT|NOFRAME,$-8-16
	MOVD	ptr+0(FP), R3
	SYNC
	MOVD	0(R3), R3
	CMP	R3, R3, CR7	// always equal: dependency on the loaded value
	BC	4, 30, 1(PC)	// bne- cr7,0x4 (never taken)
	ISYNC
	MOVD	R3, ret+8(FP)
	RET

// void *·Loadp(void *volatile *ptr)
// Sequentially-consistent pointer load; identical instruction
// sequence to ·Load64 (pointers are 64-bit on ppc64/ppc64le).
TEXT ·Loadp(SB),NOSPLIT|NOFRAME,$-8-16
	MOVD	ptr+0(FP), R3
	SYNC
	MOVD	0(R3), R3
	CMP	R3, R3, CR7	// always equal: dependency on the loaded value
	BC	4, 30, 1(PC)	// bne- cr7,0x4 (never taken)
	ISYNC
	MOVD	R3, ret+8(FP)
	RET

// uint32 ·LoadAcq(uint32 volatile* ptr)
// Acquire-ordered 32-bit load: like ·Load but without the leading
// SYNC, so only later accesses are ordered after the load.
TEXT ·LoadAcq(SB),NOSPLIT|NOFRAME,$-8-12
	MOVD	ptr+0(FP), R3
	MOVWZ	0(R3), R3
	CMPW	R3, R3, CR7	// always equal: dependency on the loaded value
	BC	4, 30, 1(PC)	// bne- cr7, 0x4 (never taken)
	ISYNC
	MOVW	R3, ret+8(FP)
	RET

// uint64 ·LoadAcq64(uint64 volatile* ptr)
// Acquire-ordered 64-bit load: like ·Load64 but without the leading
// SYNC, so only later accesses are ordered after the load.
TEXT ·LoadAcq64(SB),NOSPLIT|NOFRAME,$-8-16
	MOVD	ptr+0(FP), R3
	MOVD	0(R3), R3
	CMP	R3, R3, CR7	// always equal: dependency on the loaded value
	BC	4, 30, 1(PC)	// bne- cr7, 0x4 (never taken)
	ISYNC
	MOVD	R3, ret+8(FP)
	RET

// bool cas(uint32 *ptr, uint32 old, uint32 new)
// Atomically:
//	if(*val == old){
//		*val = new;
//		return 1;
//	} else
//		return 0;
//
// Classic load-reserved/store-conditional loop with an LWSYNC on
// both sides of the update.
TEXT ·Cas(SB), NOSPLIT, $0-17
	MOVD	ptr+0(FP), R3
	MOVWZ	old+8(FP), R4
	MOVWZ	new+12(FP), R5
	LWSYNC			// order earlier accesses before the CAS
cas_again:
	LWAR	(R3), R6	// load word and take reservation
	CMPW	R6, R4
	BNE	cas_fail	// current value != old: fail without storing
	STWCCC	R5, (R3)	// store-conditional new value
	BNE	cas_again	// reservation lost: retry
	MOVD	$1, R3
	LWSYNC			// order the successful update before later accesses
	MOVB	R3, ret+16(FP)
	RET
cas_fail:
	MOVB	R0, ret+16(FP)	// R0 is the always-zero register: return false
	RET

// bool ·Cas64(uint64 *ptr, uint64 old, uint64 new)
// Atomically:
//	if(*val == old){
//		*val = new;
//		return 1;
//	} else {
//		return 0;
//	}
//
// 64-bit variant of ·Cas: LDAR/STDCCC instead of LWAR/STWCCC.
TEXT ·Cas64(SB), NOSPLIT, $0-25
	MOVD	ptr+0(FP), R3
	MOVD	old+8(FP), R4
	MOVD	new+16(FP), R5
	LWSYNC			// order earlier accesses before the CAS
cas64_again:
	LDAR	(R3), R6	// load doubleword and take reservation
	CMP	R6, R4
	BNE	cas64_fail	// current value != old: fail without storing
	STDCCC	R5, (R3)	// store-conditional new value
	BNE	cas64_again	// reservation lost: retry
	MOVD	$1, R3
	LWSYNC			// order the successful update before later accesses
	MOVB	R3, ret+24(FP)
	RET
cas64_fail:
	MOVB	R0, ret+24(FP)	// R0 is the always-zero register: return false
	RET

// bool ·CasRel(uint32 *ptr, uint32 old, uint32 new)
// Release-ordered compare-and-swap: same loop as ·Cas, but with no
// barrier after the update (only the LWSYNC before it), so it
// provides release semantics only.
TEXT ·CasRel(SB), NOSPLIT, $0-17
	MOVD	ptr+0(FP), R3
	MOVWZ	old+8(FP), R4
	MOVWZ	new+12(FP), R5
	LWSYNC			// release: order earlier accesses before the CAS
cas_again:
	LWAR	(R3), $0, R6	// 0 = Mutex release hint (EH field of lwarx)
	CMPW	R6, R4
	BNE	cas_fail	// current value != old: fail without storing
	STWCCC	R5, (R3)	// store-conditional new value
	BNE	cas_again	// reservation lost: retry
	MOVD	$1, R3
	MOVB	R3, ret+16(FP)
	RET
cas_fail:
	MOVB	R0, ret+16(FP)	// R0 is the always-zero register: return false
	RET

// Casint32 = Cas: the int32 CAS shares the uint32 implementation.
TEXT ·Casint32(SB), NOSPLIT, $0-17
	BR	·Cas(SB)

// Casint64 = Cas64: the int64 CAS shares the uint64 implementation.
TEXT ·Casint64(SB), NOSPLIT, $0-25
	BR	·Cas64(SB)

// Casuintptr = Cas64: uintptr is 64-bit on ppc64/ppc64le.
TEXT ·Casuintptr(SB), NOSPLIT, $0-25
	BR	·Cas64(SB)

// Loaduintptr = Load64: uintptr is 64-bit on ppc64/ppc64le.
TEXT ·Loaduintptr(SB), NOSPLIT|NOFRAME, $0-16
	BR	·Load64(SB)

// LoadAcquintptr = LoadAcq64: acquire load of a 64-bit uintptr.
TEXT ·LoadAcquintptr(SB), NOSPLIT|NOFRAME, $0-16
	BR	·LoadAcq64(SB)

// Loaduint = Load64: uint is 64-bit on ppc64/ppc64le.
TEXT ·Loaduint(SB), NOSPLIT|NOFRAME, $0-16
	BR	·Load64(SB)

// Storeint32 = Store: the int32 store shares the uint32 implementation.
TEXT ·Storeint32(SB), NOSPLIT, $0-12
	BR	·Store(SB)

// Storeint64 = Store64: the int64 store shares the uint64 implementation.
TEXT ·Storeint64(SB), NOSPLIT, $0-16
	BR	·Store64(SB)

// Storeuintptr = Store64: uintptr is 64-bit on ppc64/ppc64le.
TEXT ·Storeuintptr(SB), NOSPLIT, $0-16
	BR	·Store64(SB)

// StoreReluintptr = StoreRel64: release store of a 64-bit uintptr.
TEXT ·StoreReluintptr(SB), NOSPLIT, $0-16
	BR	·StoreRel64(SB)

// Xadduintptr = Xadd64: uintptr is 64-bit on ppc64/ppc64le.
TEXT ·Xadduintptr(SB), NOSPLIT, $0-24
	BR	·Xadd64(SB)

// Loadint32 = Load: the int32 load shares the uint32 implementation.
TEXT ·Loadint32(SB), NOSPLIT, $0-12
	BR	·Load(SB)

// Loadint64 = Load64: the int64 load shares the uint64 implementation.
TEXT ·Loadint64(SB), NOSPLIT, $0-16
	BR	·Load64(SB)

// Xaddint32 = Xadd: the int32 add shares the uint32 implementation.
TEXT ·Xaddint32(SB), NOSPLIT, $0-20
	BR	·Xadd(SB)

// Xaddint64 = Xadd64: the int64 add shares the uint64 implementation.
TEXT ·Xaddint64(SB), NOSPLIT, $0-24
	BR	·Xadd64(SB)

// bool casp(void **val, void *old, void *new)
// Atomically:
//	if(*val == old){
//		*val = new;
//		return 1;
//	} else
//		return 0;
//
// Pointer CAS = Cas64 (pointers are 64-bit on ppc64/ppc64le).
TEXT ·Casp1(SB), NOSPLIT, $0-25
	BR	·Cas64(SB)

// uint32 xadd(uint32 volatile *ptr, int32 delta)
// Atomically:
//	*val += delta;
//	return *val;
//
// NOTE(review): unlike Xchg below, there is no trailing ISYNC after
// the loop — ordering after the update relies on the LWSYNC before
// it; confirm this is the intended memory-ordering contract.
TEXT ·Xadd(SB), NOSPLIT, $0-20
	MOVD	ptr+0(FP), R4
	MOVW	delta+8(FP), R5
	LWSYNC			// order earlier accesses before the update
	LWAR	(R4), R3	// load word and take reservation
	ADD	R5, R3		// R3 = *ptr + delta
	STWCCC	R3, (R4)	// store-conditional the sum
	BNE	-3(PC)		// reservation lost: retry from LWAR
	MOVW	R3, ret+16(FP)	// return the new value
	RET

// uint64 Xadd64(uint64 volatile *val, int64 delta)
// Atomically:
//	*val += delta;
//	return *val;
//
// 64-bit variant of ·Xadd (LDAR/STDCCC instead of LWAR/STWCCC).
TEXT ·Xadd64(SB), NOSPLIT, $0-24
	MOVD	ptr+0(FP), R4
	MOVD	delta+8(FP), R5
	LWSYNC			// order earlier accesses before the update
	LDAR	(R4), R3	// load doubleword and take reservation
	ADD	R5, R3		// R3 = *ptr + delta
	STDCCC	R3, (R4)	// store-conditional the sum
	BNE	-3(PC)		// reservation lost: retry from LDAR
	MOVD	R3, ret+16(FP)	// return the new value
	RET

// uint32 Xchg(ptr *uint32, new uint32)
// Atomically:
//	old := *ptr;
//	*ptr = new;
//	return old;
TEXT ·Xchg(SB), NOSPLIT, $0-20
	MOVD	ptr+0(FP), R4
	MOVW	new+8(FP), R5
	LWSYNC			// order earlier accesses before the exchange
	LWAR	(R4), R3	// R3 = old value; take reservation
	STWCCC	R5, (R4)	// store-conditional new value
	BNE	-2(PC)		// reservation lost: retry from LWAR
	ISYNC			// order later accesses after the exchange
	MOVW	R3, ret+16(FP)	// return the old value
	RET

// uint64 Xchg64(ptr *uint64, new uint64)
// Atomically:
//	old := *ptr;
//	*ptr = new;
//	return old;
//
// 64-bit variant of ·Xchg (LDAR/STDCCC instead of LWAR/STWCCC).
TEXT ·Xchg64(SB), NOSPLIT, $0-24
	MOVD	ptr+0(FP), R4
	MOVD	new+8(FP), R5
	LWSYNC			// order earlier accesses before the exchange
	LDAR	(R4), R3	// R3 = old value; take reservation
	STDCCC	R5, (R4)	// store-conditional new value
	BNE	-2(PC)		// reservation lost: retry from LDAR
	ISYNC			// order later accesses after the exchange
	MOVD	R3, ret+16(FP)	// return the old value
	RET

// Xchgint32 = Xchg: the int32 exchange shares the uint32 implementation.
TEXT ·Xchgint32(SB), NOSPLIT, $0-20
	BR	·Xchg(SB)

// Xchgint64 = Xchg64: the int64 exchange shares the uint64 implementation.
TEXT ·Xchgint64(SB), NOSPLIT, $0-24
	BR	·Xchg64(SB)

// Xchguintptr = Xchg64: uintptr is 64-bit on ppc64/ppc64le.
TEXT ·Xchguintptr(SB), NOSPLIT, $0-24
	BR	·Xchg64(SB)

// StorepNoWB = Store64: pointer store with no write barrier
// (pointers are 64-bit on ppc64/ppc64le).
TEXT ·StorepNoWB(SB), NOSPLIT, $0-16
	BR	·Store64(SB)

// void ·Store(uint32 volatile* ptr, uint32 val)
// Sequentially-consistent 32-bit store: full SYNC barrier before
// the plain store.
TEXT ·Store(SB), NOSPLIT, $0-12
	MOVD	ptr+0(FP), R3
	MOVW	val+8(FP), R4
	SYNC
	MOVW	R4, 0(R3)
	RET

// void ·Store8(uint8 volatile* ptr, uint8 val)
// Sequentially-consistent 8-bit store: full SYNC barrier before
// the plain store.
TEXT ·Store8(SB), NOSPLIT, $0-9
	MOVD	ptr+0(FP), R3
	MOVB	val+8(FP), R4
	SYNC
	MOVB	R4, 0(R3)
	RET

// void ·Store64(uint64 volatile* ptr, uint64 val)
// Sequentially-consistent 64-bit store: full SYNC barrier before
// the plain store.
TEXT ·Store64(SB), NOSPLIT, $0-16
	MOVD	ptr+0(FP), R3
	MOVD	val+8(FP), R4
	SYNC
	MOVD	R4, 0(R3)
	RET

// void ·StoreRel(uint32 volatile* ptr, uint32 val)
// Release-ordered 32-bit store: LWSYNC (lighter than SYNC) before
// the store orders all earlier accesses before it.
TEXT ·StoreRel(SB), NOSPLIT, $0-12
	MOVD	ptr+0(FP), R3
	MOVW	val+8(FP), R4
	LWSYNC
	MOVW	R4, 0(R3)
	RET

// void ·StoreRel64(uint64 volatile* ptr, uint64 val)
// Release-ordered 64-bit store: LWSYNC (lighter than SYNC) before
// the store orders all earlier accesses before it.
TEXT ·StoreRel64(SB), NOSPLIT, $0-16
	MOVD	ptr+0(FP), R3
	MOVD	val+8(FP), R4
	LWSYNC
	MOVD	R4, 0(R3)
	RET

// void ·Or8(byte volatile*, byte);
// Atomic *ptr |= val via a byte LL/SC loop; LWSYNC before the loop,
// no barrier after (no return value to order against).
TEXT ·Or8(SB), NOSPLIT, $0-9
	MOVD	ptr+0(FP), R3
	MOVBZ	val+8(FP), R4
	LWSYNC
again:
	LBAR	(R3), R6	// load byte and take reservation
	OR	R4, R6
	STBCCC	R6, (R3)	// store-conditional the ORed byte
	BNE	again		// reservation lost: retry
	RET

// void ·And8(byte volatile*, byte);
// Atomic *ptr &= val via a byte LL/SC loop; LWSYNC before the loop,
// no barrier after (no return value to order against).
TEXT ·And8(SB), NOSPLIT, $0-9
	MOVD	ptr+0(FP), R3
	MOVBZ	val+8(FP), R4
	LWSYNC
again:
	LBAR	(R3), R6	// load byte and take reservation
	AND	R4, R6
	STBCCC	R6, (R3)	// store-conditional the ANDed byte
	BNE	again		// reservation lost: retry
	RET

// func Or(addr *uint32, v uint32)
// Atomic *addr |= v via a 32-bit LL/SC loop; LWSYNC before the loop,
// no barrier after (no return value to order against).
TEXT ·Or(SB), NOSPLIT, $0-12
	MOVD	ptr+0(FP), R3
	MOVW	val+8(FP), R4
	LWSYNC
again:
	LWAR	(R3), R6	// load word and take reservation
	OR	R4, R6
	STWCCC	R6, (R3)	// store-conditional the ORed word
	BNE	again		// reservation lost: retry
	RET

// func And(addr *uint32, v uint32)
// Atomic *addr &= v via a 32-bit LL/SC loop; LWSYNC before the loop,
// no barrier after (no return value to order against).
TEXT ·And(SB), NOSPLIT, $0-12
	MOVD	ptr+0(FP), R3
	MOVW	val+8(FP), R4
	LWSYNC
again:
	LWAR	(R3), R6	// load word and take reservation
	AND	R4, R6
	STWCCC	R6, (R3)	// store-conditional the ANDed word
	BNE	again		// reservation lost: retry
	RET
