src/runtime/race_arm64.s
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build race
// +build race

#include "go_asm.h"
#include "funcdata.h"
#include "textflag.h"
#include "tls_arm64.h"

// The following thunks allow calling the gcc-compiled race runtime directly
// from Go code without going all the way through cgo.
// First, it's much faster (up to 50% speedup for real Go programs).
// Second, it eliminates race-related special cases from cgocall and the scheduler.
// Third, in the long term it will allow removing the cyclic runtime/race
// dependency on cmd/go.

// A brief recap of the arm64 calling convention.
// Arguments are passed in R0...R7; the rest go on the stack.
// Callee-saved registers are R19...R28.
// Temporary registers are R9...R15.
// SP must be 16-byte aligned.

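// As an illustrative sketch (see racecall at the end of this file), a
// four-argument call into the race runtime such as __tsan_read_pc maps
// onto registers like this:
//
//	racecall(&__tsan_read_pc, racectx, addr, callpc, pc)
//	//       R9 (target)      R0       R1    R2      R3
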
// When calling racecalladdr, R9 is the call target address.

// The race ctx, ThreadState *thr below, is passed in R0 and loaded in racecalladdr.

// Darwin may return an unaligned thread pointer; align it. (See tls_arm64.s.)
// No-op on other OSes.
#ifdef TLS_darwin
#define TP_ALIGN	AND	$~7, R0
#else
#define TP_ALIGN
#endif

// Load g from TLS. (See tls_arm64.s.)
#define load_g \
	MRS_TPIDR_R0 \
	TP_ALIGN \
	MOVD	runtime·tls_g(SB), R11 \
	MOVD	(R0)(R11), g

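// In pseudo-Go (a sketch, not actual runtime code; tls_base is a
// hypothetical helper), load_g amounts to:
//
//	tp := tls_base()          // MRS TPIDR_EL0, aligned on Darwin
//	g = *(*gPtr)(tp + tls_g)  // tls_g holds the offset of g's TLS slot
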
// func runtime·raceread(addr uintptr)
// Called from instrumented code.
TEXT	runtime·raceread(SB), NOSPLIT, $0-8
	MOVD	addr+0(FP), R1
	MOVD	LR, R2
	// void __tsan_read(ThreadState *thr, void *addr, void *pc);
	MOVD	$__tsan_read(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·RaceRead(addr uintptr)
TEXT	runtime·RaceRead(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because raceread reads caller pc.
	JMP	runtime·raceread(SB)

// func runtime·racereadpc(void *addr, void *callpc, void *pc)
TEXT	runtime·racereadpc(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	callpc+8(FP), R2
	MOVD	pc+16(FP), R3
	// void __tsan_read_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVD	$__tsan_read_pc(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·racewrite(addr uintptr)
// Called from instrumented code.
TEXT	runtime·racewrite(SB), NOSPLIT, $0-8
	MOVD	addr+0(FP), R1
	MOVD	LR, R2
	// void __tsan_write(ThreadState *thr, void *addr, void *pc);
	MOVD	$__tsan_write(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·RaceWrite(addr uintptr)
TEXT	runtime·RaceWrite(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because racewrite reads caller pc.
	JMP	runtime·racewrite(SB)

// func runtime·racewritepc(void *addr, void *callpc, void *pc)
TEXT	runtime·racewritepc(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	callpc+8(FP), R2
	MOVD	pc+16(FP), R3
	// void __tsan_write_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVD	$__tsan_write_pc(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·racereadrange(addr, size uintptr)
// Called from instrumented code.
TEXT	runtime·racereadrange(SB), NOSPLIT, $0-16
	MOVD	addr+0(FP), R1
	MOVD	size+8(FP), R2
	MOVD	LR, R3
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_read_range(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·RaceReadRange(addr, size uintptr)
TEXT	runtime·RaceReadRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racereadrange reads caller pc.
	JMP	runtime·racereadrange(SB)

// func runtime·racereadrangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racereadrangepc1(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	size+8(FP), R2
	MOVD	pc+16(FP), R3
	ADD	$4, R3	// pc is function start, tsan wants return address.
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_read_range(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·racewriterange(addr, size uintptr)
// Called from instrumented code.
TEXT	runtime·racewriterange(SB), NOSPLIT, $0-16
	MOVD	addr+0(FP), R1
	MOVD	size+8(FP), R2
	MOVD	LR, R3
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_write_range(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·RaceWriteRange(addr, size uintptr)
TEXT	runtime·RaceWriteRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racewriterange reads caller pc.
	JMP	runtime·racewriterange(SB)

// func runtime·racewriterangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racewriterangepc1(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	size+8(FP), R2
	MOVD	pc+16(FP), R3
	ADD	$4, R3	// pc is function start, tsan wants return address.
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_write_range(SB), R9
	JMP	racecalladdr<>(SB)

// If addr (R1) is out of range, do nothing.
// Otherwise, set up the goroutine context and invoke racecall. Other arguments are already set.
TEXT	racecalladdr<>(SB), NOSPLIT, $0-0
	load_g
	MOVD	g_racectx(g), R0
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	MOVD	runtime·racearenastart(SB), R10
	CMP	R10, R1
	BLT	data
	MOVD	runtime·racearenaend(SB), R10
	CMP	R10, R1
	BLT	call
data:
	MOVD	runtime·racedatastart(SB), R10
	CMP	R10, R1
	BLT	ret
	MOVD	runtime·racedataend(SB), R10
	CMP	R10, R1
	BGE	ret	// BGE, not BGT: the data range is half-open, [racedatastart, racedataend), matching racecallatomic below
call:
	JMP	racecall<>(SB)
ret:
	RET

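// In Go terms, the address filter above is (a sketch; the names mirror
// the runtime variables referenced by the assembly):
//
//	inArena := racearenastart <= addr && addr < racearenaend
//	inData := racedatastart <= addr && addr < racedataend
//	if inArena || inData {
//		racecall(fn, racectx, addr, ...)
//	}
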
// func runtime·racefuncenter(pc uintptr)
// Called from instrumented code.
TEXT	runtime·racefuncenter(SB), NOSPLIT, $0-8
	MOVD	callpc+0(FP), R9
	JMP	racefuncenter<>(SB)

// Common code for racefuncenter
// R9 = caller's return address
TEXT	racefuncenter<>(SB), NOSPLIT, $0-0
	load_g
	MOVD	g_racectx(g), R0	// goroutine racectx
	MOVD	R9, R1
	// void __tsan_func_enter(ThreadState *thr, void *pc);
	MOVD	$__tsan_func_enter(SB), R9
	BL	racecall<>(SB)
	RET

// func runtime·racefuncexit()
// Called from instrumented code.
TEXT	runtime·racefuncexit(SB), NOSPLIT, $0-0
	load_g
	MOVD	g_racectx(g), R0	// race context
	// void __tsan_func_exit(ThreadState *thr);
	MOVD	$__tsan_func_exit(SB), R9
	JMP	racecall<>(SB)

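// For reference, the compiler instruments every function body roughly as
// the sketch below (it emits direct calls at entry and before returns,
// not a defer):
//
//	func f() {
//		racefuncenter(getcallerpc())
//		// ... function body ...
//		racefuncexit()
//	}
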
// Atomic operations for the sync/atomic package.
// R3 = addr of the arguments passed to this function; after two levels
// of BL, racecallatomic can fetch it at 40(RSP).
// R0, R1, R2 are set in racecallatomic.

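// A sketch of what racecallatomic arranges for each operation (a Go-ish
// spelling of the C call; argument names are illustrative):
//
//	__tsan_go_atomic32_load(racectx, callerpc, pc, &args)
//
// where args is the sync/atomic function's own argument block, so the
// race runtime both performs the memory operation and records it.
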
// Load
TEXT	sync∕atomic·LoadInt32(SB), NOSPLIT, $0-12
	GO_ARGS
	MOVD	$__tsan_go_atomic32_load(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadInt64(SB), NOSPLIT, $0-16
	GO_ARGS
	MOVD	$__tsan_go_atomic64_load(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadUint32(SB), NOSPLIT, $0-12
	GO_ARGS
	JMP	sync∕atomic·LoadInt32(SB)

TEXT	sync∕atomic·LoadUint64(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadUintptr(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadPointer(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

// Store
TEXT	sync∕atomic·StoreInt32(SB), NOSPLIT, $0-12
	GO_ARGS
	MOVD	$__tsan_go_atomic32_store(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreInt64(SB), NOSPLIT, $0-16
	GO_ARGS
	MOVD	$__tsan_go_atomic64_store(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreUint32(SB), NOSPLIT, $0-12
	GO_ARGS
	JMP	sync∕atomic·StoreInt32(SB)

TEXT	sync∕atomic·StoreUint64(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·StoreInt64(SB)

TEXT	sync∕atomic·StoreUintptr(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·StoreInt64(SB)

// Swap
TEXT	sync∕atomic·SwapInt32(SB), NOSPLIT, $0-20
	GO_ARGS
	MOVD	$__tsan_go_atomic32_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapInt64(SB), NOSPLIT, $0-24
	GO_ARGS
	MOVD	$__tsan_go_atomic64_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·SwapInt32(SB)

TEXT	sync∕atomic·SwapUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·SwapInt64(SB)

TEXT	sync∕atomic·SwapUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·SwapInt64(SB)

// Add
TEXT	sync∕atomic·AddInt32(SB), NOSPLIT, $0-20
	GO_ARGS
	MOVD	$__tsan_go_atomic32_fetch_add(SB), R9
	BL	racecallatomic<>(SB)
	MOVW	add+8(FP), R0	// convert fetch_add to add_fetch
	MOVW	ret+16(FP), R1
	ADD	R0, R1, R0
	MOVW	R0, ret+16(FP)
	RET

TEXT	sync∕atomic·AddInt64(SB), NOSPLIT, $0-24
	GO_ARGS
	MOVD	$__tsan_go_atomic64_fetch_add(SB), R9
	BL	racecallatomic<>(SB)
	MOVD	add+8(FP), R0	// convert fetch_add to add_fetch
	MOVD	ret+16(FP), R1
	ADD	R0, R1, R0
	MOVD	R0, ret+16(FP)
	RET

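// The post-processing above converts tsan's fetch_add (which returns the
// old value) to Go's Add semantics (which return the new value). As a
// sketch:
//
//	ret = fetchAdd(addr, delta)	// stored by tsan: the old value
//	ret += delta			// AddInt32/AddInt64 must return the new value
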
TEXT	sync∕atomic·AddUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·AddInt32(SB)

TEXT	sync∕atomic·AddUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AddInt64(SB)

TEXT	sync∕atomic·AddUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AddInt64(SB)

// CompareAndSwap
TEXT	sync∕atomic·CompareAndSwapInt32(SB), NOSPLIT, $0-17
	GO_ARGS
	MOVD	$__tsan_go_atomic32_compare_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapInt64(SB), NOSPLIT, $0-25
	GO_ARGS
	MOVD	$__tsan_go_atomic64_compare_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapUint32(SB), NOSPLIT, $0-17
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt32(SB)

TEXT	sync∕atomic·CompareAndSwapUint64(SB), NOSPLIT, $0-25
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

TEXT	sync∕atomic·CompareAndSwapUintptr(SB), NOSPLIT, $0-25
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

// Generic atomic operation implementation.
// R9 = addr of target function
TEXT	racecallatomic<>(SB), NOSPLIT, $0
	// Set up these registers:
	// R0 = *ThreadState
	// R1 = caller pc
	// R2 = pc
	// R3 = addr of incoming arg list

	// Trigger SIGSEGV early.
	MOVD	40(RSP), R3	// 1st arg is addr; after two levels of BL it is at 40(RSP)
	MOVD	(R3), R13	// segv here if addr is bad
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	MOVD	runtime·racearenastart(SB), R10
	CMP	R10, R3
	BLT	racecallatomic_data
	MOVD	runtime·racearenaend(SB), R10
	CMP	R10, R3
	BLT	racecallatomic_ok
racecallatomic_data:
	MOVD	runtime·racedatastart(SB), R10
	CMP	R10, R3
	BLT	racecallatomic_ignore
	MOVD	runtime·racedataend(SB), R10
	CMP	R10, R3
	BGE	racecallatomic_ignore
racecallatomic_ok:
	// Addr is within the good range, call the atomic function.
	load_g
	MOVD	g_racectx(g), R0	// goroutine context
	MOVD	16(RSP), R1	// caller pc
	MOVD	R9, R2	// pc
	ADD	$40, RSP, R3
	JMP	racecall<>(SB)	// does not return
racecallatomic_ignore:
	// Addr is outside the good range.
	// Call __tsan_go_ignore_sync_begin to ignore synchronization during the atomic op.
	// An attempt to synchronize on the address would cause a crash.
	MOVD	R9, R20	// remember the original function
	MOVD	$__tsan_go_ignore_sync_begin(SB), R9
	load_g
	MOVD	g_racectx(g), R0	// goroutine context
	BL	racecall<>(SB)
	MOVD	R20, R9	// restore the original function
	// Call the atomic function.
	// racecall will call LLVM race code which might clobber R28 (g).
	load_g
	MOVD	g_racectx(g), R0	// goroutine context
	MOVD	16(RSP), R1	// caller pc
	MOVD	R9, R2	// pc
	ADD	$40, RSP, R3	// arguments
	BL	racecall<>(SB)
	// Call __tsan_go_ignore_sync_end.
	MOVD	$__tsan_go_ignore_sync_end(SB), R9
	MOVD	g_racectx(g), R0	// goroutine context
	BL	racecall<>(SB)
	RET

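// For an out-of-range address, the ignore path above is effectively
// (a sketch):
//
//	__tsan_go_ignore_sync_begin(racectx)
//	atomicFn(racectx, callerpc, pc, &args)	// the original R9 target
//	__tsan_go_ignore_sync_end(racectx)
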
// func runtime·racecall(void(*f)(...), ...)
// Calls the C function f from the race runtime and passes up to 4 arguments to it.
// The arguments are never heap-object-preserving pointers, so we pretend there are no arguments.
TEXT	runtime·racecall(SB), NOSPLIT, $0-0
	MOVD	fn+0(FP), R9
	MOVD	arg0+8(FP), R0
	MOVD	arg1+16(FP), R1
	MOVD	arg2+24(FP), R2
	MOVD	arg3+32(FP), R3
	JMP	racecall<>(SB)

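// The Go-side declaration (see race.go) is roughly:
//
//	//go:noescape
//	func racecall(fn *byte, arg0, arg1, arg2, arg3 uintptr)
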
// Switches SP to the g0 stack and calls (R9). Arguments are already set.
TEXT	racecall<>(SB), NOSPLIT, $0-0
	MOVD	g_m(g), R10
	// Switch to g0 stack.
	MOVD	RSP, R19	// callee-saved, preserved across the CALL
	MOVD	m_g0(R10), R11
	CMP	R11, g
	BEQ	call	// already on g0
	MOVD	(g_sched+gobuf_sp)(R11), R12
	MOVD	R12, RSP
call:
	BL	R9
	MOVD	R19, RSP
	RET

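// In pseudo-Go, racecall performs (a sketch; getSP/setSP are hypothetical
// helpers standing in for the RSP moves above):
//
//	sp := getSP()
//	if g != g.m.g0 {
//		setSP(g.m.g0.sched.sp)	// switch to the system stack
//	}
//	fn(R0, R1, R2, R3)
//	setSP(sp)	// R19 is callee-saved, so sp survives the call
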
// C->Go callback thunk that allows calling runtime·racesymbolize from C code.
// A direct Go->C race call has only switched SP, so finish the g->g0 switch by
// setting the correct g. The overall effect of the Go->C->Go call chain is
// similar to that of mcall.
// R0 contains the command code, R1 contains the command-specific context.
// See racecallback for the command codes.
TEXT	runtime·racecallbackthunk(SB), NOSPLIT|NOFRAME, $0
	// Handle command raceGetProcCmd (0) here.
	// First, the code below assumes that we are on curg, while raceGetProcCmd
	// can be executed on g0. Second, it is called frequently, so it benefits
	// from this fast path.
	CBNZ	R0, rest
	MOVD	g, R13
#ifdef TLS_darwin
	MOVD	R27, R12	// save R27 a.k.a. REGTMP (callee-save in C); load_g clobbers it
#endif
	load_g
#ifdef TLS_darwin
	MOVD	R12, R27
#endif
	MOVD	g_m(g), R0
	MOVD	m_p(R0), R0
	MOVD	p_raceprocctx(R0), R0
	MOVD	R0, (R1)
	MOVD	R13, g
	JMP	(LR)
rest:
	// Save callee-saved registers (Go code won't preserve them).
	// 8(RSP) and 16(RSP) are for the args passed through racecallback.
	SUB	$112, RSP
	MOVD	LR, 0(RSP)
	STP	(R19, R20), 24(RSP)
	STP	(R21, R22), 40(RSP)
	STP	(R23, R24), 56(RSP)
	STP	(R25, R26), 72(RSP)
	STP	(R27, g), 88(RSP)
	// Set g = g0.
	// load_g will clobber R0; save it.
	MOVD	R0, R13
	load_g
	// Restore R0.
	MOVD	R13, R0
	MOVD	g_m(g), R13
	MOVD	m_g0(R13), R14
	CMP	R14, g
	BEQ	noswitch	// branch if already on g0
	MOVD	R14, g

	MOVD	R0, 8(RSP)	// func arg
	MOVD	R1, 16(RSP)	// func arg
	BL	runtime·racecallback(SB)

	// All registers are smashed after Go code, reload.
	MOVD	g_m(g), R13
	MOVD	m_curg(R13), g	// g = m->curg
ret:
	// Restore callee-saved registers.
	MOVD	0(RSP), LR
	LDP	24(RSP), (R19, R20)
	LDP	40(RSP), (R21, R22)
	LDP	56(RSP), (R23, R24)
	LDP	72(RSP), (R25, R26)
	LDP	88(RSP), (R27, g)
	ADD	$112, RSP
	JMP	(LR)

noswitch:
	// Already on g0.
	MOVD	R0, 8(RSP)	// func arg
	MOVD	R1, 16(RSP)	// func arg
	BL	runtime·racecallback(SB)
	JMP	ret

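// From the race runtime's side, the callback protocol is, as a sketch:
//
//	racecallbackthunk(raceGetProcCmd, &ctx)	// fast path: writes p.raceprocctx to *ctx
//	racecallbackthunk(cmd, &ctx)	// otherwise: switch to g0, then
//					// runtime.racecallback(cmd, ctx)
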
#ifndef TLSG_IS_VARIABLE
// tls_g, g value for each thread in TLS
GLOBL	runtime·tls_g+0(SB), TLSBSS+DUPOK, $8
#endif