Text file
src/runtime/race_amd64.s
Documentation: runtime
1 // Copyright 2013 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 //go:build race
6 // +build race
7
8 #include "go_asm.h"
9 #include "go_tls.h"
10 #include "funcdata.h"
11 #include "textflag.h"
12 #include "cgo/abi_amd64.h"
13
14 // The following thunks allow calling the gcc-compiled race runtime directly
15 // from Go code without going all the way through cgo.
16 // First, it's much faster (up to 50% speedup for real Go programs).
17 // Second, it eliminates race-related special cases from cgocall and scheduler.
18 // Third, in the long term it will allow removing the cyclic runtime/race dependency on cmd/go.
19
20 // A brief recap of the amd64 calling convention.
21 // Arguments are passed in DI, SI, DX, CX, R8, R9, the rest is on stack.
22 // Callee-saved registers are: BX, BP, R12-R15.
23 // SP must be 16-byte aligned.
24 // On Windows:
25 // Arguments are passed in CX, DX, R8, R9, the rest is on stack.
26 // Callee-saved registers are: BX, BP, DI, SI, R12-R15.
27 // SP must be 16-byte aligned. Windows also requires "stack-backing" for the 4 register arguments:
28 // https://msdn.microsoft.com/en-us/library/ms235286.aspx
29 // We do not do this, because it seems to be intended for vararg/unprototyped functions.
30 // Gcc-compiled race runtime does not try to use that space.
31
// Map the generic argument-register names RARG0-RARG3 used throughout this
// file onto the platform C calling convention described above:
// Win64 passes the first four integer args in CX, DX, R8, R9;
// System V amd64 passes them in DI, SI, DX, CX.
32 #ifdef GOOS_windows
33 #define RARG0 CX
34 #define RARG1 DX
35 #define RARG2 R8
36 #define RARG3 R9
37 #else
38 #define RARG0 DI
39 #define RARG1 SI
40 #define RARG2 DX
41 #define RARG3 CX
42 #endif
43
44 // func runtime·raceread(addr uintptr)
45 // Called from instrumented code.
46 // Defined as ABIInternal so as to avoid introducing a wrapper,
47 // which would render runtime.getcallerpc ineffective.
48 TEXT runtime·raceread<ABIInternal>(SB), NOSPLIT, $0-8
// Under the register ABI the single argument arrives in AX;
// otherwise it is read from the stack frame.
49 #ifdef GOEXPERIMENT_regabiargs
50 MOVQ AX, RARG1
51 #else
52 MOVQ addr+0(FP), RARG1
53 #endif
// (SP) is this function's return address (NOSPLIT, $0 frame),
// i.e. the pc of the instrumented read.
54 MOVQ (SP), RARG2
55 // void __tsan_read(ThreadState *thr, void *addr, void *pc);
56 MOVQ $__tsan_read(SB), AX
// Tail-jump: racecalladdr range-checks RARG1 and then calls (AX).
57 JMP racecalladdr<>(SB)
58
59 // func runtime·RaceRead(addr uintptr)
// Public wrapper for raceread; exists so user code (runtime/race API)
// can report a read at the caller's pc.
60 TEXT runtime·RaceRead(SB), NOSPLIT, $0-8
61 // This needs to be a tail call, because raceread reads caller pc.
62 JMP runtime·raceread(SB)
63
64 // void runtime·racereadpc(void *addr, void *callpc, void *pc)
// Report a read at an explicitly supplied (addr, callpc, pc) triple
// instead of deriving the pc from the return address.
65 TEXT runtime·racereadpc(SB), NOSPLIT, $0-24
66 MOVQ addr+0(FP), RARG1
67 MOVQ callpc+8(FP), RARG2
68 MOVQ pc+16(FP), RARG3
69 ADDQ $1, RARG3 // pc is function start, tsan wants return address
70 // void __tsan_read_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
71 MOVQ $__tsan_read_pc(SB), AX
72 JMP racecalladdr<>(SB)
73
74 // func runtime·racewrite(addr uintptr)
75 // Called from instrumented code.
76 // Defined as ABIInternal so as to avoid introducing a wrapper,
77 // which would render runtime.getcallerpc ineffective.
78 TEXT runtime·racewrite<ABIInternal>(SB), NOSPLIT, $0-8
// Under the register ABI the single argument arrives in AX;
// otherwise it is read from the stack frame.
79 #ifdef GOEXPERIMENT_regabiargs
80 MOVQ AX, RARG1
81 #else
82 MOVQ addr+0(FP), RARG1
83 #endif
// (SP) is this function's return address, i.e. the pc of the
// instrumented write.
84 MOVQ (SP), RARG2
85 // void __tsan_write(ThreadState *thr, void *addr, void *pc);
86 MOVQ $__tsan_write(SB), AX
// Tail-jump: racecalladdr range-checks RARG1 and then calls (AX).
87 JMP racecalladdr<>(SB)
88
89 // func runtime·RaceWrite(addr uintptr)
// Public wrapper for racewrite; exists so user code (runtime/race API)
// can report a write at the caller's pc.
90 TEXT runtime·RaceWrite(SB), NOSPLIT, $0-8
91 // This needs to be a tail call, because racewrite reads caller pc.
92 JMP runtime·racewrite(SB)
93
94 // void runtime·racewritepc(void *addr, void *callpc, void *pc)
// Report a write at an explicitly supplied (addr, callpc, pc) triple
// instead of deriving the pc from the return address.
95 TEXT runtime·racewritepc(SB), NOSPLIT, $0-24
96 MOVQ addr+0(FP), RARG1
97 MOVQ callpc+8(FP), RARG2
98 MOVQ pc+16(FP), RARG3
99 ADDQ $1, RARG3 // pc is function start, tsan wants return address
100 // void __tsan_write_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
101 MOVQ $__tsan_write_pc(SB), AX
102 JMP racecalladdr<>(SB)
103
104 // func runtime·racereadrange(addr, size uintptr)
105 // Called from instrumented code.
106 TEXT runtime·racereadrange(SB), NOSPLIT, $0-16
107 MOVQ addr+0(FP), RARG1
108 MOVQ size+8(FP), RARG2
// (SP) is this function's return address: the pc of the
// instrumented range read.
109 MOVQ (SP), RARG3
110 // void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
111 MOVQ $__tsan_read_range(SB), AX
112 JMP racecalladdr<>(SB)
113
114 // func runtime·RaceReadRange(addr, size uintptr)
// Public wrapper for racereadrange (runtime/race API).
115 TEXT runtime·RaceReadRange(SB), NOSPLIT, $0-16
116 // This needs to be a tail call, because racereadrange reads caller pc.
117 JMP runtime·racereadrange(SB)
118
119 // void runtime·racereadrangepc1(void *addr, uintptr sz, void *pc)
// Like racereadrange, but the reporting pc is supplied by the caller
// rather than taken from the return address.
120 TEXT runtime·racereadrangepc1(SB), NOSPLIT, $0-24
121 MOVQ addr+0(FP), RARG1
122 MOVQ size+8(FP), RARG2
123 MOVQ pc+16(FP), RARG3
124 ADDQ $1, RARG3 // pc is function start, tsan wants return address
125 // void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
126 MOVQ $__tsan_read_range(SB), AX
127 JMP racecalladdr<>(SB)
128
129 // func runtime·racewriterange(addr, size uintptr)
130 // Called from instrumented code.
131 // Defined as ABIInternal so as to avoid introducing a wrapper,
132 // which would render runtime.getcallerpc ineffective.
133 TEXT runtime·racewriterange<ABIInternal>(SB), NOSPLIT, $0-16
// Under the register ABI, addr arrives in AX and size in BX;
// otherwise both are read from the stack frame.
134 #ifdef GOEXPERIMENT_regabiargs
135 MOVQ AX, RARG1
136 MOVQ BX, RARG2
137 #else
138 MOVQ addr+0(FP), RARG1
139 MOVQ size+8(FP), RARG2
140 #endif
// (SP) is this function's return address: the pc of the
// instrumented range write.
141 MOVQ (SP), RARG3
142 // void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
143 MOVQ $__tsan_write_range(SB), AX
144 JMP racecalladdr<>(SB)
145
146 // func runtime·RaceWriteRange(addr, size uintptr)
// Public wrapper for racewriterange (runtime/race API).
147 TEXT runtime·RaceWriteRange(SB), NOSPLIT, $0-16
148 // This needs to be a tail call, because racewriterange reads caller pc.
149 JMP runtime·racewriterange(SB)
150
151 // void runtime·racewriterangepc1(void *addr, uintptr sz, void *pc)
// Like racewriterange, but the reporting pc is supplied by the caller
// rather than taken from the return address.
152 TEXT runtime·racewriterangepc1(SB), NOSPLIT, $0-24
153 MOVQ addr+0(FP), RARG1
154 MOVQ size+8(FP), RARG2
155 MOVQ pc+16(FP), RARG3
156 ADDQ $1, RARG3 // pc is function start, tsan wants return address
157 // void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
158 MOVQ $__tsan_write_range(SB), AX
159 JMP racecalladdr<>(SB)
160
161 // If addr (RARG1) is out of range, do nothing.
162 // Otherwise, setup goroutine context and invoke racecall. Other arguments already set.
163 TEXT racecalladdr<>(SB), NOSPLIT, $0-0
// Without the g-in-R14 experiment, load g from TLS into R14 so the
// g_racectx load below works either way.
164 #ifndef GOEXPERIMENT_regabig
165 get_tls(R12)
166 MOVQ g(R12), R14
167 #endif
168 MOVQ g_racectx(R14), RARG0 // goroutine context
169 // Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
// Addresses outside the shadowed ranges are silently ignored (RET).
170 CMPQ RARG1, runtime·racearenastart(SB)
171 JB data
172 CMPQ RARG1, runtime·racearenaend(SB)
173 JB call
174 data:
175 CMPQ RARG1, runtime·racedatastart(SB)
176 JB ret
177 CMPQ RARG1, runtime·racedataend(SB)
178 JAE ret
179 call:
180 MOVQ AX, AX // w/o this 6a miscompiles this function
// In range: invoke the tsan function already loaded into AX.
181 JMP racecall<>(SB)
182 ret:
183 RET
184
185 // func runtime·racefuncenter(pc uintptr)
186 // Called from instrumented code.
// Loads the caller pc argument into R11 and falls into the common
// racefuncenter<> body below.
187 TEXT runtime·racefuncenter(SB), NOSPLIT, $0-8
188 MOVQ callpc+0(FP), R11
189 JMP racefuncenter<>(SB)
190
191 // Common code for racefuncenter
192 // R11 = caller's return address
// Common code for racefuncenter
// R11 = caller's return address
193 TEXT racefuncenter<>(SB), NOSPLIT, $0-0
// DX carries the closure context in the Go ABI; racecall<> preserves BX
// (callee-saved in the C ABI), so park DX there across the C call.
194 MOVQ DX, BX // save function entry context (for closures)
195 #ifndef GOEXPERIMENT_regabig
196 get_tls(R12)
197 MOVQ g(R12), R14
198 #endif
199 MOVQ g_racectx(R14), RARG0 // goroutine context
200 MOVQ R11, RARG1
201 // void __tsan_func_enter(ThreadState *thr, void *pc);
202 MOVQ $__tsan_func_enter(SB), AX
203 // racecall<> preserves BX
204 CALL racecall<>(SB)
205 MOVQ BX, DX // restore function entry context
206 RET
207
208 // func runtime·racefuncexit()
209 // Called from instrumented code.
// Notifies tsan that the instrumented function is returning.
210 TEXT runtime·racefuncexit(SB), NOSPLIT, $0-0
211 #ifndef GOEXPERIMENT_regabig
212 get_tls(R12)
213 MOVQ g(R12), R14
214 #endif
215 MOVQ g_racectx(R14), RARG0 // goroutine context
216 // void __tsan_func_exit(ThreadState *thr);
217 MOVQ $__tsan_func_exit(SB), AX
// Tail-jump; racecall<>'s RET returns to our caller.
218 JMP racecall<>(SB)
219
220 // Atomic operations for sync/atomic package.
221
222 // Load
// Load thunks. Each loads the matching __tsan_go_atomic*_load function
// into AX and lets racecallatomic<> read the Go arguments straight off
// this frame's stack (hence GO_ARGS and the CALL rather than JMP).
// The unsigned/uintptr/pointer variants simply tail-call the
// same-width signed variant.
223 TEXT sync∕atomic·LoadInt32(SB), NOSPLIT, $0-12
224 GO_ARGS
225 MOVQ $__tsan_go_atomic32_load(SB), AX
226 CALL racecallatomic<>(SB)
227 RET
228
229 TEXT sync∕atomic·LoadInt64(SB), NOSPLIT, $0-16
230 GO_ARGS
231 MOVQ $__tsan_go_atomic64_load(SB), AX
232 CALL racecallatomic<>(SB)
233 RET
234
235 TEXT sync∕atomic·LoadUint32(SB), NOSPLIT, $0-12
236 GO_ARGS
237 JMP sync∕atomic·LoadInt32(SB)
238
239 TEXT sync∕atomic·LoadUint64(SB), NOSPLIT, $0-16
240 GO_ARGS
241 JMP sync∕atomic·LoadInt64(SB)
242
243 TEXT sync∕atomic·LoadUintptr(SB), NOSPLIT, $0-16
244 GO_ARGS
245 JMP sync∕atomic·LoadInt64(SB)
246
247 TEXT sync∕atomic·LoadPointer(SB), NOSPLIT, $0-16
248 GO_ARGS
249 JMP sync∕atomic·LoadInt64(SB)
250
251 // Store
// Store thunks: same pattern as the Load thunks above, dispatching to
// __tsan_go_atomic{32,64}_store via racecallatomic<>.
252 TEXT sync∕atomic·StoreInt32(SB), NOSPLIT, $0-12
253 GO_ARGS
254 MOVQ $__tsan_go_atomic32_store(SB), AX
255 CALL racecallatomic<>(SB)
256 RET
257
258 TEXT sync∕atomic·StoreInt64(SB), NOSPLIT, $0-16
259 GO_ARGS
260 MOVQ $__tsan_go_atomic64_store(SB), AX
261 CALL racecallatomic<>(SB)
262 RET
263
264 TEXT sync∕atomic·StoreUint32(SB), NOSPLIT, $0-12
265 GO_ARGS
266 JMP sync∕atomic·StoreInt32(SB)
267
268 TEXT sync∕atomic·StoreUint64(SB), NOSPLIT, $0-16
269 GO_ARGS
270 JMP sync∕atomic·StoreInt64(SB)
271
272 TEXT sync∕atomic·StoreUintptr(SB), NOSPLIT, $0-16
273 GO_ARGS
274 JMP sync∕atomic·StoreInt64(SB)
275
276 // Swap
// Swap thunks: dispatch to __tsan_go_atomic{32,64}_exchange via
// racecallatomic<>.
277 TEXT sync∕atomic·SwapInt32(SB), NOSPLIT, $0-20
278 GO_ARGS
279 MOVQ $__tsan_go_atomic32_exchange(SB), AX
280 CALL racecallatomic<>(SB)
281 RET
282
283 TEXT sync∕atomic·SwapInt64(SB), NOSPLIT, $0-24
284 GO_ARGS
285 MOVQ $__tsan_go_atomic64_exchange(SB), AX
286 CALL racecallatomic<>(SB)
287 RET
288
289 TEXT sync∕atomic·SwapUint32(SB), NOSPLIT, $0-20
290 GO_ARGS
291 JMP sync∕atomic·SwapInt32(SB)
292
293 TEXT sync∕atomic·SwapUint64(SB), NOSPLIT, $0-24
294 GO_ARGS
295 JMP sync∕atomic·SwapInt64(SB)
296
297 TEXT sync∕atomic·SwapUintptr(SB), NOSPLIT, $0-24
298 GO_ARGS
299 JMP sync∕atomic·SwapInt64(SB)
300
301 // Add
// Add thunks. tsan provides fetch_add (returns the old value), but the
// Go API returns the new value, so after the call the addend is added
// into the result slot in the caller's frame.
302 TEXT sync∕atomic·AddInt32(SB), NOSPLIT, $0-20
303 GO_ARGS
304 MOVQ $__tsan_go_atomic32_fetch_add(SB), AX
305 CALL racecallatomic<>(SB)
306 MOVL add+8(FP), AX // convert fetch_add to add_fetch
307 ADDL AX, ret+16(FP)
308 RET
309
310 TEXT sync∕atomic·AddInt64(SB), NOSPLIT, $0-24
311 GO_ARGS
312 MOVQ $__tsan_go_atomic64_fetch_add(SB), AX
313 CALL racecallatomic<>(SB)
314 MOVQ add+8(FP), AX // convert fetch_add to add_fetch
315 ADDQ AX, ret+16(FP)
316 RET
317
318 TEXT sync∕atomic·AddUint32(SB), NOSPLIT, $0-20
319 GO_ARGS
320 JMP sync∕atomic·AddInt32(SB)
321
322 TEXT sync∕atomic·AddUint64(SB), NOSPLIT, $0-24
323 GO_ARGS
324 JMP sync∕atomic·AddInt64(SB)
325
326 TEXT sync∕atomic·AddUintptr(SB), NOSPLIT, $0-24
327 GO_ARGS
328 JMP sync∕atomic·AddInt64(SB)
329
330 // CompareAndSwap
// CompareAndSwap thunks: dispatch to
// __tsan_go_atomic{32,64}_compare_exchange via racecallatomic<>.
331 TEXT sync∕atomic·CompareAndSwapInt32(SB), NOSPLIT, $0-17
332 GO_ARGS
333 MOVQ $__tsan_go_atomic32_compare_exchange(SB), AX
334 CALL racecallatomic<>(SB)
335 RET
336
337 TEXT sync∕atomic·CompareAndSwapInt64(SB), NOSPLIT, $0-25
338 GO_ARGS
339 MOVQ $__tsan_go_atomic64_compare_exchange(SB), AX
340 CALL racecallatomic<>(SB)
341 RET
342
343 TEXT sync∕atomic·CompareAndSwapUint32(SB), NOSPLIT, $0-17
344 GO_ARGS
345 JMP sync∕atomic·CompareAndSwapInt32(SB)
346
347 TEXT sync∕atomic·CompareAndSwapUint64(SB), NOSPLIT, $0-25
348 GO_ARGS
349 JMP sync∕atomic·CompareAndSwapInt64(SB)
350
351 TEXT sync∕atomic·CompareAndSwapUintptr(SB), NOSPLIT, $0-25
352 GO_ARGS
353 JMP sync∕atomic·CompareAndSwapInt64(SB)
354
355 // Generic atomic operation implementation.
356 // AX already contains target function.
// Generic atomic operation implementation.
// AX already contains target function.
// Stack layout on entry (called from the $0-frame thunks above):
//   (SP)   = return address into the thunk (the tsan "pc")
//   8(SP)  = the thunk's own return address (the tsan "caller pc")
//   16(SP) = start of the Go arguments, beginning with addr.
357 TEXT racecallatomic<>(SB), NOSPLIT, $0-0
358 // Trigger SIGSEGV early.
// Dereference addr now so a bad pointer faults here, with a clean Go
// stack, rather than deep inside the C race runtime.
359 MOVQ 16(SP), R12
360 MOVL (R12), R13
361 // Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
362 CMPQ R12, runtime·racearenastart(SB)
363 JB racecallatomic_data
364 CMPQ R12, runtime·racearenaend(SB)
365 JB racecallatomic_ok
366 racecallatomic_data:
367 CMPQ R12, runtime·racedatastart(SB)
368 JB racecallatomic_ignore
369 CMPQ R12, runtime·racedataend(SB)
370 JAE racecallatomic_ignore
371 racecallatomic_ok:
372 // Addr is within the good range, call the atomic function.
// R12 is dead past this point, so reusing it for get_tls is safe:
// the tsan function reads the arguments through RARG3 instead.
373 #ifndef GOEXPERIMENT_regabig
374 get_tls(R12)
375 MOVQ g(R12), R14
376 #endif
377 MOVQ g_racectx(R14), RARG0 // goroutine context
378 MOVQ 8(SP), RARG1 // caller pc
379 MOVQ (SP), RARG2 // pc
380 LEAQ 16(SP), RARG3 // arguments
381 JMP racecall<>(SB) // does not return
382 racecallatomic_ignore:
383 // Addr is outside the good range.
384 // Call __tsan_go_ignore_sync_begin to ignore synchronization during the atomic op.
385 // An attempt to synchronize on the address would cause crash.
// BX is callee-saved across racecall<>, so it can carry the original
// target function across the ignore_sync_begin call.
386 MOVQ AX, BX // remember the original function
387 MOVQ $__tsan_go_ignore_sync_begin(SB), AX
388 #ifndef GOEXPERIMENT_regabig
389 get_tls(R12)
390 MOVQ g(R12), R14
391 #endif
392 MOVQ g_racectx(R14), RARG0 // goroutine context
393 CALL racecall<>(SB)
394 MOVQ BX, AX // restore the original function
395 // Call the atomic function.
396 MOVQ g_racectx(R14), RARG0 // goroutine context
397 MOVQ 8(SP), RARG1 // caller pc
398 MOVQ (SP), RARG2 // pc
399 LEAQ 16(SP), RARG3 // arguments
400 CALL racecall<>(SB)
401 // Call __tsan_go_ignore_sync_end.
402 MOVQ $__tsan_go_ignore_sync_end(SB), AX
403 MOVQ g_racectx(R14), RARG0 // goroutine context
404 JMP racecall<>(SB)
405
406 // void runtime·racecall(void(*f)(...), ...)
407 // Calls C function f from race runtime and passes up to 4 arguments to it.
408 // The arguments are never heap-object-preserving pointers, so we pretend there are no arguments.
// void runtime·racecall(void(*f)(...), ...)
// Calls C function f from race runtime and passes up to 4 arguments to it.
// The arguments are never heap-object-preserving pointers, so we pretend there are no arguments.
409 TEXT runtime·racecall(SB), NOSPLIT, $0-0
410 MOVQ fn+0(FP), AX
// Spread the four uintptr arguments into the platform C argument
// registers, then tail into the common call path.
411 MOVQ arg0+8(FP), RARG0
412 MOVQ arg1+16(FP), RARG1
413 MOVQ arg2+24(FP), RARG2
414 MOVQ arg3+32(FP), RARG3
415 JMP racecall<>(SB)
416
417 // Switches SP to g0 stack and calls (AX). Arguments already set.
// Switches SP to g0 stack and calls (AX). Arguments already set.
418 TEXT racecall<>(SB), NOSPLIT, $0-0
419 #ifndef GOEXPERIMENT_regabig
420 get_tls(R12)
421 MOVQ g(R12), R14
422 #endif
423 MOVQ g_m(R14), R13
424 // Switch to g0 stack.
425 MOVQ SP, R12 // callee-saved, preserved across the CALL
426 MOVQ m_g0(R13), R10
427 CMPQ R10, R14
428 JE call // already on g0
429 MOVQ (g_sched+gobuf_sp)(R10), SP
430 call:
431 ANDQ $~15, SP // alignment for gcc ABI
432 CALL AX
// Restore the original Go SP saved in R12 (C preserved it).
433 MOVQ R12, SP
434 // Back to Go world, set special registers.
435 // The g register (R14) is preserved in C.
// Re-zero X15, which Go ABI code expects to hold zero; the C ABI
// treats it as scratch. NOTE(review): presumably the fixed zero
// register of the register ABI — confirm against the internal ABI spec.
436 XORPS X15, X15
437 RET
438
439 // C->Go callback thunk that allows to call runtime·racesymbolize from C code.
440 // Direct Go->C race call has only switched SP, finish g->g0 switch by setting correct g.
441 // The overall effect of Go->C->Go call chain is similar to that of mcall.
442 // RARG0 contains command code. RARG1 contains command-specific context.
443 // See racecallback for command codes.
444 // Defined as ABIInternal so as to avoid introducing a wrapper,
445 // because its address is passed to C via funcPC.
446 TEXT runtime·racecallbackthunk<ABIInternal>(SB), NOSPLIT, $0-0
447 // Handle command raceGetProcCmd (0) here.
448 // First, code below assumes that we are on curg, while raceGetProcCmd
449 // can be executed on g0. Second, it is called frequently, so will
450 // benefit from this fast path.
451 CMPQ RARG0, $0
452 JNE rest
// Fast path: *arg = g.m.p.raceprocctx, no ABI switch needed.
453 get_tls(RARG0)
454 MOVQ g(RARG0), RARG0
455 MOVQ g_m(RARG0), RARG0
456 MOVQ m_p(RARG0), RARG0
457 MOVQ p_raceprocctx(RARG0), RARG0
458 MOVQ RARG0, (RARG1)
459 RET
460
461 rest:
462 // Transition from C ABI to Go ABI.
463 PUSH_REGS_HOST_TO_ABI0()
464 // Set g = g0.
465 get_tls(R12)
466 MOVQ g(R12), R14
467 MOVQ g_m(R14), R13
468 MOVQ m_g0(R13), R15
// NOTE(review): this compares m (R13) against g0 (R15), which are
// distinct structures and can never be equal, so the branch below
// looks dead; the intent appears to be comparing g (R14) with g0.
// Confirm against upstream before changing.
469 CMPQ R13, R15
470 JEQ noswitch // branch if already on g0
471 MOVQ R15, g(R12) // g = m->g0
472 MOVQ R15, R14 // set g register
473 PUSHQ RARG1 // func arg
474 PUSHQ RARG0 // func arg
475 CALL runtime·racecallback(SB)
// Pop the two pushed args; values are discarded.
476 POPQ R12
477 POPQ R12
478 // All registers are smashed after Go code, reload.
479 get_tls(R12)
480 MOVQ g(R12), R13
481 MOVQ g_m(R13), R13
482 MOVQ m_curg(R13), R14
483 MOVQ R14, g(R12) // g = m->curg
484 ret:
485 POP_REGS_HOST_TO_ABI0()
486 RET
487
488 noswitch:
489 // already on g0
490 PUSHQ RARG1 // func arg
491 PUSHQ RARG0 // func arg
492 CALL runtime·racecallback(SB)
493 POPQ R12
494 POPQ R12
495 JMP ret
496
View as plain text