Black Lives Matter. Support the Equal Justice Initiative.

Text file src/runtime/race_amd64.s

Documentation: runtime

     1  // Copyright 2013 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build race
     6  // +build race
     7  
     8  #include "go_asm.h"
     9  #include "go_tls.h"
    10  #include "funcdata.h"
    11  #include "textflag.h"
    12  #include "cgo/abi_amd64.h"
    13  
    14  // The following thunks allow calling the gcc-compiled race runtime directly
    15  // from Go code without going all the way through cgo.
    16  // First, it's much faster (up to 50% speedup for real Go programs).
    17  // Second, it eliminates race-related special cases from cgocall and scheduler.
    18  // Third, in long-term it will allow to remove cyclic runtime/race dependency on cmd/go.
    19  
    20  // A brief recap of the amd64 calling convention.
    21  // Arguments are passed in DI, SI, DX, CX, R8, R9, the rest is on stack.
    22  // Callee-saved registers are: BX, BP, R12-R15.
    23  // SP must be 16-byte aligned.
    24  // On Windows:
    25  // Arguments are passed in CX, DX, R8, R9, the rest is on stack.
    26  // Callee-saved registers are: BX, BP, DI, SI, R12-R15.
    27  // SP must be 16-byte aligned. Windows also requires "stack-backing" for the 4 register arguments:
    28  // https://msdn.microsoft.com/en-us/library/ms235286.aspx
    29  // We do not do this, because it seems to be intended for vararg/unprototyped functions.
    30  // Gcc-compiled race runtime does not try to use that space.
    31  
         // RARG0-RARG3 name the registers that carry the first four integer
         // arguments of the host C calling convention (see the ABI recap above),
         // so the thunks below can be written once for both Windows and SysV.
     32  #ifdef GOOS_windows
     33  #define RARG0 CX
     34  #define RARG1 DX
     35  #define RARG2 R8
     36  #define RARG3 R9
     37  #else
     38  #define RARG0 DI
     39  #define RARG1 SI
     40  #define RARG2 DX
     41  #define RARG3 CX
     42  #endif
    43  
     44  // func runtime·raceread(addr uintptr)
     45  // Called from instrumented code.
     46  // Defined as ABIInternal so as to avoid introducing a wrapper,
     47  // which would render runtime.getcallerpc ineffective.
     48  TEXT	runtime·raceread<ABIInternal>(SB), NOSPLIT, $0-8
     49  #ifdef GOEXPERIMENT_regabiargs
     50  	MOVQ	AX, RARG1	// addr arrives in AX under the register ABI
     51  #else
     52  	MOVQ	addr+0(FP), RARG1
     53  #endif
     54  	MOVQ	(SP), RARG2	// caller pc; a wrapper would make this the wrapper's pc
     55  	// void __tsan_read(ThreadState *thr, void *addr, void *pc);
     56  	MOVQ	$__tsan_read(SB), AX
     57  	JMP	racecalladdr<>(SB)	// tail call: range-check addr, then racecall
    58  
     59  // func runtime·RaceRead(addr uintptr)
         // Exported (public race API) entry point; forwards to raceread.
     60  TEXT	runtime·RaceRead(SB), NOSPLIT, $0-8
     61  	// This needs to be a tail call, because raceread reads caller pc.
     62  	JMP	runtime·raceread(SB)
    63  
     64  // void runtime·racereadpc(void *addr, void *callpc, void *pc)
         // Like raceread, but callpc/pc are supplied explicitly by the caller
         // instead of being read off the stack.
     65  TEXT	runtime·racereadpc(SB), NOSPLIT, $0-24
     66  	MOVQ	addr+0(FP), RARG1
     67  	MOVQ	callpc+8(FP), RARG2
     68  	MOVQ	pc+16(FP), RARG3
     69  	ADDQ	$1, RARG3 // pc is function start, tsan wants return address
     70  	// void __tsan_read_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
     71  	MOVQ	$__tsan_read_pc(SB), AX
     72  	JMP	racecalladdr<>(SB)
    73  
     74  // func runtime·racewrite(addr uintptr)
     75  // Called from instrumented code.
     76  // Defined as ABIInternal so as to avoid introducing a wrapper,
     77  // which would render runtime.getcallerpc ineffective.
     78  TEXT	runtime·racewrite<ABIInternal>(SB), NOSPLIT, $0-8
     79  #ifdef GOEXPERIMENT_regabiargs
     80  	MOVQ	AX, RARG1	// addr arrives in AX under the register ABI
     81  #else
     82  	MOVQ	addr+0(FP), RARG1
     83  #endif
     84  	MOVQ	(SP), RARG2	// caller pc; a wrapper would make this the wrapper's pc
     85  	// void __tsan_write(ThreadState *thr, void *addr, void *pc);
     86  	MOVQ	$__tsan_write(SB), AX
     87  	JMP	racecalladdr<>(SB)	// tail call: range-check addr, then racecall
    88  
     89  // func runtime·RaceWrite(addr uintptr)
         // Exported (public race API) entry point; forwards to racewrite.
     90  TEXT	runtime·RaceWrite(SB), NOSPLIT, $0-8
     91  	// This needs to be a tail call, because racewrite reads caller pc.
     92  	JMP	runtime·racewrite(SB)
    93  
     94  // void runtime·racewritepc(void *addr, void *callpc, void *pc)
         // Like racewrite, but callpc/pc are supplied explicitly by the caller
         // instead of being read off the stack.
     95  TEXT	runtime·racewritepc(SB), NOSPLIT, $0-24
     96  	MOVQ	addr+0(FP), RARG1
     97  	MOVQ	callpc+8(FP), RARG2
     98  	MOVQ	pc+16(FP), RARG3
     99  	ADDQ	$1, RARG3 // pc is function start, tsan wants return address
    100  	// void __tsan_write_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
    101  	MOVQ	$__tsan_write_pc(SB), AX
    102  	JMP	racecalladdr<>(SB)
   103  
   104  // func runtime·racereadrange(addr, size uintptr)
   105  // Called from instrumented code.
   106  TEXT	runtime·racereadrange(SB), NOSPLIT, $0-16
   107  	MOVQ	addr+0(FP), RARG1
   108  	MOVQ	size+8(FP), RARG2
   109  	MOVQ	(SP), RARG3
   110  	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
   111  	MOVQ	$__tsan_read_range(SB), AX
   112  	JMP	racecalladdr<>(SB)
   113  
    114  // func runtime·RaceReadRange(addr, size uintptr)
         // Exported (public race API) entry point; forwards to racereadrange.
    115  TEXT	runtime·RaceReadRange(SB), NOSPLIT, $0-16
    116  	// This needs to be a tail call, because racereadrange reads caller pc.
    117  	JMP	runtime·racereadrange(SB)
   118  
    119  // void runtime·racereadrangepc1(void *addr, uintptr sz, void *pc)
         // Range version of racereadpc: reports a read of sz bytes at addr,
         // attributed to the explicitly supplied pc.
    120  TEXT	runtime·racereadrangepc1(SB), NOSPLIT, $0-24
    121  	MOVQ	addr+0(FP), RARG1
    122  	MOVQ	size+8(FP), RARG2
    123  	MOVQ	pc+16(FP), RARG3
    124  	ADDQ	$1, RARG3 // pc is function start, tsan wants return address
    125  	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
    126  	MOVQ	$__tsan_read_range(SB), AX
    127  	JMP	racecalladdr<>(SB)
   128  
    129  // func runtime·racewriterange(addr, size uintptr)
    130  // Called from instrumented code.
    131  // Defined as ABIInternal so as to avoid introducing a wrapper,
    132  // which would render runtime.getcallerpc ineffective.
    133  TEXT	runtime·racewriterange<ABIInternal>(SB), NOSPLIT, $0-16
    134  #ifdef GOEXPERIMENT_regabiargs
    135  	MOVQ	AX, RARG1	// addr arrives in AX under the register ABI
    136  	MOVQ	BX, RARG2	// size arrives in BX under the register ABI
    137  #else
    138  	MOVQ	addr+0(FP), RARG1
    139  	MOVQ	size+8(FP), RARG2
    140  #endif
    141  	MOVQ	(SP), RARG3	// caller pc; a wrapper would make this the wrapper's pc
    142  	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
    143  	MOVQ	$__tsan_write_range(SB), AX
    144  	JMP	racecalladdr<>(SB)
   145  
    146  // func runtime·RaceWriteRange(addr, size uintptr)
         // Exported (public race API) entry point; forwards to racewriterange.
    147  TEXT	runtime·RaceWriteRange(SB), NOSPLIT, $0-16
    148  	// This needs to be a tail call, because racewriterange reads caller pc.
    149  	JMP	runtime·racewriterange(SB)
   150  
    151  // void runtime·racewriterangepc1(void *addr, uintptr sz, void *pc)
         // Range version of racewritepc: reports a write of sz bytes at addr,
         // attributed to the explicitly supplied pc.
    152  TEXT	runtime·racewriterangepc1(SB), NOSPLIT, $0-24
    153  	MOVQ	addr+0(FP), RARG1
    154  	MOVQ	size+8(FP), RARG2
    155  	MOVQ	pc+16(FP), RARG3
    156  	ADDQ	$1, RARG3 // pc is function start, tsan wants return address
    157  	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
    158  	MOVQ	$__tsan_write_range(SB), AX
    159  	JMP	racecalladdr<>(SB)
   160  
    161  // If addr (RARG1) is out of range, do nothing.
    162  // Otherwise, setup goroutine context and invoke racecall. Other arguments already set.
    163  TEXT	racecalladdr<>(SB), NOSPLIT, $0-0
    164  #ifndef GOEXPERIMENT_regabig
    165  	get_tls(R12)
    166  	MOVQ	g(R12), R14	// without regabig, g is not pinned in R14; load it
    167  #endif
    168  	MOVQ	g_racectx(R14), RARG0	// goroutine context
    169  	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
         // (Unsigned compares, JB/JAE, since these are addresses.)
    170  	CMPQ	RARG1, runtime·racearenastart(SB)
    171  	JB	data
    172  	CMPQ	RARG1, runtime·racearenaend(SB)
    173  	JB	call
    174  data:
    175  	CMPQ	RARG1, runtime·racedatastart(SB)
    176  	JB	ret
    177  	CMPQ	RARG1, runtime·racedataend(SB)
    178  	JAE	ret
    179  call:
    180  	MOVQ	AX, AX		// w/o this 6a miscompiles this function
    181  	JMP	racecall<>(SB)
    182  ret:
    183  	RET
   184  
    185  // func runtime·racefuncenter(pc uintptr)
    186  // Called from instrumented code.
    187  TEXT	runtime·racefuncenter(SB), NOSPLIT, $0-8
    188  	MOVQ	callpc+0(FP), R11	// R11 is the input convention of racefuncenter<>
    189  	JMP	racefuncenter<>(SB)
   190  
    191  // Common code for racefuncenter
    192  // R11 = caller's return address
    193  TEXT	racefuncenter<>(SB), NOSPLIT, $0-0
    194  	MOVQ	DX, BX		// save function entry context (for closures)
    195  #ifndef GOEXPERIMENT_regabig
    196  	get_tls(R12)
    197  	MOVQ	g(R12), R14	// without regabig, g is not pinned in R14; load it
    198  #endif
    199  	MOVQ	g_racectx(R14), RARG0	// goroutine context
    200  	MOVQ	R11, RARG1
    201  	// void __tsan_func_enter(ThreadState *thr, void *pc);
    202  	MOVQ	$__tsan_func_enter(SB), AX
    203  	// racecall<> preserves BX
         // (BX is callee-saved in the C ABI — see the calling-convention recap
         // at the top of this file — so DX survives the C call via BX.)
    204  	CALL	racecall<>(SB)
    205  	MOVQ	BX, DX	// restore function entry context
    206  	RET
   207  
    208  // func runtime·racefuncexit()
    209  // Called from instrumented code.
    210  TEXT	runtime·racefuncexit(SB), NOSPLIT, $0-0
    211  #ifndef GOEXPERIMENT_regabig
    212  	get_tls(R12)
    213  	MOVQ	g(R12), R14	// without regabig, g is not pinned in R14; load it
    214  #endif
    215  	MOVQ	g_racectx(R14), RARG0	// goroutine context
    216  	// void __tsan_func_exit(ThreadState *thr);
    217  	MOVQ	$__tsan_func_exit(SB), AX
    218  	JMP	racecall<>(SB)	// tail call; __tsan_func_exit returns to our caller
   219  
    220  // Atomic operations for sync/atomic package.
         //
         // Pattern for all the thunks below: load the address of the matching
         // __tsan_go_atomic* function into AX and call racecallatomic<>, which
         // forwards the Go argument frame (starting at 16(SP)) to it.
         // GO_ARGS takes the argument pointer maps from the Go declarations.
         // Unsigned/uintptr/pointer variants tail-jump to the same-sized
         // signed implementation.
    221  
    222  // Load
    223  TEXT	sync∕atomic·LoadInt32(SB), NOSPLIT, $0-12
    224  	GO_ARGS
    225  	MOVQ	$__tsan_go_atomic32_load(SB), AX
    226  	CALL	racecallatomic<>(SB)
    227  	RET
    228  
    229  TEXT	sync∕atomic·LoadInt64(SB), NOSPLIT, $0-16
    230  	GO_ARGS
    231  	MOVQ	$__tsan_go_atomic64_load(SB), AX
    232  	CALL	racecallatomic<>(SB)
    233  	RET
    234  
    235  TEXT	sync∕atomic·LoadUint32(SB), NOSPLIT, $0-12
    236  	GO_ARGS
    237  	JMP	sync∕atomic·LoadInt32(SB)
    238  
    239  TEXT	sync∕atomic·LoadUint64(SB), NOSPLIT, $0-16
    240  	GO_ARGS
    241  	JMP	sync∕atomic·LoadInt64(SB)
    242  
    243  TEXT	sync∕atomic·LoadUintptr(SB), NOSPLIT, $0-16
    244  	GO_ARGS
    245  	JMP	sync∕atomic·LoadInt64(SB)
    246  
    247  TEXT	sync∕atomic·LoadPointer(SB), NOSPLIT, $0-16
    248  	GO_ARGS
    249  	JMP	sync∕atomic·LoadInt64(SB)
   250  
    251  // Store
         // Same thunk pattern as Load above.
    252  TEXT	sync∕atomic·StoreInt32(SB), NOSPLIT, $0-12
    253  	GO_ARGS
    254  	MOVQ	$__tsan_go_atomic32_store(SB), AX
    255  	CALL	racecallatomic<>(SB)
    256  	RET
    257  
    258  TEXT	sync∕atomic·StoreInt64(SB), NOSPLIT, $0-16
    259  	GO_ARGS
    260  	MOVQ	$__tsan_go_atomic64_store(SB), AX
    261  	CALL	racecallatomic<>(SB)
    262  	RET
    263  
    264  TEXT	sync∕atomic·StoreUint32(SB), NOSPLIT, $0-12
    265  	GO_ARGS
    266  	JMP	sync∕atomic·StoreInt32(SB)
    267  
    268  TEXT	sync∕atomic·StoreUint64(SB), NOSPLIT, $0-16
    269  	GO_ARGS
    270  	JMP	sync∕atomic·StoreInt64(SB)
    271  
    272  TEXT	sync∕atomic·StoreUintptr(SB), NOSPLIT, $0-16
    273  	GO_ARGS
    274  	JMP	sync∕atomic·StoreInt64(SB)
   275  
    276  // Swap
         // Same thunk pattern as Load above.
    277  TEXT	sync∕atomic·SwapInt32(SB), NOSPLIT, $0-20
    278  	GO_ARGS
    279  	MOVQ	$__tsan_go_atomic32_exchange(SB), AX
    280  	CALL	racecallatomic<>(SB)
    281  	RET
    282  
    283  TEXT	sync∕atomic·SwapInt64(SB), NOSPLIT, $0-24
    284  	GO_ARGS
    285  	MOVQ	$__tsan_go_atomic64_exchange(SB), AX
    286  	CALL	racecallatomic<>(SB)
    287  	RET
    288  
    289  TEXT	sync∕atomic·SwapUint32(SB), NOSPLIT, $0-20
    290  	GO_ARGS
    291  	JMP	sync∕atomic·SwapInt32(SB)
    292  
    293  TEXT	sync∕atomic·SwapUint64(SB), NOSPLIT, $0-24
    294  	GO_ARGS
    295  	JMP	sync∕atomic·SwapInt64(SB)
    296  
    297  TEXT	sync∕atomic·SwapUintptr(SB), NOSPLIT, $0-24
    298  	GO_ARGS
    299  	JMP	sync∕atomic·SwapInt64(SB)
   300  
    301  // Add
         // Same thunk pattern as Load above, plus a post-call fixup:
         // __tsan_go_atomic*_fetch_add stores the OLD value into the result
         // slot, while sync/atomic.Add* must return the NEW value, so the
         // addend is added to the result slot after the call.
    302  TEXT	sync∕atomic·AddInt32(SB), NOSPLIT, $0-20
    303  	GO_ARGS
    304  	MOVQ	$__tsan_go_atomic32_fetch_add(SB), AX
    305  	CALL	racecallatomic<>(SB)
    306  	MOVL	add+8(FP), AX	// convert fetch_add to add_fetch
    307  	ADDL	AX, ret+16(FP)
    308  	RET
    309  
    310  TEXT	sync∕atomic·AddInt64(SB), NOSPLIT, $0-24
    311  	GO_ARGS
    312  	MOVQ	$__tsan_go_atomic64_fetch_add(SB), AX
    313  	CALL	racecallatomic<>(SB)
    314  	MOVQ	add+8(FP), AX	// convert fetch_add to add_fetch
    315  	ADDQ	AX, ret+16(FP)
    316  	RET
    317  
    318  TEXT	sync∕atomic·AddUint32(SB), NOSPLIT, $0-20
    319  	GO_ARGS
    320  	JMP	sync∕atomic·AddInt32(SB)
    321  
    322  TEXT	sync∕atomic·AddUint64(SB), NOSPLIT, $0-24
    323  	GO_ARGS
    324  	JMP	sync∕atomic·AddInt64(SB)
    325  
    326  TEXT	sync∕atomic·AddUintptr(SB), NOSPLIT, $0-24
    327  	GO_ARGS
    328  	JMP	sync∕atomic·AddInt64(SB)
   329  
    330  // CompareAndSwap
         // Same thunk pattern as Load above; the odd frame sizes ($0-17, $0-25)
         // account for the trailing bool result.
    331  TEXT	sync∕atomic·CompareAndSwapInt32(SB), NOSPLIT, $0-17
    332  	GO_ARGS
    333  	MOVQ	$__tsan_go_atomic32_compare_exchange(SB), AX
    334  	CALL	racecallatomic<>(SB)
    335  	RET
    336  
    337  TEXT	sync∕atomic·CompareAndSwapInt64(SB), NOSPLIT, $0-25
    338  	GO_ARGS
    339  	MOVQ	$__tsan_go_atomic64_compare_exchange(SB), AX
    340  	CALL	racecallatomic<>(SB)
    341  	RET
    342  
    343  TEXT	sync∕atomic·CompareAndSwapUint32(SB), NOSPLIT, $0-17
    344  	GO_ARGS
    345  	JMP	sync∕atomic·CompareAndSwapInt32(SB)
    346  
    347  TEXT	sync∕atomic·CompareAndSwapUint64(SB), NOSPLIT, $0-25
    348  	GO_ARGS
    349  	JMP	sync∕atomic·CompareAndSwapInt64(SB)
    350  
    351  TEXT	sync∕atomic·CompareAndSwapUintptr(SB), NOSPLIT, $0-25
    352  	GO_ARGS
    353  	JMP	sync∕atomic·CompareAndSwapInt64(SB)
   354  
    355  // Generic atomic operation implementation.
    356  // AX already contains target function.
         // Stack layout at entry (reached via CALL from a sync/atomic thunk):
         //   (SP)   - return address into the thunk (reported to tsan as pc)
         //   8(SP)  - the thunk's own return address (reported as caller pc)
         //   16(SP) - start of the thunk's Go argument frame (addr first)
    357  TEXT	racecallatomic<>(SB), NOSPLIT, $0-0
    358  	// Trigger SIGSEGV early.
    359  	MOVQ	16(SP), R12	// R12 = addr argument of the atomic op
    360  	MOVL	(R12), R13	// faults here (not inside tsan) if addr is bad
    361  	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
    362  	CMPQ	R12, runtime·racearenastart(SB)
    363  	JB	racecallatomic_data
    364  	CMPQ	R12, runtime·racearenaend(SB)
    365  	JB	racecallatomic_ok
    366  racecallatomic_data:
    367  	CMPQ	R12, runtime·racedatastart(SB)
    368  	JB	racecallatomic_ignore
    369  	CMPQ	R12, runtime·racedataend(SB)
    370  	JAE	racecallatomic_ignore
    371  racecallatomic_ok:
    372  	// Addr is within the good range, call the atomic function.
    373  #ifndef GOEXPERIMENT_regabig
    374  	get_tls(R12)
    375  	MOVQ	g(R12), R14
    376  #endif
    377  	MOVQ	g_racectx(R14), RARG0	// goroutine context
    378  	MOVQ	8(SP), RARG1	// caller pc
    379  	MOVQ	(SP), RARG2	// pc
    380  	LEAQ	16(SP), RARG3	// arguments
    381  	JMP	racecall<>(SB)	// does not return
    382  racecallatomic_ignore:
    383  	// Addr is outside the good range.
    384  	// Call __tsan_go_ignore_sync_begin to ignore synchronization during the atomic op.
    385  	// An attempt to synchronize on the address would cause crash.
    386  	MOVQ	AX, BX	// remember the original function
    387  	MOVQ	$__tsan_go_ignore_sync_begin(SB), AX
    388  #ifndef GOEXPERIMENT_regabig
    389  	get_tls(R12)
    390  	MOVQ	g(R12), R14
    391  #endif
    392  	MOVQ	g_racectx(R14), RARG0	// goroutine context
    393  	CALL	racecall<>(SB)
    394  	MOVQ	BX, AX	// restore the original function
    395  	// Call the atomic function.
    396  	MOVQ	g_racectx(R14), RARG0	// goroutine context
    397  	MOVQ	8(SP), RARG1	// caller pc
    398  	MOVQ	(SP), RARG2	// pc
    399  	LEAQ	16(SP), RARG3	// arguments
    400  	CALL	racecall<>(SB)
    401  	// Call __tsan_go_ignore_sync_end.
    402  	MOVQ	$__tsan_go_ignore_sync_end(SB), AX
    403  	MOVQ	g_racectx(R14), RARG0	// goroutine context
    404  	JMP	racecall<>(SB)
   405  
    406  // void runtime·racecall(void(*f)(...), ...)
    407  // Calls C function f from race runtime and passes up to 4 arguments to it.
    408  // The arguments are never heap-object-preserving pointers, so we pretend there are no arguments.
    409  TEXT	runtime·racecall(SB), NOSPLIT, $0-0
    410  	MOVQ	fn+0(FP), AX	// racecall<> expects the target in AX
    411  	MOVQ	arg0+8(FP), RARG0
    412  	MOVQ	arg1+16(FP), RARG1
    413  	MOVQ	arg2+24(FP), RARG2
    414  	MOVQ	arg3+32(FP), RARG3
    415  	JMP	racecall<>(SB)
   416  
    417  // Switches SP to g0 stack and calls (AX). Arguments already set.
         // Note: only SP is switched; the g register itself is left pointing
         // at the current goroutine (see racecallbackthunk below).
    418  TEXT	racecall<>(SB), NOSPLIT, $0-0
    419  #ifndef GOEXPERIMENT_regabig
    420  	get_tls(R12)
    421  	MOVQ	g(R12), R14	// without regabig, g is not pinned in R14; load it
    422  #endif
    423  	MOVQ	g_m(R14), R13
    424  	// Switch to g0 stack.
    425  	MOVQ	SP, R12		// callee-saved, preserved across the CALL
    426  	MOVQ	m_g0(R13), R10
    427  	CMPQ	R10, R14
    428  	JE	call	// already on g0
    429  	MOVQ	(g_sched+gobuf_sp)(R10), SP
    430  call:
    431  	ANDQ	$~15, SP	// alignment for gcc ABI
    432  	CALL	AX
    433  	MOVQ	R12, SP		// restore the Go SP saved above
    434  	// Back to Go world, set special registers.
    435  	// The g register (R14) is preserved in C.
    436  	XORPS	X15, X15	// re-zero the fixed zero register expected by Go code
    437  	RET
   438  
   439  // C->Go callback thunk that allows to call runtime·racesymbolize from C code.
   440  // Direct Go->C race call has only switched SP, finish g->g0 switch by setting correct g.
   441  // The overall effect of Go->C->Go call chain is similar to that of mcall.
   442  // RARG0 contains command code. RARG1 contains command-specific context.
   443  // See racecallback for command codes.
   444  // Defined as ABIInternal so as to avoid introducing a wrapper,
   445  // because its address is passed to C via funcPC.
   446  TEXT	runtime·racecallbackthunk<ABIInternal>(SB), NOSPLIT, $0-0
   447  	// Handle command raceGetProcCmd (0) here.
   448  	// First, code below assumes that we are on curg, while raceGetProcCmd
   449  	// can be executed on g0. Second, it is called frequently, so will
   450  	// benefit from this fast path.
   451  	CMPQ	RARG0, $0
   452  	JNE	rest
   453  	get_tls(RARG0)
   454  	MOVQ	g(RARG0), RARG0
   455  	MOVQ	g_m(RARG0), RARG0
   456  	MOVQ	m_p(RARG0), RARG0
   457  	MOVQ	p_raceprocctx(RARG0), RARG0
   458  	MOVQ	RARG0, (RARG1)
   459  	RET
   460  
   461  rest:
   462  	// Transition from C ABI to Go ABI.
   463  	PUSH_REGS_HOST_TO_ABI0()
   464  	// Set g = g0.
   465  	get_tls(R12)
   466  	MOVQ	g(R12), R14
   467  	MOVQ	g_m(R14), R13
   468  	MOVQ	m_g0(R13), R15
   469  	CMPQ	R13, R15
   470  	JEQ	noswitch	// branch if already on g0
   471  	MOVQ	R15, g(R12)	// g = m->g0
   472  	MOVQ	R15, R14	// set g register
   473  	PUSHQ	RARG1	// func arg
   474  	PUSHQ	RARG0	// func arg
   475  	CALL	runtime·racecallback(SB)
   476  	POPQ	R12
   477  	POPQ	R12
   478  	// All registers are smashed after Go code, reload.
   479  	get_tls(R12)
   480  	MOVQ	g(R12), R13
   481  	MOVQ	g_m(R13), R13
   482  	MOVQ	m_curg(R13), R14
   483  	MOVQ	R14, g(R12)	// g = m->curg
   484  ret:
   485  	POP_REGS_HOST_TO_ABI0()
   486  	RET
   487  
   488  noswitch:
   489  	// already on g0
   490  	PUSHQ	RARG1	// func arg
   491  	PUSHQ	RARG0	// func arg
   492  	CALL	runtime·racecallback(SB)
   493  	POPQ	R12
   494  	POPQ	R12
   495  	JMP	ret
   496  

View as plain text