Black Lives Matter. Support the Equal Justice Initiative.

Text file src/runtime/race_arm64.s

Documentation: runtime

     1  // Copyright 2018 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build race
     6  // +build race
     7  
     8  #include "go_asm.h"
     9  #include "funcdata.h"
    10  #include "textflag.h"
    11  #include "tls_arm64.h"
    12  
    13  // The following thunks allow calling the gcc-compiled race runtime directly
    14  // from Go code without going all the way through cgo.
    15  // First, it's much faster (up to 50% speedup for real Go programs).
    16  // Second, it eliminates race-related special cases from cgocall and scheduler.
    17  // Third, in long-term it will allow to remove cyclic runtime/race dependency on cmd/go.
    18  
    19  // A brief recap of the arm64 calling convention.
    20  // Arguments are passed in R0...R7, the rest is on stack.
    21  // Callee-saved registers are: R19...R28.
    22  // Temporary registers are: R9...R15
    23  // SP must be 16-byte aligned.
    24  
    25  // When calling racecalladdr, R9 is the call target address.
    26  
    27  // The race ctx, ThreadState *thr below, is passed in R0 and loaded in racecalladdr.
    28  
    29  // Darwin may return unaligned thread pointer. Align it. (See tls_arm64.s)
    30  // No-op on other OSes.
    31  #ifdef TLS_darwin
    32  #define TP_ALIGN	AND	$~7, R0	// round the thread pointer in R0 down to an 8-byte boundary
    33  #else
    34  #define TP_ALIGN
    35  #endif
    36  
    37  // Load g from TLS. (See tls_arm64.s) Clobbers R0 and R11.
    38  #define load_g \
    39  	MRS_TPIDR_R0 \
    40  	TP_ALIGN \
    41  	MOVD    runtime·tls_g(SB), R11 \
    42  	MOVD    (R0)(R11), g
    43  
    44  // func runtime·raceread(addr uintptr)
    45  // Called from instrumented code.
    46  TEXT	runtime·raceread(SB), NOSPLIT, $0-8
    47  	MOVD	addr+0(FP), R1
    48  	MOVD	LR, R2	// pc = caller's return address (frameless NOSPLIT, LR still intact)
    49  	// void __tsan_read(ThreadState *thr, void *addr, void *pc);
    50  	MOVD	$__tsan_read(SB), R9
    51  	JMP	racecalladdr<>(SB)	// tail call; racecalladdr loads thr into R0
    52  
    53  // func runtime·RaceRead(addr uintptr)
    54  TEXT	runtime·RaceRead(SB), NOSPLIT, $0-8
    55  	// This needs to be a tail call, because raceread reads caller pc.
    56  	JMP	runtime·raceread(SB)
    57  
    58  // func runtime·racereadpc(void *addr, void *callpc, void *pc)
    59  TEXT	runtime·racereadpc(SB), NOSPLIT, $0-24
    60  	MOVD	addr+0(FP), R1
    61  	MOVD	callpc+8(FP), R2
    62  	MOVD	pc+16(FP), R3
    63  	// void __tsan_read_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
    64  	MOVD	$__tsan_read_pc(SB), R9
    65  	JMP	racecalladdr<>(SB)	// tail call; racecalladdr loads thr into R0
    66  
    67  // func runtime·racewrite(addr uintptr)
    68  // Called from instrumented code.
    69  TEXT	runtime·racewrite(SB), NOSPLIT, $0-8
    70  	MOVD	addr+0(FP), R1
    71  	MOVD	LR, R2	// pc = caller's return address (frameless NOSPLIT, LR still intact)
    72  	// void __tsan_write(ThreadState *thr, void *addr, void *pc);
    73  	MOVD	$__tsan_write(SB), R9
    74  	JMP	racecalladdr<>(SB)	// tail call; racecalladdr loads thr into R0
    75  
    76  // func runtime·RaceWrite(addr uintptr)
    77  TEXT	runtime·RaceWrite(SB), NOSPLIT, $0-8
    78  	// This needs to be a tail call, because racewrite reads caller pc.
    79  	JMP	runtime·racewrite(SB)
    80  
    81  // func runtime·racewritepc(void *addr, void *callpc, void *pc)
    82  TEXT	runtime·racewritepc(SB), NOSPLIT, $0-24
    83  	MOVD	addr+0(FP), R1
    84  	MOVD	callpc+8(FP), R2
    85  	MOVD	pc+16(FP), R3
    86  	// void __tsan_write_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
    87  	MOVD	$__tsan_write_pc(SB), R9
    88  	JMP	racecalladdr<>(SB)	// tail call; racecalladdr loads thr into R0
    89  
    90  // func runtime·racereadrange(addr, size uintptr)
    91  // Called from instrumented code.
    92  TEXT	runtime·racereadrange(SB), NOSPLIT, $0-16
    93  	MOVD	addr+0(FP), R1
    94  	MOVD	size+8(FP), R2
    95  	MOVD	LR, R3	// pc = caller's return address (frameless NOSPLIT, LR still intact)
    96  	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
    97  	MOVD	$__tsan_read_range(SB), R9
    98  	JMP	racecalladdr<>(SB)	// tail call; racecalladdr loads thr into R0
    99  
   100  // func runtime·RaceReadRange(addr, size uintptr)
   101  TEXT	runtime·RaceReadRange(SB), NOSPLIT, $0-16
   102  	// This needs to be a tail call, because racereadrange reads caller pc.
   103  	JMP	runtime·racereadrange(SB)
   104  
   105  // func runtime·racereadrangepc1(void *addr, uintptr sz, void *pc)
   106  TEXT	runtime·racereadrangepc1(SB), NOSPLIT, $0-24
   107  	MOVD	addr+0(FP), R1
   108  	MOVD	size+8(FP), R2
   109  	MOVD	pc+16(FP), R3
   110  	ADD	$4, R3	// pc is function start, tsan wants return address.
   111  	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
   112  	MOVD	$__tsan_read_range(SB), R9
   113  	JMP	racecalladdr<>(SB)	// tail call; racecalladdr loads thr into R0
   114  
   115  // func runtime·racewriterange(addr, size uintptr)
   116  // Called from instrumented code.
   117  TEXT	runtime·racewriterange(SB), NOSPLIT, $0-16
   118  	MOVD	addr+0(FP), R1
   119  	MOVD	size+8(FP), R2
   120  	MOVD	LR, R3	// pc = caller's return address (frameless NOSPLIT, LR still intact)
   121  	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
   122  	MOVD	$__tsan_write_range(SB), R9
   123  	JMP	racecalladdr<>(SB)	// tail call; racecalladdr loads thr into R0
   124  
   125  // func runtime·RaceWriteRange(addr, size uintptr)
   126  TEXT	runtime·RaceWriteRange(SB), NOSPLIT, $0-16
   127  	// This needs to be a tail call, because racewriterange reads caller pc.
   128  	JMP	runtime·racewriterange(SB)
   129  
   130  // func runtime·racewriterangepc1(void *addr, uintptr sz, void *pc)
   131  TEXT	runtime·racewriterangepc1(SB), NOSPLIT, $0-24
   132  	MOVD	addr+0(FP), R1
   133  	MOVD	size+8(FP), R2
   134  	MOVD	pc+16(FP), R3
   135  	ADD	$4, R3	// pc is function start, tsan wants return address.
   136  	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
   137  	MOVD	$__tsan_write_range(SB), R9
   138  	JMP	racecalladdr<>(SB)	// tail call; racecalladdr loads thr into R0
   139  
   140  // If addr (R1) is out of range, do nothing.
   141  // Otherwise, setup goroutine context and invoke racecall. Other arguments already set.
   142  TEXT	racecalladdr<>(SB), NOSPLIT, $0-0
   143  	load_g
   144  	MOVD	g_racectx(g), R0	// thr, the first argument to the tsan function
   145  	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
   146  	MOVD	runtime·racearenastart(SB), R10
   147  	CMP	R10, R1	// NOTE(review): BLT/BGT below are signed compares; assumes race addresses have the high bit clear
   148  	BLT	data
   149  	MOVD	runtime·racearenaend(SB), R10
   150  	CMP	R10, R1
   151  	BLT	call	// arenastart <= addr < arenaend: instrument it
   152  data:
   153  	MOVD	runtime·racedatastart(SB), R10
   154  	CMP	R10, R1
   155  	BLT	ret	// addr below the data range: ignore
   156  	MOVD	runtime·racedataend(SB), R10
   157  	CMP	R10, R1
   158  	BGT	ret	// NOTE(review): BGT, so addr == racedataend is treated as in range, unlike the BGE check in racecallatomic — confirm intended
   159  call:
   160  	JMP	racecall<>(SB)	// tail call; racecall returns to our caller
   161  ret:
   162  	RET
   163  
   164  // func runtime·racefuncenter(pc uintptr)
   165  // Called from instrumented code.
   166  TEXT	runtime·racefuncenter(SB), NOSPLIT, $0-8
   167  	MOVD	callpc+0(FP), R9
   168  	JMP	racefuncenter<>(SB)
   169  
   170  // Common code for racefuncenter
   171  // R9 = caller's return address
   172  TEXT	racefuncenter<>(SB), NOSPLIT, $0-0
   173  	load_g
   174  	MOVD	g_racectx(g), R0	// goroutine racectx
   175  	MOVD	R9, R1	// pc argument; R9 is about to be reused for the call target
   176  	// void __tsan_func_enter(ThreadState *thr, void *pc);
   177  	MOVD	$__tsan_func_enter(SB), R9
   178  	BL	racecall<>(SB)
   179  	RET
   180  
   181  // func runtime·racefuncexit()
   182  // Called from instrumented code.
   183  TEXT	runtime·racefuncexit(SB), NOSPLIT, $0-0
   184  	load_g
   185  	MOVD	g_racectx(g), R0	// race context
   186  	// void __tsan_func_exit(ThreadState *thr);
   187  	MOVD	$__tsan_func_exit(SB), R9
   188  	JMP	racecall<>(SB)	// tail call; racecall returns to our caller
   189  
   190  // Atomic operations for sync/atomic package.
   191  // R3 = addr of arguments passed to this function, it can
   192  // be fetched at 40(RSP) in racecallatomic after two times BL
   193  // R0, R1, R2 set in racecallatomic
   194  
   195  // Load
   196  TEXT	sync∕atomic·LoadInt32(SB), NOSPLIT, $0-12
   197  	GO_ARGS	// use the Go prototype's pointer map for the arguments (see funcdata.h)
   198  	MOVD	$__tsan_go_atomic32_load(SB), R9
   199  	BL	racecallatomic<>(SB)
   200  	RET
   201  
   202  TEXT	sync∕atomic·LoadInt64(SB), NOSPLIT, $0-16
   203  	GO_ARGS
   204  	MOVD	$__tsan_go_atomic64_load(SB), R9
   205  	BL	racecallatomic<>(SB)
   206  	RET
   207  
   208  TEXT	sync∕atomic·LoadUint32(SB), NOSPLIT, $0-12
   209  	GO_ARGS
   210  	JMP	sync∕atomic·LoadInt32(SB)	// same size and layout; reuse the Int32 thunk
   211  
   212  TEXT	sync∕atomic·LoadUint64(SB), NOSPLIT, $0-16
   213  	GO_ARGS
   214  	JMP	sync∕atomic·LoadInt64(SB)	// same size and layout; reuse the Int64 thunk
   215  
   216  TEXT	sync∕atomic·LoadUintptr(SB), NOSPLIT, $0-16
   217  	GO_ARGS
   218  	JMP	sync∕atomic·LoadInt64(SB)	// uintptr is 8 bytes on arm64
   219  
   220  TEXT	sync∕atomic·LoadPointer(SB), NOSPLIT, $0-16
   221  	GO_ARGS
   222  	JMP	sync∕atomic·LoadInt64(SB)	// pointers are 8 bytes on arm64
   223  
   224  // Store
   225  TEXT	sync∕atomic·StoreInt32(SB), NOSPLIT, $0-12
   226  	GO_ARGS
   227  	MOVD	$__tsan_go_atomic32_store(SB), R9
   228  	BL	racecallatomic<>(SB)
   229  	RET
   230  
   231  TEXT	sync∕atomic·StoreInt64(SB), NOSPLIT, $0-16
   232  	GO_ARGS
   233  	MOVD	$__tsan_go_atomic64_store(SB), R9
   234  	BL	racecallatomic<>(SB)
   235  	RET
   236  
   237  TEXT	sync∕atomic·StoreUint32(SB), NOSPLIT, $0-12
   238  	GO_ARGS
   239  	JMP	sync∕atomic·StoreInt32(SB)	// same size and layout; reuse the Int32 thunk
   240  
   241  TEXT	sync∕atomic·StoreUint64(SB), NOSPLIT, $0-16
   242  	GO_ARGS
   243  	JMP	sync∕atomic·StoreInt64(SB)	// same size and layout; reuse the Int64 thunk
   244  
   245  TEXT	sync∕atomic·StoreUintptr(SB), NOSPLIT, $0-16
   246  	GO_ARGS
   247  	JMP	sync∕atomic·StoreInt64(SB)	// uintptr is 8 bytes on arm64
   248  
   249  // Swap
   250  TEXT	sync∕atomic·SwapInt32(SB), NOSPLIT, $0-20
   251  	GO_ARGS
   252  	MOVD	$__tsan_go_atomic32_exchange(SB), R9
   253  	BL	racecallatomic<>(SB)
   254  	RET
   255  
   256  TEXT	sync∕atomic·SwapInt64(SB), NOSPLIT, $0-24
   257  	GO_ARGS
   258  	MOVD	$__tsan_go_atomic64_exchange(SB), R9
   259  	BL	racecallatomic<>(SB)
   260  	RET
   261  
   262  TEXT	sync∕atomic·SwapUint32(SB), NOSPLIT, $0-20
   263  	GO_ARGS
   264  	JMP	sync∕atomic·SwapInt32(SB)	// same size and layout; reuse the Int32 thunk
   265  
   266  TEXT	sync∕atomic·SwapUint64(SB), NOSPLIT, $0-24
   267  	GO_ARGS
   268  	JMP	sync∕atomic·SwapInt64(SB)	// same size and layout; reuse the Int64 thunk
   269  
   270  TEXT	sync∕atomic·SwapUintptr(SB), NOSPLIT, $0-24
   271  	GO_ARGS
   272  	JMP	sync∕atomic·SwapInt64(SB)	// uintptr is 8 bytes on arm64
   273  
   274  // Add
   275  TEXT	sync∕atomic·AddInt32(SB), NOSPLIT, $0-20
   276  	GO_ARGS
   277  	MOVD	$__tsan_go_atomic32_fetch_add(SB), R9
   278  	BL	racecallatomic<>(SB)
   279  	MOVW	add+8(FP), R0	// convert fetch_add to add_fetch
   280  	MOVW	ret+16(FP), R1	// old value stored by __tsan_go_atomic32_fetch_add
   281  	ADD	R0, R1, R0
   282  	MOVW	R0, ret+16(FP)	// Go's AddInt32 returns the new value
   283  	RET
   284  
   285  TEXT	sync∕atomic·AddInt64(SB), NOSPLIT, $0-24
   286  	GO_ARGS
   287  	MOVD	$__tsan_go_atomic64_fetch_add(SB), R9
   288  	BL	racecallatomic<>(SB)
   289  	MOVD	add+8(FP), R0	// convert fetch_add to add_fetch
   290  	MOVD	ret+16(FP), R1	// old value stored by __tsan_go_atomic64_fetch_add
   291  	ADD	R0, R1, R0
   292  	MOVD	R0, ret+16(FP)	// Go's AddInt64 returns the new value
   293  	RET
   294  
   295  TEXT	sync∕atomic·AddUint32(SB), NOSPLIT, $0-20
   296  	GO_ARGS
   297  	JMP	sync∕atomic·AddInt32(SB)	// same size and layout; reuse the Int32 thunk
   298  
   299  TEXT	sync∕atomic·AddUint64(SB), NOSPLIT, $0-24
   300  	GO_ARGS
   301  	JMP	sync∕atomic·AddInt64(SB)	// same size and layout; reuse the Int64 thunk
   302  
   303  TEXT	sync∕atomic·AddUintptr(SB), NOSPLIT, $0-24
   304  	GO_ARGS
   305  	JMP	sync∕atomic·AddInt64(SB)	// uintptr is 8 bytes on arm64
   306  
   307  // CompareAndSwap
   308  TEXT	sync∕atomic·CompareAndSwapInt32(SB), NOSPLIT, $0-17
   309  	GO_ARGS
   310  	MOVD	$__tsan_go_atomic32_compare_exchange(SB), R9
   311  	BL	racecallatomic<>(SB)
   312  	RET
   313  
   314  TEXT	sync∕atomic·CompareAndSwapInt64(SB), NOSPLIT, $0-25
   315  	GO_ARGS
   316  	MOVD	$__tsan_go_atomic64_compare_exchange(SB), R9
   317  	BL	racecallatomic<>(SB)
   318  	RET
   319  
   320  TEXT	sync∕atomic·CompareAndSwapUint32(SB), NOSPLIT, $0-17
   321  	GO_ARGS
   322  	JMP	sync∕atomic·CompareAndSwapInt32(SB)	// same size and layout; reuse the Int32 thunk
   323  
   324  TEXT	sync∕atomic·CompareAndSwapUint64(SB), NOSPLIT, $0-25
   325  	GO_ARGS
   326  	JMP	sync∕atomic·CompareAndSwapInt64(SB)	// same size and layout; reuse the Int64 thunk
   327  
   328  TEXT	sync∕atomic·CompareAndSwapUintptr(SB), NOSPLIT, $0-25
   329  	GO_ARGS
   330  	JMP	sync∕atomic·CompareAndSwapInt64(SB)	// uintptr is 8 bytes on arm64
   331  
   332  // Generic atomic operation implementation.
   333  // R9 = addr of target function
   334  TEXT	racecallatomic<>(SB), NOSPLIT, $0
   335  	// Set up these registers
   336  	// R0 = *ThreadState
   337  	// R1 = caller pc
   338  	// R2 = pc
   339  	// R3 = addr of incoming arg list
   340  
   341  	// Trigger SIGSEGV early.
   342  	MOVD	40(RSP), R3	// 1st arg is addr. after two times BL, get it at 40(RSP)
   343  	MOVD	(R3), R13	// segv here if addr is bad
   344  	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
   345  	MOVD	runtime·racearenastart(SB), R10
   346  	CMP	R10, R3	// NOTE(review): signed compares below; assumes race addresses have the high bit clear
   347  	BLT	racecallatomic_data
   348  	MOVD	runtime·racearenaend(SB), R10
   349  	CMP	R10, R3
   350  	BLT	racecallatomic_ok	// arenastart <= addr < arenaend
   351  racecallatomic_data:
   352  	MOVD	runtime·racedatastart(SB), R10
   353  	CMP	R10, R3
   354  	BLT	racecallatomic_ignore	// addr below the data range
   355  	MOVD	runtime·racedataend(SB), R10
   356  	CMP	R10, R3
   357  	BGE	racecallatomic_ignore	// addr >= racedataend: out of range (half-open check)
   358  racecallatomic_ok:
   359  	// Addr is within the good range, call the atomic function.
   360  	load_g
   361  	MOVD	g_racectx(g), R0	// goroutine context
   362  	MOVD	16(RSP), R1	// caller pc
   363  	MOVD	R9, R2	// pc
   364  	ADD	$40, RSP, R3	// arguments
   365  	JMP	racecall<>(SB)	// does not return
   366  racecallatomic_ignore:
   367  	// Addr is outside the good range.
   368  	// Call __tsan_go_ignore_sync_begin to ignore synchronization during the atomic op.
   369  	// An attempt to synchronize on the address would cause crash.
   370  	MOVD	R9, R20	// remember the original function (R20 is callee-saved)
   371  	MOVD	$__tsan_go_ignore_sync_begin(SB), R9
   372  	load_g
   373  	MOVD	g_racectx(g), R0	// goroutine context
   374  	BL	racecall<>(SB)
   375  	MOVD	R20, R9	// restore the original function
   376  	// Call the atomic function.
   377  	// racecall will call LLVM race code which might clobber R28 (g)
   378  	load_g
   379  	MOVD	g_racectx(g), R0	// goroutine context
   380  	MOVD	16(RSP), R1	// caller pc
   381  	MOVD	R9, R2	// pc
   382  	ADD	$40, RSP, R3	// arguments
   383  	BL	racecall<>(SB)
   384  	// Call __tsan_go_ignore_sync_end.
   385  	MOVD	$__tsan_go_ignore_sync_end(SB), R9
   386  	MOVD	g_racectx(g), R0	// goroutine context
   387  	BL	racecall<>(SB)
   388  	RET
   389  
   390  // func runtime·racecall(void(*f)(...), ...)
   391  // Calls C function f from race runtime and passes up to 4 arguments to it.
   392  // The arguments are never heap-object-preserving pointers, so we pretend there are no arguments.
   393  TEXT	runtime·racecall(SB), NOSPLIT, $0-0
   394  	MOVD	fn+0(FP), R9	// call target, consumed by racecall<>
   395  	MOVD	arg0+8(FP), R0
   396  	MOVD	arg1+16(FP), R1
   397  	MOVD	arg2+24(FP), R2
   398  	MOVD	arg3+32(FP), R3
   399  	JMP	racecall<>(SB)	// tail call; racecall returns to our caller
   400  
   401  // Switches SP to g0 stack and calls (R9). Arguments already set.
   402  TEXT	racecall<>(SB), NOSPLIT, $0-0
   403  	MOVD	g_m(g), R10
   404  	// Switch to g0 stack.
   405  	MOVD	RSP, R19	// callee-saved, preserved across the CALL
   406  	MOVD	m_g0(R10), R11
   407  	CMP	R11, g
   408  	BEQ	call	// already on g0
   409  	MOVD	(g_sched+gobuf_sp)(R11), R12	// g0's saved stack pointer
   410  	MOVD	R12, RSP
   411  call:
   412  	BL	R9	// indirect call into the C race runtime
   413  	MOVD	R19, RSP	// restore the stack pointer saved above
   414  	RET
   415  
   416  // C->Go callback thunk that allows to call runtime·racesymbolize from C code.
   417  // Direct Go->C race call has only switched SP, finish g->g0 switch by setting correct g.
   418  // The overall effect of Go->C->Go call chain is similar to that of mcall.
   419  // R0 contains command code. R1 contains command-specific context.
   420  // See racecallback for command codes.
   421  TEXT	runtime·racecallbackthunk(SB), NOSPLIT|NOFRAME, $0
   422  	// Handle command raceGetProcCmd (0) here.
   423  	// First, code below assumes that we are on curg, while raceGetProcCmd
   424  	// can be executed on g0. Second, it is called frequently, so will
   425  	// benefit from this fast path.
   426  	CBNZ	R0, rest
   427  	MOVD	g, R13	// save current g; restored before returning to C
   428  #ifdef TLS_darwin
   429  	MOVD	R27, R12 // save R27 a.k.a. REGTMP (callee-save in C). load_g clobbers it
   430  #endif
   431  	load_g
   432  #ifdef TLS_darwin
   433  	MOVD	R12, R27
   434  #endif
   435  	MOVD	g_m(g), R0
   436  	MOVD	m_p(R0), R0
   437  	MOVD	p_raceprocctx(R0), R0
   438  	MOVD	R0, (R1)	// *(uintptr*)R1 = p.raceprocctx
   439  	MOVD	R13, g	// restore g
   440  	JMP	(LR)	// return to the C caller
   441  rest:
   442  	// Save callee-saved registers (Go code won't respect that).
   443  	// 8(RSP) and 16(RSP) are for args passed through racecallback
   444  	SUB	$112, RSP	// 16-aligned frame: LR at 0, args at 8/16, reg pairs at 24..96
   445  	MOVD	LR, 0(RSP)
   446  	STP	(R19, R20), 24(RSP)
   447  	STP	(R21, R22), 40(RSP)
   448  	STP	(R23, R24), 56(RSP)
   449  	STP	(R25, R26), 72(RSP)
   450  	STP	(R27,   g), 88(RSP)
   451  	// Set g = g0.
   452  	// load_g will clobber R0, Save R0
   453  	MOVD	R0, R13
   454  	load_g
   455  	// restore R0
   456  	MOVD	R13, R0
   457  	MOVD	g_m(g), R13
   458  	MOVD	m_g0(R13), R14
   459  	CMP	R14, g
   460  	BEQ	noswitch	// branch if already on g0
   461  	MOVD	R14, g	// g = g0
   462  
   463  	MOVD	R0, 8(RSP)	// func arg
   464  	MOVD	R1, 16(RSP)	// func arg
   465  	BL	runtime·racecallback(SB)
   466  
   467  	// All registers are smashed after Go code, reload.
   468  	MOVD	g_m(g), R13
   469  	MOVD	m_curg(R13), g	// g = m->curg
   470  ret:
   471  	// Restore callee-saved registers.
   472  	MOVD	0(RSP), LR
   473  	LDP	24(RSP), (R19, R20)
   474  	LDP	40(RSP), (R21, R22)
   475  	LDP	56(RSP), (R23, R24)
   476  	LDP	72(RSP), (R25, R26)
   477  	LDP	88(RSP), (R27,   g)
   478  	ADD	$112, RSP
   479  	JMP	(LR)	// return to the C caller
   480  
   481  noswitch:
   482  	// already on g0
   483  	MOVD	R0, 8(RSP)	// func arg
   484  	MOVD	R1, 16(RSP)	// func arg
   485  	BL	runtime·racecallback(SB)
   486  	JMP	ret
   487  
   488  #ifndef TLSG_IS_VARIABLE
   489  // tls_g, g value for each thread in TLS
   490  // NOTE(review): defined here only when TLSG_IS_VARIABLE is unset — presumably
   491  // the variable-offset case defines it elsewhere (see tls_arm64.h); confirm.
   490  GLOBL runtime·tls_g+0(SB), TLSBSS+DUPOK, $8
   491  #endif
   492  

View as plain text