Text file
src/runtime/memclr_arm64.s
Documentation: runtime
1 // Copyright 2014 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 #include "textflag.h"
6
7 // See memclrNoHeapPointers Go doc for important implementation constraints.
8
9 // func memclrNoHeapPointers(ptr unsafe.Pointer, n uintptr)
10 // Also called from assembly in sys_windows_arm64.s without g (but using Go stack convention).
//
// Stores zero to the n bytes starting at ptr. The arguments are loaded from
// the caller's frame (ptr at 0(FP), n at 8(FP)); the TEXT line declares a
// zero-size local frame and 16 bytes of arguments.
//
// Register roles inside this routine:
//   R0      current destination address
//   R1      bytes still to clear (biased inside loop_64 and loop_zva)
//   R3, R9  scratch
//   R4      bytes from R0 up to the next alignment boundary
//   R5      DC ZVA block size in bytes, as cached in block_size<>
//   R6      R5 - 1, used as the block alignment mask
//   R7      ptr + n, one past the last byte (n < 16 paths only)
//
// Size classes: n < 16 uses one or two narrow stores, n == 16 a single STP,
// and n > 16 aligns R0 to 16 bytes and then uses STP loops and, when usable,
// DC ZVA for whole blocks.
11 TEXT runtime·memclrNoHeapPointers(SB),NOSPLIT,$0-16
12 MOVD ptr+0(FP), R0 // R0 = ptr
13 MOVD n+8(FP), R1 // R1 = n
14
15 CMP $16, R1 // the flags set here drive both the BEQ and the BHI below
16 // If n is equal to 16 bytes, use zero_exact_16 to zero
17 BEQ zero_exact_16
18
19 // If n is greater than 16 bytes (unsigned compare), use zero_by_16 to zero
20 BHI zero_by_16
21
22 // n is less than 16 bytes
23 ADD R1, R0, R7 // R7 = ptr + n, one past the last byte
24 TBZ $3, R1, less_than_8 // bit 3 of n clear => n < 8
25 MOVD ZR, (R0) // 8 <= n < 16: clear the first 8 bytes
26 MOVD ZR, -8(R7) // and the last 8 bytes; the two stores overlap and cover all n bytes
27 RET
28
29 less_than_8:
30 TBZ $2, R1, less_than_4 // bit 2 of n clear => n < 4
31 MOVW ZR, (R0) // 4 <= n < 8: clear the first 4 bytes
32 MOVW ZR, -4(R7) // and the last 4 bytes; the two stores overlap and cover all n bytes
33 RET
34
35 less_than_4:
36 CBZ R1, ending // n == 0: nothing to clear
37 MOVB ZR, (R0) // n is 1, 2 or 3: clear the first byte
38 TBZ $1, R1, ending // bit 1 of n clear => n == 1, already done
39 MOVH ZR, -2(R7) // n is 2 or 3: clear the last two bytes
40
41 ending:
42 RET
43
44 zero_exact_16:
45 // n is exactly 16 bytes
46 STP (ZR, ZR), (R0) // one store pair writes all 16 bytes
47 RET
48
49 zero_by_16:
50 // n greater than 16 bytes, check if the start address is aligned
51 NEG R0, R4
52 ANDS $15, R4, R4 // R4 = (-ptr) & 15 = bytes up to the next 16-byte boundary
53 // Try zeroing using zva if the start address is aligned with 16
54 BEQ try_zva
55
56 // Non-aligned store
57 STP (ZR, ZR), (R0) // clears the first 16 bytes; n > 16, so this stays inside the buffer
58 // Make the destination aligned
59 SUB R4, R1, R1 // n -= R4
60 ADD R4, R0, R0 // R0 += R4; the 16 - R4 bytes from the new R0 on are already zero
61 B try_zva
62
63 tail_maybe_long:
64 CMP $64, R1 // entered from loop_zva with R1 = bytes left over (less than one ZVA block)
65 BHS no_zva // 64 or more bytes left: use the 64-byte store loop
66
67 tail63:
68 ANDS $48, R1, R3 // R3 = bits 4-5 of R1: bytes in whole 16-byte chunks (0, 16, 32 or 48)
69 BEQ last16 // no whole chunk to store
70 CMPW $32, R3
71 BEQ last48 // R3 == 32: two stores (last48 falls through into last32)
72 BLT last32 // R3 == 16: one store
73 STP.P (ZR, ZR), 16(R0) // R3 == 48: three stores, falling through both labels below
74 last48:
75 STP.P (ZR, ZR), 16(R0)
76 last32:
77 STP.P (ZR, ZR), 16(R0) // each STP.P stores 16 bytes at R0 and then advances R0 by 16
78 // The last store length is at most 16, so it is safe to use
79 // stp to write last 16 bytes
80 last16:
81 ANDS $15, R1, R1 // R1 = trailing bytes, 0..15
82 CBZ R1, last_end
83 ADD R1, R0, R0 // R0 = one past the last byte to clear
84 STP (ZR, ZR), -16(R0) // clears the final 16 bytes, re-zeroing 16 - R1 bytes that were cleared earlier
85 last_end:
86 RET
87
88 no_zva:
89 SUB $16, R0, R0 // bias R0 by -16 so the loop can end on the pre-indexed STP.W
90 SUB $64, R1, R1 // bias R1 by -64 so SUBS/BGE below means "another 64 bytes remain"
91
92 loop_64:
93 STP (ZR, ZR), 16(R0)
94 STP (ZR, ZR), 32(R0)
95 STP (ZR, ZR), 48(R0)
96 STP.W (ZR, ZR), 64(R0) // pre-index: R0 += 64, then store the fourth 16-byte chunk there
97 SUBS $64, R1, R1
98 BGE loop_64
99 ANDS $63, R1, ZR // only the flags are kept: are any of the low 6 bits (leftover bytes) set?
100 ADD $16, R0, R0 // undo the -16 bias; ADD does not change the flags from ANDS
101 BNE tail63 // tail63 reads only bits 0-5 of R1, so the negative bias does not matter
102 RET
103
104 try_zva:
105 // Try using the ZVA feature to zero entire cache lines
106 // It is not meaningful to use ZVA if the block size is less than 64,
107 // so make sure that n is greater than or equal to 64
108 CMP $63, R1
109 BLE tail63 // n <= 63 (signed compare): finish with 16-byte stores
110
111 CMP $128, R1
112 // Ensure n is at least 128 bytes, so that there is enough to zero after
113 // alignment.
114 BLT no_zva
115 // Check if ZVA is allowed from user code, and if so get the block size
116 MOVW block_size<>(SB), R5 // cached value: 0 = not probed yet, bit 31 set = ZVA unusable, else block size in bytes
117 TBNZ $31, R5, no_zva
118 CBNZ R5, zero_by_line
119 // DCZID_EL0 bit assignments
120 // [63:5] Reserved
121 // [4] DZP, if bit set DC ZVA instruction is prohibited, else permitted
122 // [3:0] log2 of the block size in words, eg. if it returns 0x4 then block size is 16 words
123 MRS DCZID_EL0, R3
124 TBZ $4, R3, init // DZP clear: DC ZVA is permitted
125 // ZVA not available
126 MOVW $~0, R5
127 MOVW R5, block_size<>(SB) // cache all-ones so later calls leave through the TBNZ $31 test above
128 B no_zva
129
130 init:
131 MOVW $4, R9
132 ANDW $15, R3, R5 // R5 = DCZID_EL0[3:0]
133 LSLW R5, R9, R5 // R5 = 4 << R5 = block size in bytes
134 MOVW R5, block_size<>(SB) // cache the block size for later calls
135
// NOTE(review): block_size<> is written before the check below, so on later
// calls a cached size below 64 would pass CBNZ R5 and reach zero_by_line
// without this check - confirm whether that is intended.
136 ANDS $63, R5, R9
137 // Block size is not a multiple of 64; being a power of two, it is less than 64.
138 BNE no_zva
139
140 zero_by_line:
141 CMP R5, R1
142 // Not enough memory to reach alignment
143 BLO no_zva // n < block size (unsigned compare)
144 SUB $1, R5, R6 // R6 = block size - 1, the block alignment mask
145 NEG R0, R4
146 ANDS R6, R4, R4 // R4 = (-R0) & R6 = bytes up to the next block boundary
147 // Already aligned
148 BEQ aligned
149
150 // check there is enough to zero after alignment
151 SUB R4, R1, R3 // R3 = bytes that would remain once R0 reaches the block boundary
152
153 // Check that the remaining length to ZVA after alignment
154 // is at least 64 bytes and at least one block (R5).
155 CMP $64, R3
156 CCMP GE, R3, R5, $10 // condition code GE, NZCV=0b1010: if R3 >= 64 compare R3 with R5, else force "less than"
157 BLT no_zva
158
159 // We now have at least 64 bytes to zero, update n
160 MOVD R3, R1
161
162 loop_zva_prolog:
163 STP (ZR, ZR), (R0)
164 STP (ZR, ZR), 16(R0)
165 STP (ZR, ZR), 32(R0)
166 SUBS $64, R4, R4 // R4 -= 64; these flags feed the BGE below (STP and ADD leave them unchanged)
167 STP (ZR, ZR), 48(R0)
168 ADD $64, R0, R0
169 BGE loop_zva_prolog // keep storing 64-byte groups while R4 is still >= 0
170
171 ADD R4, R0, R0 // R4 is negative here: step R0 back onto the block boundary
172
173 aligned:
174 SUB R5, R1, R1 // bias R1 by one block so SUBS/BHS below means "another whole block remains"
175
176 loop_zva:
177 WORD $0xd50b7420 // DC ZVA, R0
178 ADD R5, R0, R0 // advance to the next block
179 SUBS R5, R1, R1
180 BHS loop_zva // no borrow: at least one more whole block to zero
181 ANDS R6, R1, R1 // R1 = leftover bytes, less than one block
182 BNE tail_maybe_long
183 RET
184
// block_size<> is a file-local, pointer-free 8-byte symbol that
// memclrNoHeapPointers reads and writes with 32-bit MOVW. The function
// distinguishes three kinds of value: 0 (DCZID_EL0 has not been read yet),
// bit 31 set (DC ZVA is prohibited; all-ones is stored), and anything else
// (the DC ZVA block size in bytes).
// NOTE(review): no DATA directive for this symbol is visible here, so it
// presumably starts out as zero - confirm.
185 GLOBL block_size<>(SB), NOPTR, $8
186
View as plain text