1
2
3
4
5 package amd64
6
7 import (
8 "cmd/compile/internal/base"
9 "cmd/compile/internal/ir"
10 "cmd/compile/internal/objw"
11 "cmd/compile/internal/types"
12 "cmd/internal/obj"
13 "cmd/internal/obj/x86"
14 "internal/buildcfg"
15 )
16
17
18 var isPlan9 = buildcfg.GOOS == "plan9"
19
20
21
// Layout parameters used by dzOff and dzDI to compute an entry point into
// the DUFFZERO routine and the matching DI adjustment.
//
// NOTE(review): the instruction sizes below presumably mirror the encoded
// body of the runtime's duffzero routine — confirm against its generator
// before changing any value.
const (
	dzBlocks    = 16 // number of blocks in the routine
	dzBlockLen  = 4  // clearing steps per block
	dzBlockSize = 23 // code bytes occupied by one block
	dzMovSize   = 5  // code bytes of one clearing (MOV) step
	dzLeaqSize  = 4  // code bytes of the LEAQ instruction
	dzClearStep = 16 // bytes zeroed by one clearing step

	dzClearLen = dzBlockLen * dzClearStep // bytes zeroed by one whole block
	dzSize     = dzBlockSize * dzBlocks   // code bytes of the whole routine
)
33
34
35
36 func dzOff(b int64) int64 {
37 off := int64(dzSize)
38 off -= b / dzClearLen * dzBlockSize
39 tailLen := b % dzClearLen
40 if tailLen >= dzClearStep {
41 off -= dzLeaqSize + dzMovSize*(tailLen/dzClearStep)
42 }
43 return off
44 }
45
46
47
48 func dzDI(b int64) int64 {
49 tailLen := b % dzClearLen
50 if tailLen < dzClearStep {
51 return 0
52 }
53 tailSteps := tailLen / dzClearStep
54 return -dzClearStep * (dzBlockLen - tailSteps)
55 }
56
57 func zerorange(pp *objw.Progs, p *obj.Prog, off, cnt int64, state *uint32) *obj.Prog {
58 const (
59 r13 = 1 << iota
60 x15
61 )
62
63 if cnt == 0 {
64 return p
65 }
66
67 if cnt%int64(types.RegSize) != 0 {
68
69 if cnt%int64(types.PtrSize) != 0 {
70 base.Fatalf("zerorange count not a multiple of widthptr %d", cnt)
71 }
72 if *state&r13 == 0 {
73 p = pp.Append(p, x86.AMOVQ, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, x86.REG_R13, 0)
74 *state |= r13
75 }
76 p = pp.Append(p, x86.AMOVL, obj.TYPE_REG, x86.REG_R13, 0, obj.TYPE_MEM, x86.REG_SP, off)
77 off += int64(types.PtrSize)
78 cnt -= int64(types.PtrSize)
79 }
80
81 if cnt == 8 {
82 if *state&r13 == 0 {
83 p = pp.Append(p, x86.AMOVQ, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, x86.REG_R13, 0)
84 *state |= r13
85 }
86 p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_R13, 0, obj.TYPE_MEM, x86.REG_SP, off)
87 } else if !isPlan9 && cnt <= int64(8*types.RegSize) {
88 if !buildcfg.Experiment.RegabiG && *state&x15 == 0 {
89 p = pp.Append(p, x86.AXORPS, obj.TYPE_REG, x86.REG_X15, 0, obj.TYPE_REG, x86.REG_X15, 0)
90 *state |= x15
91 }
92
93 for i := int64(0); i < cnt/16; i++ {
94 p = pp.Append(p, x86.AMOVUPS, obj.TYPE_REG, x86.REG_X15, 0, obj.TYPE_MEM, x86.REG_SP, off+i*16)
95 }
96
97 if cnt%16 != 0 {
98 p = pp.Append(p, x86.AMOVUPS, obj.TYPE_REG, x86.REG_X15, 0, obj.TYPE_MEM, x86.REG_SP, off+cnt-int64(16))
99 }
100 } else if !isPlan9 && (cnt <= int64(128*types.RegSize)) {
101 if !buildcfg.Experiment.RegabiG && *state&x15 == 0 {
102 p = pp.Append(p, x86.AXORPS, obj.TYPE_REG, x86.REG_X15, 0, obj.TYPE_REG, x86.REG_X15, 0)
103 *state |= x15
104 }
105
106
107 p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_DI, 0, obj.TYPE_REG, x86.REG_R12, 0)
108
109 p = pp.Append(p, leaptr, obj.TYPE_MEM, x86.REG_SP, off+dzDI(cnt), obj.TYPE_REG, x86.REG_DI, 0)
110 p = pp.Append(p, obj.ADUFFZERO, obj.TYPE_NONE, 0, 0, obj.TYPE_ADDR, 0, dzOff(cnt))
111 p.To.Sym = ir.Syms.Duffzero
112 if cnt%16 != 0 {
113 p = pp.Append(p, x86.AMOVUPS, obj.TYPE_REG, x86.REG_X15, 0, obj.TYPE_MEM, x86.REG_DI, -int64(8))
114 }
115
116 p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_R12, 0, obj.TYPE_REG, x86.REG_DI, 0)
117
118 } else {
119
120
121
122
123
124
125
126
127
128 p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_DI, 0, obj.TYPE_REG, x86.REG_R12, 0)
129 p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_AX, 0, obj.TYPE_REG, x86.REG_R13, 0)
130 p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_CX, 0, obj.TYPE_REG, x86.REG_R15, 0)
131
132
133 p = pp.Append(p, x86.AMOVQ, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, x86.REG_AX, 0)
134 p = pp.Append(p, x86.AMOVQ, obj.TYPE_CONST, 0, cnt/int64(types.RegSize), obj.TYPE_REG, x86.REG_CX, 0)
135 p = pp.Append(p, leaptr, obj.TYPE_MEM, x86.REG_SP, off, obj.TYPE_REG, x86.REG_DI, 0)
136 p = pp.Append(p, x86.AREP, obj.TYPE_NONE, 0, 0, obj.TYPE_NONE, 0, 0)
137 p = pp.Append(p, x86.ASTOSQ, obj.TYPE_NONE, 0, 0, obj.TYPE_NONE, 0, 0)
138
139
140 p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_R12, 0, obj.TYPE_REG, x86.REG_DI, 0)
141 p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_R13, 0, obj.TYPE_REG, x86.REG_AX, 0)
142 p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_R15, 0, obj.TYPE_REG, x86.REG_CX, 0)
143
144
145 *state &= ^uint32(r13)
146 }
147
148 return p
149 }
150
151 func ginsnop(pp *objw.Progs) *obj.Prog {
152
153
154
155
156
157
158 p := pp.Prog(x86.AXCHGL)
159 p.From.Type = obj.TYPE_REG
160 p.From.Reg = x86.REG_AX
161 p.To.Type = obj.TYPE_REG
162 p.To.Reg = x86.REG_AX
163 return p
164 }
165
View as plain text