
Source file src/cmd/compile/internal/amd64/ssa.go

Documentation: cmd/compile/internal/amd64

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package amd64
     6  
     7  import (
     8  	"fmt"
     9  	"internal/buildcfg"
    10  	"math"
    11  
    12  	"cmd/compile/internal/base"
    13  	"cmd/compile/internal/ir"
    14  	"cmd/compile/internal/logopt"
    15  	"cmd/compile/internal/objw"
    16  	"cmd/compile/internal/ssa"
    17  	"cmd/compile/internal/ssagen"
    18  	"cmd/compile/internal/types"
    19  	"cmd/internal/obj"
    20  	"cmd/internal/obj/x86"
    21  )
    22  
    23  // ssaMarkMoves marks any MOVXconst ops that need to avoid clobbering flags.
    24  func ssaMarkMoves(s *ssagen.State, b *ssa.Block) {
    25  	flive := b.FlagsLiveAtEnd
    26  	for _, c := range b.ControlValues() {
    27  		flive = c.Type.IsFlags() || flive
    28  	}
    29  	for i := len(b.Values) - 1; i >= 0; i-- {
    30  		v := b.Values[i]
    31  		if flive && (v.Op == ssa.OpAMD64MOVLconst || v.Op == ssa.OpAMD64MOVQconst) {
    32  			// The "mark" is any non-nil Aux value.
    33  			v.Aux = v
    34  		}
    35  		if v.Type.IsFlags() {
    36  			flive = false
    37  		}
    38  		for _, a := range v.Args {
    39  			if a.Type.IsFlags() {
    40  				flive = true
    41  			}
    42  		}
    43  	}
    44  }
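        // Note: the mark set here (a non-nil Aux) is checked when lowering
        // MOVLconst/MOVQconst below: a zero constant is normally rewritten to
        // XORL reg, reg, but XOR clobbers the flags, so marked values keep the
        // explicit MOV form while flags are live.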
    45  
    46  // loadByType returns the load instruction of the given type.
    47  func loadByType(t *types.Type) obj.As {
    48  	// Avoid partial register write
    49  	if !t.IsFloat() {
    50  		switch t.Size() {
    51  		case 1:
    52  			return x86.AMOVBLZX
    53  		case 2:
    54  			return x86.AMOVWLZX
    55  		}
    56  	}
    57  	// Otherwise, there's no difference between load and store opcodes.
    58  	return storeByType(t)
    59  }
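        // For example, a 2-byte load uses MOVWLZX rather than MOVW: the
        // zero-extending form writes the whole 32-bit register, so later uses
        // of the destination do not depend on its previous contents.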
    60  
    61  // storeByType returns the store instruction of the given type.
    62  func storeByType(t *types.Type) obj.As {
    63  	width := t.Size()
    64  	if t.IsFloat() {
    65  		switch width {
    66  		case 4:
    67  			return x86.AMOVSS
    68  		case 8:
    69  			return x86.AMOVSD
    70  		}
    71  	} else {
    72  		switch width {
    73  		case 1:
    74  			return x86.AMOVB
    75  		case 2:
    76  			return x86.AMOVW
    77  		case 4:
    78  			return x86.AMOVL
    79  		case 8:
    80  			return x86.AMOVQ
    81  		}
    82  	}
    83  	panic(fmt.Sprintf("bad store type %v", t))
    84  }
    85  
    86  // moveByType returns the reg->reg move instruction of the given type.
    87  func moveByType(t *types.Type) obj.As {
    88  	if t.IsFloat() {
    89  		// Moving the whole sse2 register is faster
    90  		// than moving just the correct low portion of it.
    91  		// There is no xmm->xmm move with 1 byte opcode,
    92  		// so use movups, which has 2 byte opcode.
    93  		return x86.AMOVUPS
    94  	} else {
    95  		switch t.Size() {
    96  		case 1:
    97  			// Avoids partial register write
    98  			return x86.AMOVL
    99  		case 2:
   100  			return x86.AMOVL
   101  		case 4:
   102  			return x86.AMOVL
   103  		case 8:
   104  			return x86.AMOVQ
   105  		case 16:
   106  			return x86.AMOVUPS // int128s are in SSE registers
   107  		default:
   108  			panic(fmt.Sprintf("bad int register width %d:%v", t.Size(), t))
   109  		}
   110  	}
   111  }
   112  
   113  // opregreg emits instructions for
   114  //     dest := dest(To) op src(From)
   115  // and also returns the created obj.Prog so it
   116  // may be further adjusted (offset, scale, etc).
   117  func opregreg(s *ssagen.State, op obj.As, dest, src int16) *obj.Prog {
   118  	p := s.Prog(op)
   119  	p.From.Type = obj.TYPE_REG
   120  	p.To.Type = obj.TYPE_REG
   121  	p.To.Reg = dest
   122  	p.From.Reg = src
   123  	return p
   124  }
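        // For example, opregreg(s, x86.AADDQ, dest, src) emits "ADDQ src, dest",
        // i.e. dest += src, and returns the Prog for further adjustment.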
   125  
   126  // memIdx fills out a as an indexed memory reference for v.
   127  // It assumes that the base register and the index register
   128  // are v.Args[0].Reg() and v.Args[1].Reg(), respectively.
   129  // The caller must still use gc.AddAux/gc.AddAux2 to handle v.Aux as necessary.
   130  func memIdx(a *obj.Addr, v *ssa.Value) {
   131  	r, i := v.Args[0].Reg(), v.Args[1].Reg()
   132  	a.Type = obj.TYPE_MEM
   133  	a.Scale = v.Op.Scale()
   134  	if a.Scale == 1 && i == x86.REG_SP {
   135  		r, i = i, r
   136  	}
   137  	a.Reg = r
   138  	a.Index = i
   139  }
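        // The swap above is needed because SP cannot be encoded as an index
        // register in an x86 addressing mode; with scale 1, base and index are
        // interchangeable, so the two registers are simply exchanged.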
   140  
   141  // DUFFZERO consists of repeated blocks of 4 MOVUPSs + LEAQ;
   142  // see runtime/mkduff.go.
   143  func duffStart(size int64) int64 {
   144  	x, _ := duff(size)
   145  	return x
   146  }
   147  func duffAdj(size int64) int64 {
   148  	_, x := duff(size)
   149  	return x
   150  }
   151  
   152  // duff returns the offset (from duffzero, in bytes) and pointer adjust (in bytes)
   153  // required to use the duffzero mechanism for a block of the given size.
   154  func duff(size int64) (int64, int64) {
   155  	if size < 32 || size > 1024 || size%dzClearStep != 0 {
   156  		panic("bad duffzero size")
   157  	}
   158  	steps := size / dzClearStep
   159  	blocks := steps / dzBlockLen
   160  	steps %= dzBlockLen
   161  	off := dzBlockSize * (dzBlocks - blocks)
   162  	var adj int64
   163  	if steps != 0 {
   164  		off -= dzLeaqSize
   165  		off -= dzMovSize * steps
   166  		adj -= dzClearStep * (dzBlockLen - steps)
   167  	}
   168  	return off, adj
   169  }
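        // Example: a size that is a whole number of blocks leaves steps == 0, so
        // adj stays 0 and off just selects how many full blocks of duffzero run.
        // For a partial block, the entry point backs up into the preceding block
        // by one LEAQ plus steps MOVUPS instructions, and adj pre-adjusts the
        // pointer downward so that those stores land on the right addresses.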
   170  
   171  func getgFromTLS(s *ssagen.State, r int16) {
   172  	// See the comments in cmd/internal/obj/x86/obj6.go
   173  	// near CanUse1InsnTLS for a detailed explanation of these instructions.
   174  	if x86.CanUse1InsnTLS(base.Ctxt) {
   175  		// MOVQ (TLS), r
   176  		p := s.Prog(x86.AMOVQ)
   177  		p.From.Type = obj.TYPE_MEM
   178  		p.From.Reg = x86.REG_TLS
   179  		p.To.Type = obj.TYPE_REG
   180  		p.To.Reg = r
   181  	} else {
   182  		// MOVQ TLS, r
   183  		// MOVQ (r)(TLS*1), r
   184  		p := s.Prog(x86.AMOVQ)
   185  		p.From.Type = obj.TYPE_REG
   186  		p.From.Reg = x86.REG_TLS
   187  		p.To.Type = obj.TYPE_REG
   188  		p.To.Reg = r
   189  		q := s.Prog(x86.AMOVQ)
   190  		q.From.Type = obj.TYPE_MEM
   191  		q.From.Reg = r
   192  		q.From.Index = x86.REG_TLS
   193  		q.From.Scale = 1
   194  		q.To.Type = obj.TYPE_REG
   195  		q.To.Reg = r
   196  	}
   197  }
   198  
   199  func ssaGenValue(s *ssagen.State, v *ssa.Value) {
   200  	switch v.Op {
   201  	case ssa.OpAMD64VFMADD231SD:
   202  		p := s.Prog(v.Op.Asm())
   203  		p.From = obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[2].Reg()}
   204  		p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
   205  		p.SetFrom3Reg(v.Args[1].Reg())
   206  	case ssa.OpAMD64ADDQ, ssa.OpAMD64ADDL:
   207  		r := v.Reg()
   208  		r1 := v.Args[0].Reg()
   209  		r2 := v.Args[1].Reg()
   210  		switch {
   211  		case r == r1:
   212  			p := s.Prog(v.Op.Asm())
   213  			p.From.Type = obj.TYPE_REG
   214  			p.From.Reg = r2
   215  			p.To.Type = obj.TYPE_REG
   216  			p.To.Reg = r
   217  		case r == r2:
   218  			p := s.Prog(v.Op.Asm())
   219  			p.From.Type = obj.TYPE_REG
   220  			p.From.Reg = r1
   221  			p.To.Type = obj.TYPE_REG
   222  			p.To.Reg = r
   223  		default:
   224  			var asm obj.As
   225  			if v.Op == ssa.OpAMD64ADDQ {
   226  				asm = x86.ALEAQ
   227  			} else {
   228  				asm = x86.ALEAL
   229  			}
   230  			p := s.Prog(asm)
   231  			p.From.Type = obj.TYPE_MEM
   232  			p.From.Reg = r1
   233  			p.From.Scale = 1
   234  			p.From.Index = r2
   235  			p.To.Type = obj.TYPE_REG
   236  			p.To.Reg = r
   237  		}
   238  	// 2-address opcode arithmetic
   239  	case ssa.OpAMD64SUBQ, ssa.OpAMD64SUBL,
   240  		ssa.OpAMD64MULQ, ssa.OpAMD64MULL,
   241  		ssa.OpAMD64ANDQ, ssa.OpAMD64ANDL,
   242  		ssa.OpAMD64ORQ, ssa.OpAMD64ORL,
   243  		ssa.OpAMD64XORQ, ssa.OpAMD64XORL,
   244  		ssa.OpAMD64SHLQ, ssa.OpAMD64SHLL,
   245  		ssa.OpAMD64SHRQ, ssa.OpAMD64SHRL, ssa.OpAMD64SHRW, ssa.OpAMD64SHRB,
   246  		ssa.OpAMD64SARQ, ssa.OpAMD64SARL, ssa.OpAMD64SARW, ssa.OpAMD64SARB,
   247  		ssa.OpAMD64ROLQ, ssa.OpAMD64ROLL, ssa.OpAMD64ROLW, ssa.OpAMD64ROLB,
   248  		ssa.OpAMD64RORQ, ssa.OpAMD64RORL, ssa.OpAMD64RORW, ssa.OpAMD64RORB,
   249  		ssa.OpAMD64ADDSS, ssa.OpAMD64ADDSD, ssa.OpAMD64SUBSS, ssa.OpAMD64SUBSD,
   250  		ssa.OpAMD64MULSS, ssa.OpAMD64MULSD, ssa.OpAMD64DIVSS, ssa.OpAMD64DIVSD,
   251  		ssa.OpAMD64PXOR,
   252  		ssa.OpAMD64BTSL, ssa.OpAMD64BTSQ,
   253  		ssa.OpAMD64BTCL, ssa.OpAMD64BTCQ,
   254  		ssa.OpAMD64BTRL, ssa.OpAMD64BTRQ:
   255  		opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg())
   256  
   257  	case ssa.OpAMD64SHRDQ, ssa.OpAMD64SHLDQ:
   258  		p := s.Prog(v.Op.Asm())
   259  		lo, hi, bits := v.Args[0].Reg(), v.Args[1].Reg(), v.Args[2].Reg()
   260  		p.From.Type = obj.TYPE_REG
   261  		p.From.Reg = bits
   262  		p.To.Type = obj.TYPE_REG
   263  		p.To.Reg = lo
   264  		p.SetFrom3Reg(hi)
   265  
   266  	case ssa.OpAMD64DIVQU, ssa.OpAMD64DIVLU, ssa.OpAMD64DIVWU:
   267  		// Arg[0] (the dividend) is in AX.
   268  		// Arg[1] (the divisor) can be in any other register.
   269  		// Result[0] (the quotient) is in AX.
   270  		// Result[1] (the remainder) is in DX.
   271  		r := v.Args[1].Reg()
   272  
   273  		// Zero extend dividend.
   274  		c := s.Prog(x86.AXORL)
   275  		c.From.Type = obj.TYPE_REG
   276  		c.From.Reg = x86.REG_DX
   277  		c.To.Type = obj.TYPE_REG
   278  		c.To.Reg = x86.REG_DX
   279  
   280  		// Issue divide.
   281  		p := s.Prog(v.Op.Asm())
   282  		p.From.Type = obj.TYPE_REG
   283  		p.From.Reg = r
   284  
   285  	case ssa.OpAMD64DIVQ, ssa.OpAMD64DIVL, ssa.OpAMD64DIVW:
   286  		// Arg[0] (the dividend) is in AX.
   287  		// Arg[1] (the divisor) can be in any other register.
   288  		// Result[0] (the quotient) is in AX.
   289  		// Result[1] (the remainder) is in DX.
   290  		r := v.Args[1].Reg()
   291  		var j1 *obj.Prog
   292  
   293  		// CPU faults upon signed overflow, which occurs when the most
   294  		// negative int is divided by -1. Handle divide by -1 as a special case.
   295  		if ssa.DivisionNeedsFixUp(v) {
   296  			var c *obj.Prog
   297  			switch v.Op {
   298  			case ssa.OpAMD64DIVQ:
   299  				c = s.Prog(x86.ACMPQ)
   300  			case ssa.OpAMD64DIVL:
   301  				c = s.Prog(x86.ACMPL)
   302  			case ssa.OpAMD64DIVW:
   303  				c = s.Prog(x86.ACMPW)
   304  			}
   305  			c.From.Type = obj.TYPE_REG
   306  			c.From.Reg = r
   307  			c.To.Type = obj.TYPE_CONST
   308  			c.To.Offset = -1
   309  			j1 = s.Prog(x86.AJEQ)
   310  			j1.To.Type = obj.TYPE_BRANCH
   311  		}
   312  
   313  		// Sign extend dividend.
   314  		switch v.Op {
   315  		case ssa.OpAMD64DIVQ:
   316  			s.Prog(x86.ACQO)
   317  		case ssa.OpAMD64DIVL:
   318  			s.Prog(x86.ACDQ)
   319  		case ssa.OpAMD64DIVW:
   320  			s.Prog(x86.ACWD)
   321  		}
   322  
   323  		// Issue divide.
   324  		p := s.Prog(v.Op.Asm())
   325  		p.From.Type = obj.TYPE_REG
   326  		p.From.Reg = r
   327  
   328  		if j1 != nil {
   329  			// Skip over -1 fixup code.
   330  			j2 := s.Prog(obj.AJMP)
   331  			j2.To.Type = obj.TYPE_BRANCH
   332  
   333  			// Issue -1 fixup code.
   334  			// n / -1 = -n
   335  			var n1 *obj.Prog
   336  			switch v.Op {
   337  			case ssa.OpAMD64DIVQ:
   338  				n1 = s.Prog(x86.ANEGQ)
   339  			case ssa.OpAMD64DIVL:
   340  				n1 = s.Prog(x86.ANEGL)
   341  			case ssa.OpAMD64DIVW:
   342  				n1 = s.Prog(x86.ANEGW)
   343  			}
   344  			n1.To.Type = obj.TYPE_REG
   345  			n1.To.Reg = x86.REG_AX
   346  
   347  			// n % -1 == 0
   348  			n2 := s.Prog(x86.AXORL)
   349  			n2.From.Type = obj.TYPE_REG
   350  			n2.From.Reg = x86.REG_DX
   351  			n2.To.Type = obj.TYPE_REG
   352  			n2.To.Reg = x86.REG_DX
   353  
   354  			// TODO(khr): issue only the -1 fixup code we need.
   355  			// For instance, if only the quotient is used, no point in zeroing the remainder.
   356  
   357  			j1.To.SetTarget(n1)
   358  			j2.To.SetTarget(s.Pc())
   359  		}
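        		// With the fixup, the emitted sequence for IDIVQ is roughly:
        		//	CMPQ	divisor, $-1
        		//	JEQ	fix
        		//	CQO
        		//	IDIVQ	divisor
        		//	JMP	done
        		// fix:
        		//	NEGQ	AX	// quotient of n / -1 is -n
        		//	XORL	DX, DX	// remainder is 0
        		// done: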
   360  
   361  	case ssa.OpAMD64HMULQ, ssa.OpAMD64HMULL, ssa.OpAMD64HMULQU, ssa.OpAMD64HMULLU:
   362  		// the frontend rewrites constant division by 8/16/32 bit integers into
   363  		// HMUL by a constant
   364  		// SSA rewrites generate the 64 bit versions
   365  
   366  		// Arg[0] is already in AX as it's the only register we allow
   367  		// and DX is the only output we care about (the high bits)
   368  		p := s.Prog(v.Op.Asm())
   369  		p.From.Type = obj.TYPE_REG
   370  		p.From.Reg = v.Args[1].Reg()
   371  
   372  		// IMULB puts the high portion in AH instead of DL,
   373  		// so move it to DL for consistency
   374  		if v.Type.Size() == 1 {
   375  			m := s.Prog(x86.AMOVB)
   376  			m.From.Type = obj.TYPE_REG
   377  			m.From.Reg = x86.REG_AH
   378  			m.To.Type = obj.TYPE_REG
   379  			m.To.Reg = x86.REG_DX
   380  		}
   381  
   382  	case ssa.OpAMD64MULQU, ssa.OpAMD64MULLU:
   383  		// Arg[0] is already in AX as it's the only register we allow
   384  		// results lo in AX
   385  		p := s.Prog(v.Op.Asm())
   386  		p.From.Type = obj.TYPE_REG
   387  		p.From.Reg = v.Args[1].Reg()
   388  
   389  	case ssa.OpAMD64MULQU2:
   390  		// Arg[0] is already in AX as it's the only register we allow
   391  		// results hi in DX, lo in AX
   392  		p := s.Prog(v.Op.Asm())
   393  		p.From.Type = obj.TYPE_REG
   394  		p.From.Reg = v.Args[1].Reg()
   395  
   396  	case ssa.OpAMD64DIVQU2:
   397  		// Arg[0], Arg[1] are already in DX, AX, as they're the only registers we allow
   398  		// results q in AX, r in DX
   399  		p := s.Prog(v.Op.Asm())
   400  		p.From.Type = obj.TYPE_REG
   401  		p.From.Reg = v.Args[2].Reg()
   402  
   403  	case ssa.OpAMD64AVGQU:
   404  		// compute (x+y)/2 unsigned.
   405  		// Do a 64-bit add, the overflow goes into the carry.
   406  		// Shift right once and pull the carry back into the 63rd bit.
   407  		p := s.Prog(x86.AADDQ)
   408  		p.From.Type = obj.TYPE_REG
   409  		p.To.Type = obj.TYPE_REG
   410  		p.To.Reg = v.Reg()
   411  		p.From.Reg = v.Args[1].Reg()
   412  		p = s.Prog(x86.ARCRQ)
   413  		p.From.Type = obj.TYPE_CONST
   414  		p.From.Offset = 1
   415  		p.To.Type = obj.TYPE_REG
   416  		p.To.Reg = v.Reg()
   417  
   418  	case ssa.OpAMD64ADDQcarry, ssa.OpAMD64ADCQ:
   419  		r := v.Reg0()
   420  		r0 := v.Args[0].Reg()
   421  		r1 := v.Args[1].Reg()
   422  		switch r {
   423  		case r0:
   424  			p := s.Prog(v.Op.Asm())
   425  			p.From.Type = obj.TYPE_REG
   426  			p.From.Reg = r1
   427  			p.To.Type = obj.TYPE_REG
   428  			p.To.Reg = r
   429  		case r1:
   430  			p := s.Prog(v.Op.Asm())
   431  			p.From.Type = obj.TYPE_REG
   432  			p.From.Reg = r0
   433  			p.To.Type = obj.TYPE_REG
   434  			p.To.Reg = r
   435  		default:
   436  			v.Fatalf("output not in same register as an input %s", v.LongString())
   437  		}
   438  
   439  	case ssa.OpAMD64SUBQborrow, ssa.OpAMD64SBBQ:
   440  		p := s.Prog(v.Op.Asm())
   441  		p.From.Type = obj.TYPE_REG
   442  		p.From.Reg = v.Args[1].Reg()
   443  		p.To.Type = obj.TYPE_REG
   444  		p.To.Reg = v.Reg0()
   445  
   446  	case ssa.OpAMD64ADDQconstcarry, ssa.OpAMD64ADCQconst, ssa.OpAMD64SUBQconstborrow, ssa.OpAMD64SBBQconst:
   447  		p := s.Prog(v.Op.Asm())
   448  		p.From.Type = obj.TYPE_CONST
   449  		p.From.Offset = v.AuxInt
   450  		p.To.Type = obj.TYPE_REG
   451  		p.To.Reg = v.Reg0()
   452  
   453  	case ssa.OpAMD64ADDQconst, ssa.OpAMD64ADDLconst:
   454  		r := v.Reg()
   455  		a := v.Args[0].Reg()
   456  		if r == a {
   457  			switch v.AuxInt {
   458  			case 1:
   459  				var asm obj.As
   460  				// The software optimization manual recommends add $1,reg.
   461  				// But inc/dec is 1 byte smaller. ICC always uses inc;
   462  				// Clang/GCC choose depending on flags, but prefer add.
   463  				// Experiments show that inc/dec is a little faster
   464  				// and makes the binary a little smaller.
   465  				if v.Op == ssa.OpAMD64ADDQconst {
   466  					asm = x86.AINCQ
   467  				} else {
   468  					asm = x86.AINCL
   469  				}
   470  				p := s.Prog(asm)
   471  				p.To.Type = obj.TYPE_REG
   472  				p.To.Reg = r
   473  				return
   474  			case -1:
   475  				var asm obj.As
   476  				if v.Op == ssa.OpAMD64ADDQconst {
   477  					asm = x86.ADECQ
   478  				} else {
   479  					asm = x86.ADECL
   480  				}
   481  				p := s.Prog(asm)
   482  				p.To.Type = obj.TYPE_REG
   483  				p.To.Reg = r
   484  				return
   485  			case 0x80:
   486  				// 'SUBQ $-0x80, r' is functionally equivalent to 'ADDQ $0x80, r' but
   487  				// shorter to encode: -0x80 fits in a sign-extended 8-bit immediate, while +0x80 does not.
   488  				asm := x86.ASUBL
   489  				if v.Op == ssa.OpAMD64ADDQconst {
   490  					asm = x86.ASUBQ
   491  				}
   492  				p := s.Prog(asm)
   493  				p.From.Type = obj.TYPE_CONST
   494  				p.From.Offset = -0x80
   495  				p.To.Type = obj.TYPE_REG
   496  				p.To.Reg = r
   497  				return
   498  
   499  			}
   500  			p := s.Prog(v.Op.Asm())
   501  			p.From.Type = obj.TYPE_CONST
   502  			p.From.Offset = v.AuxInt
   503  			p.To.Type = obj.TYPE_REG
   504  			p.To.Reg = r
   505  			return
   506  		}
   507  		var asm obj.As
   508  		if v.Op == ssa.OpAMD64ADDQconst {
   509  			asm = x86.ALEAQ
   510  		} else {
   511  			asm = x86.ALEAL
   512  		}
   513  		p := s.Prog(asm)
   514  		p.From.Type = obj.TYPE_MEM
   515  		p.From.Reg = a
   516  		p.From.Offset = v.AuxInt
   517  		p.To.Type = obj.TYPE_REG
   518  		p.To.Reg = r
   519  
   520  	case ssa.OpAMD64CMOVQEQ, ssa.OpAMD64CMOVLEQ, ssa.OpAMD64CMOVWEQ,
   521  		ssa.OpAMD64CMOVQLT, ssa.OpAMD64CMOVLLT, ssa.OpAMD64CMOVWLT,
   522  		ssa.OpAMD64CMOVQNE, ssa.OpAMD64CMOVLNE, ssa.OpAMD64CMOVWNE,
   523  		ssa.OpAMD64CMOVQGT, ssa.OpAMD64CMOVLGT, ssa.OpAMD64CMOVWGT,
   524  		ssa.OpAMD64CMOVQLE, ssa.OpAMD64CMOVLLE, ssa.OpAMD64CMOVWLE,
   525  		ssa.OpAMD64CMOVQGE, ssa.OpAMD64CMOVLGE, ssa.OpAMD64CMOVWGE,
   526  		ssa.OpAMD64CMOVQHI, ssa.OpAMD64CMOVLHI, ssa.OpAMD64CMOVWHI,
   527  		ssa.OpAMD64CMOVQLS, ssa.OpAMD64CMOVLLS, ssa.OpAMD64CMOVWLS,
   528  		ssa.OpAMD64CMOVQCC, ssa.OpAMD64CMOVLCC, ssa.OpAMD64CMOVWCC,
   529  		ssa.OpAMD64CMOVQCS, ssa.OpAMD64CMOVLCS, ssa.OpAMD64CMOVWCS,
   530  		ssa.OpAMD64CMOVQGTF, ssa.OpAMD64CMOVLGTF, ssa.OpAMD64CMOVWGTF,
   531  		ssa.OpAMD64CMOVQGEF, ssa.OpAMD64CMOVLGEF, ssa.OpAMD64CMOVWGEF:
   532  		p := s.Prog(v.Op.Asm())
   533  		p.From.Type = obj.TYPE_REG
   534  		p.From.Reg = v.Args[1].Reg()
   535  		p.To.Type = obj.TYPE_REG
   536  		p.To.Reg = v.Reg()
   537  
   538  	case ssa.OpAMD64CMOVQNEF, ssa.OpAMD64CMOVLNEF, ssa.OpAMD64CMOVWNEF:
   539  		// Flag condition: ^ZERO || PARITY
   540  		// Generate:
   541  		//   CMOV*NE  SRC,DST
   542  		//   CMOV*PS  SRC,DST
   543  		p := s.Prog(v.Op.Asm())
   544  		p.From.Type = obj.TYPE_REG
   545  		p.From.Reg = v.Args[1].Reg()
   546  		p.To.Type = obj.TYPE_REG
   547  		p.To.Reg = v.Reg()
   548  		var q *obj.Prog
   549  		if v.Op == ssa.OpAMD64CMOVQNEF {
   550  			q = s.Prog(x86.ACMOVQPS)
   551  		} else if v.Op == ssa.OpAMD64CMOVLNEF {
   552  			q = s.Prog(x86.ACMOVLPS)
   553  		} else {
   554  			q = s.Prog(x86.ACMOVWPS)
   555  		}
   556  		q.From.Type = obj.TYPE_REG
   557  		q.From.Reg = v.Args[1].Reg()
   558  		q.To.Type = obj.TYPE_REG
   559  		q.To.Reg = v.Reg()
   560  
   561  	case ssa.OpAMD64CMOVQEQF, ssa.OpAMD64CMOVLEQF, ssa.OpAMD64CMOVWEQF:
   562  		// Flag condition: ZERO && !PARITY
   563  		// Generate:
   564  		//   MOV      SRC,AX
   565  		//   CMOV*NE  DST,AX
   566  		//   CMOV*PC  AX,DST
   567  		//
   568  		// TODO(rasky): we could generate:
   569  		//   CMOV*NE  DST,SRC
   570  		//   CMOV*PC  SRC,DST
   571  		// But this requires a way for regalloc to know that SRC might be
   572  		// clobbered by this instruction.
   573  		if v.Args[1].Reg() != x86.REG_AX {
   574  			opregreg(s, moveByType(v.Type), x86.REG_AX, v.Args[1].Reg())
   575  		}
   576  		p := s.Prog(v.Op.Asm())
   577  		p.From.Type = obj.TYPE_REG
   578  		p.From.Reg = v.Reg()
   579  		p.To.Type = obj.TYPE_REG
   580  		p.To.Reg = x86.REG_AX
   581  		var q *obj.Prog
   582  		if v.Op == ssa.OpAMD64CMOVQEQF {
   583  			q = s.Prog(x86.ACMOVQPC)
   584  		} else if v.Op == ssa.OpAMD64CMOVLEQF {
   585  			q = s.Prog(x86.ACMOVLPC)
   586  		} else {
   587  			q = s.Prog(x86.ACMOVWPC)
   588  		}
   589  		q.From.Type = obj.TYPE_REG
   590  		q.From.Reg = x86.REG_AX
   591  		q.To.Type = obj.TYPE_REG
   592  		q.To.Reg = v.Reg()
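        		// The PARITY check matters because UCOMISx sets PF for unordered
        		// (NaN) operands; floating-point equality must treat a NaN
        		// comparison as not-equal, hence ZERO && !PARITY.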
   593  
   594  	case ssa.OpAMD64MULQconst, ssa.OpAMD64MULLconst:
   595  		r := v.Reg()
   596  		p := s.Prog(v.Op.Asm())
   597  		p.From.Type = obj.TYPE_CONST
   598  		p.From.Offset = v.AuxInt
   599  		p.To.Type = obj.TYPE_REG
   600  		p.To.Reg = r
   601  		p.SetFrom3Reg(v.Args[0].Reg())
   602  
   603  	case ssa.OpAMD64SUBQconst, ssa.OpAMD64SUBLconst,
   604  		ssa.OpAMD64ANDQconst, ssa.OpAMD64ANDLconst,
   605  		ssa.OpAMD64ORQconst, ssa.OpAMD64ORLconst,
   606  		ssa.OpAMD64XORQconst, ssa.OpAMD64XORLconst,
   607  		ssa.OpAMD64SHLQconst, ssa.OpAMD64SHLLconst,
   608  		ssa.OpAMD64SHRQconst, ssa.OpAMD64SHRLconst, ssa.OpAMD64SHRWconst, ssa.OpAMD64SHRBconst,
   609  		ssa.OpAMD64SARQconst, ssa.OpAMD64SARLconst, ssa.OpAMD64SARWconst, ssa.OpAMD64SARBconst,
   610  		ssa.OpAMD64ROLQconst, ssa.OpAMD64ROLLconst, ssa.OpAMD64ROLWconst, ssa.OpAMD64ROLBconst:
   611  		p := s.Prog(v.Op.Asm())
   612  		p.From.Type = obj.TYPE_CONST
   613  		p.From.Offset = v.AuxInt
   614  		p.To.Type = obj.TYPE_REG
   615  		p.To.Reg = v.Reg()
   616  	case ssa.OpAMD64SBBQcarrymask, ssa.OpAMD64SBBLcarrymask:
   617  		r := v.Reg()
   618  		p := s.Prog(v.Op.Asm())
   619  		p.From.Type = obj.TYPE_REG
   620  		p.From.Reg = r
   621  		p.To.Type = obj.TYPE_REG
   622  		p.To.Reg = r
   623  	case ssa.OpAMD64LEAQ1, ssa.OpAMD64LEAQ2, ssa.OpAMD64LEAQ4, ssa.OpAMD64LEAQ8,
   624  		ssa.OpAMD64LEAL1, ssa.OpAMD64LEAL2, ssa.OpAMD64LEAL4, ssa.OpAMD64LEAL8,
   625  		ssa.OpAMD64LEAW1, ssa.OpAMD64LEAW2, ssa.OpAMD64LEAW4, ssa.OpAMD64LEAW8:
   626  		p := s.Prog(v.Op.Asm())
   627  		memIdx(&p.From, v)
   628  		o := v.Reg()
   629  		p.To.Type = obj.TYPE_REG
   630  		p.To.Reg = o
   631  		if v.AuxInt != 0 && v.Aux == nil {
   632  			// Emit an additional LEA to add the displacement instead of creating a slow 3 operand LEA.
   633  			switch v.Op {
   634  			case ssa.OpAMD64LEAQ1, ssa.OpAMD64LEAQ2, ssa.OpAMD64LEAQ4, ssa.OpAMD64LEAQ8:
   635  				p = s.Prog(x86.ALEAQ)
   636  			case ssa.OpAMD64LEAL1, ssa.OpAMD64LEAL2, ssa.OpAMD64LEAL4, ssa.OpAMD64LEAL8:
   637  				p = s.Prog(x86.ALEAL)
   638  			case ssa.OpAMD64LEAW1, ssa.OpAMD64LEAW2, ssa.OpAMD64LEAW4, ssa.OpAMD64LEAW8:
   639  				p = s.Prog(x86.ALEAW)
   640  			}
   641  			p.From.Type = obj.TYPE_MEM
   642  			p.From.Reg = o
   643  			p.To.Type = obj.TYPE_REG
   644  			p.To.Reg = o
   645  		}
   646  		ssagen.AddAux(&p.From, v)
   647  	case ssa.OpAMD64LEAQ, ssa.OpAMD64LEAL, ssa.OpAMD64LEAW:
   648  		p := s.Prog(v.Op.Asm())
   649  		p.From.Type = obj.TYPE_MEM
   650  		p.From.Reg = v.Args[0].Reg()
   651  		ssagen.AddAux(&p.From, v)
   652  		p.To.Type = obj.TYPE_REG
   653  		p.To.Reg = v.Reg()
   654  	case ssa.OpAMD64CMPQ, ssa.OpAMD64CMPL, ssa.OpAMD64CMPW, ssa.OpAMD64CMPB,
   655  		ssa.OpAMD64TESTQ, ssa.OpAMD64TESTL, ssa.OpAMD64TESTW, ssa.OpAMD64TESTB,
   656  		ssa.OpAMD64BTL, ssa.OpAMD64BTQ:
   657  		opregreg(s, v.Op.Asm(), v.Args[1].Reg(), v.Args[0].Reg())
   658  	case ssa.OpAMD64UCOMISS, ssa.OpAMD64UCOMISD:
   659  		// Go assembler has swapped operands for UCOMISx relative to CMP,
   660  		// must account for that right here.
   661  		opregreg(s, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg())
   662  	case ssa.OpAMD64CMPQconst, ssa.OpAMD64CMPLconst, ssa.OpAMD64CMPWconst, ssa.OpAMD64CMPBconst:
   663  		p := s.Prog(v.Op.Asm())
   664  		p.From.Type = obj.TYPE_REG
   665  		p.From.Reg = v.Args[0].Reg()
   666  		p.To.Type = obj.TYPE_CONST
   667  		p.To.Offset = v.AuxInt
   668  	case ssa.OpAMD64BTLconst, ssa.OpAMD64BTQconst,
   669  		ssa.OpAMD64TESTQconst, ssa.OpAMD64TESTLconst, ssa.OpAMD64TESTWconst, ssa.OpAMD64TESTBconst,
   670  		ssa.OpAMD64BTSLconst, ssa.OpAMD64BTSQconst,
   671  		ssa.OpAMD64BTCLconst, ssa.OpAMD64BTCQconst,
   672  		ssa.OpAMD64BTRLconst, ssa.OpAMD64BTRQconst:
   673  		op := v.Op
   674  		if op == ssa.OpAMD64BTQconst && v.AuxInt < 32 {
   675  			// Emit 32-bit version because it's shorter
   676  			op = ssa.OpAMD64BTLconst
   677  		}
   678  		p := s.Prog(op.Asm())
   679  		p.From.Type = obj.TYPE_CONST
   680  		p.From.Offset = v.AuxInt
   681  		p.To.Type = obj.TYPE_REG
   682  		p.To.Reg = v.Args[0].Reg()
   683  	case ssa.OpAMD64CMPQload, ssa.OpAMD64CMPLload, ssa.OpAMD64CMPWload, ssa.OpAMD64CMPBload:
   684  		p := s.Prog(v.Op.Asm())
   685  		p.From.Type = obj.TYPE_MEM
   686  		p.From.Reg = v.Args[0].Reg()
   687  		ssagen.AddAux(&p.From, v)
   688  		p.To.Type = obj.TYPE_REG
   689  		p.To.Reg = v.Args[1].Reg()
   690  	case ssa.OpAMD64CMPQconstload, ssa.OpAMD64CMPLconstload, ssa.OpAMD64CMPWconstload, ssa.OpAMD64CMPBconstload:
   691  		sc := v.AuxValAndOff()
   692  		p := s.Prog(v.Op.Asm())
   693  		p.From.Type = obj.TYPE_MEM
   694  		p.From.Reg = v.Args[0].Reg()
   695  		ssagen.AddAux2(&p.From, v, sc.Off64())
   696  		p.To.Type = obj.TYPE_CONST
   697  		p.To.Offset = sc.Val64()
   698  	case ssa.OpAMD64CMPQloadidx8, ssa.OpAMD64CMPQloadidx1, ssa.OpAMD64CMPLloadidx4, ssa.OpAMD64CMPLloadidx1, ssa.OpAMD64CMPWloadidx2, ssa.OpAMD64CMPWloadidx1, ssa.OpAMD64CMPBloadidx1:
   699  		p := s.Prog(v.Op.Asm())
   700  		memIdx(&p.From, v)
   701  		ssagen.AddAux(&p.From, v)
   702  		p.To.Type = obj.TYPE_REG
   703  		p.To.Reg = v.Args[2].Reg()
   704  	case ssa.OpAMD64CMPQconstloadidx8, ssa.OpAMD64CMPQconstloadidx1, ssa.OpAMD64CMPLconstloadidx4, ssa.OpAMD64CMPLconstloadidx1, ssa.OpAMD64CMPWconstloadidx2, ssa.OpAMD64CMPWconstloadidx1, ssa.OpAMD64CMPBconstloadidx1:
   705  		sc := v.AuxValAndOff()
   706  		p := s.Prog(v.Op.Asm())
   707  		memIdx(&p.From, v)
   708  		ssagen.AddAux2(&p.From, v, sc.Off64())
   709  		p.To.Type = obj.TYPE_CONST
   710  		p.To.Offset = sc.Val64()
   711  	case ssa.OpAMD64MOVLconst, ssa.OpAMD64MOVQconst:
   712  		x := v.Reg()
   713  
   714  		// If flags aren't live (indicated by v.Aux == nil),
   715  		// then we can rewrite MOV $0, AX into XOR AX, AX.
   716  		if v.AuxInt == 0 && v.Aux == nil {
   717  			p := s.Prog(x86.AXORL)
   718  			p.From.Type = obj.TYPE_REG
   719  			p.From.Reg = x
   720  			p.To.Type = obj.TYPE_REG
   721  			p.To.Reg = x
   722  			break
   723  		}
   724  
   725  		asm := v.Op.Asm()
   726  		// Use MOVL to move a small constant into a register
   727  		// when the constant is positive and fits into 32 bits.
   728  		if 0 <= v.AuxInt && v.AuxInt <= (1<<32-1) {
   729  			// The upper 32 bits are zeroed automatically when using MOVL.
   730  			asm = x86.AMOVL
   731  		}
   732  		p := s.Prog(asm)
   733  		p.From.Type = obj.TYPE_CONST
   734  		p.From.Offset = v.AuxInt
   735  		p.To.Type = obj.TYPE_REG
   736  		p.To.Reg = x
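        		// E.g. with dead flags, MOVQconst $0 becomes XORL AX, AX (2 bytes),
        		// and MOVQconst $1 becomes MOVL $1, AX, relying on the implicit
        		// zeroing of the upper 32 bits.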
   737  	case ssa.OpAMD64MOVSSconst, ssa.OpAMD64MOVSDconst:
   738  		x := v.Reg()
   739  		p := s.Prog(v.Op.Asm())
   740  		p.From.Type = obj.TYPE_FCONST
   741  		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
   742  		p.To.Type = obj.TYPE_REG
   743  		p.To.Reg = x
   744  	case ssa.OpAMD64MOVQload, ssa.OpAMD64MOVSSload, ssa.OpAMD64MOVSDload, ssa.OpAMD64MOVLload, ssa.OpAMD64MOVWload, ssa.OpAMD64MOVBload, ssa.OpAMD64MOVBQSXload, ssa.OpAMD64MOVWQSXload, ssa.OpAMD64MOVLQSXload, ssa.OpAMD64MOVOload:
   745  		p := s.Prog(v.Op.Asm())
   746  		p.From.Type = obj.TYPE_MEM
   747  		p.From.Reg = v.Args[0].Reg()
   748  		ssagen.AddAux(&p.From, v)
   749  		p.To.Type = obj.TYPE_REG
   750  		p.To.Reg = v.Reg()
   751  	case ssa.OpAMD64MOVBloadidx1, ssa.OpAMD64MOVWloadidx1, ssa.OpAMD64MOVLloadidx1, ssa.OpAMD64MOVQloadidx1, ssa.OpAMD64MOVSSloadidx1, ssa.OpAMD64MOVSDloadidx1,
   752  		ssa.OpAMD64MOVQloadidx8, ssa.OpAMD64MOVSDloadidx8, ssa.OpAMD64MOVLloadidx8, ssa.OpAMD64MOVLloadidx4, ssa.OpAMD64MOVSSloadidx4, ssa.OpAMD64MOVWloadidx2:
   753  		p := s.Prog(v.Op.Asm())
   754  		memIdx(&p.From, v)
   755  		ssagen.AddAux(&p.From, v)
   756  		p.To.Type = obj.TYPE_REG
   757  		p.To.Reg = v.Reg()
   758  	case ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVLstore, ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore, ssa.OpAMD64MOVOstore,
   759  		ssa.OpAMD64ADDQmodify, ssa.OpAMD64SUBQmodify, ssa.OpAMD64ANDQmodify, ssa.OpAMD64ORQmodify, ssa.OpAMD64XORQmodify,
   760  		ssa.OpAMD64ADDLmodify, ssa.OpAMD64SUBLmodify, ssa.OpAMD64ANDLmodify, ssa.OpAMD64ORLmodify, ssa.OpAMD64XORLmodify:
   761  		p := s.Prog(v.Op.Asm())
   762  		p.From.Type = obj.TYPE_REG
   763  		p.From.Reg = v.Args[1].Reg()
   764  		p.To.Type = obj.TYPE_MEM
   765  		p.To.Reg = v.Args[0].Reg()
   766  		ssagen.AddAux(&p.To, v)
   767  	case ssa.OpAMD64MOVBstoreidx1, ssa.OpAMD64MOVWstoreidx1, ssa.OpAMD64MOVLstoreidx1, ssa.OpAMD64MOVQstoreidx1, ssa.OpAMD64MOVSSstoreidx1, ssa.OpAMD64MOVSDstoreidx1,
   768  		ssa.OpAMD64MOVQstoreidx8, ssa.OpAMD64MOVSDstoreidx8, ssa.OpAMD64MOVLstoreidx8, ssa.OpAMD64MOVSSstoreidx4, ssa.OpAMD64MOVLstoreidx4, ssa.OpAMD64MOVWstoreidx2,
   769  		ssa.OpAMD64ADDLmodifyidx1, ssa.OpAMD64ADDLmodifyidx4, ssa.OpAMD64ADDLmodifyidx8, ssa.OpAMD64ADDQmodifyidx1, ssa.OpAMD64ADDQmodifyidx8,
   770  		ssa.OpAMD64SUBLmodifyidx1, ssa.OpAMD64SUBLmodifyidx4, ssa.OpAMD64SUBLmodifyidx8, ssa.OpAMD64SUBQmodifyidx1, ssa.OpAMD64SUBQmodifyidx8,
   771  		ssa.OpAMD64ANDLmodifyidx1, ssa.OpAMD64ANDLmodifyidx4, ssa.OpAMD64ANDLmodifyidx8, ssa.OpAMD64ANDQmodifyidx1, ssa.OpAMD64ANDQmodifyidx8,
   772  		ssa.OpAMD64ORLmodifyidx1, ssa.OpAMD64ORLmodifyidx4, ssa.OpAMD64ORLmodifyidx8, ssa.OpAMD64ORQmodifyidx1, ssa.OpAMD64ORQmodifyidx8,
   773  		ssa.OpAMD64XORLmodifyidx1, ssa.OpAMD64XORLmodifyidx4, ssa.OpAMD64XORLmodifyidx8, ssa.OpAMD64XORQmodifyidx1, ssa.OpAMD64XORQmodifyidx8:
   774  		p := s.Prog(v.Op.Asm())
   775  		p.From.Type = obj.TYPE_REG
   776  		p.From.Reg = v.Args[2].Reg()
   777  		memIdx(&p.To, v)
   778  		ssagen.AddAux(&p.To, v)
   779  	case ssa.OpAMD64ADDQconstmodify, ssa.OpAMD64ADDLconstmodify:
   780  		sc := v.AuxValAndOff()
   781  		off := sc.Off64()
   782  		val := sc.Val()
   783  		if val == 1 || val == -1 {
   784  			var asm obj.As
   785  			if v.Op == ssa.OpAMD64ADDQconstmodify {
   786  				if val == 1 {
   787  					asm = x86.AINCQ
   788  				} else {
   789  					asm = x86.ADECQ
   790  				}
   791  			} else {
   792  				if val == 1 {
   793  					asm = x86.AINCL
   794  				} else {
   795  					asm = x86.ADECL
   796  				}
   797  			}
   798  			p := s.Prog(asm)
   799  			p.To.Type = obj.TYPE_MEM
   800  			p.To.Reg = v.Args[0].Reg()
   801  			ssagen.AddAux2(&p.To, v, off)
   802  			break
   803  		}
   804  		fallthrough
   805  	case ssa.OpAMD64ANDQconstmodify, ssa.OpAMD64ANDLconstmodify, ssa.OpAMD64ORQconstmodify, ssa.OpAMD64ORLconstmodify,
   806  		ssa.OpAMD64XORQconstmodify, ssa.OpAMD64XORLconstmodify:
   807  		sc := v.AuxValAndOff()
   808  		off := sc.Off64()
   809  		val := sc.Val64()
   810  		p := s.Prog(v.Op.Asm())
   811  		p.From.Type = obj.TYPE_CONST
   812  		p.From.Offset = val
   813  		p.To.Type = obj.TYPE_MEM
   814  		p.To.Reg = v.Args[0].Reg()
   815  		ssagen.AddAux2(&p.To, v, off)
   816  
   817  	case ssa.OpAMD64MOVQstoreconst, ssa.OpAMD64MOVLstoreconst, ssa.OpAMD64MOVWstoreconst, ssa.OpAMD64MOVBstoreconst:
   818  		p := s.Prog(v.Op.Asm())
   819  		p.From.Type = obj.TYPE_CONST
   820  		sc := v.AuxValAndOff()
   821  		p.From.Offset = sc.Val64()
   822  		p.To.Type = obj.TYPE_MEM
   823  		p.To.Reg = v.Args[0].Reg()
   824  		ssagen.AddAux2(&p.To, v, sc.Off64())
   825  	case ssa.OpAMD64MOVOstorezero:
   826  		if !buildcfg.Experiment.RegabiG || s.ABI != obj.ABIInternal {
   827  			// zero X15 manually
   828  			opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
   829  		}
   830  		p := s.Prog(v.Op.Asm())
   831  		p.From.Type = obj.TYPE_REG
   832  		p.From.Reg = x86.REG_X15
   833  		p.To.Type = obj.TYPE_MEM
   834  		p.To.Reg = v.Args[0].Reg()
   835  		ssagen.AddAux(&p.To, v)
   836  	case ssa.OpAMD64MOVQstoreconstidx1, ssa.OpAMD64MOVQstoreconstidx8, ssa.OpAMD64MOVLstoreconstidx1, ssa.OpAMD64MOVLstoreconstidx4, ssa.OpAMD64MOVWstoreconstidx1, ssa.OpAMD64MOVWstoreconstidx2, ssa.OpAMD64MOVBstoreconstidx1,
   837  		ssa.OpAMD64ADDLconstmodifyidx1, ssa.OpAMD64ADDLconstmodifyidx4, ssa.OpAMD64ADDLconstmodifyidx8, ssa.OpAMD64ADDQconstmodifyidx1, ssa.OpAMD64ADDQconstmodifyidx8,
   838  		ssa.OpAMD64ANDLconstmodifyidx1, ssa.OpAMD64ANDLconstmodifyidx4, ssa.OpAMD64ANDLconstmodifyidx8, ssa.OpAMD64ANDQconstmodifyidx1, ssa.OpAMD64ANDQconstmodifyidx8,
   839  		ssa.OpAMD64ORLconstmodifyidx1, ssa.OpAMD64ORLconstmodifyidx4, ssa.OpAMD64ORLconstmodifyidx8, ssa.OpAMD64ORQconstmodifyidx1, ssa.OpAMD64ORQconstmodifyidx8,
   840  		ssa.OpAMD64XORLconstmodifyidx1, ssa.OpAMD64XORLconstmodifyidx4, ssa.OpAMD64XORLconstmodifyidx8, ssa.OpAMD64XORQconstmodifyidx1, ssa.OpAMD64XORQconstmodifyidx8:
   841  		p := s.Prog(v.Op.Asm())
   842  		p.From.Type = obj.TYPE_CONST
   843  		sc := v.AuxValAndOff()
   844  		p.From.Offset = sc.Val64()
   845  		switch {
   846  		case p.As == x86.AADDQ && p.From.Offset == 1:
   847  			p.As = x86.AINCQ
   848  			p.From.Type = obj.TYPE_NONE
   849  		case p.As == x86.AADDQ && p.From.Offset == -1:
   850  			p.As = x86.ADECQ
   851  			p.From.Type = obj.TYPE_NONE
   852  		case p.As == x86.AADDL && p.From.Offset == 1:
   853  			p.As = x86.AINCL
   854  			p.From.Type = obj.TYPE_NONE
   855  		case p.As == x86.AADDL && p.From.Offset == -1:
   856  			p.As = x86.ADECL
   857  			p.From.Type = obj.TYPE_NONE
   858  		}
   859  		memIdx(&p.To, v)
   860  		ssagen.AddAux2(&p.To, v, sc.Off64())
   861  	case ssa.OpAMD64MOVLQSX, ssa.OpAMD64MOVWQSX, ssa.OpAMD64MOVBQSX, ssa.OpAMD64MOVLQZX, ssa.OpAMD64MOVWQZX, ssa.OpAMD64MOVBQZX,
   862  		ssa.OpAMD64CVTTSS2SL, ssa.OpAMD64CVTTSD2SL, ssa.OpAMD64CVTTSS2SQ, ssa.OpAMD64CVTTSD2SQ,
   863  		ssa.OpAMD64CVTSS2SD, ssa.OpAMD64CVTSD2SS:
   864  		opregreg(s, v.Op.Asm(), v.Reg(), v.Args[0].Reg())
   865  	case ssa.OpAMD64CVTSL2SD, ssa.OpAMD64CVTSQ2SD, ssa.OpAMD64CVTSQ2SS, ssa.OpAMD64CVTSL2SS:
   866  		r := v.Reg()
   867  		// Break false dependency on destination register.
   868  		opregreg(s, x86.AXORPS, r, r)
   869  		opregreg(s, v.Op.Asm(), r, v.Args[0].Reg())
   870  	case ssa.OpAMD64MOVQi2f, ssa.OpAMD64MOVQf2i, ssa.OpAMD64MOVLi2f, ssa.OpAMD64MOVLf2i:
   871  		var p *obj.Prog
   872  		switch v.Op {
   873  		case ssa.OpAMD64MOVQi2f, ssa.OpAMD64MOVQf2i:
   874  			p = s.Prog(x86.AMOVQ)
   875  		case ssa.OpAMD64MOVLi2f, ssa.OpAMD64MOVLf2i:
   876  			p = s.Prog(x86.AMOVL)
   877  		}
   878  		p.From.Type = obj.TYPE_REG
   879  		p.From.Reg = v.Args[0].Reg()
   880  		p.To.Type = obj.TYPE_REG
   881  		p.To.Reg = v.Reg()
   882  	case ssa.OpAMD64ADDQload, ssa.OpAMD64ADDLload, ssa.OpAMD64SUBQload, ssa.OpAMD64SUBLload,
   883  		ssa.OpAMD64ANDQload, ssa.OpAMD64ANDLload, ssa.OpAMD64ORQload, ssa.OpAMD64ORLload,
   884  		ssa.OpAMD64XORQload, ssa.OpAMD64XORLload, ssa.OpAMD64ADDSDload, ssa.OpAMD64ADDSSload,
   885  		ssa.OpAMD64SUBSDload, ssa.OpAMD64SUBSSload, ssa.OpAMD64MULSDload, ssa.OpAMD64MULSSload,
   886  		ssa.OpAMD64DIVSDload, ssa.OpAMD64DIVSSload:
   887  		p := s.Prog(v.Op.Asm())
   888  		p.From.Type = obj.TYPE_MEM
   889  		p.From.Reg = v.Args[1].Reg()
   890  		ssagen.AddAux(&p.From, v)
   891  		p.To.Type = obj.TYPE_REG
   892  		p.To.Reg = v.Reg()
   893  	case ssa.OpAMD64ADDLloadidx1, ssa.OpAMD64ADDLloadidx4, ssa.OpAMD64ADDLloadidx8, ssa.OpAMD64ADDQloadidx1, ssa.OpAMD64ADDQloadidx8,
   894  		ssa.OpAMD64SUBLloadidx1, ssa.OpAMD64SUBLloadidx4, ssa.OpAMD64SUBLloadidx8, ssa.OpAMD64SUBQloadidx1, ssa.OpAMD64SUBQloadidx8,
   895  		ssa.OpAMD64ANDLloadidx1, ssa.OpAMD64ANDLloadidx4, ssa.OpAMD64ANDLloadidx8, ssa.OpAMD64ANDQloadidx1, ssa.OpAMD64ANDQloadidx8,
   896  		ssa.OpAMD64ORLloadidx1, ssa.OpAMD64ORLloadidx4, ssa.OpAMD64ORLloadidx8, ssa.OpAMD64ORQloadidx1, ssa.OpAMD64ORQloadidx8,
   897  		ssa.OpAMD64XORLloadidx1, ssa.OpAMD64XORLloadidx4, ssa.OpAMD64XORLloadidx8, ssa.OpAMD64XORQloadidx1, ssa.OpAMD64XORQloadidx8,
   898  		ssa.OpAMD64ADDSSloadidx1, ssa.OpAMD64ADDSSloadidx4, ssa.OpAMD64ADDSDloadidx1, ssa.OpAMD64ADDSDloadidx8,
   899  		ssa.OpAMD64SUBSSloadidx1, ssa.OpAMD64SUBSSloadidx4, ssa.OpAMD64SUBSDloadidx1, ssa.OpAMD64SUBSDloadidx8,
   900  		ssa.OpAMD64MULSSloadidx1, ssa.OpAMD64MULSSloadidx4, ssa.OpAMD64MULSDloadidx1, ssa.OpAMD64MULSDloadidx8,
   901  		ssa.OpAMD64DIVSSloadidx1, ssa.OpAMD64DIVSSloadidx4, ssa.OpAMD64DIVSDloadidx1, ssa.OpAMD64DIVSDloadidx8:
   902  		p := s.Prog(v.Op.Asm())
   903  
   904  		r, i := v.Args[1].Reg(), v.Args[2].Reg()
   905  		p.From.Type = obj.TYPE_MEM
   906  		p.From.Scale = v.Op.Scale()
   907  		if p.From.Scale == 1 && i == x86.REG_SP {
   908  			r, i = i, r
   909  		}
   910  		p.From.Reg = r
   911  		p.From.Index = i
   912  
   913  		ssagen.AddAux(&p.From, v)
   914  		p.To.Type = obj.TYPE_REG
   915  		p.To.Reg = v.Reg()
   916  	case ssa.OpAMD64DUFFZERO:
   917  		if !buildcfg.Experiment.RegabiG || s.ABI != obj.ABIInternal {
   918  			// zero X15 manually
   919  			opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
   920  		}
   921  		off := duffStart(v.AuxInt)
   922  		adj := duffAdj(v.AuxInt)
   923  		var p *obj.Prog
   924  		if adj != 0 {
   925  			p = s.Prog(x86.ALEAQ)
   926  			p.From.Type = obj.TYPE_MEM
   927  			p.From.Offset = adj
   928  			p.From.Reg = x86.REG_DI
   929  			p.To.Type = obj.TYPE_REG
   930  			p.To.Reg = x86.REG_DI
   931  		}
   932  		p = s.Prog(obj.ADUFFZERO)
   933  		p.To.Type = obj.TYPE_ADDR
   934  		p.To.Sym = ir.Syms.Duffzero
   935  		p.To.Offset = off
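        		// off selects the entry point into duffzero (a larger offset runs
        		// fewer blocks and zeroes less), and the optional LEAQ pre-adjusts
        		// DI for sizes that are not a whole number of 64-byte blocks; see
        		// duff above.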
   936  	case ssa.OpAMD64DUFFCOPY:
   937  		p := s.Prog(obj.ADUFFCOPY)
   938  		p.To.Type = obj.TYPE_ADDR
   939  		p.To.Sym = ir.Syms.Duffcopy
   940  		if v.AuxInt%16 != 0 {
   941  			v.Fatalf("bad DUFFCOPY AuxInt %v", v.AuxInt)
   942  		}
   943  		p.To.Offset = 14 * (64 - v.AuxInt/16)
   944  		// 14 and 64 are magic constants.  14 is the number of bytes to encode:
   945  		//	MOVUPS	(SI), X0
   946  		//	ADDQ	$16, SI
   947  		//	MOVUPS	X0, (DI)
   948  		//	ADDQ	$16, DI
   949  		// and 64 is the number of such blocks. See src/runtime/duff_amd64.s:duffcopy.
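        		// For example, AuxInt == 32 copies two 16-byte chunks, so the call
        		// targets offset 14*(64-2) == 868, i.e. the last two blocks of
        		// duffcopy.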
   950  
   951  	case ssa.OpCopy: // TODO: use MOVQreg for reg->reg copies instead of OpCopy?
   952  		if v.Type.IsMemory() {
   953  			return
   954  		}
   955  		x := v.Args[0].Reg()
   956  		y := v.Reg()
   957  		if x != y {
   958  			opregreg(s, moveByType(v.Type), y, x)
   959  		}
   960  	case ssa.OpLoadReg:
   961  		if v.Type.IsFlags() {
   962  			v.Fatalf("load flags not implemented: %v", v.LongString())
   963  			return
   964  		}
   965  		p := s.Prog(loadByType(v.Type))
   966  		ssagen.AddrAuto(&p.From, v.Args[0])
   967  		p.To.Type = obj.TYPE_REG
   968  		p.To.Reg = v.Reg()
   969  
   970  	case ssa.OpStoreReg:
   971  		if v.Type.IsFlags() {
   972  			v.Fatalf("store flags not implemented: %v", v.LongString())
   973  			return
   974  		}
   975  		p := s.Prog(storeByType(v.Type))
   976  		p.From.Type = obj.TYPE_REG
   977  		p.From.Reg = v.Args[0].Reg()
   978  		ssagen.AddrAuto(&p.To, v)
   979  	case ssa.OpAMD64LoweredHasCPUFeature:
   980  		p := s.Prog(x86.AMOVBQZX)
   981  		p.From.Type = obj.TYPE_MEM
   982  		ssagen.AddAux(&p.From, v)
   983  		p.To.Type = obj.TYPE_REG
   984  		p.To.Reg = v.Reg()
   985  	case ssa.OpArgIntReg, ssa.OpArgFloatReg:
   986  		// The assembler needs to wrap the entry safepoint/stack growth code with spill/unspill
   987  		// of register arguments. The loop below runs only once, since RegArgs is cleared afterwards.
   988  		for _, ap := range v.Block.Func.RegArgs {
   989  			// Pass the spill/unspill information along to the assembler, offset by size of return PC pushed on stack.
   990  			addr := ssagen.SpillSlotAddr(ap, x86.REG_SP, v.Block.Func.Config.PtrSize)
   991  			s.FuncInfo().AddSpill(
   992  				obj.RegSpill{Reg: ap.Reg, Addr: addr, Unspill: loadByType(ap.Type), Spill: storeByType(ap.Type)})
   993  		}
   994  		v.Block.Func.RegArgs = nil
   995  		ssagen.CheckArgReg(v)
   996  	case ssa.OpAMD64LoweredGetClosurePtr:
   997  		// Closure pointer is DX.
   998  		ssagen.CheckLoweredGetClosurePtr(v)
   999  	case ssa.OpAMD64LoweredGetG:
  1000  		if buildcfg.Experiment.RegabiG && s.ABI == obj.ABIInternal {
  1001  			v.Fatalf("LoweredGetG should not appear in ABIInternal")
  1002  		}
  1003  		r := v.Reg()
  1004  		getgFromTLS(s, r)
  1005  	case ssa.OpAMD64CALLstatic:
  1006  		if buildcfg.Experiment.RegabiG && s.ABI == obj.ABI0 && v.Aux.(*ssa.AuxCall).Fn.ABI() == obj.ABIInternal {
  1007  			// zeroing X15 when entering ABIInternal from ABI0
  1008  			opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
  1009  			// set G register from TLS
  1010  			getgFromTLS(s, x86.REG_R14)
  1011  		}
  1012  		s.Call(v)
  1013  		if buildcfg.Experiment.RegabiG && s.ABI == obj.ABIInternal && v.Aux.(*ssa.AuxCall).Fn.ABI() == obj.ABI0 {
  1014  			// zeroing X15 when entering ABIInternal from ABI0
  1015  			opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
  1016  			// set G register from TLS
  1017  			getgFromTLS(s, x86.REG_R14)
  1018  		}
  1019  	case ssa.OpAMD64CALLclosure, ssa.OpAMD64CALLinter:
  1020  		s.Call(v)
  1021  
  1022  	case ssa.OpAMD64LoweredGetCallerPC:
  1023  		p := s.Prog(x86.AMOVQ)
  1024  		p.From.Type = obj.TYPE_MEM
  1025  		p.From.Offset = -8 // PC is stored 8 bytes below first parameter.
  1026  		p.From.Name = obj.NAME_PARAM
  1027  		p.To.Type = obj.TYPE_REG
  1028  		p.To.Reg = v.Reg()
  1029  
  1030  	case ssa.OpAMD64LoweredGetCallerSP:
  1031  		// caller's SP is the address of the first arg
  1032  		mov := x86.AMOVQ
  1033  		if types.PtrSize == 4 {
  1034  			mov = x86.AMOVL
  1035  		}
  1036  		p := s.Prog(mov)
  1037  		p.From.Type = obj.TYPE_ADDR
  1038  		p.From.Offset = -base.Ctxt.FixedFrameSize() // 0 on amd64, just to be consistent with other architectures
  1039  		p.From.Name = obj.NAME_PARAM
  1040  		p.To.Type = obj.TYPE_REG
  1041  		p.To.Reg = v.Reg()
  1042  
  1043  	case ssa.OpAMD64LoweredWB:
  1044  		p := s.Prog(obj.ACALL)
  1045  		p.To.Type = obj.TYPE_MEM
  1046  		p.To.Name = obj.NAME_EXTERN
  1047  		// arg0 is in DI. Set sym to match where regalloc put arg1.
  1048  		p.To.Sym = ssagen.GCWriteBarrierReg[v.Args[1].Reg()]
  1049  
  1050  	case ssa.OpAMD64LoweredPanicBoundsA, ssa.OpAMD64LoweredPanicBoundsB, ssa.OpAMD64LoweredPanicBoundsC:
  1051  		p := s.Prog(obj.ACALL)
  1052  		p.To.Type = obj.TYPE_MEM
  1053  		p.To.Name = obj.NAME_EXTERN
  1054  		p.To.Sym = ssagen.BoundsCheckFunc[v.AuxInt]
  1055  		s.UseArgs(int64(2 * types.PtrSize)) // space used in callee args area by assembly stubs
  1056  
  1057  	case ssa.OpAMD64NEGQ, ssa.OpAMD64NEGL,
  1058  		ssa.OpAMD64BSWAPQ, ssa.OpAMD64BSWAPL,
  1059  		ssa.OpAMD64NOTQ, ssa.OpAMD64NOTL:
  1060  		p := s.Prog(v.Op.Asm())
  1061  		p.To.Type = obj.TYPE_REG
  1062  		p.To.Reg = v.Reg()
  1063  
  1064  	case ssa.OpAMD64NEGLflags:
  1065  		p := s.Prog(v.Op.Asm())
  1066  		p.To.Type = obj.TYPE_REG
  1067  		p.To.Reg = v.Reg0()
  1068  
  1069  	case ssa.OpAMD64BSFQ, ssa.OpAMD64BSRQ, ssa.OpAMD64BSFL, ssa.OpAMD64BSRL, ssa.OpAMD64SQRTSD, ssa.OpAMD64SQRTSS:
  1070  		p := s.Prog(v.Op.Asm())
  1071  		p.From.Type = obj.TYPE_REG
  1072  		p.From.Reg = v.Args[0].Reg()
  1073  		p.To.Type = obj.TYPE_REG
  1074  		switch v.Op {
  1075  		case ssa.OpAMD64BSFQ, ssa.OpAMD64BSRQ:
  1076  			p.To.Reg = v.Reg0()
  1077  		case ssa.OpAMD64BSFL, ssa.OpAMD64BSRL, ssa.OpAMD64SQRTSD, ssa.OpAMD64SQRTSS:
  1078  			p.To.Reg = v.Reg()
  1079  		}
  1080  	case ssa.OpAMD64ROUNDSD:
  1081  		p := s.Prog(v.Op.Asm())
  1082  		val := v.AuxInt
  1083  		// 0 means math.RoundToEven, 1 Floor, 2 Ceil, 3 Trunc
  1084  		if val < 0 || val > 3 {
  1085  			v.Fatalf("Invalid rounding mode")
  1086  		}
  1087  		p.From.Offset = val
  1088  		p.From.Type = obj.TYPE_CONST
  1089  		p.SetFrom3Reg(v.Args[0].Reg())
  1090  		p.To.Type = obj.TYPE_REG
  1091  		p.To.Reg = v.Reg()
  1092  	case ssa.OpAMD64POPCNTQ, ssa.OpAMD64POPCNTL:
  1093  		if v.Args[0].Reg() != v.Reg() {
  1094  			// POPCNT on Intel has a false dependency on the destination register.
  1095  			// Xor register with itself to break the dependency.
  1096  			p := s.Prog(x86.AXORQ)
  1097  			p.From.Type = obj.TYPE_REG
  1098  			p.From.Reg = v.Reg()
  1099  			p.To.Type = obj.TYPE_REG
  1100  			p.To.Reg = v.Reg()
  1101  		}
  1102  		p := s.Prog(v.Op.Asm())
  1103  		p.From.Type = obj.TYPE_REG
  1104  		p.From.Reg = v.Args[0].Reg()
  1105  		p.To.Type = obj.TYPE_REG
  1106  		p.To.Reg = v.Reg()
  1107  
  1108  	case ssa.OpAMD64SETEQ, ssa.OpAMD64SETNE,
  1109  		ssa.OpAMD64SETL, ssa.OpAMD64SETLE,
  1110  		ssa.OpAMD64SETG, ssa.OpAMD64SETGE,
  1111  		ssa.OpAMD64SETGF, ssa.OpAMD64SETGEF,
  1112  		ssa.OpAMD64SETB, ssa.OpAMD64SETBE,
  1113  		ssa.OpAMD64SETORD, ssa.OpAMD64SETNAN,
  1114  		ssa.OpAMD64SETA, ssa.OpAMD64SETAE,
  1115  		ssa.OpAMD64SETO:
  1116  		p := s.Prog(v.Op.Asm())
  1117  		p.To.Type = obj.TYPE_REG
  1118  		p.To.Reg = v.Reg()
  1119  
  1120  	case ssa.OpAMD64SETEQstore, ssa.OpAMD64SETNEstore,
  1121  		ssa.OpAMD64SETLstore, ssa.OpAMD64SETLEstore,
  1122  		ssa.OpAMD64SETGstore, ssa.OpAMD64SETGEstore,
  1123  		ssa.OpAMD64SETBstore, ssa.OpAMD64SETBEstore,
  1124  		ssa.OpAMD64SETAstore, ssa.OpAMD64SETAEstore:
  1125  		p := s.Prog(v.Op.Asm())
  1126  		p.To.Type = obj.TYPE_MEM
  1127  		p.To.Reg = v.Args[0].Reg()
  1128  		ssagen.AddAux(&p.To, v)
  1129  
  1130  	case ssa.OpAMD64SETNEF:
  1131  		p := s.Prog(v.Op.Asm())
  1132  		p.To.Type = obj.TYPE_REG
  1133  		p.To.Reg = v.Reg()
  1134  		q := s.Prog(x86.ASETPS)
  1135  		q.To.Type = obj.TYPE_REG
  1136  		q.To.Reg = x86.REG_AX
  1137  		// ORL avoids a partial register write and is smaller than ORQ (which the old compiler used)
  1138  		opregreg(s, x86.AORL, v.Reg(), x86.REG_AX)
  1139  
  1140  	case ssa.OpAMD64SETEQF:
  1141  		p := s.Prog(v.Op.Asm())
  1142  		p.To.Type = obj.TYPE_REG
  1143  		p.To.Reg = v.Reg()
  1144  		q := s.Prog(x86.ASETPC)
  1145  		q.To.Type = obj.TYPE_REG
  1146  		q.To.Reg = x86.REG_AX
  1147  		// ANDL avoids a partial register write and is smaller than ANDQ (which the old compiler used)
  1148  		opregreg(s, x86.AANDL, v.Reg(), x86.REG_AX)
  1149  
  1150  	case ssa.OpAMD64InvertFlags:
  1151  		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
  1152  	case ssa.OpAMD64FlagEQ, ssa.OpAMD64FlagLT_ULT, ssa.OpAMD64FlagLT_UGT, ssa.OpAMD64FlagGT_ULT, ssa.OpAMD64FlagGT_UGT:
  1153  		v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
  1154  	case ssa.OpAMD64AddTupleFirst32, ssa.OpAMD64AddTupleFirst64:
  1155  		v.Fatalf("AddTupleFirst* should never make it to codegen %v", v.LongString())
  1156  	case ssa.OpAMD64REPSTOSQ:
  1157  		s.Prog(x86.AREP)
  1158  		s.Prog(x86.ASTOSQ)
  1159  	case ssa.OpAMD64REPMOVSQ:
  1160  		s.Prog(x86.AREP)
  1161  		s.Prog(x86.AMOVSQ)
  1162  	case ssa.OpAMD64LoweredNilCheck:
  1163  		// Issue a load which will fault if the input is nil.
  1164  		// TODO: We currently use the 2-byte instruction TESTB AX, (reg).
  1165  		// Should we use the 3-byte TESTB $0, (reg) instead? It is larger
  1166  		// but it doesn't have false dependency on AX.
  1167  		// Or maybe allocate an output register and use MOVL (reg),reg2 ?
  1168  		// That trades clobbering flags for clobbering a register.
  1169  		p := s.Prog(x86.ATESTB)
  1170  		p.From.Type = obj.TYPE_REG
  1171  		p.From.Reg = x86.REG_AX
  1172  		p.To.Type = obj.TYPE_MEM
  1173  		p.To.Reg = v.Args[0].Reg()
  1174  		if logopt.Enabled() {
  1175  			logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
  1176  		}
  1177  		if base.Debug.Nil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
  1178  			base.WarnfAt(v.Pos, "generated nil check")
  1179  		}
  1180  	case ssa.OpAMD64MOVBatomicload, ssa.OpAMD64MOVLatomicload, ssa.OpAMD64MOVQatomicload:
  1181  		p := s.Prog(v.Op.Asm())
  1182  		p.From.Type = obj.TYPE_MEM
  1183  		p.From.Reg = v.Args[0].Reg()
  1184  		ssagen.AddAux(&p.From, v)
  1185  		p.To.Type = obj.TYPE_REG
  1186  		p.To.Reg = v.Reg0()
  1187  	case ssa.OpAMD64XCHGB, ssa.OpAMD64XCHGL, ssa.OpAMD64XCHGQ:
  1188  		p := s.Prog(v.Op.Asm())
  1189  		p.From.Type = obj.TYPE_REG
  1190  		p.From.Reg = v.Reg0()
  1191  		p.To.Type = obj.TYPE_MEM
  1192  		p.To.Reg = v.Args[1].Reg()
  1193  		ssagen.AddAux(&p.To, v)
  1194  	case ssa.OpAMD64XADDLlock, ssa.OpAMD64XADDQlock:
  1195  		s.Prog(x86.ALOCK)
  1196  		p := s.Prog(v.Op.Asm())
  1197  		p.From.Type = obj.TYPE_REG
  1198  		p.From.Reg = v.Reg0()
  1199  		p.To.Type = obj.TYPE_MEM
  1200  		p.To.Reg = v.Args[1].Reg()
  1201  		ssagen.AddAux(&p.To, v)
  1202  	case ssa.OpAMD64CMPXCHGLlock, ssa.OpAMD64CMPXCHGQlock:
  1203  		if v.Args[1].Reg() != x86.REG_AX {
  1204  			v.Fatalf("input[1] not in AX %s", v.LongString())
  1205  		}
  1206  		s.Prog(x86.ALOCK)
  1207  		p := s.Prog(v.Op.Asm())
  1208  		p.From.Type = obj.TYPE_REG
  1209  		p.From.Reg = v.Args[2].Reg()
  1210  		p.To.Type = obj.TYPE_MEM
  1211  		p.To.Reg = v.Args[0].Reg()
  1212  		ssagen.AddAux(&p.To, v)
  1213  		p = s.Prog(x86.ASETEQ)
  1214  		p.To.Type = obj.TYPE_REG
  1215  		p.To.Reg = v.Reg0()
  1216  	case ssa.OpAMD64ANDBlock, ssa.OpAMD64ANDLlock, ssa.OpAMD64ORBlock, ssa.OpAMD64ORLlock:
  1217  		s.Prog(x86.ALOCK)
  1218  		p := s.Prog(v.Op.Asm())
  1219  		p.From.Type = obj.TYPE_REG
  1220  		p.From.Reg = v.Args[1].Reg()
  1221  		p.To.Type = obj.TYPE_MEM
  1222  		p.To.Reg = v.Args[0].Reg()
  1223  		ssagen.AddAux(&p.To, v)
  1224  	case ssa.OpClobber:
  1225  		p := s.Prog(x86.AMOVL)
  1226  		p.From.Type = obj.TYPE_CONST
  1227  		p.From.Offset = 0xdeaddead
  1228  		p.To.Type = obj.TYPE_MEM
  1229  		p.To.Reg = x86.REG_SP
  1230  		ssagen.AddAux(&p.To, v)
  1231  		p = s.Prog(x86.AMOVL)
  1232  		p.From.Type = obj.TYPE_CONST
  1233  		p.From.Offset = 0xdeaddead
  1234  		p.To.Type = obj.TYPE_MEM
  1235  		p.To.Reg = x86.REG_SP
  1236  		ssagen.AddAux(&p.To, v)
  1237  		p.To.Offset += 4
  1238  	case ssa.OpClobberReg:
  1239  		x := uint64(0xdeaddeaddeaddead)
  1240  		p := s.Prog(x86.AMOVQ)
  1241  		p.From.Type = obj.TYPE_CONST
  1242  		p.From.Offset = int64(x)
  1243  		p.To.Type = obj.TYPE_REG
  1244  		p.To.Reg = v.Reg()
  1245  	default:
  1246  		v.Fatalf("genValue not implemented: %s", v.LongString())
  1247  	}
  1248  }
  1249  
  1250  var blockJump = [...]struct {
  1251  	asm, invasm obj.As
  1252  }{
  1253  	ssa.BlockAMD64EQ:  {x86.AJEQ, x86.AJNE},
  1254  	ssa.BlockAMD64NE:  {x86.AJNE, x86.AJEQ},
  1255  	ssa.BlockAMD64LT:  {x86.AJLT, x86.AJGE},
  1256  	ssa.BlockAMD64GE:  {x86.AJGE, x86.AJLT},
  1257  	ssa.BlockAMD64LE:  {x86.AJLE, x86.AJGT},
  1258  	ssa.BlockAMD64GT:  {x86.AJGT, x86.AJLE},
  1259  	ssa.BlockAMD64OS:  {x86.AJOS, x86.AJOC},
  1260  	ssa.BlockAMD64OC:  {x86.AJOC, x86.AJOS},
  1261  	ssa.BlockAMD64ULT: {x86.AJCS, x86.AJCC},
  1262  	ssa.BlockAMD64UGE: {x86.AJCC, x86.AJCS},
  1263  	ssa.BlockAMD64UGT: {x86.AJHI, x86.AJLS},
  1264  	ssa.BlockAMD64ULE: {x86.AJLS, x86.AJHI},
  1265  	ssa.BlockAMD64ORD: {x86.AJPC, x86.AJPS},
  1266  	ssa.BlockAMD64NAN: {x86.AJPS, x86.AJPC},
  1267  }
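        // For each block kind, asm is the conditional jump to Succs[0], and
        // invasm is the inverted condition used to jump to Succs[1] when
        // Succs[0] is the fall-through successor; see ssaGenBlock below.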
  1268  
  1269  var eqfJumps = [2][2]ssagen.IndexJump{
  1270  	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPS, Index: 1}}, // next == b.Succs[0]
  1271  	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPC, Index: 0}}, // next == b.Succs[1]
  1272  }
  1273  var nefJumps = [2][2]ssagen.IndexJump{
  1274  	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPC, Index: 1}}, // next == b.Succs[0]
  1275  	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPS, Index: 0}}, // next == b.Succs[1]
  1276  }
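        // eqfJumps and nefJumps drive s.CombJump below: each row emits two
        // conditional jumps for the case where next is Succs[0] or Succs[1]
        // respectively, with Index selecting the successor each jump targets,
        // so that the remaining outcome falls through to next.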
  1277  
  1278  func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) {
  1279  	switch b.Kind {
  1280  	case ssa.BlockPlain:
  1281  		if b.Succs[0].Block() != next {
  1282  			p := s.Prog(obj.AJMP)
  1283  			p.To.Type = obj.TYPE_BRANCH
  1284  			s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
  1285  		}
  1286  	case ssa.BlockDefer:
  1287  		// defer returns in rax:
  1288  		// 0 if we should continue executing
  1289  		// 1 if we should jump to deferreturn call
  1290  		p := s.Prog(x86.ATESTL)
  1291  		p.From.Type = obj.TYPE_REG
  1292  		p.From.Reg = x86.REG_AX
  1293  		p.To.Type = obj.TYPE_REG
  1294  		p.To.Reg = x86.REG_AX
  1295  		p = s.Prog(x86.AJNE)
  1296  		p.To.Type = obj.TYPE_BRANCH
  1297  		s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[1].Block()})
  1298  		if b.Succs[0].Block() != next {
  1299  			p := s.Prog(obj.AJMP)
  1300  			p.To.Type = obj.TYPE_BRANCH
  1301  			s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
  1302  		}
  1303  	case ssa.BlockExit:
  1304  	case ssa.BlockRet:
  1305  		s.Prog(obj.ARET)
  1306  	case ssa.BlockRetJmp:
  1307  		if buildcfg.Experiment.RegabiG && s.ABI == obj.ABI0 && b.Aux.(*obj.LSym).ABI() == obj.ABIInternal {
  1308  			// zeroing X15 when entering ABIInternal from ABI0
  1309  			opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
  1310  			// set G register from TLS
  1311  			getgFromTLS(s, x86.REG_R14)
  1312  		}
  1313  		p := s.Prog(obj.ARET)
  1314  		p.To.Type = obj.TYPE_MEM
  1315  		p.To.Name = obj.NAME_EXTERN
  1316  		p.To.Sym = b.Aux.(*obj.LSym)
  1317  
  1318  	case ssa.BlockAMD64EQF:
  1319  		s.CombJump(b, next, &eqfJumps)
  1320  
  1321  	case ssa.BlockAMD64NEF:
  1322  		s.CombJump(b, next, &nefJumps)
  1323  
  1324  	case ssa.BlockAMD64EQ, ssa.BlockAMD64NE,
  1325  		ssa.BlockAMD64LT, ssa.BlockAMD64GE,
  1326  		ssa.BlockAMD64LE, ssa.BlockAMD64GT,
  1327  		ssa.BlockAMD64OS, ssa.BlockAMD64OC,
  1328  		ssa.BlockAMD64ULT, ssa.BlockAMD64UGT,
  1329  		ssa.BlockAMD64ULE, ssa.BlockAMD64UGE:
  1330  		jmp := blockJump[b.Kind]
  1331  		switch next {
  1332  		case b.Succs[0].Block():
  1333  			s.Br(jmp.invasm, b.Succs[1].Block())
  1334  		case b.Succs[1].Block():
  1335  			s.Br(jmp.asm, b.Succs[0].Block())
  1336  		default:
  1337  			if b.Likely != ssa.BranchUnlikely {
  1338  				s.Br(jmp.asm, b.Succs[0].Block())
  1339  				s.Br(obj.AJMP, b.Succs[1].Block())
  1340  			} else {
  1341  				s.Br(jmp.invasm, b.Succs[1].Block())
  1342  				s.Br(obj.AJMP, b.Succs[0].Block())
  1343  			}
  1344  		}
  1345  
  1346  	default:
  1347  		b.Fatalf("branch not implemented: %s", b.LongString())
  1348  	}
  1349  }
  1350  
  1351  func loadRegResults(s *ssagen.State, f *ssa.Func) {
  1352  	for _, o := range f.OwnAux.ABIInfo().OutParams() {
  1353  		n := o.Name.(*ir.Name)
  1354  		rts, offs := o.RegisterTypesAndOffsets()
  1355  		for i := range o.Registers {
  1356  			p := s.Prog(loadByType(rts[i]))
  1357  			p.From.Type = obj.TYPE_MEM
  1358  			p.From.Name = obj.NAME_AUTO
  1359  			p.From.Sym = n.Linksym()
  1360  			p.From.Offset = n.FrameOffset() + offs[i]
  1361  			p.To.Type = obj.TYPE_REG
  1362  			p.To.Reg = ssa.ObjRegForAbiReg(o.Registers[i], f.Config)
  1363  		}
  1364  	}
  1365  }
  1366  
  1367  func spillArgReg(pp *objw.Progs, p *obj.Prog, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
  1368  	p = pp.Append(p, storeByType(t), obj.TYPE_REG, reg, 0, obj.TYPE_MEM, 0, n.FrameOffset()+off)
  1369  	p.To.Name = obj.NAME_PARAM
  1370  	p.To.Sym = n.Linksym()
  1371  	p.Pos = p.Pos.WithNotStmt()
  1372  	return p
  1373  }
  1374  
