Black Lives Matter. Support the Equal Justice Initiative.

Source file src/cmd/compile/internal/amd64/ggen.go

Documentation: cmd/compile/internal/amd64

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package amd64
     6  
     7  import (
     8  	"cmd/compile/internal/base"
     9  	"cmd/compile/internal/ir"
    10  	"cmd/compile/internal/objw"
    11  	"cmd/compile/internal/types"
    12  	"cmd/internal/obj"
    13  	"cmd/internal/obj/x86"
    14  	"internal/buildcfg"
    15  )
    16  
// isPlan9 reports whether we are targeting Plan 9, where note (signal)
// handlers may run on the same stack and floating-point state is not
// saved — so SSE-based zeroing (X15/MOVUPS, DUFFZERO) must be avoided.
var isPlan9 = buildcfg.GOOS == "plan9"
    19  
// DUFFZERO consists of repeated blocks of 4 MOVUPSs + LEAQ,
// See runtime/mkduff.go.
//
// These constants describe that layout so dzOff/dzDI can compute where
// to jump into the routine and how to pre-adjust DI for a given length.
const (
	dzBlocks    = 16 // number of MOV/ADD blocks
	dzBlockLen  = 4  // number of clears per block
	dzBlockSize = 23 // size of instructions in a single block
	dzMovSize   = 5  // size of single MOV instruction w/ offset
	dzLeaqSize  = 4  // size of single LEAQ instruction
	dzClearStep = 16 // number of bytes cleared by each MOV instruction

	dzClearLen = dzClearStep * dzBlockLen // bytes cleared by one block
	dzSize     = dzBlocks * dzBlockSize   // total size of DUFFZERO body
)
    33  
    34  // dzOff returns the offset for a jump into DUFFZERO.
    35  // b is the number of bytes to zero.
    36  func dzOff(b int64) int64 {
    37  	off := int64(dzSize)
    38  	off -= b / dzClearLen * dzBlockSize
    39  	tailLen := b % dzClearLen
    40  	if tailLen >= dzClearStep {
    41  		off -= dzLeaqSize + dzMovSize*(tailLen/dzClearStep)
    42  	}
    43  	return off
    44  }
    45  
    46  // duffzeroDI returns the pre-adjustment to DI for a call to DUFFZERO.
    47  // b is the number of bytes to zero.
    48  func dzDI(b int64) int64 {
    49  	tailLen := b % dzClearLen
    50  	if tailLen < dzClearStep {
    51  		return 0
    52  	}
    53  	tailSteps := tailLen / dzClearStep
    54  	return -dzClearStep * (dzBlockLen - tailSteps)
    55  }
    56  
// zerorange appends instructions after p that zero cnt bytes of the stack
// frame starting at offset off from SP, choosing among four strategies by
// size: a single MOVQ, a short run of MOVUPS stores, a DUFFZERO call, or
// REP STOSQ. state carries bits recording which scratch registers are
// already known to be zero across calls, so repeated invocations in one
// prologue can skip re-zeroing them. It returns the last appended Prog.
func zerorange(pp *objw.Progs, p *obj.Prog, off, cnt int64, state *uint32) *obj.Prog {
	const (
		r13 = 1 << iota // if R13 is already zeroed.
		x15             // if X15 is already zeroed. Note: in new ABI, X15 is always zero.
	)

	if cnt == 0 {
		return p
	}

	// Align the range to RegSize by zeroing a leading PtrSize chunk
	// with a 32-bit store.
	if cnt%int64(types.RegSize) != 0 {
		// should only happen with nacl
		if cnt%int64(types.PtrSize) != 0 {
			base.Fatalf("zerorange count not a multiple of widthptr %d", cnt)
		}
		if *state&r13 == 0 {
			p = pp.Append(p, x86.AMOVQ, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, x86.REG_R13, 0)
			*state |= r13
		}
		p = pp.Append(p, x86.AMOVL, obj.TYPE_REG, x86.REG_R13, 0, obj.TYPE_MEM, x86.REG_SP, off)
		off += int64(types.PtrSize)
		cnt -= int64(types.PtrSize)
	}

	if cnt == 8 {
		// Exactly one word: a single MOVQ from a zeroed R13.
		if *state&r13 == 0 {
			p = pp.Append(p, x86.AMOVQ, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, x86.REG_R13, 0)
			*state |= r13
		}
		p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_R13, 0, obj.TYPE_MEM, x86.REG_SP, off)
	} else if !isPlan9 && cnt <= int64(8*types.RegSize) {
		// Small range: inline MOVUPS (16-byte) stores from a zeroed X15.
		// SSE is unavailable in Plan 9 note handlers, hence !isPlan9.
		if !buildcfg.Experiment.RegabiG && *state&x15 == 0 {
			p = pp.Append(p, x86.AXORPS, obj.TYPE_REG, x86.REG_X15, 0, obj.TYPE_REG, x86.REG_X15, 0)
			*state |= x15
		}

		for i := int64(0); i < cnt/16; i++ {
			p = pp.Append(p, x86.AMOVUPS, obj.TYPE_REG, x86.REG_X15, 0, obj.TYPE_MEM, x86.REG_SP, off+i*16)
		}

		// Any tail is handled by one final (possibly overlapping)
		// 16-byte store ending exactly at off+cnt.
		if cnt%16 != 0 {
			p = pp.Append(p, x86.AMOVUPS, obj.TYPE_REG, x86.REG_X15, 0, obj.TYPE_MEM, x86.REG_SP, off+cnt-int64(16))
		}
	} else if !isPlan9 && (cnt <= int64(128*types.RegSize)) {
		// Medium range: jump into the middle of runtime DUFFZERO.
		if !buildcfg.Experiment.RegabiG && *state&x15 == 0 {
			p = pp.Append(p, x86.AXORPS, obj.TYPE_REG, x86.REG_X15, 0, obj.TYPE_REG, x86.REG_X15, 0)
			*state |= x15
		}
		// Save DI to r12. With the amd64 Go register abi, DI can contain
		// an incoming parameter, whereas R12 is always scratch.
		p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_DI, 0, obj.TYPE_REG, x86.REG_R12, 0)
		// Emit duffzero call: point DI at the range (pre-adjusted by
		// dzDI) and jump to the offset computed by dzOff.
		p = pp.Append(p, leaptr, obj.TYPE_MEM, x86.REG_SP, off+dzDI(cnt), obj.TYPE_REG, x86.REG_DI, 0)
		p = pp.Append(p, obj.ADUFFZERO, obj.TYPE_NONE, 0, 0, obj.TYPE_ADDR, 0, dzOff(cnt))
		p.To.Sym = ir.Syms.Duffzero
		// DUFFZERO works in 16-byte steps; mop up a sub-16-byte tail
		// with one store just below the final DI.
		if cnt%16 != 0 {
			p = pp.Append(p, x86.AMOVUPS, obj.TYPE_REG, x86.REG_X15, 0, obj.TYPE_MEM, x86.REG_DI, -int64(8))
		}
		// Restore DI from r12
		p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_R12, 0, obj.TYPE_REG, x86.REG_DI, 0)

	} else {
		// When the register ABI is in effect, at this point in the
		// prolog we may have live values in all of RAX,RDI,RCX. Save
		// them off to registers before the REPSTOSQ below, then
		// restore. Note that R12 and R13 are always available as
		// scratch regs; here we also use R15 (this is safe to do
		// since there won't be any globals accessed in the prolog).
		// See rewriteToUseGot() in obj6.go for more on r15 use.

		// Save rax/rdi/rcx
		p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_DI, 0, obj.TYPE_REG, x86.REG_R12, 0)
		p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_AX, 0, obj.TYPE_REG, x86.REG_R13, 0)
		p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_CX, 0, obj.TYPE_REG, x86.REG_R15, 0)

		// Set up the REPSTOSQ and kick it off: AX = 0 (the value),
		// CX = word count, DI = destination.
		p = pp.Append(p, x86.AMOVQ, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, x86.REG_AX, 0)
		p = pp.Append(p, x86.AMOVQ, obj.TYPE_CONST, 0, cnt/int64(types.RegSize), obj.TYPE_REG, x86.REG_CX, 0)
		p = pp.Append(p, leaptr, obj.TYPE_MEM, x86.REG_SP, off, obj.TYPE_REG, x86.REG_DI, 0)
		p = pp.Append(p, x86.AREP, obj.TYPE_NONE, 0, 0, obj.TYPE_NONE, 0, 0)
		p = pp.Append(p, x86.ASTOSQ, obj.TYPE_NONE, 0, 0, obj.TYPE_NONE, 0, 0)

		// Restore rax/rdi/rcx
		p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_R12, 0, obj.TYPE_REG, x86.REG_DI, 0)
		p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_R13, 0, obj.TYPE_REG, x86.REG_AX, 0)
		p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_R15, 0, obj.TYPE_REG, x86.REG_CX, 0)

		// Record the fact that r13 is no longer zero.
		*state &= ^uint32(r13)
	}

	return p
}
   150  
   151  func ginsnop(pp *objw.Progs) *obj.Prog {
   152  	// This is a hardware nop (1-byte 0x90) instruction,
   153  	// even though we describe it as an explicit XCHGL here.
   154  	// Particularly, this does not zero the high 32 bits
   155  	// like typical *L opcodes.
   156  	// (gas assembles "xchg %eax,%eax" to 0x87 0xc0, which
   157  	// does zero the high 32 bits.)
   158  	p := pp.Prog(x86.AXCHGL)
   159  	p.From.Type = obj.TYPE_REG
   160  	p.From.Reg = x86.REG_AX
   161  	p.To.Type = obj.TYPE_REG
   162  	p.To.Reg = x86.REG_AX
   163  	return p
   164  }
   165  

View as plain text