Black Lives Matter. Support the Equal Justice Initiative.

Source file src/cmd/internal/obj/x86/asm6.go

Documentation: cmd/internal/obj/x86

     1  // Inferno utils/6l/span.c
     2  // https://bitbucket.org/inferno-os/inferno-os/src/master/utils/6l/span.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors. All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  package x86
    32  
    33  import (
    34  	"cmd/internal/obj"
    35  	"cmd/internal/objabi"
    36  	"cmd/internal/sys"
    37  	"encoding/binary"
    38  	"fmt"
    39  	"internal/buildcfg"
    40  	"log"
    41  	"strings"
    42  )
    43  
    44  var ( // package-level symbol pointers; nil until assigned (no assignment appears in this part of the file)
    45  	plan9privates *obj.LSym
    46  	deferreturn   *obj.LSym
    47  )
    48  
    49  // Instruction layout.
    50  
    51  // Loop alignment constants:
    52  // want to align loop entry to loopAlign-byte boundary,
    53  // and willing to insert at most maxLoopPad bytes of NOP to do so.
    54  // We define a loop entry as the target of a backward jump.
    55  //
    56  // gcc uses maxLoopPad = 10 for its 'generic x86-64' config,
    57  // and it aligns all jump targets, not just backward jump targets.
    58  //
    59  // As of 6/1/2012, the effect of setting maxLoopPad = 10 here
    60  // is very slight but negative, so the alignment is disabled by
    61  // setting maxLoopPad = 0. The code is here for reference and
    62  // for future experiments.
    63  //
    64  const (
    65  	loopAlign  = 16 // alignment boundary for loop entries, in bytes
    66  	maxLoopPad = 0  // maximum NOP bytes to insert; 0 disables loop alignment
    67  )
    68  
    69  // Bit flags that are used to express jump target properties.
    70  const (
    71  	// branchBackwards marks targets that are located behind the jump.
    72  	// Used to express jumps to loop headers.
    73  	branchBackwards = (1 << iota)
    74  	// branchShort marks branches whose target is close,
    75  	// with an offset in the -128..127 range.
    76  	branchShort
    77  	// branchLoopHead marks loop entry.
    78  	// Used to insert padding for misaligned loops.
    79  	branchLoopHead
    80  )
    81  
    82  // opBytes holds optab encoding bytes.
    83  // Each ytab reserves a fixed number of bytes in this array.
    84  //
    85  // The size should be the minimal number of bytes that
    86  // is enough to hold the biggest optab op lines.
    87  type opBytes [31]uint8
    88  
    89  type Optab struct { // one optab entry: how to encode the instruction named by as
    90  	as     obj.As  // instruction this entry describes; matched against p.As
    91  	ytab   []ytab  // accepted operand forms, scanned line by line
    92  	prefix uint8   // one of the P* constants; controls the prefix bytes to emit
    93  	op     opBytes // opcode bytes; the z pointer walks this list during the ytab scan
    94  }
    95  
    96  type movtab struct { // NOTE(review): ft/f3t/tt look like operand classes for the from/third/to operands — confirm at the use sites, which are outside this view
    97  	as   obj.As
    98  	ft   uint8
    99  	f3t  uint8
   100  	tt   uint8
   101  	code uint8
   102  	op   [4]uint8
   103  }
   104  
   105  const ( // Ytypes: operand classes computed by oclass and related to each other through ycover
   106  	Yxxx = iota
   107  	Ynone
   108  	Yi0 // $0
   109  	Yi1 // $1
   110  	Yu2 // $x, x fits in uint2
   111  	Yi8 // $x, x fits in int8
   112  	Yu8 // $x, x fits in uint8
   113  	Yu7 // $x, x in 0..127 (fits in both int8 and uint8)
   114  	Ys32
   115  	Yi32
   116  	Yi64
   117  	Yiauto
   118  	Yal
   119  	Ycl
   120  	Yax
   121  	Ycx
   122  	Yrb
   123  	Yrl
   124  	Yrl32 // Yrl on 32-bit system
   125  	Yrf
   126  	Yf0
   127  	Yrx
   128  	Ymb
   129  	Yml
   130  	Ym
   131  	Ybr
   132  	Ycs
   133  	Yss
   134  	Yds
   135  	Yes
   136  	Yfs
   137  	Ygs
   138  	Ygdtr
   139  	Yidtr
   140  	Yldtr
   141  	Ymsw
   142  	Ytask
   143  	Ycr0
   144  	Ycr1
   145  	Ycr2
   146  	Ycr3
   147  	Ycr4
   148  	Ycr5
   149  	Ycr6
   150  	Ycr7
   151  	Ycr8
   152  	Ydr0
   153  	Ydr1
   154  	Ydr2
   155  	Ydr3
   156  	Ydr4
   157  	Ydr5
   158  	Ydr6
   159  	Ydr7
   160  	Ytr0
   161  	Ytr1
   162  	Ytr2
   163  	Ytr3
   164  	Ytr4
   165  	Ytr5
   166  	Ytr6
   167  	Ytr7
   168  	Ymr
   169  	Ymm
   170  	Yxr0          // X0 only. "<XMM0>" notation in Intel manual.
   171  	YxrEvexMulti4 // [ X<n> - X<n+3> ]; multisource YxrEvex
   172  	Yxr           // X0..X15
   173  	YxrEvex       // X0..X31
   174  	Yxm
   175  	YxmEvex       // YxrEvex+Ym
   176  	Yxvm          // VSIB vector array; vm32x/vm64x
   177  	YxvmEvex      // Yxvm which permits High-16 X register as index.
   178  	YyrEvexMulti4 // [ Y<n> - Y<n+3> ]; multisource YyrEvex
   179  	Yyr           // Y0..Y15
   180  	YyrEvex       // Y0..Y31
   181  	Yym
   182  	YymEvex   // YyrEvex+Ym
   183  	Yyvm      // VSIB vector array; vm32y/vm64y
   184  	YyvmEvex  // Yyvm which permits High-16 Y register as index.
   185  	YzrMulti4 // [ Z<n> - Z<n+3> ]; multisource Yzr
   186  	Yzr       // Z0..Z31
   187  	Yzm       // Yzr+Ym
   188  	Yzvm      // VSIB vector array; vm32z/vm64z
   189  	Yk0       // K0
   190  	Yknot0    // K1..K7; write mask
   191  	Yk        // K0..K7; used for KOP
   192  	Ykm       // Yk+Ym; used for KOP
   193  	Ytls
   194  	Ytextsize
   195  	Yindir
   196  	Ymax // number of Y classes; ycover has Ymax*Ymax entries
   197  )
   198  
   199  const ( // Ztypes: the first entry of each ytab line; doasm switches on yt.zcase to lay down bytes
   200  	Zxxx = iota
   201  	Zlit
   202  	Zlitm_r
   203  	Zlitr_m
   204  	Zlit_m_r
   205  	Z_rp
   206  	Zbr
   207  	Zcall
   208  	Zcallcon
   209  	Zcallduff
   210  	Zcallind
   211  	Zcallindreg
   212  	Zib_
   213  	Zib_rp
   214  	Zibo_m
   215  	Zibo_m_xm
   216  	Zil_
   217  	Zil_rp
   218  	Ziq_rp
   219  	Zilo_m
   220  	Zjmp
   221  	Zjmpcon
   222  	Zloop
   223  	Zo_iw
   224  	Zm_o
   225  	Zm_r
   226  	Z_m_r
   227  	Zm2_r
   228  	Zm_r_xm
   229  	Zm_r_i_xm
   230  	Zm_r_xm_nr
   231  	Zr_m_xm_nr
   232  	Zibm_r // mmx1,mmx2/mem64,imm8
   233  	Zibr_m
   234  	Zmb_r
   235  	Zaut_r
   236  	Zo_m
   237  	Zo_m64
   238  	Zpseudo
   239  	Zr_m
   240  	Zr_m_xm
   241  	Zrp_
   242  	Z_ib
   243  	Z_il
   244  	Zm_ibo
   245  	Zm_ilo
   246  	Zib_rr
   247  	Zil_rr
   248  	Zbyte
   249  
   250  	Zvex_rm_v_r
   251  	Zvex_rm_v_ro
   252  	Zvex_r_v_rm
   253  	Zvex_i_rm_vo
   254  	Zvex_v_rm_r
   255  	Zvex_i_rm_r
   256  	Zvex_i_r_v
   257  	Zvex_i_rm_v_r
   258  	Zvex
   259  	Zvex_rm_r_vo
   260  	Zvex_i_r_rm
   261  	Zvex_hr_rm_v_r
   262  
   263  	Zevex_first // sits immediately before the first Zevex_* case in iota order
   264  	Zevex_i_r_k_rm
   265  	Zevex_i_r_rm
   266  	Zevex_i_rm_k_r
   267  	Zevex_i_rm_k_vo
   268  	Zevex_i_rm_r
   269  	Zevex_i_rm_v_k_r
   270  	Zevex_i_rm_v_r
   271  	Zevex_i_rm_vo
   272  	Zevex_k_rmo
   273  	Zevex_r_k_rm
   274  	Zevex_r_v_k_rm
   275  	Zevex_r_v_rm
   276  	Zevex_rm_k_r
   277  	Zevex_rm_v_k_r
   278  	Zevex_rm_v_r
   279  	Zevex_last // sits immediately after the last Zevex_* case in iota order
   280  
   281  	Zmax // number of Z cases
   282  )
   283  
   284  const ( // P*: values stored in Optab.prefix to select prefix bytes; Rx*: REX-style extension bits
   285  	Px   = 0
   286  	Px1  = 1    // symbolic; exact value doesn't matter
   287  	P32  = 0x32 // 32-bit only
   288  	Pe   = 0x66 // operand escape
   289  	Pm   = 0x0f // 2byte opcode escape
   290  	Pq   = 0xff // both escapes: 66 0f
   291  	Pb   = 0xfe // byte operands
   292  	Pf2  = 0xf2 // xmm escape 1: f2 0f
   293  	Pf3  = 0xf3 // xmm escape 2: f3 0f
   294  	Pef3 = 0xf5 // xmm escape 2 with 16-bit prefix: 66 f3 0f
   295  	Pq3  = 0x67 // xmm escape 3: 66 48 0f
   296  	Pq4  = 0x68 // xmm escape 4: 66 0F 38
   297  	Pq4w = 0x69 // Pq4 with Rex.w 66 0F 38
   298  	Pq5  = 0x6a // xmm escape 5: F3 0F 38
   299  	Pq5w = 0x6b // Pq5 with Rex.w F3 0F 38
   300  	Pfw  = 0xf4 // Pf3 with Rex.w: f3 48 0f
   301  	Pw   = 0x48 // Rex.w
   302  	Pw8  = 0x90 // symbolic; exact value doesn't matter
   303  	Py   = 0x80 // defaults to 64-bit mode
   304  	Py1  = 0x81 // symbolic; exact value doesn't matter
   305  	Py3  = 0x83 // symbolic; exact value doesn't matter
   306  	Pavx = 0x84 // symbolic; exact value doesn't matter
   307  
   308  	RxrEvex = 1 << 4 // AVX512 extension to REX.R/VEX.R
   309  	Rxw     = 1 << 3 // =1, 64-bit operand size
   310  	Rxr     = 1 << 2 // extend modrm reg
   311  	Rxx     = 1 << 1 // extend sib index
   312  	Rxb     = 1 << 0 // extend modrm r/m, sib base, or opcode reg
   313  )
   314  
   315  const (
   316  	// Encoding for VEX prefix in tables.
   317  	// The P, L, and W fields are chosen to match
   318  	// their eventual locations in the VEX prefix bytes.
   319  
   320  	// As laid out below, the fields occupy disjoint bits:
   321  	// P in bits 0-1, L in bit 2, M in bits 3-4,
   322  	// avxEscape in bit 6 and W in bit 7.
   323  
   324  	// Using spare bit to make leading [E]VEX encoding byte different from
   325  	// 0x0f even if all other VEX fields are 0.
   326  	avxEscape = 1 << 6
   327  
   328  	// P field - 2 bits
   329  	vex66 = 1 << 0
   330  	vexF3 = 2 << 0
   331  	vexF2 = 3 << 0
   332  	// L field - 1 bit
   333  	vexLZ  = 0 << 2
   334  	vexLIG = 0 << 2
   335  	vex128 = 0 << 2
   336  	vex256 = 1 << 2
   337  	// W field - 1 bit
   338  	vexWIG = 0 << 7
   339  	vexW0  = 0 << 7
   340  	vexW1  = 1 << 7
   341  	// M field - 5 bits, but mostly reserved; we can store up to 3
   342  	vex0F   = 1 << 3
   343  	vex0F38 = 2 << 3
   344  	vex0F3A = 3 << 3
   345  )
   346  
   347  var ycover [Ymax * Ymax]uint8 // ycover[a*Ymax+b] = 1 means class a also counts as class b; set up in instinit
   348  
   349  var reg [MAXREG]int // NOTE(review): presumably indexed by register number; contents are filled in outside this view
   350  
   351  var regrex [MAXREG + 1]int // NOTE(review): one slot longer than reg; presumably per-register REX bits — confirm where it is filled in
   352  
   353  var ynone = []ytab{ // single form with no operands; shared by operand-less optab entries such as AAAA, ACLC, ACMC
   354  	{Zlit, 1, argList{}},
   355  }
   356  
   357  var ytext = []ytab{ // two Zpseudo forms: without and with a Yi32 middle operand
   358  	{Zpseudo, 0, argList{Ymb, Ytextsize}},
   359  	{Zpseudo, 1, argList{Ymb, Yi32, Ytextsize}},
   360  }
   361  
   362  var ynop = []ytab{ // Zpseudo forms with zero or one operand. NOTE(review): lines 6-9 repeat the argLists of lines 2-5, so they look unreachable — confirm
   363  	{Zpseudo, 0, argList{}},
   364  	{Zpseudo, 0, argList{Yiauto}},
   365  	{Zpseudo, 0, argList{Yml}},
   366  	{Zpseudo, 0, argList{Yrf}},
   367  	{Zpseudo, 0, argList{Yxr}},
   368  	{Zpseudo, 0, argList{Yiauto}},
   369  	{Zpseudo, 0, argList{Yml}},
   370  	{Zpseudo, 0, argList{Yrf}},
   371  	{Zpseudo, 1, argList{Yxr}},
   372  }
   373  
   374  var yfuncdata = []ytab{ // single Zpseudo form: Yi32 then Ym
   375  	{Zpseudo, 0, argList{Yi32, Ym}},
   376  }
   377  
   378  var ypcdata = []ytab{ // single Zpseudo form: two Yi32 operands
   379  	{Zpseudo, 0, argList{Yi32, Yi32}},
   380  }
   381  
   382  var yxorb = []ytab{ // byte-operand forms used by AADCB, AADDB, AANDB; the second entries (1+2+1+1) add up to their 5 opBytes
   383  	{Zib_, 1, argList{Yi32, Yal}},
   384  	{Zibo_m, 2, argList{Yi32, Ymb}},
   385  	{Zr_m, 1, argList{Yrb, Ymb}},
   386  	{Zm_r, 1, argList{Ymb, Yrb}},
   387  }
   388  
   389  var yaddl = []ytab{ // forms used by AADCL, AADDL, AANDL and their Q/W variants; this is the table walked through in the comment above optab
   390  	{Zibo_m, 2, argList{Yi8, Yml}},
   391  	{Zil_, 1, argList{Yi32, Yax}},
   392  	{Zilo_m, 2, argList{Yi32, Yml}},
   393  	{Zr_m, 1, argList{Yrl, Yml}},
   394  	{Zm_r, 1, argList{Yml, Yrl}},
   395  }
   396  
   397  var yincl = []ytab{ // one-operand forms: Yrl via Z_rp, or Yml via Zo_m
   398  	{Z_rp, 1, argList{Yrl}},
   399  	{Zo_m, 2, argList{Yml}},
   400  }
   401  
   402  var yincq = []ytab{ // yincl without the Z_rp register line
   403  	{Zo_m, 2, argList{Yml}},
   404  }
   405  
   406  var ycmpb = []ytab{ // used by ACMPB; line by line, each argList is yxorb's with the two operands swapped
   407  	{Z_ib, 1, argList{Yal, Yi32}},
   408  	{Zm_ibo, 2, argList{Ymb, Yi32}},
   409  	{Zm_r, 1, argList{Ymb, Yrb}},
   410  	{Zr_m, 1, argList{Yrb, Ymb}},
   411  }
   412  
   413  var ycmpl = []ytab{ // used by ACMPL, ACMPQ; line by line, each argList is yaddl's with the two operands swapped
   414  	{Zm_ibo, 2, argList{Yml, Yi8}},
   415  	{Z_il, 1, argList{Yax, Yi32}},
   416  	{Zm_ilo, 2, argList{Yml, Yi32}},
   417  	{Zm_r, 1, argList{Yml, Yrl}},
   418  	{Zr_m, 1, argList{Yrl, Yml}},
   419  }
   420  
   421  var yshb = []ytab{ // first operand is Yi1, Yu8 or Ycx, second is Ymb; unlike yshl there is no Ycl line
   422  	{Zo_m, 2, argList{Yi1, Ymb}},
   423  	{Zibo_m, 2, argList{Yu8, Ymb}},
   424  	{Zo_m, 2, argList{Ycx, Ymb}},
   425  }
   426  
   427  var yshl = []ytab{ // yshb's shapes with Yml as the second operand, plus a Ycl line
   428  	{Zo_m, 2, argList{Yi1, Yml}},
   429  	{Zibo_m, 2, argList{Yu8, Yml}},
   430  	{Zo_m, 2, argList{Ycl, Yml}},
   431  	{Zo_m, 2, argList{Ycx, Yml}},
   432  }
   433  
   434  var ytestl = []ytab{ // identical to yaddl minus its leading Yi8 line
   435  	{Zil_, 1, argList{Yi32, Yax}},
   436  	{Zilo_m, 2, argList{Yi32, Yml}},
   437  	{Zr_m, 1, argList{Yrl, Yml}},
   438  	{Zm_r, 1, argList{Yml, Yrl}},
   439  }
   440  
   441  var ymovb = []ytab{ // Yrb<->Ymb in both directions, then Yi32 into Yrb or Ymb
   442  	{Zr_m, 1, argList{Yrb, Ymb}},
   443  	{Zm_r, 1, argList{Ymb, Yrb}},
   444  	{Zib_rp, 1, argList{Yi32, Yrb}},
   445  	{Zibo_m, 2, argList{Yi32, Ymb}},
   446  }
   447  
   448  var ybtl = []ytab{ // used by the ABT*, ABTC*, ABTR*, ABTS* optab entries: Yi8 or Yrl first operand, Yml second
   449  	{Zibo_m, 2, argList{Yi8, Yml}},
   450  	{Zr_m, 1, argList{Yrl, Yml}},
   451  }
   452  
   453  var ymovw = []ytab{ // Yrl<->Yml, Yi32 into Yrl or Yml, and a Zaut_r line for Yiauto
   454  	{Zr_m, 1, argList{Yrl, Yml}},
   455  	{Zm_r, 1, argList{Yml, Yrl}},
   456  	{Zil_rp, 1, argList{Yi32, Yrl}},
   457  	{Zilo_m, 2, argList{Yi32, Yml}},
   458  	{Zaut_r, 2, argList{Yiauto, Yrl}},
   459  }
   460  
   461  var ymovl = []ytab{ // ymovw's lines with four MMX/XMM MOVD lines inserted before the final Zaut_r line
   462  	{Zr_m, 1, argList{Yrl, Yml}},
   463  	{Zm_r, 1, argList{Yml, Yrl}},
   464  	{Zil_rp, 1, argList{Yi32, Yrl}},
   465  	{Zilo_m, 2, argList{Yi32, Yml}},
   466  	{Zm_r_xm, 1, argList{Yml, Ymr}}, // MMX MOVD
   467  	{Zr_m_xm, 1, argList{Ymr, Yml}}, // MMX MOVD
   468  	{Zm_r_xm, 2, argList{Yml, Yxr}}, // XMM MOVD (32 bit)
   469  	{Zr_m_xm, 2, argList{Yxr, Yml}}, // XMM MOVD (32 bit)
   470  	{Zaut_r, 2, argList{Yiauto, Yrl}},
   471  }
   472  
   473  var yret = []ytab{ // with or without a Yi32 operand; both lines use Zo_iw
   474  	{Zo_iw, 1, argList{}},
   475  	{Zo_iw, 1, argList{Yi32}},
   476  }
   477  
   478  var ymovq = []ytab{ // MOVQ forms in two groups; the per-line comments give the prefix/opcode each line is meant to select
   479  	// valid in 32-bit mode
   480  	{Zm_r_xm_nr, 1, argList{Ym, Ymr}},  // 0x6f MMX MOVQ (shorter encoding)
   481  	{Zr_m_xm_nr, 1, argList{Ymr, Ym}},  // 0x7f MMX MOVQ
   482  	{Zm_r_xm_nr, 2, argList{Yxr, Ymr}}, // Pf2, 0xd6 MOVDQ2Q
   483  	{Zm_r_xm_nr, 2, argList{Yxm, Yxr}}, // Pf3, 0x7e MOVQ xmm1/m64 -> xmm2
   484  	{Zr_m_xm_nr, 2, argList{Yxr, Yxm}}, // Pe, 0xd6 MOVQ xmm1 -> xmm2/m64
   485  
   486  	// valid only in 64-bit mode, usually with 64-bit prefix
   487  	{Zr_m, 1, argList{Yrl, Yml}},      // 0x89
   488  	{Zm_r, 1, argList{Yml, Yrl}},      // 0x8b
   489  	{Zilo_m, 2, argList{Ys32, Yrl}},   // 32 bit signed 0xc7,(0)
   490  	{Ziq_rp, 1, argList{Yi64, Yrl}},   // 0xb8 -- 32/64 bit immediate
   491  	{Zilo_m, 2, argList{Yi32, Yml}},   // 0xc7,(0)
   492  	{Zm_r_xm, 1, argList{Ymm, Ymr}},   // 0x6e MMX MOVD
   493  	{Zr_m_xm, 1, argList{Ymr, Ymm}},   // 0x7e MMX MOVD
   494  	{Zm_r_xm, 2, argList{Yml, Yxr}},   // Pe, 0x6e MOVD xmm load
   495  	{Zr_m_xm, 2, argList{Yxr, Yml}},   // Pe, 0x7e MOVD xmm store
   496  	{Zaut_r, 1, argList{Yiauto, Yrl}}, // 0 built-in LEAQ
   497  }
   498  
   499  var ymovbe = []ytab{ // Ym<->Yrl in both directions; each line's second entry is 3
   500  	{Zlitm_r, 3, argList{Ym, Yrl}},
   501  	{Zlitr_m, 3, argList{Yrl, Ym}},
   502  }
   503  
   504  var ym_rl = []ytab{ // single form: Ym first operand, Yrl second
   505  	{Zm_r, 1, argList{Ym, Yrl}},
   506  }
   507  
   508  var yrl_m = []ytab{ // single form: Yrl first operand, Ym second; used by ABOUNDL, ABOUNDW
   509  	{Zr_m, 1, argList{Yrl, Ym}},
   510  }
   511  
   512  var ymb_rl = []ytab{ // single form: Ymb first operand, Yrl second, via Zmb_r
   513  	{Zmb_r, 1, argList{Ymb, Yrl}},
   514  }
   515  
   516  var yml_rl = []ytab{ // single form: Yml first operand, Yrl second; used by the ACMOV*, ABSF*, ABSR*, AADCX*, AADOX* optab entries
   517  	{Zm_r, 1, argList{Yml, Yrl}},
   518  }
   519  
   520  var yrl_ml = []ytab{ // single form: Yrl first operand, Yml second; used by AARPL
   521  	{Zr_m, 1, argList{Yrl, Yml}},
   522  }
   523  
   524  var yml_mb = []ytab{ // NOTE(review): despite the name, both lines pair Yrb with Ymb; no Yml appears
   525  	{Zr_m, 1, argList{Yrb, Ymb}},
   526  	{Zm_r, 1, argList{Ymb, Yrb}},
   527  }
   528  
   529  var yrb_mb = []ytab{ // single form: Yrb first operand, Ymb second (same as the first line of yml_mb)
   530  	{Zr_m, 1, argList{Yrb, Ymb}},
   531  }
   532  
   533  var yxchg = []ytab{ // Yax paired with Yrl in either order, then Yrl<->Yml in either order
   534  	{Z_rp, 1, argList{Yax, Yrl}},
   535  	{Zrp_, 1, argList{Yrl, Yax}},
   536  	{Zr_m, 1, argList{Yrl, Yml}},
   537  	{Zm_r, 1, argList{Yml, Yrl}},
   538  }
   539  
   540  var ydivl = []ytab{ // single one-operand form on Yml via Zm_o
   541  	{Zm_o, 2, argList{Yml}},
   542  }
   543  
   544  var ydivb = []ytab{ // same as ydivl but on Ymb
   545  	{Zm_o, 2, argList{Ymb}},
   546  }
   547  
   548  var yimul = []ytab{ // one-operand Yml form, two immediate-to-Yrl forms (Yi8, Yi32), and Yml to Yrl
   549  	{Zm_o, 2, argList{Yml}},
   550  	{Zib_rr, 1, argList{Yi8, Yrl}},
   551  	{Zil_rr, 1, argList{Yi32, Yrl}},
   552  	{Zm_r, 2, argList{Yml, Yrl}},
   553  }
   554  
   555  var yimul3 = []ytab{ // three-operand forms: Yi8 or Yi32 immediate, then Yml, then Yrl; both use Zibm_r
   556  	{Zibm_r, 2, argList{Yi8, Yml, Yrl}},
   557  	{Zibm_r, 2, argList{Yi32, Yml, Yrl}},
   558  }
   559  
   560  var ybyte = []ytab{ // single Yi64 operand via Zbyte; used by ABYTE
   561  	{Zbyte, 1, argList{Yi64}},
   562  }
   563  
   564  var yin = []ytab{ // either a Yi32 operand (Zib_) or no operand at all (Zlit)
   565  	{Zib_, 1, argList{Yi32}},
   566  	{Zlit, 1, argList{}},
   567  }
   568  
   569  var yint = []ytab{ // single Yi32 operand (same as the first line of yin)
   570  	{Zib_, 1, argList{Yi32}},
   571  }
   572  
   573  var ypushl = []ytab{ // one operand: Yrl, Ym, Yi8 or Yi32, each with its own Z case
   574  	{Zrp_, 1, argList{Yrl}},
   575  	{Zm_o, 2, argList{Ym}},
   576  	{Zib_, 1, argList{Yi8}},
   577  	{Zil_, 1, argList{Yi32}},
   578  }
   579  
   580  var ypopl = []ytab{ // one operand: Yrl via Z_rp, or Ym via Zo_m
   581  	{Z_rp, 1, argList{Yrl}},
   582  	{Zo_m, 2, argList{Ym}},
   583  }
   584  
   585  var ywrfsbase = []ytab{ // single Yrl operand via Zm_o (compare yrdrand, which uses Zo_m)
   586  	{Zm_o, 2, argList{Yrl}},
   587  }
   588  
   589  var yrdrand = []ytab{ // single Yrl operand via Zo_m (compare ywrfsbase, which uses Zm_o)
   590  	{Zo_m, 2, argList{Yrl}},
   591  }
   592  
   593  var yclflush = []ytab{ // single Ym operand via Zo_m; used by ACLDEMOTE, ACLFLUSH, ACLFLUSHOPT, ACLWB
   594  	{Zo_m, 2, argList{Ym}},
   595  }
   596  
   597  var ybswap = []ytab{ // single Yrl operand via Z_rp; used by ABSWAPL, ABSWAPQ
   598  	{Z_rp, 2, argList{Yrl}},
   599  }
   600  
   601  var yscond = []ytab{ // single Ymb operand via Zo_m
   602  	{Zo_m, 2, argList{Ymb}},
   603  }
   604  
   605  var yjcond = []ytab{ // Ybr target alone, or preceded by a $0 (Yi0) or $1 (Yi1) operand; all lines use Zbr
   606  	{Zbr, 0, argList{Ybr}},
   607  	{Zbr, 0, argList{Yi0, Ybr}},
   608  	{Zbr, 1, argList{Yi1, Ybr}},
   609  }
   610  
   611  var yloop = []ytab{ // single Ybr operand via Zloop
   612  	{Zloop, 1, argList{Ybr}},
   613  }
   614  
   615  var ycall = []ytab{ // used by obj.ACALL; a line whose second entry is 0 does not advance z, so the next line starts at the same opBytes
   616  	{Zcallindreg, 0, argList{Yml}},
   617  	{Zcallindreg, 2, argList{Yrx, Yrx}},
   618  	{Zcallind, 2, argList{Yindir}},
   619  	{Zcall, 0, argList{Ybr}},
   620  	{Zcallcon, 1, argList{Yi32}},
   621  }
   622  
   623  var yduff = []ytab{ // single Yi32 operand via Zcallduff
   624  	{Zcallduff, 1, argList{Yi32}},
   625  }
   626  
   627  var yjmp = []ytab{ // one operand: Yml via Zo_m64, Ybr via Zjmp, or Yi32 via Zjmpcon
   628  	{Zo_m64, 2, argList{Yml}},
   629  	{Zjmp, 0, argList{Ybr}},
   630  	{Zjmpcon, 1, argList{Yi32}},
   631  }
   632  
   633  var yfmvd = []ytab{ // Yf0 paired with Ym or Yrf, in both operand orders
   634  	{Zm_o, 2, argList{Ym, Yf0}},
   635  	{Zo_m, 2, argList{Yf0, Ym}},
   636  	{Zm_o, 2, argList{Yrf, Yf0}},
   637  	{Zo_m, 2, argList{Yf0, Yrf}},
   638  }
   639  
   640  var yfmvdp = []ytab{ // only the Yf0-first lines of yfmvd
   641  	{Zo_m, 2, argList{Yf0, Ym}},
   642  	{Zo_m, 2, argList{Yf0, Yrf}},
   643  }
   644  
   645  var yfmvf = []ytab{ // only the Ym lines of yfmvd (no Yrf forms)
   646  	{Zm_o, 2, argList{Ym, Yf0}},
   647  	{Zo_m, 2, argList{Yf0, Ym}},
   648  }
   649  
   650  var yfmvx = []ytab{ // single form: Ym first operand, Yf0 second
   651  	{Zm_o, 2, argList{Ym, Yf0}},
   652  }
   653  
   654  var yfmvp = []ytab{ // single form: Yf0 first operand, Ym second
   655  	{Zo_m, 2, argList{Yf0, Ym}},
   656  }
   657  
   658  var yfcmv = []ytab{ // single form: Yrf first operand, Yf0 second
   659  	{Zm_o, 2, argList{Yrf, Yf0}},
   660  }
   661  
   662  var yfadd = []ytab{ // yfmvd without its (Yf0, Ym) line
   663  	{Zm_o, 2, argList{Ym, Yf0}},
   664  	{Zm_o, 2, argList{Yrf, Yf0}},
   665  	{Zo_m, 2, argList{Yf0, Yrf}},
   666  }
   667  
   668  var yfxch = []ytab{ // Yf0 and Yrf in either operand order
   669  	{Zo_m, 2, argList{Yf0, Yrf}},
   670  	{Zm_o, 2, argList{Yrf, Yf0}},
   671  }
   672  
   673  var ycompp = []ytab{ // single form: Yf0 first operand, Yrf second
   674  	{Zo_m, 2, argList{Yf0, Yrf}}, // botch is really f0,f1
   675  }
   676  
   677  var ystsw = []ytab{ // one operand: Ym via Zo_m, or Yax via Zlit
   678  	{Zo_m, 2, argList{Ym}},
   679  	{Zlit, 1, argList{Yax}},
   680  }
   681  
   682  var ysvrs_mo = []ytab{ // single Ym operand via Zm_o
   683  	{Zm_o, 2, argList{Ym}},
   684  }
   685  
   686  // ysvrs_om is the unaryDst version of "ysvrs_mo": same Ym operand, Zo_m instead of Zm_o.
   687  var ysvrs_om = []ytab{
   688  	{Zo_m, 2, argList{Ym}},
   689  }
   690  
   691  var ymm = []ytab{ // an MMX-class line (Ymm, Ymr) followed by an XMM-class line (Yxm, Yxr); both use Zm_r_xm
   692  	{Zm_r_xm, 1, argList{Ymm, Ymr}},
   693  	{Zm_r_xm, 2, argList{Yxm, Yxr}},
   694  }
   695  
   696  var yxm = []ytab{ // single form: Yxm first operand, Yxr second, via Zm_r_xm; used by AADDPD, AADDSS, AANDPS and similar optab entries
   697  	{Zm_r_xm, 1, argList{Yxm, Yxr}},
   698  }
   699  
   700  var yxm_q4 = []ytab{ // same operands as yxm, but encoded via Zm_r instead of Zm_r_xm
   701  	{Zm_r, 1, argList{Yxm, Yxr}},
   702  }
   703  
   704  var yxcvm1 = []ytab{ // Yxm first operand; second operand is Yxr or Ymr
   705  	{Zm_r_xm, 2, argList{Yxm, Yxr}},
   706  	{Zm_r_xm, 2, argList{Yxm, Ymr}},
   707  }
   708  
   709  var yxcvm2 = []ytab{ // Yxr second operand; first operand is Yxm or Ymm
   710  	{Zm_r_xm, 2, argList{Yxm, Yxr}},
   711  	{Zm_r_xm, 2, argList{Ymm, Yxr}},
   712  }
   713  
   714  var yxr = []ytab{ // single form: both operands are Yxr (X0..X15)
   715  	{Zm_r_xm, 1, argList{Yxr, Yxr}},
   716  }
   717  
   718  var yxr_ml = []ytab{ // single form: Yxr first operand, Yml second
   719  	{Zr_m_xm, 1, argList{Yxr, Yml}},
   720  }
   721  
   722  var ymr = []ytab{ // single form: both operands are Ymr
   723  	{Zm_r, 1, argList{Ymr, Ymr}},
   724  }
   725  
   726  var ymr_ml = []ytab{ // single form: Ymr first operand, Yml second
   727  	{Zr_m_xm, 1, argList{Ymr, Yml}},
   728  }
   729  
   730  var yxcmpi = []ytab{ // Yxm, Yxr and a trailing Yi8 immediate; used by ACMPPD, ACMPPS, ACMPSD
   731  	{Zm_r_i_xm, 2, argList{Yxm, Yxr, Yi8}},
   732  }
   733  
   734  var yxmov = []ytab{ // Yxm and Yxr in either operand order
   735  	{Zm_r_xm, 1, argList{Yxm, Yxr}},
   736  	{Zr_m_xm, 1, argList{Yxr, Yxm}},
   737  }
   738  
   739  var yxcvfl = []ytab{ // single form: Yxm first operand, Yrl second; yxcvfq has the same operands with second entry 2
   740  	{Zm_r_xm, 1, argList{Yxm, Yrl}},
   741  }
   742  
   743  var yxcvlf = []ytab{ // single form: Yml first operand, Yxr second; yxcvqf has the same operands with second entry 2
   744  	{Zm_r_xm, 1, argList{Yml, Yxr}},
   745  }
   746  
   747  var yxcvfq = []ytab{ // same operands as yxcvfl, with second entry 2 instead of 1
   748  	{Zm_r_xm, 2, argList{Yxm, Yrl}},
   749  }
   750  
   751  var yxcvqf = []ytab{ // same operands as yxcvlf, with second entry 2 instead of 1
   752  	{Zm_r_xm, 2, argList{Yml, Yxr}},
   753  }
   754  
   755  var yps = []ytab{ // MMX-class then XMM-class, each as a two-register/memory line followed by a Yi8-immediate line
   756  	{Zm_r_xm, 1, argList{Ymm, Ymr}},
   757  	{Zibo_m_xm, 2, argList{Yi8, Ymr}},
   758  	{Zm_r_xm, 2, argList{Yxm, Yxr}},
   759  	{Zibo_m_xm, 3, argList{Yi8, Yxr}},
   760  }
   761  
   762  var yxrrl = []ytab{ // single form: Yxr first operand, Yrl second
   763  	{Zm_r, 1, argList{Yxr, Yrl}},
   764  }
   765  
   766  var ymrxr = []ytab{ // Yxr second operand; first operand is Ymr (Zm_r) or Yxm (Zm_r_xm)
   767  	{Zm_r, 1, argList{Ymr, Yxr}},
   768  	{Zm_r_xm, 1, argList{Yxm, Yxr}},
   769  }
   770  
   771  var ymshuf = []ytab{ // three operands: Yi8 immediate, Ymm, Ymr
   772  	{Zibm_r, 2, argList{Yi8, Ymm, Ymr}},
   773  }
   774  
   775  var ymshufb = []ytab{ // single form: Yxm first operand, Yxr second, via Zm2_r
   776  	{Zm2_r, 2, argList{Yxm, Yxr}},
   777  }
   778  
   779  // It should never have more than 1 entry,
   780  // because some optab entries use opcode sequences that
   781  // are longer than 2 bytes (zoffset=2 here),
   782  // ROUNDPD and ROUNDPS and recently added BLENDPD,
   783  // to name a few.
   784  var yxshuf = []ytab{
   785  	{Zibm_r, 2, argList{Yu8, Yxm, Yxr}},
   786  }
   787  
   788  var yextrw = []ytab{ // Yu8 immediate and Yxr, with Yrl (Zibm_r) or Yml (Zibr_m) as the last operand
   789  	{Zibm_r, 2, argList{Yu8, Yxr, Yrl}},
   790  	{Zibr_m, 2, argList{Yu8, Yxr, Yml}},
   791  }
   792  
   793  var yextr = []ytab{ // three operands: Yu8 immediate, Yxr, Ymm; yinsr has the last two swapped
   794  	{Zibr_m, 3, argList{Yu8, Yxr, Ymm}},
   795  }
   796  
   797  var yinsrw = []ytab{ // three operands: Yu8 immediate, Yml, Yxr
   798  	{Zibm_r, 2, argList{Yu8, Yml, Yxr}},
   799  }
   800  
   801  var yinsr = []ytab{ // three operands: Yu8 immediate, Ymm, Yxr; yextr has the last two swapped
   802  	{Zibm_r, 3, argList{Yu8, Ymm, Yxr}},
   803  }
   804  
   805  var ypsdq = []ytab{ // single form: Yi8 immediate first, Yxr second, via Zibo_m
   806  	{Zibo_m, 2, argList{Yi8, Yxr}},
   807  }
   808  
   809  var ymskb = []ytab{ // Yrl second operand; first operand is Yxr or Ymr
   810  	{Zm_r_xm, 2, argList{Yxr, Yrl}},
   811  	{Zm_r_xm, 1, argList{Ymr, Yrl}},
   812  }
   813  
   814  var ycrc32l = []ytab{ // single form: Yml first operand, Yrl second, via Zlitm_r
   815  	{Zlitm_r, 0, argList{Yml, Yrl}},
   816  }
   817  
   818  var ycrc32b = []ytab{ // same as ycrc32l but with Ymb as the first operand
   819  	{Zlitm_r, 0, argList{Ymb, Yrl}},
   820  }
   821  
   822  var yprefetch = []ytab{ // single Ym operand via Zm_o (same line as ysvrs_mo)
   823  	{Zm_o, 2, argList{Ym}},
   824  }
   825  
   826  var yaes = []ytab{ // single form: Yxm first operand, Yxr second, via Zlitm_r
   827  	{Zlitm_r, 2, argList{Yxm, Yxr}},
   828  }
   829  
   830  var yxbegin = []ytab{ // single Ybr operand via Zjmp
   831  	{Zjmp, 1, argList{Ybr}},
   832  }
   833  
   834  var yxabort = []ytab{ // single Yu8 immediate operand via Zib_
   835  	{Zib_, 1, argList{Yu8}},
   836  }
   837  
   838  var ylddqu = []ytab{ // single form: Ym first operand, Yxr second
   839  	{Zm_r, 1, argList{Ym, Yxr}},
   840  }
   841  
   842  var ypalignr = []ytab{ // same single line as yxshuf: Yu8 immediate, Yxm, Yxr
   843  	{Zibm_r, 2, argList{Yu8, Yxm, Yxr}},
   844  }
   845  
   846  var ysha256rnds2 = []ytab{ // three operands: Yxr0 (X0 only), Yxm, Yxr, via Zlit_m_r
   847  	{Zlit_m_r, 0, argList{Yxr0, Yxm, Yxr}},
   848  }
   849  
   850  var yblendvpd = []ytab{ // same operands as ysha256rnds2 (Yxr0, Yxm, Yxr), but via Z_m_r
   851  	{Z_m_r, 1, argList{Yxr0, Yxm, Yxr}},
   852  }
   853  
   854  var ymmxmm0f38 = []ytab{ // an MMX-class line (second entry 3) followed by an XMM-class line (second entry 5); both use Zlitm_r
   855  	{Zlitm_r, 3, argList{Ymm, Ymr}},
   856  	{Zlitm_r, 5, argList{Yxm, Yxr}},
   857  }
   858  
   859  var yextractps = []ytab{ // three operands: Yu2 immediate (fits in uint2), Yxr, Yml
   860  	{Zibr_m, 2, argList{Yu2, Yxr, Yml}},
   861  }
   862  
   863  var ysha1rnds4 = []ytab{ // like yxshuf, but the immediate must fit in uint2 (Yu2)
   864  	{Zibm_r, 2, argList{Yu2, Yxm, Yxr}},
   865  }
   866  
   867  // You are doasm, holding in your hand a *obj.Prog with p.As set to, say,
   868  // ACRC32, and p.From and p.To as operands (obj.Addr).  The linker scans optab
   869  // to find the entry with the given p.As and then looks through the ytable for
   870  // that instruction (the second field in the optab struct) for a line whose
   871  // argList matches the Ytypes of the p.From and p.To operands.  The
   872  // function oclass computes the specific Ytype of an operand and then the set
   873  // of more general Ytypes that it satisfies is implied by the ycover table, set
   874  // up in instinit.  For example, oclass distinguishes the constants 0 and 1
   875  // from the more general 8-bit constants, but instinit says
   876  //
   877  //        ycover[Yi0*Ymax+Ys32] = 1
   878  //        ycover[Yi1*Ymax+Ys32] = 1
   879  //        ycover[Yi8*Ymax+Ys32] = 1
   880  //
   881  // which means that Yi0, Yi1, and Yi8 all count as Ys32 (signed 32)
   882  // if that's what an instruction can handle.
   883  //
   884  // In parallel with the scan through the ytable for the appropriate line, there
   885  // is a z pointer that starts out pointing at the strange magic byte list in
   886  // the Optab struct.  With each step past a non-matching ytable line, z
   887  // advances by the 2nd entry in the line.  When a matching line is found, that
   888  // z pointer has the extra data to use in laying down the instruction bytes.
   889  // The actual bytes laid down are a function of the 1st entry in the line (that
   890  // is, the Ztype) and the z bytes.
   891  //
   892  // For example, let's look at AADDL.  The optab line says:
   893  //        {AADDL, yaddl, Px, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   894  //
   895  // and yaddl says
   896  //        var yaddl = []ytab{
   897  //                {Zibo_m, 2, argList{Yi8, Yml}},
   898  //                {Zil_, 1, argList{Yi32, Yax}},
   899  //                {Zilo_m, 2, argList{Yi32, Yml}},
   900  //                {Zr_m, 1, argList{Yrl, Yml}},
   901  //                {Zm_r, 1, argList{Yml, Yrl}},
   902  //        }
   903  //
   904  // so there are 5 possible types of ADDL instruction that can be laid down, and
   905  // possible states used to lay them down (Ztype and z pointer, assuming z
   906  // points at opBytes{0x83, 00, 0x05,0x81, 00, 0x01, 0x03}) are:
   907  //
   908  //        Yi8, Yml -> Zibo_m, z (0x83, 00)
   909  //        Yi32, Yax -> Zil_, z+2 (0x05)
   910  //        Yi32, Yml -> Zilo_m, z+2+1 (0x81, 0x00)
   911  //        Yrl, Yml -> Zr_m, z+2+1+2 (0x01)
   912  //        Yml, Yrl -> Zm_r, z+2+1+2+1 (0x03)
   913  //
   914  // The Pconstant in the optab line controls the prefix bytes to emit.  That's
   915  // relatively straightforward as this program goes.
   916  //
   917  // The switch on yt.zcase in doasm implements the various Z cases.  Zibo_m, for
   918  // example, is an opcode byte (z[0]) then an asmando (which is some kind of
   919  // encoded addressing mode for the Yml arg), and then a single immediate byte.
   920  // Zilo_m is the same but a long (32-bit) immediate.
   921  var optab =
   922  //	as, ytab, andproto, opcode
   923  [...]Optab{
   924  	{obj.AXXX, nil, 0, opBytes{}},
   925  	{AAAA, ynone, P32, opBytes{0x37}},
   926  	{AAAD, ynone, P32, opBytes{0xd5, 0x0a}},
   927  	{AAAM, ynone, P32, opBytes{0xd4, 0x0a}},
   928  	{AAAS, ynone, P32, opBytes{0x3f}},
   929  	{AADCB, yxorb, Pb, opBytes{0x14, 0x80, 02, 0x10, 0x12}},
   930  	{AADCL, yaddl, Px, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   931  	{AADCQ, yaddl, Pw, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   932  	{AADCW, yaddl, Pe, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   933  	{AADCXL, yml_rl, Pq4, opBytes{0xf6}},
   934  	{AADCXQ, yml_rl, Pq4w, opBytes{0xf6}},
   935  	{AADDB, yxorb, Pb, opBytes{0x04, 0x80, 00, 0x00, 0x02}},
   936  	{AADDL, yaddl, Px, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   937  	{AADDPD, yxm, Pq, opBytes{0x58}},
   938  	{AADDPS, yxm, Pm, opBytes{0x58}},
   939  	{AADDQ, yaddl, Pw, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   940  	{AADDSD, yxm, Pf2, opBytes{0x58}},
   941  	{AADDSS, yxm, Pf3, opBytes{0x58}},
   942  	{AADDSUBPD, yxm, Pq, opBytes{0xd0}},
   943  	{AADDSUBPS, yxm, Pf2, opBytes{0xd0}},
   944  	{AADDW, yaddl, Pe, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   945  	{AADOXL, yml_rl, Pq5, opBytes{0xf6}},
   946  	{AADOXQ, yml_rl, Pq5w, opBytes{0xf6}},
   947  	{AADJSP, nil, 0, opBytes{}},
   948  	{AANDB, yxorb, Pb, opBytes{0x24, 0x80, 04, 0x20, 0x22}},
   949  	{AANDL, yaddl, Px, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   950  	{AANDNPD, yxm, Pq, opBytes{0x55}},
   951  	{AANDNPS, yxm, Pm, opBytes{0x55}},
   952  	{AANDPD, yxm, Pq, opBytes{0x54}},
   953  	{AANDPS, yxm, Pm, opBytes{0x54}},
   954  	{AANDQ, yaddl, Pw, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   955  	{AANDW, yaddl, Pe, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   956  	{AARPL, yrl_ml, P32, opBytes{0x63}},
   957  	{ABOUNDL, yrl_m, P32, opBytes{0x62}},
   958  	{ABOUNDW, yrl_m, Pe, opBytes{0x62}},
   959  	{ABSFL, yml_rl, Pm, opBytes{0xbc}},
   960  	{ABSFQ, yml_rl, Pw, opBytes{0x0f, 0xbc}},
   961  	{ABSFW, yml_rl, Pq, opBytes{0xbc}},
   962  	{ABSRL, yml_rl, Pm, opBytes{0xbd}},
   963  	{ABSRQ, yml_rl, Pw, opBytes{0x0f, 0xbd}},
   964  	{ABSRW, yml_rl, Pq, opBytes{0xbd}},
   965  	{ABSWAPL, ybswap, Px, opBytes{0x0f, 0xc8}},
   966  	{ABSWAPQ, ybswap, Pw, opBytes{0x0f, 0xc8}},
   967  	{ABTCL, ybtl, Pm, opBytes{0xba, 07, 0xbb}},
   968  	{ABTCQ, ybtl, Pw, opBytes{0x0f, 0xba, 07, 0x0f, 0xbb}},
   969  	{ABTCW, ybtl, Pq, opBytes{0xba, 07, 0xbb}},
   970  	{ABTL, ybtl, Pm, opBytes{0xba, 04, 0xa3}},
   971  	{ABTQ, ybtl, Pw, opBytes{0x0f, 0xba, 04, 0x0f, 0xa3}},
   972  	{ABTRL, ybtl, Pm, opBytes{0xba, 06, 0xb3}},
   973  	{ABTRQ, ybtl, Pw, opBytes{0x0f, 0xba, 06, 0x0f, 0xb3}},
   974  	{ABTRW, ybtl, Pq, opBytes{0xba, 06, 0xb3}},
   975  	{ABTSL, ybtl, Pm, opBytes{0xba, 05, 0xab}},
   976  	{ABTSQ, ybtl, Pw, opBytes{0x0f, 0xba, 05, 0x0f, 0xab}},
   977  	{ABTSW, ybtl, Pq, opBytes{0xba, 05, 0xab}},
   978  	{ABTW, ybtl, Pq, opBytes{0xba, 04, 0xa3}},
   979  	{ABYTE, ybyte, Px, opBytes{1}},
   980  	{obj.ACALL, ycall, Px, opBytes{0xff, 02, 0xff, 0x15, 0xe8}},
   981  	{ACBW, ynone, Pe, opBytes{0x98}},
   982  	{ACDQ, ynone, Px, opBytes{0x99}},
   983  	{ACDQE, ynone, Pw, opBytes{0x98}},
   984  	{ACLAC, ynone, Pm, opBytes{01, 0xca}},
   985  	{ACLC, ynone, Px, opBytes{0xf8}},
   986  	{ACLD, ynone, Px, opBytes{0xfc}},
   987  	{ACLDEMOTE, yclflush, Pm, opBytes{0x1c, 00}},
   988  	{ACLFLUSH, yclflush, Pm, opBytes{0xae, 07}},
   989  	{ACLFLUSHOPT, yclflush, Pq, opBytes{0xae, 07}},
   990  	{ACLI, ynone, Px, opBytes{0xfa}},
   991  	{ACLTS, ynone, Pm, opBytes{0x06}},
   992  	{ACLWB, yclflush, Pq, opBytes{0xae, 06}},
   993  	{ACMC, ynone, Px, opBytes{0xf5}},
   994  	{ACMOVLCC, yml_rl, Pm, opBytes{0x43}},
   995  	{ACMOVLCS, yml_rl, Pm, opBytes{0x42}},
   996  	{ACMOVLEQ, yml_rl, Pm, opBytes{0x44}},
   997  	{ACMOVLGE, yml_rl, Pm, opBytes{0x4d}},
   998  	{ACMOVLGT, yml_rl, Pm, opBytes{0x4f}},
   999  	{ACMOVLHI, yml_rl, Pm, opBytes{0x47}},
  1000  	{ACMOVLLE, yml_rl, Pm, opBytes{0x4e}},
  1001  	{ACMOVLLS, yml_rl, Pm, opBytes{0x46}},
  1002  	{ACMOVLLT, yml_rl, Pm, opBytes{0x4c}},
  1003  	{ACMOVLMI, yml_rl, Pm, opBytes{0x48}},
  1004  	{ACMOVLNE, yml_rl, Pm, opBytes{0x45}},
  1005  	{ACMOVLOC, yml_rl, Pm, opBytes{0x41}},
  1006  	{ACMOVLOS, yml_rl, Pm, opBytes{0x40}},
  1007  	{ACMOVLPC, yml_rl, Pm, opBytes{0x4b}},
  1008  	{ACMOVLPL, yml_rl, Pm, opBytes{0x49}},
  1009  	{ACMOVLPS, yml_rl, Pm, opBytes{0x4a}},
  1010  	{ACMOVQCC, yml_rl, Pw, opBytes{0x0f, 0x43}},
  1011  	{ACMOVQCS, yml_rl, Pw, opBytes{0x0f, 0x42}},
  1012  	{ACMOVQEQ, yml_rl, Pw, opBytes{0x0f, 0x44}},
  1013  	{ACMOVQGE, yml_rl, Pw, opBytes{0x0f, 0x4d}},
  1014  	{ACMOVQGT, yml_rl, Pw, opBytes{0x0f, 0x4f}},
  1015  	{ACMOVQHI, yml_rl, Pw, opBytes{0x0f, 0x47}},
  1016  	{ACMOVQLE, yml_rl, Pw, opBytes{0x0f, 0x4e}},
  1017  	{ACMOVQLS, yml_rl, Pw, opBytes{0x0f, 0x46}},
  1018  	{ACMOVQLT, yml_rl, Pw, opBytes{0x0f, 0x4c}},
  1019  	{ACMOVQMI, yml_rl, Pw, opBytes{0x0f, 0x48}},
  1020  	{ACMOVQNE, yml_rl, Pw, opBytes{0x0f, 0x45}},
  1021  	{ACMOVQOC, yml_rl, Pw, opBytes{0x0f, 0x41}},
  1022  	{ACMOVQOS, yml_rl, Pw, opBytes{0x0f, 0x40}},
  1023  	{ACMOVQPC, yml_rl, Pw, opBytes{0x0f, 0x4b}},
  1024  	{ACMOVQPL, yml_rl, Pw, opBytes{0x0f, 0x49}},
  1025  	{ACMOVQPS, yml_rl, Pw, opBytes{0x0f, 0x4a}},
  1026  	{ACMOVWCC, yml_rl, Pq, opBytes{0x43}},
  1027  	{ACMOVWCS, yml_rl, Pq, opBytes{0x42}},
  1028  	{ACMOVWEQ, yml_rl, Pq, opBytes{0x44}},
  1029  	{ACMOVWGE, yml_rl, Pq, opBytes{0x4d}},
  1030  	{ACMOVWGT, yml_rl, Pq, opBytes{0x4f}},
  1031  	{ACMOVWHI, yml_rl, Pq, opBytes{0x47}},
  1032  	{ACMOVWLE, yml_rl, Pq, opBytes{0x4e}},
  1033  	{ACMOVWLS, yml_rl, Pq, opBytes{0x46}},
  1034  	{ACMOVWLT, yml_rl, Pq, opBytes{0x4c}},
  1035  	{ACMOVWMI, yml_rl, Pq, opBytes{0x48}},
  1036  	{ACMOVWNE, yml_rl, Pq, opBytes{0x45}},
  1037  	{ACMOVWOC, yml_rl, Pq, opBytes{0x41}},
  1038  	{ACMOVWOS, yml_rl, Pq, opBytes{0x40}},
  1039  	{ACMOVWPC, yml_rl, Pq, opBytes{0x4b}},
  1040  	{ACMOVWPL, yml_rl, Pq, opBytes{0x49}},
  1041  	{ACMOVWPS, yml_rl, Pq, opBytes{0x4a}},
  1042  	{ACMPB, ycmpb, Pb, opBytes{0x3c, 0x80, 07, 0x38, 0x3a}},
  1043  	{ACMPL, ycmpl, Px, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1044  	{ACMPPD, yxcmpi, Px, opBytes{Pe, 0xc2}},
  1045  	{ACMPPS, yxcmpi, Pm, opBytes{0xc2, 0}},
  1046  	{ACMPQ, ycmpl, Pw, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1047  	{ACMPSB, ynone, Pb, opBytes{0xa6}},
  1048  	{ACMPSD, yxcmpi, Px, opBytes{Pf2, 0xc2}},
  1049  	{ACMPSL, ynone, Px, opBytes{0xa7}},
  1050  	{ACMPSQ, ynone, Pw, opBytes{0xa7}},
  1051  	{ACMPSS, yxcmpi, Px, opBytes{Pf3, 0xc2}},
  1052  	{ACMPSW, ynone, Pe, opBytes{0xa7}},
  1053  	{ACMPW, ycmpl, Pe, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1054  	{ACOMISD, yxm, Pe, opBytes{0x2f}},
  1055  	{ACOMISS, yxm, Pm, opBytes{0x2f}},
  1056  	{ACPUID, ynone, Pm, opBytes{0xa2}},
  1057  	{ACVTPL2PD, yxcvm2, Px, opBytes{Pf3, 0xe6, Pe, 0x2a}},
  1058  	{ACVTPL2PS, yxcvm2, Pm, opBytes{0x5b, 0, 0x2a, 0}},
  1059  	{ACVTPD2PL, yxcvm1, Px, opBytes{Pf2, 0xe6, Pe, 0x2d}},
  1060  	{ACVTPD2PS, yxm, Pe, opBytes{0x5a}},
  1061  	{ACVTPS2PL, yxcvm1, Px, opBytes{Pe, 0x5b, Pm, 0x2d}},
  1062  	{ACVTPS2PD, yxm, Pm, opBytes{0x5a}},
  1063  	{ACVTSD2SL, yxcvfl, Pf2, opBytes{0x2d}},
  1064  	{ACVTSD2SQ, yxcvfq, Pw, opBytes{Pf2, 0x2d}},
  1065  	{ACVTSD2SS, yxm, Pf2, opBytes{0x5a}},
  1066  	{ACVTSL2SD, yxcvlf, Pf2, opBytes{0x2a}},
  1067  	{ACVTSQ2SD, yxcvqf, Pw, opBytes{Pf2, 0x2a}},
  1068  	{ACVTSL2SS, yxcvlf, Pf3, opBytes{0x2a}},
  1069  	{ACVTSQ2SS, yxcvqf, Pw, opBytes{Pf3, 0x2a}},
  1070  	{ACVTSS2SD, yxm, Pf3, opBytes{0x5a}},
  1071  	{ACVTSS2SL, yxcvfl, Pf3, opBytes{0x2d}},
  1072  	{ACVTSS2SQ, yxcvfq, Pw, opBytes{Pf3, 0x2d}},
  1073  	{ACVTTPD2PL, yxcvm1, Px, opBytes{Pe, 0xe6, Pe, 0x2c}},
  1074  	{ACVTTPS2PL, yxcvm1, Px, opBytes{Pf3, 0x5b, Pm, 0x2c}},
  1075  	{ACVTTSD2SL, yxcvfl, Pf2, opBytes{0x2c}},
  1076  	{ACVTTSD2SQ, yxcvfq, Pw, opBytes{Pf2, 0x2c}},
  1077  	{ACVTTSS2SL, yxcvfl, Pf3, opBytes{0x2c}},
  1078  	{ACVTTSS2SQ, yxcvfq, Pw, opBytes{Pf3, 0x2c}},
  1079  	{ACWD, ynone, Pe, opBytes{0x99}},
  1080  	{ACWDE, ynone, Px, opBytes{0x98}},
  1081  	{ACQO, ynone, Pw, opBytes{0x99}},
  1082  	{ADAA, ynone, P32, opBytes{0x27}},
  1083  	{ADAS, ynone, P32, opBytes{0x2f}},
  1084  	{ADECB, yscond, Pb, opBytes{0xfe, 01}},
  1085  	{ADECL, yincl, Px1, opBytes{0x48, 0xff, 01}},
  1086  	{ADECQ, yincq, Pw, opBytes{0xff, 01}},
  1087  	{ADECW, yincq, Pe, opBytes{0xff, 01}},
  1088  	{ADIVB, ydivb, Pb, opBytes{0xf6, 06}},
  1089  	{ADIVL, ydivl, Px, opBytes{0xf7, 06}},
  1090  	{ADIVPD, yxm, Pe, opBytes{0x5e}},
  1091  	{ADIVPS, yxm, Pm, opBytes{0x5e}},
  1092  	{ADIVQ, ydivl, Pw, opBytes{0xf7, 06}},
  1093  	{ADIVSD, yxm, Pf2, opBytes{0x5e}},
  1094  	{ADIVSS, yxm, Pf3, opBytes{0x5e}},
  1095  	{ADIVW, ydivl, Pe, opBytes{0xf7, 06}},
  1096  	{ADPPD, yxshuf, Pq, opBytes{0x3a, 0x41, 0}},
  1097  	{ADPPS, yxshuf, Pq, opBytes{0x3a, 0x40, 0}},
  1098  	{AEMMS, ynone, Pm, opBytes{0x77}},
  1099  	{AEXTRACTPS, yextractps, Pq, opBytes{0x3a, 0x17, 0}},
  1100  	{AENTER, nil, 0, opBytes{}}, // botch
  1101  	{AFXRSTOR, ysvrs_mo, Pm, opBytes{0xae, 01, 0xae, 01}},
  1102  	{AFXSAVE, ysvrs_om, Pm, opBytes{0xae, 00, 0xae, 00}},
  1103  	{AFXRSTOR64, ysvrs_mo, Pw, opBytes{0x0f, 0xae, 01, 0x0f, 0xae, 01}},
  1104  	{AFXSAVE64, ysvrs_om, Pw, opBytes{0x0f, 0xae, 00, 0x0f, 0xae, 00}},
  1105  	{AHLT, ynone, Px, opBytes{0xf4}},
  1106  	{AIDIVB, ydivb, Pb, opBytes{0xf6, 07}},
  1107  	{AIDIVL, ydivl, Px, opBytes{0xf7, 07}},
  1108  	{AIDIVQ, ydivl, Pw, opBytes{0xf7, 07}},
  1109  	{AIDIVW, ydivl, Pe, opBytes{0xf7, 07}},
  1110  	{AIMULB, ydivb, Pb, opBytes{0xf6, 05}},
  1111  	{AIMULL, yimul, Px, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1112  	{AIMULQ, yimul, Pw, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1113  	{AIMULW, yimul, Pe, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1114  	{AIMUL3W, yimul3, Pe, opBytes{0x6b, 00, 0x69, 00}},
  1115  	{AIMUL3L, yimul3, Px, opBytes{0x6b, 00, 0x69, 00}},
  1116  	{AIMUL3Q, yimul3, Pw, opBytes{0x6b, 00, 0x69, 00}},
  1117  	{AINB, yin, Pb, opBytes{0xe4, 0xec}},
  1118  	{AINW, yin, Pe, opBytes{0xe5, 0xed}},
  1119  	{AINL, yin, Px, opBytes{0xe5, 0xed}},
  1120  	{AINCB, yscond, Pb, opBytes{0xfe, 00}},
  1121  	{AINCL, yincl, Px1, opBytes{0x40, 0xff, 00}},
  1122  	{AINCQ, yincq, Pw, opBytes{0xff, 00}},
  1123  	{AINCW, yincq, Pe, opBytes{0xff, 00}},
  1124  	{AINSB, ynone, Pb, opBytes{0x6c}},
  1125  	{AINSL, ynone, Px, opBytes{0x6d}},
  1126  	{AINSERTPS, yxshuf, Pq, opBytes{0x3a, 0x21, 0}},
  1127  	{AINSW, ynone, Pe, opBytes{0x6d}},
  1128  	{AICEBP, ynone, Px, opBytes{0xf1}},
  1129  	{AINT, yint, Px, opBytes{0xcd}},
  1130  	{AINTO, ynone, P32, opBytes{0xce}},
  1131  	{AIRETL, ynone, Px, opBytes{0xcf}},
  1132  	{AIRETQ, ynone, Pw, opBytes{0xcf}},
  1133  	{AIRETW, ynone, Pe, opBytes{0xcf}},
  1134  	{AJCC, yjcond, Px, opBytes{0x73, 0x83, 00}},
  1135  	{AJCS, yjcond, Px, opBytes{0x72, 0x82}},
  1136  	{AJCXZL, yloop, Px, opBytes{0xe3}},
  1137  	{AJCXZW, yloop, Px, opBytes{0xe3}},
  1138  	{AJCXZQ, yloop, Px, opBytes{0xe3}},
  1139  	{AJEQ, yjcond, Px, opBytes{0x74, 0x84}},
  1140  	{AJGE, yjcond, Px, opBytes{0x7d, 0x8d}},
  1141  	{AJGT, yjcond, Px, opBytes{0x7f, 0x8f}},
  1142  	{AJHI, yjcond, Px, opBytes{0x77, 0x87}},
  1143  	{AJLE, yjcond, Px, opBytes{0x7e, 0x8e}},
  1144  	{AJLS, yjcond, Px, opBytes{0x76, 0x86}},
  1145  	{AJLT, yjcond, Px, opBytes{0x7c, 0x8c}},
  1146  	{AJMI, yjcond, Px, opBytes{0x78, 0x88}},
  1147  	{obj.AJMP, yjmp, Px, opBytes{0xff, 04, 0xeb, 0xe9}},
  1148  	{AJNE, yjcond, Px, opBytes{0x75, 0x85}},
  1149  	{AJOC, yjcond, Px, opBytes{0x71, 0x81, 00}},
  1150  	{AJOS, yjcond, Px, opBytes{0x70, 0x80, 00}},
  1151  	{AJPC, yjcond, Px, opBytes{0x7b, 0x8b}},
  1152  	{AJPL, yjcond, Px, opBytes{0x79, 0x89}},
  1153  	{AJPS, yjcond, Px, opBytes{0x7a, 0x8a}},
  1154  	{AHADDPD, yxm, Pq, opBytes{0x7c}},
  1155  	{AHADDPS, yxm, Pf2, opBytes{0x7c}},
  1156  	{AHSUBPD, yxm, Pq, opBytes{0x7d}},
  1157  	{AHSUBPS, yxm, Pf2, opBytes{0x7d}},
  1158  	{ALAHF, ynone, Px, opBytes{0x9f}},
  1159  	{ALARL, yml_rl, Pm, opBytes{0x02}},
  1160  	{ALARQ, yml_rl, Pw, opBytes{0x0f, 0x02}},
  1161  	{ALARW, yml_rl, Pq, opBytes{0x02}},
  1162  	{ALDDQU, ylddqu, Pf2, opBytes{0xf0}},
  1163  	{ALDMXCSR, ysvrs_mo, Pm, opBytes{0xae, 02, 0xae, 02}},
  1164  	{ALEAL, ym_rl, Px, opBytes{0x8d}},
  1165  	{ALEAQ, ym_rl, Pw, opBytes{0x8d}},
  1166  	{ALEAVEL, ynone, P32, opBytes{0xc9}},
  1167  	{ALEAVEQ, ynone, Py, opBytes{0xc9}},
  1168  	{ALEAVEW, ynone, Pe, opBytes{0xc9}},
  1169  	{ALEAW, ym_rl, Pe, opBytes{0x8d}},
  1170  	{ALOCK, ynone, Px, opBytes{0xf0}},
  1171  	{ALODSB, ynone, Pb, opBytes{0xac}},
  1172  	{ALODSL, ynone, Px, opBytes{0xad}},
  1173  	{ALODSQ, ynone, Pw, opBytes{0xad}},
  1174  	{ALODSW, ynone, Pe, opBytes{0xad}},
  1175  	{ALONG, ybyte, Px, opBytes{4}},
  1176  	{ALOOP, yloop, Px, opBytes{0xe2}},
  1177  	{ALOOPEQ, yloop, Px, opBytes{0xe1}},
  1178  	{ALOOPNE, yloop, Px, opBytes{0xe0}},
  1179  	{ALTR, ydivl, Pm, opBytes{0x00, 03}},
  1180  	{ALZCNTL, yml_rl, Pf3, opBytes{0xbd}},
  1181  	{ALZCNTQ, yml_rl, Pfw, opBytes{0xbd}},
  1182  	{ALZCNTW, yml_rl, Pef3, opBytes{0xbd}},
  1183  	{ALSLL, yml_rl, Pm, opBytes{0x03}},
  1184  	{ALSLW, yml_rl, Pq, opBytes{0x03}},
  1185  	{ALSLQ, yml_rl, Pw, opBytes{0x0f, 0x03}},
  1186  	{AMASKMOVOU, yxr, Pe, opBytes{0xf7}},
  1187  	{AMASKMOVQ, ymr, Pm, opBytes{0xf7}},
  1188  	{AMAXPD, yxm, Pe, opBytes{0x5f}},
  1189  	{AMAXPS, yxm, Pm, opBytes{0x5f}},
  1190  	{AMAXSD, yxm, Pf2, opBytes{0x5f}},
  1191  	{AMAXSS, yxm, Pf3, opBytes{0x5f}},
  1192  	{AMINPD, yxm, Pe, opBytes{0x5d}},
  1193  	{AMINPS, yxm, Pm, opBytes{0x5d}},
  1194  	{AMINSD, yxm, Pf2, opBytes{0x5d}},
  1195  	{AMINSS, yxm, Pf3, opBytes{0x5d}},
  1196  	{AMONITOR, ynone, Px, opBytes{0x0f, 0x01, 0xc8, 0}},
  1197  	{AMWAIT, ynone, Px, opBytes{0x0f, 0x01, 0xc9, 0}},
  1198  	{AMOVAPD, yxmov, Pe, opBytes{0x28, 0x29}},
  1199  	{AMOVAPS, yxmov, Pm, opBytes{0x28, 0x29}},
  1200  	{AMOVB, ymovb, Pb, opBytes{0x88, 0x8a, 0xb0, 0xc6, 00}},
  1201  	{AMOVBLSX, ymb_rl, Pm, opBytes{0xbe}},
  1202  	{AMOVBLZX, ymb_rl, Pm, opBytes{0xb6}},
  1203  	{AMOVBQSX, ymb_rl, Pw, opBytes{0x0f, 0xbe}},
  1204  	{AMOVBQZX, ymb_rl, Pw, opBytes{0x0f, 0xb6}},
  1205  	{AMOVBWSX, ymb_rl, Pq, opBytes{0xbe}},
  1206  	{AMOVSWW, ymb_rl, Pe, opBytes{0x0f, 0xbf}},
  1207  	{AMOVBWZX, ymb_rl, Pq, opBytes{0xb6}},
  1208  	{AMOVZWW, ymb_rl, Pe, opBytes{0x0f, 0xb7}},
  1209  	{AMOVO, yxmov, Pe, opBytes{0x6f, 0x7f}},
  1210  	{AMOVOU, yxmov, Pf3, opBytes{0x6f, 0x7f}},
  1211  	{AMOVHLPS, yxr, Pm, opBytes{0x12}},
  1212  	{AMOVHPD, yxmov, Pe, opBytes{0x16, 0x17}},
  1213  	{AMOVHPS, yxmov, Pm, opBytes{0x16, 0x17}},
  1214  	{AMOVL, ymovl, Px, opBytes{0x89, 0x8b, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1215  	{AMOVLHPS, yxr, Pm, opBytes{0x16}},
  1216  	{AMOVLPD, yxmov, Pe, opBytes{0x12, 0x13}},
  1217  	{AMOVLPS, yxmov, Pm, opBytes{0x12, 0x13}},
  1218  	{AMOVLQSX, yml_rl, Pw, opBytes{0x63}},
  1219  	{AMOVLQZX, yml_rl, Px, opBytes{0x8b}},
  1220  	{AMOVMSKPD, yxrrl, Pq, opBytes{0x50}},
  1221  	{AMOVMSKPS, yxrrl, Pm, opBytes{0x50}},
  1222  	{AMOVNTO, yxr_ml, Pe, opBytes{0xe7}},
  1223  	{AMOVNTDQA, ylddqu, Pq4, opBytes{0x2a}},
  1224  	{AMOVNTPD, yxr_ml, Pe, opBytes{0x2b}},
  1225  	{AMOVNTPS, yxr_ml, Pm, opBytes{0x2b}},
  1226  	{AMOVNTQ, ymr_ml, Pm, opBytes{0xe7}},
  1227  	{AMOVQ, ymovq, Pw8, opBytes{0x6f, 0x7f, Pf2, 0xd6, Pf3, 0x7e, Pe, 0xd6, 0x89, 0x8b, 0xc7, 00, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1228  	{AMOVQOZX, ymrxr, Pf3, opBytes{0xd6, 0x7e}},
  1229  	{AMOVSB, ynone, Pb, opBytes{0xa4}},
  1230  	{AMOVSD, yxmov, Pf2, opBytes{0x10, 0x11}},
  1231  	{AMOVSL, ynone, Px, opBytes{0xa5}},
  1232  	{AMOVSQ, ynone, Pw, opBytes{0xa5}},
  1233  	{AMOVSS, yxmov, Pf3, opBytes{0x10, 0x11}},
  1234  	{AMOVSW, ynone, Pe, opBytes{0xa5}},
  1235  	{AMOVUPD, yxmov, Pe, opBytes{0x10, 0x11}},
  1236  	{AMOVUPS, yxmov, Pm, opBytes{0x10, 0x11}},
  1237  	{AMOVW, ymovw, Pe, opBytes{0x89, 0x8b, 0xb8, 0xc7, 00, 0}},
  1238  	{AMOVWLSX, yml_rl, Pm, opBytes{0xbf}},
  1239  	{AMOVWLZX, yml_rl, Pm, opBytes{0xb7}},
  1240  	{AMOVWQSX, yml_rl, Pw, opBytes{0x0f, 0xbf}},
  1241  	{AMOVWQZX, yml_rl, Pw, opBytes{0x0f, 0xb7}},
  1242  	{AMPSADBW, yxshuf, Pq, opBytes{0x3a, 0x42, 0}},
  1243  	{AMULB, ydivb, Pb, opBytes{0xf6, 04}},
  1244  	{AMULL, ydivl, Px, opBytes{0xf7, 04}},
  1245  	{AMULPD, yxm, Pe, opBytes{0x59}},
  1246  	{AMULPS, yxm, Ym, opBytes{0x59}},
  1247  	{AMULQ, ydivl, Pw, opBytes{0xf7, 04}},
  1248  	{AMULSD, yxm, Pf2, opBytes{0x59}},
  1249  	{AMULSS, yxm, Pf3, opBytes{0x59}},
  1250  	{AMULW, ydivl, Pe, opBytes{0xf7, 04}},
  1251  	{ANEGB, yscond, Pb, opBytes{0xf6, 03}},
  1252  	{ANEGL, yscond, Px, opBytes{0xf7, 03}},
  1253  	{ANEGQ, yscond, Pw, opBytes{0xf7, 03}},
  1254  	{ANEGW, yscond, Pe, opBytes{0xf7, 03}},
  1255  	{obj.ANOP, ynop, Px, opBytes{0, 0}},
  1256  	{ANOTB, yscond, Pb, opBytes{0xf6, 02}},
  1257  	{ANOTL, yscond, Px, opBytes{0xf7, 02}}, // TODO(rsc): yscond is wrong here.
  1258  	{ANOTQ, yscond, Pw, opBytes{0xf7, 02}},
  1259  	{ANOTW, yscond, Pe, opBytes{0xf7, 02}},
  1260  	{AORB, yxorb, Pb, opBytes{0x0c, 0x80, 01, 0x08, 0x0a}},
  1261  	{AORL, yaddl, Px, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1262  	{AORPD, yxm, Pq, opBytes{0x56}},
  1263  	{AORPS, yxm, Pm, opBytes{0x56}},
  1264  	{AORQ, yaddl, Pw, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1265  	{AORW, yaddl, Pe, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1266  	{AOUTB, yin, Pb, opBytes{0xe6, 0xee}},
  1267  	{AOUTL, yin, Px, opBytes{0xe7, 0xef}},
  1268  	{AOUTW, yin, Pe, opBytes{0xe7, 0xef}},
  1269  	{AOUTSB, ynone, Pb, opBytes{0x6e}},
  1270  	{AOUTSL, ynone, Px, opBytes{0x6f}},
  1271  	{AOUTSW, ynone, Pe, opBytes{0x6f}},
  1272  	{APABSB, yxm_q4, Pq4, opBytes{0x1c}},
  1273  	{APABSD, yxm_q4, Pq4, opBytes{0x1e}},
  1274  	{APABSW, yxm_q4, Pq4, opBytes{0x1d}},
  1275  	{APACKSSLW, ymm, Py1, opBytes{0x6b, Pe, 0x6b}},
  1276  	{APACKSSWB, ymm, Py1, opBytes{0x63, Pe, 0x63}},
  1277  	{APACKUSDW, yxm_q4, Pq4, opBytes{0x2b}},
  1278  	{APACKUSWB, ymm, Py1, opBytes{0x67, Pe, 0x67}},
  1279  	{APADDB, ymm, Py1, opBytes{0xfc, Pe, 0xfc}},
  1280  	{APADDL, ymm, Py1, opBytes{0xfe, Pe, 0xfe}},
  1281  	{APADDQ, yxm, Pe, opBytes{0xd4}},
  1282  	{APADDSB, ymm, Py1, opBytes{0xec, Pe, 0xec}},
  1283  	{APADDSW, ymm, Py1, opBytes{0xed, Pe, 0xed}},
  1284  	{APADDUSB, ymm, Py1, opBytes{0xdc, Pe, 0xdc}},
  1285  	{APADDUSW, ymm, Py1, opBytes{0xdd, Pe, 0xdd}},
  1286  	{APADDW, ymm, Py1, opBytes{0xfd, Pe, 0xfd}},
  1287  	{APALIGNR, ypalignr, Pq, opBytes{0x3a, 0x0f}},
  1288  	{APAND, ymm, Py1, opBytes{0xdb, Pe, 0xdb}},
  1289  	{APANDN, ymm, Py1, opBytes{0xdf, Pe, 0xdf}},
  1290  	{APAUSE, ynone, Px, opBytes{0xf3, 0x90}},
  1291  	{APAVGB, ymm, Py1, opBytes{0xe0, Pe, 0xe0}},
  1292  	{APAVGW, ymm, Py1, opBytes{0xe3, Pe, 0xe3}},
  1293  	{APBLENDW, yxshuf, Pq, opBytes{0x3a, 0x0e, 0}},
  1294  	{APCMPEQB, ymm, Py1, opBytes{0x74, Pe, 0x74}},
  1295  	{APCMPEQL, ymm, Py1, opBytes{0x76, Pe, 0x76}},
  1296  	{APCMPEQQ, yxm_q4, Pq4, opBytes{0x29}},
  1297  	{APCMPEQW, ymm, Py1, opBytes{0x75, Pe, 0x75}},
  1298  	{APCMPGTB, ymm, Py1, opBytes{0x64, Pe, 0x64}},
  1299  	{APCMPGTL, ymm, Py1, opBytes{0x66, Pe, 0x66}},
  1300  	{APCMPGTQ, yxm_q4, Pq4, opBytes{0x37}},
  1301  	{APCMPGTW, ymm, Py1, opBytes{0x65, Pe, 0x65}},
  1302  	{APCMPISTRI, yxshuf, Pq, opBytes{0x3a, 0x63, 0}},
  1303  	{APCMPISTRM, yxshuf, Pq, opBytes{0x3a, 0x62, 0}},
  1304  	{APEXTRW, yextrw, Pq, opBytes{0xc5, 0, 0x3a, 0x15, 0}},
  1305  	{APEXTRB, yextr, Pq, opBytes{0x3a, 0x14, 00}},
  1306  	{APEXTRD, yextr, Pq, opBytes{0x3a, 0x16, 00}},
  1307  	{APEXTRQ, yextr, Pq3, opBytes{0x3a, 0x16, 00}},
  1308  	{APHADDD, ymmxmm0f38, Px, opBytes{0x0F, 0x38, 0x02, 0, 0x66, 0x0F, 0x38, 0x02, 0}},
  1309  	{APHADDSW, yxm_q4, Pq4, opBytes{0x03}},
  1310  	{APHADDW, yxm_q4, Pq4, opBytes{0x01}},
  1311  	{APHMINPOSUW, yxm_q4, Pq4, opBytes{0x41}},
  1312  	{APHSUBD, yxm_q4, Pq4, opBytes{0x06}},
  1313  	{APHSUBSW, yxm_q4, Pq4, opBytes{0x07}},
  1314  	{APHSUBW, yxm_q4, Pq4, opBytes{0x05}},
  1315  	{APINSRW, yinsrw, Pq, opBytes{0xc4, 00}},
  1316  	{APINSRB, yinsr, Pq, opBytes{0x3a, 0x20, 00}},
  1317  	{APINSRD, yinsr, Pq, opBytes{0x3a, 0x22, 00}},
  1318  	{APINSRQ, yinsr, Pq3, opBytes{0x3a, 0x22, 00}},
  1319  	{APMADDUBSW, yxm_q4, Pq4, opBytes{0x04}},
  1320  	{APMADDWL, ymm, Py1, opBytes{0xf5, Pe, 0xf5}},
  1321  	{APMAXSB, yxm_q4, Pq4, opBytes{0x3c}},
  1322  	{APMAXSD, yxm_q4, Pq4, opBytes{0x3d}},
  1323  	{APMAXSW, yxm, Pe, opBytes{0xee}},
  1324  	{APMAXUB, yxm, Pe, opBytes{0xde}},
  1325  	{APMAXUD, yxm_q4, Pq4, opBytes{0x3f}},
  1326  	{APMAXUW, yxm_q4, Pq4, opBytes{0x3e}},
  1327  	{APMINSB, yxm_q4, Pq4, opBytes{0x38}},
  1328  	{APMINSD, yxm_q4, Pq4, opBytes{0x39}},
  1329  	{APMINSW, yxm, Pe, opBytes{0xea}},
  1330  	{APMINUB, yxm, Pe, opBytes{0xda}},
  1331  	{APMINUD, yxm_q4, Pq4, opBytes{0x3b}},
  1332  	{APMINUW, yxm_q4, Pq4, opBytes{0x3a}},
  1333  	{APMOVMSKB, ymskb, Px, opBytes{Pe, 0xd7, 0xd7}},
  1334  	{APMOVSXBD, yxm_q4, Pq4, opBytes{0x21}},
  1335  	{APMOVSXBQ, yxm_q4, Pq4, opBytes{0x22}},
  1336  	{APMOVSXBW, yxm_q4, Pq4, opBytes{0x20}},
  1337  	{APMOVSXDQ, yxm_q4, Pq4, opBytes{0x25}},
  1338  	{APMOVSXWD, yxm_q4, Pq4, opBytes{0x23}},
  1339  	{APMOVSXWQ, yxm_q4, Pq4, opBytes{0x24}},
  1340  	{APMOVZXBD, yxm_q4, Pq4, opBytes{0x31}},
  1341  	{APMOVZXBQ, yxm_q4, Pq4, opBytes{0x32}},
  1342  	{APMOVZXBW, yxm_q4, Pq4, opBytes{0x30}},
  1343  	{APMOVZXDQ, yxm_q4, Pq4, opBytes{0x35}},
  1344  	{APMOVZXWD, yxm_q4, Pq4, opBytes{0x33}},
  1345  	{APMOVZXWQ, yxm_q4, Pq4, opBytes{0x34}},
  1346  	{APMULDQ, yxm_q4, Pq4, opBytes{0x28}},
  1347  	{APMULHRSW, yxm_q4, Pq4, opBytes{0x0b}},
  1348  	{APMULHUW, ymm, Py1, opBytes{0xe4, Pe, 0xe4}},
  1349  	{APMULHW, ymm, Py1, opBytes{0xe5, Pe, 0xe5}},
  1350  	{APMULLD, yxm_q4, Pq4, opBytes{0x40}},
  1351  	{APMULLW, ymm, Py1, opBytes{0xd5, Pe, 0xd5}},
  1352  	{APMULULQ, ymm, Py1, opBytes{0xf4, Pe, 0xf4}},
  1353  	{APOPAL, ynone, P32, opBytes{0x61}},
  1354  	{APOPAW, ynone, Pe, opBytes{0x61}},
  1355  	{APOPCNTW, yml_rl, Pef3, opBytes{0xb8}},
  1356  	{APOPCNTL, yml_rl, Pf3, opBytes{0xb8}},
  1357  	{APOPCNTQ, yml_rl, Pfw, opBytes{0xb8}},
  1358  	{APOPFL, ynone, P32, opBytes{0x9d}},
  1359  	{APOPFQ, ynone, Py, opBytes{0x9d}},
  1360  	{APOPFW, ynone, Pe, opBytes{0x9d}},
  1361  	{APOPL, ypopl, P32, opBytes{0x58, 0x8f, 00}},
  1362  	{APOPQ, ypopl, Py, opBytes{0x58, 0x8f, 00}},
  1363  	{APOPW, ypopl, Pe, opBytes{0x58, 0x8f, 00}},
  1364  	{APOR, ymm, Py1, opBytes{0xeb, Pe, 0xeb}},
  1365  	{APSADBW, yxm, Pq, opBytes{0xf6}},
  1366  	{APSHUFHW, yxshuf, Pf3, opBytes{0x70, 00}},
  1367  	{APSHUFL, yxshuf, Pq, opBytes{0x70, 00}},
  1368  	{APSHUFLW, yxshuf, Pf2, opBytes{0x70, 00}},
  1369  	{APSHUFW, ymshuf, Pm, opBytes{0x70, 00}},
  1370  	{APSHUFB, ymshufb, Pq, opBytes{0x38, 0x00}},
  1371  	{APSIGNB, yxm_q4, Pq4, opBytes{0x08}},
  1372  	{APSIGND, yxm_q4, Pq4, opBytes{0x0a}},
  1373  	{APSIGNW, yxm_q4, Pq4, opBytes{0x09}},
  1374  	{APSLLO, ypsdq, Pq, opBytes{0x73, 07}},
  1375  	{APSLLL, yps, Py3, opBytes{0xf2, 0x72, 06, Pe, 0xf2, Pe, 0x72, 06}},
  1376  	{APSLLQ, yps, Py3, opBytes{0xf3, 0x73, 06, Pe, 0xf3, Pe, 0x73, 06}},
  1377  	{APSLLW, yps, Py3, opBytes{0xf1, 0x71, 06, Pe, 0xf1, Pe, 0x71, 06}},
  1378  	{APSRAL, yps, Py3, opBytes{0xe2, 0x72, 04, Pe, 0xe2, Pe, 0x72, 04}},
  1379  	{APSRAW, yps, Py3, opBytes{0xe1, 0x71, 04, Pe, 0xe1, Pe, 0x71, 04}},
  1380  	{APSRLO, ypsdq, Pq, opBytes{0x73, 03}},
  1381  	{APSRLL, yps, Py3, opBytes{0xd2, 0x72, 02, Pe, 0xd2, Pe, 0x72, 02}},
  1382  	{APSRLQ, yps, Py3, opBytes{0xd3, 0x73, 02, Pe, 0xd3, Pe, 0x73, 02}},
  1383  	{APSRLW, yps, Py3, opBytes{0xd1, 0x71, 02, Pe, 0xd1, Pe, 0x71, 02}},
  1384  	{APSUBB, yxm, Pe, opBytes{0xf8}},
  1385  	{APSUBL, yxm, Pe, opBytes{0xfa}},
  1386  	{APSUBQ, yxm, Pe, opBytes{0xfb}},
  1387  	{APSUBSB, yxm, Pe, opBytes{0xe8}},
  1388  	{APSUBSW, yxm, Pe, opBytes{0xe9}},
  1389  	{APSUBUSB, yxm, Pe, opBytes{0xd8}},
  1390  	{APSUBUSW, yxm, Pe, opBytes{0xd9}},
  1391  	{APSUBW, yxm, Pe, opBytes{0xf9}},
  1392  	{APTEST, yxm_q4, Pq4, opBytes{0x17}},
  1393  	{APUNPCKHBW, ymm, Py1, opBytes{0x68, Pe, 0x68}},
  1394  	{APUNPCKHLQ, ymm, Py1, opBytes{0x6a, Pe, 0x6a}},
  1395  	{APUNPCKHQDQ, yxm, Pe, opBytes{0x6d}},
  1396  	{APUNPCKHWL, ymm, Py1, opBytes{0x69, Pe, 0x69}},
  1397  	{APUNPCKLBW, ymm, Py1, opBytes{0x60, Pe, 0x60}},
  1398  	{APUNPCKLLQ, ymm, Py1, opBytes{0x62, Pe, 0x62}},
  1399  	{APUNPCKLQDQ, yxm, Pe, opBytes{0x6c}},
  1400  	{APUNPCKLWL, ymm, Py1, opBytes{0x61, Pe, 0x61}},
  1401  	{APUSHAL, ynone, P32, opBytes{0x60}},
  1402  	{APUSHAW, ynone, Pe, opBytes{0x60}},
  1403  	{APUSHFL, ynone, P32, opBytes{0x9c}},
  1404  	{APUSHFQ, ynone, Py, opBytes{0x9c}},
  1405  	{APUSHFW, ynone, Pe, opBytes{0x9c}},
  1406  	{APUSHL, ypushl, P32, opBytes{0x50, 0xff, 06, 0x6a, 0x68}},
  1407  	{APUSHQ, ypushl, Py, opBytes{0x50, 0xff, 06, 0x6a, 0x68}},
  1408  	{APUSHW, ypushl, Pe, opBytes{0x50, 0xff, 06, 0x6a, 0x68}},
  1409  	{APXOR, ymm, Py1, opBytes{0xef, Pe, 0xef}},
  1410  	{AQUAD, ybyte, Px, opBytes{8}},
  1411  	{ARCLB, yshb, Pb, opBytes{0xd0, 02, 0xc0, 02, 0xd2, 02}},
  1412  	{ARCLL, yshl, Px, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1413  	{ARCLQ, yshl, Pw, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1414  	{ARCLW, yshl, Pe, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1415  	{ARCPPS, yxm, Pm, opBytes{0x53}},
  1416  	{ARCPSS, yxm, Pf3, opBytes{0x53}},
  1417  	{ARCRB, yshb, Pb, opBytes{0xd0, 03, 0xc0, 03, 0xd2, 03}},
  1418  	{ARCRL, yshl, Px, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1419  	{ARCRQ, yshl, Pw, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1420  	{ARCRW, yshl, Pe, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1421  	{AREP, ynone, Px, opBytes{0xf3}},
  1422  	{AREPN, ynone, Px, opBytes{0xf2}},
  1423  	{obj.ARET, ynone, Px, opBytes{0xc3}},
  1424  	{ARETFW, yret, Pe, opBytes{0xcb, 0xca}},
  1425  	{ARETFL, yret, Px, opBytes{0xcb, 0xca}},
  1426  	{ARETFQ, yret, Pw, opBytes{0xcb, 0xca}},
  1427  	{AROLB, yshb, Pb, opBytes{0xd0, 00, 0xc0, 00, 0xd2, 00}},
  1428  	{AROLL, yshl, Px, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1429  	{AROLQ, yshl, Pw, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1430  	{AROLW, yshl, Pe, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1431  	{ARORB, yshb, Pb, opBytes{0xd0, 01, 0xc0, 01, 0xd2, 01}},
  1432  	{ARORL, yshl, Px, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1433  	{ARORQ, yshl, Pw, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1434  	{ARORW, yshl, Pe, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1435  	{ARSQRTPS, yxm, Pm, opBytes{0x52}},
  1436  	{ARSQRTSS, yxm, Pf3, opBytes{0x52}},
  1437  	{ASAHF, ynone, Px, opBytes{0x9e, 00, 0x86, 0xe0, 0x50, 0x9d}}, // XCHGB AH,AL; PUSH AX; POPFL
  1438  	{ASALB, yshb, Pb, opBytes{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1439  	{ASALL, yshl, Px, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1440  	{ASALQ, yshl, Pw, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1441  	{ASALW, yshl, Pe, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1442  	{ASARB, yshb, Pb, opBytes{0xd0, 07, 0xc0, 07, 0xd2, 07}},
  1443  	{ASARL, yshl, Px, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1444  	{ASARQ, yshl, Pw, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1445  	{ASARW, yshl, Pe, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1446  	{ASBBB, yxorb, Pb, opBytes{0x1c, 0x80, 03, 0x18, 0x1a}},
  1447  	{ASBBL, yaddl, Px, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1448  	{ASBBQ, yaddl, Pw, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1449  	{ASBBW, yaddl, Pe, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1450  	{ASCASB, ynone, Pb, opBytes{0xae}},
  1451  	{ASCASL, ynone, Px, opBytes{0xaf}},
  1452  	{ASCASQ, ynone, Pw, opBytes{0xaf}},
  1453  	{ASCASW, ynone, Pe, opBytes{0xaf}},
  1454  	{ASETCC, yscond, Pb, opBytes{0x0f, 0x93, 00}},
  1455  	{ASETCS, yscond, Pb, opBytes{0x0f, 0x92, 00}},
  1456  	{ASETEQ, yscond, Pb, opBytes{0x0f, 0x94, 00}},
  1457  	{ASETGE, yscond, Pb, opBytes{0x0f, 0x9d, 00}},
  1458  	{ASETGT, yscond, Pb, opBytes{0x0f, 0x9f, 00}},
  1459  	{ASETHI, yscond, Pb, opBytes{0x0f, 0x97, 00}},
  1460  	{ASETLE, yscond, Pb, opBytes{0x0f, 0x9e, 00}},
  1461  	{ASETLS, yscond, Pb, opBytes{0x0f, 0x96, 00}},
  1462  	{ASETLT, yscond, Pb, opBytes{0x0f, 0x9c, 00}},
  1463  	{ASETMI, yscond, Pb, opBytes{0x0f, 0x98, 00}},
  1464  	{ASETNE, yscond, Pb, opBytes{0x0f, 0x95, 00}},
  1465  	{ASETOC, yscond, Pb, opBytes{0x0f, 0x91, 00}},
  1466  	{ASETOS, yscond, Pb, opBytes{0x0f, 0x90, 00}},
  1467  	{ASETPC, yscond, Pb, opBytes{0x0f, 0x9b, 00}},
  1468  	{ASETPL, yscond, Pb, opBytes{0x0f, 0x99, 00}},
  1469  	{ASETPS, yscond, Pb, opBytes{0x0f, 0x9a, 00}},
  1470  	{ASHLB, yshb, Pb, opBytes{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1471  	{ASHLL, yshl, Px, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1472  	{ASHLQ, yshl, Pw, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1473  	{ASHLW, yshl, Pe, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1474  	{ASHRB, yshb, Pb, opBytes{0xd0, 05, 0xc0, 05, 0xd2, 05}},
  1475  	{ASHRL, yshl, Px, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1476  	{ASHRQ, yshl, Pw, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1477  	{ASHRW, yshl, Pe, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1478  	{ASHUFPD, yxshuf, Pq, opBytes{0xc6, 00}},
  1479  	{ASHUFPS, yxshuf, Pm, opBytes{0xc6, 00}},
  1480  	{ASQRTPD, yxm, Pe, opBytes{0x51}},
  1481  	{ASQRTPS, yxm, Pm, opBytes{0x51}},
  1482  	{ASQRTSD, yxm, Pf2, opBytes{0x51}},
  1483  	{ASQRTSS, yxm, Pf3, opBytes{0x51}},
  1484  	{ASTC, ynone, Px, opBytes{0xf9}},
  1485  	{ASTD, ynone, Px, opBytes{0xfd}},
  1486  	{ASTI, ynone, Px, opBytes{0xfb}},
  1487  	{ASTMXCSR, ysvrs_om, Pm, opBytes{0xae, 03, 0xae, 03}},
  1488  	{ASTOSB, ynone, Pb, opBytes{0xaa}},
  1489  	{ASTOSL, ynone, Px, opBytes{0xab}},
  1490  	{ASTOSQ, ynone, Pw, opBytes{0xab}},
  1491  	{ASTOSW, ynone, Pe, opBytes{0xab}},
  1492  	{ASUBB, yxorb, Pb, opBytes{0x2c, 0x80, 05, 0x28, 0x2a}},
  1493  	{ASUBL, yaddl, Px, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1494  	{ASUBPD, yxm, Pe, opBytes{0x5c}},
  1495  	{ASUBPS, yxm, Pm, opBytes{0x5c}},
  1496  	{ASUBQ, yaddl, Pw, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1497  	{ASUBSD, yxm, Pf2, opBytes{0x5c}},
  1498  	{ASUBSS, yxm, Pf3, opBytes{0x5c}},
  1499  	{ASUBW, yaddl, Pe, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1500  	{ASWAPGS, ynone, Pm, opBytes{0x01, 0xf8}},
  1501  	{ASYSCALL, ynone, Px, opBytes{0x0f, 0x05}}, // fast syscall
  1502  	{ATESTB, yxorb, Pb, opBytes{0xa8, 0xf6, 00, 0x84, 0x84}},
  1503  	{ATESTL, ytestl, Px, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}},
  1504  	{ATESTQ, ytestl, Pw, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}},
  1505  	{ATESTW, ytestl, Pe, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}},
  1506  	{ATPAUSE, ywrfsbase, Pq, opBytes{0xae, 06}},
  1507  	{obj.ATEXT, ytext, Px, opBytes{}},
  1508  	{AUCOMISD, yxm, Pe, opBytes{0x2e}},
  1509  	{AUCOMISS, yxm, Pm, opBytes{0x2e}},
  1510  	{AUNPCKHPD, yxm, Pe, opBytes{0x15}},
  1511  	{AUNPCKHPS, yxm, Pm, opBytes{0x15}},
  1512  	{AUNPCKLPD, yxm, Pe, opBytes{0x14}},
  1513  	{AUNPCKLPS, yxm, Pm, opBytes{0x14}},
  1514  	{AUMONITOR, ywrfsbase, Pf3, opBytes{0xae, 06}},
  1515  	{AVERR, ydivl, Pm, opBytes{0x00, 04}},
  1516  	{AVERW, ydivl, Pm, opBytes{0x00, 05}},
  1517  	{AWAIT, ynone, Px, opBytes{0x9b}},
  1518  	{AWORD, ybyte, Px, opBytes{2}},
  1519  	{AXCHGB, yml_mb, Pb, opBytes{0x86, 0x86}},
  1520  	{AXCHGL, yxchg, Px, opBytes{0x90, 0x90, 0x87, 0x87}},
  1521  	{AXCHGQ, yxchg, Pw, opBytes{0x90, 0x90, 0x87, 0x87}},
  1522  	{AXCHGW, yxchg, Pe, opBytes{0x90, 0x90, 0x87, 0x87}},
  1523  	{AXLAT, ynone, Px, opBytes{0xd7}},
  1524  	{AXORB, yxorb, Pb, opBytes{0x34, 0x80, 06, 0x30, 0x32}},
  1525  	{AXORL, yaddl, Px, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1526  	{AXORPD, yxm, Pe, opBytes{0x57}},
  1527  	{AXORPS, yxm, Pm, opBytes{0x57}},
  1528  	{AXORQ, yaddl, Pw, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1529  	{AXORW, yaddl, Pe, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1530  	{AFMOVB, yfmvx, Px, opBytes{0xdf, 04}},
  1531  	{AFMOVBP, yfmvp, Px, opBytes{0xdf, 06}},
  1532  	{AFMOVD, yfmvd, Px, opBytes{0xdd, 00, 0xdd, 02, 0xd9, 00, 0xdd, 02}},
  1533  	{AFMOVDP, yfmvdp, Px, opBytes{0xdd, 03, 0xdd, 03}},
  1534  	{AFMOVF, yfmvf, Px, opBytes{0xd9, 00, 0xd9, 02}},
  1535  	{AFMOVFP, yfmvp, Px, opBytes{0xd9, 03}},
  1536  	{AFMOVL, yfmvf, Px, opBytes{0xdb, 00, 0xdb, 02}},
  1537  	{AFMOVLP, yfmvp, Px, opBytes{0xdb, 03}},
  1538  	{AFMOVV, yfmvx, Px, opBytes{0xdf, 05}},
  1539  	{AFMOVVP, yfmvp, Px, opBytes{0xdf, 07}},
  1540  	{AFMOVW, yfmvf, Px, opBytes{0xdf, 00, 0xdf, 02}},
  1541  	{AFMOVWP, yfmvp, Px, opBytes{0xdf, 03}},
  1542  	{AFMOVX, yfmvx, Px, opBytes{0xdb, 05}},
  1543  	{AFMOVXP, yfmvp, Px, opBytes{0xdb, 07}},
  1544  	{AFCMOVCC, yfcmv, Px, opBytes{0xdb, 00}},
  1545  	{AFCMOVCS, yfcmv, Px, opBytes{0xda, 00}},
  1546  	{AFCMOVEQ, yfcmv, Px, opBytes{0xda, 01}},
  1547  	{AFCMOVHI, yfcmv, Px, opBytes{0xdb, 02}},
  1548  	{AFCMOVLS, yfcmv, Px, opBytes{0xda, 02}},
  1549  	{AFCMOVB, yfcmv, Px, opBytes{0xda, 00}},
  1550  	{AFCMOVBE, yfcmv, Px, opBytes{0xda, 02}},
  1551  	{AFCMOVNB, yfcmv, Px, opBytes{0xdb, 00}},
  1552  	{AFCMOVNBE, yfcmv, Px, opBytes{0xdb, 02}},
  1553  	{AFCMOVE, yfcmv, Px, opBytes{0xda, 01}},
  1554  	{AFCMOVNE, yfcmv, Px, opBytes{0xdb, 01}},
  1555  	{AFCMOVNU, yfcmv, Px, opBytes{0xdb, 03}},
  1556  	{AFCMOVU, yfcmv, Px, opBytes{0xda, 03}},
  1557  	{AFCMOVUN, yfcmv, Px, opBytes{0xda, 03}},
  1558  	{AFCOMD, yfadd, Px, opBytes{0xdc, 02, 0xd8, 02, 0xdc, 02}},  // botch
  1559  	{AFCOMDP, yfadd, Px, opBytes{0xdc, 03, 0xd8, 03, 0xdc, 03}}, // botch
  1560  	{AFCOMDPP, ycompp, Px, opBytes{0xde, 03}},
  1561  	{AFCOMF, yfmvx, Px, opBytes{0xd8, 02}},
  1562  	{AFCOMFP, yfmvx, Px, opBytes{0xd8, 03}},
  1563  	{AFCOMI, yfcmv, Px, opBytes{0xdb, 06}},
  1564  	{AFCOMIP, yfcmv, Px, opBytes{0xdf, 06}},
  1565  	{AFCOML, yfmvx, Px, opBytes{0xda, 02}},
  1566  	{AFCOMLP, yfmvx, Px, opBytes{0xda, 03}},
  1567  	{AFCOMW, yfmvx, Px, opBytes{0xde, 02}},
  1568  	{AFCOMWP, yfmvx, Px, opBytes{0xde, 03}},
  1569  	{AFUCOM, ycompp, Px, opBytes{0xdd, 04}},
  1570  	{AFUCOMI, ycompp, Px, opBytes{0xdb, 05}},
  1571  	{AFUCOMIP, ycompp, Px, opBytes{0xdf, 05}},
  1572  	{AFUCOMP, ycompp, Px, opBytes{0xdd, 05}},
  1573  	{AFUCOMPP, ycompp, Px, opBytes{0xda, 13}},
  1574  	{AFADDDP, ycompp, Px, opBytes{0xde, 00}},
  1575  	{AFADDW, yfmvx, Px, opBytes{0xde, 00}},
  1576  	{AFADDL, yfmvx, Px, opBytes{0xda, 00}},
  1577  	{AFADDF, yfmvx, Px, opBytes{0xd8, 00}},
  1578  	{AFADDD, yfadd, Px, opBytes{0xdc, 00, 0xd8, 00, 0xdc, 00}},
  1579  	{AFMULDP, ycompp, Px, opBytes{0xde, 01}},
  1580  	{AFMULW, yfmvx, Px, opBytes{0xde, 01}},
  1581  	{AFMULL, yfmvx, Px, opBytes{0xda, 01}},
  1582  	{AFMULF, yfmvx, Px, opBytes{0xd8, 01}},
  1583  	{AFMULD, yfadd, Px, opBytes{0xdc, 01, 0xd8, 01, 0xdc, 01}},
  1584  	{AFSUBDP, ycompp, Px, opBytes{0xde, 05}},
  1585  	{AFSUBW, yfmvx, Px, opBytes{0xde, 04}},
  1586  	{AFSUBL, yfmvx, Px, opBytes{0xda, 04}},
  1587  	{AFSUBF, yfmvx, Px, opBytes{0xd8, 04}},
  1588  	{AFSUBD, yfadd, Px, opBytes{0xdc, 04, 0xd8, 04, 0xdc, 05}},
  1589  	{AFSUBRDP, ycompp, Px, opBytes{0xde, 04}},
  1590  	{AFSUBRW, yfmvx, Px, opBytes{0xde, 05}},
  1591  	{AFSUBRL, yfmvx, Px, opBytes{0xda, 05}},
  1592  	{AFSUBRF, yfmvx, Px, opBytes{0xd8, 05}},
  1593  	{AFSUBRD, yfadd, Px, opBytes{0xdc, 05, 0xd8, 05, 0xdc, 04}},
  1594  	{AFDIVDP, ycompp, Px, opBytes{0xde, 07}},
  1595  	{AFDIVW, yfmvx, Px, opBytes{0xde, 06}},
  1596  	{AFDIVL, yfmvx, Px, opBytes{0xda, 06}},
  1597  	{AFDIVF, yfmvx, Px, opBytes{0xd8, 06}},
  1598  	{AFDIVD, yfadd, Px, opBytes{0xdc, 06, 0xd8, 06, 0xdc, 07}},
  1599  	{AFDIVRDP, ycompp, Px, opBytes{0xde, 06}},
  1600  	{AFDIVRW, yfmvx, Px, opBytes{0xde, 07}},
  1601  	{AFDIVRL, yfmvx, Px, opBytes{0xda, 07}},
  1602  	{AFDIVRF, yfmvx, Px, opBytes{0xd8, 07}},
  1603  	{AFDIVRD, yfadd, Px, opBytes{0xdc, 07, 0xd8, 07, 0xdc, 06}},
  1604  	{AFXCHD, yfxch, Px, opBytes{0xd9, 01, 0xd9, 01}},
  1605  	{AFFREE, nil, 0, opBytes{}},
  1606  	{AFLDCW, ysvrs_mo, Px, opBytes{0xd9, 05, 0xd9, 05}},
  1607  	{AFLDENV, ysvrs_mo, Px, opBytes{0xd9, 04, 0xd9, 04}},
  1608  	{AFRSTOR, ysvrs_mo, Px, opBytes{0xdd, 04, 0xdd, 04}},
  1609  	{AFSAVE, ysvrs_om, Px, opBytes{0xdd, 06, 0xdd, 06}},
  1610  	{AFSTCW, ysvrs_om, Px, opBytes{0xd9, 07, 0xd9, 07}},
  1611  	{AFSTENV, ysvrs_om, Px, opBytes{0xd9, 06, 0xd9, 06}},
  1612  	{AFSTSW, ystsw, Px, opBytes{0xdd, 07, 0xdf, 0xe0}},
  1613  	{AF2XM1, ynone, Px, opBytes{0xd9, 0xf0}},
  1614  	{AFABS, ynone, Px, opBytes{0xd9, 0xe1}},
  1615  	{AFBLD, ysvrs_mo, Px, opBytes{0xdf, 04}},
  1616  	{AFBSTP, yclflush, Px, opBytes{0xdf, 06}},
  1617  	{AFCHS, ynone, Px, opBytes{0xd9, 0xe0}},
  1618  	{AFCLEX, ynone, Px, opBytes{0xdb, 0xe2}},
  1619  	{AFCOS, ynone, Px, opBytes{0xd9, 0xff}},
  1620  	{AFDECSTP, ynone, Px, opBytes{0xd9, 0xf6}},
  1621  	{AFINCSTP, ynone, Px, opBytes{0xd9, 0xf7}},
  1622  	{AFINIT, ynone, Px, opBytes{0xdb, 0xe3}},
  1623  	{AFLD1, ynone, Px, opBytes{0xd9, 0xe8}},
  1624  	{AFLDL2E, ynone, Px, opBytes{0xd9, 0xea}},
  1625  	{AFLDL2T, ynone, Px, opBytes{0xd9, 0xe9}},
  1626  	{AFLDLG2, ynone, Px, opBytes{0xd9, 0xec}},
  1627  	{AFLDLN2, ynone, Px, opBytes{0xd9, 0xed}},
  1628  	{AFLDPI, ynone, Px, opBytes{0xd9, 0xeb}},
  1629  	{AFLDZ, ynone, Px, opBytes{0xd9, 0xee}},
  1630  	{AFNOP, ynone, Px, opBytes{0xd9, 0xd0}},
  1631  	{AFPATAN, ynone, Px, opBytes{0xd9, 0xf3}},
  1632  	{AFPREM, ynone, Px, opBytes{0xd9, 0xf8}},
  1633  	{AFPREM1, ynone, Px, opBytes{0xd9, 0xf5}},
  1634  	{AFPTAN, ynone, Px, opBytes{0xd9, 0xf2}},
  1635  	{AFRNDINT, ynone, Px, opBytes{0xd9, 0xfc}},
  1636  	{AFSCALE, ynone, Px, opBytes{0xd9, 0xfd}},
  1637  	{AFSIN, ynone, Px, opBytes{0xd9, 0xfe}},
  1638  	{AFSINCOS, ynone, Px, opBytes{0xd9, 0xfb}},
  1639  	{AFSQRT, ynone, Px, opBytes{0xd9, 0xfa}},
  1640  	{AFTST, ynone, Px, opBytes{0xd9, 0xe4}},
  1641  	{AFXAM, ynone, Px, opBytes{0xd9, 0xe5}},
  1642  	{AFXTRACT, ynone, Px, opBytes{0xd9, 0xf4}},
  1643  	{AFYL2X, ynone, Px, opBytes{0xd9, 0xf1}},
  1644  	{AFYL2XP1, ynone, Px, opBytes{0xd9, 0xf9}},
  1645  	{ACMPXCHGB, yrb_mb, Pb, opBytes{0x0f, 0xb0}},
  1646  	{ACMPXCHGL, yrl_ml, Px, opBytes{0x0f, 0xb1}},
  1647  	{ACMPXCHGW, yrl_ml, Pe, opBytes{0x0f, 0xb1}},
  1648  	{ACMPXCHGQ, yrl_ml, Pw, opBytes{0x0f, 0xb1}},
  1649  	{ACMPXCHG8B, yscond, Pm, opBytes{0xc7, 01}},
  1650  	{ACMPXCHG16B, yscond, Pw, opBytes{0x0f, 0xc7, 01}},
  1651  	{AINVD, ynone, Pm, opBytes{0x08}},
  1652  	{AINVLPG, ydivb, Pm, opBytes{0x01, 07}},
  1653  	{AINVPCID, ycrc32l, Pe, opBytes{0x0f, 0x38, 0x82, 0}},
  1654  	{ALFENCE, ynone, Pm, opBytes{0xae, 0xe8}},
  1655  	{AMFENCE, ynone, Pm, opBytes{0xae, 0xf0}},
  1656  	{AMOVNTIL, yrl_ml, Pm, opBytes{0xc3}},
  1657  	{AMOVNTIQ, yrl_ml, Pw, opBytes{0x0f, 0xc3}},
  1658  	{ARDPKRU, ynone, Pm, opBytes{0x01, 0xee, 0}},
  1659  	{ARDMSR, ynone, Pm, opBytes{0x32}},
  1660  	{ARDPMC, ynone, Pm, opBytes{0x33}},
  1661  	{ARDTSC, ynone, Pm, opBytes{0x31}},
  1662  	{ARSM, ynone, Pm, opBytes{0xaa}},
  1663  	{ASFENCE, ynone, Pm, opBytes{0xae, 0xf8}},
  1664  	{ASYSRET, ynone, Pm, opBytes{0x07}},
  1665  	{AWBINVD, ynone, Pm, opBytes{0x09}},
  1666  	{AWRMSR, ynone, Pm, opBytes{0x30}},
  1667  	{AWRPKRU, ynone, Pm, opBytes{0x01, 0xef, 0}},
  1668  	{AXADDB, yrb_mb, Pb, opBytes{0x0f, 0xc0}},
  1669  	{AXADDL, yrl_ml, Px, opBytes{0x0f, 0xc1}},
  1670  	{AXADDQ, yrl_ml, Pw, opBytes{0x0f, 0xc1}},
  1671  	{AXADDW, yrl_ml, Pe, opBytes{0x0f, 0xc1}},
  1672  	{ACRC32B, ycrc32b, Px, opBytes{0xf2, 0x0f, 0x38, 0xf0, 0}},
  1673  	{ACRC32L, ycrc32l, Px, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}},
  1674  	{ACRC32Q, ycrc32l, Pw, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}},
  1675  	{ACRC32W, ycrc32l, Pe, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}},
  1676  	{APREFETCHT0, yprefetch, Pm, opBytes{0x18, 01}},
  1677  	{APREFETCHT1, yprefetch, Pm, opBytes{0x18, 02}},
  1678  	{APREFETCHT2, yprefetch, Pm, opBytes{0x18, 03}},
  1679  	{APREFETCHNTA, yprefetch, Pm, opBytes{0x18, 00}},
  1680  	{AMOVQL, yrl_ml, Px, opBytes{0x89}},
  1681  	{obj.AUNDEF, ynone, Px, opBytes{0x0f, 0x0b}},
  1682  	{AAESENC, yaes, Pq, opBytes{0x38, 0xdc, 0}},
  1683  	{AAESENCLAST, yaes, Pq, opBytes{0x38, 0xdd, 0}},
  1684  	{AAESDEC, yaes, Pq, opBytes{0x38, 0xde, 0}},
  1685  	{AAESDECLAST, yaes, Pq, opBytes{0x38, 0xdf, 0}},
  1686  	{AAESIMC, yaes, Pq, opBytes{0x38, 0xdb, 0}},
  1687  	{AAESKEYGENASSIST, yxshuf, Pq, opBytes{0x3a, 0xdf, 0}},
  1688  	{AROUNDPD, yxshuf, Pq, opBytes{0x3a, 0x09, 0}},
  1689  	{AROUNDPS, yxshuf, Pq, opBytes{0x3a, 0x08, 0}},
  1690  	{AROUNDSD, yxshuf, Pq, opBytes{0x3a, 0x0b, 0}},
  1691  	{AROUNDSS, yxshuf, Pq, opBytes{0x3a, 0x0a, 0}},
  1692  	{APSHUFD, yxshuf, Pq, opBytes{0x70, 0}},
  1693  	{APCLMULQDQ, yxshuf, Pq, opBytes{0x3a, 0x44, 0}},
  1694  	{APCMPESTRI, yxshuf, Pq, opBytes{0x3a, 0x61, 0}},
  1695  	{APCMPESTRM, yxshuf, Pq, opBytes{0x3a, 0x60, 0}},
  1696  	{AMOVDDUP, yxm, Pf2, opBytes{0x12}},
  1697  	{AMOVSHDUP, yxm, Pf3, opBytes{0x16}},
  1698  	{AMOVSLDUP, yxm, Pf3, opBytes{0x12}},
  1699  	{ARDTSCP, ynone, Pm, opBytes{0x01, 0xf9, 0}},
  1700  	{ASTAC, ynone, Pm, opBytes{0x01, 0xcb, 0}},
  1701  	{AUD1, ynone, Pm, opBytes{0xb9, 0}},
  1702  	{AUD2, ynone, Pm, opBytes{0x0b, 0}},
  1703  	{AUMWAIT, ywrfsbase, Pf2, opBytes{0xae, 06}},
  1704  	{ASYSENTER, ynone, Px, opBytes{0x0f, 0x34, 0}},
  1705  	{ASYSENTER64, ynone, Pw, opBytes{0x0f, 0x34, 0}},
  1706  	{ASYSEXIT, ynone, Px, opBytes{0x0f, 0x35, 0}},
  1707  	{ASYSEXIT64, ynone, Pw, opBytes{0x0f, 0x35, 0}},
  1708  	{ALMSW, ydivl, Pm, opBytes{0x01, 06}},
  1709  	{ALLDT, ydivl, Pm, opBytes{0x00, 02}},
  1710  	{ALIDT, ysvrs_mo, Pm, opBytes{0x01, 03}},
  1711  	{ALGDT, ysvrs_mo, Pm, opBytes{0x01, 02}},
  1712  	{ATZCNTW, ycrc32l, Pe, opBytes{0xf3, 0x0f, 0xbc, 0}},
  1713  	{ATZCNTL, ycrc32l, Px, opBytes{0xf3, 0x0f, 0xbc, 0}},
  1714  	{ATZCNTQ, ycrc32l, Pw, opBytes{0xf3, 0x0f, 0xbc, 0}},
  1715  	{AXRSTOR, ydivl, Px, opBytes{0x0f, 0xae, 05}},
  1716  	{AXRSTOR64, ydivl, Pw, opBytes{0x0f, 0xae, 05}},
  1717  	{AXRSTORS, ydivl, Px, opBytes{0x0f, 0xc7, 03}},
  1718  	{AXRSTORS64, ydivl, Pw, opBytes{0x0f, 0xc7, 03}},
  1719  	{AXSAVE, yclflush, Px, opBytes{0x0f, 0xae, 04}},
  1720  	{AXSAVE64, yclflush, Pw, opBytes{0x0f, 0xae, 04}},
  1721  	{AXSAVEOPT, yclflush, Px, opBytes{0x0f, 0xae, 06}},
  1722  	{AXSAVEOPT64, yclflush, Pw, opBytes{0x0f, 0xae, 06}},
  1723  	{AXSAVEC, yclflush, Px, opBytes{0x0f, 0xc7, 04}},
  1724  	{AXSAVEC64, yclflush, Pw, opBytes{0x0f, 0xc7, 04}},
  1725  	{AXSAVES, yclflush, Px, opBytes{0x0f, 0xc7, 05}},
  1726  	{AXSAVES64, yclflush, Pw, opBytes{0x0f, 0xc7, 05}},
  1727  	{ASGDT, yclflush, Pm, opBytes{0x01, 00}},
  1728  	{ASIDT, yclflush, Pm, opBytes{0x01, 01}},
  1729  	{ARDRANDW, yrdrand, Pe, opBytes{0x0f, 0xc7, 06}},
  1730  	{ARDRANDL, yrdrand, Px, opBytes{0x0f, 0xc7, 06}},
  1731  	{ARDRANDQ, yrdrand, Pw, opBytes{0x0f, 0xc7, 06}},
  1732  	{ARDSEEDW, yrdrand, Pe, opBytes{0x0f, 0xc7, 07}},
  1733  	{ARDSEEDL, yrdrand, Px, opBytes{0x0f, 0xc7, 07}},
  1734  	{ARDSEEDQ, yrdrand, Pw, opBytes{0x0f, 0xc7, 07}},
  1735  	{ASTRW, yincq, Pe, opBytes{0x0f, 0x00, 01}},
  1736  	{ASTRL, yincq, Px, opBytes{0x0f, 0x00, 01}},
  1737  	{ASTRQ, yincq, Pw, opBytes{0x0f, 0x00, 01}},
  1738  	{AXSETBV, ynone, Pm, opBytes{0x01, 0xd1, 0}},
  1739  	{AMOVBEWW, ymovbe, Pq, opBytes{0x38, 0xf0, 0, 0x38, 0xf1, 0}},
  1740  	{AMOVBELL, ymovbe, Pm, opBytes{0x38, 0xf0, 0, 0x38, 0xf1, 0}},
  1741  	{AMOVBEQQ, ymovbe, Pw, opBytes{0x0f, 0x38, 0xf0, 0, 0x0f, 0x38, 0xf1, 0}},
  1742  	{ANOPW, ydivl, Pe, opBytes{0x0f, 0x1f, 00}},
  1743  	{ANOPL, ydivl, Px, opBytes{0x0f, 0x1f, 00}},
  1744  	{ASLDTW, yincq, Pe, opBytes{0x0f, 0x00, 00}},
  1745  	{ASLDTL, yincq, Px, opBytes{0x0f, 0x00, 00}},
  1746  	{ASLDTQ, yincq, Pw, opBytes{0x0f, 0x00, 00}},
  1747  	{ASMSWW, yincq, Pe, opBytes{0x0f, 0x01, 04}},
  1748  	{ASMSWL, yincq, Px, opBytes{0x0f, 0x01, 04}},
  1749  	{ASMSWQ, yincq, Pw, opBytes{0x0f, 0x01, 04}},
  1750  	{ABLENDVPS, yblendvpd, Pq4, opBytes{0x14}},
  1751  	{ABLENDVPD, yblendvpd, Pq4, opBytes{0x15}},
  1752  	{APBLENDVB, yblendvpd, Pq4, opBytes{0x10}},
  1753  	{ASHA1MSG1, yaes, Px, opBytes{0x0f, 0x38, 0xc9, 0}},
  1754  	{ASHA1MSG2, yaes, Px, opBytes{0x0f, 0x38, 0xca, 0}},
  1755  	{ASHA1NEXTE, yaes, Px, opBytes{0x0f, 0x38, 0xc8, 0}},
  1756  	{ASHA256MSG1, yaes, Px, opBytes{0x0f, 0x38, 0xcc, 0}},
  1757  	{ASHA256MSG2, yaes, Px, opBytes{0x0f, 0x38, 0xcd, 0}},
  1758  	{ASHA1RNDS4, ysha1rnds4, Pm, opBytes{0x3a, 0xcc, 0}},
  1759  	{ASHA256RNDS2, ysha256rnds2, Px, opBytes{0x0f, 0x38, 0xcb, 0}},
  1760  	{ARDFSBASEL, yrdrand, Pf3, opBytes{0xae, 00}},
  1761  	{ARDFSBASEQ, yrdrand, Pfw, opBytes{0xae, 00}},
  1762  	{ARDGSBASEL, yrdrand, Pf3, opBytes{0xae, 01}},
  1763  	{ARDGSBASEQ, yrdrand, Pfw, opBytes{0xae, 01}},
  1764  	{AWRFSBASEL, ywrfsbase, Pf3, opBytes{0xae, 02}},
  1765  	{AWRFSBASEQ, ywrfsbase, Pfw, opBytes{0xae, 02}},
  1766  	{AWRGSBASEL, ywrfsbase, Pf3, opBytes{0xae, 03}},
  1767  	{AWRGSBASEQ, ywrfsbase, Pfw, opBytes{0xae, 03}},
  1768  	{ALFSW, ym_rl, Pe, opBytes{0x0f, 0xb4}},
  1769  	{ALFSL, ym_rl, Px, opBytes{0x0f, 0xb4}},
  1770  	{ALFSQ, ym_rl, Pw, opBytes{0x0f, 0xb4}},
  1771  	{ALGSW, ym_rl, Pe, opBytes{0x0f, 0xb5}},
  1772  	{ALGSL, ym_rl, Px, opBytes{0x0f, 0xb5}},
  1773  	{ALGSQ, ym_rl, Pw, opBytes{0x0f, 0xb5}},
  1774  	{ALSSW, ym_rl, Pe, opBytes{0x0f, 0xb2}},
  1775  	{ALSSL, ym_rl, Px, opBytes{0x0f, 0xb2}},
  1776  	{ALSSQ, ym_rl, Pw, opBytes{0x0f, 0xb2}},
  1777  
  1778  	{ABLENDPD, yxshuf, Pq, opBytes{0x3a, 0x0d, 0}},
  1779  	{ABLENDPS, yxshuf, Pq, opBytes{0x3a, 0x0c, 0}},
  1780  	{AXACQUIRE, ynone, Px, opBytes{0xf2}},
  1781  	{AXRELEASE, ynone, Px, opBytes{0xf3}},
  1782  	{AXBEGIN, yxbegin, Px, opBytes{0xc7, 0xf8}},
  1783  	{AXABORT, yxabort, Px, opBytes{0xc6, 0xf8}},
  1784  	{AXEND, ynone, Px, opBytes{0x0f, 01, 0xd5}},
  1785  	{AXTEST, ynone, Px, opBytes{0x0f, 01, 0xd6}},
  1786  	{AXGETBV, ynone, Pm, opBytes{01, 0xd0}},
  1787  	{obj.AFUNCDATA, yfuncdata, Px, opBytes{0, 0}},
  1788  	{obj.APCDATA, ypcdata, Px, opBytes{0, 0}},
  1789  	{obj.ADUFFCOPY, yduff, Px, opBytes{0xe8}},
  1790  	{obj.ADUFFZERO, yduff, Px, opBytes{0xe8}},
  1791  
  1792  	{obj.AEND, nil, 0, opBytes{}},
  1793  	{0, nil, 0, opBytes{}},
  1794  }
  1795  
// opindex maps an opcode, masked with obj.AMask, to its encoding table entry.
// It is filled in by instinit from avxOptab and optab; slots for opcodes that
// appear in neither table stay nil.
var opindex [(ALAST + 1) & obj.AMask]*Optab
  1797  
  1798  // useAbs reports whether s describes a symbol that must avoid pc-relative addressing.
  1799  // This happens on systems like Solaris that call .so functions instead of system calls.
  1800  // It does not seem to be necessary for any other systems. This is probably working
  1801  // around a Solaris-specific bug that should be fixed differently, but we don't know
  1802  // what that bug is. And this does fix it.
  1803  func useAbs(ctxt *obj.Link, s *obj.LSym) bool {
  1804  	if ctxt.Headtype == objabi.Hsolaris {
  1805  		// All the Solaris dynamic imports from libc.so begin with "libc_".
  1806  		return strings.HasPrefix(s.Name, "libc_")
  1807  	}
  1808  	return ctxt.Arch.Family == sys.I386 && !ctxt.Flag_shared
  1809  }
  1810  
  1811  // single-instruction no-ops of various lengths.
  1812  // constructed by hand and disassembled with gdb to verify.
  1813  // see http://www.agner.org/optimize/optimizing_assembly.pdf for discussion.
  1814  var nop = [][16]uint8{
  1815  	{0x90},
  1816  	{0x66, 0x90},
  1817  	{0x0F, 0x1F, 0x00},
  1818  	{0x0F, 0x1F, 0x40, 0x00},
  1819  	{0x0F, 0x1F, 0x44, 0x00, 0x00},
  1820  	{0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00},
  1821  	{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
  1822  	{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1823  	{0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1824  }
  1825  
  1826  // Native Client rejects the repeated 0x66 prefix.
  1827  // {0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1828  func fillnop(p []byte, n int) {
  1829  	var m int
  1830  
  1831  	for n > 0 {
  1832  		m = n
  1833  		if m > len(nop) {
  1834  			m = len(nop)
  1835  		}
  1836  		copy(p[:m], nop[m-1][:m])
  1837  		p = p[m:]
  1838  		n -= m
  1839  	}
  1840  }
  1841  
  1842  func noppad(ctxt *obj.Link, s *obj.LSym, c int32, pad int32) int32 {
  1843  	s.Grow(int64(c) + int64(pad))
  1844  	fillnop(s.P[c:], int(pad))
  1845  	return c + pad
  1846  }
  1847  
  1848  func spadjop(ctxt *obj.Link, l, q obj.As) obj.As {
  1849  	if ctxt.Arch.Family != sys.AMD64 || ctxt.Arch.PtrSize == 4 {
  1850  		return l
  1851  	}
  1852  	return q
  1853  }
  1854  
  1855  // isJump returns whether p is a jump instruction.
  1856  // It is used to ensure that no standalone or macro-fused jump will straddle
  1857  // or end on a 32 byte boundary by inserting NOPs before the jumps.
  1858  func isJump(p *obj.Prog) bool {
  1859  	return p.To.Target() != nil || p.As == obj.AJMP || p.As == obj.ACALL ||
  1860  		p.As == obj.ARET || p.As == obj.ADUFFCOPY || p.As == obj.ADUFFZERO
  1861  }
  1862  
  1863  // lookForJCC returns the first real instruction starting from p, if that instruction is a conditional
  1864  // jump. Otherwise, nil is returned.
  1865  func lookForJCC(p *obj.Prog) *obj.Prog {
  1866  	// Skip any PCDATA, FUNCDATA or NOP instructions
  1867  	var q *obj.Prog
  1868  	for q = p.Link; q != nil && (q.As == obj.APCDATA || q.As == obj.AFUNCDATA || q.As == obj.ANOP); q = q.Link {
  1869  	}
  1870  
  1871  	if q == nil || q.To.Target() == nil || p.As == obj.AJMP || p.As == obj.ACALL {
  1872  		return nil
  1873  	}
  1874  
  1875  	switch q.As {
  1876  	case AJOS, AJOC, AJCS, AJCC, AJEQ, AJNE, AJLS, AJHI,
  1877  		AJMI, AJPL, AJPS, AJPC, AJLT, AJGE, AJLE, AJGT:
  1878  	default:
  1879  		return nil
  1880  	}
  1881  
  1882  	return q
  1883  }
  1884  
  1885  // fusedJump determines whether p can be fused with a subsequent conditional jump instruction.
  1886  // If it can, we return true followed by the total size of the fused jump. If it can't, we return false.
  1887  // Macro fusion rules are derived from the Intel Optimization Manual (April 2019) section 3.4.2.2.
  1888  func fusedJump(p *obj.Prog) (bool, uint8) {
  1889  	var fusedSize uint8
  1890  
  1891  	// The first instruction in a macro fused pair may be preceded by the LOCK prefix,
  1892  	// or possibly an XACQUIRE/XRELEASE prefix followed by a LOCK prefix. If it is, we
  1893  	// need to be careful to insert any padding before the locks rather than directly after them.
  1894  
  1895  	if p.As == AXRELEASE || p.As == AXACQUIRE {
  1896  		fusedSize += p.Isize
  1897  		for p = p.Link; p != nil && (p.As == obj.APCDATA || p.As == obj.AFUNCDATA); p = p.Link {
  1898  		}
  1899  		if p == nil {
  1900  			return false, 0
  1901  		}
  1902  	}
  1903  	if p.As == ALOCK {
  1904  		fusedSize += p.Isize
  1905  		for p = p.Link; p != nil && (p.As == obj.APCDATA || p.As == obj.AFUNCDATA); p = p.Link {
  1906  		}
  1907  		if p == nil {
  1908  			return false, 0
  1909  		}
  1910  	}
  1911  	cmp := p.As == ACMPB || p.As == ACMPL || p.As == ACMPQ || p.As == ACMPW
  1912  
  1913  	cmpAddSub := p.As == AADDB || p.As == AADDL || p.As == AADDW || p.As == AADDQ ||
  1914  		p.As == ASUBB || p.As == ASUBL || p.As == ASUBW || p.As == ASUBQ || cmp
  1915  
  1916  	testAnd := p.As == ATESTB || p.As == ATESTL || p.As == ATESTQ || p.As == ATESTW ||
  1917  		p.As == AANDB || p.As == AANDL || p.As == AANDQ || p.As == AANDW
  1918  
  1919  	incDec := p.As == AINCB || p.As == AINCL || p.As == AINCQ || p.As == AINCW ||
  1920  		p.As == ADECB || p.As == ADECL || p.As == ADECQ || p.As == ADECW
  1921  
  1922  	if !cmpAddSub && !testAnd && !incDec {
  1923  		return false, 0
  1924  	}
  1925  
  1926  	if !incDec {
  1927  		var argOne obj.AddrType
  1928  		var argTwo obj.AddrType
  1929  		if cmp {
  1930  			argOne = p.From.Type
  1931  			argTwo = p.To.Type
  1932  		} else {
  1933  			argOne = p.To.Type
  1934  			argTwo = p.From.Type
  1935  		}
  1936  		if argOne == obj.TYPE_REG {
  1937  			if argTwo != obj.TYPE_REG && argTwo != obj.TYPE_CONST && argTwo != obj.TYPE_MEM {
  1938  				return false, 0
  1939  			}
  1940  		} else if argOne == obj.TYPE_MEM {
  1941  			if argTwo != obj.TYPE_REG {
  1942  				return false, 0
  1943  			}
  1944  		} else {
  1945  			return false, 0
  1946  		}
  1947  	}
  1948  
  1949  	fusedSize += p.Isize
  1950  	jmp := lookForJCC(p)
  1951  	if jmp == nil {
  1952  		return false, 0
  1953  	}
  1954  
  1955  	fusedSize += jmp.Isize
  1956  
  1957  	if testAnd {
  1958  		return true, fusedSize
  1959  	}
  1960  
  1961  	if jmp.As == AJOC || jmp.As == AJOS || jmp.As == AJMI ||
  1962  		jmp.As == AJPL || jmp.As == AJPS || jmp.As == AJPC {
  1963  		return false, 0
  1964  	}
  1965  
  1966  	if cmpAddSub {
  1967  		return true, fusedSize
  1968  	}
  1969  
  1970  	if jmp.As == AJCS || jmp.As == AJCC || jmp.As == AJHI || jmp.As == AJLS {
  1971  		return false, 0
  1972  	}
  1973  
  1974  	return true, fusedSize
  1975  }
  1976  
  1977  type padJumpsCtx int32
  1978  
  1979  func makePjcCtx(ctxt *obj.Link) padJumpsCtx {
  1980  	// Disable jump padding on 32 bit builds by settting
  1981  	// padJumps to 0.
  1982  	if ctxt.Arch.Family == sys.I386 {
  1983  		return padJumpsCtx(0)
  1984  	}
  1985  
  1986  	// Disable jump padding for hand written assembly code.
  1987  	if ctxt.IsAsm {
  1988  		return padJumpsCtx(0)
  1989  	}
  1990  
  1991  	return padJumpsCtx(32)
  1992  }
  1993  
  1994  // padJump detects whether the instruction being assembled is a standalone or a macro-fused
  1995  // jump that needs to be padded. If it is, NOPs are inserted to ensure that the jump does
  1996  // not cross or end on a 32 byte boundary.
  1997  func (pjc padJumpsCtx) padJump(ctxt *obj.Link, s *obj.LSym, p *obj.Prog, c int32) int32 {
  1998  	if pjc == 0 {
  1999  		return c
  2000  	}
  2001  
  2002  	var toPad int32
  2003  	fj, fjSize := fusedJump(p)
  2004  	mask := int32(pjc - 1)
  2005  	if fj {
  2006  		if (c&mask)+int32(fjSize) >= int32(pjc) {
  2007  			toPad = int32(pjc) - (c & mask)
  2008  		}
  2009  	} else if isJump(p) {
  2010  		if (c&mask)+int32(p.Isize) >= int32(pjc) {
  2011  			toPad = int32(pjc) - (c & mask)
  2012  		}
  2013  	}
  2014  	if toPad <= 0 {
  2015  		return c
  2016  	}
  2017  
  2018  	return noppad(ctxt, s, c, toPad)
  2019  }
  2020  
  2021  // reAssemble is called if an instruction's size changes during assembly. If
  2022  // it does and the instruction is a standalone or a macro-fused jump we need to
  2023  // reassemble.
  2024  func (pjc padJumpsCtx) reAssemble(p *obj.Prog) bool {
  2025  	if pjc == 0 {
  2026  		return false
  2027  	}
  2028  
  2029  	fj, _ := fusedJump(p)
  2030  	return fj || isJump(p)
  2031  }
  2032  
// nopPad records a run of padding bytes inserted during assembly so that
// span6 can later splice a matching NOP Prog into the instruction list
// right after p.
type nopPad struct {
	p *obj.Prog // Instruction before the pad
	n int32     // Size of the pad
}
  2037  
// span6 assembles the instruction list of function symbol s into machine
// code. It assigns a Pc to every Prog, writes the encoded bytes to s.P and
// sets s.Size to the total length.
//
// Branches are first assumed to fit a short (8-bit) displacement. Whenever a
// forward branch turns out not to fit, or an instruction relevant to jump
// padding changes size, the whole function is assembled again. After more
// than 20 passes the process is aborted.
//
// newprog is only passed on to obj.MarkUnsafePoints.
func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
	pjc := makePjcCtx(ctxt)

	// Nothing to do if the symbol already has code bytes.
	if s.P != nil {
		return
	}

	if ycover[0] == 0 {
		ctxt.Diag("x86 tables not initialized, call x86.instinit first")
	}

	// Pass 1: rewrite instructions that need preprocessing before encoding.
	for p := s.Func().Text; p != nil; p = p.Link {
		// A branch without a target is made to target itself.
		if p.To.Type == obj.TYPE_BRANCH && p.To.Target() == nil {
			p.To.SetTarget(p)
		}
		if p.As == AADJSP {
			p.To.Type = obj.TYPE_REG
			p.To.Reg = REG_SP
			// Generate 'ADDQ $x, SP' or 'SUBQ $x, SP', with x positive.
			// One exception: It is smaller to encode $-0x80 than $0x80.
			// For that case, flip the sign and the op:
			// Instead of 'ADDQ $0x80, SP', generate 'SUBQ $-0x80, SP'.
			switch v := p.From.Offset; {
			case v == 0:
				p.As = obj.ANOP
			case v == 0x80 || (v < 0 && v != -0x80):
				p.As = spadjop(ctxt, AADDL, AADDQ)
				p.From.Offset *= -1
			default:
				p.As = spadjop(ctxt, ASUBL, ASUBQ)
			}
		}
		// With retpolines enabled, indirect calls and jumps through a register
		// are redirected to the matching runtime.retpoline<REG> symbol.
		// Indirect branches through memory cannot be rewritten and are reported.
		if ctxt.Retpoline && (p.As == obj.ACALL || p.As == obj.AJMP) && (p.To.Type == obj.TYPE_REG || p.To.Type == obj.TYPE_MEM) {
			if p.To.Type != obj.TYPE_REG {
				ctxt.Diag("non-retpoline-compatible: %v", p)
				continue
			}
			p.To.Type = obj.TYPE_BRANCH
			p.To.Name = obj.NAME_EXTERN
			p.To.Sym = ctxt.Lookup("runtime.retpoline" + obj.Rconv(int(p.To.Reg)))
			p.To.Reg = 0
			p.To.Offset = 0
		}
	}

	// Pass 2: count instructions and initialize the branch flags in Back.
	var count int64 // rough count of number of instructions
	for p := s.Func().Text; p != nil; p = p.Link {
		count++
		p.Back = branchShort // use short branches first time through
		// A target that already has branchShort set is treated as lying
		// behind p: the branch is marked backwards and its target a loop head.
		if q := p.To.Target(); q != nil && (q.Back&branchShort != 0) {
			p.Back |= branchBackwards
			q.Back |= branchLoopHead
		}
	}
	s.GrowCap(count * 5) // preallocate roughly 5 bytes per instruction

	var ab AsmBuf
	var n int   // number of assembly passes so far
	var c int32 // current output offset within s.P
	errors := ctxt.Errors
	var nops []nopPad // Padding for a particular assembly (reuse slice storage if multiple assemblies)
	nrelocs0 := len(s.R)
	for {
		// This loop continues while there are reasons to re-assemble
		// whole block, like the presence of long forward jumps.
		reAssemble := false
		// Discard relocations added by the previous pass.
		for i := range s.R[nrelocs0:] {
			s.R[nrelocs0+i] = obj.Reloc{}
		}
		s.R = s.R[:nrelocs0] // preserve marker relocations generated by the compiler
		s.P = s.P[:0]
		c = 0
		var pPrev *obj.Prog
		nops = nops[:0]
		for p := s.Func().Text; p != nil; p = p.Link {
			c0 := c // offset before any padding is inserted in front of p
			c = pjc.padJump(ctxt, s, p, c)

			if maxLoopPad > 0 && p.Back&branchLoopHead != 0 && c&(loopAlign-1) != 0 {
				// pad with NOPs
				v := -c & (loopAlign - 1)

				if v <= maxLoopPad {
					s.Grow(int64(c) + int64(v))
					fillnop(s.P[c:], int(v))
					c += v
				}
			}

			p.Pc = int64(c)

			// process forward jumps to p
			for q := p.Rel; q != nil; q = q.Forwd {
				// Displacement from the end of the jump q to p.
				v := int32(p.Pc - (q.Pc + int64(q.Isize)))
				if q.Back&branchShort != 0 {
					if v > 127 {
						// Does not fit in 8 bits: switch q to the long
						// form and assemble everything again.
						reAssemble = true
						q.Back ^= branchShort
					}

					// The displacement byte is at offset 2 for JCXZL and
					// XBEGIN and at offset 1 for the other short jumps.
					if q.As == AJCXZL || q.As == AXBEGIN {
						s.P[q.Pc+2] = byte(v)
					} else {
						s.P[q.Pc+1] = byte(v)
					}
				} else {
					// Long form: the 32-bit displacement is the last four bytes.
					binary.LittleEndian.PutUint32(s.P[q.Pc+int64(q.Isize)-4:], uint32(v))
				}
			}

			p.Rel = nil

			p.Pc = int64(c)
			ab.asmins(ctxt, s, p)
			m := ab.Len()
			if int(p.Isize) != m {
				p.Isize = uint8(m)
				if pjc.reAssemble(p) {
					// We need to re-assemble here to check for jumps and fused jumps
					// that span or end on 32 byte boundaries.
					reAssemble = true
				}
			}

			s.Grow(p.Pc + int64(m))
			copy(s.P[p.Pc:], ab.Bytes())
			// If there was padding, remember it.
			if pPrev != nil && !ctxt.IsAsm && c > c0 {
				nops = append(nops, nopPad{p: pPrev, n: c - c0})
			}
			c += int32(m)
			pPrev = p
		}

		n++
		if n > 20 {
			ctxt.Diag("span must be looping")
			log.Fatalf("loop")
		}
		if !reAssemble {
			break
		}
		// Give up instead of re-assembling if errors were reported since
		// assembly started.
		if ctxt.Errors > errors {
			return
		}
	}
	// splice padding nops into Progs
	for _, n := range nops {
		pp := n.p
		np := &obj.Prog{Link: pp.Link, Ctxt: pp.Ctxt, As: obj.ANOP, Pos: pp.Pos.WithNotStmt(), Pc: pp.Pc + int64(pp.Isize), Isize: uint8(n.n)}
		pp.Link = np
	}

	s.Size = int64(c)

	// Disabled debugging aid: hex dump of the generated code and relocations.
	if false { /* debug['a'] > 1 */
		fmt.Printf("span1 %s %d (%d tries)\n %.6x", s.Name, s.Size, n, 0)
		var i int
		for i = 0; i < len(s.P); i++ {
			fmt.Printf(" %.2x", s.P[i])
			if i%16 == 15 {
				fmt.Printf("\n  %.6x", uint(i+1))
			}
		}

		if i%16 != 0 {
			fmt.Printf("\n")
		}

		for i := 0; i < len(s.R); i++ {
			r := &s.R[i]
			fmt.Printf(" rel %#.4x/%d %s%+d\n", uint32(r.Off), r.Siz, r.Sym.Name, r.Add)
		}
	}

	// Mark nonpreemptible instruction sequences.
	// The 2-instruction TLS access sequence
	//	MOVQ TLS, BX
	//	MOVQ 0(BX)(TLS*1), BX
	// is not async preemptible, as if it is preempted and resumed on
	// a different thread, the TLS address may become invalid.
	if !CanUse1InsnTLS(ctxt) {
		useTLS := func(p *obj.Prog) bool {
			// Only need to mark the second instruction, which has
			// REG_TLS as Index. (It is okay to interrupt and restart
			// the first instruction.)
			return p.From.Index == REG_TLS
		}
		obj.MarkUnsafePoints(ctxt, s.Func().Text, newprog, useTLS, nil)
	}
}
  2229  
  2230  func instinit(ctxt *obj.Link) {
  2231  	if ycover[0] != 0 {
  2232  		// Already initialized; stop now.
  2233  		// This happens in the cmd/asm tests,
  2234  		// each of which re-initializes the arch.
  2235  		return
  2236  	}
  2237  
  2238  	switch ctxt.Headtype {
  2239  	case objabi.Hplan9:
  2240  		plan9privates = ctxt.Lookup("_privates")
  2241  	}
  2242  
  2243  	for i := range avxOptab {
  2244  		c := avxOptab[i].as
  2245  		if opindex[c&obj.AMask] != nil {
  2246  			ctxt.Diag("phase error in avxOptab: %d (%v)", i, c)
  2247  		}
  2248  		opindex[c&obj.AMask] = &avxOptab[i]
  2249  	}
  2250  	for i := 1; optab[i].as != 0; i++ {
  2251  		c := optab[i].as
  2252  		if opindex[c&obj.AMask] != nil {
  2253  			ctxt.Diag("phase error in optab: %d (%v)", i, c)
  2254  		}
  2255  		opindex[c&obj.AMask] = &optab[i]
  2256  	}
  2257  
  2258  	for i := 0; i < Ymax; i++ {
  2259  		ycover[i*Ymax+i] = 1
  2260  	}
  2261  
  2262  	ycover[Yi0*Ymax+Yu2] = 1
  2263  	ycover[Yi1*Ymax+Yu2] = 1
  2264  
  2265  	ycover[Yi0*Ymax+Yi8] = 1
  2266  	ycover[Yi1*Ymax+Yi8] = 1
  2267  	ycover[Yu2*Ymax+Yi8] = 1
  2268  	ycover[Yu7*Ymax+Yi8] = 1
  2269  
  2270  	ycover[Yi0*Ymax+Yu7] = 1
  2271  	ycover[Yi1*Ymax+Yu7] = 1
  2272  	ycover[Yu2*Ymax+Yu7] = 1
  2273  
  2274  	ycover[Yi0*Ymax+Yu8] = 1
  2275  	ycover[Yi1*Ymax+Yu8] = 1
  2276  	ycover[Yu2*Ymax+Yu8] = 1
  2277  	ycover[Yu7*Ymax+Yu8] = 1
  2278  
  2279  	ycover[Yi0*Ymax+Ys32] = 1
  2280  	ycover[Yi1*Ymax+Ys32] = 1
  2281  	ycover[Yu2*Ymax+Ys32] = 1
  2282  	ycover[Yu7*Ymax+Ys32] = 1
  2283  	ycover[Yu8*Ymax+Ys32] = 1
  2284  	ycover[Yi8*Ymax+Ys32] = 1
  2285  
  2286  	ycover[Yi0*Ymax+Yi32] = 1
  2287  	ycover[Yi1*Ymax+Yi32] = 1
  2288  	ycover[Yu2*Ymax+Yi32] = 1
  2289  	ycover[Yu7*Ymax+Yi32] = 1
  2290  	ycover[Yu8*Ymax+Yi32] = 1
  2291  	ycover[Yi8*Ymax+Yi32] = 1
  2292  	ycover[Ys32*Ymax+Yi32] = 1
  2293  
  2294  	ycover[Yi0*Ymax+Yi64] = 1
  2295  	ycover[Yi1*Ymax+Yi64] = 1
  2296  	ycover[Yu7*Ymax+Yi64] = 1
  2297  	ycover[Yu2*Ymax+Yi64] = 1
  2298  	ycover[Yu8*Ymax+Yi64] = 1
  2299  	ycover[Yi8*Ymax+Yi64] = 1
  2300  	ycover[Ys32*Ymax+Yi64] = 1
  2301  	ycover[Yi32*Ymax+Yi64] = 1
  2302  
  2303  	ycover[Yal*Ymax+Yrb] = 1
  2304  	ycover[Ycl*Ymax+Yrb] = 1
  2305  	ycover[Yax*Ymax+Yrb] = 1
  2306  	ycover[Ycx*Ymax+Yrb] = 1
  2307  	ycover[Yrx*Ymax+Yrb] = 1
  2308  	ycover[Yrl*Ymax+Yrb] = 1 // but not Yrl32
  2309  
  2310  	ycover[Ycl*Ymax+Ycx] = 1
  2311  
  2312  	ycover[Yax*Ymax+Yrx] = 1
  2313  	ycover[Ycx*Ymax+Yrx] = 1
  2314  
  2315  	ycover[Yax*Ymax+Yrl] = 1
  2316  	ycover[Ycx*Ymax+Yrl] = 1
  2317  	ycover[Yrx*Ymax+Yrl] = 1
  2318  	ycover[Yrl32*Ymax+Yrl] = 1
  2319  
  2320  	ycover[Yf0*Ymax+Yrf] = 1
  2321  
  2322  	ycover[Yal*Ymax+Ymb] = 1
  2323  	ycover[Ycl*Ymax+Ymb] = 1
  2324  	ycover[Yax*Ymax+Ymb] = 1
  2325  	ycover[Ycx*Ymax+Ymb] = 1
  2326  	ycover[Yrx*Ymax+Ymb] = 1
  2327  	ycover[Yrb*Ymax+Ymb] = 1
  2328  	ycover[Yrl*Ymax+Ymb] = 1 // but not Yrl32
  2329  	ycover[Ym*Ymax+Ymb] = 1
  2330  
  2331  	ycover[Yax*Ymax+Yml] = 1
  2332  	ycover[Ycx*Ymax+Yml] = 1
  2333  	ycover[Yrx*Ymax+Yml] = 1
  2334  	ycover[Yrl*Ymax+Yml] = 1
  2335  	ycover[Yrl32*Ymax+Yml] = 1
  2336  	ycover[Ym*Ymax+Yml] = 1
  2337  
  2338  	ycover[Yax*Ymax+Ymm] = 1
  2339  	ycover[Ycx*Ymax+Ymm] = 1
  2340  	ycover[Yrx*Ymax+Ymm] = 1
  2341  	ycover[Yrl*Ymax+Ymm] = 1
  2342  	ycover[Yrl32*Ymax+Ymm] = 1
  2343  	ycover[Ym*Ymax+Ymm] = 1
  2344  	ycover[Ymr*Ymax+Ymm] = 1
  2345  
  2346  	ycover[Yxr0*Ymax+Yxr] = 1
  2347  
  2348  	ycover[Ym*Ymax+Yxm] = 1
  2349  	ycover[Yxr0*Ymax+Yxm] = 1
  2350  	ycover[Yxr*Ymax+Yxm] = 1
  2351  
  2352  	ycover[Ym*Ymax+Yym] = 1
  2353  	ycover[Yyr*Ymax+Yym] = 1
  2354  
  2355  	ycover[Yxr0*Ymax+YxrEvex] = 1
  2356  	ycover[Yxr*Ymax+YxrEvex] = 1
  2357  
  2358  	ycover[Ym*Ymax+YxmEvex] = 1
  2359  	ycover[Yxr0*Ymax+YxmEvex] = 1
  2360  	ycover[Yxr*Ymax+YxmEvex] = 1
  2361  	ycover[YxrEvex*Ymax+YxmEvex] = 1
  2362  
  2363  	ycover[Yyr*Ymax+YyrEvex] = 1
  2364  
  2365  	ycover[Ym*Ymax+YymEvex] = 1
  2366  	ycover[Yyr*Ymax+YymEvex] = 1
  2367  	ycover[YyrEvex*Ymax+YymEvex] = 1
  2368  
  2369  	ycover[Ym*Ymax+Yzm] = 1
  2370  	ycover[Yzr*Ymax+Yzm] = 1
  2371  
  2372  	ycover[Yk0*Ymax+Yk] = 1
  2373  	ycover[Yknot0*Ymax+Yk] = 1
  2374  
  2375  	ycover[Yk0*Ymax+Ykm] = 1
  2376  	ycover[Yknot0*Ymax+Ykm] = 1
  2377  	ycover[Yk*Ymax+Ykm] = 1
  2378  	ycover[Ym*Ymax+Ykm] = 1
  2379  
  2380  	ycover[Yxvm*Ymax+YxvmEvex] = 1
  2381  
  2382  	ycover[Yyvm*Ymax+YyvmEvex] = 1
  2383  
  2384  	for i := 0; i < MAXREG; i++ {
  2385  		reg[i] = -1
  2386  		if i >= REG_AL && i <= REG_R15B {
  2387  			reg[i] = (i - REG_AL) & 7
  2388  			if i >= REG_SPB && i <= REG_DIB {
  2389  				regrex[i] = 0x40
  2390  			}
  2391  			if i >= REG_R8B && i <= REG_R15B {
  2392  				regrex[i] = Rxr | Rxx | Rxb
  2393  			}
  2394  		}
  2395  
  2396  		if i >= REG_AH && i <= REG_BH {
  2397  			reg[i] = 4 + ((i - REG_AH) & 7)
  2398  		}
  2399  		if i >= REG_AX && i <= REG_R15 {
  2400  			reg[i] = (i - REG_AX) & 7
  2401  			if i >= REG_R8 {
  2402  				regrex[i] = Rxr | Rxx | Rxb
  2403  			}
  2404  		}
  2405  
  2406  		if i >= REG_F0 && i <= REG_F0+7 {
  2407  			reg[i] = (i - REG_F0) & 7
  2408  		}
  2409  		if i >= REG_M0 && i <= REG_M0+7 {
  2410  			reg[i] = (i - REG_M0) & 7
  2411  		}
  2412  		if i >= REG_K0 && i <= REG_K0+7 {
  2413  			reg[i] = (i - REG_K0) & 7
  2414  		}
  2415  		if i >= REG_X0 && i <= REG_X0+15 {
  2416  			reg[i] = (i - REG_X0) & 7
  2417  			if i >= REG_X0+8 {
  2418  				regrex[i] = Rxr | Rxx | Rxb
  2419  			}
  2420  		}
  2421  		if i >= REG_X16 && i <= REG_X16+15 {
  2422  			reg[i] = (i - REG_X16) & 7
  2423  			if i >= REG_X16+8 {
  2424  				regrex[i] = Rxr | Rxx | Rxb | RxrEvex
  2425  			} else {
  2426  				regrex[i] = RxrEvex
  2427  			}
  2428  		}
  2429  		if i >= REG_Y0 && i <= REG_Y0+15 {
  2430  			reg[i] = (i - REG_Y0) & 7
  2431  			if i >= REG_Y0+8 {
  2432  				regrex[i] = Rxr | Rxx | Rxb
  2433  			}
  2434  		}
  2435  		if i >= REG_Y16 && i <= REG_Y16+15 {
  2436  			reg[i] = (i - REG_Y16) & 7
  2437  			if i >= REG_Y16+8 {
  2438  				regrex[i] = Rxr | Rxx | Rxb | RxrEvex
  2439  			} else {
  2440  				regrex[i] = RxrEvex
  2441  			}
  2442  		}
  2443  		if i >= REG_Z0 && i <= REG_Z0+15 {
  2444  			reg[i] = (i - REG_Z0) & 7
  2445  			if i > REG_Z0+7 {
  2446  				regrex[i] = Rxr | Rxx | Rxb
  2447  			}
  2448  		}
  2449  		if i >= REG_Z16 && i <= REG_Z16+15 {
  2450  			reg[i] = (i - REG_Z16) & 7
  2451  			if i >= REG_Z16+8 {
  2452  				regrex[i] = Rxr | Rxx | Rxb | RxrEvex
  2453  			} else {
  2454  				regrex[i] = RxrEvex
  2455  			}
  2456  		}
  2457  
  2458  		if i >= REG_CR+8 && i <= REG_CR+15 {
  2459  			regrex[i] = Rxr
  2460  		}
  2461  	}
  2462  }
  2463  
// isAndroid reports whether buildcfg.GOOS is "android". It is consulted by
// prefixof and vaddr when they decide how a REG_TLS reference is encoded.
var isAndroid = buildcfg.GOOS == "android"
  2465  
  2466  func prefixof(ctxt *obj.Link, a *obj.Addr) int {
  2467  	if a.Reg < REG_CS && a.Index < REG_CS { // fast path
  2468  		return 0
  2469  	}
  2470  	if a.Type == obj.TYPE_MEM && a.Name == obj.NAME_NONE {
  2471  		switch a.Reg {
  2472  		case REG_CS:
  2473  			return 0x2e
  2474  
  2475  		case REG_DS:
  2476  			return 0x3e
  2477  
  2478  		case REG_ES:
  2479  			return 0x26
  2480  
  2481  		case REG_FS:
  2482  			return 0x64
  2483  
  2484  		case REG_GS:
  2485  			return 0x65
  2486  
  2487  		case REG_TLS:
  2488  			// NOTE: Systems listed here should be only systems that
  2489  			// support direct TLS references like 8(TLS) implemented as
  2490  			// direct references from FS or GS. Systems that require
  2491  			// the initial-exec model, where you load the TLS base into
  2492  			// a register and then index from that register, do not reach
  2493  			// this code and should not be listed.
  2494  			if ctxt.Arch.Family == sys.I386 {
  2495  				switch ctxt.Headtype {
  2496  				default:
  2497  					if isAndroid {
  2498  						return 0x65 // GS
  2499  					}
  2500  					log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)
  2501  
  2502  				case objabi.Hdarwin,
  2503  					objabi.Hdragonfly,
  2504  					objabi.Hfreebsd,
  2505  					objabi.Hnetbsd,
  2506  					objabi.Hopenbsd:
  2507  					return 0x65 // GS
  2508  				}
  2509  			}
  2510  
  2511  			switch ctxt.Headtype {
  2512  			default:
  2513  				log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)
  2514  
  2515  			case objabi.Hlinux:
  2516  				if isAndroid {
  2517  					return 0x64 // FS
  2518  				}
  2519  
  2520  				if ctxt.Flag_shared {
  2521  					log.Fatalf("unknown TLS base register for linux with -shared")
  2522  				} else {
  2523  					return 0x64 // FS
  2524  				}
  2525  
  2526  			case objabi.Hdragonfly,
  2527  				objabi.Hfreebsd,
  2528  				objabi.Hnetbsd,
  2529  				objabi.Hopenbsd,
  2530  				objabi.Hsolaris:
  2531  				return 0x64 // FS
  2532  
  2533  			case objabi.Hdarwin:
  2534  				return 0x65 // GS
  2535  			}
  2536  		}
  2537  	}
  2538  
  2539  	if ctxt.Arch.Family == sys.I386 {
  2540  		if a.Index == REG_TLS && ctxt.Flag_shared {
  2541  			// When building for inclusion into a shared library, an instruction of the form
  2542  			//     MOVL off(CX)(TLS*1), AX
  2543  			// becomes
  2544  			//     mov %gs:off(%ecx), %eax
  2545  			// which assumes that the correct TLS offset has been loaded into %ecx (today
  2546  			// there is only one TLS variable -- g -- so this is OK). When not building for
  2547  			// a shared library the instruction it becomes
  2548  			//     mov 0x0(%ecx), %eax
  2549  			// and a R_TLS_LE relocation, and so does not require a prefix.
  2550  			return 0x65 // GS
  2551  		}
  2552  		return 0
  2553  	}
  2554  
  2555  	switch a.Index {
  2556  	case REG_CS:
  2557  		return 0x2e
  2558  
  2559  	case REG_DS:
  2560  		return 0x3e
  2561  
  2562  	case REG_ES:
  2563  		return 0x26
  2564  
  2565  	case REG_TLS:
  2566  		if ctxt.Flag_shared && ctxt.Headtype != objabi.Hwindows {
  2567  			// When building for inclusion into a shared library, an instruction of the form
  2568  			//     MOV off(CX)(TLS*1), AX
  2569  			// becomes
  2570  			//     mov %fs:off(%rcx), %rax
  2571  			// which assumes that the correct TLS offset has been loaded into %rcx (today
  2572  			// there is only one TLS variable -- g -- so this is OK). When not building for
  2573  			// a shared library the instruction does not require a prefix.
  2574  			return 0x64
  2575  		}
  2576  
  2577  	case REG_FS:
  2578  		return 0x64
  2579  
  2580  	case REG_GS:
  2581  		return 0x65
  2582  	}
  2583  
  2584  	return 0
  2585  }
  2586  
  2587  // oclassRegList returns multisource operand class for addr.
  2588  func oclassRegList(ctxt *obj.Link, addr *obj.Addr) int {
  2589  	// TODO(quasilyte): when oclass register case is refactored into
  2590  	// lookup table, use it here to get register kind more easily.
  2591  	// Helper functions like regIsXmm should go away too (they will become redundant).
  2592  
  2593  	regIsXmm := func(r int) bool { return r >= REG_X0 && r <= REG_X31 }
  2594  	regIsYmm := func(r int) bool { return r >= REG_Y0 && r <= REG_Y31 }
  2595  	regIsZmm := func(r int) bool { return r >= REG_Z0 && r <= REG_Z31 }
  2596  
  2597  	reg0, reg1 := decodeRegisterRange(addr.Offset)
  2598  	low := regIndex(int16(reg0))
  2599  	high := regIndex(int16(reg1))
  2600  
  2601  	if ctxt.Arch.Family == sys.I386 {
  2602  		if low >= 8 || high >= 8 {
  2603  			return Yxxx
  2604  		}
  2605  	}
  2606  
  2607  	switch high - low {
  2608  	case 3:
  2609  		switch {
  2610  		case regIsXmm(reg0) && regIsXmm(reg1):
  2611  			return YxrEvexMulti4
  2612  		case regIsYmm(reg0) && regIsYmm(reg1):
  2613  			return YyrEvexMulti4
  2614  		case regIsZmm(reg0) && regIsZmm(reg1):
  2615  			return YzrMulti4
  2616  		default:
  2617  			return Yxxx
  2618  		}
  2619  	default:
  2620  		return Yxxx
  2621  	}
  2622  }
  2623  
  2624  // oclassVMem returns V-mem (vector memory with VSIB) operand class.
  2625  // For addr that is not V-mem returns (Yxxx, false).
  2626  func oclassVMem(ctxt *obj.Link, addr *obj.Addr) (int, bool) {
  2627  	switch addr.Index {
  2628  	case REG_X0 + 0,
  2629  		REG_X0 + 1,
  2630  		REG_X0 + 2,
  2631  		REG_X0 + 3,
  2632  		REG_X0 + 4,
  2633  		REG_X0 + 5,
  2634  		REG_X0 + 6,
  2635  		REG_X0 + 7:
  2636  		return Yxvm, true
  2637  	case REG_X8 + 0,
  2638  		REG_X8 + 1,
  2639  		REG_X8 + 2,
  2640  		REG_X8 + 3,
  2641  		REG_X8 + 4,
  2642  		REG_X8 + 5,
  2643  		REG_X8 + 6,
  2644  		REG_X8 + 7:
  2645  		if ctxt.Arch.Family == sys.I386 {
  2646  			return Yxxx, true
  2647  		}
  2648  		return Yxvm, true
  2649  	case REG_X16 + 0,
  2650  		REG_X16 + 1,
  2651  		REG_X16 + 2,
  2652  		REG_X16 + 3,
  2653  		REG_X16 + 4,
  2654  		REG_X16 + 5,
  2655  		REG_X16 + 6,
  2656  		REG_X16 + 7,
  2657  		REG_X16 + 8,
  2658  		REG_X16 + 9,
  2659  		REG_X16 + 10,
  2660  		REG_X16 + 11,
  2661  		REG_X16 + 12,
  2662  		REG_X16 + 13,
  2663  		REG_X16 + 14,
  2664  		REG_X16 + 15:
  2665  		if ctxt.Arch.Family == sys.I386 {
  2666  			return Yxxx, true
  2667  		}
  2668  		return YxvmEvex, true
  2669  
  2670  	case REG_Y0 + 0,
  2671  		REG_Y0 + 1,
  2672  		REG_Y0 + 2,
  2673  		REG_Y0 + 3,
  2674  		REG_Y0 + 4,
  2675  		REG_Y0 + 5,
  2676  		REG_Y0 + 6,
  2677  		REG_Y0 + 7:
  2678  		return Yyvm, true
  2679  	case REG_Y8 + 0,
  2680  		REG_Y8 + 1,
  2681  		REG_Y8 + 2,
  2682  		REG_Y8 + 3,
  2683  		REG_Y8 + 4,
  2684  		REG_Y8 + 5,
  2685  		REG_Y8 + 6,
  2686  		REG_Y8 + 7:
  2687  		if ctxt.Arch.Family == sys.I386 {
  2688  			return Yxxx, true
  2689  		}
  2690  		return Yyvm, true
  2691  	case REG_Y16 + 0,
  2692  		REG_Y16 + 1,
  2693  		REG_Y16 + 2,
  2694  		REG_Y16 + 3,
  2695  		REG_Y16 + 4,
  2696  		REG_Y16 + 5,
  2697  		REG_Y16 + 6,
  2698  		REG_Y16 + 7,
  2699  		REG_Y16 + 8,
  2700  		REG_Y16 + 9,
  2701  		REG_Y16 + 10,
  2702  		REG_Y16 + 11,
  2703  		REG_Y16 + 12,
  2704  		REG_Y16 + 13,
  2705  		REG_Y16 + 14,
  2706  		REG_Y16 + 15:
  2707  		if ctxt.Arch.Family == sys.I386 {
  2708  			return Yxxx, true
  2709  		}
  2710  		return YyvmEvex, true
  2711  
  2712  	case REG_Z0 + 0,
  2713  		REG_Z0 + 1,
  2714  		REG_Z0 + 2,
  2715  		REG_Z0 + 3,
  2716  		REG_Z0 + 4,
  2717  		REG_Z0 + 5,
  2718  		REG_Z0 + 6,
  2719  		REG_Z0 + 7:
  2720  		return Yzvm, true
  2721  	case REG_Z8 + 0,
  2722  		REG_Z8 + 1,
  2723  		REG_Z8 + 2,
  2724  		REG_Z8 + 3,
  2725  		REG_Z8 + 4,
  2726  		REG_Z8 + 5,
  2727  		REG_Z8 + 6,
  2728  		REG_Z8 + 7,
  2729  		REG_Z8 + 8,
  2730  		REG_Z8 + 9,
  2731  		REG_Z8 + 10,
  2732  		REG_Z8 + 11,
  2733  		REG_Z8 + 12,
  2734  		REG_Z8 + 13,
  2735  		REG_Z8 + 14,
  2736  		REG_Z8 + 15,
  2737  		REG_Z8 + 16,
  2738  		REG_Z8 + 17,
  2739  		REG_Z8 + 18,
  2740  		REG_Z8 + 19,
  2741  		REG_Z8 + 20,
  2742  		REG_Z8 + 21,
  2743  		REG_Z8 + 22,
  2744  		REG_Z8 + 23:
  2745  		if ctxt.Arch.Family == sys.I386 {
  2746  			return Yxxx, true
  2747  		}
  2748  		return Yzvm, true
  2749  	}
  2750  
  2751  	return Yxxx, false
  2752  }
  2753  
  2754  func oclass(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
  2755  	switch a.Type {
  2756  	case obj.TYPE_REGLIST:
  2757  		return oclassRegList(ctxt, a)
  2758  
  2759  	case obj.TYPE_NONE:
  2760  		return Ynone
  2761  
  2762  	case obj.TYPE_BRANCH:
  2763  		return Ybr
  2764  
  2765  	case obj.TYPE_INDIR:
  2766  		if a.Name != obj.NAME_NONE && a.Reg == REG_NONE && a.Index == REG_NONE && a.Scale == 0 {
  2767  			return Yindir
  2768  		}
  2769  		return Yxxx
  2770  
  2771  	case obj.TYPE_MEM:
  2772  		// Pseudo registers have negative index, but SP is
  2773  		// not pseudo on x86, hence REG_SP check is not redundant.
  2774  		if a.Index == REG_SP || a.Index < 0 {
  2775  			// Can't use FP/SB/PC/SP as the index register.
  2776  			return Yxxx
  2777  		}
  2778  
  2779  		if vmem, ok := oclassVMem(ctxt, a); ok {
  2780  			return vmem
  2781  		}
  2782  
  2783  		if ctxt.Arch.Family == sys.AMD64 {
  2784  			switch a.Name {
  2785  			case obj.NAME_EXTERN, obj.NAME_STATIC, obj.NAME_GOTREF:
  2786  				// Global variables can't use index registers and their
  2787  				// base register is %rip (%rip is encoded as REG_NONE).
  2788  				if a.Reg != REG_NONE || a.Index != REG_NONE || a.Scale != 0 {
  2789  					return Yxxx
  2790  				}
  2791  			case obj.NAME_AUTO, obj.NAME_PARAM:
  2792  				// These names must have a base of SP.  The old compiler
  2793  				// uses 0 for the base register. SSA uses REG_SP.
  2794  				if a.Reg != REG_SP && a.Reg != 0 {
  2795  					return Yxxx
  2796  				}
  2797  			case obj.NAME_NONE:
  2798  				// everything is ok
  2799  			default:
  2800  				// unknown name
  2801  				return Yxxx
  2802  			}
  2803  		}
  2804  		return Ym
  2805  
  2806  	case obj.TYPE_ADDR:
  2807  		switch a.Name {
  2808  		case obj.NAME_GOTREF:
  2809  			ctxt.Diag("unexpected TYPE_ADDR with NAME_GOTREF")
  2810  			return Yxxx
  2811  
  2812  		case obj.NAME_EXTERN,
  2813  			obj.NAME_STATIC:
  2814  			if a.Sym != nil && useAbs(ctxt, a.Sym) {
  2815  				return Yi32
  2816  			}
  2817  			return Yiauto // use pc-relative addressing
  2818  
  2819  		case obj.NAME_AUTO,
  2820  			obj.NAME_PARAM:
  2821  			return Yiauto
  2822  		}
  2823  
  2824  		// TODO(rsc): DUFFZERO/DUFFCOPY encoding forgot to set a->index
  2825  		// and got Yi32 in an earlier version of this code.
  2826  		// Keep doing that until we fix yduff etc.
  2827  		if a.Sym != nil && strings.HasPrefix(a.Sym.Name, "runtime.duff") {
  2828  			return Yi32
  2829  		}
  2830  
  2831  		if a.Sym != nil || a.Name != obj.NAME_NONE {
  2832  			ctxt.Diag("unexpected addr: %v", obj.Dconv(p, a))
  2833  		}
  2834  		fallthrough
  2835  
  2836  	case obj.TYPE_CONST:
  2837  		if a.Sym != nil {
  2838  			ctxt.Diag("TYPE_CONST with symbol: %v", obj.Dconv(p, a))
  2839  		}
  2840  
  2841  		v := a.Offset
  2842  		if ctxt.Arch.Family == sys.I386 {
  2843  			v = int64(int32(v))
  2844  		}
  2845  		switch {
  2846  		case v == 0:
  2847  			return Yi0
  2848  		case v == 1:
  2849  			return Yi1
  2850  		case v >= 0 && v <= 3:
  2851  			return Yu2
  2852  		case v >= 0 && v <= 127:
  2853  			return Yu7
  2854  		case v >= 0 && v <= 255:
  2855  			return Yu8
  2856  		case v >= -128 && v <= 127:
  2857  			return Yi8
  2858  		}
  2859  		if ctxt.Arch.Family == sys.I386 {
  2860  			return Yi32
  2861  		}
  2862  		l := int32(v)
  2863  		if int64(l) == v {
  2864  			return Ys32 // can sign extend
  2865  		}
  2866  		if v>>32 == 0 {
  2867  			return Yi32 // unsigned
  2868  		}
  2869  		return Yi64
  2870  
  2871  	case obj.TYPE_TEXTSIZE:
  2872  		return Ytextsize
  2873  	}
  2874  
  2875  	if a.Type != obj.TYPE_REG {
  2876  		ctxt.Diag("unexpected addr1: type=%d %v", a.Type, obj.Dconv(p, a))
  2877  		return Yxxx
  2878  	}
  2879  
  2880  	switch a.Reg {
  2881  	case REG_AL:
  2882  		return Yal
  2883  
  2884  	case REG_AX:
  2885  		return Yax
  2886  
  2887  		/*
  2888  			case REG_SPB:
  2889  		*/
  2890  	case REG_BPB,
  2891  		REG_SIB,
  2892  		REG_DIB,
  2893  		REG_R8B,
  2894  		REG_R9B,
  2895  		REG_R10B,
  2896  		REG_R11B,
  2897  		REG_R12B,
  2898  		REG_R13B,
  2899  		REG_R14B,
  2900  		REG_R15B:
  2901  		if ctxt.Arch.Family == sys.I386 {
  2902  			return Yxxx
  2903  		}
  2904  		fallthrough
  2905  
  2906  	case REG_DL,
  2907  		REG_BL,
  2908  		REG_AH,
  2909  		REG_CH,
  2910  		REG_DH,
  2911  		REG_BH:
  2912  		return Yrb
  2913  
  2914  	case REG_CL:
  2915  		return Ycl
  2916  
  2917  	case REG_CX:
  2918  		return Ycx
  2919  
  2920  	case REG_DX, REG_BX:
  2921  		return Yrx
  2922  
  2923  	case REG_R8, // not really Yrl
  2924  		REG_R9,
  2925  		REG_R10,
  2926  		REG_R11,
  2927  		REG_R12,
  2928  		REG_R13,
  2929  		REG_R14,
  2930  		REG_R15:
  2931  		if ctxt.Arch.Family == sys.I386 {
  2932  			return Yxxx
  2933  		}
  2934  		fallthrough
  2935  
  2936  	case REG_SP, REG_BP, REG_SI, REG_DI:
  2937  		if ctxt.Arch.Family == sys.I386 {
  2938  			return Yrl32
  2939  		}
  2940  		return Yrl
  2941  
  2942  	case REG_F0 + 0:
  2943  		return Yf0
  2944  
  2945  	case REG_F0 + 1,
  2946  		REG_F0 + 2,
  2947  		REG_F0 + 3,
  2948  		REG_F0 + 4,
  2949  		REG_F0 + 5,
  2950  		REG_F0 + 6,
  2951  		REG_F0 + 7:
  2952  		return Yrf
  2953  
  2954  	case REG_M0 + 0,
  2955  		REG_M0 + 1,
  2956  		REG_M0 + 2,
  2957  		REG_M0 + 3,
  2958  		REG_M0 + 4,
  2959  		REG_M0 + 5,
  2960  		REG_M0 + 6,
  2961  		REG_M0 + 7:
  2962  		return Ymr
  2963  
  2964  	case REG_X0:
  2965  		return Yxr0
  2966  
  2967  	case REG_X0 + 1,
  2968  		REG_X0 + 2,
  2969  		REG_X0 + 3,
  2970  		REG_X0 + 4,
  2971  		REG_X0 + 5,
  2972  		REG_X0 + 6,
  2973  		REG_X0 + 7,
  2974  		REG_X0 + 8,
  2975  		REG_X0 + 9,
  2976  		REG_X0 + 10,
  2977  		REG_X0 + 11,
  2978  		REG_X0 + 12,
  2979  		REG_X0 + 13,
  2980  		REG_X0 + 14,
  2981  		REG_X0 + 15:
  2982  		return Yxr
  2983  
  2984  	case REG_X0 + 16,
  2985  		REG_X0 + 17,
  2986  		REG_X0 + 18,
  2987  		REG_X0 + 19,
  2988  		REG_X0 + 20,
  2989  		REG_X0 + 21,
  2990  		REG_X0 + 22,
  2991  		REG_X0 + 23,
  2992  		REG_X0 + 24,
  2993  		REG_X0 + 25,
  2994  		REG_X0 + 26,
  2995  		REG_X0 + 27,
  2996  		REG_X0 + 28,
  2997  		REG_X0 + 29,
  2998  		REG_X0 + 30,
  2999  		REG_X0 + 31:
  3000  		return YxrEvex
  3001  
  3002  	case REG_Y0 + 0,
  3003  		REG_Y0 + 1,
  3004  		REG_Y0 + 2,
  3005  		REG_Y0 + 3,
  3006  		REG_Y0 + 4,
  3007  		REG_Y0 + 5,
  3008  		REG_Y0 + 6,
  3009  		REG_Y0 + 7,
  3010  		REG_Y0 + 8,
  3011  		REG_Y0 + 9,
  3012  		REG_Y0 + 10,
  3013  		REG_Y0 + 11,
  3014  		REG_Y0 + 12,
  3015  		REG_Y0 + 13,
  3016  		REG_Y0 + 14,
  3017  		REG_Y0 + 15:
  3018  		return Yyr
  3019  
  3020  	case REG_Y0 + 16,
  3021  		REG_Y0 + 17,
  3022  		REG_Y0 + 18,
  3023  		REG_Y0 + 19,
  3024  		REG_Y0 + 20,
  3025  		REG_Y0 + 21,
  3026  		REG_Y0 + 22,
  3027  		REG_Y0 + 23,
  3028  		REG_Y0 + 24,
  3029  		REG_Y0 + 25,
  3030  		REG_Y0 + 26,
  3031  		REG_Y0 + 27,
  3032  		REG_Y0 + 28,
  3033  		REG_Y0 + 29,
  3034  		REG_Y0 + 30,
  3035  		REG_Y0 + 31:
  3036  		return YyrEvex
  3037  
  3038  	case REG_Z0 + 0,
  3039  		REG_Z0 + 1,
  3040  		REG_Z0 + 2,
  3041  		REG_Z0 + 3,
  3042  		REG_Z0 + 4,
  3043  		REG_Z0 + 5,
  3044  		REG_Z0 + 6,
  3045  		REG_Z0 + 7:
  3046  		return Yzr
  3047  
  3048  	case REG_Z0 + 8,
  3049  		REG_Z0 + 9,
  3050  		REG_Z0 + 10,
  3051  		REG_Z0 + 11,
  3052  		REG_Z0 + 12,
  3053  		REG_Z0 + 13,
  3054  		REG_Z0 + 14,
  3055  		REG_Z0 + 15,
  3056  		REG_Z0 + 16,
  3057  		REG_Z0 + 17,
  3058  		REG_Z0 + 18,
  3059  		REG_Z0 + 19,
  3060  		REG_Z0 + 20,
  3061  		REG_Z0 + 21,
  3062  		REG_Z0 + 22,
  3063  		REG_Z0 + 23,
  3064  		REG_Z0 + 24,
  3065  		REG_Z0 + 25,
  3066  		REG_Z0 + 26,
  3067  		REG_Z0 + 27,
  3068  		REG_Z0 + 28,
  3069  		REG_Z0 + 29,
  3070  		REG_Z0 + 30,
  3071  		REG_Z0 + 31:
  3072  		if ctxt.Arch.Family == sys.I386 {
  3073  			return Yxxx
  3074  		}
  3075  		return Yzr
  3076  
  3077  	case REG_K0:
  3078  		return Yk0
  3079  
  3080  	case REG_K0 + 1,
  3081  		REG_K0 + 2,
  3082  		REG_K0 + 3,
  3083  		REG_K0 + 4,
  3084  		REG_K0 + 5,
  3085  		REG_K0 + 6,
  3086  		REG_K0 + 7:
  3087  		return Yknot0
  3088  
  3089  	case REG_CS:
  3090  		return Ycs
  3091  	case REG_SS:
  3092  		return Yss
  3093  	case REG_DS:
  3094  		return Yds
  3095  	case REG_ES:
  3096  		return Yes
  3097  	case REG_FS:
  3098  		return Yfs
  3099  	case REG_GS:
  3100  		return Ygs
  3101  	case REG_TLS:
  3102  		return Ytls
  3103  
  3104  	case REG_GDTR:
  3105  		return Ygdtr
  3106  	case REG_IDTR:
  3107  		return Yidtr
  3108  	case REG_LDTR:
  3109  		return Yldtr
  3110  	case REG_MSW:
  3111  		return Ymsw
  3112  	case REG_TASK:
  3113  		return Ytask
  3114  
  3115  	case REG_CR + 0:
  3116  		return Ycr0
  3117  	case REG_CR + 1:
  3118  		return Ycr1
  3119  	case REG_CR + 2:
  3120  		return Ycr2
  3121  	case REG_CR + 3:
  3122  		return Ycr3
  3123  	case REG_CR + 4:
  3124  		return Ycr4
  3125  	case REG_CR + 5:
  3126  		return Ycr5
  3127  	case REG_CR + 6:
  3128  		return Ycr6
  3129  	case REG_CR + 7:
  3130  		return Ycr7
  3131  	case REG_CR + 8:
  3132  		return Ycr8
  3133  
  3134  	case REG_DR + 0:
  3135  		return Ydr0
  3136  	case REG_DR + 1:
  3137  		return Ydr1
  3138  	case REG_DR + 2:
  3139  		return Ydr2
  3140  	case REG_DR + 3:
  3141  		return Ydr3
  3142  	case REG_DR + 4:
  3143  		return Ydr4
  3144  	case REG_DR + 5:
  3145  		return Ydr5
  3146  	case REG_DR + 6:
  3147  		return Ydr6
  3148  	case REG_DR + 7:
  3149  		return Ydr7
  3150  
  3151  	case REG_TR + 0:
  3152  		return Ytr0
  3153  	case REG_TR + 1:
  3154  		return Ytr1
  3155  	case REG_TR + 2:
  3156  		return Ytr2
  3157  	case REG_TR + 3:
  3158  		return Ytr3
  3159  	case REG_TR + 4:
  3160  		return Ytr4
  3161  	case REG_TR + 5:
  3162  		return Ytr5
  3163  	case REG_TR + 6:
  3164  		return Ytr6
  3165  	case REG_TR + 7:
  3166  		return Ytr7
  3167  	}
  3168  
  3169  	return Yxxx
  3170  }
  3171  
// AsmBuf is a simple buffer to assemble variable-length x86 instructions into
// and hold assembly state.
//
// The Put* methods append at buf[off] and advance off; Bytes returns
// buf[:off]; Reset sets off back to zero and leaves every other field alone.
type AsmBuf struct {
	// buf is the fixed-size backing store; only buf[:off] holds emitted bytes.
	buf      [100]byte
	// off is the number of bytes emitted so far (the next write position).
	off      int
	// rexflag accumulates REX-related bits; asmandsz ORs register and
	// operand bits into it while encoding.
	rexflag  int
	vexflag  bool // Per inst: true for VEX-encoded
	evexflag bool // Per inst: true for EVEX-encoded
	// NOTE(review): rep, repn and lock are not touched in this part of the
	// file; presumably they record pending REP/REPN/LOCK prefixes — confirm.
	rep      bool
	repn     bool
	lock     bool

	evex evexBits // Initialized when evexflag is true
}
  3186  
  3187  // Put1 appends one byte to the end of the buffer.
  3188  func (ab *AsmBuf) Put1(x byte) {
  3189  	ab.buf[ab.off] = x
  3190  	ab.off++
  3191  }
  3192  
  3193  // Put2 appends two bytes to the end of the buffer.
  3194  func (ab *AsmBuf) Put2(x, y byte) {
  3195  	ab.buf[ab.off+0] = x
  3196  	ab.buf[ab.off+1] = y
  3197  	ab.off += 2
  3198  }
  3199  
  3200  // Put3 appends three bytes to the end of the buffer.
  3201  func (ab *AsmBuf) Put3(x, y, z byte) {
  3202  	ab.buf[ab.off+0] = x
  3203  	ab.buf[ab.off+1] = y
  3204  	ab.buf[ab.off+2] = z
  3205  	ab.off += 3
  3206  }
  3207  
  3208  // Put4 appends four bytes to the end of the buffer.
  3209  func (ab *AsmBuf) Put4(x, y, z, w byte) {
  3210  	ab.buf[ab.off+0] = x
  3211  	ab.buf[ab.off+1] = y
  3212  	ab.buf[ab.off+2] = z
  3213  	ab.buf[ab.off+3] = w
  3214  	ab.off += 4
  3215  }
  3216  
  3217  // PutInt16 writes v into the buffer using little-endian encoding.
  3218  func (ab *AsmBuf) PutInt16(v int16) {
  3219  	ab.buf[ab.off+0] = byte(v)
  3220  	ab.buf[ab.off+1] = byte(v >> 8)
  3221  	ab.off += 2
  3222  }
  3223  
  3224  // PutInt32 writes v into the buffer using little-endian encoding.
  3225  func (ab *AsmBuf) PutInt32(v int32) {
  3226  	ab.buf[ab.off+0] = byte(v)
  3227  	ab.buf[ab.off+1] = byte(v >> 8)
  3228  	ab.buf[ab.off+2] = byte(v >> 16)
  3229  	ab.buf[ab.off+3] = byte(v >> 24)
  3230  	ab.off += 4
  3231  }
  3232  
  3233  // PutInt64 writes v into the buffer using little-endian encoding.
  3234  func (ab *AsmBuf) PutInt64(v int64) {
  3235  	ab.buf[ab.off+0] = byte(v)
  3236  	ab.buf[ab.off+1] = byte(v >> 8)
  3237  	ab.buf[ab.off+2] = byte(v >> 16)
  3238  	ab.buf[ab.off+3] = byte(v >> 24)
  3239  	ab.buf[ab.off+4] = byte(v >> 32)
  3240  	ab.buf[ab.off+5] = byte(v >> 40)
  3241  	ab.buf[ab.off+6] = byte(v >> 48)
  3242  	ab.buf[ab.off+7] = byte(v >> 56)
  3243  	ab.off += 8
  3244  }
  3245  
  3246  // Put copies b into the buffer.
  3247  func (ab *AsmBuf) Put(b []byte) {
  3248  	copy(ab.buf[ab.off:], b)
  3249  	ab.off += len(b)
  3250  }
  3251  
  3252  // PutOpBytesLit writes zero terminated sequence of bytes from op,
  3253  // starting at specified offset (e.g. z counter value).
  3254  // Trailing 0 is not written.
  3255  //
  3256  // Intended to be used for literal Z cases.
  3257  // Literal Z cases usually have "Zlit" in their name (Zlit, Zlitr_m, Zlitm_r).
  3258  func (ab *AsmBuf) PutOpBytesLit(offset int, op *opBytes) {
  3259  	for int(op[offset]) != 0 {
  3260  		ab.Put1(byte(op[offset]))
  3261  		offset++
  3262  	}
  3263  }
  3264  
  3265  // Insert inserts b at offset i.
  3266  func (ab *AsmBuf) Insert(i int, b byte) {
  3267  	ab.off++
  3268  	copy(ab.buf[i+1:ab.off], ab.buf[i:ab.off-1])
  3269  	ab.buf[i] = b
  3270  }
  3271  
  3272  // Last returns the byte at the end of the buffer.
  3273  func (ab *AsmBuf) Last() byte { return ab.buf[ab.off-1] }
  3274  
  3275  // Len returns the length of the buffer.
  3276  func (ab *AsmBuf) Len() int { return ab.off }
  3277  
  3278  // Bytes returns the contents of the buffer.
  3279  func (ab *AsmBuf) Bytes() []byte { return ab.buf[:ab.off] }
  3280  
  3281  // Reset empties the buffer.
  3282  func (ab *AsmBuf) Reset() { ab.off = 0 }
  3283  
  3284  // At returns the byte at offset i.
  3285  func (ab *AsmBuf) At(i int) byte { return ab.buf[i] }
  3286  
  3287  // asmidx emits SIB byte.
  3288  func (ab *AsmBuf) asmidx(ctxt *obj.Link, scale int, index int, base int) {
  3289  	var i int
  3290  
  3291  	// X/Y index register is used in VSIB.
  3292  	switch index {
  3293  	default:
  3294  		goto bad
  3295  
  3296  	case REG_NONE:
  3297  		i = 4 << 3
  3298  		goto bas
  3299  
  3300  	case REG_R8,
  3301  		REG_R9,
  3302  		REG_R10,
  3303  		REG_R11,
  3304  		REG_R12,
  3305  		REG_R13,
  3306  		REG_R14,
  3307  		REG_R15,
  3308  		REG_X8,
  3309  		REG_X9,
  3310  		REG_X10,
  3311  		REG_X11,
  3312  		REG_X12,
  3313  		REG_X13,
  3314  		REG_X14,
  3315  		REG_X15,
  3316  		REG_X16,
  3317  		REG_X17,
  3318  		REG_X18,
  3319  		REG_X19,
  3320  		REG_X20,
  3321  		REG_X21,
  3322  		REG_X22,
  3323  		REG_X23,
  3324  		REG_X24,
  3325  		REG_X25,
  3326  		REG_X26,
  3327  		REG_X27,
  3328  		REG_X28,
  3329  		REG_X29,
  3330  		REG_X30,
  3331  		REG_X31,
  3332  		REG_Y8,
  3333  		REG_Y9,
  3334  		REG_Y10,
  3335  		REG_Y11,
  3336  		REG_Y12,
  3337  		REG_Y13,
  3338  		REG_Y14,
  3339  		REG_Y15,
  3340  		REG_Y16,
  3341  		REG_Y17,
  3342  		REG_Y18,
  3343  		REG_Y19,
  3344  		REG_Y20,
  3345  		REG_Y21,
  3346  		REG_Y22,
  3347  		REG_Y23,
  3348  		REG_Y24,
  3349  		REG_Y25,
  3350  		REG_Y26,
  3351  		REG_Y27,
  3352  		REG_Y28,
  3353  		REG_Y29,
  3354  		REG_Y30,
  3355  		REG_Y31,
  3356  		REG_Z8,
  3357  		REG_Z9,
  3358  		REG_Z10,
  3359  		REG_Z11,
  3360  		REG_Z12,
  3361  		REG_Z13,
  3362  		REG_Z14,
  3363  		REG_Z15,
  3364  		REG_Z16,
  3365  		REG_Z17,
  3366  		REG_Z18,
  3367  		REG_Z19,
  3368  		REG_Z20,
  3369  		REG_Z21,
  3370  		REG_Z22,
  3371  		REG_Z23,
  3372  		REG_Z24,
  3373  		REG_Z25,
  3374  		REG_Z26,
  3375  		REG_Z27,
  3376  		REG_Z28,
  3377  		REG_Z29,
  3378  		REG_Z30,
  3379  		REG_Z31:
  3380  		if ctxt.Arch.Family == sys.I386 {
  3381  			goto bad
  3382  		}
  3383  		fallthrough
  3384  
  3385  	case REG_AX,
  3386  		REG_CX,
  3387  		REG_DX,
  3388  		REG_BX,
  3389  		REG_BP,
  3390  		REG_SI,
  3391  		REG_DI,
  3392  		REG_X0,
  3393  		REG_X1,
  3394  		REG_X2,
  3395  		REG_X3,
  3396  		REG_X4,
  3397  		REG_X5,
  3398  		REG_X6,
  3399  		REG_X7,
  3400  		REG_Y0,
  3401  		REG_Y1,
  3402  		REG_Y2,
  3403  		REG_Y3,
  3404  		REG_Y4,
  3405  		REG_Y5,
  3406  		REG_Y6,
  3407  		REG_Y7,
  3408  		REG_Z0,
  3409  		REG_Z1,
  3410  		REG_Z2,
  3411  		REG_Z3,
  3412  		REG_Z4,
  3413  		REG_Z5,
  3414  		REG_Z6,
  3415  		REG_Z7:
  3416  		i = reg[index] << 3
  3417  	}
  3418  
  3419  	switch scale {
  3420  	default:
  3421  		goto bad
  3422  
  3423  	case 1:
  3424  		break
  3425  
  3426  	case 2:
  3427  		i |= 1 << 6
  3428  
  3429  	case 4:
  3430  		i |= 2 << 6
  3431  
  3432  	case 8:
  3433  		i |= 3 << 6
  3434  	}
  3435  
  3436  bas:
  3437  	switch base {
  3438  	default:
  3439  		goto bad
  3440  
  3441  	case REG_NONE: // must be mod=00
  3442  		i |= 5
  3443  
  3444  	case REG_R8,
  3445  		REG_R9,
  3446  		REG_R10,
  3447  		REG_R11,
  3448  		REG_R12,
  3449  		REG_R13,
  3450  		REG_R14,
  3451  		REG_R15:
  3452  		if ctxt.Arch.Family == sys.I386 {
  3453  			goto bad
  3454  		}
  3455  		fallthrough
  3456  
  3457  	case REG_AX,
  3458  		REG_CX,
  3459  		REG_DX,
  3460  		REG_BX,
  3461  		REG_SP,
  3462  		REG_BP,
  3463  		REG_SI,
  3464  		REG_DI:
  3465  		i |= reg[base]
  3466  	}
  3467  
  3468  	ab.Put1(byte(i))
  3469  	return
  3470  
  3471  bad:
  3472  	ctxt.Diag("asmidx: bad address %d/%d/%d", scale, index, base)
  3473  	ab.Put1(0)
  3474  }
  3475  
  3476  func (ab *AsmBuf) relput4(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr) {
  3477  	var rel obj.Reloc
  3478  
  3479  	v := vaddr(ctxt, p, a, &rel)
  3480  	if rel.Siz != 0 {
  3481  		if rel.Siz != 4 {
  3482  			ctxt.Diag("bad reloc")
  3483  		}
  3484  		r := obj.Addrel(cursym)
  3485  		*r = rel
  3486  		r.Off = int32(p.Pc + int64(ab.Len()))
  3487  	}
  3488  
  3489  	ab.PutInt32(int32(v))
  3490  }
  3491  
  3492  func vaddr(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r *obj.Reloc) int64 {
  3493  	if r != nil {
  3494  		*r = obj.Reloc{}
  3495  	}
  3496  
  3497  	switch a.Name {
  3498  	case obj.NAME_STATIC,
  3499  		obj.NAME_GOTREF,
  3500  		obj.NAME_EXTERN:
  3501  		s := a.Sym
  3502  		if r == nil {
  3503  			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  3504  			log.Fatalf("reloc")
  3505  		}
  3506  
  3507  		if a.Name == obj.NAME_GOTREF {
  3508  			r.Siz = 4
  3509  			r.Type = objabi.R_GOTPCREL
  3510  		} else if useAbs(ctxt, s) {
  3511  			r.Siz = 4
  3512  			r.Type = objabi.R_ADDR
  3513  		} else {
  3514  			r.Siz = 4
  3515  			r.Type = objabi.R_PCREL
  3516  		}
  3517  
  3518  		r.Off = -1 // caller must fill in
  3519  		r.Sym = s
  3520  		r.Add = a.Offset
  3521  
  3522  		return 0
  3523  	}
  3524  
  3525  	if (a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Reg == REG_TLS {
  3526  		if r == nil {
  3527  			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  3528  			log.Fatalf("reloc")
  3529  		}
  3530  
  3531  		if !ctxt.Flag_shared || isAndroid || ctxt.Headtype == objabi.Hdarwin {
  3532  			r.Type = objabi.R_TLS_LE
  3533  			r.Siz = 4
  3534  			r.Off = -1 // caller must fill in
  3535  			r.Add = a.Offset
  3536  		}
  3537  		return 0
  3538  	}
  3539  
  3540  	return a.Offset
  3541  }
  3542  
  3543  func (ab *AsmBuf) asmandsz(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, r int, rex int, m64 int) {
  3544  	var base int
  3545  	var rel obj.Reloc
  3546  
  3547  	rex &= 0x40 | Rxr
  3548  	if a.Offset != int64(int32(a.Offset)) {
  3549  		// The rules are slightly different for 386 and AMD64,
  3550  		// mostly for historical reasons. We may unify them later,
  3551  		// but it must be discussed beforehand.
  3552  		//
  3553  		// For 64bit mode only LEAL is allowed to overflow.
  3554  		// It's how https://golang.org/cl/59630 made it.
  3555  		// crypto/sha1/sha1block_amd64.s depends on this feature.
  3556  		//
  3557  		// For 32bit mode rules are more permissive.
  3558  		// If offset fits uint32, it's permitted.
  3559  		// This is allowed for assembly that wants to use 32-bit hex
  3560  		// constants, e.g. LEAL 0x99999999(AX), AX.
  3561  		overflowOK := (ctxt.Arch.Family == sys.AMD64 && p.As == ALEAL) ||
  3562  			(ctxt.Arch.Family != sys.AMD64 &&
  3563  				int64(uint32(a.Offset)) == a.Offset &&
  3564  				ab.rexflag&Rxw == 0)
  3565  		if !overflowOK {
  3566  			ctxt.Diag("offset too large in %s", p)
  3567  		}
  3568  	}
  3569  	v := int32(a.Offset)
  3570  	rel.Siz = 0
  3571  
  3572  	switch a.Type {
  3573  	case obj.TYPE_ADDR:
  3574  		if a.Name == obj.NAME_NONE {
  3575  			ctxt.Diag("unexpected TYPE_ADDR with NAME_NONE")
  3576  		}
  3577  		if a.Index == REG_TLS {
  3578  			ctxt.Diag("unexpected TYPE_ADDR with index==REG_TLS")
  3579  		}
  3580  		goto bad
  3581  
  3582  	case obj.TYPE_REG:
  3583  		const regFirst = REG_AL
  3584  		const regLast = REG_Z31
  3585  		if a.Reg < regFirst || regLast < a.Reg {
  3586  			goto bad
  3587  		}
  3588  		if v != 0 {
  3589  			goto bad
  3590  		}
  3591  		ab.Put1(byte(3<<6 | reg[a.Reg]<<0 | r<<3))
  3592  		ab.rexflag |= regrex[a.Reg]&(0x40|Rxb) | rex
  3593  		return
  3594  	}
  3595  
  3596  	if a.Type != obj.TYPE_MEM {
  3597  		goto bad
  3598  	}
  3599  
  3600  	if a.Index != REG_NONE && a.Index != REG_TLS {
  3601  		base := int(a.Reg)
  3602  		switch a.Name {
  3603  		case obj.NAME_EXTERN,
  3604  			obj.NAME_GOTREF,
  3605  			obj.NAME_STATIC:
  3606  			if !useAbs(ctxt, a.Sym) && ctxt.Arch.Family == sys.AMD64 {
  3607  				goto bad
  3608  			}
  3609  			if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
  3610  				// The base register has already been set. It holds the PC
  3611  				// of this instruction returned by a PC-reading thunk.
  3612  				// See obj6.go:rewriteToPcrel.
  3613  			} else {
  3614  				base = REG_NONE
  3615  			}
  3616  			v = int32(vaddr(ctxt, p, a, &rel))
  3617  
  3618  		case obj.NAME_AUTO,
  3619  			obj.NAME_PARAM:
  3620  			base = REG_SP
  3621  		}
  3622  
  3623  		ab.rexflag |= regrex[int(a.Index)]&Rxx | regrex[base]&Rxb | rex
  3624  		if base == REG_NONE {
  3625  			ab.Put1(byte(0<<6 | 4<<0 | r<<3))
  3626  			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
  3627  			goto putrelv
  3628  		}
  3629  
  3630  		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
  3631  			ab.Put1(byte(0<<6 | 4<<0 | r<<3))
  3632  			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
  3633  			return
  3634  		}
  3635  
  3636  		if disp8, ok := toDisp8(v, p, ab); ok && rel.Siz == 0 {
  3637  			ab.Put1(byte(1<<6 | 4<<0 | r<<3))
  3638  			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
  3639  			ab.Put1(disp8)
  3640  			return
  3641  		}
  3642  
  3643  		ab.Put1(byte(2<<6 | 4<<0 | r<<3))
  3644  		ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
  3645  		goto putrelv
  3646  	}
  3647  
  3648  	base = int(a.Reg)
  3649  	switch a.Name {
  3650  	case obj.NAME_STATIC,
  3651  		obj.NAME_GOTREF,
  3652  		obj.NAME_EXTERN:
  3653  		if a.Sym == nil {
  3654  			ctxt.Diag("bad addr: %v", p)
  3655  		}
  3656  		if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
  3657  			// The base register has already been set. It holds the PC
  3658  			// of this instruction returned by a PC-reading thunk.
  3659  			// See obj6.go:rewriteToPcrel.
  3660  		} else {
  3661  			base = REG_NONE
  3662  		}
  3663  		v = int32(vaddr(ctxt, p, a, &rel))
  3664  
  3665  	case obj.NAME_AUTO,
  3666  		obj.NAME_PARAM:
  3667  		base = REG_SP
  3668  	}
  3669  
  3670  	if base == REG_TLS {
  3671  		v = int32(vaddr(ctxt, p, a, &rel))
  3672  	}
  3673  
  3674  	ab.rexflag |= regrex[base]&Rxb | rex
  3675  	if base == REG_NONE || (REG_CS <= base && base <= REG_GS) || base == REG_TLS {
  3676  		if (a.Sym == nil || !useAbs(ctxt, a.Sym)) && base == REG_NONE && (a.Name == obj.NAME_STATIC || a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_GOTREF) || ctxt.Arch.Family != sys.AMD64 {
  3677  			if a.Name == obj.NAME_GOTREF && (a.Offset != 0 || a.Index != 0 || a.Scale != 0) {
  3678  				ctxt.Diag("%v has offset against gotref", p)
  3679  			}
  3680  			ab.Put1(byte(0<<6 | 5<<0 | r<<3))
  3681  			goto putrelv
  3682  		}
  3683  
  3684  		// temporary
  3685  		ab.Put2(
  3686  			byte(0<<6|4<<0|r<<3), // sib present
  3687  			0<<6|4<<3|5<<0,       // DS:d32
  3688  		)
  3689  		goto putrelv
  3690  	}
  3691  
  3692  	if base == REG_SP || base == REG_R12 {
  3693  		if v == 0 {
  3694  			ab.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
  3695  			ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
  3696  			return
  3697  		}
  3698  
  3699  		if disp8, ok := toDisp8(v, p, ab); ok {
  3700  			ab.Put1(byte(1<<6 | reg[base]<<0 | r<<3))
  3701  			ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
  3702  			ab.Put1(disp8)
  3703  			return
  3704  		}
  3705  
  3706  		ab.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
  3707  		ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
  3708  		goto putrelv
  3709  	}
  3710  
  3711  	if REG_AX <= base && base <= REG_R15 {
  3712  		if a.Index == REG_TLS && !ctxt.Flag_shared && !isAndroid {
  3713  			rel = obj.Reloc{}
  3714  			rel.Type = objabi.R_TLS_LE
  3715  			rel.Siz = 4
  3716  			rel.Sym = nil
  3717  			rel.Add = int64(v)
  3718  			v = 0
  3719  		}
  3720  
  3721  		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
  3722  			ab.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
  3723  			return
  3724  		}
  3725  
  3726  		if disp8, ok := toDisp8(v, p, ab); ok && rel.Siz == 0 {
  3727  			ab.Put2(byte(1<<6|reg[base]<<0|r<<3), disp8)
  3728  			return
  3729  		}
  3730  
  3731  		ab.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
  3732  		goto putrelv
  3733  	}
  3734  
  3735  	goto bad
  3736  
  3737  putrelv:
  3738  	if rel.Siz != 0 {
  3739  		if rel.Siz != 4 {
  3740  			ctxt.Diag("bad rel")
  3741  			goto bad
  3742  		}
  3743  
  3744  		r := obj.Addrel(cursym)
  3745  		*r = rel
  3746  		r.Off = int32(p.Pc + int64(ab.Len()))
  3747  	}
  3748  
  3749  	ab.PutInt32(v)
  3750  	return
  3751  
  3752  bad:
  3753  	ctxt.Diag("asmand: bad address %v", obj.Dconv(p, a))
  3754  }
  3755  
  3756  func (ab *AsmBuf) asmand(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, ra *obj.Addr) {
  3757  	ab.asmandsz(ctxt, cursym, p, a, reg[ra.Reg], regrex[ra.Reg], 0)
  3758  }
  3759  
  3760  func (ab *AsmBuf) asmando(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, o int) {
  3761  	ab.asmandsz(ctxt, cursym, p, a, o, 0, 0)
  3762  }
  3763  
  3764  func bytereg(a *obj.Addr, t *uint8) {
  3765  	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AX <= a.Reg && a.Reg <= REG_R15) {
  3766  		a.Reg += REG_AL - REG_AX
  3767  		*t = 0
  3768  	}
  3769  }
  3770  
  3771  func unbytereg(a *obj.Addr, t *uint8) {
  3772  	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AL <= a.Reg && a.Reg <= REG_R15B) {
  3773  		a.Reg += REG_AX - REG_AL
  3774  		*t = 0
  3775  	}
  3776  }
  3777  
  3778  const (
  3779  	movLit uint8 = iota // Like Zlit
  3780  	movRegMem
  3781  	movMemReg
  3782  	movRegMem2op
  3783  	movMemReg2op
  3784  	movFullPtr // Load full pointer, trash heap (unsupported)
  3785  	movDoubleShift
  3786  	movTLSReg
  3787  )
  3788  
// ymovtab lists special-case instruction forms.
// Each entry gives, in order: the instruction, three operand classes
// (Ynone where a position is unused), one of the mov* kind constants, and
// up to four prefix/opcode/extension bytes.
// The last entry is all zeros and appears to act as a terminator.
// NOTE(review): presumably this table is consulted by doasm when no regular
// ytab form matches; the lookup is not visible here, confirm there.
var ymovtab = []movtab{
	// push
	{APUSHL, Ycs, Ynone, Ynone, movLit, [4]uint8{0x0e, 0}},
	{APUSHL, Yss, Ynone, Ynone, movLit, [4]uint8{0x16, 0}},
	{APUSHL, Yds, Ynone, Ynone, movLit, [4]uint8{0x1e, 0}},
	{APUSHL, Yes, Ynone, Ynone, movLit, [4]uint8{0x06, 0}},
	{APUSHL, Yfs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa0, 0}},
	{APUSHL, Ygs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa8, 0}},
	{APUSHQ, Yfs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa0, 0}},
	{APUSHQ, Ygs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa8, 0}},
	{APUSHW, Ycs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0e, 0}},
	{APUSHW, Yss, Ynone, Ynone, movLit, [4]uint8{Pe, 0x16, 0}},
	{APUSHW, Yds, Ynone, Ynone, movLit, [4]uint8{Pe, 0x1e, 0}},
	{APUSHW, Yes, Ynone, Ynone, movLit, [4]uint8{Pe, 0x06, 0}},
	{APUSHW, Yfs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0f, 0xa0, 0}},
	{APUSHW, Ygs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0f, 0xa8, 0}},

	// pop
	{APOPL, Ynone, Ynone, Yds, movLit, [4]uint8{0x1f, 0}},
	{APOPL, Ynone, Ynone, Yes, movLit, [4]uint8{0x07, 0}},
	{APOPL, Ynone, Ynone, Yss, movLit, [4]uint8{0x17, 0}},
	{APOPL, Ynone, Ynone, Yfs, movLit, [4]uint8{0x0f, 0xa1, 0}},
	{APOPL, Ynone, Ynone, Ygs, movLit, [4]uint8{0x0f, 0xa9, 0}},
	{APOPQ, Ynone, Ynone, Yfs, movLit, [4]uint8{0x0f, 0xa1, 0}},
	{APOPQ, Ynone, Ynone, Ygs, movLit, [4]uint8{0x0f, 0xa9, 0}},
	{APOPW, Ynone, Ynone, Yds, movLit, [4]uint8{Pe, 0x1f, 0}},
	{APOPW, Ynone, Ynone, Yes, movLit, [4]uint8{Pe, 0x07, 0}},
	{APOPW, Ynone, Ynone, Yss, movLit, [4]uint8{Pe, 0x17, 0}},
	{APOPW, Ynone, Ynone, Yfs, movLit, [4]uint8{Pe, 0x0f, 0xa1, 0}},
	{APOPW, Ynone, Ynone, Ygs, movLit, [4]uint8{Pe, 0x0f, 0xa9, 0}},

	// mov seg
	// The second byte of each entry is the segment register number.
	{AMOVW, Yes, Ynone, Yml, movRegMem, [4]uint8{0x8c, 0, 0, 0}},
	{AMOVW, Ycs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 1, 0, 0}},
	{AMOVW, Yss, Ynone, Yml, movRegMem, [4]uint8{0x8c, 2, 0, 0}},
	{AMOVW, Yds, Ynone, Yml, movRegMem, [4]uint8{0x8c, 3, 0, 0}},
	{AMOVW, Yfs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 4, 0, 0}},
	{AMOVW, Ygs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 5, 0, 0}},
	{AMOVW, Yml, Ynone, Yes, movMemReg, [4]uint8{0x8e, 0, 0, 0}},
	{AMOVW, Yml, Ynone, Ycs, movMemReg, [4]uint8{0x8e, 1, 0, 0}},
	{AMOVW, Yml, Ynone, Yss, movMemReg, [4]uint8{0x8e, 2, 0, 0}},
	{AMOVW, Yml, Ynone, Yds, movMemReg, [4]uint8{0x8e, 3, 0, 0}},
	{AMOVW, Yml, Ynone, Yfs, movMemReg, [4]uint8{0x8e, 4, 0, 0}},
	{AMOVW, Yml, Ynone, Ygs, movMemReg, [4]uint8{0x8e, 5, 0, 0}},

	// mov cr
	// The third byte of each entry is the control register number.
	{AMOVL, Ycr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVL, Ycr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVL, Ycr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVL, Ycr4, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVL, Ycr8, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVQ, Ycr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVQ, Ycr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVQ, Ycr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVQ, Ycr4, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVQ, Ycr8, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVL, Yrl, Ynone, Ycr0, movMemReg2op, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVL, Yrl, Ynone, Ycr2, movMemReg2op, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVL, Yrl, Ynone, Ycr3, movMemReg2op, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVL, Yrl, Ynone, Ycr4, movMemReg2op, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVL, Yrl, Ynone, Ycr8, movMemReg2op, [4]uint8{0x0f, 0x22, 8, 0}},
	{AMOVQ, Yrl, Ynone, Ycr0, movMemReg2op, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVQ, Yrl, Ynone, Ycr2, movMemReg2op, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVQ, Yrl, Ynone, Ycr3, movMemReg2op, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVQ, Yrl, Ynone, Ycr4, movMemReg2op, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVQ, Yrl, Ynone, Ycr8, movMemReg2op, [4]uint8{0x0f, 0x22, 8, 0}},

	// mov dr
	// The third byte of each entry is the debug register number.
	{AMOVL, Ydr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVL, Ydr6, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVL, Ydr7, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVQ, Ydr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVQ, Ydr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 2, 0}},
	{AMOVQ, Ydr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 3, 0}},
	{AMOVQ, Ydr6, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVQ, Ydr7, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVL, Yrl, Ynone, Ydr0, movMemReg2op, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVL, Yrl, Ynone, Ydr6, movMemReg2op, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVL, Yrl, Ynone, Ydr7, movMemReg2op, [4]uint8{0x0f, 0x23, 7, 0}},
	{AMOVQ, Yrl, Ynone, Ydr0, movMemReg2op, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVQ, Yrl, Ynone, Ydr2, movMemReg2op, [4]uint8{0x0f, 0x23, 2, 0}},
	{AMOVQ, Yrl, Ynone, Ydr3, movMemReg2op, [4]uint8{0x0f, 0x23, 3, 0}},
	{AMOVQ, Yrl, Ynone, Ydr6, movMemReg2op, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVQ, Yrl, Ynone, Ydr7, movMemReg2op, [4]uint8{0x0f, 0x23, 7, 0}},

	// mov tr
	{AMOVL, Ytr6, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x24, 6, 0}},
	{AMOVL, Ytr7, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x24, 7, 0}},
	{AMOVL, Yml, Ynone, Ytr6, movMemReg2op, [4]uint8{0x0f, 0x26, 6, 0xff}},
	{AMOVL, Yml, Ynone, Ytr7, movMemReg2op, [4]uint8{0x0f, 0x26, 7, 0xff}},

	// lgdt, sgdt, lidt, sidt
	{AMOVL, Ym, Ynone, Ygdtr, movMemReg2op, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVL, Ygdtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVL, Ym, Ynone, Yidtr, movMemReg2op, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVL, Yidtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 1, 0}},
	{AMOVQ, Ym, Ynone, Ygdtr, movMemReg2op, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVQ, Ygdtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVQ, Ym, Ynone, Yidtr, movMemReg2op, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVQ, Yidtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 1, 0}},

	// lldt, sldt
	{AMOVW, Yml, Ynone, Yldtr, movMemReg2op, [4]uint8{0x0f, 0x00, 2, 0}},
	{AMOVW, Yldtr, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x00, 0, 0}},

	// lmsw, smsw
	{AMOVW, Yml, Ynone, Ymsw, movMemReg2op, [4]uint8{0x0f, 0x01, 6, 0}},
	{AMOVW, Ymsw, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x01, 4, 0}},

	// ltr, str
	{AMOVW, Yml, Ynone, Ytask, movMemReg2op, [4]uint8{0x0f, 0x00, 3, 0}},
	{AMOVW, Ytask, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x00, 1, 0}},

	/* load full pointer - unsupported
	{AMOVL, Yml, Ycol, movFullPtr, [4]uint8{0, 0, 0, 0}},
	{AMOVW, Yml, Ycol, movFullPtr, [4]uint8{Pe, 0, 0, 0}},
	*/

	// double shift
	// The shift count operand may be an 8-bit immediate, CL or CX.
	{ASHLL, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHRL, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHLQ, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHRQ, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHLW, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHRW, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},

	// load TLS base
	{AMOVL, Ytls, Ynone, Yrl, movTLSReg, [4]uint8{0, 0, 0, 0}},
	{AMOVQ, Ytls, Ynone, Yrl, movTLSReg, [4]uint8{0, 0, 0, 0}},
	{0, 0, 0, 0, 0, [4]uint8{}},
}
  3932  
  3933  func isax(a *obj.Addr) bool {
  3934  	switch a.Reg {
  3935  	case REG_AX, REG_AL, REG_AH:
  3936  		return true
  3937  	}
  3938  
  3939  	if a.Index == REG_AX {
  3940  		return true
  3941  	}
  3942  	return false
  3943  }
  3944  
  3945  func subreg(p *obj.Prog, from int, to int) {
  3946  	if false { /* debug['Q'] */
  3947  		fmt.Printf("\n%v\ts/%v/%v/\n", p, rconv(from), rconv(to))
  3948  	}
  3949  
  3950  	if int(p.From.Reg) == from {
  3951  		p.From.Reg = int16(to)
  3952  		p.Ft = 0
  3953  	}
  3954  
  3955  	if int(p.To.Reg) == from {
  3956  		p.To.Reg = int16(to)
  3957  		p.Tt = 0
  3958  	}
  3959  
  3960  	if int(p.From.Index) == from {
  3961  		p.From.Index = int16(to)
  3962  		p.Ft = 0
  3963  	}
  3964  
  3965  	if int(p.To.Index) == from {
  3966  		p.To.Index = int16(to)
  3967  		p.Tt = 0
  3968  	}
  3969  
  3970  	if false { /* debug['Q'] */
  3971  		fmt.Printf("%v\n", p)
  3972  	}
  3973  }
  3974  
  3975  func (ab *AsmBuf) mediaop(ctxt *obj.Link, o *Optab, op int, osize int, z int) int {
  3976  	switch op {
  3977  	case Pm, Pe, Pf2, Pf3:
  3978  		if osize != 1 {
  3979  			if op != Pm {
  3980  				ab.Put1(byte(op))
  3981  			}
  3982  			ab.Put1(Pm)
  3983  			z++
  3984  			op = int(o.op[z])
  3985  			break
  3986  		}
  3987  		fallthrough
  3988  
  3989  	default:
  3990  		if ab.Len() == 0 || ab.Last() != Pm {
  3991  			ab.Put1(Pm)
  3992  		}
  3993  	}
  3994  
  3995  	ab.Put1(byte(op))
  3996  	return z
  3997  }
  3998  
  3999  var bpduff1 = []byte{
  4000  	0x48, 0x89, 0x6c, 0x24, 0xf0, // MOVQ BP, -16(SP)
  4001  	0x48, 0x8d, 0x6c, 0x24, 0xf0, // LEAQ -16(SP), BP
  4002  }
  4003  
  4004  var bpduff2 = []byte{
  4005  	0x48, 0x8b, 0x6d, 0x00, // MOVQ 0(BP), BP
  4006  }
  4007  
  4008  // asmevex emits EVEX pregis and opcode byte.
  4009  // In addition to asmvex r/m, vvvv and reg fields also requires optional
  4010  // K-masking register.
  4011  //
  4012  // Expects asmbuf.evex to be properly initialized.
  4013  func (ab *AsmBuf) asmevex(ctxt *obj.Link, p *obj.Prog, rm, v, r, k *obj.Addr) {
  4014  	ab.evexflag = true
  4015  	evex := ab.evex
  4016  
  4017  	rexR := byte(1)
  4018  	evexR := byte(1)
  4019  	rexX := byte(1)
  4020  	rexB := byte(1)
  4021  	if r != nil {
  4022  		if regrex[r.Reg]&Rxr != 0 {
  4023  			rexR = 0 // "ModR/M.reg" selector 4th bit.
  4024  		}
  4025  		if regrex[r.Reg]&RxrEvex != 0 {
  4026  			evexR = 0 // "ModR/M.reg" selector 5th bit.
  4027  		}
  4028  	}
  4029  	if rm != nil {
  4030  		if rm.Index == REG_NONE && regrex[rm.Reg]&RxrEvex != 0 {
  4031  			rexX = 0
  4032  		} else if regrex[rm.Index]&Rxx != 0 {
  4033  			rexX = 0
  4034  		}
  4035  		if regrex[rm.Reg]&Rxb != 0 {
  4036  			rexB = 0
  4037  		}
  4038  	}
  4039  	// P0 = [R][X][B][R'][00][mm]
  4040  	p0 := (rexR << 7) |
  4041  		(rexX << 6) |
  4042  		(rexB << 5) |
  4043  		(evexR << 4) |
  4044  		(0 << 2) |
  4045  		(evex.M() << 0)
  4046  
  4047  	vexV := byte(0)
  4048  	if v != nil {
  4049  		// 4bit-wide reg index.
  4050  		vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
  4051  	}
  4052  	vexV ^= 0x0F
  4053  	// P1 = [W][vvvv][1][pp]
  4054  	p1 := (evex.W() << 7) |
  4055  		(vexV << 3) |
  4056  		(1 << 2) |
  4057  		(evex.P() << 0)
  4058  
  4059  	suffix := evexSuffixMap[p.Scond]
  4060  	evexZ := byte(0)
  4061  	evexLL := evex.L()
  4062  	evexB := byte(0)
  4063  	evexV := byte(1)
  4064  	evexA := byte(0)
  4065  	if suffix.zeroing {
  4066  		if !evex.ZeroingEnabled() {
  4067  			ctxt.Diag("unsupported zeroing: %v", p)
  4068  		}
  4069  		evexZ = 1
  4070  	}
  4071  	switch {
  4072  	case suffix.rounding != rcUnset:
  4073  		if rm != nil && rm.Type == obj.TYPE_MEM {
  4074  			ctxt.Diag("illegal rounding with memory argument: %v", p)
  4075  		} else if !evex.RoundingEnabled() {
  4076  			ctxt.Diag("unsupported rounding: %v", p)
  4077  		}
  4078  		evexB = 1
  4079  		evexLL = suffix.rounding
  4080  	case suffix.broadcast:
  4081  		if rm == nil || rm.Type != obj.TYPE_MEM {
  4082  			ctxt.Diag("illegal broadcast without memory argument: %v", p)
  4083  		} else if !evex.BroadcastEnabled() {
  4084  			ctxt.Diag("unsupported broadcast: %v", p)
  4085  		}
  4086  		evexB = 1
  4087  	case suffix.sae:
  4088  		if rm != nil && rm.Type == obj.TYPE_MEM {
  4089  			ctxt.Diag("illegal SAE with memory argument: %v", p)
  4090  		} else if !evex.SaeEnabled() {
  4091  			ctxt.Diag("unsupported SAE: %v", p)
  4092  		}
  4093  		evexB = 1
  4094  	}
  4095  	if rm != nil && regrex[rm.Index]&RxrEvex != 0 {
  4096  		evexV = 0
  4097  	} else if v != nil && regrex[v.Reg]&RxrEvex != 0 {
  4098  		evexV = 0 // VSR selector 5th bit.
  4099  	}
  4100  	if k != nil {
  4101  		evexA = byte(reg[k.Reg])
  4102  	}
  4103  	// P2 = [z][L'L][b][V'][aaa]
  4104  	p2 := (evexZ << 7) |
  4105  		(evexLL << 5) |
  4106  		(evexB << 4) |
  4107  		(evexV << 3) |
  4108  		(evexA << 0)
  4109  
  4110  	const evexEscapeByte = 0x62
  4111  	ab.Put4(evexEscapeByte, p0, p1, p2)
  4112  	ab.Put1(evex.opcode)
  4113  }
  4114  
  4115  // Emit VEX prefix and opcode byte.
  4116  // The three addresses are the r/m, vvvv, and reg fields.
  4117  // The reg and rm arguments appear in the same order as the
  4118  // arguments to asmand, which typically follows the call to asmvex.
  4119  // The final two arguments are the VEX prefix (see encoding above)
  4120  // and the opcode byte.
  4121  // For details about vex prefix see:
  4122  // https://en.wikipedia.org/wiki/VEX_prefix#Technical_description
  4123  func (ab *AsmBuf) asmvex(ctxt *obj.Link, rm, v, r *obj.Addr, vex, opcode uint8) {
  4124  	ab.vexflag = true
  4125  	rexR := 0
  4126  	if r != nil {
  4127  		rexR = regrex[r.Reg] & Rxr
  4128  	}
  4129  	rexB := 0
  4130  	rexX := 0
  4131  	if rm != nil {
  4132  		rexB = regrex[rm.Reg] & Rxb
  4133  		rexX = regrex[rm.Index] & Rxx
  4134  	}
  4135  	vexM := (vex >> 3) & 0x7
  4136  	vexWLP := vex & 0x87
  4137  	vexV := byte(0)
  4138  	if v != nil {
  4139  		vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
  4140  	}
  4141  	vexV ^= 0xF
  4142  	if vexM == 1 && (rexX|rexB) == 0 && vex&vexW1 == 0 {
  4143  		// Can use 2-byte encoding.
  4144  		ab.Put2(0xc5, byte(rexR<<5)^0x80|vexV<<3|vexWLP)
  4145  	} else {
  4146  		// Must use 3-byte encoding.
  4147  		ab.Put3(0xc4,
  4148  			(byte(rexR|rexX|rexB)<<5)^0xE0|vexM,
  4149  			vexV<<3|vexWLP,
  4150  		)
  4151  	}
  4152  	ab.Put1(opcode)
  4153  }
  4154  
  4155  // regIndex returns register index that fits in 5 bits.
  4156  //
  4157  //	R         : 3 bit | legacy instructions     | N/A
  4158  //	[R/V]EX.R : 1 bit | REX / VEX extension bit | Rxr
  4159  //	EVEX.R    : 1 bit | EVEX extension bit      | RxrEvex
  4160  //
  4161  // Examples:
  4162  //	REG_Z30 => 30
  4163  //	REG_X15 => 15
  4164  //	REG_R9  => 9
  4165  //	REG_AX  => 0
  4166  //
  4167  func regIndex(r int16) int {
  4168  	lower3bits := reg[r]
  4169  	high4bit := regrex[r] & Rxr << 1
  4170  	high5bit := regrex[r] & RxrEvex << 0
  4171  	return lower3bits | high4bit | high5bit
  4172  }
  4173  
  4174  // avx2gatherValid reports whether p satisfies AVX2 gather constraints.
  4175  // Reports errors via ctxt.
  4176  func avx2gatherValid(ctxt *obj.Link, p *obj.Prog) bool {
  4177  	// If any pair of the index, mask, or destination registers
  4178  	// are the same, illegal instruction trap (#UD) is triggered.
  4179  	index := regIndex(p.GetFrom3().Index)
  4180  	mask := regIndex(p.From.Reg)
  4181  	dest := regIndex(p.To.Reg)
  4182  	if dest == mask || dest == index || mask == index {
  4183  		ctxt.Diag("mask, index, and destination registers should be distinct: %v", p)
  4184  		return false
  4185  	}
  4186  
  4187  	return true
  4188  }
  4189  
  4190  // avx512gatherValid reports whether p satisfies AVX512 gather constraints.
  4191  // Reports errors via ctxt.
  4192  func avx512gatherValid(ctxt *obj.Link, p *obj.Prog) bool {
  4193  	// Illegal instruction trap (#UD) is triggered if the destination vector
  4194  	// register is the same as index vector in VSIB.
  4195  	index := regIndex(p.From.Index)
  4196  	dest := regIndex(p.To.Reg)
  4197  	if dest == index {
  4198  		ctxt.Diag("index and destination registers should be distinct: %v", p)
  4199  		return false
  4200  	}
  4201  
  4202  	return true
  4203  }
  4204  
  4205  func (ab *AsmBuf) doasm(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
  4206  	o := opindex[p.As&obj.AMask]
  4207  
  4208  	if o == nil {
  4209  		ctxt.Diag("asmins: missing op %v", p)
  4210  		return
  4211  	}
  4212  
  4213  	if pre := prefixof(ctxt, &p.From); pre != 0 {
  4214  		ab.Put1(byte(pre))
  4215  	}
  4216  	if pre := prefixof(ctxt, &p.To); pre != 0 {
  4217  		ab.Put1(byte(pre))
  4218  	}
  4219  
  4220  	// Checks to warn about instruction/arguments combinations that
  4221  	// will unconditionally trigger illegal instruction trap (#UD).
  4222  	switch p.As {
  4223  	case AVGATHERDPD,
  4224  		AVGATHERQPD,
  4225  		AVGATHERDPS,
  4226  		AVGATHERQPS,
  4227  		AVPGATHERDD,
  4228  		AVPGATHERQD,
  4229  		AVPGATHERDQ,
  4230  		AVPGATHERQQ:
  4231  		// AVX512 gather requires explicit K mask.
  4232  		if p.GetFrom3().Reg >= REG_K0 && p.GetFrom3().Reg <= REG_K7 {
  4233  			if !avx512gatherValid(ctxt, p) {
  4234  				return
  4235  			}
  4236  		} else {
  4237  			if !avx2gatherValid(ctxt, p) {
  4238  				return
  4239  			}
  4240  		}
  4241  	}
  4242  
  4243  	if p.Ft == 0 {
  4244  		p.Ft = uint8(oclass(ctxt, p, &p.From))
  4245  	}
  4246  	if p.Tt == 0 {
  4247  		p.Tt = uint8(oclass(ctxt, p, &p.To))
  4248  	}
  4249  
  4250  	ft := int(p.Ft) * Ymax
  4251  	var f3t int
  4252  	tt := int(p.Tt) * Ymax
  4253  
  4254  	xo := obj.Bool2int(o.op[0] == 0x0f)
  4255  	z := 0
  4256  	var a *obj.Addr
  4257  	var l int
  4258  	var op int
  4259  	var q *obj.Prog
  4260  	var r *obj.Reloc
  4261  	var rel obj.Reloc
  4262  	var v int64
  4263  
  4264  	args := make([]int, 0, argListMax)
  4265  	if ft != Ynone*Ymax {
  4266  		args = append(args, ft)
  4267  	}
  4268  	for i := range p.RestArgs {
  4269  		args = append(args, oclass(ctxt, p, &p.RestArgs[i].Addr)*Ymax)
  4270  	}
  4271  	if tt != Ynone*Ymax {
  4272  		args = append(args, tt)
  4273  	}
  4274  
  4275  	for _, yt := range o.ytab {
  4276  		// ytab matching is purely args-based,
  4277  		// but AVX512 suffixes like "Z" or "RU_SAE" will
  4278  		// add EVEX-only filter that will reject non-EVEX matches.
  4279  		//
  4280  		// Consider "VADDPD.BCST 2032(DX), X0, X0".
  4281  		// Without this rule, operands will lead to VEX-encoded form
  4282  		// and produce "c5b15813" encoding.
  4283  		if !yt.match(args) {
  4284  			// "xo" is always zero for VEX/EVEX encoded insts.
  4285  			z += int(yt.zoffset) + xo
  4286  		} else {
  4287  			if p.Scond != 0 && !evexZcase(yt.zcase) {
  4288  				// Do not signal error and continue to search
  4289  				// for matching EVEX-encoded form.
  4290  				z += int(yt.zoffset)
  4291  				continue
  4292  			}
  4293  
  4294  			switch o.prefix {
  4295  			case Px1: // first option valid only in 32-bit mode
  4296  				if ctxt.Arch.Family == sys.AMD64 && z == 0 {
  4297  					z += int(yt.zoffset) + xo
  4298  					continue
  4299  				}
  4300  			case Pq: // 16 bit escape and opcode escape
  4301  				ab.Put2(Pe, Pm)
  4302  
  4303  			case Pq3: // 16 bit escape and opcode escape + REX.W
  4304  				ab.rexflag |= Pw
  4305  				ab.Put2(Pe, Pm)
  4306  
  4307  			case Pq4: // 66 0F 38
  4308  				ab.Put3(0x66, 0x0F, 0x38)
  4309  
  4310  			case Pq4w: // 66 0F 38 + REX.W
  4311  				ab.rexflag |= Pw
  4312  				ab.Put3(0x66, 0x0F, 0x38)
  4313  
  4314  			case Pq5: // F3 0F 38
  4315  				ab.Put3(0xF3, 0x0F, 0x38)
  4316  
  4317  			case Pq5w: //  F3 0F 38 + REX.W
  4318  				ab.rexflag |= Pw
  4319  				ab.Put3(0xF3, 0x0F, 0x38)
  4320  
  4321  			case Pf2, // xmm opcode escape
  4322  				Pf3:
  4323  				ab.Put2(o.prefix, Pm)
  4324  
  4325  			case Pef3:
  4326  				ab.Put3(Pe, Pf3, Pm)
  4327  
  4328  			case Pfw: // xmm opcode escape + REX.W
  4329  				ab.rexflag |= Pw
  4330  				ab.Put2(Pf3, Pm)
  4331  
  4332  			case Pm: // opcode escape
  4333  				ab.Put1(Pm)
  4334  
  4335  			case Pe: // 16 bit escape
  4336  				ab.Put1(Pe)
  4337  
  4338  			case Pw: // 64-bit escape
  4339  				if ctxt.Arch.Family != sys.AMD64 {
  4340  					ctxt.Diag("asmins: illegal 64: %v", p)
  4341  				}
  4342  				ab.rexflag |= Pw
  4343  
  4344  			case Pw8: // 64-bit escape if z >= 8
  4345  				if z >= 8 {
  4346  					if ctxt.Arch.Family != sys.AMD64 {
  4347  						ctxt.Diag("asmins: illegal 64: %v", p)
  4348  					}
  4349  					ab.rexflag |= Pw
  4350  				}
  4351  
  4352  			case Pb: // botch
  4353  				if ctxt.Arch.Family != sys.AMD64 && (isbadbyte(&p.From) || isbadbyte(&p.To)) {
  4354  					goto bad
  4355  				}
  4356  				// NOTE(rsc): This is probably safe to do always,
  4357  				// but when enabled it chooses different encodings
  4358  				// than the old cmd/internal/obj/i386 code did,
  4359  				// which breaks our "same bits out" checks.
  4360  				// In particular, CMPB AX, $0 encodes as 80 f8 00
  4361  				// in the original obj/i386, and it would encode
  4362  				// (using a valid, shorter form) as 3c 00 if we enabled
  4363  				// the call to bytereg here.
  4364  				if ctxt.Arch.Family == sys.AMD64 {
  4365  					bytereg(&p.From, &p.Ft)
  4366  					bytereg(&p.To, &p.Tt)
  4367  				}
  4368  
  4369  			case P32: // 32 bit but illegal if 64-bit mode
  4370  				if ctxt.Arch.Family == sys.AMD64 {
  4371  					ctxt.Diag("asmins: illegal in 64-bit mode: %v", p)
  4372  				}
  4373  
  4374  			case Py: // 64-bit only, no prefix
  4375  				if ctxt.Arch.Family != sys.AMD64 {
  4376  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
  4377  				}
  4378  
  4379  			case Py1: // 64-bit only if z < 1, no prefix
  4380  				if z < 1 && ctxt.Arch.Family != sys.AMD64 {
  4381  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
  4382  				}
  4383  
  4384  			case Py3: // 64-bit only if z < 3, no prefix
  4385  				if z < 3 && ctxt.Arch.Family != sys.AMD64 {
  4386  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
  4387  				}
  4388  			}
  4389  
  4390  			if z >= len(o.op) {
  4391  				log.Fatalf("asmins bad table %v", p)
  4392  			}
  4393  			op = int(o.op[z])
  4394  			if op == 0x0f {
  4395  				ab.Put1(byte(op))
  4396  				z++
  4397  				op = int(o.op[z])
  4398  			}
  4399  
  4400  			switch yt.zcase {
  4401  			default:
  4402  				ctxt.Diag("asmins: unknown z %d %v", yt.zcase, p)
  4403  				return
  4404  
  4405  			case Zpseudo:
  4406  				break
  4407  
  4408  			case Zlit:
  4409  				ab.PutOpBytesLit(z, &o.op)
  4410  
  4411  			case Zlitr_m:
  4412  				ab.PutOpBytesLit(z, &o.op)
  4413  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4414  
  4415  			case Zlitm_r:
  4416  				ab.PutOpBytesLit(z, &o.op)
  4417  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4418  
  4419  			case Zlit_m_r:
  4420  				ab.PutOpBytesLit(z, &o.op)
  4421  				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4422  
  4423  			case Zmb_r:
  4424  				bytereg(&p.From, &p.Ft)
  4425  				fallthrough
  4426  
  4427  			case Zm_r:
  4428  				ab.Put1(byte(op))
  4429  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4430  
  4431  			case Z_m_r:
  4432  				ab.Put1(byte(op))
  4433  				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4434  
  4435  			case Zm2_r:
  4436  				ab.Put2(byte(op), o.op[z+1])
  4437  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4438  
  4439  			case Zm_r_xm:
  4440  				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4441  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4442  
  4443  			case Zm_r_xm_nr:
  4444  				ab.rexflag = 0
  4445  				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4446  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4447  
  4448  			case Zm_r_i_xm:
  4449  				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4450  				ab.asmand(ctxt, cursym, p, &p.From, p.GetFrom3())
  4451  				ab.Put1(byte(p.To.Offset))
  4452  
  4453  			case Zibm_r, Zibr_m:
  4454  				ab.PutOpBytesLit(z, &o.op)
  4455  				if yt.zcase == Zibr_m {
  4456  					ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
  4457  				} else {
  4458  					ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4459  				}
  4460  				switch {
  4461  				default:
  4462  					ab.Put1(byte(p.From.Offset))
  4463  				case yt.args[0] == Yi32 && o.prefix == Pe:
  4464  					ab.PutInt16(int16(p.From.Offset))
  4465  				case yt.args[0] == Yi32:
  4466  					ab.PutInt32(int32(p.From.Offset))
  4467  				}
  4468  
  4469  			case Zaut_r:
  4470  				ab.Put1(0x8d) // leal
  4471  				if p.From.Type != obj.TYPE_ADDR {
  4472  					ctxt.Diag("asmins: Zaut sb type ADDR")
  4473  				}
  4474  				p.From.Type = obj.TYPE_MEM
  4475  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4476  				p.From.Type = obj.TYPE_ADDR
  4477  
  4478  			case Zm_o:
  4479  				ab.Put1(byte(op))
  4480  				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
  4481  
  4482  			case Zr_m:
  4483  				ab.Put1(byte(op))
  4484  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4485  
  4486  			case Zvex:
  4487  				ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
  4488  
  4489  			case Zvex_rm_v_r:
  4490  				ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
  4491  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4492  
  4493  			case Zvex_rm_v_ro:
  4494  				ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
  4495  				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+2]))
  4496  
  4497  			case Zvex_i_rm_vo:
  4498  				ab.asmvex(ctxt, p.GetFrom3(), &p.To, nil, o.op[z], o.op[z+1])
  4499  				ab.asmando(ctxt, cursym, p, p.GetFrom3(), int(o.op[z+2]))
  4500  				ab.Put1(byte(p.From.Offset))
  4501  
  4502  			case Zvex_i_r_v:
  4503  				ab.asmvex(ctxt, p.GetFrom3(), &p.To, nil, o.op[z], o.op[z+1])
  4504  				regnum := byte(0x7)
  4505  				if p.GetFrom3().Reg >= REG_X0 && p.GetFrom3().Reg <= REG_X15 {
  4506  					regnum &= byte(p.GetFrom3().Reg - REG_X0)
  4507  				} else {
  4508  					regnum &= byte(p.GetFrom3().Reg - REG_Y0)
  4509  				}
  4510  				ab.Put1(o.op[z+2] | regnum)
  4511  				ab.Put1(byte(p.From.Offset))
  4512  
  4513  			case Zvex_i_rm_v_r:
  4514  				imm, from, from3, to := unpackOps4(p)
  4515  				ab.asmvex(ctxt, from, from3, to, o.op[z], o.op[z+1])
  4516  				ab.asmand(ctxt, cursym, p, from, to)
  4517  				ab.Put1(byte(imm.Offset))
  4518  
  4519  			case Zvex_i_rm_r:
  4520  				ab.asmvex(ctxt, p.GetFrom3(), nil, &p.To, o.op[z], o.op[z+1])
  4521  				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4522  				ab.Put1(byte(p.From.Offset))
  4523  
  4524  			case Zvex_v_rm_r:
  4525  				ab.asmvex(ctxt, p.GetFrom3(), &p.From, &p.To, o.op[z], o.op[z+1])
  4526  				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4527  
  4528  			case Zvex_r_v_rm:
  4529  				ab.asmvex(ctxt, &p.To, p.GetFrom3(), &p.From, o.op[z], o.op[z+1])
  4530  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4531  
  4532  			case Zvex_rm_r_vo:
  4533  				ab.asmvex(ctxt, &p.From, &p.To, p.GetFrom3(), o.op[z], o.op[z+1])
  4534  				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+2]))
  4535  
  4536  			case Zvex_i_r_rm:
  4537  				ab.asmvex(ctxt, &p.To, nil, p.GetFrom3(), o.op[z], o.op[z+1])
  4538  				ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
  4539  				ab.Put1(byte(p.From.Offset))
  4540  
  4541  			case Zvex_hr_rm_v_r:
  4542  				hr, from, from3, to := unpackOps4(p)
  4543  				ab.asmvex(ctxt, from, from3, to, o.op[z], o.op[z+1])
  4544  				ab.asmand(ctxt, cursym, p, from, to)
  4545  				ab.Put1(byte(regIndex(hr.Reg) << 4))
  4546  
  4547  			case Zevex_k_rmo:
  4548  				ab.evex = newEVEXBits(z, &o.op)
  4549  				ab.asmevex(ctxt, p, &p.To, nil, nil, &p.From)
  4550  				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+3]))
  4551  
  4552  			case Zevex_i_rm_vo:
  4553  				ab.evex = newEVEXBits(z, &o.op)
  4554  				ab.asmevex(ctxt, p, p.GetFrom3(), &p.To, nil, nil)
  4555  				ab.asmando(ctxt, cursym, p, p.GetFrom3(), int(o.op[z+3]))
  4556  				ab.Put1(byte(p.From.Offset))
  4557  
  4558  			case Zevex_i_rm_k_vo:
  4559  				imm, from, kmask, to := unpackOps4(p)
  4560  				ab.evex = newEVEXBits(z, &o.op)
  4561  				ab.asmevex(ctxt, p, from, to, nil, kmask)
  4562  				ab.asmando(ctxt, cursym, p, from, int(o.op[z+3]))
  4563  				ab.Put1(byte(imm.Offset))
  4564  
  4565  			case Zevex_i_r_rm:
  4566  				ab.evex = newEVEXBits(z, &o.op)
  4567  				ab.asmevex(ctxt, p, &p.To, nil, p.GetFrom3(), nil)
  4568  				ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
  4569  				ab.Put1(byte(p.From.Offset))
  4570  
  4571  			case Zevex_i_r_k_rm:
  4572  				imm, from, kmask, to := unpackOps4(p)
  4573  				ab.evex = newEVEXBits(z, &o.op)
  4574  				ab.asmevex(ctxt, p, to, nil, from, kmask)
  4575  				ab.asmand(ctxt, cursym, p, to, from)
  4576  				ab.Put1(byte(imm.Offset))
  4577  
  4578  			case Zevex_i_rm_r:
  4579  				ab.evex = newEVEXBits(z, &o.op)
  4580  				ab.asmevex(ctxt, p, p.GetFrom3(), nil, &p.To, nil)
  4581  				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4582  				ab.Put1(byte(p.From.Offset))
  4583  
  4584  			case Zevex_i_rm_k_r:
  4585  				imm, from, kmask, to := unpackOps4(p)
  4586  				ab.evex = newEVEXBits(z, &o.op)
  4587  				ab.asmevex(ctxt, p, from, nil, to, kmask)
  4588  				ab.asmand(ctxt, cursym, p, from, to)
  4589  				ab.Put1(byte(imm.Offset))
  4590  
  4591  			case Zevex_i_rm_v_r:
  4592  				imm, from, from3, to := unpackOps4(p)
  4593  				ab.evex = newEVEXBits(z, &o.op)
  4594  				ab.asmevex(ctxt, p, from, from3, to, nil)
  4595  				ab.asmand(ctxt, cursym, p, from, to)
  4596  				ab.Put1(byte(imm.Offset))
  4597  
  4598  			case Zevex_i_rm_v_k_r:
  4599  				imm, from, from3, kmask, to := unpackOps5(p)
  4600  				ab.evex = newEVEXBits(z, &o.op)
  4601  				ab.asmevex(ctxt, p, from, from3, to, kmask)
  4602  				ab.asmand(ctxt, cursym, p, from, to)
  4603  				ab.Put1(byte(imm.Offset))
  4604  
  4605  			case Zevex_r_v_rm:
  4606  				ab.evex = newEVEXBits(z, &o.op)
  4607  				ab.asmevex(ctxt, p, &p.To, p.GetFrom3(), &p.From, nil)
  4608  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4609  
  4610  			case Zevex_rm_v_r:
  4611  				ab.evex = newEVEXBits(z, &o.op)
  4612  				ab.asmevex(ctxt, p, &p.From, p.GetFrom3(), &p.To, nil)
  4613  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4614  
  4615  			case Zevex_rm_k_r:
  4616  				ab.evex = newEVEXBits(z, &o.op)
  4617  				ab.asmevex(ctxt, p, &p.From, nil, &p.To, p.GetFrom3())
  4618  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4619  
  4620  			case Zevex_r_k_rm:
  4621  				ab.evex = newEVEXBits(z, &o.op)
  4622  				ab.asmevex(ctxt, p, &p.To, nil, &p.From, p.GetFrom3())
  4623  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4624  
  4625  			case Zevex_rm_v_k_r:
  4626  				from, from3, kmask, to := unpackOps4(p)
  4627  				ab.evex = newEVEXBits(z, &o.op)
  4628  				ab.asmevex(ctxt, p, from, from3, to, kmask)
  4629  				ab.asmand(ctxt, cursym, p, from, to)
  4630  
  4631  			case Zevex_r_v_k_rm:
  4632  				from, from3, kmask, to := unpackOps4(p)
  4633  				ab.evex = newEVEXBits(z, &o.op)
  4634  				ab.asmevex(ctxt, p, to, from3, from, kmask)
  4635  				ab.asmand(ctxt, cursym, p, to, from)
  4636  
  4637  			case Zr_m_xm:
  4638  				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4639  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4640  
  4641  			case Zr_m_xm_nr:
  4642  				ab.rexflag = 0
  4643  				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4644  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4645  
  4646  			case Zo_m:
  4647  				ab.Put1(byte(op))
  4648  				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  4649  
  4650  			case Zcallindreg:
  4651  				r = obj.Addrel(cursym)
  4652  				r.Off = int32(p.Pc)
  4653  				r.Type = objabi.R_CALLIND
  4654  				r.Siz = 0
  4655  				fallthrough
  4656  
  4657  			case Zo_m64:
  4658  				ab.Put1(byte(op))
  4659  				ab.asmandsz(ctxt, cursym, p, &p.To, int(o.op[z+1]), 0, 1)
  4660  
  4661  			case Zm_ibo:
  4662  				ab.Put1(byte(op))
  4663  				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
  4664  				ab.Put1(byte(vaddr(ctxt, p, &p.To, nil)))
  4665  
  4666  			case Zibo_m:
  4667  				ab.Put1(byte(op))
  4668  				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  4669  				ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  4670  
  4671  			case Zibo_m_xm:
  4672  				z = ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4673  				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  4674  				ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  4675  
  4676  			case Z_ib, Zib_:
  4677  				if yt.zcase == Zib_ {
  4678  					a = &p.From
  4679  				} else {
  4680  					a = &p.To
  4681  				}
  4682  				ab.Put1(byte(op))
  4683  				if p.As == AXABORT {
  4684  					ab.Put1(o.op[z+1])
  4685  				}
  4686  				ab.Put1(byte(vaddr(ctxt, p, a, nil)))
  4687  
  4688  			case Zib_rp:
  4689  				ab.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  4690  				ab.Put2(byte(op+reg[p.To.Reg]), byte(vaddr(ctxt, p, &p.From, nil)))
  4691  
  4692  			case Zil_rp:
  4693  				ab.rexflag |= regrex[p.To.Reg] & Rxb
  4694  				ab.Put1(byte(op + reg[p.To.Reg]))
  4695  				if o.prefix == Pe {
  4696  					v = vaddr(ctxt, p, &p.From, nil)
  4697  					ab.PutInt16(int16(v))
  4698  				} else {
  4699  					ab.relput4(ctxt, cursym, p, &p.From)
  4700  				}
  4701  
  4702  			case Zo_iw:
  4703  				ab.Put1(byte(op))
  4704  				if p.From.Type != obj.TYPE_NONE {
  4705  					v = vaddr(ctxt, p, &p.From, nil)
  4706  					ab.PutInt16(int16(v))
  4707  				}
  4708  
  4709  			case Ziq_rp:
  4710  				v = vaddr(ctxt, p, &p.From, &rel)
  4711  				l = int(v >> 32)
  4712  				if l == 0 && rel.Siz != 8 {
  4713  					ab.rexflag &^= (0x40 | Rxw)
  4714  
  4715  					ab.rexflag |= regrex[p.To.Reg] & Rxb
  4716  					ab.Put1(byte(0xb8 + reg[p.To.Reg]))
  4717  					if rel.Type != 0 {
  4718  						r = obj.Addrel(cursym)
  4719  						*r = rel
  4720  						r.Off = int32(p.Pc + int64(ab.Len()))
  4721  					}
  4722  
  4723  					ab.PutInt32(int32(v))
  4724  				} else if l == -1 && uint64(v)&(uint64(1)<<31) != 0 { // sign extend
  4725  					ab.Put1(0xc7)
  4726  					ab.asmando(ctxt, cursym, p, &p.To, 0)
  4727  
  4728  					ab.PutInt32(int32(v)) // need all 8
  4729  				} else {
  4730  					ab.rexflag |= regrex[p.To.Reg] & Rxb
  4731  					ab.Put1(byte(op + reg[p.To.Reg]))
  4732  					if rel.Type != 0 {
  4733  						r = obj.Addrel(cursym)
  4734  						*r = rel
  4735  						r.Off = int32(p.Pc + int64(ab.Len()))
  4736  					}
  4737  
  4738  					ab.PutInt64(v)
  4739  				}
  4740  
  4741  			case Zib_rr:
  4742  				ab.Put1(byte(op))
  4743  				ab.asmand(ctxt, cursym, p, &p.To, &p.To)
  4744  				ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  4745  
  4746  			case Z_il, Zil_:
  4747  				if yt.zcase == Zil_ {
  4748  					a = &p.From
  4749  				} else {
  4750  					a = &p.To
  4751  				}
  4752  				ab.Put1(byte(op))
  4753  				if o.prefix == Pe {
  4754  					v = vaddr(ctxt, p, a, nil)
  4755  					ab.PutInt16(int16(v))
  4756  				} else {
  4757  					ab.relput4(ctxt, cursym, p, a)
  4758  				}
  4759  
  4760  			case Zm_ilo, Zilo_m:
  4761  				ab.Put1(byte(op))
  4762  				if yt.zcase == Zilo_m {
  4763  					a = &p.From
  4764  					ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  4765  				} else {
  4766  					a = &p.To
  4767  					ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
  4768  				}
  4769  
  4770  				if o.prefix == Pe {
  4771  					v = vaddr(ctxt, p, a, nil)
  4772  					ab.PutInt16(int16(v))
  4773  				} else {
  4774  					ab.relput4(ctxt, cursym, p, a)
  4775  				}
  4776  
  4777  			case Zil_rr:
  4778  				ab.Put1(byte(op))
  4779  				ab.asmand(ctxt, cursym, p, &p.To, &p.To)
  4780  				if o.prefix == Pe {
  4781  					v = vaddr(ctxt, p, &p.From, nil)
  4782  					ab.PutInt16(int16(v))
  4783  				} else {
  4784  					ab.relput4(ctxt, cursym, p, &p.From)
  4785  				}
  4786  
  4787  			case Z_rp:
  4788  				ab.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  4789  				ab.Put1(byte(op + reg[p.To.Reg]))
  4790  
  4791  			case Zrp_:
  4792  				ab.rexflag |= regrex[p.From.Reg] & (Rxb | 0x40)
  4793  				ab.Put1(byte(op + reg[p.From.Reg]))
  4794  
  4795  			case Zcallcon, Zjmpcon:
  4796  				if yt.zcase == Zcallcon {
  4797  					ab.Put1(byte(op))
  4798  				} else {
  4799  					ab.Put1(o.op[z+1])
  4800  				}
  4801  				r = obj.Addrel(cursym)
  4802  				r.Off = int32(p.Pc + int64(ab.Len()))
  4803  				r.Type = objabi.R_PCREL
  4804  				r.Siz = 4
  4805  				r.Add = p.To.Offset
  4806  				ab.PutInt32(0)
  4807  
  4808  			case Zcallind:
  4809  				ab.Put2(byte(op), o.op[z+1])
  4810  				r = obj.Addrel(cursym)
  4811  				r.Off = int32(p.Pc + int64(ab.Len()))
  4812  				if ctxt.Arch.Family == sys.AMD64 {
  4813  					r.Type = objabi.R_PCREL
  4814  				} else {
  4815  					r.Type = objabi.R_ADDR
  4816  				}
  4817  				r.Siz = 4
  4818  				r.Add = p.To.Offset
  4819  				r.Sym = p.To.Sym
  4820  				ab.PutInt32(0)
  4821  
  4822  			case Zcall, Zcallduff:
  4823  				if p.To.Sym == nil {
  4824  					ctxt.Diag("call without target")
  4825  					ctxt.DiagFlush()
  4826  					log.Fatalf("bad code")
  4827  				}
  4828  
  4829  				if yt.zcase == Zcallduff && ctxt.Flag_dynlink {
  4830  					ctxt.Diag("directly calling duff when dynamically linking Go")
  4831  				}
  4832  
  4833  				if yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 {
  4834  					// Maintain BP around call, since duffcopy/duffzero can't do it
  4835  					// (the call jumps into the middle of the function).
  4836  					// This makes it possible to see call sites for duffcopy/duffzero in
  4837  					// BP-based profiling tools like Linux perf (which is the
  4838  					// whole point of maintaining frame pointers in Go).
  4839  					// MOVQ BP, -16(SP)
  4840  					// LEAQ -16(SP), BP
  4841  					ab.Put(bpduff1)
  4842  				}
  4843  				ab.Put1(byte(op))
  4844  				r = obj.Addrel(cursym)
  4845  				r.Off = int32(p.Pc + int64(ab.Len()))
  4846  				r.Sym = p.To.Sym
  4847  				r.Add = p.To.Offset
  4848  				r.Type = objabi.R_CALL
  4849  				r.Siz = 4
  4850  				ab.PutInt32(0)
  4851  
  4852  				if yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 {
  4853  					// Pop BP pushed above.
  4854  					// MOVQ 0(BP), BP
  4855  					ab.Put(bpduff2)
  4856  				}
  4857  
  4858  			// TODO: jump across functions needs reloc
  4859  			case Zbr, Zjmp, Zloop:
  4860  				if p.As == AXBEGIN {
  4861  					ab.Put1(byte(op))
  4862  				}
  4863  				if p.To.Sym != nil {
  4864  					if yt.zcase != Zjmp {
  4865  						ctxt.Diag("branch to ATEXT")
  4866  						ctxt.DiagFlush()
  4867  						log.Fatalf("bad code")
  4868  					}
  4869  
  4870  					ab.Put1(o.op[z+1])
  4871  					r = obj.Addrel(cursym)
  4872  					r.Off = int32(p.Pc + int64(ab.Len()))
  4873  					r.Sym = p.To.Sym
  4874  					// Note: R_CALL instead of R_PCREL. R_CALL is more permissive in that
  4875  					// it can point to a trampoline instead of the destination itself.
  4876  					r.Type = objabi.R_CALL
  4877  					r.Siz = 4
  4878  					ab.PutInt32(0)
  4879  					break
  4880  				}
  4881  
  4882  				// Assumes q is in this function.
  4883  				// TODO: Check in input, preserve in brchain.
  4884  
  4885  				// Fill in backward jump now.
  4886  				q = p.To.Target()
  4887  
  4888  				if q == nil {
  4889  					ctxt.Diag("jmp/branch/loop without target")
  4890  					ctxt.DiagFlush()
  4891  					log.Fatalf("bad code")
  4892  				}
  4893  
  4894  				if p.Back&branchBackwards != 0 {
  4895  					v = q.Pc - (p.Pc + 2)
  4896  					if v >= -128 && p.As != AXBEGIN {
  4897  						if p.As == AJCXZL {
  4898  							ab.Put1(0x67)
  4899  						}
  4900  						ab.Put2(byte(op), byte(v))
  4901  					} else if yt.zcase == Zloop {
  4902  						ctxt.Diag("loop too far: %v", p)
  4903  					} else {
  4904  						v -= 5 - 2
  4905  						if p.As == AXBEGIN {
  4906  							v--
  4907  						}
  4908  						if yt.zcase == Zbr {
  4909  							ab.Put1(0x0f)
  4910  							v--
  4911  						}
  4912  
  4913  						ab.Put1(o.op[z+1])
  4914  						ab.PutInt32(int32(v))
  4915  					}
  4916  
  4917  					break
  4918  				}
  4919  
  4920  				// Annotate target; will fill in later.
  4921  				p.Forwd = q.Rel
  4922  
  4923  				q.Rel = p
  4924  				if p.Back&branchShort != 0 && p.As != AXBEGIN {
  4925  					if p.As == AJCXZL {
  4926  						ab.Put1(0x67)
  4927  					}
  4928  					ab.Put2(byte(op), 0)
  4929  				} else if yt.zcase == Zloop {
  4930  					ctxt.Diag("loop too far: %v", p)
  4931  				} else {
  4932  					if yt.zcase == Zbr {
  4933  						ab.Put1(0x0f)
  4934  					}
  4935  					ab.Put1(o.op[z+1])
  4936  					ab.PutInt32(0)
  4937  				}
  4938  
  4939  			case Zbyte:
  4940  				v = vaddr(ctxt, p, &p.From, &rel)
  4941  				if rel.Siz != 0 {
  4942  					rel.Siz = uint8(op)
  4943  					r = obj.Addrel(cursym)
  4944  					*r = rel
  4945  					r.Off = int32(p.Pc + int64(ab.Len()))
  4946  				}
  4947  
  4948  				ab.Put1(byte(v))
  4949  				if op > 1 {
  4950  					ab.Put1(byte(v >> 8))
  4951  					if op > 2 {
  4952  						ab.PutInt16(int16(v >> 16))
  4953  						if op > 4 {
  4954  							ab.PutInt32(int32(v >> 32))
  4955  						}
  4956  					}
  4957  				}
  4958  			}
  4959  
  4960  			return
  4961  		}
  4962  	}
  4963  	f3t = Ynone * Ymax
  4964  	if p.GetFrom3() != nil {
  4965  		f3t = oclass(ctxt, p, p.GetFrom3()) * Ymax
  4966  	}
  4967  	for mo := ymovtab; mo[0].as != 0; mo = mo[1:] {
  4968  		var pp obj.Prog
  4969  		var t []byte
  4970  		if p.As == mo[0].as {
  4971  			if ycover[ft+int(mo[0].ft)] != 0 && ycover[f3t+int(mo[0].f3t)] != 0 && ycover[tt+int(mo[0].tt)] != 0 {
  4972  				t = mo[0].op[:]
  4973  				switch mo[0].code {
  4974  				default:
  4975  					ctxt.Diag("asmins: unknown mov %d %v", mo[0].code, p)
  4976  
  4977  				case movLit:
  4978  					for z = 0; t[z] != 0; z++ {
  4979  						ab.Put1(t[z])
  4980  					}
  4981  
  4982  				case movRegMem:
  4983  					ab.Put1(t[0])
  4984  					ab.asmando(ctxt, cursym, p, &p.To, int(t[1]))
  4985  
  4986  				case movMemReg:
  4987  					ab.Put1(t[0])
  4988  					ab.asmando(ctxt, cursym, p, &p.From, int(t[1]))
  4989  
  4990  				case movRegMem2op: // r,m - 2op
  4991  					ab.Put2(t[0], t[1])
  4992  					ab.asmando(ctxt, cursym, p, &p.To, int(t[2]))
  4993  					ab.rexflag |= regrex[p.From.Reg] & (Rxr | 0x40)
  4994  
  4995  				case movMemReg2op:
  4996  					ab.Put2(t[0], t[1])
  4997  					ab.asmando(ctxt, cursym, p, &p.From, int(t[2]))
  4998  					ab.rexflag |= regrex[p.To.Reg] & (Rxr | 0x40)
  4999  
  5000  				case movFullPtr:
  5001  					if t[0] != 0 {
  5002  						ab.Put1(t[0])
  5003  					}
  5004  					switch p.To.Index {
  5005  					default:
  5006  						goto bad
  5007  
  5008  					case REG_DS:
  5009  						ab.Put1(0xc5)
  5010  
  5011  					case REG_SS:
  5012  						ab.Put2(0x0f, 0xb2)
  5013  
  5014  					case REG_ES:
  5015  						ab.Put1(0xc4)
  5016  
  5017  					case REG_FS:
  5018  						ab.Put2(0x0f, 0xb4)
  5019  
  5020  					case REG_GS:
  5021  						ab.Put2(0x0f, 0xb5)
  5022  					}
  5023  
  5024  					ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  5025  
  5026  				case movDoubleShift:
  5027  					if t[0] == Pw {
  5028  						if ctxt.Arch.Family != sys.AMD64 {
  5029  							ctxt.Diag("asmins: illegal 64: %v", p)
  5030  						}
  5031  						ab.rexflag |= Pw
  5032  						t = t[1:]
  5033  					} else if t[0] == Pe {
  5034  						ab.Put1(Pe)
  5035  						t = t[1:]
  5036  					}
  5037  
  5038  					switch p.From.Type {
  5039  					default:
  5040  						goto bad
  5041  
  5042  					case obj.TYPE_CONST:
  5043  						ab.Put2(0x0f, t[0])
  5044  						ab.asmandsz(ctxt, cursym, p, &p.To, reg[p.GetFrom3().Reg], regrex[p.GetFrom3().Reg], 0)
  5045  						ab.Put1(byte(p.From.Offset))
  5046  
  5047  					case obj.TYPE_REG:
  5048  						switch p.From.Reg {
  5049  						default:
  5050  							goto bad
  5051  
  5052  						case REG_CL, REG_CX:
  5053  							ab.Put2(0x0f, t[1])
  5054  							ab.asmandsz(ctxt, cursym, p, &p.To, reg[p.GetFrom3().Reg], regrex[p.GetFrom3().Reg], 0)
  5055  						}
  5056  					}
  5057  
  5058  				// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  5059  				// where you load the TLS base register into a register and then index off that
  5060  				// register to access the actual TLS variables. Systems that allow direct TLS access
  5061  				// are handled in prefixof above and should not be listed here.
  5062  				case movTLSReg:
  5063  					if ctxt.Arch.Family == sys.AMD64 && p.As != AMOVQ || ctxt.Arch.Family == sys.I386 && p.As != AMOVL {
  5064  						ctxt.Diag("invalid load of TLS: %v", p)
  5065  					}
  5066  
  5067  					if ctxt.Arch.Family == sys.I386 {
  5068  						// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  5069  						// where you load the TLS base register into a register and then index off that
  5070  						// register to access the actual TLS variables. Systems that allow direct TLS access
  5071  						// are handled in prefixof above and should not be listed here.
  5072  						switch ctxt.Headtype {
  5073  						default:
  5074  							log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)
  5075  
  5076  						case objabi.Hlinux, objabi.Hfreebsd:
  5077  							if ctxt.Flag_shared {
  5078  								// Note that this is not generating the same insns as the other cases.
  5079  								//     MOV TLS, dst
  5080  								// becomes
  5081  								//     call __x86.get_pc_thunk.dst
  5082  								//     movl (gotpc + g@gotntpoff)(dst), dst
  5083  								// which is encoded as
  5084  								//     call __x86.get_pc_thunk.dst
  5085  								//     movq 0(dst), dst
  5086  								// and R_CALL & R_TLS_IE relocs. This all assumes the only tls variable we access
  5087  								// is g, which we can't check here, but will when we assemble the second
  5088  								// instruction.
  5089  								dst := p.To.Reg
  5090  								ab.Put1(0xe8)
  5091  								r = obj.Addrel(cursym)
  5092  								r.Off = int32(p.Pc + int64(ab.Len()))
  5093  								r.Type = objabi.R_CALL
  5094  								r.Siz = 4
  5095  								r.Sym = ctxt.Lookup("__x86.get_pc_thunk." + strings.ToLower(rconv(int(dst))))
  5096  								ab.PutInt32(0)
  5097  
  5098  								ab.Put2(0x8B, byte(2<<6|reg[dst]|(reg[dst]<<3)))
  5099  								r = obj.Addrel(cursym)
  5100  								r.Off = int32(p.Pc + int64(ab.Len()))
  5101  								r.Type = objabi.R_TLS_IE
  5102  								r.Siz = 4
  5103  								r.Add = 2
  5104  								ab.PutInt32(0)
  5105  							} else {
  5106  								// ELF TLS base is 0(GS).
  5107  								pp.From = p.From
  5108  
  5109  								pp.From.Type = obj.TYPE_MEM
  5110  								pp.From.Reg = REG_GS
  5111  								pp.From.Offset = 0
  5112  								pp.From.Index = REG_NONE
  5113  								pp.From.Scale = 0
  5114  								ab.Put2(0x65, // GS
  5115  									0x8B)
  5116  								ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  5117  							}
  5118  						case objabi.Hplan9:
  5119  							pp.From = obj.Addr{}
  5120  							pp.From.Type = obj.TYPE_MEM
  5121  							pp.From.Name = obj.NAME_EXTERN
  5122  							pp.From.Sym = plan9privates
  5123  							pp.From.Offset = 0
  5124  							pp.From.Index = REG_NONE
  5125  							ab.Put1(0x8B)
  5126  							ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  5127  
  5128  						case objabi.Hwindows:
  5129  							// Windows TLS base is always 0x14(FS).
  5130  							pp.From = p.From
  5131  
  5132  							pp.From.Type = obj.TYPE_MEM
  5133  							pp.From.Reg = REG_FS
  5134  							pp.From.Offset = 0x14
  5135  							pp.From.Index = REG_NONE
  5136  							pp.From.Scale = 0
  5137  							ab.Put2(0x64, // FS
  5138  								0x8B)
  5139  							ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  5140  						}
  5141  						break
  5142  					}
  5143  
  5144  					switch ctxt.Headtype {
  5145  					default:
  5146  						log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)
  5147  
  5148  					case objabi.Hlinux, objabi.Hfreebsd:
  5149  						if !ctxt.Flag_shared {
  5150  							log.Fatalf("unknown TLS base location for linux/freebsd without -shared")
  5151  						}
  5152  						// Note that this is not generating the same insn as the other cases.
  5153  						//     MOV TLS, R_to
  5154  						// becomes
  5155  						//     movq g@gottpoff(%rip), R_to
  5156  						// which is encoded as
  5157  						//     movq 0(%rip), R_to
  5158  						// and a R_TLS_IE reloc. This all assumes the only tls variable we access
  5159  						// is g, which we can't check here, but will when we assemble the second
  5160  						// instruction.
  5161  						ab.rexflag = Pw | (regrex[p.To.Reg] & Rxr)
  5162  
  5163  						ab.Put2(0x8B, byte(0x05|(reg[p.To.Reg]<<3)))
  5164  						r = obj.Addrel(cursym)
  5165  						r.Off = int32(p.Pc + int64(ab.Len()))
  5166  						r.Type = objabi.R_TLS_IE
  5167  						r.Siz = 4
  5168  						r.Add = -4
  5169  						ab.PutInt32(0)
  5170  
  5171  					case objabi.Hplan9:
  5172  						pp.From = obj.Addr{}
  5173  						pp.From.Type = obj.TYPE_MEM
  5174  						pp.From.Name = obj.NAME_EXTERN
  5175  						pp.From.Sym = plan9privates
  5176  						pp.From.Offset = 0
  5177  						pp.From.Index = REG_NONE
  5178  						ab.rexflag |= Pw
  5179  						ab.Put1(0x8B)
  5180  						ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  5181  
  5182  					case objabi.Hsolaris: // TODO(rsc): Delete Hsolaris from list. Should not use this code. See progedit in obj6.c.
  5183  						// TLS base is 0(FS).
  5184  						pp.From = p.From
  5185  
  5186  						pp.From.Type = obj.TYPE_MEM
  5187  						pp.From.Name = obj.NAME_NONE
  5188  						pp.From.Reg = REG_NONE
  5189  						pp.From.Offset = 0
  5190  						pp.From.Index = REG_NONE
  5191  						pp.From.Scale = 0
  5192  						ab.rexflag |= Pw
  5193  						ab.Put2(0x64, // FS
  5194  							0x8B)
  5195  						ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  5196  
  5197  					case objabi.Hwindows:
  5198  						// Windows TLS base is always 0x28(GS).
  5199  						pp.From = p.From
  5200  
  5201  						pp.From.Type = obj.TYPE_MEM
  5202  						pp.From.Name = obj.NAME_NONE
  5203  						pp.From.Reg = REG_GS
  5204  						pp.From.Offset = 0x28
  5205  						pp.From.Index = REG_NONE
  5206  						pp.From.Scale = 0
  5207  						ab.rexflag |= Pw
  5208  						ab.Put2(0x65, // GS
  5209  							0x8B)
  5210  						ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  5211  					}
  5212  				}
  5213  				return
  5214  			}
  5215  		}
  5216  	}
  5217  	goto bad
  5218  
  5219  bad:
  5220  	if ctxt.Arch.Family != sys.AMD64 {
  5221  		// here, the assembly has failed.
  5222  		// if it's a byte instruction that has
  5223  		// unaddressable registers, try to
  5224  		// exchange registers and reissue the
  5225  		// instruction with the operands renamed.
  5226  		pp := *p
  5227  
  5228  		unbytereg(&pp.From, &pp.Ft)
  5229  		unbytereg(&pp.To, &pp.Tt)
  5230  
  5231  		z := int(p.From.Reg)
  5232  		if p.From.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  5233  			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  5234  			// For now, different to keep bit-for-bit compatibility.
  5235  			if ctxt.Arch.Family == sys.I386 {
  5236  				breg := byteswapreg(ctxt, &p.To)
  5237  				if breg != REG_AX {
  5238  					ab.Put1(0x87) // xchg lhs,bx
  5239  					ab.asmando(ctxt, cursym, p, &p.From, reg[breg])
  5240  					subreg(&pp, z, breg)
  5241  					ab.doasm(ctxt, cursym, &pp)
  5242  					ab.Put1(0x87) // xchg lhs,bx
  5243  					ab.asmando(ctxt, cursym, p, &p.From, reg[breg])
  5244  				} else {
  5245  					ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  5246  					subreg(&pp, z, REG_AX)
  5247  					ab.doasm(ctxt, cursym, &pp)
  5248  					ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  5249  				}
  5250  				return
  5251  			}
  5252  
  5253  			if isax(&p.To) || p.To.Type == obj.TYPE_NONE {
  5254  				// We certainly don't want to exchange
  5255  				// with AX if the op is MUL or DIV.
  5256  				ab.Put1(0x87) // xchg lhs,bx
  5257  				ab.asmando(ctxt, cursym, p, &p.From, reg[REG_BX])
  5258  				subreg(&pp, z, REG_BX)
  5259  				ab.doasm(ctxt, cursym, &pp)
  5260  				ab.Put1(0x87) // xchg lhs,bx
  5261  				ab.asmando(ctxt, cursym, p, &p.From, reg[REG_BX])
  5262  			} else {
  5263  				ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  5264  				subreg(&pp, z, REG_AX)
  5265  				ab.doasm(ctxt, cursym, &pp)
  5266  				ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  5267  			}
  5268  			return
  5269  		}
  5270  
  5271  		z = int(p.To.Reg)
  5272  		if p.To.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  5273  			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  5274  			// For now, different to keep bit-for-bit compatibility.
  5275  			if ctxt.Arch.Family == sys.I386 {
  5276  				breg := byteswapreg(ctxt, &p.From)
  5277  				if breg != REG_AX {
  5278  					ab.Put1(0x87) //xchg rhs,bx
  5279  					ab.asmando(ctxt, cursym, p, &p.To, reg[breg])
  5280  					subreg(&pp, z, breg)
  5281  					ab.doasm(ctxt, cursym, &pp)
  5282  					ab.Put1(0x87) // xchg rhs,bx
  5283  					ab.asmando(ctxt, cursym, p, &p.To, reg[breg])
  5284  				} else {
  5285  					ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  5286  					subreg(&pp, z, REG_AX)
  5287  					ab.doasm(ctxt, cursym, &pp)
  5288  					ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  5289  				}
  5290  				return
  5291  			}
  5292  
  5293  			if isax(&p.From) {
  5294  				ab.Put1(0x87) // xchg rhs,bx
  5295  				ab.asmando(ctxt, cursym, p, &p.To, reg[REG_BX])
  5296  				subreg(&pp, z, REG_BX)
  5297  				ab.doasm(ctxt, cursym, &pp)
  5298  				ab.Put1(0x87) // xchg rhs,bx
  5299  				ab.asmando(ctxt, cursym, p, &p.To, reg[REG_BX])
  5300  			} else {
  5301  				ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  5302  				subreg(&pp, z, REG_AX)
  5303  				ab.doasm(ctxt, cursym, &pp)
  5304  				ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  5305  			}
  5306  			return
  5307  		}
  5308  	}
  5309  
  5310  	ctxt.Diag("%s: invalid instruction: %v", cursym.Name, p)
  5311  }
  5312  
  5313  // byteswapreg returns a byte-addressable register (AX, BX, CX, DX)
  5314  // which is not referenced in a.
  5315  // If a is empty, it returns BX to account for MULB-like instructions
  5316  // that might use DX and AX.
  5317  func byteswapreg(ctxt *obj.Link, a *obj.Addr) int {
  5318  	cana, canb, canc, cand := true, true, true, true
  5319  	if a.Type == obj.TYPE_NONE {
  5320  		cana, cand = false, false
  5321  	}
  5322  
  5323  	if a.Type == obj.TYPE_REG || ((a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Name == obj.NAME_NONE) {
  5324  		switch a.Reg {
  5325  		case REG_NONE:
  5326  			cana, cand = false, false
  5327  		case REG_AX, REG_AL, REG_AH:
  5328  			cana = false
  5329  		case REG_BX, REG_BL, REG_BH:
  5330  			canb = false
  5331  		case REG_CX, REG_CL, REG_CH:
  5332  			canc = false
  5333  		case REG_DX, REG_DL, REG_DH:
  5334  			cand = false
  5335  		}
  5336  	}
  5337  
  5338  	if a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR {
  5339  		switch a.Index {
  5340  		case REG_AX:
  5341  			cana = false
  5342  		case REG_BX:
  5343  			canb = false
  5344  		case REG_CX:
  5345  			canc = false
  5346  		case REG_DX:
  5347  			cand = false
  5348  		}
  5349  	}
  5350  
  5351  	switch {
  5352  	case cana:
  5353  		return REG_AX
  5354  	case canb:
  5355  		return REG_BX
  5356  	case canc:
  5357  		return REG_CX
  5358  	case cand:
  5359  		return REG_DX
  5360  	default:
  5361  		ctxt.Diag("impossible byte register")
  5362  		ctxt.DiagFlush()
  5363  		log.Fatalf("bad code")
  5364  		return 0
  5365  	}
  5366  }
  5367  
  5368  func isbadbyte(a *obj.Addr) bool {
  5369  	return a.Type == obj.TYPE_REG && (REG_BP <= a.Reg && a.Reg <= REG_DI || REG_BPB <= a.Reg && a.Reg <= REG_DIB)
  5370  }
  5371  
  5372  func (ab *AsmBuf) asmins(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
  5373  	ab.Reset()
  5374  
  5375  	ab.rexflag = 0
  5376  	ab.vexflag = false
  5377  	ab.evexflag = false
  5378  	mark := ab.Len()
  5379  	ab.doasm(ctxt, cursym, p)
  5380  	if ab.rexflag != 0 && !ab.vexflag && !ab.evexflag {
  5381  		// as befits the whole approach of the architecture,
  5382  		// the rex prefix must appear before the first opcode byte
  5383  		// (and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but
  5384  		// before the 0f opcode escape!), or it might be ignored.
  5385  		// note that the handbook often misleadingly shows 66/f2/f3 in `opcode'.
  5386  		if ctxt.Arch.Family != sys.AMD64 {
  5387  			ctxt.Diag("asmins: illegal in mode %d: %v (%d %d)", ctxt.Arch.RegSize*8, p, p.Ft, p.Tt)
  5388  		}
  5389  		n := ab.Len()
  5390  		var np int
  5391  		for np = mark; np < n; np++ {
  5392  			c := ab.At(np)
  5393  			if c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26 {
  5394  				break
  5395  			}
  5396  		}
  5397  		ab.Insert(np, byte(0x40|ab.rexflag))
  5398  	}
  5399  
  5400  	n := ab.Len()
  5401  	for i := len(cursym.R) - 1; i >= 0; i-- {
  5402  		r := &cursym.R[i]
  5403  		if int64(r.Off) < p.Pc {
  5404  			break
  5405  		}
  5406  		if ab.rexflag != 0 && !ab.vexflag && !ab.evexflag {
  5407  			r.Off++
  5408  		}
  5409  		if r.Type == objabi.R_PCREL {
  5410  			if ctxt.Arch.Family == sys.AMD64 || p.As == obj.AJMP || p.As == obj.ACALL {
  5411  				// PC-relative addressing is relative to the end of the instruction,
  5412  				// but the relocations applied by the linker are relative to the end
  5413  				// of the relocation. Because immediate instruction
  5414  				// arguments can follow the PC-relative memory reference in the
  5415  				// instruction encoding, the two may not coincide. In this case,
  5416  				// adjust addend so that linker can keep relocating relative to the
  5417  				// end of the relocation.
  5418  				r.Add -= p.Pc + int64(n) - (int64(r.Off) + int64(r.Siz))
  5419  			} else if ctxt.Arch.Family == sys.I386 {
  5420  				// On 386 PC-relative addressing (for non-call/jmp instructions)
  5421  				// assumes that the previous instruction loaded the PC of the end
  5422  				// of that instruction into CX, so the adjustment is relative to
  5423  				// that.
  5424  				r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
  5425  			}
  5426  		}
  5427  		if r.Type == objabi.R_GOTPCREL && ctxt.Arch.Family == sys.I386 {
  5428  			// On 386, R_GOTPCREL makes the same assumptions as R_PCREL.
  5429  			r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
  5430  		}
  5431  
  5432  	}
  5433  }
  5434  
  5435  // unpackOps4 extracts 4 operands from p.
  5436  func unpackOps4(p *obj.Prog) (arg0, arg1, arg2, dst *obj.Addr) {
  5437  	return &p.From, &p.RestArgs[0].Addr, &p.RestArgs[1].Addr, &p.To
  5438  }
  5439  
  5440  // unpackOps5 extracts 5 operands from p.
  5441  func unpackOps5(p *obj.Prog) (arg0, arg1, arg2, arg3, dst *obj.Addr) {
  5442  	return &p.From, &p.RestArgs[0].Addr, &p.RestArgs[1].Addr, &p.RestArgs[2].Addr, &p.To
  5443  }
  5444  

View as plain text