p256_ppc64le.go

Documentation: crypto/elliptic

     1  // Copyright 2019 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build ppc64le
     6  // +build ppc64le
     7  
     8  package elliptic
     9  
    10  import (
    11  	"crypto/subtle"
    12  	"encoding/binary"
    13  	"math/big"
    14  )
    15  
    16  // This was ported from the s390x implementation for ppc64le.
    17  // Some hints are included here for changes that should be
    18  // in the big endian ppc64 implementation, however more
    19  // investigation and testing is needed for the ppc64 big
    20  // endian version to work.
    21  type p256CurveFast struct {
    22  	*CurveParams
    23  }
    24  
    25  type p256Point struct {
    26  	x [32]byte
    27  	y [32]byte
    28  	z [32]byte
    29  }
    30  
    31  var (
    32  	p256        Curve
    33  	p256PreFast *[37][64]p256Point
    34  )
    35  
    36  func initP256Arch() {
    37  	p256 = p256CurveFast{p256Params}
    38  	initTable()
    39  	return
    40  }
    41  
    42  func (curve p256CurveFast) Params() *CurveParams {
    43  	return curve.CurveParams
    44  }
    45  
    46  // Functions implemented in p256_asm_ppc64le.s
    47  // Montgomery multiplication modulo P256
    48  //
    49  //go:noescape
    50  func p256MulAsm(res, in1, in2 []byte)
    51  
    52  // Montgomery square modulo P256
    53  //
    54  func p256Sqr(res, in []byte) {
    55  	p256MulAsm(res, in, in)
    56  }
    57  
    58  // Montgomery multiplication by 1
    59  //
    60  //go:noescape
    61  func p256FromMont(res, in []byte)
    62  
    63  // iff cond == 1  val <- -val
    64  //
    65  //go:noescape
    66  func p256NegCond(val *p256Point, cond int)
    67  
    68  // if cond == 0 res <- b; else res <- a
    69  //
    70  //go:noescape
    71  func p256MovCond(res, a, b *p256Point, cond int)
    72  
    73  // Constant time table access
    74  //
    75  //go:noescape
    76  func p256Select(point *p256Point, table []p256Point, idx int)
    77  
    78  //
    79  //go:noescape
    80  func p256SelectBase(point *p256Point, table []p256Point, idx int)
    81  
    82  // Point add with P2 being affine point
    83  // If sign == 1 -> P2 = -P2
    84  // If sel == 0 -> P3 = P1
    85  // if zero == 0 -> P3 = P2
    86  //
    87  //go:noescape
    88  func p256PointAddAffineAsm(res, in1, in2 *p256Point, sign, sel, zero int)
    89  
    90  // Point add
    91  //
    92  //go:noescape
    93  func p256PointAddAsm(res, in1, in2 *p256Point) int
    94  
    95  //
    96  //go:noescape
    97  func p256PointDoubleAsm(res, in *p256Point)
    98  
    99  // The result should be a slice in LE order, but the slice
   100  // from big.Bytes is in BE order.
   101  // TODO: For big endian implementation, do not reverse bytes.
   102  //
   103  func fromBig(big *big.Int) []byte {
   104  	// This could be done a lot more efficiently...
   105  	res := big.Bytes()
   106  	t := make([]byte, 32)
   107  	if len(res) < 32 {
   108  		copy(t[32-len(res):], res)
   109  	} else if len(res) == 32 {
   110  		copy(t, res)
   111  	} else {
   112  		copy(t, res[len(res)-32:])
   113  	}
   114  	p256ReverseBytes(t, t)
   115  	return t
   116  }
   117  
   118  // p256GetMultiplier makes sure byte array will have 32 byte elements, If the scalar
   119  // is equal or greater than the order of the group, it's reduced modulo that order.
   120  func p256GetMultiplier(in []byte) []byte {
   121  	n := new(big.Int).SetBytes(in)
   122  
   123  	if n.Cmp(p256Params.N) >= 0 {
   124  		n.Mod(n, p256Params.N)
   125  	}
   126  	return fromBig(n)
   127  }
   128  
   129  // p256MulAsm operates in a Montgomery domain with R = 2^256 mod p, where p is the
   130  // underlying field of the curve. (See initP256 for the value.) Thus rr here is
   131  // R×R mod p. See comment in Inverse about how this is used.
   132  // TODO: For big endian implementation, the bytes in these slices should be in reverse order,
   133  // as found in the s390x implementation.
   134  var rr = []byte{0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0, 0xff, 0xff, 0xff, 0xff, 0xfb, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfd, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00}
   135  
   136  // (This is one, in the Montgomery domain.)
   137  var one = []byte{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00}
   138  
   139  func maybeReduceModP(in *big.Int) *big.Int {
   140  	if in.Cmp(p256Params.P) < 0 {
   141  		return in
   142  	}
   143  	return new(big.Int).Mod(in, p256Params.P)
   144  }
   145  
   146  // p256ReverseBytes copies the first 32 bytes from in to res in reverse order.
   147  func p256ReverseBytes(res, in []byte) {
   148  	// remove bounds check
   149  	in = in[:32]
   150  	res = res[:32]
   151  
   152  	// Load in reverse order
   153  	a := binary.BigEndian.Uint64(in[0:])
   154  	b := binary.BigEndian.Uint64(in[8:])
   155  	c := binary.BigEndian.Uint64(in[16:])
   156  	d := binary.BigEndian.Uint64(in[24:])
   157  
   158  	// Store in normal order
   159  	binary.LittleEndian.PutUint64(res[0:], d)
   160  	binary.LittleEndian.PutUint64(res[8:], c)
   161  	binary.LittleEndian.PutUint64(res[16:], b)
   162  	binary.LittleEndian.PutUint64(res[24:], a)
   163  }
   164  
   165  func (curve p256CurveFast) CombinedMult(bigX, bigY *big.Int, baseScalar, scalar []byte) (x, y *big.Int) {
   166  	var r1, r2 p256Point
   167  
   168  	scalarReduced := p256GetMultiplier(baseScalar)
   169  	r1IsInfinity := scalarIsZero(scalarReduced)
   170  	r1.p256BaseMult(scalarReduced)
   171  
   172  	copy(r2.x[:], fromBig(maybeReduceModP(bigX)))
   173  	copy(r2.y[:], fromBig(maybeReduceModP(bigY)))
   174  	copy(r2.z[:], one)
   175  	p256MulAsm(r2.x[:], r2.x[:], rr[:])
   176  	p256MulAsm(r2.y[:], r2.y[:], rr[:])
   177  
   178  	scalarReduced = p256GetMultiplier(scalar)
   179  	r2IsInfinity := scalarIsZero(scalarReduced)
   180  	r2.p256ScalarMult(scalarReduced)
   181  
   182  	var sum, double p256Point
   183  	pointsEqual := p256PointAddAsm(&sum, &r1, &r2)
   184  	p256PointDoubleAsm(&double, &r1)
   185  	p256MovCond(&sum, &double, &sum, pointsEqual)
   186  	p256MovCond(&sum, &r1, &sum, r2IsInfinity)
   187  	p256MovCond(&sum, &r2, &sum, r1IsInfinity)
   188  	return sum.p256PointToAffine()
   189  }
   190  
   191  func (curve p256CurveFast) ScalarBaseMult(scalar []byte) (x, y *big.Int) {
   192  	var r p256Point
   193  	reducedScalar := p256GetMultiplier(scalar)
   194  	r.p256BaseMult(reducedScalar)
   195  	return r.p256PointToAffine()
   196  }
   197  
   198  func (curve p256CurveFast) ScalarMult(bigX, bigY *big.Int, scalar []byte) (x, y *big.Int) {
   199  	scalarReduced := p256GetMultiplier(scalar)
   200  	var r p256Point
   201  	copy(r.x[:], fromBig(maybeReduceModP(bigX)))
   202  	copy(r.y[:], fromBig(maybeReduceModP(bigY)))
   203  	copy(r.z[:], one)
   204  	p256MulAsm(r.x[:], r.x[:], rr[:])
   205  	p256MulAsm(r.y[:], r.y[:], rr[:])
   206  	r.p256ScalarMult(scalarReduced)
   207  	return r.p256PointToAffine()
   208  }
   209  
   210  func scalarIsZero(scalar []byte) int {
   211  	// If any byte is not zero, return 0.
   212  	// Check for -0.... since that appears to compare to 0.
   213  	b := byte(0)
   214  	for _, s := range scalar {
   215  		b |= s
   216  	}
   217  	return subtle.ConstantTimeByteEq(b, 0)
   218  }
   219  
   220  func (p *p256Point) p256PointToAffine() (x, y *big.Int) {
   221  	zInv := make([]byte, 32)
   222  	zInvSq := make([]byte, 32)
   223  
   224  	p256Inverse(zInv, p.z[:])
   225  	p256Sqr(zInvSq, zInv)
   226  	p256MulAsm(zInv, zInv, zInvSq)
   227  
   228  	p256MulAsm(zInvSq, p.x[:], zInvSq)
   229  	p256MulAsm(zInv, p.y[:], zInv)
   230  
   231  	p256FromMont(zInvSq, zInvSq)
   232  	p256FromMont(zInv, zInv)
   233  
   234  	// SetBytes expects a slice in big endian order,
   235  	// since ppc64le is little endian, reverse the bytes.
   236  	// TODO: For big endian, bytes don't need to be reversed.
   237  	p256ReverseBytes(zInvSq, zInvSq)
   238  	p256ReverseBytes(zInv, zInv)
   239  	rx := new(big.Int).SetBytes(zInvSq)
   240  	ry := new(big.Int).SetBytes(zInv)
   241  	return rx, ry
   242  }
   243  
   244  // p256Inverse sets out to in^-1 mod p.
   245  func p256Inverse(out, in []byte) {
   246  	var stack [6 * 32]byte
   247  	p2 := stack[32*0 : 32*0+32]
   248  	p4 := stack[32*1 : 32*1+32]
   249  	p8 := stack[32*2 : 32*2+32]
   250  	p16 := stack[32*3 : 32*3+32]
   251  	p32 := stack[32*4 : 32*4+32]
   252  
   253  	p256Sqr(out, in)
   254  	p256MulAsm(p2, out, in) // 3*p
   255  
   256  	p256Sqr(out, p2)
   257  	p256Sqr(out, out)
   258  	p256MulAsm(p4, out, p2) // f*p
   259  
   260  	p256Sqr(out, p4)
   261  	p256Sqr(out, out)
   262  	p256Sqr(out, out)
   263  	p256Sqr(out, out)
   264  	p256MulAsm(p8, out, p4) // ff*p
   265  
   266  	p256Sqr(out, p8)
   267  
   268  	for i := 0; i < 7; i++ {
   269  		p256Sqr(out, out)
   270  	}
   271  	p256MulAsm(p16, out, p8) // ffff*p
   272  
   273  	p256Sqr(out, p16)
   274  	for i := 0; i < 15; i++ {
   275  		p256Sqr(out, out)
   276  	}
   277  	p256MulAsm(p32, out, p16) // ffffffff*p
   278  
   279  	p256Sqr(out, p32)
   280  
   281  	for i := 0; i < 31; i++ {
   282  		p256Sqr(out, out)
   283  	}
   284  	p256MulAsm(out, out, in)
   285  
   286  	for i := 0; i < 32*4; i++ {
   287  		p256Sqr(out, out)
   288  	}
   289  	p256MulAsm(out, out, p32)
   290  
   291  	for i := 0; i < 32; i++ {
   292  		p256Sqr(out, out)
   293  	}
   294  	p256MulAsm(out, out, p32)
   295  
   296  	for i := 0; i < 16; i++ {
   297  		p256Sqr(out, out)
   298  	}
   299  	p256MulAsm(out, out, p16)
   300  
   301  	for i := 0; i < 8; i++ {
   302  		p256Sqr(out, out)
   303  	}
   304  	p256MulAsm(out, out, p8)
   305  
   306  	p256Sqr(out, out)
   307  	p256Sqr(out, out)
   308  	p256Sqr(out, out)
   309  	p256Sqr(out, out)
   310  	p256MulAsm(out, out, p4)
   311  
   312  	p256Sqr(out, out)
   313  	p256Sqr(out, out)
   314  	p256MulAsm(out, out, p2)
   315  
   316  	p256Sqr(out, out)
   317  	p256Sqr(out, out)
   318  	p256MulAsm(out, out, in)
   319  }
   320  
   321  func boothW5(in uint) (int, int) {
   322  	var s uint = ^((in >> 5) - 1)
   323  	var d uint = (1 << 6) - in - 1
   324  	d = (d & s) | (in & (^s))
   325  	d = (d >> 1) + (d & 1)
   326  	return int(d), int(s & 1)
   327  }
   328  
   329  func boothW6(in uint) (int, int) {
   330  	var s uint = ^((in >> 6) - 1)
   331  	var d uint = (1 << 7) - in - 1
   332  	d = (d & s) | (in & (^s))
   333  	d = (d >> 1) + (d & 1)
   334  	return int(d), int(s & 1)
   335  }
   336  
   337  func boothW7(in uint) (int, int) {
   338  	var s uint = ^((in >> 7) - 1)
   339  	var d uint = (1 << 8) - in - 1
   340  	d = (d & s) | (in & (^s))
   341  	d = (d >> 1) + (d & 1)
   342  	return int(d), int(s & 1)
   343  }
   344  
   345  func initTable() {
   346  
   347  	p256PreFast = new([37][64]p256Point)
   348  
   349  	// TODO: For big endian, these slices should be in reverse byte order,
   350  	// as found in the s390x implementation.
   351  	basePoint := p256Point{
   352  		x: [32]byte{0x3c, 0x14, 0xa9, 0x18, 0xd4, 0x30, 0xe7, 0x79, 0x01, 0xb6, 0xed, 0x5f, 0xfc, 0x95, 0xba, 0x75,
   353  			0x10, 0x25, 0x62, 0x77, 0x2b, 0x73, 0xfb, 0x79, 0xc6, 0x55, 0x37, 0xa5, 0x76, 0x5f, 0x90, 0x18}, //(p256.x*2^256)%p
   354  		y: [32]byte{0x0a, 0x56, 0x95, 0xce, 0x57, 0x53, 0xf2, 0xdd, 0x5c, 0xe4, 0x19, 0xba, 0xe4, 0xb8, 0x4a, 0x8b,
   355  			0x25, 0xf3, 0x21, 0xdd, 0x88, 0x86, 0xe8, 0xd2, 0x85, 0x5d, 0x88, 0x25, 0x18, 0xff, 0x71, 0x85}, //(p256.y*2^256)%p
   356  		z: [32]byte{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
   357  			0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00}, //(p256.z*2^256)%p
   358  
   359  	}
   360  
   361  	t1 := new(p256Point)
   362  	t2 := new(p256Point)
   363  	*t2 = basePoint
   364  
   365  	zInv := make([]byte, 32)
   366  	zInvSq := make([]byte, 32)
   367  	for j := 0; j < 64; j++ {
   368  		*t1 = *t2
   369  		for i := 0; i < 37; i++ {
   370  			// The window size is 7 so we need to double 7 times.
   371  			if i != 0 {
   372  				for k := 0; k < 7; k++ {
   373  					p256PointDoubleAsm(t1, t1)
   374  				}
   375  			}
   376  			// Convert the point to affine form. (Its values are
   377  			// still in Montgomery form however.)
   378  			p256Inverse(zInv, t1.z[:])
   379  			p256Sqr(zInvSq, zInv)
   380  			p256MulAsm(zInv, zInv, zInvSq)
   381  
   382  			p256MulAsm(t1.x[:], t1.x[:], zInvSq)
   383  			p256MulAsm(t1.y[:], t1.y[:], zInv)
   384  
   385  			copy(t1.z[:], basePoint.z[:])
   386  			// Update the table entry
   387  			copy(p256PreFast[i][j].x[:], t1.x[:])
   388  			copy(p256PreFast[i][j].y[:], t1.y[:])
   389  		}
   390  		if j == 0 {
   391  			p256PointDoubleAsm(t2, &basePoint)
   392  		} else {
   393  			p256PointAddAsm(t2, t2, &basePoint)
   394  		}
   395  	}
   396  }
   397  
   398  func (p *p256Point) p256BaseMult(scalar []byte) {
   399  	// TODO: For big endian, the index should be 31 not 0.
   400  	wvalue := (uint(scalar[0]) << 1) & 0xff
   401  	sel, sign := boothW7(uint(wvalue))
   402  	p256SelectBase(p, p256PreFast[0][:], sel)
   403  	p256NegCond(p, sign)
   404  
   405  	copy(p.z[:], one[:])
   406  	var t0 p256Point
   407  
   408  	copy(t0.z[:], one[:])
   409  
   410  	index := uint(6)
   411  	zero := sel
   412  	for i := 1; i < 37; i++ {
   413  		// TODO: For big endian, use the same index values as found
   414  		// in the  s390x implementation.
   415  		if index < 247 {
   416  			wvalue = ((uint(scalar[index/8]) >> (index % 8)) + (uint(scalar[index/8+1]) << (8 - (index % 8)))) & 0xff
   417  		} else {
   418  			wvalue = (uint(scalar[index/8]) >> (index % 8)) & 0xff
   419  		}
   420  		index += 7
   421  		sel, sign = boothW7(uint(wvalue))
   422  		p256SelectBase(&t0, p256PreFast[i][:], sel)
   423  		p256PointAddAffineAsm(p, p, &t0, sign, sel, zero)
   424  		zero |= sel
   425  	}
   426  }
   427  
   428  func (p *p256Point) p256ScalarMult(scalar []byte) {
   429  	// precomp is a table of precomputed points that stores powers of p
   430  	// from p^1 to p^16.
   431  	var precomp [16]p256Point
   432  	var t0, t1, t2, t3 p256Point
   433  
   434  	*&precomp[0] = *p
   435  	p256PointDoubleAsm(&t0, p)
   436  	p256PointDoubleAsm(&t1, &t0)
   437  	p256PointDoubleAsm(&t2, &t1)
   438  	p256PointDoubleAsm(&t3, &t2)
   439  	*&precomp[1] = t0
   440  	*&precomp[3] = t1
   441  	*&precomp[7] = t2
   442  	*&precomp[15] = t3
   443  
   444  	p256PointAddAsm(&t0, &t0, p)
   445  	p256PointAddAsm(&t1, &t1, p)
   446  	p256PointAddAsm(&t2, &t2, p)
   447  
   448  	*&precomp[2] = t0
   449  	*&precomp[4] = t1
   450  	*&precomp[8] = t2
   451  
   452  	p256PointDoubleAsm(&t0, &t0)
   453  	p256PointDoubleAsm(&t1, &t1)
   454  	*&precomp[5] = t0
   455  	*&precomp[9] = t1
   456  
   457  	p256PointAddAsm(&t2, &t0, p)
   458  	p256PointAddAsm(&t1, &t1, p)
   459  	*&precomp[6] = t2
   460  	*&precomp[10] = t1
   461  
   462  	p256PointDoubleAsm(&t0, &t0)
   463  	p256PointDoubleAsm(&t2, &t2)
   464  	*&precomp[11] = t0
   465  	*&precomp[13] = t2
   466  
   467  	p256PointAddAsm(&t0, &t0, p)
   468  	p256PointAddAsm(&t2, &t2, p)
   469  	*&precomp[12] = t0
   470  	*&precomp[14] = t2
   471  
   472  	// Start scanning the window from top bit
   473  	index := uint(254)
   474  	var sel, sign int
   475  
   476  	// TODO: For big endian, use index found in s390x implementation.
   477  	wvalue := (uint(scalar[index/8]) >> (index % 8)) & 0x3f
   478  	sel, _ = boothW5(uint(wvalue))
   479  	p256Select(p, precomp[:], sel)
   480  	zero := sel
   481  
   482  	for index > 4 {
   483  		index -= 5
   484  		p256PointDoubleAsm(p, p)
   485  		p256PointDoubleAsm(p, p)
   486  		p256PointDoubleAsm(p, p)
   487  		p256PointDoubleAsm(p, p)
   488  		p256PointDoubleAsm(p, p)
   489  
   490  		// TODO: For big endian, use index values as found in s390x implementation.
   491  		if index < 247 {
   492  			wvalue = ((uint(scalar[index/8]) >> (index % 8)) + (uint(scalar[index/8+1]) << (8 - (index % 8)))) & 0x3f
   493  		} else {
   494  			wvalue = (uint(scalar[index/8]) >> (index % 8)) & 0x3f
   495  		}
   496  
   497  		sel, sign = boothW5(uint(wvalue))
   498  
   499  		p256Select(&t0, precomp[:], sel)
   500  		p256NegCond(&t0, sign)
   501  		p256PointAddAsm(&t1, p, &t0)
   502  		p256MovCond(&t1, &t1, p, sel)
   503  		p256MovCond(p, &t1, &t0, zero)
   504  		zero |= sel
   505  	}
   506  
   507  	p256PointDoubleAsm(p, p)
   508  	p256PointDoubleAsm(p, p)
   509  	p256PointDoubleAsm(p, p)
   510  	p256PointDoubleAsm(p, p)
   511  	p256PointDoubleAsm(p, p)
   512  
   513  	// TODO: Use index for big endian as found in s390x implementation.
   514  	wvalue = (uint(scalar[0]) << 1) & 0x3f
   515  	sel, sign = boothW5(uint(wvalue))
   516  
   517  	p256Select(&t0, precomp[:], sel)
   518  	p256NegCond(&t0, sign)
   519  	p256PointAddAsm(&t1, p, &t0)
   520  	p256MovCond(&t1, &t1, p, sel)
   521  	p256MovCond(p, &t1, &t0, zero)
   522  }
   523
View as plain text