Black Lives Matter. Support the Equal Justice Initiative.

Text file src/runtime/memmove_ppc64x.s

Documentation: runtime

     1  // Copyright 2014 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build ppc64 || ppc64le
     6  // +build ppc64 ppc64le
     7  
     8  #include "textflag.h"
     9  
    10  // See memmove Go doc for important implementation constraints.
    11  
    12  // func memmove(to, from unsafe.Pointer, n uintptr)
    13  
    14  // target (destination) address, loaded from to+0(FP)
    15  #define TGT R3
    16  // source address, loaded from from+8(FP)
    17  #define SRC R4
    18  // length to move in bytes, loaded from n+16(FP)
    19  #define LEN R5
    20  // number of doublewords (LEN >> 3)
    21  #define DWORDS R6
    22  // number of tail bytes < 8 (LEN & 7)
    23  #define BYTES R7
    24  // const 16 used as index for the second 16 byte vector load/store
    25  #define IDX16 R8
    26  // temp used for copies, etc.
    27  #define TMP R9
    28  // number of 32 byte chunks (DWORDS >> 2)
    29  #define QWORDS R10
    30  
    31  TEXT runtime·memmove(SB), NOSPLIT|NOFRAME, $0-24
    32  	MOVD	to+0(FP), TGT		// destination pointer
    33  	MOVD	from+8(FP), SRC		// source pointer
    34  	MOVD	n+16(FP), LEN		// number of bytes to copy
    35  
    36  	// Copy LEN bytes from SRC to TGT; the two regions may overlap.
    37  	// Split LEN into doublewords and a tail so a more efficient move can be done.
    38  check:
    39  	ANDCC	$7, LEN, BYTES	// BYTES: tail bytes to copy, CR0[EQ] set if none
    40  	SRD	$3, LEN, DWORDS	// DWORDS: double words to copy
    41  	MOVFL	CR0, CR3	// save CR from ANDCC; the forward path reuses CR0
    42  	CMP	DWORDS, $0, CR1	// CR1[EQ] set if no double words to copy
    43  
    44  	// Determine overlap by subtracting dest - src and comparing (unsigned)
    45  	// against the length.  This catches the cases where src and dest are in
    46  	// different types of storage such as stack and static to avoid doing a
    47  	// backward move when not necessary.
    48  
    49  	SUB	SRC, TGT, TMP	// dest - src
    50  	CMPU	TMP, LEN, CR2	// < len?
    51  	BC	12, 8, backward	// BLT CR2 backward
    52  
    53  	// Copying forward if no overlap.
    54  
    55  	BC	12, 6, checkbytes	// BEQ CR1, checkbytes
    56  	SRDCC	$2, DWORDS, QWORDS	// 32 byte chunks?
    57  	BEQ	lt32gt8			// < 32 bytes
    58  
    59  	// Prepare for moves of 32 bytes at a time.
    60  
    61  forward32setup:
    62  	DCBTST	(TGT)			// prepare data cache
    63  	DCBT	(SRC)
    64  	MOVD	QWORDS, CTR		// Number of 32 byte chunks
    65  	MOVD	$16, IDX16		// 16 for index
    66  
    67  forward32:
    68  	LXVD2X	(R0)(SRC), VS32		// load 16 bytes
    69  	LXVD2X	(IDX16)(SRC), VS33	// load 16 bytes
    70  	ADD	$32, SRC
    71  	STXVD2X	VS32, (R0)(TGT)		// store 16 bytes
    72  	STXVD2X	VS33, (IDX16)(TGT)
    73  	ADD	$32, TGT		// bump up for next set
    74  	BC	16, 0, forward32	// bdnz: continue
    75  	ANDCC	$3, DWORDS		// remaining doublewords
    76  	BEQ	checkbytes		// only bytes remain
    77  
    78  lt32gt8:
    79  	// At this point >= 8 and < 32 bytes remain as doublewords.
    80  	// Move 16 bytes if possible
    81  	CMP	DWORDS, $2
    82  	BLT	lt16
    83  	LXVD2X	(R0)(SRC), VS32
    84  	ADD	$-2, DWORDS
    85  	STXVD2X	VS32, (R0)(TGT)
    86  	ADD	$16, SRC
    87  	ADD	$16, TGT
    88  
    89  lt16:	// Move 8 bytes if possible
    90  	CMP	DWORDS, $1
    91  	BLT	checkbytes
    92  	MOVD	0(SRC), TMP
    93  	ADD	$8, SRC
    94  	MOVD	TMP, 0(TGT)
    95  	ADD	$8, TGT
    96  checkbytes:
    97  	BC	12, 14, LR		// BEQ CR3 lr: return if no tail bytes
    98  lt8:	// Move word if possible
    99  	CMP	BYTES, $4
   100  	BLT	lt4
   101  	MOVWZ	0(SRC), TMP
   102  	ADD	$-4, BYTES
   103  	MOVW	TMP, 0(TGT)
   104  	ADD	$4, SRC
   105  	ADD	$4, TGT
   106  lt4:	// Move halfword if possible
   107  	CMP	BYTES, $2
   108  	BLT	lt2
   109  	MOVHZ	0(SRC), TMP
   110  	ADD	$-2, BYTES
   111  	MOVH	TMP, 0(TGT)
   112  	ADD	$2, SRC
   113  	ADD	$2, TGT
   114  lt2:	// Move last byte if 1 left
   115  	CMP	BYTES, $1
   116  	BC	12, 0, LR		// blt lr: return if no byte left
   117  	MOVBZ	0(SRC), TMP
   118  	MOVBZ	TMP, 0(TGT)
   119  	RET
   120  
   121  backward:
   122  	// Copying backwards proceeds by copying BYTES bytes then copying DWORDS
   123  	// double words.  TGT and SRC are advanced to the end of the
   124  	// destination/source buffers respectively and moved back as we copy.
   125  
   126  	ADD	LEN, SRC, SRC		// end of source
   127  	ADD	TGT, LEN, TGT		// end of dest
   128  
   129  	BEQ	nobackwardtail		// CR0 still holds the ANDCC result
   130  	MOVD	BYTES, CTR		// bytes to move
   131  
   132  backwardtailloop:
   133  	MOVBZ	-1(SRC), TMP		// point to last byte
   134  	SUB	$1, SRC
   135  	MOVBZ	TMP, -1(TGT)
   136  	SUB	$1, TGT
   137  	BC	16, 0, backwardtailloop	// bdnz
   138  
   139  nobackwardtail:
   140  	BC	4, 5, LR		// ble CR1 lr: no double words to copy
   141  
   142  backwardlarge:
   143  	MOVD	DWORDS, CTR
   144  	SUB	SRC, TGT, TMP		// dest - src; use vsx if moving
   145  	CMP	TMP, $32		// at least 32 byte chunks
   146  	BLT	backwardlargeloop	// and distance >= 32
   147  	SRDCC	$2, DWORDS, QWORDS	// 32 byte chunks
   148  	BNE	backward32setup
   149  
   150  backwardlargeloop:
   151  	MOVD	-8(SRC), TMP
   152  	SUB	$8, SRC
   153  	MOVD	TMP, -8(TGT)
   154  	SUB	$8, TGT
   155  	BC	16, 0, backwardlargeloop	// bdnz
   156  	RET
   157  
   158  backward32setup:
   159  	MOVD	QWORDS, CTR		// set up loop ctr
   160  	MOVD	$16, IDX16		// 32 bytes at a time
   161  
   162  backward32loop:
   163  	SUB	$32, TGT
   164  	SUB	$32, SRC
   165  	LXVD2X	(R0)(SRC), VS32		// load 32 bytes from the source
   166  	LXVD2X	(IDX16)(SRC), VS33
   167  	STXVD2X	VS32, (R0)(TGT)		// store 32 bytes to the destination
   168  	STXVD2X	VS33, (IDX16)(TGT)
   169  	BC	16, 0, backward32loop	// bdnz
   170  	ANDCC	$3, DWORDS		// doublewords left after the 32 byte chunks
   171  	BC	12, 2, LR		// beq lr: nothing left to copy
   172  	MOVD	DWORDS, CTR		// copy the leftovers 8 bytes at a time
   173  	BR	backwardlargeloop
   174  

View as plain text