Black Lives Matter. Support the Equal Justice Initiative.

Text file src/runtime/memclr_ppc64x.s

Documentation: runtime

     1  // Copyright 2014 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build ppc64 || ppc64le
     6  // +build ppc64 ppc64le
     7  
     8  #include "textflag.h"
     9  
    10  // See memclrNoHeapPointers Go doc for important implementation constraints.
    11  // Zeroes n bytes starting at ptr. R3 is the address of the next byte to clear and R4 the bytes remaining; R0 is the store source and is never written here, so it is assumed to hold 0 on entry.
    12  // func memclrNoHeapPointers(ptr unsafe.Pointer, n uintptr)
    13  TEXT runtime·memclrNoHeapPointers(SB), NOSPLIT|NOFRAME, $0-16
    14  	MOVD ptr+0(FP), R3 // R3: address of next byte to clear
    15  	MOVD n+8(FP), R4   // R4: bytes remaining
    16  
    17  	// Determine if there are doublewords to clear
    18  check:
    19  	ANDCC $7, R4, R5  // R5: leftover bytes to clear
    20  	SRD   $3, R4, R6  // R6: double words to clear
    21  	CMP   R6, $0, CR1 // CR1[EQ] set if no double words
    22  
    23  	BC    12, 6, nozerolarge // branch if CR1[EQ] set: n < 8, only single bytes
    24  	CMP   R4, $512
    25  	BLT   under512           // special case for < 512
    26  	ANDCC $127, R3, R8       // check for 128 alignment of address
    27  	BEQ   zero512setup       // already 128 byte aligned, go straight to the dcbz setup
    28  	// n >= 512 and ptr is not 128 byte aligned: align ptr before using dcbz.
    29  	ANDCC $7, R3, R15   // check for 8 byte alignment of address
    30  	BEQ   zero512xsetup // at least 8 byte aligned
    31  
    32  	// zero bytes up to 8 byte alignment
    33  
    34  	ANDCC $1, R3, R15 // check for byte alignment
    35  	BEQ   byte2
    36  	MOVB  R0, 0(R3)   // zero 1 byte
    37  	ADD   $1, R3      // bump ptr by 1
    38  	ADD   $-1, R4     // dec count by 1
    39  
    40  byte2:
    41  	ANDCC $2, R3, R15 // check for 2 byte alignment
    42  	BEQ   byte4
    43  	MOVH  R0, 0(R3)   // zero 2 bytes
    44  	ADD   $2, R3      // bump ptr by 2
    45  	ADD   $-2, R4     // dec count by 2
    46  
    47  byte4:
    48  	ANDCC $4, R3, R15   // check for 4 byte alignment
    49  	BEQ   zero512xsetup // bit 2 clear: ptr is already 8 byte aligned
    50  	MOVW  R0, 0(R3)     // zero 4 bytes
    51  	ADD   $4, R3        // bump ptr by 4
    52  	ADD   $-4, R4       // dec count by 4
    53  	BR    zero512xsetup // ptr should now be 8 byte aligned
    54  
    55  under512:
    56  	MOVD  R6, CTR     // R6 = number of double words
    57  	SRDCC $2, R6, R7  // R7 = number of 32 byte chunks, CR0 set from the result
    58  	BNE   zero32setup // at least one 32 byte chunk; CTR is reloaded from R7 there
    59  
    60  	// Clear double words; expects CTR = number of double words (must be non-zero)
    61  
    62  zero8:
    63  	MOVD R0, 0(R3)    // double word
    64  	ADD  $8, R3       // bump ptr by 8
    65  	ADD  $-8, R4      // dec count by 8
    66  	BC   16, 0, zero8 // dec ctr, br zero8 if ctr not 0
    67  	BR   nozerolarge  // handle leftovers
    68  
    69  	// Prepare to clear 32 bytes at a time; expects R7 = number of 32 byte chunks.
    70  
    71  zero32setup:
    72  	DCBTST (R3)             // prepare data cache
    73  	XXLXOR VS32, VS32, VS32 // clear VS32 (V0)
    74  	MOVD   R7, CTR          // number of 32 byte chunks
    75  	MOVD   $16, R8          // R8 = offset of the second 16 byte store
    76  
    77  zero32:
    78  	STXVD2X VS32, (R3+R0)   // store 16 bytes
    79  	STXVD2X VS32, (R3+R8)   // store the next 16 bytes
    80  	ADD     $32, R3         // bump ptr by 32
    81  	ADD     $-32, R4        // dec count by 32
    82  	BC      16, 0, zero32   // dec ctr, br zero32 if ctr not 0
    83  	RLDCLCC $61, R4, $3, R6 // R6 = remaining doublewords (R4 / 8), CR0 set from the result
    84  	BEQ     nozerolarge     // no whole doublewords left
    85  	MOVD    R6, CTR         // set up the CTR for doublewords
    86  	BR      zero8
    87  	// Clear the final (R4 & 7) bytes one at a time.
    88  nozerolarge:
    89  	ANDCC $7, R4, R5 // any remaining bytes
    90  	BC    4, 1, LR   // ble lr: return if no tail bytes remain
    91  
    92  zerotail:
    93  	MOVD R5, CTR // set up to clear tail bytes
    94  
    95  zerotailloop:
    96  	MOVB R0, 0(R3)           // clear single bytes
    97  	ADD  $1, R3              // bump ptr by 1 (R4 is no longer updated)
    98  	BC   16, 0, zerotailloop // dec ctr, br zerotailloop if ctr not 0
    99  	RET
   100  	// ptr is 8 byte aligned here; step up to 16 and then 128 byte alignment.
   101  zero512xsetup:  // 512 chunk with extra needed
   102  	ANDCC $8, R3, R11    // 8 byte aligned but not 16 byte aligned?
   103  	BEQ   zero512setup16 // already 16 byte aligned
   104  	MOVD  R0, 0(R3)      // clear 8 bytes
   105  	ADD   $8, R3         // update ptr to next 8
   106  	ADD   $-8, R4        // dec count by 8
   107  
   108  zero512setup16:
   109  	ANDCC $127, R3, R14 // R14 = offset of ptr within its 128 byte block
   110  	BEQ   zero512setup  // handle 128 byte alignment
   111  	MOVD  $128, R15
   112  	SUB   R14, R15, R14 // R14 = 128 - R14: increment to 128 alignment
   113  	SRD   $4, R14, R15  // number of 16 byte chunks
   114  
   115  zero512presetup:
   116  	MOVD   R15, CTR         // loop counter of 16 bytes
   117  	XXLXOR VS32, VS32, VS32 // clear VS32 (V0)
   118  
   119  zero512preloop:  // clear up to 128 alignment
   120  	STXVD2X VS32, (R3+R0)         // clear 16 bytes
   121  	ADD     $16, R3               // update ptr
   122  	ADD     $-16, R4              // dec count
   123  	BC      16, 0, zero512preloop // dec ctr, br zero512preloop if ctr not 0
   124  	// ptr is 128 byte aligned here.
   125  zero512setup:  // setup for dcbz loop
   126  	CMP  R4, $512   // check if at least 512
   127  	BLT  remain     // alignment may have left fewer than 512 bytes
   128  	SRD  $9, R4, R8 // loop count for 512 chunks
   129  	MOVD R8, CTR    // set up counter
   130  	MOVD $128, R9   // index regs for 128 bytes
   131  	MOVD $256, R10
   132  	MOVD $384, R11
   133  
   134  zero512:
   135  	DCBZ (R3+R0)        // clear first 128 byte chunk
   136  	DCBZ (R3+R9)        // clear second chunk
   137  	DCBZ (R3+R10)       // clear third chunk
   138  	DCBZ (R3+R11)       // clear fourth chunk
   139  	ADD  $512, R3       // bump ptr by 512
   140  	ADD  $-512, R4      // dec count by 512
   141  	BC   16, 0, zero512 // dec ctr, br zero512 if ctr not 0
   142  	// Fewer than 512 bytes remain; clear whole 128 byte chunks one at a time.
   143  remain:
   144  	CMP  R4, $128  // check if 128 byte chunks left
   145  	BLT  smaller
   146  	DCBZ (R3+R0)   // clear 128
   147  	ADD  $128, R3  // bump ptr by 128
   148  	ADD  $-128, R4 // dec count by 128
   149  	BR   remain
   150  
   151  smaller:
   152  	ANDCC $127, R4, R7 // find leftovers
   153  	BEQ   done         // nothing left to clear
   154  	CMP   R7, $64      // 64 or more, do 32 at a time
   155  	BLT   zero8setup   // less than 64, do 8 at a time
   156  	SRD   $5, R7, R7   // set up counter for 32
   157  	BR    zero32setup
   158  
   159  zero8setup:
   160  	SRDCC $3, R7, R7  // R7 = number of doublewords; zero if less than 8 bytes
   161  	BEQ   nozerolarge // only tail bytes remain
   162  	MOVD  R7, CTR     // set up the CTR for doublewords
   163  	BR    zero8
   164  
   165  done:
   166  	RET
   167  

View as plain text