Text file src/runtime/memclr_s390x.s

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "textflag.h"
     6  
     7  // See memclrNoHeapPointers Go doc for important implementation constraints.
     8  
     9  // func memclrNoHeapPointers(ptr unsafe.Pointer, n uintptr)
        //
        // Zeroes the n bytes starting at ptr.
        //
        // Registers: R4 = address of the next byte to clear, R5 = number of bytes
        // still to clear, V1 = all-zero vector used by the 16-byte VST stores.
        //
        // Paths, selected by the byte count remaining in R5:
        //   >= 4096    clearge4KB:     4KB per iteration, as 16 XC instructions of 256 bytes
        //   256-4095   clear256Bto4KB: one 256-byte XC per iteration
        //   32-255     clear32to255:   16-byte vector stores, no loop
        //   0-31       start:          8-, 4-, 2- and 1-byte stores of constant zero
        //
        // Each XC names the same region as both operands, so every byte is XORed
        // with itself and ends up zero.
        //
        // NOTE(review): CMP and CMPB* look like the signed compare forms while n is
        // a uintptr -- presumably n never has its top bit set; confirm.
    10  TEXT runtime·memclrNoHeapPointers(SB),NOSPLIT|NOFRAME,$0-16
    11  	MOVD	ptr+0(FP), R4	// R4 = ptr
    12  	MOVD	n+8(FP), R5	// R5 = n
    13    
    14  	CMPBGE	R5, $32, clearge32	// 32 bytes or more: take the bulk paths
    15    
        // Fewer than 32 bytes remain on every entry to start, so the 16-byte step
        // below runs at most once before one of the tail cases finishes the job.
    16  start:
    17  	CMPBLE	R5, $3, clear0to3
    18  	CMPBLE	R5, $7, clear4to7
    19  	CMPBLE	R5, $11, clear8to11
    20  	CMPBLE	R5, $15, clear12to15
    21  	MOVD	$0, 0(R4)	// 16-31 bytes left: clear 16 of them
    22  	MOVD	$0, 8(R4)
    23  	ADD	$16, R4
    24  	SUB	$16, R5
    25  	BR	start	// re-dispatch on the remaining 0-15 bytes
    26    
        // Tail cases: each label clears exactly R5 bytes with the widest stores
        // that fit and then returns.
        // 0-3 bytes: nothing, 1, 2, or 2+1.
    27  clear0to3:
    28  	CMPBEQ	R5, $0, done
    29  	CMPBNE	R5, $1, clear2
    30  	MOVB	$0, 0(R4)
    31  	RET
    32  clear2:
    33  	CMPBNE	R5, $2, clear3
    34  	MOVH	$0, 0(R4)
    35  	RET
    36  clear3:
    37  	MOVH	$0, 0(R4)
    38  	MOVB	$0, 2(R4)
    39  	RET
    40    
        // 4-7 bytes: 4, 4+1, 4+2, or 4+2+1.
    41  clear4to7:
    42  	CMPBNE	R5, $4, clear5
    43  	MOVW	$0, 0(R4)
    44  	RET
    45  clear5:
    46  	CMPBNE	R5, $5, clear6
    47  	MOVW	$0, 0(R4)
    48  	MOVB	$0, 4(R4)
    49  	RET
    50  clear6:
    51  	CMPBNE	R5, $6, clear7
    52  	MOVW	$0, 0(R4)
    53  	MOVH	$0, 4(R4)
    54  	RET
    55  clear7:
    56  	MOVW	$0, 0(R4)
    57  	MOVH	$0, 4(R4)
    58  	MOVB	$0, 6(R4)
    59  	RET
    60    
        // 8-11 bytes: 8, 8+1, 8+2, or 8+2+1.
    61  clear8to11:
    62  	CMPBNE	R5, $8, clear9
    63  	MOVD	$0, 0(R4)
    64  	RET
    65  clear9:
    66  	CMPBNE	R5, $9, clear10
    67  	MOVD	$0, 0(R4)
    68  	MOVB	$0, 8(R4)
    69  	RET
    70  clear10:
    71  	CMPBNE	R5, $10, clear11
    72  	MOVD	$0, 0(R4)
    73  	MOVH	$0, 8(R4)
    74  	RET
    75  clear11:
    76  	MOVD	$0, 0(R4)
    77  	MOVH	$0, 8(R4)
    78  	MOVB	$0, 10(R4)
    79  	RET
    80    
        // 12-15 bytes: 8+4, 8+4+1, 8+4+2, or 8+4+2+1.
    81  clear12to15:
    82  	CMPBNE	R5, $12, clear13
    83  	MOVD	$0, 0(R4)
    84  	MOVW	$0, 8(R4)
    85  	RET
    86  clear13:
    87  	CMPBNE	R5, $13, clear14
    88  	MOVD	$0, 0(R4)
    89  	MOVW	$0, 8(R4)
    90  	MOVB	$0, 12(R4)
    91  	RET
    92  clear14:
    93  	CMPBNE	R5, $14, clear15
    94  	MOVD	$0, 0(R4)
    95  	MOVW	$0, 8(R4)
    96  	MOVH	$0, 12(R4)
    97  	RET
    98  clear15:
    99  	MOVD	$0, 0(R4)
   100  	MOVW	$0, 8(R4)
   101  	MOVH	$0, 12(R4)
   102  	MOVB	$0, 14(R4)
   103  	RET
   104    
        // At least 32 bytes to clear. Below 4KB the unrolled loop is skipped.
   105  clearge32:
   106  	CMP	R5, $4096
   107  	BLT	clear256Bto4KB
   108    
   109  // For size >= 4KB, XC is loop unrolled 16 times (4KB = 256B * 16)
   110  clearge4KB:
   111  	XC	$256, 0(R4), 0(R4)
   112  	XC	$256, 256(R4), 256(R4)
   113  	XC	$256, 512(R4), 512(R4)
   114  	XC	$256, 768(R4), 768(R4)
   115  	XC	$256, 1024(R4), 1024(R4)
   116  	XC	$256, 1280(R4), 1280(R4)
   117  	XC	$256, 1536(R4), 1536(R4)
   118  	XC	$256, 1792(R4), 1792(R4)
   119  	XC	$256, 2048(R4), 2048(R4)
   120  	XC	$256, 2304(R4), 2304(R4)
   121  	XC	$256, 2560(R4), 2560(R4)
   122  	XC	$256, 2816(R4), 2816(R4)
   123  	XC	$256, 3072(R4), 3072(R4)
   124  	XC	$256, 3328(R4), 3328(R4)
   125  	XC	$256, 3584(R4), 3584(R4)
   126  	XC	$256, 3840(R4), 3840(R4)
   127  	ADD	$4096, R4	// advance past the 4KB just cleared
   128  	ADD	$-4096, R5
   129  	CMP	R5, $4096
   130  	BGE	clearge4KB	// repeat while a full 4KB remains
   131    
        // Fewer than 4KB remain: clear 256 bytes per iteration until fewer than
        // 256 bytes are left.
   132  clear256Bto4KB:
   133  	CMP	R5, $256
   134  	BLT	clear32to255
   135  	XC	$256, 0(R4), 0(R4)
   136  	ADD	$256, R4
   137  	ADD	$-256, R5
   138  	BR	clear256Bto4KB
   139    
        // Fewer than 256 bytes remain (the BLT above is the only way in).
        // Dispatch on the remaining count.
   140  clear32to255:
   141  	CMPBEQ	R5, $0, done	// the bulk loops consumed everything
   142  	CMPBLT	R5, $32, start	// 1-31 bytes left: use the small-size path
   143  	CMPBEQ	R5, $32, clear32
   144  	CMPBLE	R5, $64, clear33to64
   145  	CMP	R5, $128
   146  	BLE	clear65to128
   147  	CMP	R5, $255
   148  	BLE	clear129to255	// R5 < 256 on this path, so this branch is always taken
   149    
        // Exactly 32 bytes: two 16-byte vector stores.
   150  clear32:
   151  	VZERO	V1
   152  	VST	V1, 0(R4)
   153  	VST	V1, 16(R4)
   154  	RET
   155    
        // 33-64 bytes: clear the first 32 bytes, then the 32 bytes that end at the
        // last byte to clear. The two ranges overlap when fewer than 64 bytes
        // are being cleared.
   156  clear33to64:
   157  	VZERO	V1
   158  	VST	V1, 0(R4)
   159  	VST	V1, 16(R4)
   160  	ADD	$-32, R5	// R5 = offset of the final 32-byte range
   161  	VST	V1, 0(R4)(R5)
   162  	VST	V1, 16(R4)(R5)
   163  	RET
   164    
        // 65-128 bytes: same scheme with a 64-byte head and a 64-byte tail.
   165  clear65to128:
   166  	VZERO	V1
   167  	VST	V1, 0(R4)
   168  	VST	V1, 16(R4)
   169  	VST	V1, 32(R4)
   170  	VST	V1, 48(R4)
   171  	ADD	$-64, R5	// R5 = offset of the final 64-byte range
   172  	VST	V1, 0(R4)(R5)
   173  	VST	V1, 16(R4)(R5)
   174  	VST	V1, 32(R4)(R5)
   175  	VST	V1, 48(R4)(R5)
   176  	RET
   177    
        // 129-255 bytes: same scheme with a 128-byte head and a 128-byte tail.
   178  clear129to255:
   179  	VZERO	V1
   180  	VST	V1, 0(R4)
   181  	VST	V1, 16(R4)
   182  	VST	V1, 32(R4)
   183  	VST	V1, 48(R4)
   184  	VST	V1, 64(R4)
   185  	VST	V1, 80(R4)
   186  	VST	V1, 96(R4)
   187  	VST	V1, 112(R4)
   188  	ADD	$-128, R5	// R5 = offset of the final 128-byte range
   189  	VST	V1, 0(R4)(R5)
   190  	VST	V1, 16(R4)(R5)
   191  	VST	V1, 32(R4)(R5)
   192  	VST	V1, 48(R4)(R5)
   193  	VST	V1, 64(R4)(R5)
   194  	VST	V1, 80(R4)(R5)
   195  	VST	V1, 96(R4)(R5)
   196  	VST	V1, 112(R4)(R5)
   197  	RET
   198    
        // Nothing left to clear.
   199  done:
   200  	RET
   201  
   202  

View as plain text