Text file src/internal/runtime/maps/memhash_386.s

     1  // Copyright 2026 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "textflag.h"
     6  
     7  // hash function using AES hardware instructions
        //
        // MemHash32 hashes the 4 bytes at p with seed h and stores a 32-bit
        // result in ret. Arguments are read from p+0(FP) and h+4(FP); the result
        // is written to ret+8(FP). When ·UseAeshash is zero, control is handed to
        // ·memHash32Fallback with a JMP, so the fallback sees the same arguments.
     8  TEXT ·MemHash32(SB),NOSPLIT,$0-12
     9  	CMPB	·UseAeshash(SB), $0
    10  	JEQ	noaes	// AES path disabled: use the fallback
    11  	MOVL	p+0(FP), AX	// ptr to data
    12  	MOVL	h+4(FP), X0	// seed
    13  	PINSRD	$1, (AX), X0	// data
        	// Three AES rounds, keyed by consecutive 16-byte slices of ·aeskeysched.
    14  	AESENC	·aeskeysched+0(SB), X0
    15  	AESENC	·aeskeysched+16(SB), X0
    16  	AESENC	·aeskeysched+32(SB), X0
    17  	MOVL	X0, ret+8(FP)	// result is the low 32 bits of the state
    18  	RET
    19  noaes:
    20  	JMP	·memHash32Fallback(SB)
    21  
        // MemHash64 hashes the 8 bytes at p with seed h and stores a 32-bit
        // result in ret. Arguments are read from p+0(FP) and h+4(FP); the result
        // is written to ret+8(FP). When ·UseAeshash is zero, control is handed to
        // ·memHash64Fallback with a JMP, so the fallback sees the same arguments.
    22  TEXT ·MemHash64(SB),NOSPLIT,$0-12
    23  	CMPB	·UseAeshash(SB), $0
    24  	JEQ	noaes	// AES path disabled: use the fallback
    25  	MOVL	p+0(FP), AX	// ptr to data
    26  	MOVQ	(AX), X0	// data
    27  	PINSRD	$2, h+4(FP), X0	// seed
        	// Three AES rounds, keyed by consecutive 16-byte slices of ·aeskeysched.
    28  	AESENC	·aeskeysched+0(SB), X0
    29  	AESENC	·aeskeysched+16(SB), X0
    30  	AESENC	·aeskeysched+32(SB), X0
    31  	MOVL	X0, ret+8(FP)	// result is the low 32 bits of the state
    32  	RET
    33  noaes:
    34  	JMP	·memHash64Fallback(SB)
    35  
        // MemHash hashes the s bytes at p. It loads the data pointer, the size and
        // the address of the result slot into AX, BX and DX, then jumps to
        // ·aeshashbody<>, which reads the seed from h+4(FP) of this same frame and
        // stores the 32-bit result through DX. When ·UseAeshash is zero, control
        // is handed to ·memHashFallback with a JMP instead.
    36  TEXT ·MemHash(SB),NOSPLIT,$0-16
    37  	CMPB	·UseAeshash(SB), $0
    38  	JEQ	noaes	// AES path disabled: use the fallback
    39  	MOVL	p+0(FP), AX	// ptr to data
    40  	MOVL	s+8(FP), BX	// size
    41  	LEAL	ret+12(FP), DX	// address of the result slot
    42  	JMP	·aeshashbody<>(SB)	// tail jump: aeshashbody's RET returns to our caller
    43  noaes:
    44  	JMP	·memHashFallback(SB)
    45  
        // StrHash hashes the string whose header p points to. It loads the length
        // from offset 4 and the data pointer from offset 0 of that header, puts the
        // address of the result slot in DX, then jumps to ·aeshashbody<>, which
        // reads the seed from h+4(FP) of this same frame and stores the 32-bit
        // result through DX. When ·UseAeshash is zero, control is handed to
        // ·strHashFallback with a JMP instead.
    46  TEXT ·StrHash(SB),NOSPLIT,$0-12
    47  	CMPB	·UseAeshash(SB), $0
    48  	JEQ	noaes	// AES path disabled: use the fallback
    49  	MOVL	p+0(FP), AX	// ptr to string object
    50  	MOVL	4(AX), BX	// length of string
    51  	MOVL	(AX), AX	// string data
    52  	LEAL	ret+8(FP), DX	// address of the result slot
    53  	JMP	·aeshashbody<>(SB)	// tail jump: aeshashbody's RET returns to our caller
    54  noaes:
    55  	JMP	·strHashFallback(SB)
    56  
    57  // AX: data
    58  // BX: length
    59  // DX: address to put return value
        //
        // aeshashbody stores a 32-bit hash of the BX bytes at AX through DX.
        // It is entered by JMP from ·MemHash and ·StrHash rather than by CALL,
        // so h+4(FP) below reads the word at offset 4 of that entry point's
        // argument frame (the seed), and every RET returns to the entry point's
        // caller. AX, BX and X0-X7 are overwritten along the way.
    60  TEXT ·aeshashbody<>(SB),NOSPLIT,$0-0
    61  	MOVL	h+4(FP), X0	            // 32 bits of per-table hash seed
    62  	PINSRW	$4, BX, X0	            // 16 bits of length
    63  	PSHUFHW	$0, X0, X0	            // replace size with its low 2 bytes repeated 4 times
    64  	MOVO	X0, X1                      // save unscrambled seed
    65  	PXOR	·aeskeysched(SB), X0 // xor in per-process seed
    66  	AESENC	X0, X0                      // scramble seed
    67  
        	// At this point X0 is the scrambled seed and X1 the unscrambled copy.
        	// Dispatch on length with unsigned compares. JB and JE both consume
        	// the flags of the single CMPL against 16.
    68  	CMPL	BX, $16
    69  	JB	aes0to15
    70  	JE	aes16
    71  	CMPL	BX, $32
    72  	JBE	aes17to32
    73  	CMPL	BX, $64
    74  	JBE	aes33to64
    75  	JMP	aes65plus
    76  
    77  aes0to15:
    78  	TESTL	BX, BX
    79  	JE	aes0
    80  
        	// AX = data+16. Bits 4..11 of data+16 are all zero exactly when
        	// bits 4..11 of the data address are all ones; in that case a
        	// 16-byte load starting at data could run past the boundary.
    81  	ADDL	$16, AX
    82  	TESTW	$0xff0, AX
    83  	JE	endofpage
    84  
    85  	// 16 bytes loaded at this address won't cross
    86  	// a page boundary, so we can load it directly.
    87  	MOVOU	-16(AX), X1	// AX-16 is the original data pointer
    88  	ADDL	BX, BX	// 2*len, so that the *8 scale below indexes 16-byte entries
    89  	PAND	masks<>(SB)(BX*8), X1	// keep only the first len bytes
    90  
    91  final1:
    92  	PXOR	X0, X1	// xor data with seed
    93  	AESENC	X1, X1  // scramble combo 3 times
    94  	AESENC	X1, X1
    95  	AESENC	X1, X1
    96  	MOVL	X1, (DX)	// result is the low 32 bits
    97  	RET
    98  
    99  endofpage:
   100  	// bits 4..11 of the address are all ones. Might be up against
   101  	// a page boundary, so load ending at last byte.
   102  	// Then shift bytes down using pshufb.
   103  	MOVOU	-32(AX)(BX*1), X1	// AX is data+16, so this reads data+len-16 .. data+len-1
   104  	ADDL	BX, BX	// 2*len, so that the *8 scale below indexes 16-byte entries
   105  	PSHUFB	shifts<>(SB)(BX*8), X1	// move the top len bytes to the bottom, zero the rest
   106  	JMP	final1
   107  
   108  aes0:
   109  	// Return scrambled input seed
   110  	AESENC	X0, X0
   111  	MOVL	X0, (DX)
   112  	RET
   113  
   114  aes16:
        	// exactly 16 bytes: load them unmasked
   115  	MOVOU	(AX), X1
   116  	JMP	final1
   117  
   118  aes17to32:
   119  	// make second starting seed
   120  	PXOR	·aeskeysched+16(SB), X1
   121  	AESENC	X1, X1
   122  
   123  	// load data to be hashed
        	// (first 16 and last 16 bytes; they overlap when len < 32)
   124  	MOVOU	(AX), X2
   125  	MOVOU	-16(AX)(BX*1), X3
   126  
   127  	// xor with seed
   128  	PXOR	X0, X2
   129  	PXOR	X1, X3
   130  
   131  	// scramble 3 times
   132  	AESENC	X2, X2
   133  	AESENC	X3, X3
   134  	AESENC	X2, X2
   135  	AESENC	X3, X3
   136  	AESENC	X2, X2
   137  	AESENC	X3, X3
   138  
   139  	// combine results
   140  	PXOR	X3, X2
   141  	MOVL	X2, (DX)
   142  	RET
   143  
   144  aes33to64:
   145  	// make 3 more starting seeds
        	// (each is the unscrambled seed xored with a different 16-byte
        	// slice of ·aeskeysched, then scrambled once)
   146  	MOVO	X1, X2
   147  	MOVO	X1, X3
   148  	PXOR	·aeskeysched+16(SB), X1
   149  	PXOR	·aeskeysched+32(SB), X2
   150  	PXOR	·aeskeysched+48(SB), X3
   151  	AESENC	X1, X1
   152  	AESENC	X2, X2
   153  	AESENC	X3, X3
   154  
        	// load the first 32 and the last 32 bytes (they overlap when len < 64)
   155  	MOVOU	(AX), X4
   156  	MOVOU	16(AX), X5
   157  	MOVOU	-32(AX)(BX*1), X6
   158  	MOVOU	-16(AX)(BX*1), X7
   159  
        	// xor each 16-byte lane with its own seed
   160  	PXOR	X0, X4
   161  	PXOR	X1, X5
   162  	PXOR	X2, X6
   163  	PXOR	X3, X7
   164  
        	// scramble each lane 3 times
   165  	AESENC	X4, X4
   166  	AESENC	X5, X5
   167  	AESENC	X6, X6
   168  	AESENC	X7, X7
   169  
   170  	AESENC	X4, X4
   171  	AESENC	X5, X5
   172  	AESENC	X6, X6
   173  	AESENC	X7, X7
   174  
   175  	AESENC	X4, X4
   176  	AESENC	X5, X5
   177  	AESENC	X6, X6
   178  	AESENC	X7, X7
   179  
        	// fold the four lanes together and return the low 32 bits
   180  	PXOR	X6, X4
   181  	PXOR	X7, X5
   182  	PXOR	X5, X4
   183  	MOVL	X4, (DX)
   184  	RET
   185  
   186  aes65plus:
   187  	// make 3 more starting seeds
   188  	MOVO	X1, X2
   189  	MOVO	X1, X3
   190  	PXOR	·aeskeysched+16(SB), X1
   191  	PXOR	·aeskeysched+32(SB), X2
   192  	PXOR	·aeskeysched+48(SB), X3
   193  	AESENC	X1, X1
   194  	AESENC	X2, X2
   195  	AESENC	X3, X3
   196  
   197  	// start with last (possibly overlapping) block
   198  	MOVOU	-64(AX)(BX*1), X4
   199  	MOVOU	-48(AX)(BX*1), X5
   200  	MOVOU	-32(AX)(BX*1), X6
   201  	MOVOU	-16(AX)(BX*1), X7
   202  
   203  	// scramble state once
        	// (X4-X7 are the running state; the seeds in X0-X3 are the round keys)
   204  	AESENC	X0, X4
   205  	AESENC	X1, X5
   206  	AESENC	X2, X6
   207  	AESENC	X3, X7
   208  
   209  	// compute number of remaining 64-byte blocks
        	// BX = (len-1)/64, which is at least 1 because len > 64 here,
        	// so the DECL/JNE loop below never starts with a zero count.
   210  	DECL	BX
   211  	SHRL	$6, BX
   212  
   213  aesloop:
   214  	// scramble state, xor in a block
        	// (X0-X3 are reused for data; the seeds are no longer needed)
   215  	MOVOU	(AX), X0
   216  	MOVOU	16(AX), X1
   217  	MOVOU	32(AX), X2
   218  	MOVOU	48(AX), X3
   219  	AESENC	X0, X4
   220  	AESENC	X1, X5
   221  	AESENC	X2, X6
   222  	AESENC	X3, X7
   223  
   224  	// scramble state
   225  	AESENC	X4, X4
   226  	AESENC	X5, X5
   227  	AESENC	X6, X6
   228  	AESENC	X7, X7
   229  
   230  	ADDL	$64, AX	// advance to the next 64-byte block
   231  	DECL	BX
   232  	JNE	aesloop
   233  
   234  	// 3 more scrambles to finish
   235  	AESENC	X4, X4
   236  	AESENC	X5, X5
   237  	AESENC	X6, X6
   238  	AESENC	X7, X7
   239  
   240  	AESENC	X4, X4
   241  	AESENC	X5, X5
   242  	AESENC	X6, X6
   243  	AESENC	X7, X7
   244  
   245  	AESENC	X4, X4
   246  	AESENC	X5, X5
   247  	AESENC	X6, X6
   248  	AESENC	X7, X7
   249  
        	// fold the four lanes together and return the low 32 bits
   250  	PXOR	X6, X4
   251  	PXOR	X7, X5
   252  	PXOR	X5, X4
   253  	MOVL	X4, (DX)
   254  	RET
   255  
   256  // simple mask to get rid of data in the high part of the register.
        // Entry i (the 16 bytes at offset 16*i, i = 0..15) has its low i bytes set
        // to 0xff and the remaining bytes zero. aeshashbody indexes the table by
        // length and uses the entry as the memory operand of PAND, so that only
        // the first i bytes of a 16-byte load survive.
   257  DATA masks<>+0x00(SB)/4, $0x00000000
   258  DATA masks<>+0x04(SB)/4, $0x00000000
   259  DATA masks<>+0x08(SB)/4, $0x00000000
   260  DATA masks<>+0x0c(SB)/4, $0x00000000
   261  
   262  DATA masks<>+0x10(SB)/4, $0x000000ff
   263  DATA masks<>+0x14(SB)/4, $0x00000000
   264  DATA masks<>+0x18(SB)/4, $0x00000000
   265  DATA masks<>+0x1c(SB)/4, $0x00000000
   266  
   267  DATA masks<>+0x20(SB)/4, $0x0000ffff
   268  DATA masks<>+0x24(SB)/4, $0x00000000
   269  DATA masks<>+0x28(SB)/4, $0x00000000
   270  DATA masks<>+0x2c(SB)/4, $0x00000000
   271  
   272  DATA masks<>+0x30(SB)/4, $0x00ffffff
   273  DATA masks<>+0x34(SB)/4, $0x00000000
   274  DATA masks<>+0x38(SB)/4, $0x00000000
   275  DATA masks<>+0x3c(SB)/4, $0x00000000
   276  
   277  DATA masks<>+0x40(SB)/4, $0xffffffff
   278  DATA masks<>+0x44(SB)/4, $0x00000000
   279  DATA masks<>+0x48(SB)/4, $0x00000000
   280  DATA masks<>+0x4c(SB)/4, $0x00000000
   281  
   282  DATA masks<>+0x50(SB)/4, $0xffffffff
   283  DATA masks<>+0x54(SB)/4, $0x000000ff
   284  DATA masks<>+0x58(SB)/4, $0x00000000
   285  DATA masks<>+0x5c(SB)/4, $0x00000000
   286  
   287  DATA masks<>+0x60(SB)/4, $0xffffffff
   288  DATA masks<>+0x64(SB)/4, $0x0000ffff
   289  DATA masks<>+0x68(SB)/4, $0x00000000
   290  DATA masks<>+0x6c(SB)/4, $0x00000000
   291  
   292  DATA masks<>+0x70(SB)/4, $0xffffffff
   293  DATA masks<>+0x74(SB)/4, $0x00ffffff
   294  DATA masks<>+0x78(SB)/4, $0x00000000
   295  DATA masks<>+0x7c(SB)/4, $0x00000000
   296  
   297  DATA masks<>+0x80(SB)/4, $0xffffffff
   298  DATA masks<>+0x84(SB)/4, $0xffffffff
   299  DATA masks<>+0x88(SB)/4, $0x00000000
   300  DATA masks<>+0x8c(SB)/4, $0x00000000
   301  
   302  DATA masks<>+0x90(SB)/4, $0xffffffff
   303  DATA masks<>+0x94(SB)/4, $0xffffffff
   304  DATA masks<>+0x98(SB)/4, $0x000000ff
   305  DATA masks<>+0x9c(SB)/4, $0x00000000
   306  
   307  DATA masks<>+0xa0(SB)/4, $0xffffffff
   308  DATA masks<>+0xa4(SB)/4, $0xffffffff
   309  DATA masks<>+0xa8(SB)/4, $0x0000ffff
   310  DATA masks<>+0xac(SB)/4, $0x00000000
   311  
   312  DATA masks<>+0xb0(SB)/4, $0xffffffff
   313  DATA masks<>+0xb4(SB)/4, $0xffffffff
   314  DATA masks<>+0xb8(SB)/4, $0x00ffffff
   315  DATA masks<>+0xbc(SB)/4, $0x00000000
   316  
   317  DATA masks<>+0xc0(SB)/4, $0xffffffff
   318  DATA masks<>+0xc4(SB)/4, $0xffffffff
   319  DATA masks<>+0xc8(SB)/4, $0xffffffff
   320  DATA masks<>+0xcc(SB)/4, $0x00000000
   321  
   322  DATA masks<>+0xd0(SB)/4, $0xffffffff
   323  DATA masks<>+0xd4(SB)/4, $0xffffffff
   324  DATA masks<>+0xd8(SB)/4, $0xffffffff
   325  DATA masks<>+0xdc(SB)/4, $0x000000ff
   326  
   327  DATA masks<>+0xe0(SB)/4, $0xffffffff
   328  DATA masks<>+0xe4(SB)/4, $0xffffffff
   329  DATA masks<>+0xe8(SB)/4, $0xffffffff
   330  DATA masks<>+0xec(SB)/4, $0x0000ffff
   331  
   332  DATA masks<>+0xf0(SB)/4, $0xffffffff
   333  DATA masks<>+0xf4(SB)/4, $0xffffffff
   334  DATA masks<>+0xf8(SB)/4, $0xffffffff
   335  DATA masks<>+0xfc(SB)/4, $0x00ffffff
   336  
        // 16 entries of 16 bytes each, read-only
   337  GLOBL masks<>(SB),RODATA,$256
   338  
   339  // these are arguments to pshufb. They move data down from
   340  // the high bytes of the register to the low bytes of the register.
   341  // index is how many bytes to move.
        // Entry i (the 16 bytes at offset 16*i) selects source bytes 16-i..15 into
        // destination bytes 0..i-1. The remaining control bytes are 0xff, whose set
        // high bit makes PSHUFB write zero to that destination byte.
        // Entry 0 is all zeros; aeshashbody handles length 0 on a separate path
        // (aes0) and does not index it.
   342  DATA shifts<>+0x00(SB)/4, $0x00000000
   343  DATA shifts<>+0x04(SB)/4, $0x00000000
   344  DATA shifts<>+0x08(SB)/4, $0x00000000
   345  DATA shifts<>+0x0c(SB)/4, $0x00000000
   346  
   347  DATA shifts<>+0x10(SB)/4, $0xffffff0f
   348  DATA shifts<>+0x14(SB)/4, $0xffffffff
   349  DATA shifts<>+0x18(SB)/4, $0xffffffff
   350  DATA shifts<>+0x1c(SB)/4, $0xffffffff
   351  
   352  DATA shifts<>+0x20(SB)/4, $0xffff0f0e
   353  DATA shifts<>+0x24(SB)/4, $0xffffffff
   354  DATA shifts<>+0x28(SB)/4, $0xffffffff
   355  DATA shifts<>+0x2c(SB)/4, $0xffffffff
   356  
   357  DATA shifts<>+0x30(SB)/4, $0xff0f0e0d
   358  DATA shifts<>+0x34(SB)/4, $0xffffffff
   359  DATA shifts<>+0x38(SB)/4, $0xffffffff
   360  DATA shifts<>+0x3c(SB)/4, $0xffffffff
   361  
   362  DATA shifts<>+0x40(SB)/4, $0x0f0e0d0c
   363  DATA shifts<>+0x44(SB)/4, $0xffffffff
   364  DATA shifts<>+0x48(SB)/4, $0xffffffff
   365  DATA shifts<>+0x4c(SB)/4, $0xffffffff
   366  
   367  DATA shifts<>+0x50(SB)/4, $0x0e0d0c0b
   368  DATA shifts<>+0x54(SB)/4, $0xffffff0f
   369  DATA shifts<>+0x58(SB)/4, $0xffffffff
   370  DATA shifts<>+0x5c(SB)/4, $0xffffffff
   371  
   372  DATA shifts<>+0x60(SB)/4, $0x0d0c0b0a
   373  DATA shifts<>+0x64(SB)/4, $0xffff0f0e
   374  DATA shifts<>+0x68(SB)/4, $0xffffffff
   375  DATA shifts<>+0x6c(SB)/4, $0xffffffff
   376  
   377  DATA shifts<>+0x70(SB)/4, $0x0c0b0a09
   378  DATA shifts<>+0x74(SB)/4, $0xff0f0e0d
   379  DATA shifts<>+0x78(SB)/4, $0xffffffff
   380  DATA shifts<>+0x7c(SB)/4, $0xffffffff
   381  
   382  DATA shifts<>+0x80(SB)/4, $0x0b0a0908
   383  DATA shifts<>+0x84(SB)/4, $0x0f0e0d0c
   384  DATA shifts<>+0x88(SB)/4, $0xffffffff
   385  DATA shifts<>+0x8c(SB)/4, $0xffffffff
   386  
   387  DATA shifts<>+0x90(SB)/4, $0x0a090807
   388  DATA shifts<>+0x94(SB)/4, $0x0e0d0c0b
   389  DATA shifts<>+0x98(SB)/4, $0xffffff0f
   390  DATA shifts<>+0x9c(SB)/4, $0xffffffff
   391  
   392  DATA shifts<>+0xa0(SB)/4, $0x09080706
   393  DATA shifts<>+0xa4(SB)/4, $0x0d0c0b0a
   394  DATA shifts<>+0xa8(SB)/4, $0xffff0f0e
   395  DATA shifts<>+0xac(SB)/4, $0xffffffff
   396  
   397  DATA shifts<>+0xb0(SB)/4, $0x08070605
   398  DATA shifts<>+0xb4(SB)/4, $0x0c0b0a09
   399  DATA shifts<>+0xb8(SB)/4, $0xff0f0e0d
   400  DATA shifts<>+0xbc(SB)/4, $0xffffffff
   401  
   402  DATA shifts<>+0xc0(SB)/4, $0x07060504
   403  DATA shifts<>+0xc4(SB)/4, $0x0b0a0908
   404  DATA shifts<>+0xc8(SB)/4, $0x0f0e0d0c
   405  DATA shifts<>+0xcc(SB)/4, $0xffffffff
   406  
   407  DATA shifts<>+0xd0(SB)/4, $0x06050403
   408  DATA shifts<>+0xd4(SB)/4, $0x0a090807
   409  DATA shifts<>+0xd8(SB)/4, $0x0e0d0c0b
   410  DATA shifts<>+0xdc(SB)/4, $0xffffff0f
   411  
   412  DATA shifts<>+0xe0(SB)/4, $0x05040302
   413  DATA shifts<>+0xe4(SB)/4, $0x09080706
   414  DATA shifts<>+0xe8(SB)/4, $0x0d0c0b0a
   415  DATA shifts<>+0xec(SB)/4, $0xffff0f0e
   416  
   417  DATA shifts<>+0xf0(SB)/4, $0x04030201
   418  DATA shifts<>+0xf4(SB)/4, $0x08070605
   419  DATA shifts<>+0xf8(SB)/4, $0x0c0b0a09
   420  DATA shifts<>+0xfc(SB)/4, $0xff0f0e0d
   421  
        // 16 entries of 16 bytes each, read-only
   422  GLOBL shifts<>(SB),RODATA,$256
   423  
        // checkMasksAndShiftsAlignment writes 1 to its one-byte result at ret+0(FP)
        // when the addresses of masks<> and shifts<> are both multiples of 16,
        // and 0 otherwise. Both tables are used as 16-byte memory operands
        // (PAND and PSHUFB) in aeshashbody.
   424  TEXT ·checkMasksAndShiftsAlignment(SB),NOSPLIT,$0-1
   425  	// check that masks<>(SB) and shifts<>(SB) are aligned to 16-byte
   426  	MOVL	$masks<>(SB), AX
   427  	MOVL	$shifts<>(SB), BX
   428  	ORL	BX, AX	// a low bit set in either address survives the OR
   429  	TESTL	$15, AX	// ZF is set only if the low 4 bits of both addresses are zero
   430  	SETEQ   ret+0(FP)
   431  	RET
   432  

View as plain text