Text file src/internal/runtime/maps/memhash_amd64.s

     1  // Copyright 2026 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "textflag.h"
     6  
     7  // func MemHash32(p unsafe.Pointer, h uintptr) uintptr
     8  // ABIInternal for performance.
     9  TEXT ·MemHash32<ABIInternal>(SB),NOSPLIT,$0-24
    10  	// AX = ptr to data
    11  	// BX = seed
    12  	CMPB	·UseAeshash(SB), $0
    13  	JEQ	noaes	// AES-NI unavailable: use the software fallback
    14  	MOVQ	BX, X0	// X0 = seed
    15  	PINSRD	$2, (AX), X0	// data
    16  	AESENC	·aeskeysched+0(SB), X0	// 3 rounds of scrambling with the per-process key schedule
    17  	AESENC	·aeskeysched+16(SB), X0
    18  	AESENC	·aeskeysched+32(SB), X0
    19  	MOVQ	X0, AX	// return X0
    20  	RET
    21  noaes:
    22  	JMP	·memHash32Fallback<ABIInternal>(SB)	// tail call; args still in AX/BX
    23  
    24  // func MemHash64(p unsafe.Pointer, h uintptr) uintptr
    25  // ABIInternal for performance.
    26  TEXT ·MemHash64<ABIInternal>(SB),NOSPLIT,$0-24
    27  	// AX = ptr to data
    28  	// BX = seed
    29  	CMPB	·UseAeshash(SB), $0
    30  	JEQ	noaes	// AES-NI unavailable: use the software fallback
    31  	MOVQ	BX, X0	// X0 = seed
    32  	PINSRQ	$1, (AX), X0	// data
    33  	AESENC	·aeskeysched+0(SB), X0	// 3 rounds of scrambling with the per-process key schedule
    34  	AESENC	·aeskeysched+16(SB), X0
    35  	AESENC	·aeskeysched+32(SB), X0
    36  	MOVQ	X0, AX	// return X0
    37  	RET
    38  noaes:
    39  	JMP	·memHash64Fallback<ABIInternal>(SB)	// tail call; args still in AX/BX
    40  
    41  // func MemHash(p unsafe.Pointer, h, s uintptr) uintptr
    42  // hash function using AES hardware instructions
    43  TEXT ·MemHash<ABIInternal>(SB),NOSPLIT,$0-32
    44  	// AX = ptr to data
    45  	// BX = seed
    46  	// CX = size
    47  	CMPB	·UseAeshash(SB), $0
    48  	JEQ	noaes	// AES-NI unavailable: use the software fallback
    49  	JMP	·aeshashbody<>(SB)	// tail call; aeshashbody expects AX/BX/CX as set above
    50  noaes:
    51  	JMP	·memHashFallback<ABIInternal>(SB)
    52  
    53  // func strhash(p unsafe.Pointer, h uintptr) uintptr
    54  TEXT ·StrHash<ABIInternal>(SB),NOSPLIT,$0-24
    55  	// AX = ptr to string struct
    56  	// BX = seed
    57  	CMPB	·UseAeshash(SB), $0
    58  	JEQ	noaes	// AES-NI unavailable: use the software fallback
    59  	MOVQ	8(AX), CX	// length of string
    60  	MOVQ	(AX), AX	// string data
    61  	JMP	·aeshashbody<>(SB)	// hash the string bytes as raw memory
    62  noaes:
    63  	JMP	·strHashFallback<ABIInternal>(SB)
    64  
    65  // AX: data
    66  // BX: hash seed
    67  // CX: length
    68  // At return: AX = return value
    69  TEXT ·aeshashbody<>(SB),NOSPLIT,$0-0
    70  	// Fill an SSE register with our seeds.
    71  	MOVQ	BX, X0				// 64 bits of per-table hash seed
    72  	PINSRW	$4, CX, X0			// 16 bits of length
    73  	PSHUFHW $0, X0, X0			// repeat length 4 times total
    74  	MOVO	X0, X1				// save unscrambled seed
    75  	PXOR	·aeskeysched(SB), X0	// xor in per-process seed
    76  	AESENC	X0, X0				// scramble seed
    77  
    78  	CMPQ	CX, $16				// dispatch on data length
    79  	JB	aes0to15
    80  	JE	aes16
    81  	CMPQ	CX, $32
    82  	JBE	aes17to32
    83  	CMPQ	CX, $64
    84  	JBE	aes33to64
    85  	CMPQ	CX, $128
    86  	JBE	aes65to128
    87  	JMP	aes129plus
    88  
    89  aes0to15:
    90  	TESTQ	CX, CX
    91  	JE	aes0
    92  
    93  	ADDQ	$16, AX
    94  	TESTW	$0xff0, AX			// bits 4..11 of p+16 all zero => might be near a page end
    95  	JE	endofpage
    96  
    97  	// 16 bytes loaded at this address won't cross
    98  	// a page boundary, so we can load it directly.
    99  	MOVOU	-16(AX), X1
   100  	ADDQ	CX, CX				// CX *= 2: table entries are 16 bytes apart, indexed via CX*8
   101  	MOVQ	$masks<>(SB), AX
   102  	PAND	(AX)(CX*8), X1			// zero the bytes beyond the data length
   103  final1:
   104  	PXOR	X0, X1	// xor data with seed
   105  	AESENC	X1, X1	// scramble combo 3 times
   106  	AESENC	X1, X1
   107  	AESENC	X1, X1
   108  	MOVQ	X1, AX	// return X1
   109  	RET
   110  
   111  endofpage:
   112  	// address ends in 1111xxxx. Might be up against
   113  	// a page boundary, so load ending at last byte.
   114  	// Then shift bytes down using pshufb.
   115  	MOVOU	-32(AX)(CX*1), X1
   116  	ADDQ	CX, CX				// CX *= 2: table entries are 16 bytes apart, indexed via CX*8
   117  	MOVQ	$shifts<>(SB), AX
   118  	PSHUFB	(AX)(CX*8), X1
   119  	JMP	final1
   120  
   121  aes0:
   122  	// Return scrambled input seed
   123  	AESENC	X0, X0
   124  	MOVQ	X0, AX	// return X0
   125  	RET
   126  
   127  aes16:
   128  	MOVOU	(AX), X1
   129  	JMP	final1
   130  
   131  aes17to32:
   132  	// make second starting seed
   133  	PXOR	·aeskeysched+16(SB), X1
   134  	AESENC	X1, X1
   135  
   136  	// load data to be hashed
   137  	MOVOU	(AX), X2
   138  	MOVOU	-16(AX)(CX*1), X3	// last 16 bytes; may overlap the first load
   139  
   140  	// xor with seed
   141  	PXOR	X0, X2
   142  	PXOR	X1, X3
   143  
   144  	// scramble 3 times
   145  	AESENC	X2, X2
   146  	AESENC	X3, X3
   147  	AESENC	X2, X2
   148  	AESENC	X3, X3
   149  	AESENC	X2, X2
   150  	AESENC	X3, X3
   151  
   152  	// combine results
   153  	PXOR	X3, X2
   154  	MOVQ	X2, AX	// return X2
   155  	RET
   156  
   157  aes33to64:
   158  	// make 3 more starting seeds
   159  	MOVO	X1, X2
   160  	MOVO	X1, X3
   161  	PXOR	·aeskeysched+16(SB), X1
   162  	PXOR	·aeskeysched+32(SB), X2
   163  	PXOR	·aeskeysched+48(SB), X3
   164  	AESENC	X1, X1
   165  	AESENC	X2, X2
   166  	AESENC	X3, X3
   167  
   168  	MOVOU	(AX), X4
   169  	MOVOU	16(AX), X5
   170  	MOVOU	-32(AX)(CX*1), X6	// last 32 bytes; may overlap the first loads
   171  	MOVOU	-16(AX)(CX*1), X7
   172  
   173  	PXOR	X0, X4
   174  	PXOR	X1, X5
   175  	PXOR	X2, X6
   176  	PXOR	X3, X7
   177  
   178  	AESENC	X4, X4
   179  	AESENC	X5, X5
   180  	AESENC	X6, X6
   181  	AESENC	X7, X7
   182  
   183  	AESENC	X4, X4
   184  	AESENC	X5, X5
   185  	AESENC	X6, X6
   186  	AESENC	X7, X7
   187  
   188  	AESENC	X4, X4
   189  	AESENC	X5, X5
   190  	AESENC	X6, X6
   191  	AESENC	X7, X7
   192  
   193  	PXOR	X6, X4
   194  	PXOR	X7, X5
   195  	PXOR	X5, X4
   196  	MOVQ	X4, AX	// return X4
   197  	RET
   198  
   199  aes65to128:
   200  	// make 7 more starting seeds
   201  	MOVO	X1, X2
   202  	MOVO	X1, X3
   203  	MOVO	X1, X4
   204  	MOVO	X1, X5
   205  	MOVO	X1, X6
   206  	MOVO	X1, X7
   207  	PXOR	·aeskeysched+16(SB), X1
   208  	PXOR	·aeskeysched+32(SB), X2
   209  	PXOR	·aeskeysched+48(SB), X3
   210  	PXOR	·aeskeysched+64(SB), X4
   211  	PXOR	·aeskeysched+80(SB), X5
   212  	PXOR	·aeskeysched+96(SB), X6
   213  	PXOR	·aeskeysched+112(SB), X7
   214  	AESENC	X1, X1
   215  	AESENC	X2, X2
   216  	AESENC	X3, X3
   217  	AESENC	X4, X4
   218  	AESENC	X5, X5
   219  	AESENC	X6, X6
   220  	AESENC	X7, X7
   221  
   222  	// load data
   223  	MOVOU	(AX), X8
   224  	MOVOU	16(AX), X9
   225  	MOVOU	32(AX), X10
   226  	MOVOU	48(AX), X11
   227  	MOVOU	-64(AX)(CX*1), X12	// last 64 bytes; may overlap the first loads
   228  	MOVOU	-48(AX)(CX*1), X13
   229  	MOVOU	-32(AX)(CX*1), X14
   230  	MOVOU	-16(AX)(CX*1), X15
   231  
   232  	// xor with seed
   233  	PXOR	X0, X8
   234  	PXOR	X1, X9
   235  	PXOR	X2, X10
   236  	PXOR	X3, X11
   237  	PXOR	X4, X12
   238  	PXOR	X5, X13
   239  	PXOR	X6, X14
   240  	PXOR	X7, X15
   241  
   242  	// scramble 3 times
   243  	AESENC	X8, X8
   244  	AESENC	X9, X9
   245  	AESENC	X10, X10
   246  	AESENC	X11, X11
   247  	AESENC	X12, X12
   248  	AESENC	X13, X13
   249  	AESENC	X14, X14
   250  	AESENC	X15, X15
   251  
   252  	AESENC	X8, X8
   253  	AESENC	X9, X9
   254  	AESENC	X10, X10
   255  	AESENC	X11, X11
   256  	AESENC	X12, X12
   257  	AESENC	X13, X13
   258  	AESENC	X14, X14
   259  	AESENC	X15, X15
   260  
   261  	AESENC	X8, X8
   262  	AESENC	X9, X9
   263  	AESENC	X10, X10
   264  	AESENC	X11, X11
   265  	AESENC	X12, X12
   266  	AESENC	X13, X13
   267  	AESENC	X14, X14
   268  	AESENC	X15, X15
   269  
   270  	// combine results
   271  	PXOR	X12, X8
   272  	PXOR	X13, X9
   273  	PXOR	X14, X10
   274  	PXOR	X15, X11
   275  	PXOR	X10, X8
   276  	PXOR	X11, X9
   277  	PXOR	X9, X8
   278  	// X15 must be zero on return
   279  	PXOR	X15, X15
   280  	MOVQ	X8, AX	// return X8
   281  	RET
   282  
   283  aes129plus:
   284  	// make 7 more starting seeds
   285  	MOVO	X1, X2
   286  	MOVO	X1, X3
   287  	MOVO	X1, X4
   288  	MOVO	X1, X5
   289  	MOVO	X1, X6
   290  	MOVO	X1, X7
   291  	PXOR	·aeskeysched+16(SB), X1
   292  	PXOR	·aeskeysched+32(SB), X2
   293  	PXOR	·aeskeysched+48(SB), X3
   294  	PXOR	·aeskeysched+64(SB), X4
   295  	PXOR	·aeskeysched+80(SB), X5
   296  	PXOR	·aeskeysched+96(SB), X6
   297  	PXOR	·aeskeysched+112(SB), X7
   298  	AESENC	X1, X1
   299  	AESENC	X2, X2
   300  	AESENC	X3, X3
   301  	AESENC	X4, X4
   302  	AESENC	X5, X5
   303  	AESENC	X6, X6
   304  	AESENC	X7, X7
   305  
   306  	// start with last (possibly overlapping) block
   307  	MOVOU	-128(AX)(CX*1), X8
   308  	MOVOU	-112(AX)(CX*1), X9
   309  	MOVOU	-96(AX)(CX*1), X10
   310  	MOVOU	-80(AX)(CX*1), X11
   311  	MOVOU	-64(AX)(CX*1), X12
   312  	MOVOU	-48(AX)(CX*1), X13
   313  	MOVOU	-32(AX)(CX*1), X14
   314  	MOVOU	-16(AX)(CX*1), X15
   315  
   316  	// xor in seed
   317  	PXOR	X0, X8
   318  	PXOR	X1, X9
   319  	PXOR	X2, X10
   320  	PXOR	X3, X11
   321  	PXOR	X4, X12
   322  	PXOR	X5, X13
   323  	PXOR	X6, X14
   324  	PXOR	X7, X15
   325  
   326  	// compute number of remaining 128-byte blocks
   327  	DECQ	CX				// CX = floor((size-1)/128); >= 1 since size >= 129
   328  	SHRQ	$7, CX
   329  
   330  	PCALIGN $16
   331  aesloop:
   332  	// scramble state
   333  	AESENC	X8, X8
   334  	AESENC	X9, X9
   335  	AESENC	X10, X10
   336  	AESENC	X11, X11
   337  	AESENC	X12, X12
   338  	AESENC	X13, X13
   339  	AESENC	X14, X14
   340  	AESENC	X15, X15
   341  
   342  	// scramble state, xor in a block
   343  	MOVOU	(AX), X0
   344  	MOVOU	16(AX), X1
   345  	MOVOU	32(AX), X2
   346  	MOVOU	48(AX), X3
   347  	AESENC	X0, X8	// data block is used as the AES round key
   348  	AESENC	X1, X9
   349  	AESENC	X2, X10
   350  	AESENC	X3, X11
   351  	MOVOU	64(AX), X4
   352  	MOVOU	80(AX), X5
   353  	MOVOU	96(AX), X6
   354  	MOVOU	112(AX), X7
   355  	AESENC	X4, X12
   356  	AESENC	X5, X13
   357  	AESENC	X6, X14
   358  	AESENC	X7, X15
   359  
   360  	ADDQ	$128, AX
   361  	DECQ	CX
   362  	JNE	aesloop
   363  
   364  	// 3 more scrambles to finish
   365  	AESENC	X8, X8
   366  	AESENC	X9, X9
   367  	AESENC	X10, X10
   368  	AESENC	X11, X11
   369  	AESENC	X12, X12
   370  	AESENC	X13, X13
   371  	AESENC	X14, X14
   372  	AESENC	X15, X15
   373  	AESENC	X8, X8
   374  	AESENC	X9, X9
   375  	AESENC	X10, X10
   376  	AESENC	X11, X11
   377  	AESENC	X12, X12
   378  	AESENC	X13, X13
   379  	AESENC	X14, X14
   380  	AESENC	X15, X15
   381  	AESENC	X8, X8
   382  	AESENC	X9, X9
   383  	AESENC	X10, X10
   384  	AESENC	X11, X11
   385  	AESENC	X12, X12
   386  	AESENC	X13, X13
   387  	AESENC	X14, X14
   388  	AESENC	X15, X15
   389  
   390  	PXOR	X12, X8
   391  	PXOR	X13, X9
   392  	PXOR	X14, X10
   393  	PXOR	X15, X11
   394  	PXOR	X10, X8
   395  	PXOR	X11, X9
   396  	PXOR	X9, X8
   397  	// X15 must be zero on return
   398  	PXOR	X15, X15
   399  	MOVQ	X8, AX	// return X8
   400  	RET
   401  
   402  // simple mask to get rid of data in the high part of the register.
	// Entry i (the 16 bytes at offset i*16) keeps the low i bytes and
	// zeroes the rest; aes0to15 indexes it as (base)(CX*8) with CX = 2*length.
   403  DATA masks<>+0x00(SB)/8, $0x0000000000000000
   404  DATA masks<>+0x08(SB)/8, $0x0000000000000000
   405  DATA masks<>+0x10(SB)/8, $0x00000000000000ff
   406  DATA masks<>+0x18(SB)/8, $0x0000000000000000
   407  DATA masks<>+0x20(SB)/8, $0x000000000000ffff
   408  DATA masks<>+0x28(SB)/8, $0x0000000000000000
   409  DATA masks<>+0x30(SB)/8, $0x0000000000ffffff
   410  DATA masks<>+0x38(SB)/8, $0x0000000000000000
   411  DATA masks<>+0x40(SB)/8, $0x00000000ffffffff
   412  DATA masks<>+0x48(SB)/8, $0x0000000000000000
   413  DATA masks<>+0x50(SB)/8, $0x000000ffffffffff
   414  DATA masks<>+0x58(SB)/8, $0x0000000000000000
   415  DATA masks<>+0x60(SB)/8, $0x0000ffffffffffff
   416  DATA masks<>+0x68(SB)/8, $0x0000000000000000
   417  DATA masks<>+0x70(SB)/8, $0x00ffffffffffffff
   418  DATA masks<>+0x78(SB)/8, $0x0000000000000000
   419  DATA masks<>+0x80(SB)/8, $0xffffffffffffffff
   420  DATA masks<>+0x88(SB)/8, $0x0000000000000000
   421  DATA masks<>+0x90(SB)/8, $0xffffffffffffffff
   422  DATA masks<>+0x98(SB)/8, $0x00000000000000ff
   423  DATA masks<>+0xa0(SB)/8, $0xffffffffffffffff
   424  DATA masks<>+0xa8(SB)/8, $0x000000000000ffff
   425  DATA masks<>+0xb0(SB)/8, $0xffffffffffffffff
   426  DATA masks<>+0xb8(SB)/8, $0x0000000000ffffff
   427  DATA masks<>+0xc0(SB)/8, $0xffffffffffffffff
   428  DATA masks<>+0xc8(SB)/8, $0x00000000ffffffff
   429  DATA masks<>+0xd0(SB)/8, $0xffffffffffffffff
   430  DATA masks<>+0xd8(SB)/8, $0x000000ffffffffff
   431  DATA masks<>+0xe0(SB)/8, $0xffffffffffffffff
   432  DATA masks<>+0xe8(SB)/8, $0x0000ffffffffffff
   433  DATA masks<>+0xf0(SB)/8, $0xffffffffffffffff
   434  DATA masks<>+0xf8(SB)/8, $0x00ffffffffffffff
   435  GLOBL masks<>(SB),RODATA,$256
   436  
   437  // these are arguments to pshufb. They move data down from
   438  // the high bytes of the register to the low bytes of the register.
   439  // index is how many bytes to move.
	// A 0xff selector byte makes PSHUFB write zero into that lane, so
	// entry i shifts the top i bytes down and zero-fills the remainder.
   440  DATA shifts<>+0x00(SB)/8, $0x0000000000000000
   441  DATA shifts<>+0x08(SB)/8, $0x0000000000000000
   442  DATA shifts<>+0x10(SB)/8, $0xffffffffffffff0f
   443  DATA shifts<>+0x18(SB)/8, $0xffffffffffffffff
   444  DATA shifts<>+0x20(SB)/8, $0xffffffffffff0f0e
   445  DATA shifts<>+0x28(SB)/8, $0xffffffffffffffff
   446  DATA shifts<>+0x30(SB)/8, $0xffffffffff0f0e0d
   447  DATA shifts<>+0x38(SB)/8, $0xffffffffffffffff
   448  DATA shifts<>+0x40(SB)/8, $0xffffffff0f0e0d0c
   449  DATA shifts<>+0x48(SB)/8, $0xffffffffffffffff
   450  DATA shifts<>+0x50(SB)/8, $0xffffff0f0e0d0c0b
   451  DATA shifts<>+0x58(SB)/8, $0xffffffffffffffff
   452  DATA shifts<>+0x60(SB)/8, $0xffff0f0e0d0c0b0a
   453  DATA shifts<>+0x68(SB)/8, $0xffffffffffffffff
   454  DATA shifts<>+0x70(SB)/8, $0xff0f0e0d0c0b0a09
   455  DATA shifts<>+0x78(SB)/8, $0xffffffffffffffff
   456  DATA shifts<>+0x80(SB)/8, $0x0f0e0d0c0b0a0908
   457  DATA shifts<>+0x88(SB)/8, $0xffffffffffffffff
   458  DATA shifts<>+0x90(SB)/8, $0x0e0d0c0b0a090807
   459  DATA shifts<>+0x98(SB)/8, $0xffffffffffffff0f
   460  DATA shifts<>+0xa0(SB)/8, $0x0d0c0b0a09080706
   461  DATA shifts<>+0xa8(SB)/8, $0xffffffffffff0f0e
   462  DATA shifts<>+0xb0(SB)/8, $0x0c0b0a0908070605
   463  DATA shifts<>+0xb8(SB)/8, $0xffffffffff0f0e0d
   464  DATA shifts<>+0xc0(SB)/8, $0x0b0a090807060504
   465  DATA shifts<>+0xc8(SB)/8, $0xffffffff0f0e0d0c
   466  DATA shifts<>+0xd0(SB)/8, $0x0a09080706050403
   467  DATA shifts<>+0xd8(SB)/8, $0xffffff0f0e0d0c0b
   468  DATA shifts<>+0xe0(SB)/8, $0x0908070605040302
   469  DATA shifts<>+0xe8(SB)/8, $0xffff0f0e0d0c0b0a
   470  DATA shifts<>+0xf0(SB)/8, $0x0807060504030201
   471  DATA shifts<>+0xf8(SB)/8, $0xff0f0e0d0c0b0a09
   472  GLOBL shifts<>(SB),RODATA,$256
   473  
   474  TEXT ·checkMasksAndShiftsAlignment<ABIInternal>(SB),NOSPLIT,$0-1
   475  	// check that masks<>(SB) and shifts<>(SB) are aligned to 16-byte
   476  	MOVQ	$masks<>(SB), AX
   477  	MOVQ	$shifts<>(SB), BX
   478  	ORQ	BX, AX	// combine low bits of both addresses
   479  	TESTQ	$15, AX	// any set low-4 bit means one table is misaligned
   480  	SETEQ	AX	// bool result: 1 iff both addresses are 16-byte aligned
   481  	RET
   482  

View as plain text