1 // Copyright 2026 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 #include "textflag.h"
6
// func MemHash32(p unsafe.Pointer, h uintptr) uintptr
//
// Hashes the 4 bytes at p with seed h.
// ABIInternal, so arguments and result travel in registers:
//	AX = p on entry, hash value on return
//	BX = h
// If UseAeshash is zero, control is handed to memHash32Fallback with
// AX and BX still holding the original arguments.
TEXT ·MemHash32<ABIInternal>(SB),NOSPLIT,$0-24
	CMPB	·UseAeshash(SB), $0
	JEQ	fallback
	MOVQ	BX, X0				// X0 = seed in the low 64 bits
	PINSRD	$2, (AX), X0			// 32-bit lane 2 of X0 = the 4 data bytes
	AESENC	·aeskeysched+0(SB), X0		// three AES rounds, each keyed by the
	AESENC	·aeskeysched+16(SB), X0		// next 16-byte slice of aeskeysched
	AESENC	·aeskeysched+32(SB), X0
	MOVQ	X0, AX				// result = low 64 bits of X0
	RET
fallback:
	JMP	·memHash32Fallback<ABIInternal>(SB)
23
// func MemHash64(p unsafe.Pointer, h uintptr) uintptr
//
// Hashes the 8 bytes at p with seed h.
// ABIInternal, so arguments and result travel in registers:
//	AX = p on entry, hash value on return
//	BX = h
// If UseAeshash is zero, control is handed to memHash64Fallback with
// AX and BX still holding the original arguments.
TEXT ·MemHash64<ABIInternal>(SB),NOSPLIT,$0-24
	CMPB	·UseAeshash(SB), $0
	JEQ	fallback
	MOVQ	BX, X0				// X0 = seed in the low 64 bits
	PINSRQ	$1, (AX), X0			// high 64 bits of X0 = the 8 data bytes
	AESENC	·aeskeysched+0(SB), X0		// three AES rounds, each keyed by the
	AESENC	·aeskeysched+16(SB), X0		// next 16-byte slice of aeskeysched
	AESENC	·aeskeysched+32(SB), X0
	MOVQ	X0, AX				// result = low 64 bits of X0
	RET
fallback:
	JMP	·memHash64Fallback<ABIInternal>(SB)
40
// func MemHash(p unsafe.Pointer, h, s uintptr) uintptr
//
// Hashes the s bytes at p with seed h.
// ABIInternal register assignment:
//	AX = p on entry, hash value on return
//	BX = h
//	CX = s
// This routine only selects the implementation: it tail-jumps to
// aeshashbody when UseAeshash is non-zero and to memHashFallback
// otherwise, leaving AX, BX and CX untouched for the target.
TEXT ·MemHash<ABIInternal>(SB),NOSPLIT,$0-32
	CMPB	·UseAeshash(SB), $0
	JEQ	fallback
	JMP	·aeshashbody<>(SB)
fallback:
	JMP	·memHashFallback<ABIInternal>(SB)
52
// func StrHash(p unsafe.Pointer, h uintptr) uintptr
//
// Hashes the string whose header is at p with seed h.
// ABIInternal register assignment:
//	AX = p (pointer to the string header) on entry, hash value on return
//	BX = h
// With AES hashing enabled, the header is unpacked into the
// (AX = data, BX = seed, CX = length) form that aeshashbody expects.
// Otherwise strHashFallback receives the original arguments.
TEXT ·StrHash<ABIInternal>(SB),NOSPLIT,$0-24
	CMPB	·UseAeshash(SB), $0
	JEQ	fallback
	MOVQ	8(AX), CX			// CX = length; read before AX is overwritten
	MOVQ	(AX), AX			// AX = pointer to the string bytes
	JMP	·aeshashbody<>(SB)
fallback:
	JMP	·strHashFallback<ABIInternal>(SB)
64
// aeshashbody is the shared AES-based hashing core that MemHash and
// StrHash jump to.
//
// In:	AX = pointer to the data
//	BX = hash seed
//	CX = length in bytes
// Out:	AX = hash value
//
// X0-X15, CX and the flags are used as scratch. The two paths that
// load data into X15 clear it again before returning.
TEXT ·aeshashbody<>(SB),NOSPLIT,$0-0
	// Build the initial seed vector in X0.
	MOVQ	BX, X0				// low 64 bits: per-table hash seed
	PINSRW	$4, CX, X0			// word 4: low 16 bits of the length
	PSHUFHW	$0, X0, X0			// replicate word 4 across words 4-7
	MOVO	X0, X1				// X1 = unscrambled seed, basis for extra seeds
	PXOR	·aeskeysched(SB), X0		// xor in per-process seed
	AESENC	X0, X0				// scramble seed

	// Pick a code path by length (unsigned comparisons).
	CMPQ	CX, $16
	JB	under16
	JE	exact16
	CMPQ	CX, $32
	JBE	upto32
	CMPQ	CX, $64
	JBE	upto64
	CMPQ	CX, $128
	JBE	upto128
	JMP	bulk

under16:
	// Length 0-15.
	TESTQ	CX, CX
	JE	empty

	// Bits 4-11 of p+16 are all zero exactly when p lies in the last
	// 16 bytes of a 4096-byte-aligned region. A 16-byte load starting
	// at p could then reach past the end of that region.
	ADDQ	$16, AX
	TESTW	$0xff0, AX
	JE	pagetail

	// Over-reading is safe here: load 16 bytes starting at p (AX is
	// p+16 now), then clear everything above the first CX bytes using
	// the masks entry for this length.
	MOVOU	-16(AX), X1
	ADDQ	CX, CX				// with the *8 scale below: offset = 16*length
	MOVQ	$masks<>(SB), AX
	PAND	(AX)(CX*8), X1
mix1:
	// Single-block finish: X1 holds up to 16 data bytes.
	PXOR	X0, X1				// combine data with the scrambled seed
	AESENC	X1, X1				// three scrambling rounds
	AESENC	X1, X1
	AESENC	X1, X1
	MOVQ	X1, AX				// result = low 64 bits of X1
	RET

pagetail:
	// p is within 16 bytes of a 4096-byte boundary. Load the 16 bytes
	// that end at the last data byte instead (AX is p+16, so the
	// address below is p+length-16), then let PSHUFB with the shifts
	// entry for this length move the data bytes down to the low end
	// of X1 and zero the remaining bytes.
	MOVOU	-32(AX)(CX*1), X1
	ADDQ	CX, CX				// with the *8 scale below: offset = 16*length
	MOVQ	$shifts<>(SB), AX
	PSHUFB	(AX)(CX*8), X1
	JMP	mix1

empty:
	// Zero length: the hash is the seed scrambled one more time.
	AESENC	X0, X0
	MOVQ	X0, AX				// result = low 64 bits of X0
	RET

exact16:
	// Exactly 16 bytes: one full load, no masking required.
	MOVOU	(AX), X1
	JMP	mix1

upto32:
	// Length 17-32: two lanes, the second may overlap the first.

	// Derive a second seed from the unscrambled one.
	PXOR	·aeskeysched+16(SB), X1
	AESENC	X1, X1

	// First 16 bytes and last 16 bytes of the data.
	MOVOU	(AX), X2
	MOVOU	-16(AX)(CX*1), X3

	// Seed each lane.
	PXOR	X0, X2
	PXOR	X1, X3

	// Three scrambling rounds per lane, interleaved.
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X2, X2
	AESENC	X3, X3

	// Fold the lanes together.
	PXOR	X3, X2
	MOVQ	X2, AX				// result = low 64 bits of X2
	RET

upto64:
	// Length 33-64: four lanes (first 32 bytes, last 32 bytes).

	// Derive three more seeds from the unscrambled one.
	MOVO	X1, X2
	MOVO	X1, X3
	PXOR	·aeskeysched+16(SB), X1
	PXOR	·aeskeysched+32(SB), X2
	PXOR	·aeskeysched+48(SB), X3
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3

	// Load data; the tail loads may overlap the head loads.
	MOVOU	(AX), X4
	MOVOU	16(AX), X5
	MOVOU	-32(AX)(CX*1), X6
	MOVOU	-16(AX)(CX*1), X7

	// Seed each lane.
	PXOR	X0, X4
	PXOR	X1, X5
	PXOR	X2, X6
	PXOR	X3, X7

	// Scrambling round 1.
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	// Scrambling round 2.
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	// Scrambling round 3.
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	// Fold the four lanes into X4.
	PXOR	X6, X4
	PXOR	X7, X5
	PXOR	X5, X4
	MOVQ	X4, AX				// result = low 64 bits of X4
	RET

upto128:
	// Length 65-128: eight lanes (first 64 bytes, last 64 bytes).

	// Derive seven more seeds from the unscrambled one.
	MOVO	X1, X2
	MOVO	X1, X3
	MOVO	X1, X4
	MOVO	X1, X5
	MOVO	X1, X6
	MOVO	X1, X7
	PXOR	·aeskeysched+16(SB), X1
	PXOR	·aeskeysched+32(SB), X2
	PXOR	·aeskeysched+48(SB), X3
	PXOR	·aeskeysched+64(SB), X4
	PXOR	·aeskeysched+80(SB), X5
	PXOR	·aeskeysched+96(SB), X6
	PXOR	·aeskeysched+112(SB), X7
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	// Load data; the tail loads may overlap the head loads.
	MOVOU	(AX), X8
	MOVOU	16(AX), X9
	MOVOU	32(AX), X10
	MOVOU	48(AX), X11
	MOVOU	-64(AX)(CX*1), X12
	MOVOU	-48(AX)(CX*1), X13
	MOVOU	-32(AX)(CX*1), X14
	MOVOU	-16(AX)(CX*1), X15

	// Seed each lane.
	PXOR	X0, X8
	PXOR	X1, X9
	PXOR	X2, X10
	PXOR	X3, X11
	PXOR	X4, X12
	PXOR	X5, X13
	PXOR	X6, X14
	PXOR	X7, X15

	// Scrambling round 1.
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	// Scrambling round 2.
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	// Scrambling round 3.
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	// Fold the eight lanes into X8.
	PXOR	X12, X8
	PXOR	X13, X9
	PXOR	X14, X10
	PXOR	X15, X11
	PXOR	X10, X8
	PXOR	X11, X9
	PXOR	X9, X8
	// X15 was used as a data lane; it must be zero on return.
	PXOR	X15, X15
	MOVQ	X8, AX				// result = low 64 bits of X8
	RET

bulk:
	// Length 129 and up: eight lanes of running state in X8-X15.

	// Derive seven more seeds from the unscrambled one.
	MOVO	X1, X2
	MOVO	X1, X3
	MOVO	X1, X4
	MOVO	X1, X5
	MOVO	X1, X6
	MOVO	X1, X7
	PXOR	·aeskeysched+16(SB), X1
	PXOR	·aeskeysched+32(SB), X2
	PXOR	·aeskeysched+48(SB), X3
	PXOR	·aeskeysched+64(SB), X4
	PXOR	·aeskeysched+80(SB), X5
	PXOR	·aeskeysched+96(SB), X6
	PXOR	·aeskeysched+112(SB), X7
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	// Initialise the state from the final 128 bytes of the data. These
	// may overlap the last block consumed by the loop below.
	MOVOU	-128(AX)(CX*1), X8
	MOVOU	-112(AX)(CX*1), X9
	MOVOU	-96(AX)(CX*1), X10
	MOVOU	-80(AX)(CX*1), X11
	MOVOU	-64(AX)(CX*1), X12
	MOVOU	-48(AX)(CX*1), X13
	MOVOU	-32(AX)(CX*1), X14
	MOVOU	-16(AX)(CX*1), X15

	// Seed each lane.
	PXOR	X0, X8
	PXOR	X1, X9
	PXOR	X2, X10
	PXOR	X3, X11
	PXOR	X4, X12
	PXOR	X5, X13
	PXOR	X6, X14
	PXOR	X7, X15

	// CX = (length-1)/128, the number of 128-byte blocks the loop
	// consumes from the start of the data. Length is at least 129 on
	// this path, so the count is at least 1.
	DECQ	CX
	SHRQ	$7, CX

	PCALIGN	$16
bulkloop:
	// One round on the state alone.
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	// A second round with the next 128 data bytes as round keys, which
	// scrambles the state and xors the block in. X0-X7 are free to be
	// reused for the data because the seeds are already in the state.
	MOVOU	(AX), X0
	MOVOU	16(AX), X1
	MOVOU	32(AX), X2
	MOVOU	48(AX), X3
	AESENC	X0, X8
	AESENC	X1, X9
	AESENC	X2, X10
	AESENC	X3, X11
	MOVOU	64(AX), X4
	MOVOU	80(AX), X5
	MOVOU	96(AX), X6
	MOVOU	112(AX), X7
	AESENC	X4, X12
	AESENC	X5, X13
	AESENC	X6, X14
	AESENC	X7, X15

	ADDQ	$128, AX			// advance to the next block
	DECQ	CX
	JNE	bulkloop

	// Three closing rounds on every lane.
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	// Fold the eight lanes into X8.
	PXOR	X12, X8
	PXOR	X13, X9
	PXOR	X14, X10
	PXOR	X15, X11
	PXOR	X10, X8
	PXOR	X11, X9
	PXOR	X9, X8
	// X15 was used as a state lane; it must be zero on return.
	PXOR	X15, X15
	MOVQ	X8, AX				// result = low 64 bits of X8
	RET
401
// masks is a table of sixteen 16-byte AND masks. Entry n (at byte
// offset 16*n, n = 0..15) has its low n bytes set to 0xff and all
// higher bytes zero, so ANDing a 16-byte load with entry n keeps the
// first n bytes and clears the rest. Each entry is written as two
// 8-byte halves, low half first.
DATA	masks<>+0x00(SB)/8, $0x0000000000000000	// n = 0
DATA	masks<>+0x08(SB)/8, $0x0000000000000000
DATA	masks<>+0x10(SB)/8, $0x00000000000000ff	// n = 1
DATA	masks<>+0x18(SB)/8, $0x0000000000000000
DATA	masks<>+0x20(SB)/8, $0x000000000000ffff	// n = 2
DATA	masks<>+0x28(SB)/8, $0x0000000000000000
DATA	masks<>+0x30(SB)/8, $0x0000000000ffffff	// n = 3
DATA	masks<>+0x38(SB)/8, $0x0000000000000000
DATA	masks<>+0x40(SB)/8, $0x00000000ffffffff	// n = 4
DATA	masks<>+0x48(SB)/8, $0x0000000000000000
DATA	masks<>+0x50(SB)/8, $0x000000ffffffffff	// n = 5
DATA	masks<>+0x58(SB)/8, $0x0000000000000000
DATA	masks<>+0x60(SB)/8, $0x0000ffffffffffff	// n = 6
DATA	masks<>+0x68(SB)/8, $0x0000000000000000
DATA	masks<>+0x70(SB)/8, $0x00ffffffffffffff	// n = 7
DATA	masks<>+0x78(SB)/8, $0x0000000000000000
DATA	masks<>+0x80(SB)/8, $0xffffffffffffffff	// n = 8
DATA	masks<>+0x88(SB)/8, $0x0000000000000000
DATA	masks<>+0x90(SB)/8, $0xffffffffffffffff	// n = 9
DATA	masks<>+0x98(SB)/8, $0x00000000000000ff
DATA	masks<>+0xa0(SB)/8, $0xffffffffffffffff	// n = 10
DATA	masks<>+0xa8(SB)/8, $0x000000000000ffff
DATA	masks<>+0xb0(SB)/8, $0xffffffffffffffff	// n = 11
DATA	masks<>+0xb8(SB)/8, $0x0000000000ffffff
DATA	masks<>+0xc0(SB)/8, $0xffffffffffffffff	// n = 12
DATA	masks<>+0xc8(SB)/8, $0x00000000ffffffff
DATA	masks<>+0xd0(SB)/8, $0xffffffffffffffff	// n = 13
DATA	masks<>+0xd8(SB)/8, $0x000000ffffffffff
DATA	masks<>+0xe0(SB)/8, $0xffffffffffffffff	// n = 14
DATA	masks<>+0xe8(SB)/8, $0x0000ffffffffffff
DATA	masks<>+0xf0(SB)/8, $0xffffffffffffffff	// n = 15
DATA	masks<>+0xf8(SB)/8, $0x00ffffffffffffff
GLOBL	masks<>(SB),RODATA,$256
436
// shifts is a table of sixteen 16-byte PSHUFB control vectors. Entry n
// (at byte offset 16*n, n = 1..15) selects source bytes 16-n..15 into
// destination bytes 0..n-1 and uses 0xff for every other position,
// which PSHUFB turns into a zero byte. The effect is to move the top
// n bytes of a register down to the bottom and clear the rest.
// Entry 0 is all zeros; aeshashbody branches away on zero length
// before indexing this table. Each entry is written as two 8-byte
// halves, low half first.
DATA	shifts<>+0x00(SB)/8, $0x0000000000000000	// n = 0
DATA	shifts<>+0x08(SB)/8, $0x0000000000000000
DATA	shifts<>+0x10(SB)/8, $0xffffffffffffff0f	// n = 1
DATA	shifts<>+0x18(SB)/8, $0xffffffffffffffff
DATA	shifts<>+0x20(SB)/8, $0xffffffffffff0f0e	// n = 2
DATA	shifts<>+0x28(SB)/8, $0xffffffffffffffff
DATA	shifts<>+0x30(SB)/8, $0xffffffffff0f0e0d	// n = 3
DATA	shifts<>+0x38(SB)/8, $0xffffffffffffffff
DATA	shifts<>+0x40(SB)/8, $0xffffffff0f0e0d0c	// n = 4
DATA	shifts<>+0x48(SB)/8, $0xffffffffffffffff
DATA	shifts<>+0x50(SB)/8, $0xffffff0f0e0d0c0b	// n = 5
DATA	shifts<>+0x58(SB)/8, $0xffffffffffffffff
DATA	shifts<>+0x60(SB)/8, $0xffff0f0e0d0c0b0a	// n = 6
DATA	shifts<>+0x68(SB)/8, $0xffffffffffffffff
DATA	shifts<>+0x70(SB)/8, $0xff0f0e0d0c0b0a09	// n = 7
DATA	shifts<>+0x78(SB)/8, $0xffffffffffffffff
DATA	shifts<>+0x80(SB)/8, $0x0f0e0d0c0b0a0908	// n = 8
DATA	shifts<>+0x88(SB)/8, $0xffffffffffffffff
DATA	shifts<>+0x90(SB)/8, $0x0e0d0c0b0a090807	// n = 9
DATA	shifts<>+0x98(SB)/8, $0xffffffffffffff0f
DATA	shifts<>+0xa0(SB)/8, $0x0d0c0b0a09080706	// n = 10
DATA	shifts<>+0xa8(SB)/8, $0xffffffffffff0f0e
DATA	shifts<>+0xb0(SB)/8, $0x0c0b0a0908070605	// n = 11
DATA	shifts<>+0xb8(SB)/8, $0xffffffffff0f0e0d
DATA	shifts<>+0xc0(SB)/8, $0x0b0a090807060504	// n = 12
DATA	shifts<>+0xc8(SB)/8, $0xffffffff0f0e0d0c
DATA	shifts<>+0xd0(SB)/8, $0x0a09080706050403	// n = 13
DATA	shifts<>+0xd8(SB)/8, $0xffffff0f0e0d0c0b
DATA	shifts<>+0xe0(SB)/8, $0x0908070605040302	// n = 14
DATA	shifts<>+0xe8(SB)/8, $0xffff0f0e0d0c0b0a
DATA	shifts<>+0xf0(SB)/8, $0x0807060504030201	// n = 15
DATA	shifts<>+0xf8(SB)/8, $0xff0f0e0d0c0b0a09
GLOBL	shifts<>(SB),RODATA,$256
473
// checkMasksAndShiftsAlignment reports whether both the masks and the
// shifts tables start on a 16-byte boundary. aeshashbody uses entries
// of both tables directly as memory operands of PAND and PSHUFB.
//
// Takes no arguments; the one-byte result is returned in AX
// (ABIInternal): 1 if both addresses are 16-byte aligned, 0 otherwise.
// NOTE(review): presumably declared in Go as returning bool - confirm
// against the Go-side declaration.
TEXT ·checkMasksAndShiftsAlignment<ABIInternal>(SB),NOSPLIT,$0-1
	MOVQ	$shifts<>(SB), BX
	MOVQ	$masks<>(SB), AX
	ORQ	BX, AX				// a low bit set in either address survives the OR
	TESTQ	$15, AX				// ZF = 1 iff the low 4 bits of both are clear
	SETEQ	AX				// result byte = ZF
	RET
482
View as plain text