1 // Copyright 2026 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 #include "textflag.h"
6
// MemHash32 hashes the 4 bytes at p, mixed with the seed h, using the
// AES hardware instructions. Arguments: p+0(FP) data pointer, h+4(FP) seed;
// the 32-bit result is written to ret+8(FP).
// When ·UseAeshash is zero the call is forwarded to ·memHash32Fallback.
TEXT ·MemHash32(SB),NOSPLIT,$0-12
	CMPB	·UseAeshash(SB), $0
	JNE	aes
	JMP	·memHash32Fallback(SB)	// tail call, same argument frame
aes:
	MOVL	h+4(FP), X0		// dword 0 = seed, upper lanes cleared
	MOVL	p+0(FP), AX		// AX = address of the value
	PINSRD	$1, (AX), X0		// dword 1 = the 4 data bytes
	// Three AES rounds keyed by the first three 16-byte key schedule slots.
	AESENC	·aeskeysched+0(SB), X0
	AESENC	·aeskeysched+16(SB), X0
	AESENC	·aeskeysched+32(SB), X0
	MOVL	X0, ret+8(FP)		// low 32 bits are the hash
	RET
21
// MemHash64 hashes the 8 bytes at p, mixed with the seed h, using the
// AES hardware instructions. Arguments: p+0(FP) data pointer, h+4(FP) seed;
// the 32-bit result is written to ret+8(FP).
// When ·UseAeshash is zero the call is forwarded to ·memHash64Fallback.
TEXT ·MemHash64(SB),NOSPLIT,$0-12
	CMPB	·UseAeshash(SB), $0
	JNE	aes
	JMP	·memHash64Fallback(SB)	// tail call, same argument frame
aes:
	MOVL	p+0(FP), AX		// AX = address of the value
	MOVQ	(AX), X0		// low qword = the 8 data bytes
	PINSRD	$2, h+4(FP), X0		// dword 2 = seed
	// Three AES rounds keyed by the first three 16-byte key schedule slots.
	AESENC	·aeskeysched+0(SB), X0
	AESENC	·aeskeysched+16(SB), X0
	AESENC	·aeskeysched+32(SB), X0
	MOVL	X0, ret+8(FP)		// low 32 bits are the hash
	RET
35
// MemHash hashes s bytes starting at p. Arguments: p+0(FP) data pointer,
// s+8(FP) size; the result slot is ret+12(FP).
// With AES hashing enabled it loads the register arguments expected by
// ·aeshashbody<> and jumps there; otherwise it jumps to ·memHashFallback.
TEXT ·MemHash(SB),NOSPLIT,$0-16
	CMPB	·UseAeshash(SB), $0
	JNE	aes
	JMP	·memHashFallback(SB)
aes:
	LEAL	ret+12(FP), DX		// DX = where the hash must be stored
	MOVL	s+8(FP), BX		// BX = byte count
	MOVL	p+0(FP), AX		// AX = data pointer
	JMP	·aeshashbody<>(SB)
45
// StrHash hashes the string whose header is pointed to by p. Arguments:
// p+0(FP) pointer to the string object; the result slot is ret+8(FP).
// With AES hashing enabled it unpacks the string header into the register
// arguments expected by ·aeshashbody<> and jumps there; otherwise it jumps
// to ·strHashFallback.
TEXT ·StrHash(SB),NOSPLIT,$0-12
	CMPB	·UseAeshash(SB), $0
	JNE	aes
	JMP	·strHashFallback(SB)
aes:
	LEAL	ret+8(FP), DX		// DX = where the hash must be stored
	MOVL	p+0(FP), AX		// AX = address of the string header
	MOVL	4(AX), BX		// BX = length field (second word)
	MOVL	(AX), AX		// AX = data pointer (first word); load last, it overwrites AX
	JMP	·aeshashbody<>(SB)
56
// aeshashbody<> is the shared AES hashing core. It is entered by JMP from
// MemHash and StrHash, not by CALL.
//
// Register inputs:
//	AX: address of the bytes to hash
//	BX: number of bytes
//	DX: address at which the 32-bit hash is stored
// The seed is read from h+4(FP), i.e. from the argument frame that was
// current when control was transferred here.
TEXT ·aeshashbody<>(SB),NOSPLIT,$0-0
	// Initial state: seed in the low dword, the low 16 bits of the length
	// replicated through the four high words.
	MOVL	h+4(FP), X0
	PINSRW	$4, BX, X0
	PSHUFHW	$0, X0, X0
	MOVO	X0, X1			// X1 = unscrambled seed/length, used for extra seeds
	PXOR	·aeskeysched(SB), X0	// fold in the per-process key
	AESENC	X0, X0			// X0 = scrambled seed

	// Select a code path by length (all comparisons are unsigned).
	CMPL	BX, $16
	JB	len0to15
	JE	len16
	CMPL	BX, $32
	JBE	len17to32
	CMPL	BX, $64
	JBE	len33to64
	JMP	len65plus

len0to15:
	TESTL	BX, BX
	JE	len0

	// After adding 16, bits 4-11 of AX are all zero exactly when the data
	// address lies in the last 16 bytes of a 4 KiB page.
	ADDL	$16, AX
	TESTW	$0xff0, AX
	JE	pageend

	// A 16-byte load starting at the data cannot cross a page boundary:
	// load it and clear everything past the first BX bytes.
	ADDL	BX, BX			// 2*len; scaled by 8 below gives 16*len
	MOVOU	-16(AX), X1
	PAND	masks<>(SB)(BX*8), X1

finish16:
	// X1 holds up to 16 data bytes: mix with the seed, then three rounds.
	PXOR	X0, X1
	AESENC	X1, X1
	AESENC	X1, X1
	AESENC	X1, X1
	MOVL	X1, (DX)
	RET

pageend:
	// The data may sit right against the end of a page. Load the 16 bytes
	// that end at the last data byte instead, then move the data bytes
	// down to the bottom of the register with PSHUFB.
	MOVOU	-32(AX)(BX*1), X1
	ADDL	BX, BX			// 2*len; scaled by 8 below gives 16*len
	PSHUFB	shifts<>(SB)(BX*8), X1
	JMP	finish16

len0:
	// Nothing to hash: the result is the seed scrambled once more.
	AESENC	X0, X0
	MOVL	X0, (DX)
	RET

len16:
	MOVOU	(AX), X1
	JMP	finish16

len17to32:
	// Two possibly overlapping 16-byte loads cover the input.
	MOVOU	-16(AX)(BX*1), X3	// last 16 bytes
	MOVOU	(AX), X2		// first 16 bytes

	// Derive a second seed from the saved unscrambled one.
	PXOR	·aeskeysched+16(SB), X1
	AESENC	X1, X1

	// Mix each half with its own seed.
	PXOR	X1, X3
	PXOR	X0, X2

	// Three rounds per lane.
	AESENC	X3, X3
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X2, X2

	// Fold the lanes together.
	PXOR	X3, X2
	MOVL	X2, (DX)
	RET

len33to64:
	// First 32 and last 32 bytes, possibly overlapping.
	MOVOU	(AX), X4
	MOVOU	16(AX), X5
	MOVOU	-32(AX)(BX*1), X6
	MOVOU	-16(AX)(BX*1), X7

	// Derive three further seeds from the saved unscrambled one.
	MOVO	X1, X2
	MOVO	X1, X3
	PXOR	·aeskeysched+16(SB), X1
	AESENC	X1, X1
	PXOR	·aeskeysched+32(SB), X2
	AESENC	X2, X2
	PXOR	·aeskeysched+48(SB), X3
	AESENC	X3, X3

	// Mix each lane with its seed.
	PXOR	X0, X4
	PXOR	X1, X5
	PXOR	X2, X6
	PXOR	X3, X7

	// Three rounds per lane.
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	// XOR-reduce the four lanes into X4.
	PXOR	X5, X4
	PXOR	X7, X6
	PXOR	X6, X4
	MOVL	X4, (DX)
	RET

len65plus:
	// Derive three further seeds from the saved unscrambled one.
	MOVO	X1, X2
	MOVO	X1, X3
	PXOR	·aeskeysched+16(SB), X1
	AESENC	X1, X1
	PXOR	·aeskeysched+32(SB), X2
	AESENC	X2, X2
	PXOR	·aeskeysched+48(SB), X3
	AESENC	X3, X3

	// The state starts as the final 64 bytes of the input, which may
	// overlap the last block consumed by the loop below.
	MOVOU	-64(AX)(BX*1), X4
	MOVOU	-48(AX)(BX*1), X5
	MOVOU	-32(AX)(BX*1), X6
	MOVOU	-16(AX)(BX*1), X7

	// One round keyed by the seeds.
	AESENC	X0, X4
	AESENC	X1, X5
	AESENC	X2, X6
	AESENC	X3, X7

	// BX = (len-1)/64: number of 64-byte blocks read from the front.
	DECL	BX
	SHRL	$6, BX

blockloop:
	// Read the next 64 bytes; X0-X3 are free now that the seeds are used.
	MOVOU	(AX), X0
	MOVOU	16(AX), X1
	MOVOU	32(AX), X2
	MOVOU	48(AX), X3

	// One round keyed by the data ...
	AESENC	X0, X4
	AESENC	X1, X5
	AESENC	X2, X6
	AESENC	X3, X7

	// ... and one round keyed by the state itself.
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	ADDL	$64, AX
	DECL	BX			// must stay last: JNE tests its zero flag
	JNE	blockloop

	// Three finishing rounds per lane.
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	// XOR-reduce the four lanes into X4.
	PXOR	X5, X4
	PXOR	X7, X6
	PXOR	X6, X4
	MOVL	X4, (DX)
	RET
255
// masks<> holds sixteen 16-byte AND masks. Entry n, at byte offset 16*n,
// has its low n bytes set to 0xff and all higher bytes zero, so a PAND with
// entry n keeps the first n bytes of a 16-byte register and clears the rest.

// n = 0: keep nothing
DATA	masks<>+0x00(SB)/4, $0x00000000
DATA	masks<>+0x04(SB)/4, $0x00000000
DATA	masks<>+0x08(SB)/4, $0x00000000
DATA	masks<>+0x0c(SB)/4, $0x00000000

// n = 1: keep byte 0
DATA	masks<>+0x10(SB)/4, $0x000000ff
DATA	masks<>+0x14(SB)/4, $0x00000000
DATA	masks<>+0x18(SB)/4, $0x00000000
DATA	masks<>+0x1c(SB)/4, $0x00000000

// n = 2: keep bytes 0-1
DATA	masks<>+0x20(SB)/4, $0x0000ffff
DATA	masks<>+0x24(SB)/4, $0x00000000
DATA	masks<>+0x28(SB)/4, $0x00000000
DATA	masks<>+0x2c(SB)/4, $0x00000000

// n = 3: keep bytes 0-2
DATA	masks<>+0x30(SB)/4, $0x00ffffff
DATA	masks<>+0x34(SB)/4, $0x00000000
DATA	masks<>+0x38(SB)/4, $0x00000000
DATA	masks<>+0x3c(SB)/4, $0x00000000

// n = 4: keep bytes 0-3
DATA	masks<>+0x40(SB)/4, $0xffffffff
DATA	masks<>+0x44(SB)/4, $0x00000000
DATA	masks<>+0x48(SB)/4, $0x00000000
DATA	masks<>+0x4c(SB)/4, $0x00000000

// n = 5: keep bytes 0-4
DATA	masks<>+0x50(SB)/4, $0xffffffff
DATA	masks<>+0x54(SB)/4, $0x000000ff
DATA	masks<>+0x58(SB)/4, $0x00000000
DATA	masks<>+0x5c(SB)/4, $0x00000000

// n = 6: keep bytes 0-5
DATA	masks<>+0x60(SB)/4, $0xffffffff
DATA	masks<>+0x64(SB)/4, $0x0000ffff
DATA	masks<>+0x68(SB)/4, $0x00000000
DATA	masks<>+0x6c(SB)/4, $0x00000000

// n = 7: keep bytes 0-6
DATA	masks<>+0x70(SB)/4, $0xffffffff
DATA	masks<>+0x74(SB)/4, $0x00ffffff
DATA	masks<>+0x78(SB)/4, $0x00000000
DATA	masks<>+0x7c(SB)/4, $0x00000000

// n = 8: keep bytes 0-7
DATA	masks<>+0x80(SB)/4, $0xffffffff
DATA	masks<>+0x84(SB)/4, $0xffffffff
DATA	masks<>+0x88(SB)/4, $0x00000000
DATA	masks<>+0x8c(SB)/4, $0x00000000

// n = 9: keep bytes 0-8
DATA	masks<>+0x90(SB)/4, $0xffffffff
DATA	masks<>+0x94(SB)/4, $0xffffffff
DATA	masks<>+0x98(SB)/4, $0x000000ff
DATA	masks<>+0x9c(SB)/4, $0x00000000

// n = 10: keep bytes 0-9
DATA	masks<>+0xa0(SB)/4, $0xffffffff
DATA	masks<>+0xa4(SB)/4, $0xffffffff
DATA	masks<>+0xa8(SB)/4, $0x0000ffff
DATA	masks<>+0xac(SB)/4, $0x00000000

// n = 11: keep bytes 0-10
DATA	masks<>+0xb0(SB)/4, $0xffffffff
DATA	masks<>+0xb4(SB)/4, $0xffffffff
DATA	masks<>+0xb8(SB)/4, $0x00ffffff
DATA	masks<>+0xbc(SB)/4, $0x00000000

// n = 12: keep bytes 0-11
DATA	masks<>+0xc0(SB)/4, $0xffffffff
DATA	masks<>+0xc4(SB)/4, $0xffffffff
DATA	masks<>+0xc8(SB)/4, $0xffffffff
DATA	masks<>+0xcc(SB)/4, $0x00000000

// n = 13: keep bytes 0-12
DATA	masks<>+0xd0(SB)/4, $0xffffffff
DATA	masks<>+0xd4(SB)/4, $0xffffffff
DATA	masks<>+0xd8(SB)/4, $0xffffffff
DATA	masks<>+0xdc(SB)/4, $0x000000ff

// n = 14: keep bytes 0-13
DATA	masks<>+0xe0(SB)/4, $0xffffffff
DATA	masks<>+0xe4(SB)/4, $0xffffffff
DATA	masks<>+0xe8(SB)/4, $0xffffffff
DATA	masks<>+0xec(SB)/4, $0x0000ffff

// n = 15: keep bytes 0-14
DATA	masks<>+0xf0(SB)/4, $0xffffffff
DATA	masks<>+0xf4(SB)/4, $0xffffffff
DATA	masks<>+0xf8(SB)/4, $0xffffffff
DATA	masks<>+0xfc(SB)/4, $0x00ffffff

// 16 entries of 16 bytes each, read-only.
GLOBL	masks<>(SB),RODATA,$256
338
// shifts<> holds sixteen 16-byte PSHUFB control vectors. Entry n, at byte
// offset 16*n, selects source bytes 16-n through 15 into destination bytes
// 0 through n-1, i.e. it moves the top n bytes of the register down to the
// bottom. The remaining control bytes are 0xff, for which PSHUFB writes zero.
// Entry 0 is all zero bytes.

// n = 0
DATA	shifts<>+0x00(SB)/4, $0x00000000
DATA	shifts<>+0x04(SB)/4, $0x00000000
DATA	shifts<>+0x08(SB)/4, $0x00000000
DATA	shifts<>+0x0c(SB)/4, $0x00000000

// n = 1: byte 15 -> 0
DATA	shifts<>+0x10(SB)/4, $0xffffff0f
DATA	shifts<>+0x14(SB)/4, $0xffffffff
DATA	shifts<>+0x18(SB)/4, $0xffffffff
DATA	shifts<>+0x1c(SB)/4, $0xffffffff

// n = 2: bytes 14-15 -> 0-1
DATA	shifts<>+0x20(SB)/4, $0xffff0f0e
DATA	shifts<>+0x24(SB)/4, $0xffffffff
DATA	shifts<>+0x28(SB)/4, $0xffffffff
DATA	shifts<>+0x2c(SB)/4, $0xffffffff

// n = 3: bytes 13-15 -> 0-2
DATA	shifts<>+0x30(SB)/4, $0xff0f0e0d
DATA	shifts<>+0x34(SB)/4, $0xffffffff
DATA	shifts<>+0x38(SB)/4, $0xffffffff
DATA	shifts<>+0x3c(SB)/4, $0xffffffff

// n = 4: bytes 12-15 -> 0-3
DATA	shifts<>+0x40(SB)/4, $0x0f0e0d0c
DATA	shifts<>+0x44(SB)/4, $0xffffffff
DATA	shifts<>+0x48(SB)/4, $0xffffffff
DATA	shifts<>+0x4c(SB)/4, $0xffffffff

// n = 5: bytes 11-15 -> 0-4
DATA	shifts<>+0x50(SB)/4, $0x0e0d0c0b
DATA	shifts<>+0x54(SB)/4, $0xffffff0f
DATA	shifts<>+0x58(SB)/4, $0xffffffff
DATA	shifts<>+0x5c(SB)/4, $0xffffffff

// n = 6: bytes 10-15 -> 0-5
DATA	shifts<>+0x60(SB)/4, $0x0d0c0b0a
DATA	shifts<>+0x64(SB)/4, $0xffff0f0e
DATA	shifts<>+0x68(SB)/4, $0xffffffff
DATA	shifts<>+0x6c(SB)/4, $0xffffffff

// n = 7: bytes 9-15 -> 0-6
DATA	shifts<>+0x70(SB)/4, $0x0c0b0a09
DATA	shifts<>+0x74(SB)/4, $0xff0f0e0d
DATA	shifts<>+0x78(SB)/4, $0xffffffff
DATA	shifts<>+0x7c(SB)/4, $0xffffffff

// n = 8: bytes 8-15 -> 0-7
DATA	shifts<>+0x80(SB)/4, $0x0b0a0908
DATA	shifts<>+0x84(SB)/4, $0x0f0e0d0c
DATA	shifts<>+0x88(SB)/4, $0xffffffff
DATA	shifts<>+0x8c(SB)/4, $0xffffffff

// n = 9: bytes 7-15 -> 0-8
DATA	shifts<>+0x90(SB)/4, $0x0a090807
DATA	shifts<>+0x94(SB)/4, $0x0e0d0c0b
DATA	shifts<>+0x98(SB)/4, $0xffffff0f
DATA	shifts<>+0x9c(SB)/4, $0xffffffff

// n = 10: bytes 6-15 -> 0-9
DATA	shifts<>+0xa0(SB)/4, $0x09080706
DATA	shifts<>+0xa4(SB)/4, $0x0d0c0b0a
DATA	shifts<>+0xa8(SB)/4, $0xffff0f0e
DATA	shifts<>+0xac(SB)/4, $0xffffffff

// n = 11: bytes 5-15 -> 0-10
DATA	shifts<>+0xb0(SB)/4, $0x08070605
DATA	shifts<>+0xb4(SB)/4, $0x0c0b0a09
DATA	shifts<>+0xb8(SB)/4, $0xff0f0e0d
DATA	shifts<>+0xbc(SB)/4, $0xffffffff

// n = 12: bytes 4-15 -> 0-11
DATA	shifts<>+0xc0(SB)/4, $0x07060504
DATA	shifts<>+0xc4(SB)/4, $0x0b0a0908
DATA	shifts<>+0xc8(SB)/4, $0x0f0e0d0c
DATA	shifts<>+0xcc(SB)/4, $0xffffffff

// n = 13: bytes 3-15 -> 0-12
DATA	shifts<>+0xd0(SB)/4, $0x06050403
DATA	shifts<>+0xd4(SB)/4, $0x0a090807
DATA	shifts<>+0xd8(SB)/4, $0x0e0d0c0b
DATA	shifts<>+0xdc(SB)/4, $0xffffff0f

// n = 14: bytes 2-15 -> 0-13
DATA	shifts<>+0xe0(SB)/4, $0x05040302
DATA	shifts<>+0xe4(SB)/4, $0x09080706
DATA	shifts<>+0xe8(SB)/4, $0x0d0c0b0a
DATA	shifts<>+0xec(SB)/4, $0xffff0f0e

// n = 15: bytes 1-15 -> 0-14
DATA	shifts<>+0xf0(SB)/4, $0x04030201
DATA	shifts<>+0xf4(SB)/4, $0x08070605
DATA	shifts<>+0xf8(SB)/4, $0x0c0b0a09
DATA	shifts<>+0xfc(SB)/4, $0xff0f0e0d

// 16 entries of 16 bytes each, read-only.
GLOBL	shifts<>(SB),RODATA,$256
423
// checkMasksAndShiftsAlignment reports, in the one-byte result at ret+0(FP),
// whether both masks<>(SB) and shifts<>(SB) start on a 16-byte boundary.
// Both tables are used as memory operands of PAND and PSHUFB in this file.
TEXT ·checkMasksAndShiftsAlignment(SB),NOSPLIT,$0-1
	MOVL	$shifts<>(SB), AX
	MOVL	$masks<>(SB), BX
	ORL	BX, AX			// a low bit set in either address survives the OR
	TESTL	$15, AX			// ZF = 1 iff the low four bits of both are zero
	SETEQ	ret+0(FP)
	RET
432
View as plain text