// Code generated by command: go run sha512block_amd64_asm.go -out ../sha512block_amd64.s. DO NOT EDIT.

//go:build !purego

#include "textflag.h"

// func blockAVX2(dig *Digest, p []byte)
// Requires: AVX, AVX2, BMI2
//
// Processes p in 128-byte blocks, updating the eight 64-bit words stored at
// dig+0..dig+56. Bytes beyond the last full multiple of 128 are ignored.
//
// Stack frame (56 bytes):
//   0(SP)..31(SP)  four 64-bit (message word + ·_K word) sums, one per round
//   32(SP)         iteration counter for loop1 and loop2
//   40(SP)         pointer to the block currently being processed
//   48(SP)         end pointer: p_base + (p_len rounded down to a multiple of 128)
//
// Register use: SI = dig, DI = input pointer (reused as scratch inside the
// rounds), BP = pointer into ·_K, Y9 = byte-shuffle mask, Y4-Y7 = sixteen
// message words, AX BX CX R8 DX R9 R10 R11 = working variables a..h,
// R12-R15 = scratch.
TEXT ·blockAVX2(SB), NOSPLIT, $56-32
	MOVQ dig+0(FP), SI
	MOVQ p_base+8(FP), DI
	MOVQ p_len+16(FP), DX

	// DX = p_len rounded down to a multiple of 128; return if that is zero.
	SHRQ $0x07, DX
	SHLQ $0x07, DX
	JZ done_hash

	// Save the end-of-input pointer; DX is reused as a working variable below.
	ADDQ DI, DX
	MOVQ DX, 48(SP)

	// Load the eight 64-bit state words from dig.
	MOVQ (SI), AX
	MOVQ 8(SI), BX
	MOVQ 16(SI), CX
	MOVQ 24(SI), R8
	MOVQ 32(SI), DX
	MOVQ 40(SI), R9
	MOVQ 48(SI), R10
	MOVQ 56(SI), R11

	// Y9 stays loaded with the byte-shuffle mask for the whole function.
	VMOVDQU PSHUFFLE_BYTE_FLIP_MASK<>+0(SB), Y9

loop0:
	// Start of one 128-byte block. BP is reset to the address of ·_K
	// (defined outside this file; presumably the round-constant table).
	MOVQ $·_K+0(SB), BP

	// Load the block as four 32-byte vectors (Y4..Y7) and shuffle each with
	// the mask in Y9, which reverses the byte order of every 64-bit lane.
	VMOVDQU (DI), Y4
	VPSHUFB Y9, Y4, Y4
	VMOVDQU 32(DI), Y5
	VPSHUFB Y9, Y5, Y5
	VMOVDQU 64(DI), Y6
	VPSHUFB Y9, Y6, Y6
	VMOVDQU 96(DI), Y7
	VPSHUFB Y9, Y7, Y7

	// Save the block pointer: DI is used as scratch inside the rounds.
	MOVQ DI, 40(SP)

	// loop1 runs four times.
	MOVQ $0x00000004, 32(SP)

loop1:
	// One iteration = 16 rounds (four groups of four) interleaved with the
	// computation of the next 16 message words; 4 iterations = 64 rounds.
	// BP advances by 0x80 bytes (16 quadwords) per iteration.
	//
	// Scalar round pattern (registers rotate every round):
	//   R13 = e rotated right by 41, 18 and 14, XORed together
	//   R14 = a rotated right by 34, 39 and 28, XORed together
	//   R15 = ((f ^ g) & e) ^ g, then R15 += R13
	//   DI  = ((a | c) & b) | (a & c)
	//   h  += sum from the stack; d += h; d += R15 (new e);
	//   h  += R14 + R15 + DI (new a)
	//
	// Vector schedule pattern, W[t] being the word under construction:
	//   new = W[t-16] + W[t-7] + s0(W[t-15]) + s1(W[t-2])
	//   s0(x) = (x ror 1) ^ (x ror 8) ^ (x >> 7)
	//   s1(x) = (x ror 19) ^ (x ror 61) ^ (x >> 6)

	// ---- Group 1: rounds 0-3, replaces the words in Y4 ----
	// Spill Y4 + four ·_K words to 0(SP)..31(SP) for the scalar rounds.
	VPADDQ (BP), Y4, Y0
	VMOVDQU Y0, (SP)

	// Y0 = W[t-7] + W[t-16], Y1 = W[t-15]; start s0: Y3 = Y1 ror 1, Y8 = Y1 >> 7.
	VPERM2F128 $0x03, Y6, Y7, Y0
	VPALIGNR $0x08, Y6, Y0, Y0
	VPADDQ Y4, Y0, Y0
	VPERM2F128 $0x03, Y4, Y5, Y1
	VPALIGNR $0x08, Y4, Y1, Y1
	VPSRLQ $0x01, Y1, Y2
	VPSLLQ $0x3f, Y1, Y3
	VPOR Y2, Y3, Y3
	VPSRLQ $0x07, Y1, Y8

	// Round, sum at 0(SP): a=AX b=BX c=CX d=R8 e=DX f=R9 g=R10 h=R11.
	MOVQ AX, DI
	RORXQ $0x29, DX, R13
	RORXQ $0x12, DX, R14
	ADDQ (SP), R11
	ORQ CX, DI
	MOVQ R9, R15
	RORXQ $0x22, AX, R12
	XORQ R14, R13
	XORQ R10, R15
	RORXQ $0x0e, DX, R14
	ANDQ DX, R15
	XORQ R14, R13
	RORXQ $0x27, AX, R14
	ADDQ R11, R8
	ANDQ BX, DI
	XORQ R12, R14
	RORXQ $0x1c, AX, R12
	XORQ R10, R15
	XORQ R12, R14
	MOVQ AX, R12
	ANDQ CX, R12
	ADDQ R13, R15
	ORQ R12, DI
	ADDQ R14, R11
	ADDQ R15, R8
	ADDQ R15, R11
	ADDQ DI, R11

	// Finish s0 (XOR in Y1 ror 8) and add it to Y0. Broadcast the low two sums
	// into Y4, keep only the high two in Y0, and start s1 on the two newest
	// existing words (high half of Y7): Y8 = Y2 >> 6.
	VPSRLQ $0x08, Y1, Y2
	VPSLLQ $0x38, Y1, Y1
	VPOR Y2, Y1, Y1
	VPXOR Y8, Y3, Y3
	VPXOR Y1, Y3, Y1
	VPADDQ Y1, Y0, Y0
	VPERM2F128 $0x00, Y0, Y0, Y4
	VPAND MASK_YMM_LO<>+0(SB), Y0, Y0
	VPERM2F128 $0x11, Y7, Y7, Y2
	VPSRLQ $0x06, Y2, Y8

	// Round, sum at 8(SP): a=R11 b=AX c=BX d=CX e=R8 f=DX g=R9 h=R10.
	MOVQ R11, DI
	RORXQ $0x29, R8, R13
	RORXQ $0x12, R8, R14
	ADDQ 8(SP), R10
	ORQ BX, DI
	MOVQ DX, R15
	RORXQ $0x22, R11, R12
	XORQ R14, R13
	XORQ R9, R15
	RORXQ $0x0e, R8, R14
	XORQ R14, R13
	RORXQ $0x27, R11, R14
	ANDQ R8, R15
	ADDQ R10, CX
	ANDQ AX, DI
	XORQ R12, R14
	RORXQ $0x1c, R11, R12
	XORQ R9, R15
	XORQ R12, R14
	MOVQ R11, R12
	ANDQ BX, R12
	ADDQ R13, R15
	ORQ R12, DI
	ADDQ R14, R10
	ADDQ R15, CX
	ADDQ R15, R10
	ADDQ DI, R10

	// Finish s1 and add it: the low two lanes of Y4 are now complete new words.
	// Start s1 of those words for the remaining two: Y8 = Y4 >> 6.
	VPSRLQ $0x13, Y2, Y3
	VPSLLQ $0x2d, Y2, Y1
	VPOR Y1, Y3, Y3
	VPXOR Y3, Y8, Y8
	VPSRLQ $0x3d, Y2, Y3
	VPSLLQ $0x03, Y2, Y1
	VPOR Y1, Y3, Y3
	VPXOR Y3, Y8, Y8
	VPADDQ Y8, Y4, Y4
	VPSRLQ $0x06, Y4, Y8

	// Round, sum at 16(SP): a=R10 b=R11 c=AX d=BX e=CX f=R8 g=DX h=R9.
	MOVQ R10, DI
	RORXQ $0x29, CX, R13
	ADDQ 16(SP), R9
	RORXQ $0x12, CX, R14
	ORQ AX, DI
	MOVQ R8, R15
	XORQ DX, R15
	RORXQ $0x22, R10, R12
	XORQ R14, R13
	ANDQ CX, R15
	RORXQ $0x0e, CX, R14
	ADDQ R9, BX
	ANDQ R11, DI
	XORQ R14, R13
	RORXQ $0x27, R10, R14
	XORQ DX, R15
	XORQ R12, R14
	RORXQ $0x1c, R10, R12
	XORQ R12, R14
	MOVQ R10, R12
	ANDQ AX, R12
	ADDQ R13, R15
	ORQ R12, DI
	ADDQ R14, R9
	ADDQ R15, BX
	ADDQ R15, R9
	ADDQ DI, R9

	// Complete the last two new words in the high half of Y2 and blend them
	// into the high half of Y4.
	VPSRLQ $0x13, Y4, Y3
	VPSLLQ $0x2d, Y4, Y1
	VPOR Y1, Y3, Y3
	VPXOR Y3, Y8, Y8
	VPSRLQ $0x3d, Y4, Y3
	VPSLLQ $0x03, Y4, Y1
	VPOR Y1, Y3, Y3
	VPXOR Y3, Y8, Y8
	VPADDQ Y8, Y0, Y2
	VPBLENDD $0xf0, Y2, Y4, Y4

	// Round, sum at 24(SP): a=R9 b=R10 c=R11 d=AX e=BX f=CX g=R8 h=DX.
	MOVQ R9, DI
	RORXQ $0x29, BX, R13
	RORXQ $0x12, BX, R14
	ADDQ 24(SP), DX
	ORQ R11, DI
	MOVQ CX, R15
	RORXQ $0x22, R9, R12
	XORQ R14, R13
	XORQ R8, R15
	RORXQ $0x0e, BX, R14
	ANDQ BX, R15
	ADDQ DX, AX
	ANDQ R10, DI
	XORQ R14, R13
	XORQ R8, R15
	RORXQ $0x27, R9, R14
	ADDQ R13, R15
	XORQ R12, R14
	ADDQ R15, AX
	RORXQ $0x1c, R9, R12
	XORQ R12, R14
	MOVQ R9, R12
	ANDQ R11, R12
	ORQ R12, DI
	ADDQ R14, DX
	ADDQ R15, DX
	ADDQ DI, DX

	// ---- Group 2: rounds 4-7, replaces the words in Y5 ----
	// Spill Y5 + the next four ·_K words.
	VPADDQ 32(BP), Y5, Y0
	VMOVDQU Y0, (SP)

	// Y0 = W[t-7] + W[t-16], Y1 = W[t-15]; start s0: Y3 = Y1 ror 1, Y8 = Y1 >> 7.
	VPERM2F128 $0x03, Y7, Y4, Y0
	VPALIGNR $0x08, Y7, Y0, Y0
	VPADDQ Y5, Y0, Y0
	VPERM2F128 $0x03, Y5, Y6, Y1
	VPALIGNR $0x08, Y5, Y1, Y1
	VPSRLQ $0x01, Y1, Y2
	VPSLLQ $0x3f, Y1, Y3
	VPOR Y2, Y3, Y3
	VPSRLQ $0x07, Y1, Y8

	// Round, sum at 0(SP): a=DX b=R9 c=R10 d=R11 e=AX f=BX g=CX h=R8.
	MOVQ DX, DI
	RORXQ $0x29, AX, R13
	RORXQ $0x12, AX, R14
	ADDQ (SP), R8
	ORQ R10, DI
	MOVQ BX, R15
	RORXQ $0x22, DX, R12
	XORQ R14, R13
	XORQ CX, R15
	RORXQ $0x0e, AX, R14
	ANDQ AX, R15
	XORQ R14, R13
	RORXQ $0x27, DX, R14
	ADDQ R8, R11
	ANDQ R9, DI
	XORQ R12, R14
	RORXQ $0x1c, DX, R12
	XORQ CX, R15
	XORQ R12, R14
	MOVQ DX, R12
	ANDQ R10, R12
	ADDQ R13, R15
	ORQ R12, DI
	ADDQ R14, R8
	ADDQ R15, R11
	ADDQ R15, R8
	ADDQ DI, R8

	// Finish s0 and add it to Y0; broadcast the low two sums into Y5, keep the
	// high two in Y0; start s1 on the high half of Y4: Y8 = Y2 >> 6.
	VPSRLQ $0x08, Y1, Y2
	VPSLLQ $0x38, Y1, Y1
	VPOR Y2, Y1, Y1
	VPXOR Y8, Y3, Y3
	VPXOR Y1, Y3, Y1
	VPADDQ Y1, Y0, Y0
	VPERM2F128 $0x00, Y0, Y0, Y5
	VPAND MASK_YMM_LO<>+0(SB), Y0, Y0
	VPERM2F128 $0x11, Y4, Y4, Y2
	VPSRLQ $0x06, Y2, Y8

	// Round, sum at 8(SP): a=R8 b=DX c=R9 d=R10 e=R11 f=AX g=BX h=CX.
	MOVQ R8, DI
	RORXQ $0x29, R11, R13
	RORXQ $0x12, R11, R14
	ADDQ 8(SP), CX
	ORQ R9, DI
	MOVQ AX, R15
	RORXQ $0x22, R8, R12
	XORQ R14, R13
	XORQ BX, R15
	RORXQ $0x0e, R11, R14
	XORQ R14, R13
	RORXQ $0x27, R8, R14
	ANDQ R11, R15
	ADDQ CX, R10
	ANDQ DX, DI
	XORQ R12, R14
	RORXQ $0x1c, R8, R12
	XORQ BX, R15
	XORQ R12, R14
	MOVQ R8, R12
	ANDQ R9, R12
	ADDQ R13, R15
	ORQ R12, DI
	ADDQ R14, CX
	ADDQ R15, R10
	ADDQ R15, CX
	ADDQ DI, CX

	// Finish s1 and add it: low two lanes of Y5 are complete. Start s1 of them.
	VPSRLQ $0x13, Y2, Y3
	VPSLLQ $0x2d, Y2, Y1
	VPOR Y1, Y3, Y3
	VPXOR Y3, Y8, Y8
	VPSRLQ $0x3d, Y2, Y3
	VPSLLQ $0x03, Y2, Y1
	VPOR Y1, Y3, Y3
	VPXOR Y3, Y8, Y8
	VPADDQ Y8, Y5, Y5
	VPSRLQ $0x06, Y5, Y8

	// Round, sum at 16(SP): a=CX b=R8 c=DX d=R9 e=R10 f=R11 g=AX h=BX.
	MOVQ CX, DI
	RORXQ $0x29, R10, R13
	ADDQ 16(SP), BX
	RORXQ $0x12, R10, R14
	ORQ DX, DI
	MOVQ R11, R15
	XORQ AX, R15
	RORXQ $0x22, CX, R12
	XORQ R14, R13
	ANDQ R10, R15
	RORXQ $0x0e, R10, R14
	ADDQ BX, R9
	ANDQ R8, DI
	XORQ R14, R13
	RORXQ $0x27, CX, R14
	XORQ AX, R15
	XORQ R12, R14
	RORXQ $0x1c, CX, R12
	XORQ R12, R14
	MOVQ CX, R12
	ANDQ DX, R12
	ADDQ R13, R15
	ORQ R12, DI
	ADDQ R14, BX
	ADDQ R15, R9
	ADDQ R15, BX
	ADDQ DI, BX

	// Complete the last two new words and blend them into the high half of Y5.
	VPSRLQ $0x13, Y5, Y3
	VPSLLQ $0x2d, Y5, Y1
	VPOR Y1, Y3, Y3
	VPXOR Y3, Y8, Y8
	VPSRLQ $0x3d, Y5, Y3
	VPSLLQ $0x03, Y5, Y1
	VPOR Y1, Y3, Y3
	VPXOR Y3, Y8, Y8
	VPADDQ Y8, Y0, Y2
	VPBLENDD $0xf0, Y2, Y5, Y5

	// Round, sum at 24(SP): a=BX b=CX c=R8 d=DX e=R9 f=R10 g=R11 h=AX.
	MOVQ BX, DI
	RORXQ $0x29, R9, R13
	RORXQ $0x12, R9, R14
	ADDQ 24(SP), AX
	ORQ R8, DI
	MOVQ R10, R15
	RORXQ $0x22, BX, R12
	XORQ R14, R13
	XORQ R11, R15
	RORXQ $0x0e, R9, R14
	ANDQ R9, R15
	ADDQ AX, DX
	ANDQ CX, DI
	XORQ R14, R13
	XORQ R11, R15
	RORXQ $0x27, BX, R14
	ADDQ R13, R15
	XORQ R12, R14
	ADDQ R15, DX
	RORXQ $0x1c, BX, R12
	XORQ R12, R14
	MOVQ BX, R12
	ANDQ R8, R12
	ORQ R12, DI
	ADDQ R14, AX
	ADDQ R15, AX
	ADDQ DI, AX

	// ---- Group 3: rounds 8-11, replaces the words in Y6 ----
	// Spill Y6 + the next four ·_K words.
	VPADDQ 64(BP), Y6, Y0
	VMOVDQU Y0, (SP)

	// Y0 = W[t-7] + W[t-16], Y1 = W[t-15]; start s0: Y3 = Y1 ror 1, Y8 = Y1 >> 7.
	VPERM2F128 $0x03, Y4, Y5, Y0
	VPALIGNR $0x08, Y4, Y0, Y0
	VPADDQ Y6, Y0, Y0
	VPERM2F128 $0x03, Y6, Y7, Y1
	VPALIGNR $0x08, Y6, Y1, Y1
	VPSRLQ $0x01, Y1, Y2
	VPSLLQ $0x3f, Y1, Y3
	VPOR Y2, Y3, Y3
	VPSRLQ $0x07, Y1, Y8

	// Round, sum at 0(SP): a=AX b=BX c=CX d=R8 e=DX f=R9 g=R10 h=R11.
	MOVQ AX, DI
	RORXQ $0x29, DX, R13
	RORXQ $0x12, DX, R14
	ADDQ (SP), R11
	ORQ CX, DI
	MOVQ R9, R15
	RORXQ $0x22, AX, R12
	XORQ R14, R13
	XORQ R10, R15
	RORXQ $0x0e, DX, R14
	ANDQ DX, R15
	XORQ R14, R13
	RORXQ $0x27, AX, R14
	ADDQ R11, R8
	ANDQ BX, DI
	XORQ R12, R14
	RORXQ $0x1c, AX, R12
	XORQ R10, R15
	XORQ R12, R14
	MOVQ AX, R12
	ANDQ CX, R12
	ADDQ R13, R15
	ORQ R12, DI
	ADDQ R14, R11
	ADDQ R15, R8
	ADDQ R15, R11
	ADDQ DI, R11

	// Finish s0 and add it to Y0; broadcast the low two sums into Y6, keep the
	// high two in Y0; start s1 on the high half of Y5: Y8 = Y2 >> 6.
	VPSRLQ $0x08, Y1, Y2
	VPSLLQ $0x38, Y1, Y1
	VPOR Y2, Y1, Y1
	VPXOR Y8, Y3, Y3
	VPXOR Y1, Y3, Y1
	VPADDQ Y1, Y0, Y0
	VPERM2F128 $0x00, Y0, Y0, Y6
	VPAND MASK_YMM_LO<>+0(SB), Y0, Y0
	VPERM2F128 $0x11, Y5, Y5, Y2
	VPSRLQ $0x06, Y2, Y8

	// Round, sum at 8(SP): a=R11 b=AX c=BX d=CX e=R8 f=DX g=R9 h=R10.
	MOVQ R11, DI
	RORXQ $0x29, R8, R13
	RORXQ $0x12, R8, R14
	ADDQ 8(SP), R10
	ORQ BX, DI
	MOVQ DX, R15
	RORXQ $0x22, R11, R12
	XORQ R14, R13
	XORQ R9, R15
	RORXQ $0x0e, R8, R14
	XORQ R14, R13
	RORXQ $0x27, R11, R14
	ANDQ R8, R15
	ADDQ R10, CX
	ANDQ AX, DI
	XORQ R12, R14
	RORXQ $0x1c, R11, R12
	XORQ R9, R15
	XORQ R12, R14
	MOVQ R11, R12
	ANDQ BX, R12
	ADDQ R13, R15
	ORQ R12, DI
	ADDQ R14, R10
	ADDQ R15, CX
	ADDQ R15, R10
	ADDQ DI, R10

	// Finish s1 and add it: low two lanes of Y6 are complete. Start s1 of them.
	VPSRLQ $0x13, Y2, Y3
	VPSLLQ $0x2d, Y2, Y1
	VPOR Y1, Y3, Y3
	VPXOR Y3, Y8, Y8
	VPSRLQ $0x3d, Y2, Y3
	VPSLLQ $0x03, Y2, Y1
	VPOR Y1, Y3, Y3
	VPXOR Y3, Y8, Y8
	VPADDQ Y8, Y6, Y6
	VPSRLQ $0x06, Y6, Y8

	// Round, sum at 16(SP): a=R10 b=R11 c=AX d=BX e=CX f=R8 g=DX h=R9.
	MOVQ R10, DI
	RORXQ $0x29, CX, R13
	ADDQ 16(SP), R9
	RORXQ $0x12, CX, R14
	ORQ AX, DI
	MOVQ R8, R15
	XORQ DX, R15
	RORXQ $0x22, R10, R12
	XORQ R14, R13
	ANDQ CX, R15
	RORXQ $0x0e, CX, R14
	ADDQ R9, BX
	ANDQ R11, DI
	XORQ R14, R13
	RORXQ $0x27, R10, R14
	XORQ DX, R15
	XORQ R12, R14
	RORXQ $0x1c, R10, R12
	XORQ R12, R14
	MOVQ R10, R12
	ANDQ AX, R12
	ADDQ R13, R15
	ORQ R12, DI
	ADDQ R14, R9
	ADDQ R15, BX
	ADDQ R15, R9
	ADDQ DI, R9

	// Complete the last two new words and blend them into the high half of Y6.
	VPSRLQ $0x13, Y6, Y3
	VPSLLQ $0x2d, Y6, Y1
	VPOR Y1, Y3, Y3
	VPXOR Y3, Y8, Y8
	VPSRLQ $0x3d, Y6, Y3
	VPSLLQ $0x03, Y6, Y1
	VPOR Y1, Y3, Y3
	VPXOR Y3, Y8, Y8
	VPADDQ Y8, Y0, Y2
	VPBLENDD $0xf0, Y2, Y6, Y6

	// Round, sum at 24(SP): a=R9 b=R10 c=R11 d=AX e=BX f=CX g=R8 h=DX.
	MOVQ R9, DI
	RORXQ $0x29, BX, R13
	RORXQ $0x12, BX, R14
	ADDQ 24(SP), DX
	ORQ R11, DI
	MOVQ CX, R15
	RORXQ $0x22, R9, R12
	XORQ R14, R13
	XORQ R8, R15
	RORXQ $0x0e, BX, R14
	ANDQ BX, R15
	ADDQ DX, AX
	ANDQ R10, DI
	XORQ R14, R13
	XORQ R8, R15
	RORXQ $0x27, R9, R14
	ADDQ R13, R15
	XORQ R12, R14
	ADDQ R15, AX
	RORXQ $0x1c, R9, R12
	XORQ R12, R14
	MOVQ R9, R12
	ANDQ R11, R12
	ORQ R12, DI
	ADDQ R14, DX
	ADDQ R15, DX
	ADDQ DI, DX

	// ---- Group 4: rounds 12-15, replaces the words in Y7 ----
	// Spill Y7 + the next four ·_K words, then advance BP by 16 quadwords.
	VPADDQ 96(BP), Y7, Y0
	VMOVDQU Y0, (SP)
	ADDQ $0x80, BP

	// Y0 = W[t-7] + W[t-16], Y1 = W[t-15]; start s0: Y3 = Y1 ror 1, Y8 = Y1 >> 7.
	VPERM2F128 $0x03, Y5, Y6, Y0
	VPALIGNR $0x08, Y5, Y0, Y0
	VPADDQ Y7, Y0, Y0
	VPERM2F128 $0x03, Y7, Y4, Y1
	VPALIGNR $0x08, Y7, Y1, Y1
	VPSRLQ $0x01, Y1, Y2
	VPSLLQ $0x3f, Y1, Y3
	VPOR Y2, Y3, Y3
	VPSRLQ $0x07, Y1, Y8

	// Round, sum at 0(SP): a=DX b=R9 c=R10 d=R11 e=AX f=BX g=CX h=R8.
	MOVQ DX, DI
	RORXQ $0x29, AX, R13
	RORXQ $0x12, AX, R14
	ADDQ (SP), R8
	ORQ R10, DI
	MOVQ BX, R15
	RORXQ $0x22, DX, R12
	XORQ R14, R13
	XORQ CX, R15
	RORXQ $0x0e, AX, R14
	ANDQ AX, R15
	XORQ R14, R13
	RORXQ $0x27, DX, R14
	ADDQ R8, R11
	ANDQ R9, DI
	XORQ R12, R14
	RORXQ $0x1c, DX, R12
	XORQ CX, R15
	XORQ R12, R14
	MOVQ DX, R12
	ANDQ R10, R12
	ADDQ R13, R15
	ORQ R12, DI
	ADDQ R14, R8
	ADDQ R15, R11
	ADDQ R15, R8
	ADDQ DI, R8

	// Finish s0 and add it to Y0; broadcast the low two sums into Y7, keep the
	// high two in Y0; start s1 on the high half of Y6: Y8 = Y2 >> 6.
	VPSRLQ $0x08, Y1, Y2
	VPSLLQ $0x38, Y1, Y1
	VPOR Y2, Y1, Y1
	VPXOR Y8, Y3, Y3
	VPXOR Y1, Y3, Y1
	VPADDQ Y1, Y0, Y0
	VPERM2F128 $0x00, Y0, Y0, Y7
	VPAND MASK_YMM_LO<>+0(SB), Y0, Y0
	VPERM2F128 $0x11, Y6, Y6, Y2
	VPSRLQ $0x06, Y2, Y8

	// Round, sum at 8(SP): a=R8 b=DX c=R9 d=R10 e=R11 f=AX g=BX h=CX.
	MOVQ R8, DI
	RORXQ $0x29, R11, R13
	RORXQ $0x12, R11, R14
	ADDQ 8(SP), CX
	ORQ R9, DI
	MOVQ AX, R15
	RORXQ $0x22, R8, R12
	XORQ R14, R13
	XORQ BX, R15
	RORXQ $0x0e, R11, R14
	XORQ R14, R13
	RORXQ $0x27, R8, R14
	ANDQ R11, R15
	ADDQ CX, R10
	ANDQ DX, DI
	XORQ R12, R14
	RORXQ $0x1c, R8, R12
	XORQ BX, R15
	XORQ R12, R14
	MOVQ R8, R12
	ANDQ R9, R12
	ADDQ R13, R15
	ORQ R12, DI
	ADDQ R14, CX
	ADDQ R15, R10
	ADDQ R15, CX
	ADDQ DI, CX

	// Finish s1 and add it: low two lanes of Y7 are complete. Start s1 of them.
	VPSRLQ $0x13, Y2, Y3
	VPSLLQ $0x2d, Y2, Y1
	VPOR Y1, Y3, Y3
	VPXOR Y3, Y8, Y8
	VPSRLQ $0x3d, Y2, Y3
	VPSLLQ $0x03, Y2, Y1
	VPOR Y1, Y3, Y3
	VPXOR Y3, Y8, Y8
	VPADDQ Y8, Y7, Y7
	VPSRLQ $0x06, Y7, Y8

	// Round, sum at 16(SP): a=CX b=R8 c=DX d=R9 e=R10 f=R11 g=AX h=BX.
	MOVQ CX, DI
	RORXQ $0x29, R10, R13
	ADDQ 16(SP), BX
	RORXQ $0x12, R10, R14
	ORQ DX, DI
	MOVQ R11, R15
	XORQ AX, R15
	RORXQ $0x22, CX, R12
	XORQ R14, R13
	ANDQ R10, R15
	RORXQ $0x0e, R10, R14
	ADDQ BX, R9
	ANDQ R8, DI
	XORQ R14, R13
	RORXQ $0x27, CX, R14
	XORQ AX, R15
	XORQ R12, R14
	RORXQ $0x1c, CX, R12
	XORQ R12, R14
	MOVQ CX, R12
	ANDQ DX, R12
	ADDQ R13, R15
	ORQ R12, DI
	ADDQ R14, BX
	ADDQ R15, R9
	ADDQ R15, BX
	ADDQ DI, BX

	// Complete the last two new words and blend them into the high half of Y7.
	VPSRLQ $0x13, Y7, Y3
	VPSLLQ $0x2d, Y7, Y1
	VPOR Y1, Y3, Y3
	VPXOR Y3, Y8, Y8
	VPSRLQ $0x3d, Y7, Y3
	VPSLLQ $0x03, Y7, Y1
	VPOR Y1, Y3, Y3
	VPXOR Y3, Y8, Y8
	VPADDQ Y8, Y0, Y2
	VPBLENDD $0xf0, Y2, Y7, Y7

	// Round, sum at 24(SP): a=BX b=CX c=R8 d=DX e=R9 f=R10 g=R11 h=AX.
	MOVQ BX, DI
	RORXQ $0x29, R9, R13
	RORXQ $0x12, R9, R14
	ADDQ 24(SP), AX
	ORQ R8, DI
	MOVQ R10, R15
	RORXQ $0x22, BX, R12
	XORQ R14, R13
	XORQ R11, R15
	RORXQ $0x0e, R9, R14
	ANDQ R9, R15
	ADDQ AX, DX
	ANDQ CX, DI
	XORQ R14, R13
	XORQ R11, R15
	RORXQ $0x27, BX, R14
	ADDQ R13, R15
	XORQ R12, R14
	ADDQ R15, DX
	RORXQ $0x1c, BX, R12
	XORQ R12, R14
	MOVQ BX, R12
	ANDQ R8, R12
	ORQ R12, DI
	ADDQ R14, AX
	ADDQ R15, AX
	ADDQ DI, AX

	// Repeat until the counter at 32(SP) reaches zero.
	SUBQ $0x01, 32(SP)
	JNE loop1

	// loop2 runs twice.
	MOVQ $0x00000002, 32(SP)

loop2:
	// One iteration = 8 rounds using the words in Y4 and Y5; no new message
	// words are computed. Two iterations = the final 16 rounds of the block.
	// BP advances by 0x40 bytes (8 quadwords) per iteration.
	//
	// Per round: R13 = e rotated right by 41, 18 and 14, XORed together;
	// R14 = a rotated right by 34, 39 and 28, XORed together;
	// R15 = ((f ^ g) & e) ^ g, then R15 += R13; DI = ((a | c) & b) | (a & c).
	// DI is added to the new a at the start of the following round, except
	// after rounds 4 and 8 where it is added immediately.

	// Spill Y4 + four ·_K words to 0(SP)..31(SP).
	VPADDQ (BP), Y4, Y0
	VMOVDQU Y0, (SP)

	// Round 1, sum at 0(SP): a=AX b=BX c=CX d=R8 e=DX f=R9 g=R10 h=R11.
	MOVQ R9, R15
	RORXQ $0x29, DX, R13
	RORXQ $0x12, DX, R14
	XORQ R10, R15
	XORQ R14, R13
	RORXQ $0x0e, DX, R14
	ANDQ DX, R15
	XORQ R14, R13
	RORXQ $0x22, AX, R12
	XORQ R10, R15
	RORXQ $0x27, AX, R14
	MOVQ AX, DI
	XORQ R12, R14
	RORXQ $0x1c, AX, R12
	ADDQ (SP), R11
	ORQ CX, DI
	XORQ R12, R14
	MOVQ AX, R12
	ANDQ BX, DI
	ANDQ CX, R12
	ADDQ R13, R15
	ADDQ R11, R8
	ORQ R12, DI
	ADDQ R14, R11
	ADDQ R15, R8
	ADDQ R15, R11

	// Round 2, sum at 8(SP): a=R11 b=AX c=BX d=CX e=R8 f=DX g=R9 h=R10.
	MOVQ DX, R15
	RORXQ $0x29, R8, R13
	RORXQ $0x12, R8, R14
	XORQ R9, R15
	XORQ R14, R13
	RORXQ $0x0e, R8, R14
	ANDQ R8, R15
	ADDQ DI, R11
	XORQ R14, R13
	RORXQ $0x22, R11, R12
	XORQ R9, R15
	RORXQ $0x27, R11, R14
	MOVQ R11, DI
	XORQ R12, R14
	RORXQ $0x1c, R11, R12
	ADDQ 8(SP), R10
	ORQ BX, DI
	XORQ R12, R14
	MOVQ R11, R12
	ANDQ AX, DI
	ANDQ BX, R12
	ADDQ R13, R15
	ADDQ R10, CX
	ORQ R12, DI
	ADDQ R14, R10
	ADDQ R15, CX
	ADDQ R15, R10

	// Round 3, sum at 16(SP): a=R10 b=R11 c=AX d=BX e=CX f=R8 g=DX h=R9.
	MOVQ R8, R15
	RORXQ $0x29, CX, R13
	RORXQ $0x12, CX, R14
	XORQ DX, R15
	XORQ R14, R13
	RORXQ $0x0e, CX, R14
	ANDQ CX, R15
	ADDQ DI, R10
	XORQ R14, R13
	RORXQ $0x22, R10, R12
	XORQ DX, R15
	RORXQ $0x27, R10, R14
	MOVQ R10, DI
	XORQ R12, R14
	RORXQ $0x1c, R10, R12
	ADDQ 16(SP), R9
	ORQ AX, DI
	XORQ R12, R14
	MOVQ R10, R12
	ANDQ R11, DI
	ANDQ AX, R12
	ADDQ R13, R15
	ADDQ R9, BX
	ORQ R12, DI
	ADDQ R14, R9
	ADDQ R15, BX
	ADDQ R15, R9

	// Round 4, sum at 24(SP): a=R9 b=R10 c=R11 d=AX e=BX f=CX g=R8 h=DX.
	MOVQ CX, R15
	RORXQ $0x29, BX, R13
	RORXQ $0x12, BX, R14
	XORQ R8, R15
	XORQ R14, R13
	RORXQ $0x0e, BX, R14
	ANDQ BX, R15
	ADDQ DI, R9
	XORQ R14, R13
	RORXQ $0x22, R9, R12
	XORQ R8, R15
	RORXQ $0x27, R9, R14
	MOVQ R9, DI
	XORQ R12, R14
	RORXQ $0x1c, R9, R12
	ADDQ 24(SP), DX
	ORQ R11, DI
	XORQ R12, R14
	MOVQ R9, R12
	ANDQ R10, DI
	ANDQ R11, R12
	ADDQ R13, R15
	ADDQ DX, AX
	ORQ R12, DI
	ADDQ R14, DX
	ADDQ R15, AX
	ADDQ R15, DX
	ADDQ DI, DX

	// Spill Y5 + the next four ·_K words, then advance BP by 8 quadwords.
	VPADDQ 32(BP), Y5, Y0
	VMOVDQU Y0, (SP)
	ADDQ $0x40, BP

	// Round 5, sum at 0(SP): a=DX b=R9 c=R10 d=R11 e=AX f=BX g=CX h=R8.
	MOVQ BX, R15
	RORXQ $0x29, AX, R13
	RORXQ $0x12, AX, R14
	XORQ CX, R15
	XORQ R14, R13
	RORXQ $0x0e, AX, R14
	ANDQ AX, R15
	XORQ R14, R13
	RORXQ $0x22, DX, R12
	XORQ CX, R15
	RORXQ $0x27, DX, R14
	MOVQ DX, DI
	XORQ R12, R14
	RORXQ $0x1c, DX, R12
	ADDQ (SP), R8
	ORQ R10, DI
	XORQ R12, R14
	MOVQ DX, R12
	ANDQ R9, DI
	ANDQ R10, R12
	ADDQ R13, R15
	ADDQ R8, R11
	ORQ R12, DI
	ADDQ R14, R8
	ADDQ R15, R11
	ADDQ R15, R8

	// Round 6, sum at 8(SP): a=R8 b=DX c=R9 d=R10 e=R11 f=AX g=BX h=CX.
	MOVQ AX, R15
	RORXQ $0x29, R11, R13
	RORXQ $0x12, R11, R14
	XORQ BX, R15
	XORQ R14, R13
	RORXQ $0x0e, R11, R14
	ANDQ R11, R15
	ADDQ DI, R8
	XORQ R14, R13
	RORXQ $0x22, R8, R12
	XORQ BX, R15
	RORXQ $0x27, R8, R14
	MOVQ R8, DI
	XORQ R12, R14
	RORXQ $0x1c, R8, R12
	ADDQ 8(SP), CX
	ORQ R9, DI
	XORQ R12, R14
	MOVQ R8, R12
	ANDQ DX, DI
	ANDQ R9, R12
	ADDQ R13, R15
	ADDQ CX, R10
	ORQ R12, DI
	ADDQ R14, CX
	ADDQ R15, R10
	ADDQ R15, CX

	// Round 7, sum at 16(SP): a=CX b=R8 c=DX d=R9 e=R10 f=R11 g=AX h=BX.
	MOVQ R11, R15
	RORXQ $0x29, R10, R13
	RORXQ $0x12, R10, R14
	XORQ AX, R15
	XORQ R14, R13
	RORXQ $0x0e, R10, R14
	ANDQ R10, R15
	ADDQ DI, CX
	XORQ R14, R13
	RORXQ $0x22, CX, R12
	XORQ AX, R15
	RORXQ $0x27, CX, R14
	MOVQ CX, DI
	XORQ R12, R14
	RORXQ $0x1c, CX, R12
	ADDQ 16(SP), BX
	ORQ DX, DI
	XORQ R12, R14
	MOVQ CX, R12
	ANDQ R8, DI
	ANDQ DX, R12
	ADDQ R13, R15
	ADDQ BX, R9
	ORQ R12, DI
	ADDQ R14, BX
	ADDQ R15, R9
	ADDQ R15, BX

	// Round 8, sum at 24(SP): a=BX b=CX c=R8 d=DX e=R9 f=R10 g=R11 h=AX.
	MOVQ R10, R15
	RORXQ $0x29, R9, R13
	RORXQ $0x12, R9, R14
	XORQ R11, R15
	XORQ R14, R13
	RORXQ $0x0e, R9, R14
	ANDQ R9, R15
	ADDQ DI, BX
	XORQ R14, R13
	RORXQ $0x22, BX, R12
	XORQ R11, R15
	RORXQ $0x27, BX, R14
	MOVQ BX, DI
	XORQ R12, R14
	RORXQ $0x1c, BX, R12
	ADDQ 24(SP), AX
	ORQ R8, DI
	XORQ R12, R14
	MOVQ BX, R12
	ANDQ CX, DI
	ANDQ R8, R12
	ADDQ R13, R15
	ADDQ AX, DX
	ORQ R12, DI
	ADDQ R14, AX
	ADDQ R15, DX
	ADDQ R15, AX
	ADDQ DI, AX

	// Hand the remaining eight words (Y6, Y7) to the second iteration.
	VMOVDQU Y6, Y4
	VMOVDQU Y7, Y5
	SUBQ $0x01, 32(SP)
	JNE loop2

	// Add the working variables into the state words at dig and store them back.
	ADDQ (SI), AX
	MOVQ AX, (SI)
	ADDQ 8(SI), BX
	MOVQ BX, 8(SI)
	ADDQ 16(SI), CX
	MOVQ CX, 16(SI)
	ADDQ 24(SI), R8
	MOVQ R8, 24(SI)
	ADDQ 32(SI), DX
	MOVQ DX, 32(SI)
	ADDQ 40(SI), R9
	MOVQ R9, 40(SI)
	ADDQ 48(SI), R10
	MOVQ R10, 48(SI)
	ADDQ 56(SI), R11
	MOVQ R11, 56(SI)

	// Restore the block pointer, step to the next 128-byte block, and continue
	// until it equals the end pointer saved at 48(SP).
	MOVQ 40(SP), DI
	ADDQ $0x80, DI
	CMPQ DI, 48(SP)
	JNE loop0

done_hash:
	// Common exit, also reached directly when p holds no full 128-byte block.
	// VZEROUPPER clears the upper halves of the YMM registers before returning.
	VZEROUPPER
	RET

// PSHUFFLE_BYTE_FLIP_MASK is the 32-byte VPSHUFB control vector loaded into Y9.
// Stored little-endian, each quadword lists byte indices in descending order,
// so the shuffle reverses the byte order of every 64-bit lane.
DATA PSHUFFLE_BYTE_FLIP_MASK<>+0(SB)/8, $0x0001020304050607
DATA PSHUFFLE_BYTE_FLIP_MASK<>+8(SB)/8, $0x08090a0b0c0d0e0f
DATA PSHUFFLE_BYTE_FLIP_MASK<>+16(SB)/8, $0x1011121314151617
DATA PSHUFFLE_BYTE_FLIP_MASK<>+24(SB)/8, $0x18191a1b1c1d1e1f
GLOBL PSHUFFLE_BYTE_FLIP_MASK<>(SB), RODATA|NOPTR, $32

// MASK_YMM_LO is a 32-byte AND mask: the low 128 bits are zero and the high
// 128 bits are all ones, so VPAND with it clears the two low 64-bit lanes of
// a YMM register and keeps the two high lanes.
DATA MASK_YMM_LO<>+0(SB)/8, $0x0000000000000000
DATA MASK_YMM_LO<>+8(SB)/8, $0x0000000000000000
DATA MASK_YMM_LO<>+16(SB)/8, $0xffffffffffffffff
DATA MASK_YMM_LO<>+24(SB)/8, $0xffffffffffffffff
GLOBL MASK_YMM_LO<>(SB), RODATA|NOPTR, $32