1
2
3
4
5
6
7 package simd_test
8
9 import (
10 "fmt"
11 "os"
12 "reflect"
13 "simd/archsimd"
14 "slices"
15 "testing"
16 )
17
18 func TestMain(m *testing.M) {
19 if !archsimd.X86.AVX() {
20 fmt.Fprintln(os.Stderr, "Skipping tests: AVX is not available")
21 os.Exit(0)
22 }
23 os.Exit(m.Run())
24 }
25
// sink is a package-level interface variable that tests assign values to
// (a vector in TestType, a method value in TestFuncValue), which forces
// those values to be converted to an interface.
var sink any
27
28 func TestType(t *testing.T) {
29
30
31
32
33
34
35
36 type alias = archsimd.Int32x4
37 type maskT archsimd.Mask32x4
38 type myStruct struct {
39 x alias
40 y *archsimd.Int32x4
41 z maskT
42 }
43 vals := [4]int32{1, 2, 3, 4}
44 v := myStruct{x: archsimd.LoadInt32x4(&vals)}
45
46 want := []int32{2, 4, 0, 0}
47 y := archsimd.LoadInt32x4(&vals)
48 v.y = &y
49 sink = y
50
51 if !archsimd.X86.AVX512GFNI() {
52 t.Skip("Test requires X86.AVX512, not available on this hardware")
53 return
54 }
55 v.z = maskT(archsimd.Mask32x4FromBits(0b0011))
56 *v.y = v.y.Add(v.x).Masked(archsimd.Mask32x4(v.z))
57
58 got := [4]int32{}
59 v.y.Store(&got)
60 checkSlices(t, got[:], want)
61 }
62
63 func TestUncomparable(t *testing.T) {
64
65 var x, y any = archsimd.LoadUint32x4(&[4]uint32{1, 2, 3, 4}), archsimd.LoadUint32x4(&[4]uint32{5, 6, 7, 8})
66 shouldPanic := func(fn func()) {
67 defer func() {
68 if recover() == nil {
69 panic("did not panic")
70 }
71 }()
72 fn()
73 }
74 shouldPanic(func() { _ = x == y })
75 }
76
77 func TestFuncValue(t *testing.T) {
78
79 xv := [4]int32{1, 2, 3, 4}
80 yv := [4]int32{5, 6, 7, 8}
81 want := []int32{6, 8, 10, 12}
82 x := archsimd.LoadInt32x4(&xv)
83 y := archsimd.LoadInt32x4(&yv)
84 fn := archsimd.Int32x4.Add
85 sink = fn
86 x = fn(x, y)
87 got := [4]int32{}
88 x.Store(&got)
89 checkSlices(t, got[:], want)
90 }
91
92 func TestReflectMethod(t *testing.T) {
93
94
95 xv := [4]int32{1, 2, 3, 4}
96 yv := [4]int32{5, 6, 7, 8}
97 want := []int32{6, 8, 10, 12}
98 x := archsimd.LoadInt32x4(&xv)
99 y := archsimd.LoadInt32x4(&yv)
100 m, ok := reflect.TypeOf(x).MethodByName("Add")
101 if !ok {
102 t.Fatal("Add method not found")
103 }
104 fn := m.Func.Interface().(func(x, y archsimd.Int32x4) archsimd.Int32x4)
105 x = fn(x, y)
106 got := [4]int32{}
107 x.Store(&got)
108 checkSlices(t, got[:], want)
109 }
110
111 func TestVectorConversion(t *testing.T) {
112 if !archsimd.X86.AVX512GFNI() {
113 t.Skip("Test requires X86.AVX512, not available on this hardware")
114 return
115 }
116 xv := [4]int32{1, 2, 3, 4}
117 x := archsimd.LoadInt32x4(&xv)
118 xPromoted := x.AsInt64x2()
119 xPromotedDemoted := xPromoted.AsInt32x4()
120 got := [4]int32{}
121 xPromotedDemoted.Store(&got)
122 for i := range 4 {
123 if xv[i] != got[i] {
124 t.Errorf("Result at %d incorrect: want %d, got %d", i, xv[i], got[i])
125 }
126 }
127 }
128
129 func TestMaskConversion(t *testing.T) {
130 if !archsimd.X86.AVX512GFNI() {
131 t.Skip("Test requires X86.AVX512, not available on this hardware")
132 return
133 }
134 x := archsimd.LoadInt32x4Slice([]int32{5, 0, 7, 0})
135 mask := archsimd.Int32x4{}.Sub(x).ToMask()
136 y := archsimd.LoadInt32x4Slice([]int32{1, 2, 3, 4}).Add(x).Masked(mask)
137 want := [4]int32{6, 0, 10, 0}
138 got := make([]int32, 4)
139 y.StoreSlice(got)
140 checkSlices(t, got[:], want[:])
141 }
142
143 func TestPermute(t *testing.T) {
144 if !archsimd.X86.AVX512() {
145 t.Skip("Test requires X86.AVX512, not available on this hardware")
146 return
147 }
148 x := []int64{1, 2, 3, 4, 5, 6, 7, 8}
149 indices := []uint64{7, 6, 5, 4, 3, 2, 1, 0}
150 want := []int64{8, 7, 6, 5, 4, 3, 2, 1}
151 got := make([]int64, 8)
152 archsimd.LoadInt64x8Slice(x).Permute(archsimd.LoadUint64x8Slice(indices)).StoreSlice(got)
153 checkSlices(t, got, want)
154 }
155
156 func TestPermuteOrZero(t *testing.T) {
157 x := []uint8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}
158 indices := []int8{7, 6, 5, 4, 3, 2, 1, 0, -1, 8, -1, 9, -1, 10, -1, 11}
159 want := []uint8{8, 7, 6, 5, 4, 3, 2, 1, 0, 9, 0, 10, 0, 11, 0, 12}
160 got := make([]uint8, len(x))
161 archsimd.LoadUint8x16Slice(x).PermuteOrZero(archsimd.LoadInt8x16Slice(indices)).StoreSlice(got)
162 checkSlices(t, got, want)
163 }
164
165 func TestConcatPermute(t *testing.T) {
166 if !archsimd.X86.AVX512() {
167 t.Skip("Test requires X86.AVX512, not available on this hardware")
168 return
169 }
170 x := []int64{1, 2, 3, 4, 5, 6, 7, 8}
171 y := []int64{-1, -2, -3, -4, -5, -6, -7, -8}
172 indices := []uint64{7 + 8, 6, 5 + 8, 4, 3 + 8, 2, 1 + 8, 0}
173 want := []int64{-8, 7, -6, 5, -4, 3, -2, 1}
174 got := make([]int64, 8)
175 archsimd.LoadInt64x8Slice(x).ConcatPermute(archsimd.LoadInt64x8Slice(y), archsimd.LoadUint64x8Slice(indices)).StoreSlice(got)
176 checkSlices(t, got, want)
177 }
178
179 func TestCompress(t *testing.T) {
180 if !archsimd.X86.AVX512() {
181 t.Skip("Test requires X86.AVX512, not available on this hardware")
182 return
183 }
184 v1234 := archsimd.LoadInt32x4Slice([]int32{1, 2, 3, 4})
185 v2400 := v1234.Compress(archsimd.Mask32x4FromBits(0b1010))
186 got := make([]int32, 4)
187 v2400.StoreSlice(got)
188 want := []int32{2, 4, 0, 0}
189 if !slices.Equal(got, want) {
190 t.Errorf("want and got differ, want=%v, got=%v", want, got)
191 }
192 }
193
194 func TestExpand(t *testing.T) {
195 if !archsimd.X86.AVX512() {
196 t.Skip("Test requires X86.AVX512, not available on this hardware")
197 return
198 }
199 v3400 := archsimd.LoadInt32x4Slice([]int32{3, 4, 0, 0})
200 v2400 := v3400.Expand(archsimd.Mask32x4FromBits(0b1010))
201 got := make([]int32, 4)
202 v2400.StoreSlice(got)
203 want := []int32{0, 3, 0, 4}
204 if !slices.Equal(got, want) {
205 t.Errorf("want and got differ, want=%v, got=%v", want, got)
206 }
207 }
208
209 var testShiftAllVal uint64 = 3
210
211 func TestShiftAll(t *testing.T) {
212 got := make([]int32, 4)
213 archsimd.LoadInt32x4Slice([]int32{0b11, 0b11, 0b11, 0b11}).ShiftAllLeft(2).StoreSlice(got)
214 for _, v := range got {
215 if v != 0b1100 {
216 t.Errorf("expect 0b1100, got %b", v)
217 }
218 }
219 archsimd.LoadInt32x4Slice([]int32{0b11, 0b11, 0b11, 0b11}).ShiftAllLeft(testShiftAllVal).StoreSlice(got)
220 for _, v := range got {
221 if v != 0b11000 {
222 t.Errorf("expect 0b11000, got %b", v)
223 }
224 }
225 }
226
227 func TestSlicesInt8(t *testing.T) {
228 a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
229 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
230 v := archsimd.LoadInt8x32Slice(a)
231 b := make([]int8, 32, 32)
232 v.StoreSlice(b)
233 checkSlices(t, a, b)
234 }
235
236 func TestSlicesInt8SetElem(t *testing.T) {
237 a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
238 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
239 v := archsimd.LoadInt8x16Slice(a)
240
241 v = v.SetElem(3, 13)
242 a[3] = 13
243
244 b := make([]int8, 16, 16)
245 v.StoreSlice(b)
246 checkSlices(t, a, b)
247 }
248
249 func TestSlicesInt8GetElem(t *testing.T) {
250 a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
251 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
252 v := archsimd.LoadInt8x16Slice(a)
253 e := v.GetElem(2)
254 if e != a[2] {
255 t.Errorf("GetElem(2) = %d != a[2] = %d", e, a[2])
256 }
257
258 }
259
260 func TestSlicesInt8TooShortLoad(t *testing.T) {
261 defer func() {
262 if r := recover(); r != nil {
263 t.Logf("Saw EXPECTED panic %v", r)
264 } else {
265 t.Errorf("Did not see expected panic")
266 }
267 }()
268 a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
269 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}
270 v := archsimd.LoadInt8x32Slice(a)
271 b := make([]int8, 32, 32)
272 v.StoreSlice(b)
273 checkSlices(t, a, b)
274 }
275
276 func TestSlicesInt8TooShortStore(t *testing.T) {
277 defer func() {
278 if r := recover(); r != nil {
279 t.Logf("Saw EXPECTED panic %v", r)
280 } else {
281 t.Errorf("Did not see expected panic")
282 }
283 }()
284 a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
285 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
286 v := archsimd.LoadInt8x32Slice(a)
287 b := make([]int8, 31)
288 v.StoreSlice(b)
289 checkSlices(t, a, b)
290 }
291
292 func TestSlicesFloat64(t *testing.T) {
293 a := []float64{1, 2, 3, 4, 5, 6, 7, 8}
294 v := archsimd.LoadFloat64x4Slice(a)
295 b := make([]float64, 4, 4)
296 v.StoreSlice(b)
297 for i := range b {
298 if a[i] != b[i] {
299 t.Errorf("a and b differ at index %d, a=%f, b=%f", i, a[i], b[i])
300 }
301 }
302 }
303
304
305 func TestMergeLocals(t *testing.T) {
306 testMergeLocalswrapper(t, archsimd.Int64x4.Add)
307 }
308
309
// forceSpill is an empty function that testMergeLocalswrapper calls between
// loading its vectors and using them. It is marked noinline so that the call
// is a real call; otherwise the empty body would be inlined away and the
// call would have no effect on the generated code.
//
// NOTE(review): the name suggests the call is meant to make live vector
// values spill across it — confirm against the compiler behavior under test.
//
//go:noinline
func forceSpill() {}
311
312 func testMergeLocalswrapper(t *testing.T, op func(archsimd.Int64x4, archsimd.Int64x4) archsimd.Int64x4) {
313 t.Helper()
314 s0 := []int64{0, 1, 2, 3}
315 s1 := []int64{-1, 0, -1, 0}
316 want := []int64{-1, 1, 1, 3}
317 v := archsimd.LoadInt64x4Slice(s0)
318 m := archsimd.LoadInt64x4Slice(s1)
319 forceSpill()
320 got := make([]int64, 4)
321 gotv := op(v, m)
322 gotv.StoreSlice(got)
323 for i := range len(want) {
324 if !(got[i] == want[i]) {
325 t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
326 }
327 }
328 }
329
330 func TestBitMaskFromBits(t *testing.T) {
331 if !archsimd.X86.AVX512() {
332 t.Skip("Test requires X86.AVX512, not available on this hardware")
333 return
334 }
335 results := [2]int64{}
336 want := [2]int64{0, 6}
337 m := archsimd.Mask64x2FromBits(0b10)
338 archsimd.LoadInt64x2Slice([]int64{1, 2}).Add(archsimd.LoadInt64x2Slice([]int64{3, 4})).Masked(m).Store(&results)
339 for i := range 2 {
340 if results[i] != want[i] {
341 t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], results[i])
342 }
343 }
344 }
345
346 var maskForTestBitMaskFromBitsLoad = uint8(0b10)
347
348 func TestBitMaskFromBitsLoad(t *testing.T) {
349 if !archsimd.X86.AVX512() {
350 t.Skip("Test requires X86.AVX512, not available on this hardware")
351 return
352 }
353 results := [2]int64{}
354 want := [2]int64{0, 6}
355 m := archsimd.Mask64x2FromBits(maskForTestBitMaskFromBitsLoad)
356 archsimd.LoadInt64x2Slice([]int64{1, 2}).Add(archsimd.LoadInt64x2Slice([]int64{3, 4})).Masked(m).Store(&results)
357 for i := range 2 {
358 if results[i] != want[i] {
359 t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], results[i])
360 }
361 }
362 }
363
364 func TestBitMaskToBits(t *testing.T) {
365 if !archsimd.X86.AVX512() {
366 t.Skip("Test requires X86.AVX512, not available on this hardware")
367 return
368 }
369 if v := archsimd.LoadInt16x8Slice([]int16{1, 0, 1, 0, 0, 0, 0, 0}).ToMask().ToBits(); v != 0b101 {
370 t.Errorf("Want 0b101, got %b", v)
371 }
372 }
373
374 var maskForTestBitMaskFromBitsStore uint8
375
376 func TestBitMaskToBitsStore(t *testing.T) {
377 if !archsimd.X86.AVX512() {
378 t.Skip("Test requires X86.AVX512, not available on this hardware")
379 return
380 }
381 maskForTestBitMaskFromBitsStore = archsimd.LoadInt16x8Slice([]int16{1, 0, 1, 0, 0, 0, 0, 0}).ToMask().ToBits()
382 if maskForTestBitMaskFromBitsStore != 0b101 {
383 t.Errorf("Want 0b101, got %b", maskForTestBitMaskFromBitsStore)
384 }
385 }
386
387 func TestMergeFloat(t *testing.T) {
388 k := make([]int64, 4, 4)
389 s := make([]float64, 4, 4)
390
391 a := archsimd.LoadFloat64x4Slice([]float64{1, 2, 3, 4})
392 b := archsimd.LoadFloat64x4Slice([]float64{4, 2, 3, 1})
393 g := a.Greater(b)
394 g.ToInt64x4().StoreSlice(k)
395 c := a.Merge(b, g)
396
397 c.StoreSlice(s)
398
399 checkSlices[int64](t, k, []int64{0, 0, 0, -1})
400 checkSlices[float64](t, s, []float64{4, 2, 3, 4})
401 }
402
403 func TestMergeFloat512(t *testing.T) {
404 if !archsimd.X86.AVX512() {
405 t.Skip("Test requires X86.AVX512, not available on this hardware")
406 return
407 }
408
409 k := make([]int64, 8, 8)
410 s := make([]float64, 8, 8)
411
412 a := archsimd.LoadFloat64x8Slice([]float64{1, 2, 3, 4, 5, 6, 7, 8})
413 b := archsimd.LoadFloat64x8Slice([]float64{8, 7, 6, 5, 4, 2, 3, 1})
414 g := a.Greater(b)
415 g.ToInt64x8().StoreSlice(k)
416 c := a.Merge(b, g)
417 d := a.Masked(g)
418
419 checkSlices[int64](t, k, []int64{0, 0, 0, 0, -1, -1, -1, -1})
420
421 c.StoreSlice(s)
422 checkSlices[float64](t, s, []float64{8, 7, 6, 5, 5, 6, 7, 8})
423
424 d.StoreSlice(s)
425 checkSlices[float64](t, s, []float64{0, 0, 0, 0, 5, 6, 7, 8})
426 }
427
428 var ro uint8 = 2
429
430 func TestRotateAllVariable(t *testing.T) {
431 if !archsimd.X86.AVX512() {
432 t.Skip("Test requires X86.AVX512, not available on this hardware")
433 return
434 }
435 got := make([]int32, 4)
436 archsimd.LoadInt32x4Slice([]int32{0b11, 0b11, 0b11, 0b11}).RotateAllLeft(ro).StoreSlice(got)
437 for _, v := range got {
438 if v != 0b1100 {
439 t.Errorf("Want 0b1100, got %b", v)
440 }
441 }
442 }
443
444 func TestBroadcastUint32x4(t *testing.T) {
445 s := make([]uint32, 4, 4)
446 archsimd.BroadcastUint32x4(123456789).StoreSlice(s)
447 checkSlices(t, s, []uint32{123456789, 123456789, 123456789, 123456789})
448 }
449
450 func TestBroadcastFloat32x8(t *testing.T) {
451 s := make([]float32, 8, 8)
452 archsimd.BroadcastFloat32x8(123456789).StoreSlice(s)
453 checkSlices(t, s, []float32{123456789, 123456789, 123456789, 123456789, 123456789, 123456789, 123456789, 123456789})
454 }
455
456 func TestBroadcastFloat64x2(t *testing.T) {
457 s := make([]float64, 2, 2)
458 archsimd.BroadcastFloat64x2(123456789).StoreSlice(s)
459 checkSlices(t, s, []float64{123456789, 123456789})
460 }
461
462 func TestBroadcastUint64x2(t *testing.T) {
463 s := make([]uint64, 2, 2)
464 archsimd.BroadcastUint64x2(123456789).StoreSlice(s)
465 checkSlices(t, s, []uint64{123456789, 123456789})
466 }
467
468 func TestBroadcastUint16x8(t *testing.T) {
469 s := make([]uint16, 8, 8)
470 archsimd.BroadcastUint16x8(12345).StoreSlice(s)
471 checkSlices(t, s, []uint16{12345, 12345, 12345, 12345})
472 }
473
474 func TestBroadcastInt8x32(t *testing.T) {
475 s := make([]int8, 32, 32)
476 archsimd.BroadcastInt8x32(-123).StoreSlice(s)
477 checkSlices(t, s, []int8{-123, -123, -123, -123, -123, -123, -123, -123,
478 -123, -123, -123, -123, -123, -123, -123, -123,
479 -123, -123, -123, -123, -123, -123, -123, -123,
480 -123, -123, -123, -123, -123, -123, -123, -123,
481 })
482 }
483
484 func TestMaskOpt512(t *testing.T) {
485 if !archsimd.X86.AVX512() {
486 t.Skip("Test requires X86.AVX512, not available on this hardware")
487 return
488 }
489
490 k := make([]int64, 8, 8)
491 s := make([]float64, 8, 8)
492
493 a := archsimd.LoadFloat64x8Slice([]float64{2, 0, 2, 0, 2, 0, 2, 0})
494 b := archsimd.LoadFloat64x8Slice([]float64{1, 1, 1, 1, 1, 1, 1, 1})
495 c := archsimd.LoadFloat64x8Slice([]float64{1, 2, 3, 4, 5, 6, 7, 8})
496 d := archsimd.LoadFloat64x8Slice([]float64{2, 4, 6, 8, 10, 12, 14, 16})
497 g := a.Greater(b)
498 e := c.Add(d).Masked(g)
499 e.StoreSlice(s)
500 g.ToInt64x8().StoreSlice(k)
501 checkSlices[int64](t, k, []int64{-1, 0, -1, 0, -1, 0, -1, 0})
502 checkSlices[float64](t, s, []float64{3, 0, 9, 0, 15, 0, 21, 0})
503 }
504
505
506
507
508
509 func flattenedTranspose(x, y archsimd.Int32x4) (a, b archsimd.Int32x4) {
510 return x.InterleaveLo(y), x.InterleaveHi(y)
511 }
512
513 func TestFlattenedTranspose(t *testing.T) {
514 r := make([]int32, 4, 4)
515 s := make([]int32, 4, 4)
516
517 x := archsimd.LoadInt32x4Slice([]int32{0xA, 0xB, 0xC, 0xD})
518 y := archsimd.LoadInt32x4Slice([]int32{1, 2, 3, 4})
519 a, b := flattenedTranspose(x, y)
520
521 a.StoreSlice(r)
522 b.StoreSlice(s)
523
524 checkSlices[int32](t, r, []int32{0xA, 1, 0xB, 2})
525 checkSlices[int32](t, s, []int32{0xC, 3, 0xD, 4})
526
527 }
528
529 func TestClearAVXUpperBits(t *testing.T) {
530
531
532 if !archsimd.X86.AVX2() {
533 t.Skip("Test requires X86.AVX2, not available on this hardware")
534 return
535 }
536
537 r := make([]int64, 4)
538 s := make([]int64, 4)
539
540 x := archsimd.LoadInt64x4Slice([]int64{10, 20, 30, 40})
541 y := archsimd.LoadInt64x4Slice([]int64{1, 2, 3, 4})
542
543 x.Add(y).StoreSlice(r)
544 archsimd.ClearAVXUpperBits()
545 x.Sub(y).StoreSlice(s)
546
547 checkSlices[int64](t, r, []int64{11, 22, 33, 44})
548 checkSlices[int64](t, s, []int64{9, 18, 27, 36})
549 }
550
551 func TestLeadingZeros(t *testing.T) {
552 if !archsimd.X86.AVX512() {
553 t.Skip("Test requires X86.AVX512, not available on this hardware")
554 return
555 }
556
557 src := []uint64{0b1111, 0}
558 want := []uint64{60, 64}
559 got := make([]uint64, 2)
560 archsimd.LoadUint64x2Slice(src).LeadingZeros().StoreSlice(got)
561 for i := range 2 {
562 if want[i] != got[i] {
563 t.Errorf("Result incorrect at %d: want %d, got %d", i, want[i], got[i])
564 }
565 }
566 }
567
568 func TestIsZero(t *testing.T) {
569 v1 := archsimd.LoadUint64x2Slice([]uint64{0, 1})
570 v2 := archsimd.LoadUint64x2Slice([]uint64{0, 0})
571 if v1.IsZero() {
572 t.Errorf("Result incorrect, want false, got true")
573 }
574 if !v2.IsZero() {
575 t.Errorf("Result incorrect, want true, got false")
576 }
577 if !v1.And(v2).IsZero() {
578 t.Errorf("Result incorrect, want true, got false")
579 }
580 if v1.AndNot(v2).IsZero() {
581 t.Errorf("Result incorrect, want false, got true")
582 }
583 if !v2.And(v1).IsZero() {
584 t.Errorf("Result incorrect, want true, got false")
585 }
586 if !v2.AndNot(v1).IsZero() {
587 t.Errorf("Result incorrect, want true, got false")
588 }
589 }
590
591 func TestSelect4FromPairConst(t *testing.T) {
592 x := archsimd.LoadInt32x4Slice([]int32{0, 1, 2, 3})
593 y := archsimd.LoadInt32x4Slice([]int32{4, 5, 6, 7})
594
595 llll := x.SelectFromPair(0, 1, 2, 3, y)
596 hhhh := x.SelectFromPair(4, 5, 6, 7, y)
597 llhh := x.SelectFromPair(0, 1, 6, 7, y)
598 hhll := x.SelectFromPair(6, 7, 0, 1, y)
599
600 lllh := x.SelectFromPair(0, 1, 2, 7, y)
601 llhl := x.SelectFromPair(0, 1, 7, 2, y)
602 lhll := x.SelectFromPair(0, 7, 1, 2, y)
603 hlll := x.SelectFromPair(7, 0, 1, 2, y)
604
605 hhhl := x.SelectFromPair(4, 5, 6, 0, y)
606 hhlh := x.SelectFromPair(4, 5, 0, 6, y)
607 hlhh := x.SelectFromPair(4, 0, 5, 6, y)
608 lhhh := x.SelectFromPair(0, 4, 5, 6, y)
609
610 lhlh := x.SelectFromPair(0, 4, 1, 5, y)
611 hlhl := x.SelectFromPair(4, 0, 5, 1, y)
612 lhhl := x.SelectFromPair(0, 4, 5, 1, y)
613 hllh := x.SelectFromPair(4, 0, 1, 5, y)
614
615 r := make([]int32, 4, 4)
616
617 foo := func(v archsimd.Int32x4, a, b, c, d int32) {
618 v.StoreSlice(r)
619 checkSlices[int32](t, r, []int32{a, b, c, d})
620 }
621
622 foo(llll, 0, 1, 2, 3)
623 foo(hhhh, 4, 5, 6, 7)
624 foo(llhh, 0, 1, 6, 7)
625 foo(hhll, 6, 7, 0, 1)
626
627 foo(lllh, 0, 1, 2, 7)
628 foo(llhl, 0, 1, 7, 2)
629 foo(lhll, 0, 7, 1, 2)
630 foo(hlll, 7, 0, 1, 2)
631
632 foo(hhhl, 4, 5, 6, 0)
633 foo(hhlh, 4, 5, 0, 6)
634 foo(hlhh, 4, 0, 5, 6)
635 foo(lhhh, 0, 4, 5, 6)
636
637 foo(lhlh, 0, 4, 1, 5)
638 foo(hlhl, 4, 0, 5, 1)
639 foo(lhhl, 0, 4, 5, 1)
640 foo(hllh, 4, 0, 1, 5)
641 }
642
643
644 func selectFromPairInt32x4(x archsimd.Int32x4, a, b, c, d uint8, y archsimd.Int32x4) archsimd.Int32x4 {
645 return x.SelectFromPair(a, b, c, d, y)
646 }
647
648 func TestSelect4FromPairVar(t *testing.T) {
649 x := archsimd.LoadInt32x4Slice([]int32{0, 1, 2, 3})
650 y := archsimd.LoadInt32x4Slice([]int32{4, 5, 6, 7})
651
652 llll := selectFromPairInt32x4(x, 0, 1, 2, 3, y)
653 hhhh := selectFromPairInt32x4(x, 4, 5, 6, 7, y)
654 llhh := selectFromPairInt32x4(x, 0, 1, 6, 7, y)
655 hhll := selectFromPairInt32x4(x, 6, 7, 0, 1, y)
656
657 lllh := selectFromPairInt32x4(x, 0, 1, 2, 7, y)
658 llhl := selectFromPairInt32x4(x, 0, 1, 7, 2, y)
659 lhll := selectFromPairInt32x4(x, 0, 7, 1, 2, y)
660 hlll := selectFromPairInt32x4(x, 7, 0, 1, 2, y)
661
662 hhhl := selectFromPairInt32x4(x, 4, 5, 6, 0, y)
663 hhlh := selectFromPairInt32x4(x, 4, 5, 0, 6, y)
664 hlhh := selectFromPairInt32x4(x, 4, 0, 5, 6, y)
665 lhhh := selectFromPairInt32x4(x, 0, 4, 5, 6, y)
666
667 lhlh := selectFromPairInt32x4(x, 0, 4, 1, 5, y)
668 hlhl := selectFromPairInt32x4(x, 4, 0, 5, 1, y)
669 lhhl := selectFromPairInt32x4(x, 0, 4, 5, 1, y)
670 hllh := selectFromPairInt32x4(x, 4, 0, 1, 5, y)
671
672 r := make([]int32, 4, 4)
673
674 foo := func(v archsimd.Int32x4, a, b, c, d int32) {
675 v.StoreSlice(r)
676 checkSlices[int32](t, r, []int32{a, b, c, d})
677 }
678
679 foo(llll, 0, 1, 2, 3)
680 foo(hhhh, 4, 5, 6, 7)
681 foo(llhh, 0, 1, 6, 7)
682 foo(hhll, 6, 7, 0, 1)
683
684 foo(lllh, 0, 1, 2, 7)
685 foo(llhl, 0, 1, 7, 2)
686 foo(lhll, 0, 7, 1, 2)
687 foo(hlll, 7, 0, 1, 2)
688
689 foo(hhhl, 4, 5, 6, 0)
690 foo(hhlh, 4, 5, 0, 6)
691 foo(hlhh, 4, 0, 5, 6)
692 foo(lhhh, 0, 4, 5, 6)
693
694 foo(lhlh, 0, 4, 1, 5)
695 foo(hlhl, 4, 0, 5, 1)
696 foo(lhhl, 0, 4, 5, 1)
697 foo(hllh, 4, 0, 1, 5)
698 }
699
700 func TestSelect4FromPairConstGrouped(t *testing.T) {
701 x := archsimd.LoadFloat32x8Slice([]float32{0, 1, 2, 3, 10, 11, 12, 13})
702 y := archsimd.LoadFloat32x8Slice([]float32{4, 5, 6, 7, 14, 15, 16, 17})
703
704 llll := x.SelectFromPairGrouped(0, 1, 2, 3, y)
705 hhhh := x.SelectFromPairGrouped(4, 5, 6, 7, y)
706 llhh := x.SelectFromPairGrouped(0, 1, 6, 7, y)
707 hhll := x.SelectFromPairGrouped(6, 7, 0, 1, y)
708
709 lllh := x.SelectFromPairGrouped(0, 1, 2, 7, y)
710 llhl := x.SelectFromPairGrouped(0, 1, 7, 2, y)
711 lhll := x.SelectFromPairGrouped(0, 7, 1, 2, y)
712 hlll := x.SelectFromPairGrouped(7, 0, 1, 2, y)
713
714 hhhl := x.SelectFromPairGrouped(4, 5, 6, 0, y)
715 hhlh := x.SelectFromPairGrouped(4, 5, 0, 6, y)
716 hlhh := x.SelectFromPairGrouped(4, 0, 5, 6, y)
717 lhhh := x.SelectFromPairGrouped(0, 4, 5, 6, y)
718
719 lhlh := x.SelectFromPairGrouped(0, 4, 1, 5, y)
720 hlhl := x.SelectFromPairGrouped(4, 0, 5, 1, y)
721 lhhl := x.SelectFromPairGrouped(0, 4, 5, 1, y)
722 hllh := x.SelectFromPairGrouped(4, 0, 1, 5, y)
723
724 r := make([]float32, 8, 8)
725
726 foo := func(v archsimd.Float32x8, a, b, c, d float32) {
727 v.StoreSlice(r)
728 checkSlices[float32](t, r, []float32{a, b, c, d, 10 + a, 10 + b, 10 + c, 10 + d})
729 }
730
731 foo(llll, 0, 1, 2, 3)
732 foo(hhhh, 4, 5, 6, 7)
733 foo(llhh, 0, 1, 6, 7)
734 foo(hhll, 6, 7, 0, 1)
735
736 foo(lllh, 0, 1, 2, 7)
737 foo(llhl, 0, 1, 7, 2)
738 foo(lhll, 0, 7, 1, 2)
739 foo(hlll, 7, 0, 1, 2)
740
741 foo(hhhl, 4, 5, 6, 0)
742 foo(hhlh, 4, 5, 0, 6)
743 foo(hlhh, 4, 0, 5, 6)
744 foo(lhhh, 0, 4, 5, 6)
745
746 foo(lhlh, 0, 4, 1, 5)
747 foo(hlhl, 4, 0, 5, 1)
748 foo(lhhl, 0, 4, 5, 1)
749 foo(hllh, 4, 0, 1, 5)
750 }
751
752 func TestSelectFromPairConstGroupedUint32x16(t *testing.T) {
753 if !archsimd.X86.AVX512() {
754 t.Skip("Test requires X86.AVX512, not available on this hardware")
755 return
756 }
757 x := archsimd.LoadUint32x16Slice([]uint32{0, 1, 2, 3, 10, 11, 12, 13, 20, 21, 22, 23, 30, 31, 32, 33})
758 y := archsimd.LoadUint32x16Slice([]uint32{4, 5, 6, 7, 14, 15, 16, 17, 24, 25, 26, 27, 34, 35, 36, 37})
759
760 llll := x.SelectFromPairGrouped(0, 1, 2, 3, y)
761 hhhh := x.SelectFromPairGrouped(4, 5, 6, 7, y)
762 llhh := x.SelectFromPairGrouped(0, 1, 6, 7, y)
763 hhll := x.SelectFromPairGrouped(6, 7, 0, 1, y)
764
765 lllh := x.SelectFromPairGrouped(0, 1, 2, 7, y)
766 llhl := x.SelectFromPairGrouped(0, 1, 7, 2, y)
767 lhll := x.SelectFromPairGrouped(0, 7, 1, 2, y)
768 hlll := x.SelectFromPairGrouped(7, 0, 1, 2, y)
769
770 hhhl := x.SelectFromPairGrouped(4, 5, 6, 0, y)
771 hhlh := x.SelectFromPairGrouped(4, 5, 0, 6, y)
772 hlhh := x.SelectFromPairGrouped(4, 0, 5, 6, y)
773 lhhh := x.SelectFromPairGrouped(0, 4, 5, 6, y)
774
775 lhlh := x.SelectFromPairGrouped(0, 4, 1, 5, y)
776 hlhl := x.SelectFromPairGrouped(4, 0, 5, 1, y)
777 lhhl := x.SelectFromPairGrouped(0, 4, 5, 1, y)
778 hllh := x.SelectFromPairGrouped(4, 0, 1, 5, y)
779
780 r := make([]uint32, 16, 16)
781
782 foo := func(v archsimd.Uint32x16, a, b, c, d uint32) {
783 v.StoreSlice(r)
784 checkSlices[uint32](t, r, []uint32{a, b, c, d,
785 10 + a, 10 + b, 10 + c, 10 + d,
786 20 + a, 20 + b, 20 + c, 20 + d,
787 30 + a, 30 + b, 30 + c, 30 + d,
788 })
789 }
790
791 foo(llll, 0, 1, 2, 3)
792 foo(hhhh, 4, 5, 6, 7)
793 foo(llhh, 0, 1, 6, 7)
794 foo(hhll, 6, 7, 0, 1)
795
796 foo(lllh, 0, 1, 2, 7)
797 foo(llhl, 0, 1, 7, 2)
798 foo(lhll, 0, 7, 1, 2)
799 foo(hlll, 7, 0, 1, 2)
800
801 foo(hhhl, 4, 5, 6, 0)
802 foo(hhlh, 4, 5, 0, 6)
803 foo(hlhh, 4, 0, 5, 6)
804 foo(lhhh, 0, 4, 5, 6)
805
806 foo(lhlh, 0, 4, 1, 5)
807 foo(hlhl, 4, 0, 5, 1)
808 foo(lhhl, 0, 4, 5, 1)
809 foo(hllh, 4, 0, 1, 5)
810 }
811
812 func TestSelect128FromPair(t *testing.T) {
813 x := archsimd.LoadUint64x4Slice([]uint64{0, 1, 2, 3})
814 y := archsimd.LoadUint64x4Slice([]uint64{4, 5, 6, 7})
815
816 aa := x.Select128FromPair(0, 0, y)
817 ab := x.Select128FromPair(0, 1, y)
818 bc := x.Select128FromPair(1, 2, y)
819 cd := x.Select128FromPair(2, 3, y)
820 da := x.Select128FromPair(3, 0, y)
821 dc := x.Select128FromPair(3, 2, y)
822
823 r := make([]uint64, 4, 4)
824
825 foo := func(v archsimd.Uint64x4, a, b uint64) {
826 a, b = 2*a, 2*b
827 v.StoreSlice(r)
828 checkSlices[uint64](t, r, []uint64{a, a + 1, b, b + 1})
829 }
830
831 foo(aa, 0, 0)
832 foo(ab, 0, 1)
833 foo(bc, 1, 2)
834 foo(cd, 2, 3)
835 foo(da, 3, 0)
836 foo(dc, 3, 2)
837 }
838
839 func TestSelect128FromPairError(t *testing.T) {
840 x := archsimd.LoadUint64x4Slice([]uint64{0, 1, 2, 3})
841 y := archsimd.LoadUint64x4Slice([]uint64{4, 5, 6, 7})
842
843 defer func() {
844 if r := recover(); r != nil {
845 t.Logf("Saw expected panic %v", r)
846 }
847 }()
848 _ = x.Select128FromPair(0, 4, y)
849
850 t.Errorf("Should have panicked")
851 }
852
853
854 func select128FromPair(x archsimd.Uint64x4, lo, hi uint8, y archsimd.Uint64x4) archsimd.Uint64x4 {
855 return x.Select128FromPair(lo, hi, y)
856 }
857
858 func TestSelect128FromPairVar(t *testing.T) {
859 x := archsimd.LoadUint64x4Slice([]uint64{0, 1, 2, 3})
860 y := archsimd.LoadUint64x4Slice([]uint64{4, 5, 6, 7})
861
862 aa := select128FromPair(x, 0, 0, y)
863 ab := select128FromPair(x, 0, 1, y)
864 bc := select128FromPair(x, 1, 2, y)
865 cd := select128FromPair(x, 2, 3, y)
866 da := select128FromPair(x, 3, 0, y)
867 dc := select128FromPair(x, 3, 2, y)
868
869 r := make([]uint64, 4, 4)
870
871 foo := func(v archsimd.Uint64x4, a, b uint64) {
872 a, b = 2*a, 2*b
873 v.StoreSlice(r)
874 checkSlices[uint64](t, r, []uint64{a, a + 1, b, b + 1})
875 }
876
877 foo(aa, 0, 0)
878 foo(ab, 0, 1)
879 foo(bc, 1, 2)
880 foo(cd, 2, 3)
881 foo(da, 3, 0)
882 foo(dc, 3, 2)
883 }
884
885 func TestSelect2FromPairConst(t *testing.T) {
886 x := archsimd.LoadUint64x2Slice([]uint64{0, 1})
887 y := archsimd.LoadUint64x2Slice([]uint64{2, 3})
888
889 ll := x.SelectFromPair(0, 1, y)
890 hh := x.SelectFromPair(3, 2, y)
891 lh := x.SelectFromPair(0, 3, y)
892 hl := x.SelectFromPair(2, 1, y)
893
894 r := make([]uint64, 2, 2)
895
896 foo := func(v archsimd.Uint64x2, a, b uint64) {
897 v.StoreSlice(r)
898 checkSlices[uint64](t, r, []uint64{a, b})
899 }
900
901 foo(ll, 0, 1)
902 foo(hh, 3, 2)
903 foo(lh, 0, 3)
904 foo(hl, 2, 1)
905 }
906
907 func TestSelect2FromPairConstGroupedUint(t *testing.T) {
908 x := archsimd.LoadUint64x4Slice([]uint64{0, 1, 10, 11})
909 y := archsimd.LoadUint64x4Slice([]uint64{2, 3, 12, 13})
910
911 ll := x.SelectFromPairGrouped(0, 1, y)
912 hh := x.SelectFromPairGrouped(3, 2, y)
913 lh := x.SelectFromPairGrouped(0, 3, y)
914 hl := x.SelectFromPairGrouped(2, 1, y)
915
916 r := make([]uint64, 4, 4)
917
918 foo := func(v archsimd.Uint64x4, a, b uint64) {
919 v.StoreSlice(r)
920 checkSlices[uint64](t, r, []uint64{a, b, a + 10, b + 10})
921 }
922
923 foo(ll, 0, 1)
924 foo(hh, 3, 2)
925 foo(lh, 0, 3)
926 foo(hl, 2, 1)
927 }
928
929 func TestSelect2FromPairConstGroupedFloat(t *testing.T) {
930 x := archsimd.LoadFloat64x4Slice([]float64{0, 1, 10, 11})
931 y := archsimd.LoadFloat64x4Slice([]float64{2, 3, 12, 13})
932
933 ll := x.SelectFromPairGrouped(0, 1, y)
934 hh := x.SelectFromPairGrouped(3, 2, y)
935 lh := x.SelectFromPairGrouped(0, 3, y)
936 hl := x.SelectFromPairGrouped(2, 1, y)
937
938 r := make([]float64, 4, 4)
939
940 foo := func(v archsimd.Float64x4, a, b float64) {
941 v.StoreSlice(r)
942 checkSlices[float64](t, r, []float64{a, b, a + 10, b + 10})
943 }
944
945 foo(ll, 0, 1)
946 foo(hh, 3, 2)
947 foo(lh, 0, 3)
948 foo(hl, 2, 1)
949 }
950
951 func TestSelect2FromPairConstGroupedInt(t *testing.T) {
952 x := archsimd.LoadInt64x4Slice([]int64{0, 1, 10, 11})
953 y := archsimd.LoadInt64x4Slice([]int64{2, 3, 12, 13})
954
955 ll := x.SelectFromPairGrouped(0, 1, y)
956 hh := x.SelectFromPairGrouped(3, 2, y)
957 lh := x.SelectFromPairGrouped(0, 3, y)
958 hl := x.SelectFromPairGrouped(2, 1, y)
959
960 r := make([]int64, 4, 4)
961
962 foo := func(v archsimd.Int64x4, a, b int64) {
963 v.StoreSlice(r)
964 checkSlices[int64](t, r, []int64{a, b, a + 10, b + 10})
965 }
966
967 foo(ll, 0, 1)
968 foo(hh, 3, 2)
969 foo(lh, 0, 3)
970 foo(hl, 2, 1)
971 }
972
973 func TestSelect2FromPairConstGroupedInt512(t *testing.T) {
974 if !archsimd.X86.AVX512() {
975 t.Skip("Test requires X86.AVX512, not available on this hardware")
976 return
977 }
978
979 x := archsimd.LoadInt64x8Slice([]int64{0, 1, 10, 11, 20, 21, 30, 31})
980 y := archsimd.LoadInt64x8Slice([]int64{2, 3, 12, 13, 22, 23, 32, 33})
981
982 ll := x.SelectFromPairGrouped(0, 1, y)
983 hh := x.SelectFromPairGrouped(3, 2, y)
984 lh := x.SelectFromPairGrouped(0, 3, y)
985 hl := x.SelectFromPairGrouped(2, 1, y)
986
987 r := make([]int64, 8, 8)
988
989 foo := func(v archsimd.Int64x8, a, b int64) {
990 v.StoreSlice(r)
991 checkSlices[int64](t, r, []int64{a, b, a + 10, b + 10, a + 20, b + 20, a + 30, b + 30})
992 }
993
994 foo(ll, 0, 1)
995 foo(hh, 3, 2)
996 foo(lh, 0, 3)
997 foo(hl, 2, 1)
998 }
999
1000 func TestString(t *testing.T) {
1001 x := archsimd.LoadUint32x4Slice([]uint32{0, 1, 2, 3})
1002 y := archsimd.LoadInt64x4Slice([]int64{-4, -5, -6, -7})
1003 z := archsimd.LoadFloat32x4Slice([]float32{0.5, 1.5, -2.5, 3.5e9})
1004 w := archsimd.LoadFloat64x4Slice([]float64{0.5, 1.5, -2.5, 3.5e9})
1005
1006 sx := "{0,1,2,3}"
1007 sy := "{-4,-5,-6,-7}"
1008 sz := "{0.5,1.5,-2.5,3.5e+09}"
1009 sw := sz
1010
1011 if x.String() != sx {
1012 t.Errorf("x=%s wanted %s", x, sx)
1013 }
1014 if y.String() != sy {
1015 t.Errorf("y=%s wanted %s", y, sy)
1016 }
1017 if z.String() != sz {
1018 t.Errorf("z=%s wanted %s", z, sz)
1019 }
1020 if w.String() != sw {
1021 t.Errorf("w=%s wanted %s", w, sw)
1022 }
1023 t.Logf("w=%s", w)
1024 t.Logf("x=%s", x)
1025 t.Logf("y=%s", y)
1026 t.Logf("z=%s", z)
1027 }
1028
1029
// a returns a new zeroed slice of 16 int32 values with capacity 16.
func a() []int32 {
	var backing [16]int32
	return backing[:]
}
1033
1034
1035
1036 func applyTo3(x, y, z archsimd.Int32x16, f func(x, y, z int32) int32) []int32 {
1037 ax, ay, az := a(), a(), a()
1038 x.StoreSlice(ax)
1039 y.StoreSlice(ay)
1040 z.StoreSlice(az)
1041
1042 r := a()
1043 for i := range r {
1044 r[i] = f(ax[i], ay[i], az[i])
1045 }
1046 return r
1047 }
1048
1049
1050
1051 func applyTo4(x, y, z, w archsimd.Int32x16, f func(x, y, z, w int32) int32) []int32 {
1052 ax, ay, az, aw := a(), a(), a(), a()
1053 x.StoreSlice(ax)
1054 y.StoreSlice(ay)
1055 z.StoreSlice(az)
1056 w.StoreSlice(aw)
1057
1058 r := make([]int32, len(ax), len(ax))
1059 for i := range r {
1060 r[i] = f(ax[i], ay[i], az[i], aw[i])
1061 }
1062 return r
1063 }
1064
1065 func TestSelectTernOptInt32x16(t *testing.T) {
1066 if !archsimd.X86.AVX512() {
1067 t.Skip("Test requires X86.AVX512, not available on this hardware")
1068 return
1069 }
1070 ax := []int32{0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1}
1071 ay := []int32{0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1}
1072 az := []int32{0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1}
1073 aw := []int32{0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1}
1074 am := []int32{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}
1075
1076 x := archsimd.LoadInt32x16Slice(ax)
1077 y := archsimd.LoadInt32x16Slice(ay)
1078 z := archsimd.LoadInt32x16Slice(az)
1079 w := archsimd.LoadInt32x16Slice(aw)
1080 m := archsimd.LoadInt32x16Slice(am)
1081
1082 foo := func(v archsimd.Int32x16, s []int32) {
1083 r := make([]int32, 16, 16)
1084 v.StoreSlice(r)
1085 checkSlices[int32](t, r, s)
1086 }
1087
1088 t0 := w.Xor(y).Xor(z)
1089 ft0 := func(w, y, z int32) int32 {
1090 return w ^ y ^ z
1091 }
1092 foo(t0, applyTo3(w, y, z, ft0))
1093
1094 t1 := m.And(w.Xor(y).Xor(z.Not()))
1095 ft1 := func(m, w, y, z int32) int32 {
1096 return m & (w ^ y ^ ^z)
1097 }
1098 foo(t1, applyTo4(m, w, y, z, ft1))
1099
1100 t2 := x.Xor(y).Xor(z).And(x.Xor(y).Xor(z.Not()))
1101 ft2 := func(x, y, z int32) int32 {
1102 return (x ^ y ^ z) & (x ^ y ^ ^z)
1103 }
1104 foo(t2, applyTo3(x, y, z, ft2))
1105 }
1106
1107 func TestMaskedMerge(t *testing.T) {
1108 x := archsimd.LoadInt64x4Slice([]int64{1, 2, 3, 4})
1109 y := archsimd.LoadInt64x4Slice([]int64{5, 6, 1, 1})
1110 z := archsimd.LoadInt64x4Slice([]int64{-1, -2, -3, -4})
1111 res := make([]int64, 4)
1112 expected := []int64{6, 8, -3, -4}
1113 mask := x.Less(y)
1114 if archsimd.X86.AVX512() {
1115 x.Add(y).Merge(z, mask).StoreSlice(res)
1116 } else {
1117 x.Add(y).Merge(z, mask).StoreSlice(res)
1118 }
1119 for i := range 4 {
1120 if res[i] != expected[i] {
1121 t.Errorf("got %d wanted %d", res[i], expected[i])
1122 }
1123 }
1124 }
1125
1126 func TestDotProductQuadruple(t *testing.T) {
1127 if !archsimd.X86.AVXVNNI() {
1128 t.Skip("Test requires X86.AVXVNNI, not available on this hardware")
1129 return
1130 }
1131 xd := make([]int8, 16)
1132 yd := make([]uint8, 16)
1133 zd := make([]int32, 4)
1134 wanted1 := make([]int32, 4)
1135 wanted2 := make([]int32, 4)
1136 res1 := make([]int32, 4)
1137 res2 := make([]int32, 4)
1138 for i := range 4 {
1139 xd[i] = 5
1140 yd[i] = 6
1141 zd[i] = 3
1142 wanted1[i] = 30
1143 wanted2[i] = 30
1144 }
1145 x := archsimd.LoadInt8x16Slice(xd)
1146 y := archsimd.LoadUint8x16Slice(yd)
1147 z := archsimd.LoadInt32x4Slice(zd)
1148 x.DotProductQuadruple(y).StoreSlice(res1)
1149 x.DotProductQuadruple(y).Add(z).StoreSlice(res1)
1150 for i := range 4 {
1151 if res1[i] != wanted1[i] {
1152 t.Errorf("got %d wanted %d", res1[i], wanted1[i])
1153 }
1154 if res2[i] != wanted2[i] {
1155 t.Errorf("got %d wanted %d", res2[i], wanted2[i])
1156 }
1157 }
1158 }
1159
1160 func TestPermuteScalars(t *testing.T) {
1161 x := []int32{11, 12, 13, 14}
1162 want := []int32{12, 13, 14, 11}
1163 got := make([]int32, 4)
1164 archsimd.LoadInt32x4Slice(x).PermuteScalars(1, 2, 3, 0).StoreSlice(got)
1165 checkSlices(t, got, want)
1166 }
1167
1168 func TestPermuteScalarsGrouped(t *testing.T) {
1169 x := []int32{11, 12, 13, 14, 21, 22, 23, 24}
1170 want := []int32{12, 13, 14, 11, 22, 23, 24, 21}
1171 got := make([]int32, 8)
1172 archsimd.LoadInt32x8Slice(x).PermuteScalarsGrouped(1, 2, 3, 0).StoreSlice(got)
1173 checkSlices(t, got, want)
1174 }
1175
1176 func TestPermuteScalarsHi(t *testing.T) {
1177 x := []int16{-1, -2, -3, -4, 11, 12, 13, 14}
1178 want := []int16{-1, -2, -3, -4, 12, 13, 14, 11}
1179 got := make([]int16, len(x))
1180 archsimd.LoadInt16x8Slice(x).PermuteScalarsHi(1, 2, 3, 0).StoreSlice(got)
1181 checkSlices(t, got, want)
1182 }
1183
1184 func TestPermuteScalarsLo(t *testing.T) {
1185 x := []int16{11, 12, 13, 14, 4, 5, 6, 7}
1186 want := []int16{12, 13, 14, 11, 4, 5, 6, 7}
1187 got := make([]int16, len(x))
1188 archsimd.LoadInt16x8Slice(x).PermuteScalarsLo(1, 2, 3, 0).StoreSlice(got)
1189 checkSlices(t, got, want)
1190 }
1191
1192 func TestPermuteScalarsHiGrouped(t *testing.T) {
1193 x := []int16{-1, -2, -3, -4, 11, 12, 13, 14, -11, -12, -13, -14, 111, 112, 113, 114}
1194 want := []int16{-1, -2, -3, -4, 12, 13, 14, 11, -11, -12, -13, -14, 112, 113, 114, 111}
1195 got := make([]int16, len(x))
1196 archsimd.LoadInt16x16Slice(x).PermuteScalarsHiGrouped(1, 2, 3, 0).StoreSlice(got)
1197 checkSlices(t, got, want)
1198 }
1199
1200 func TestPermuteScalarsLoGrouped(t *testing.T) {
1201 x := []int16{11, 12, 13, 14, 4, 5, 6, 7, 111, 112, 113, 114, 14, 15, 16, 17}
1202 want := []int16{12, 13, 14, 11, 4, 5, 6, 7, 112, 113, 114, 111, 14, 15, 16, 17}
1203 got := make([]int16, len(x))
1204 archsimd.LoadInt16x16Slice(x).PermuteScalarsLoGrouped(1, 2, 3, 0).StoreSlice(got)
1205 checkSlices(t, got, want)
1206 }
1207
1208 func TestClMul(t *testing.T) {
1209 var x = archsimd.LoadUint64x2Slice([]uint64{1, 5})
1210 var y = archsimd.LoadUint64x2Slice([]uint64{3, 9})
1211
1212 foo := func(v archsimd.Uint64x2, s []uint64) {
1213 r := make([]uint64, 2, 2)
1214 v.StoreSlice(r)
1215 checkSlices[uint64](t, r, s)
1216 }
1217
1218 foo(x.CarrylessMultiply(0, 0, y), []uint64{3, 0})
1219 foo(x.CarrylessMultiply(0, 1, y), []uint64{9, 0})
1220 foo(x.CarrylessMultiply(1, 0, y), []uint64{15, 0})
1221 foo(x.CarrylessMultiply(1, 1, y), []uint64{45, 0})
1222 foo(y.CarrylessMultiply(0, 0, y), []uint64{5, 0})
1223
1224 }
1225
View as plain text