Source file src/simd/archsimd/ops_amd64.go

     1  // Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT.
     2  
     3  //go:build goexperiment.simd
     4  
     5  package archsimd
     6  
     7  /* AESDecryptLastRound */
     8  
     9  // AESDecryptLastRound performs the final round of the AES Equivalent Inverse Cipher defined in FIPS 197.
    10  // x is the state array; from low index to high its bytes are s00, s10, s20, s30, s01, ..., s33.
    11  // y is the round key, i.e. the chunk of the dw array in use.
    12  // result = AddRoundKey(InvShiftRows(InvSubBytes(x)), y)
    13  //
    14  // Asm: VAESDECLAST, CPU Feature: AVX, AES
    15  func (x Uint8x16) AESDecryptLastRound(y Uint32x4) Uint8x16
    16  
    17  // AESDecryptLastRound performs the final round of the AES Equivalent Inverse Cipher defined in FIPS 197.
    18  // x holds two state arrays, one per 128-bit lane; from low index to high each is s00, s10, s20, s30, s01, ..., s33.
    19  // y holds the matching round keys, one chunk of the dw array per 128-bit lane.
    20  // For each lane, result = AddRoundKey(InvShiftRows(InvSubBytes(x)), y)
    21  //
    22  // Asm: VAESDECLAST, CPU Feature: AVX512VAES
    23  func (x Uint8x32) AESDecryptLastRound(y Uint32x8) Uint8x32
    24  
    25  // AESDecryptLastRound performs the final round of the AES Equivalent Inverse Cipher defined in FIPS 197.
    26  // x holds four state arrays, one per 128-bit lane; from low index to high each is s00, s10, s20, s30, s01, ..., s33.
    27  // y holds the matching round keys, one chunk of the dw array per 128-bit lane.
    28  // For each lane, result = AddRoundKey(InvShiftRows(InvSubBytes(x)), y)
    29  //
    30  // Asm: VAESDECLAST, CPU Feature: AVX512VAES
    31  func (x Uint8x64) AESDecryptLastRound(y Uint32x16) Uint8x64
    32  
    33  /* AESDecryptOneRound */
    34  
    35  // AESDecryptOneRound performs one regular round of the AES Equivalent Inverse Cipher defined in FIPS 197.
    36  // x is the state array; from low index to high its bytes are s00, s10, s20, s30, s01, ..., s33.
    37  // y is the round key, i.e. the chunk of the dw array in use.
    38  // result = AddRoundKey(InvMixColumns(InvShiftRows(InvSubBytes(x))), y)
    39  //
    40  // Asm: VAESDEC, CPU Feature: AVX, AES
    41  func (x Uint8x16) AESDecryptOneRound(y Uint32x4) Uint8x16
    42  
    43  // AESDecryptOneRound performs one regular round of the AES Equivalent Inverse Cipher defined in FIPS 197.
    44  // x holds two state arrays, one per 128-bit lane; from low index to high each is s00, s10, s20, s30, s01, ..., s33.
    45  // y holds the matching round keys, one chunk of the dw array per 128-bit lane.
    46  // For each lane, result = AddRoundKey(InvMixColumns(InvShiftRows(InvSubBytes(x))), y)
    47  //
    48  // Asm: VAESDEC, CPU Feature: AVX512VAES
    49  func (x Uint8x32) AESDecryptOneRound(y Uint32x8) Uint8x32
    50  
    51  // AESDecryptOneRound performs one regular round of the AES Equivalent Inverse Cipher defined in FIPS 197.
    52  // x holds four state arrays, one per 128-bit lane; from low index to high each is s00, s10, s20, s30, s01, ..., s33.
    53  // y holds the matching round keys, one chunk of the dw array per 128-bit lane.
    54  // For each lane, result = AddRoundKey(InvMixColumns(InvShiftRows(InvSubBytes(x))), y)
    55  //
    56  // Asm: VAESDEC, CPU Feature: AVX512VAES
    57  func (x Uint8x64) AESDecryptOneRound(y Uint32x16) Uint8x64
    58  
    59  /* AESEncryptLastRound */
    60  
    61  // AESEncryptLastRound performs the final round of the AES cipher (encryption) defined in FIPS 197.
    62  // x is the state array; from low index to high its bytes are s00, s10, s20, s30, s01, ..., s33.
    63  // y is the round key, i.e. the chunk of the w array in use.
    64  // result = AddRoundKey(ShiftRows(SubBytes(x)), y)
    65  //
    66  // Asm: VAESENCLAST, CPU Feature: AVX, AES
    67  func (x Uint8x16) AESEncryptLastRound(y Uint32x4) Uint8x16
    68  
    69  // AESEncryptLastRound performs the final round of the AES cipher (encryption) defined in FIPS 197.
    70  // x holds two state arrays, one per 128-bit lane; from low index to high each is s00, s10, s20, s30, s01, ..., s33.
    71  // y holds the matching round keys, one chunk of the w array per 128-bit lane.
    72  // For each lane, result = AddRoundKey(ShiftRows(SubBytes(x)), y)
    73  //
    74  // Asm: VAESENCLAST, CPU Feature: AVX512VAES
    75  func (x Uint8x32) AESEncryptLastRound(y Uint32x8) Uint8x32
    76  
    77  // AESEncryptLastRound performs the final round of the AES cipher (encryption) defined in FIPS 197.
    78  // x holds four state arrays, one per 128-bit lane; from low index to high each is s00, s10, s20, s30, s01, ..., s33.
    79  // y holds the matching round keys, one chunk of the w array per 128-bit lane.
    80  // For each lane, result = AddRoundKey(ShiftRows(SubBytes(x)), y)
    81  //
    82  // Asm: VAESENCLAST, CPU Feature: AVX512VAES
    83  func (x Uint8x64) AESEncryptLastRound(y Uint32x16) Uint8x64
    84  
    85  /* AESEncryptOneRound */
    86  
    87  // AESEncryptOneRound performs one regular round of the AES cipher (encryption) defined in FIPS 197.
    88  // x is the state array; from low index to high its bytes are s00, s10, s20, s30, s01, ..., s33.
    89  // y is the round key, i.e. the chunk of the w array in use.
    90  // result = AddRoundKey(MixColumns(ShiftRows(SubBytes(x))), y)
    91  //
    92  // Asm: VAESENC, CPU Feature: AVX, AES
    93  func (x Uint8x16) AESEncryptOneRound(y Uint32x4) Uint8x16
    94  
    95  // AESEncryptOneRound performs one regular round of the AES cipher (encryption) defined in FIPS 197.
    96  // x holds two state arrays, one per 128-bit lane; from low index to high each is s00, s10, s20, s30, s01, ..., s33.
    97  // y holds the matching round keys, one chunk of the w array per 128-bit lane.
    98  // For each lane, result = AddRoundKey(MixColumns(ShiftRows(SubBytes(x))), y)
    99  //
   100  // Asm: VAESENC, CPU Feature: AVX512VAES
   101  func (x Uint8x32) AESEncryptOneRound(y Uint32x8) Uint8x32
   102  
   103  // AESEncryptOneRound performs one regular round of the AES cipher (encryption) defined in FIPS 197.
   104  // x holds four state arrays, one per 128-bit lane; from low index to high each is s00, s10, s20, s30, s01, ..., s33.
   105  // y holds the matching round keys, one chunk of the w array per 128-bit lane.
   106  // For each lane, result = AddRoundKey(MixColumns(ShiftRows(SubBytes(x))), y)
   107  //
   108  // Asm: VAESENC, CPU Feature: AVX512VAES
   109  func (x Uint8x64) AESEncryptOneRound(y Uint32x16) Uint8x64
   110  
   111  /* AESInvMixColumns */
   112  
   113  // AESInvMixColumns applies the InvMixColumns transformation of the AES cipher algorithm defined in FIPS 197.
   114  // x is the chunk of the w array in use (four words).
   115  // result = InvMixColumns(x)
   116  //
   117  // Asm: VAESIMC, CPU Feature: AVX, AES
   118  func (x Uint32x4) AESInvMixColumns() Uint32x4
   119  
   120  /* AESRoundKeyGenAssist */
   121  
   122  // AESRoundKeyGenAssist performs some components of KeyExpansion in AES cipher algorithm defined in FIPS 197.
   123  // x is an array of AES words, but only x[1] and x[3] are used.
   124  // rconVal is a value from the Rcon constant array.
   125  // result[0] = SubWord(x[1])
   126  // result[1] = XOR(RotWord(SubWord(x[1])), rconVal)
   127  // result[2] = SubWord(x[3])
   128  // result[3] = XOR(RotWord(SubWord(x[3])), rconVal)
   129  //
   130  // rconVal results in better performance when it's a constant; a non-constant value will be translated into a jump table.
   131  //
   132  // Asm: VAESKEYGENASSIST, CPU Feature: AVX, AES
   133  func (x Uint32x4) AESRoundKeyGenAssist(rconVal uint8) Uint32x4
   134  
   135  /* Abs */
   136  
   137  // Abs returns the absolute value of each element of x; the minimum (most negative) value is returned unchanged.
   138  //
   139  // Asm: VPABSB, CPU Feature: AVX
   140  func (x Int8x16) Abs() Int8x16
   141  
   142  // Abs returns the absolute value of each element of x; the minimum (most negative) value is returned unchanged.
   143  //
   144  // Asm: VPABSB, CPU Feature: AVX2
   145  func (x Int8x32) Abs() Int8x32
   146  
   147  // Abs returns the absolute value of each element of x; the minimum (most negative) value is returned unchanged.
   148  //
   149  // Asm: VPABSB, CPU Feature: AVX512
   150  func (x Int8x64) Abs() Int8x64
   151  
   152  // Abs returns the absolute value of each element of x; the minimum (most negative) value is returned unchanged.
   153  //
   154  // Asm: VPABSW, CPU Feature: AVX
   155  func (x Int16x8) Abs() Int16x8
   156  
   157  // Abs returns the absolute value of each element of x; the minimum (most negative) value is returned unchanged.
   158  //
   159  // Asm: VPABSW, CPU Feature: AVX2
   160  func (x Int16x16) Abs() Int16x16
   161  
   162  // Abs returns the absolute value of each element of x; the minimum (most negative) value is returned unchanged.
   163  //
   164  // Asm: VPABSW, CPU Feature: AVX512
   165  func (x Int16x32) Abs() Int16x32
   166  
   167  // Abs returns the absolute value of each element of x; the minimum (most negative) value is returned unchanged.
   168  //
   169  // Asm: VPABSD, CPU Feature: AVX
   170  func (x Int32x4) Abs() Int32x4
   171  
   172  // Abs returns the absolute value of each element of x; the minimum (most negative) value is returned unchanged.
   173  //
   174  // Asm: VPABSD, CPU Feature: AVX2
   175  func (x Int32x8) Abs() Int32x8
   176  
   177  // Abs returns the absolute value of each element of x; the minimum (most negative) value is returned unchanged.
   178  //
   179  // Asm: VPABSD, CPU Feature: AVX512
   180  func (x Int32x16) Abs() Int32x16
   181  
   182  // Abs returns the absolute value of each element of x; the minimum (most negative) value is returned unchanged.
   183  //
   184  // Asm: VPABSQ, CPU Feature: AVX512
   185  func (x Int64x2) Abs() Int64x2
   186  
   187  // Abs returns the absolute value of each element of x; the minimum (most negative) value is returned unchanged.
   188  //
   189  // Asm: VPABSQ, CPU Feature: AVX512
   190  func (x Int64x4) Abs() Int64x4
   191  
   192  // Abs returns the absolute value of each element of x; the minimum (most negative) value is returned unchanged.
   193  //
   194  // Asm: VPABSQ, CPU Feature: AVX512
   195  func (x Int64x8) Abs() Int64x8
   196  
   197  /* Add */
   198  
   199  // Add returns the element-wise sum of x and y.
   200  //
   201  // Asm: VADDPS, CPU Feature: AVX
   202  func (x Float32x4) Add(y Float32x4) Float32x4
   203  
   204  // Add returns the element-wise sum of x and y.
   205  //
   206  // Asm: VADDPS, CPU Feature: AVX
   207  func (x Float32x8) Add(y Float32x8) Float32x8
   208  
   209  // Add returns the element-wise sum of x and y.
   210  //
   211  // Asm: VADDPS, CPU Feature: AVX512
   212  func (x Float32x16) Add(y Float32x16) Float32x16
   213  
   214  // Add returns the element-wise sum of x and y.
   215  //
   216  // Asm: VADDPD, CPU Feature: AVX
   217  func (x Float64x2) Add(y Float64x2) Float64x2
   218  
   219  // Add returns the element-wise sum of x and y.
   220  //
   221  // Asm: VADDPD, CPU Feature: AVX
   222  func (x Float64x4) Add(y Float64x4) Float64x4
   223  
   224  // Add returns the element-wise sum of x and y.
   225  //
   226  // Asm: VADDPD, CPU Feature: AVX512
   227  func (x Float64x8) Add(y Float64x8) Float64x8
   228  
   229  // Add returns the element-wise sum of x and y; each element wraps around on overflow.
   230  //
   231  // Asm: VPADDB, CPU Feature: AVX
   232  func (x Int8x16) Add(y Int8x16) Int8x16
   233  
   234  // Add returns the element-wise sum of x and y; each element wraps around on overflow.
   235  //
   236  // Asm: VPADDB, CPU Feature: AVX2
   237  func (x Int8x32) Add(y Int8x32) Int8x32
   238  
   239  // Add returns the element-wise sum of x and y; each element wraps around on overflow.
   240  //
   241  // Asm: VPADDB, CPU Feature: AVX512
   242  func (x Int8x64) Add(y Int8x64) Int8x64
   243  
   244  // Add returns the element-wise sum of x and y; each element wraps around on overflow.
   245  //
   246  // Asm: VPADDW, CPU Feature: AVX
   247  func (x Int16x8) Add(y Int16x8) Int16x8
   248  
   249  // Add returns the element-wise sum of x and y; each element wraps around on overflow.
   250  //
   251  // Asm: VPADDW, CPU Feature: AVX2
   252  func (x Int16x16) Add(y Int16x16) Int16x16
   253  
   254  // Add returns the element-wise sum of x and y; each element wraps around on overflow.
   255  //
   256  // Asm: VPADDW, CPU Feature: AVX512
   257  func (x Int16x32) Add(y Int16x32) Int16x32
   258  
   259  // Add returns the element-wise sum of x and y; each element wraps around on overflow.
   260  //
   261  // Asm: VPADDD, CPU Feature: AVX
   262  func (x Int32x4) Add(y Int32x4) Int32x4
   263  
   264  // Add returns the element-wise sum of x and y; each element wraps around on overflow.
   265  //
   266  // Asm: VPADDD, CPU Feature: AVX2
   267  func (x Int32x8) Add(y Int32x8) Int32x8
   268  
   269  // Add returns the element-wise sum of x and y; each element wraps around on overflow.
   270  //
   271  // Asm: VPADDD, CPU Feature: AVX512
   272  func (x Int32x16) Add(y Int32x16) Int32x16
   273  
   274  // Add returns the element-wise sum of x and y; each element wraps around on overflow.
   275  //
   276  // Asm: VPADDQ, CPU Feature: AVX
   277  func (x Int64x2) Add(y Int64x2) Int64x2
   278  
   279  // Add returns the element-wise sum of x and y; each element wraps around on overflow.
   280  //
   281  // Asm: VPADDQ, CPU Feature: AVX2
   282  func (x Int64x4) Add(y Int64x4) Int64x4
   283  
   284  // Add returns the element-wise sum of x and y; each element wraps around on overflow.
   285  //
   286  // Asm: VPADDQ, CPU Feature: AVX512
   287  func (x Int64x8) Add(y Int64x8) Int64x8
   288  
   289  // Add returns the element-wise sum of x and y; each element wraps around on overflow.
   290  //
   291  // Asm: VPADDB, CPU Feature: AVX
   292  func (x Uint8x16) Add(y Uint8x16) Uint8x16
   293  
   294  // Add returns the element-wise sum of x and y; each element wraps around on overflow.
   295  //
   296  // Asm: VPADDB, CPU Feature: AVX2
   297  func (x Uint8x32) Add(y Uint8x32) Uint8x32
   298  
   299  // Add returns the element-wise sum of x and y; each element wraps around on overflow.
   300  //
   301  // Asm: VPADDB, CPU Feature: AVX512
   302  func (x Uint8x64) Add(y Uint8x64) Uint8x64
   303  
   304  // Add returns the element-wise sum of x and y; each element wraps around on overflow.
   305  //
   306  // Asm: VPADDW, CPU Feature: AVX
   307  func (x Uint16x8) Add(y Uint16x8) Uint16x8
   308  
   309  // Add returns the element-wise sum of x and y; each element wraps around on overflow.
   310  //
   311  // Asm: VPADDW, CPU Feature: AVX2
   312  func (x Uint16x16) Add(y Uint16x16) Uint16x16
   313  
   314  // Add returns the element-wise sum of x and y; each element wraps around on overflow.
   315  //
   316  // Asm: VPADDW, CPU Feature: AVX512
   317  func (x Uint16x32) Add(y Uint16x32) Uint16x32
   318  
   319  // Add returns the element-wise sum of x and y; each element wraps around on overflow.
   320  //
   321  // Asm: VPADDD, CPU Feature: AVX
   322  func (x Uint32x4) Add(y Uint32x4) Uint32x4
   323  
   324  // Add returns the element-wise sum of x and y; each element wraps around on overflow.
   325  //
   326  // Asm: VPADDD, CPU Feature: AVX2
   327  func (x Uint32x8) Add(y Uint32x8) Uint32x8
   328  
   329  // Add returns the element-wise sum of x and y; each element wraps around on overflow.
   330  //
   331  // Asm: VPADDD, CPU Feature: AVX512
   332  func (x Uint32x16) Add(y Uint32x16) Uint32x16
   333  
   334  // Add returns the element-wise sum of x and y; each element wraps around on overflow.
   335  //
   336  // Asm: VPADDQ, CPU Feature: AVX
   337  func (x Uint64x2) Add(y Uint64x2) Uint64x2
   338  
   339  // Add returns the element-wise sum of x and y; each element wraps around on overflow.
   340  //
   341  // Asm: VPADDQ, CPU Feature: AVX2
   342  func (x Uint64x4) Add(y Uint64x4) Uint64x4
   343  
   344  // Add returns the element-wise sum of x and y; each element wraps around on overflow.
   345  //
   346  // Asm: VPADDQ, CPU Feature: AVX512
   347  func (x Uint64x8) Add(y Uint64x8) Uint64x8
   348  
   349  /* AddPairs */
   350  
   351  // AddPairs adds each adjacent pair of elements within x and within y (a horizontal add). NOTE(review): confirm the x/y order of the result halves against the instruction's operand order.
   352  // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
   353  //
   354  // Asm: VHADDPS, CPU Feature: AVX
   355  func (x Float32x4) AddPairs(y Float32x4) Float32x4
   356  
   357  // AddPairs adds each adjacent pair of elements within x and within y (a horizontal add). NOTE(review): confirm the x/y order of the result halves against the instruction's operand order.
   358  // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. NOTE(review): VHADDPS on 256-bit vectors works within each 128-bit lane, so this layout presumably applies per lane; confirm.
   359  //
   360  // Asm: VHADDPS, CPU Feature: AVX
   361  func (x Float32x8) AddPairs(y Float32x8) Float32x8
   362  
   363  // AddPairs adds each adjacent pair of elements within x and within y (a horizontal add). NOTE(review): confirm the x/y order of the result halves against the instruction's operand order.
   364  // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
   365  //
   366  // Asm: VHADDPD, CPU Feature: AVX
   367  func (x Float64x2) AddPairs(y Float64x2) Float64x2
   368  
   369  // AddPairs adds each adjacent pair of elements within x and within y (a horizontal add). NOTE(review): confirm the x/y order of the result halves against the instruction's operand order.
   370  // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. NOTE(review): VHADDPD on 256-bit vectors works within each 128-bit lane, so this layout presumably applies per lane; confirm.
   371  //
   372  // Asm: VHADDPD, CPU Feature: AVX
   373  func (x Float64x4) AddPairs(y Float64x4) Float64x4
   374  
   375  // AddPairs adds each adjacent pair of elements within x and within y (a horizontal add). NOTE(review): confirm the x/y order of the result halves against the instruction's operand order.
   376  // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
   377  //
   378  // Asm: VPHADDW, CPU Feature: AVX
   379  func (x Int16x8) AddPairs(y Int16x8) Int16x8
   380  
   381  // AddPairs adds each adjacent pair of elements within x and within y (a horizontal add). NOTE(review): confirm the x/y order of the result halves against the instruction's operand order.
   382  // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. NOTE(review): VPHADDW on 256-bit vectors works within each 128-bit lane, so this layout presumably applies per lane; confirm.
   383  //
   384  // Asm: VPHADDW, CPU Feature: AVX2
   385  func (x Int16x16) AddPairs(y Int16x16) Int16x16
   386  
   387  // AddPairs adds each adjacent pair of elements within x and within y (a horizontal add). NOTE(review): confirm the x/y order of the result halves against the instruction's operand order.
   388  // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
   389  //
   390  // Asm: VPHADDD, CPU Feature: AVX
   391  func (x Int32x4) AddPairs(y Int32x4) Int32x4
   392  
   393  // AddPairs adds each adjacent pair of elements within x and within y (a horizontal add). NOTE(review): confirm the x/y order of the result halves against the instruction's operand order.
   394  // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. NOTE(review): VPHADDD on 256-bit vectors works within each 128-bit lane, so this layout presumably applies per lane; confirm.
   395  //
   396  // Asm: VPHADDD, CPU Feature: AVX2
   397  func (x Int32x8) AddPairs(y Int32x8) Int32x8
   398  
   399  // AddPairs adds each adjacent pair of elements within x and within y (a horizontal add). NOTE(review): confirm the x/y order of the result halves against the instruction's operand order.
   400  // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
   401  //
   402  // Asm: VPHADDW, CPU Feature: AVX
   403  func (x Uint16x8) AddPairs(y Uint16x8) Uint16x8
   404  
   405  // AddPairs adds each adjacent pair of elements within x and within y (a horizontal add). NOTE(review): confirm the x/y order of the result halves against the instruction's operand order.
   406  // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. NOTE(review): VPHADDW on 256-bit vectors works within each 128-bit lane, so this layout presumably applies per lane; confirm.
   407  //
   408  // Asm: VPHADDW, CPU Feature: AVX2
   409  func (x Uint16x16) AddPairs(y Uint16x16) Uint16x16
   410  
   411  // AddPairs adds each adjacent pair of elements within x and within y (a horizontal add). NOTE(review): confirm the x/y order of the result halves against the instruction's operand order.
   412  // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
   413  //
   414  // Asm: VPHADDD, CPU Feature: AVX
   415  func (x Uint32x4) AddPairs(y Uint32x4) Uint32x4
   416  
   417  // AddPairs adds each adjacent pair of elements within x and within y (a horizontal add). NOTE(review): confirm the x/y order of the result halves against the instruction's operand order.
   418  // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. NOTE(review): VPHADDD on 256-bit vectors works within each 128-bit lane, so this layout presumably applies per lane; confirm.
   419  //
   420  // Asm: VPHADDD, CPU Feature: AVX2
   421  func (x Uint32x8) AddPairs(y Uint32x8) Uint32x8
   422  
   423  /* AddPairsSaturated */
   424  
   425  // AddPairsSaturated adds each adjacent pair of elements within x and within y, clamping each sum to the int16 range (signed saturation). NOTE(review): confirm the x/y order of the result halves against the instruction's operand order.
   426  // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
   427  //
   428  // Asm: VPHADDSW, CPU Feature: AVX
   429  func (x Int16x8) AddPairsSaturated(y Int16x8) Int16x8
   430  
   431  // AddPairsSaturated adds each adjacent pair of elements within x and within y, clamping each sum to the int16 range (signed saturation). NOTE(review): confirm the x/y order of the result halves against the instruction's operand order.
   432  // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. NOTE(review): VPHADDSW on 256-bit vectors works within each 128-bit lane, so this layout presumably applies per lane; confirm.
   433  //
   434  // Asm: VPHADDSW, CPU Feature: AVX2
   435  func (x Int16x16) AddPairsSaturated(y Int16x16) Int16x16
   436  
   437  /* AddSaturated */
   438  
   439  // AddSaturated returns the element-wise sum of x and y, clamping each result to the signed element range instead of wrapping.
   440  //
   441  // Asm: VPADDSB, CPU Feature: AVX
   442  func (x Int8x16) AddSaturated(y Int8x16) Int8x16
   443  
   444  // AddSaturated returns the element-wise sum of x and y, clamping each result to the signed element range instead of wrapping.
   445  //
   446  // Asm: VPADDSB, CPU Feature: AVX2
   447  func (x Int8x32) AddSaturated(y Int8x32) Int8x32
   448  
   449  // AddSaturated returns the element-wise sum of x and y, clamping each result to the signed element range instead of wrapping.
   450  //
   451  // Asm: VPADDSB, CPU Feature: AVX512
   452  func (x Int8x64) AddSaturated(y Int8x64) Int8x64
   453  
   454  // AddSaturated returns the element-wise sum of x and y, clamping each result to the signed element range instead of wrapping.
   455  //
   456  // Asm: VPADDSW, CPU Feature: AVX
   457  func (x Int16x8) AddSaturated(y Int16x8) Int16x8
   458  
   459  // AddSaturated returns the element-wise sum of x and y, clamping each result to the signed element range instead of wrapping.
   460  //
   461  // Asm: VPADDSW, CPU Feature: AVX2
   462  func (x Int16x16) AddSaturated(y Int16x16) Int16x16
   463  
   464  // AddSaturated returns the element-wise sum of x and y, clamping each result to the signed element range instead of wrapping.
   465  //
   466  // Asm: VPADDSW, CPU Feature: AVX512
   467  func (x Int16x32) AddSaturated(y Int16x32) Int16x32
   468  
   469  // AddSaturated returns the element-wise sum of x and y, clamping each result to the unsigned element maximum instead of wrapping.
   470  //
   471  // Asm: VPADDUSB, CPU Feature: AVX
   472  func (x Uint8x16) AddSaturated(y Uint8x16) Uint8x16
   473  
   474  // AddSaturated returns the element-wise sum of x and y, clamping each result to the unsigned element maximum instead of wrapping.
   475  //
   476  // Asm: VPADDUSB, CPU Feature: AVX2
   477  func (x Uint8x32) AddSaturated(y Uint8x32) Uint8x32
   478  
   479  // AddSaturated returns the element-wise sum of x and y, clamping each result to the unsigned element maximum instead of wrapping.
   480  //
   481  // Asm: VPADDUSB, CPU Feature: AVX512
   482  func (x Uint8x64) AddSaturated(y Uint8x64) Uint8x64
   483  
   484  // AddSaturated returns the element-wise sum of x and y, clamping each result to the unsigned element maximum instead of wrapping.
   485  //
   486  // Asm: VPADDUSW, CPU Feature: AVX
   487  func (x Uint16x8) AddSaturated(y Uint16x8) Uint16x8
   488  
   489  // AddSaturated returns the element-wise sum of x and y, clamping each result to the unsigned element maximum instead of wrapping.
   490  //
   491  // Asm: VPADDUSW, CPU Feature: AVX2
   492  func (x Uint16x16) AddSaturated(y Uint16x16) Uint16x16
   493  
   494  // AddSaturated returns the element-wise sum of x and y, clamping each result to the unsigned element maximum instead of wrapping.
   495  //
   496  // Asm: VPADDUSW, CPU Feature: AVX512
   497  func (x Uint16x32) AddSaturated(y Uint16x32) Uint16x32
   498  
   499  /* AddSub */
   500  
   501  // AddSub subtracts the even-indexed elements and adds the odd-indexed elements of two vectors; presumably x[i]-y[i] for even i and x[i]+y[i] for odd i (confirm operand order).
   502  //
   503  // Asm: VADDSUBPS, CPU Feature: AVX
   504  func (x Float32x4) AddSub(y Float32x4) Float32x4
   505  
   506  // AddSub subtracts the even-indexed elements and adds the odd-indexed elements of two vectors; presumably x[i]-y[i] for even i and x[i]+y[i] for odd i (confirm operand order).
   507  //
   508  // Asm: VADDSUBPS, CPU Feature: AVX
   509  func (x Float32x8) AddSub(y Float32x8) Float32x8
   510  
   511  // AddSub subtracts the even-indexed elements and adds the odd-indexed elements of two vectors; presumably x[i]-y[i] for even i and x[i]+y[i] for odd i (confirm operand order).
   512  //
   513  // Asm: VADDSUBPD, CPU Feature: AVX
   514  func (x Float64x2) AddSub(y Float64x2) Float64x2
   515  
   516  // AddSub subtracts the even-indexed elements and adds the odd-indexed elements of two vectors; presumably x[i]-y[i] for even i and x[i]+y[i] for odd i (confirm operand order).
   517  //
   518  // Asm: VADDSUBPD, CPU Feature: AVX
   519  func (x Float64x4) AddSub(y Float64x4) Float64x4
   520  
   521  /* And */
   522  
   523  // And returns the bitwise AND of x and y.
   524  //
   525  // Asm: VPAND, CPU Feature: AVX
   526  func (x Int8x16) And(y Int8x16) Int8x16
   527  
   528  // And returns the bitwise AND of x and y.
   529  //
   530  // Asm: VPAND, CPU Feature: AVX2
   531  func (x Int8x32) And(y Int8x32) Int8x32
   532  
   533  // And returns the bitwise AND of x and y.
   534  //
   535  // Asm: VPANDD, CPU Feature: AVX512
   536  func (x Int8x64) And(y Int8x64) Int8x64
   537  
   538  // And returns the bitwise AND of x and y.
   539  //
   540  // Asm: VPAND, CPU Feature: AVX
   541  func (x Int16x8) And(y Int16x8) Int16x8
   542  
   543  // And returns the bitwise AND of x and y.
   544  //
   545  // Asm: VPAND, CPU Feature: AVX2
   546  func (x Int16x16) And(y Int16x16) Int16x16
   547  
   548  // And returns the bitwise AND of x and y.
   549  //
   550  // Asm: VPANDD, CPU Feature: AVX512
   551  func (x Int16x32) And(y Int16x32) Int16x32
   552  
   553  // And returns the bitwise AND of x and y.
   554  //
   555  // Asm: VPAND, CPU Feature: AVX
   556  func (x Int32x4) And(y Int32x4) Int32x4
   557  
   558  // And returns the bitwise AND of x and y.
   559  //
   560  // Asm: VPAND, CPU Feature: AVX2
   561  func (x Int32x8) And(y Int32x8) Int32x8
   562  
   563  // And returns the bitwise AND of x and y.
   564  //
   565  // Asm: VPANDD, CPU Feature: AVX512
   566  func (x Int32x16) And(y Int32x16) Int32x16
   567  
   568  // And returns the bitwise AND of x and y.
   569  //
   570  // Asm: VPAND, CPU Feature: AVX
   571  func (x Int64x2) And(y Int64x2) Int64x2
   572  
   573  // And returns the bitwise AND of x and y.
   574  //
   575  // Asm: VPAND, CPU Feature: AVX2
   576  func (x Int64x4) And(y Int64x4) Int64x4
   577  
   578  // And returns the bitwise AND of x and y.
   579  //
   580  // Asm: VPANDQ, CPU Feature: AVX512
   581  func (x Int64x8) And(y Int64x8) Int64x8
   582  
   583  // And returns the bitwise AND of x and y.
   584  //
   585  // Asm: VPAND, CPU Feature: AVX
   586  func (x Uint8x16) And(y Uint8x16) Uint8x16
   587  
   588  // And returns the bitwise AND of x and y.
   589  //
   590  // Asm: VPAND, CPU Feature: AVX2
   591  func (x Uint8x32) And(y Uint8x32) Uint8x32
   592  
   593  // And returns the bitwise AND of x and y.
   594  //
   595  // Asm: VPANDD, CPU Feature: AVX512
   596  func (x Uint8x64) And(y Uint8x64) Uint8x64
   597  
   598  // And returns the bitwise AND of x and y.
   599  //
   600  // Asm: VPAND, CPU Feature: AVX
   601  func (x Uint16x8) And(y Uint16x8) Uint16x8
   602  
   603  // And returns the bitwise AND of x and y.
   604  //
   605  // Asm: VPAND, CPU Feature: AVX2
   606  func (x Uint16x16) And(y Uint16x16) Uint16x16
   607  
   608  // And returns the bitwise AND of x and y.
   609  //
   610  // Asm: VPANDD, CPU Feature: AVX512
   611  func (x Uint16x32) And(y Uint16x32) Uint16x32
   612  
   613  // And returns the bitwise AND of x and y.
   614  //
   615  // Asm: VPAND, CPU Feature: AVX
   616  func (x Uint32x4) And(y Uint32x4) Uint32x4
   617  
   618  // And returns the bitwise AND of x and y.
   619  //
   620  // Asm: VPAND, CPU Feature: AVX2
   621  func (x Uint32x8) And(y Uint32x8) Uint32x8
   622  
   623  // And returns the bitwise AND of x and y.
   624  //
   625  // Asm: VPANDD, CPU Feature: AVX512
   626  func (x Uint32x16) And(y Uint32x16) Uint32x16
   627  
   628  // And returns the bitwise AND of x and y.
   629  //
   630  // Asm: VPAND, CPU Feature: AVX
   631  func (x Uint64x2) And(y Uint64x2) Uint64x2
   632  
   633  // And returns the bitwise AND of x and y.
   634  //
   635  // Asm: VPAND, CPU Feature: AVX2
   636  func (x Uint64x4) And(y Uint64x4) Uint64x4
   637  
   638  // And returns the bitwise AND of x and y.
   639  //
   640  // Asm: VPANDQ, CPU Feature: AVX512
   641  func (x Uint64x8) And(y Uint64x8) Uint64x8
   642  
   643  /* AndNot */
   644  
   645  // AndNot returns the bitwise x &^ y: each bit of x is cleared where the corresponding bit of y is set.
   646  //
   647  // Asm: VPANDN, CPU Feature: AVX
   648  func (x Int8x16) AndNot(y Int8x16) Int8x16
   649  
   650  // AndNot returns the bitwise x &^ y: each bit of x is cleared where the corresponding bit of y is set.
   651  //
   652  // Asm: VPANDN, CPU Feature: AVX2
   653  func (x Int8x32) AndNot(y Int8x32) Int8x32
   654  
   655  // AndNot returns the bitwise x &^ y: each bit of x is cleared where the corresponding bit of y is set.
   656  //
   657  // Asm: VPANDND, CPU Feature: AVX512
   658  func (x Int8x64) AndNot(y Int8x64) Int8x64
   659  
   660  // AndNot returns the bitwise x &^ y: each bit of x is cleared where the corresponding bit of y is set.
   661  //
   662  // Asm: VPANDN, CPU Feature: AVX
   663  func (x Int16x8) AndNot(y Int16x8) Int16x8
   664  
   665  // AndNot returns the bitwise x &^ y: each bit of x is cleared where the corresponding bit of y is set.
   666  //
   667  // Asm: VPANDN, CPU Feature: AVX2
   668  func (x Int16x16) AndNot(y Int16x16) Int16x16
   669  
   670  // AndNot returns the bitwise x &^ y: each bit of x is cleared where the corresponding bit of y is set.
   671  //
   672  // Asm: VPANDND, CPU Feature: AVX512
   673  func (x Int16x32) AndNot(y Int16x32) Int16x32
   674  
   675  // AndNot returns the bitwise x &^ y: each bit of x is cleared where the corresponding bit of y is set.
   676  //
   677  // Asm: VPANDN, CPU Feature: AVX
   678  func (x Int32x4) AndNot(y Int32x4) Int32x4
   679  
   680  // AndNot returns the bitwise x &^ y: each bit of x is cleared where the corresponding bit of y is set.
   681  //
   682  // Asm: VPANDN, CPU Feature: AVX2
   683  func (x Int32x8) AndNot(y Int32x8) Int32x8
   684  
   685  // AndNot returns the bitwise x &^ y: each bit of x is cleared where the corresponding bit of y is set.
   686  //
   687  // Asm: VPANDND, CPU Feature: AVX512
   688  func (x Int32x16) AndNot(y Int32x16) Int32x16
   689  
   690  // AndNot returns the bitwise x &^ y: each bit of x is cleared where the corresponding bit of y is set.
   691  //
   692  // Asm: VPANDN, CPU Feature: AVX
   693  func (x Int64x2) AndNot(y Int64x2) Int64x2
   694  
   695  // AndNot returns the bitwise x &^ y: each bit of x is cleared where the corresponding bit of y is set.
   696  //
   697  // Asm: VPANDN, CPU Feature: AVX2
   698  func (x Int64x4) AndNot(y Int64x4) Int64x4
   699  
   700  // AndNot returns the bitwise x &^ y: each bit of x is cleared where the corresponding bit of y is set.
   701  //
   702  // Asm: VPANDNQ, CPU Feature: AVX512
   703  func (x Int64x8) AndNot(y Int64x8) Int64x8
   704  
   705  // AndNot returns the bitwise x &^ y: each bit of x is cleared where the corresponding bit of y is set.
   706  //
   707  // Asm: VPANDN, CPU Feature: AVX
   708  func (x Uint8x16) AndNot(y Uint8x16) Uint8x16
   709  
   710  // AndNot returns the bitwise x &^ y: each bit of x is cleared where the corresponding bit of y is set.
   711  //
   712  // Asm: VPANDN, CPU Feature: AVX2
   713  func (x Uint8x32) AndNot(y Uint8x32) Uint8x32
   714  
   715  // AndNot returns the bitwise x &^ y: each bit of x is cleared where the corresponding bit of y is set.
   716  //
   717  // Asm: VPANDND, CPU Feature: AVX512
   718  func (x Uint8x64) AndNot(y Uint8x64) Uint8x64
   719  
   720  // AndNot returns the bitwise x &^ y: each bit of x is cleared where the corresponding bit of y is set.
   721  //
   722  // Asm: VPANDN, CPU Feature: AVX
   723  func (x Uint16x8) AndNot(y Uint16x8) Uint16x8
   724  
   725  // AndNot returns the bitwise x &^ y: each bit of x is cleared where the corresponding bit of y is set.
   726  //
   727  // Asm: VPANDN, CPU Feature: AVX2
   728  func (x Uint16x16) AndNot(y Uint16x16) Uint16x16
   729  
   730  // AndNot returns the bitwise x &^ y: each bit of x is cleared where the corresponding bit of y is set.
   731  //
   732  // Asm: VPANDND, CPU Feature: AVX512
   733  func (x Uint16x32) AndNot(y Uint16x32) Uint16x32
   734  
   735  // AndNot returns the bitwise x &^ y: each bit of x is cleared where the corresponding bit of y is set.
   736  //
   737  // Asm: VPANDN, CPU Feature: AVX
   738  func (x Uint32x4) AndNot(y Uint32x4) Uint32x4
   739  
   740  // AndNot returns the bitwise x &^ y: each bit of x is cleared where the corresponding bit of y is set.
   741  //
   742  // Asm: VPANDN, CPU Feature: AVX2
   743  func (x Uint32x8) AndNot(y Uint32x8) Uint32x8
   744  
   745  // AndNot returns the bitwise x &^ y: each bit of x is cleared where the corresponding bit of y is set.
   746  //
   747  // Asm: VPANDND, CPU Feature: AVX512
   748  func (x Uint32x16) AndNot(y Uint32x16) Uint32x16
   749  
   750  // AndNot returns the bitwise x &^ y: each bit of x is cleared where the corresponding bit of y is set.
   751  //
   752  // Asm: VPANDN, CPU Feature: AVX
   753  func (x Uint64x2) AndNot(y Uint64x2) Uint64x2
   754  
   755  // AndNot returns the bitwise x &^ y: each bit of x is cleared where the corresponding bit of y is set.
   756  //
   757  // Asm: VPANDN, CPU Feature: AVX2
   758  func (x Uint64x4) AndNot(y Uint64x4) Uint64x4
   759  
   760  // AndNot returns the bitwise x &^ y: each bit of x is cleared where the corresponding bit of y is set.
   761  //
   762  // Asm: VPANDNQ, CPU Feature: AVX512
   763  func (x Uint64x8) AndNot(y Uint64x8) Uint64x8
   764  
   765  /* Average */
   766  
   767  // Average returns the element-wise average of x and y, rounded up: (x + y + 1) >> 1, computed without intermediate overflow.
   768  //
   769  // Asm: VPAVGB, CPU Feature: AVX
   770  func (x Uint8x16) Average(y Uint8x16) Uint8x16
   771  
   772  // Average returns the element-wise average of x and y, rounded up: (x + y + 1) >> 1, computed without intermediate overflow.
   773  //
   774  // Asm: VPAVGB, CPU Feature: AVX2
   775  func (x Uint8x32) Average(y Uint8x32) Uint8x32
   776  
   777  // Average returns the element-wise average of x and y, rounded up: (x + y + 1) >> 1, computed without intermediate overflow.
   778  //
   779  // Asm: VPAVGB, CPU Feature: AVX512
   780  func (x Uint8x64) Average(y Uint8x64) Uint8x64
   781  
   782  // Average returns the element-wise average of x and y, rounded up: (x + y + 1) >> 1, computed without intermediate overflow.
   783  //
   784  // Asm: VPAVGW, CPU Feature: AVX
   785  func (x Uint16x8) Average(y Uint16x8) Uint16x8
   786  
   787  // Average returns the element-wise average of x and y, rounded up: (x + y + 1) >> 1, computed without intermediate overflow.
   788  //
   789  // Asm: VPAVGW, CPU Feature: AVX2
   790  func (x Uint16x16) Average(y Uint16x16) Uint16x16
   791  
   792  // Average returns the element-wise average of x and y, rounded up: (x + y + 1) >> 1, computed without intermediate overflow.
   793  //
   794  // Asm: VPAVGW, CPU Feature: AVX512
   795  func (x Uint16x32) Average(y Uint16x32) Uint16x32
   796  
   797  /* Broadcast128 */
   798  
   799  // Broadcast128 copies element zero of its (128-bit) input to all elements of
   800  // the 128-bit output vector.
   801  //
   802  // Asm: VBROADCASTSS, CPU Feature: AVX2
   803  func (x Float32x4) Broadcast128() Float32x4
   804  
   805  // Broadcast128 returns a 128-bit vector in which every element is a copy of
   806  // element zero of the 128-bit input x.
   807  //
   808  // Asm: VPBROADCASTQ, CPU Feature: AVX2
   809  func (x Float64x2) Broadcast128() Float64x2
   810  
   811  // Broadcast128 returns a 128-bit vector in which every element is a copy of
   812  // element zero of the 128-bit input x.
   813  //
   814  // Asm: VPBROADCASTB, CPU Feature: AVX2
   815  func (x Int8x16) Broadcast128() Int8x16
   816  
   817  // Broadcast128 returns a 128-bit vector in which every element is a copy of
   818  // element zero of the 128-bit input x.
   819  //
   820  // Asm: VPBROADCASTW, CPU Feature: AVX2
   821  func (x Int16x8) Broadcast128() Int16x8
   822  
   823  // Broadcast128 returns a 128-bit vector in which every element is a copy of
   824  // element zero of the 128-bit input x.
   825  //
   826  // Asm: VPBROADCASTD, CPU Feature: AVX2
   827  func (x Int32x4) Broadcast128() Int32x4
   828  
   829  // Broadcast128 returns a 128-bit vector in which every element is a copy of
   830  // element zero of the 128-bit input x.
   831  //
   832  // Asm: VPBROADCASTQ, CPU Feature: AVX2
   833  func (x Int64x2) Broadcast128() Int64x2
   834  
   835  // Broadcast128 returns a 128-bit vector in which every element is a copy of
   836  // element zero of the 128-bit input x.
   837  //
   838  // Asm: VPBROADCASTB, CPU Feature: AVX2
   839  func (x Uint8x16) Broadcast128() Uint8x16
   840  
   841  // Broadcast128 returns a 128-bit vector in which every element is a copy of
   842  // element zero of the 128-bit input x.
   843  //
   844  // Asm: VPBROADCASTW, CPU Feature: AVX2
   845  func (x Uint16x8) Broadcast128() Uint16x8
   846  
   847  // Broadcast128 returns a 128-bit vector in which every element is a copy of
   848  // element zero of the 128-bit input x.
   849  //
   850  // Asm: VPBROADCASTD, CPU Feature: AVX2
   851  func (x Uint32x4) Broadcast128() Uint32x4
   852  
   853  // Broadcast128 returns a 128-bit vector in which every element is a copy of
   854  // element zero of the 128-bit input x.
   855  //
   856  // Asm: VPBROADCASTQ, CPU Feature: AVX2
   857  func (x Uint64x2) Broadcast128() Uint64x2
   858  
   859  /* Broadcast256 */
   860  
   861  // Broadcast256 returns a 256-bit vector in which every element is a copy of
   862  // element zero of the 128-bit input x.
   863  //
   864  // Asm: VBROADCASTSS, CPU Feature: AVX2
   865  func (x Float32x4) Broadcast256() Float32x8
   866  
   867  // Broadcast256 returns a 256-bit vector in which every element is a copy of
   868  // element zero of the 128-bit input x.
   869  //
   870  // Asm: VBROADCASTSD, CPU Feature: AVX2
   871  func (x Float64x2) Broadcast256() Float64x4
   872  
   873  // Broadcast256 returns a 256-bit vector in which every element is a copy of
   874  // element zero of the 128-bit input x.
   875  //
   876  // Asm: VPBROADCASTB, CPU Feature: AVX2
   877  func (x Int8x16) Broadcast256() Int8x32
   878  
   879  // Broadcast256 returns a 256-bit vector in which every element is a copy of
   880  // element zero of the 128-bit input x.
   881  //
   882  // Asm: VPBROADCASTW, CPU Feature: AVX2
   883  func (x Int16x8) Broadcast256() Int16x16
   884  
   885  // Broadcast256 returns a 256-bit vector in which every element is a copy of
   886  // element zero of the 128-bit input x.
   887  //
   888  // Asm: VPBROADCASTD, CPU Feature: AVX2
   889  func (x Int32x4) Broadcast256() Int32x8
   890  
   891  // Broadcast256 returns a 256-bit vector in which every element is a copy of
   892  // element zero of the 128-bit input x.
   893  //
   894  // Asm: VPBROADCASTQ, CPU Feature: AVX2
   895  func (x Int64x2) Broadcast256() Int64x4
   896  
   897  // Broadcast256 returns a 256-bit vector in which every element is a copy of
   898  // element zero of the 128-bit input x.
   899  //
   900  // Asm: VPBROADCASTB, CPU Feature: AVX2
   901  func (x Uint8x16) Broadcast256() Uint8x32
   902  
   903  // Broadcast256 returns a 256-bit vector in which every element is a copy of
   904  // element zero of the 128-bit input x.
   905  //
   906  // Asm: VPBROADCASTW, CPU Feature: AVX2
   907  func (x Uint16x8) Broadcast256() Uint16x16
   908  
   909  // Broadcast256 returns a 256-bit vector in which every element is a copy of
   910  // element zero of the 128-bit input x.
   911  //
   912  // Asm: VPBROADCASTD, CPU Feature: AVX2
   913  func (x Uint32x4) Broadcast256() Uint32x8
   914  
   915  // Broadcast256 returns a 256-bit vector in which every element is a copy of
   916  // element zero of the 128-bit input x.
   917  //
   918  // Asm: VPBROADCASTQ, CPU Feature: AVX2
   919  func (x Uint64x2) Broadcast256() Uint64x4
   920  
   921  /* Broadcast512 */
   922  
   923  // Broadcast512 returns a 512-bit vector in which every element is a copy of
   924  // element zero of the 128-bit input x.
   925  //
   926  // Asm: VBROADCASTSS, CPU Feature: AVX512
   927  func (x Float32x4) Broadcast512() Float32x16
   928  
   929  // Broadcast512 returns a 512-bit vector in which every element is a copy of
   930  // element zero of the 128-bit input x.
   931  //
   932  // Asm: VBROADCASTSD, CPU Feature: AVX512
   933  func (x Float64x2) Broadcast512() Float64x8
   934  
   935  // Broadcast512 returns a 512-bit vector in which every element is a copy of
   936  // element zero of the 128-bit input x.
   937  //
   938  // Asm: VPBROADCASTB, CPU Feature: AVX512
   939  func (x Int8x16) Broadcast512() Int8x64
   940  
   941  // Broadcast512 returns a 512-bit vector in which every element is a copy of
   942  // element zero of the 128-bit input x.
   943  //
   944  // Asm: VPBROADCASTW, CPU Feature: AVX512
   945  func (x Int16x8) Broadcast512() Int16x32
   946  
   947  // Broadcast512 returns a 512-bit vector in which every element is a copy of
   948  // element zero of the 128-bit input x.
   949  //
   950  // Asm: VPBROADCASTD, CPU Feature: AVX512
   951  func (x Int32x4) Broadcast512() Int32x16
   952  
   953  // Broadcast512 returns a 512-bit vector in which every element is a copy of
   954  // element zero of the 128-bit input x.
   955  //
   956  // Asm: VPBROADCASTQ, CPU Feature: AVX512
   957  func (x Int64x2) Broadcast512() Int64x8
   958  
   959  // Broadcast512 returns a 512-bit vector in which every element is a copy of
   960  // element zero of the 128-bit input x.
   961  //
   962  // Asm: VPBROADCASTB, CPU Feature: AVX512
   963  func (x Uint8x16) Broadcast512() Uint8x64
   964  
   965  // Broadcast512 returns a 512-bit vector in which every element is a copy of
   966  // element zero of the 128-bit input x.
   967  //
   968  // Asm: VPBROADCASTW, CPU Feature: AVX512
   969  func (x Uint16x8) Broadcast512() Uint16x32
   970  
   971  // Broadcast512 returns a 512-bit vector in which every element is a copy of
   972  // element zero of the 128-bit input x.
   973  //
   974  // Asm: VPBROADCASTD, CPU Feature: AVX512
   975  func (x Uint32x4) Broadcast512() Uint32x16
   976  
   977  // Broadcast512 returns a 512-bit vector in which every element is a copy of
   978  // element zero of the 128-bit input x.
   979  //
   980  // Asm: VPBROADCASTQ, CPU Feature: AVX512
   981  func (x Uint64x2) Broadcast512() Uint64x8
   982  
   983  /* Ceil */
   984  
   985  // Ceil rounds each element of x up to the nearest integer.
   986  //
   987  // Asm: VROUNDPS, CPU Feature: AVX
   988  func (x Float32x4) Ceil() Float32x4
   989  
   990  // Ceil rounds each element of x up to the nearest integer.
   991  //
   992  // Asm: VROUNDPS, CPU Feature: AVX
   993  func (x Float32x8) Ceil() Float32x8
   994  
   995  // Ceil rounds each element of x up to the nearest integer.
   996  //
   997  // Asm: VROUNDPD, CPU Feature: AVX
   998  func (x Float64x2) Ceil() Float64x2
   999  
  1000  // Ceil rounds each element of x up to the nearest integer.
  1001  //
  1002  // Asm: VROUNDPD, CPU Feature: AVX
  1003  func (x Float64x4) Ceil() Float64x4
  1004  
  1005  /* CeilScaled */
  1006  
  1007  // CeilScaled rounds each element of x up with the precision specified by prec.
  1008  //
  1009  // prec gives better performance when it is a constant; a non-constant value is translated into a jump table.
  1010  //
  1011  // Asm: VRNDSCALEPS, CPU Feature: AVX512
  1012  func (x Float32x4) CeilScaled(prec uint8) Float32x4
  1013  
  1014  // CeilScaled rounds each element of x up with the precision specified by prec.
  1015  //
  1016  // prec gives better performance when it is a constant; a non-constant value is translated into a jump table.
  1017  //
  1018  // Asm: VRNDSCALEPS, CPU Feature: AVX512
  1019  func (x Float32x8) CeilScaled(prec uint8) Float32x8
  1020  
  1021  // CeilScaled rounds each element of x up with the precision specified by prec.
  1022  //
  1023  // prec gives better performance when it is a constant; a non-constant value is translated into a jump table.
  1024  //
  1025  // Asm: VRNDSCALEPS, CPU Feature: AVX512
  1026  func (x Float32x16) CeilScaled(prec uint8) Float32x16
  1027  
  1028  // CeilScaled rounds each element of x up with the precision specified by prec.
  1029  //
  1030  // prec gives better performance when it is a constant; a non-constant value is translated into a jump table.
  1031  //
  1032  // Asm: VRNDSCALEPD, CPU Feature: AVX512
  1033  func (x Float64x2) CeilScaled(prec uint8) Float64x2
  1034  
  1035  // CeilScaled rounds each element of x up with the precision specified by prec.
  1036  //
  1037  // prec gives better performance when it is a constant; a non-constant value is translated into a jump table.
  1038  //
  1039  // Asm: VRNDSCALEPD, CPU Feature: AVX512
  1040  func (x Float64x4) CeilScaled(prec uint8) Float64x4
  1041  
  1042  // CeilScaled rounds each element of x up with the precision specified by prec.
  1043  //
  1044  // prec gives better performance when it is a constant; a non-constant value is translated into a jump table.
  1045  //
  1046  // Asm: VRNDSCALEPD, CPU Feature: AVX512
  1047  func (x Float64x8) CeilScaled(prec uint8) Float64x8
  1048  
  1049  /* CeilScaledResidue */
  1050  
  1051  // CeilScaledResidue computes the difference after ceiling the elements of x with the precision specified by prec.
  1052  //
  1053  // prec gives better performance when it is a constant; a non-constant value is translated into a jump table.
  1054  //
  1055  // Asm: VREDUCEPS, CPU Feature: AVX512
  1056  func (x Float32x4) CeilScaledResidue(prec uint8) Float32x4
  1057  
  1058  // CeilScaledResidue computes the difference after ceiling the elements of x with the precision specified by prec.
  1059  //
  1060  // prec gives better performance when it is a constant; a non-constant value is translated into a jump table.
  1061  //
  1062  // Asm: VREDUCEPS, CPU Feature: AVX512
  1063  func (x Float32x8) CeilScaledResidue(prec uint8) Float32x8
  1064  
  1065  // CeilScaledResidue computes the difference after ceiling the elements of x with the precision specified by prec.
  1066  //
  1067  // prec gives better performance when it is a constant; a non-constant value is translated into a jump table.
  1068  //
  1069  // Asm: VREDUCEPS, CPU Feature: AVX512
  1070  func (x Float32x16) CeilScaledResidue(prec uint8) Float32x16
  1071  
  1072  // CeilScaledResidue computes the difference after ceiling the elements of x with the precision specified by prec.
  1073  //
  1074  // prec gives better performance when it is a constant; a non-constant value is translated into a jump table.
  1075  //
  1076  // Asm: VREDUCEPD, CPU Feature: AVX512
  1077  func (x Float64x2) CeilScaledResidue(prec uint8) Float64x2
  1078  
  1079  // CeilScaledResidue computes the difference after ceiling the elements of x with the precision specified by prec.
  1080  //
  1081  // prec gives better performance when it is a constant; a non-constant value is translated into a jump table.
  1082  //
  1083  // Asm: VREDUCEPD, CPU Feature: AVX512
  1084  func (x Float64x4) CeilScaledResidue(prec uint8) Float64x4
  1085  
  1086  // CeilScaledResidue computes the difference after ceiling the elements of x with the precision specified by prec.
  1087  //
  1088  // prec gives better performance when it is a constant; a non-constant value is translated into a jump table.
  1089  //
  1090  // Asm: VREDUCEPD, CPU Feature: AVX512
  1091  func (x Float64x8) CeilScaledResidue(prec uint8) Float64x8
  1092  
  1093  /* Compress */
  1094  
  1095  // Compress compresses vector x under mask: it selects the elements
  1096  // indicated by mask and packs them into the lower-indexed elements of the result.
  1097  //
  1098  // Asm: VCOMPRESSPS, CPU Feature: AVX512
  1099  func (x Float32x4) Compress(mask Mask32x4) Float32x4
  1100  
  1101  // Compress compresses vector x under mask: it selects the elements
  1102  // indicated by mask and packs them into the lower-indexed elements of the result.
  1103  //
  1104  // Asm: VCOMPRESSPS, CPU Feature: AVX512
  1105  func (x Float32x8) Compress(mask Mask32x8) Float32x8
  1106  
  1107  // Compress compresses vector x under mask: it selects the elements
  1108  // indicated by mask and packs them into the lower-indexed elements of the result.
  1109  //
  1110  // Asm: VCOMPRESSPS, CPU Feature: AVX512
  1111  func (x Float32x16) Compress(mask Mask32x16) Float32x16
  1112  
  1113  // Compress compresses vector x under mask: it selects the elements
  1114  // indicated by mask and packs them into the lower-indexed elements of the result.
  1115  //
  1116  // Asm: VCOMPRESSPD, CPU Feature: AVX512
  1117  func (x Float64x2) Compress(mask Mask64x2) Float64x2
  1118  
  1119  // Compress compresses vector x under mask: it selects the elements
  1120  // indicated by mask and packs them into the lower-indexed elements of the result.
  1121  //
  1122  // Asm: VCOMPRESSPD, CPU Feature: AVX512
  1123  func (x Float64x4) Compress(mask Mask64x4) Float64x4
  1124  
  1125  // Compress compresses vector x under mask: it selects the elements
  1126  // indicated by mask and packs them into the lower-indexed elements of the result.
  1127  //
  1128  // Asm: VCOMPRESSPD, CPU Feature: AVX512
  1129  func (x Float64x8) Compress(mask Mask64x8) Float64x8
  1130  
  1131  // Compress compresses vector x under mask: it selects the elements
  1132  // indicated by mask and packs them into the lower-indexed elements of the result.
  1133  //
  1134  // Asm: VPCOMPRESSB, CPU Feature: AVX512VBMI2
  1135  func (x Int8x16) Compress(mask Mask8x16) Int8x16
  1136  
  1137  // Compress compresses vector x under mask: it selects the elements
  1138  // indicated by mask and packs them into the lower-indexed elements of the result.
  1139  //
  1140  // Asm: VPCOMPRESSB, CPU Feature: AVX512VBMI2
  1141  func (x Int8x32) Compress(mask Mask8x32) Int8x32
  1142  
  1143  // Compress compresses vector x under mask: it selects the elements
  1144  // indicated by mask and packs them into the lower-indexed elements of the result.
  1145  //
  1146  // Asm: VPCOMPRESSB, CPU Feature: AVX512VBMI2
  1147  func (x Int8x64) Compress(mask Mask8x64) Int8x64
  1148  
  1149  // Compress compresses vector x under mask: it selects the elements
  1150  // indicated by mask and packs them into the lower-indexed elements of the result.
  1151  //
  1152  // Asm: VPCOMPRESSW, CPU Feature: AVX512VBMI2
  1153  func (x Int16x8) Compress(mask Mask16x8) Int16x8
  1154  
  1155  // Compress compresses vector x under mask: it selects the elements
  1156  // indicated by mask and packs them into the lower-indexed elements of the result.
  1157  //
  1158  // Asm: VPCOMPRESSW, CPU Feature: AVX512VBMI2
  1159  func (x Int16x16) Compress(mask Mask16x16) Int16x16
  1160  
  1161  // Compress compresses vector x under mask: it selects the elements
  1162  // indicated by mask and packs them into the lower-indexed elements of the result.
  1163  //
  1164  // Asm: VPCOMPRESSW, CPU Feature: AVX512VBMI2
  1165  func (x Int16x32) Compress(mask Mask16x32) Int16x32
  1166  
  1167  // Compress compresses vector x under mask: it selects the elements
  1168  // indicated by mask and packs them into the lower-indexed elements of the result.
  1169  //
  1170  // Asm: VPCOMPRESSD, CPU Feature: AVX512
  1171  func (x Int32x4) Compress(mask Mask32x4) Int32x4
  1172  
  1173  // Compress compresses vector x under mask: it selects the elements
  1174  // indicated by mask and packs them into the lower-indexed elements of the result.
  1175  //
  1176  // Asm: VPCOMPRESSD, CPU Feature: AVX512
  1177  func (x Int32x8) Compress(mask Mask32x8) Int32x8
  1178  
  1179  // Compress compresses vector x under mask: it selects the elements
  1180  // indicated by mask and packs them into the lower-indexed elements of the result.
  1181  //
  1182  // Asm: VPCOMPRESSD, CPU Feature: AVX512
  1183  func (x Int32x16) Compress(mask Mask32x16) Int32x16
  1184  
  1185  // Compress compresses vector x under mask: it selects the elements
  1186  // indicated by mask and packs them into the lower-indexed elements of the result.
  1187  //
  1188  // Asm: VPCOMPRESSQ, CPU Feature: AVX512
  1189  func (x Int64x2) Compress(mask Mask64x2) Int64x2
  1190  
  1191  // Compress compresses vector x under mask: it selects the elements
  1192  // indicated by mask and packs them into the lower-indexed elements of the result.
  1193  //
  1194  // Asm: VPCOMPRESSQ, CPU Feature: AVX512
  1195  func (x Int64x4) Compress(mask Mask64x4) Int64x4
  1196  
  1197  // Compress compresses vector x under mask: it selects the elements
  1198  // indicated by mask and packs them into the lower-indexed elements of the result.
  1199  //
  1200  // Asm: VPCOMPRESSQ, CPU Feature: AVX512
  1201  func (x Int64x8) Compress(mask Mask64x8) Int64x8
  1202  
  1203  // Compress compresses vector x under mask: it selects the elements
  1204  // indicated by mask and packs them into the lower-indexed elements of the result.
  1205  //
  1206  // Asm: VPCOMPRESSB, CPU Feature: AVX512VBMI2
  1207  func (x Uint8x16) Compress(mask Mask8x16) Uint8x16
  1208  
  1209  // Compress compresses vector x under mask: it selects the elements
  1210  // indicated by mask and packs them into the lower-indexed elements of the result.
  1211  //
  1212  // Asm: VPCOMPRESSB, CPU Feature: AVX512VBMI2
  1213  func (x Uint8x32) Compress(mask Mask8x32) Uint8x32
  1214  
  1215  // Compress compresses vector x under mask: it selects the elements
  1216  // indicated by mask and packs them into the lower-indexed elements of the result.
  1217  //
  1218  // Asm: VPCOMPRESSB, CPU Feature: AVX512VBMI2
  1219  func (x Uint8x64) Compress(mask Mask8x64) Uint8x64
  1220  
  1221  // Compress compresses vector x under mask: it selects the elements
  1222  // indicated by mask and packs them into the lower-indexed elements of the result.
  1223  //
  1224  // Asm: VPCOMPRESSW, CPU Feature: AVX512VBMI2
  1225  func (x Uint16x8) Compress(mask Mask16x8) Uint16x8
  1226  
  1227  // Compress compresses vector x under mask: it selects the elements
  1228  // indicated by mask and packs them into the lower-indexed elements of the result.
  1229  //
  1230  // Asm: VPCOMPRESSW, CPU Feature: AVX512VBMI2
  1231  func (x Uint16x16) Compress(mask Mask16x16) Uint16x16
  1232  
  1233  // Compress compresses vector x under mask: it selects the elements
  1234  // indicated by mask and packs them into the lower-indexed elements of the result.
  1235  //
  1236  // Asm: VPCOMPRESSW, CPU Feature: AVX512VBMI2
  1237  func (x Uint16x32) Compress(mask Mask16x32) Uint16x32
  1238  
  1239  // Compress compresses vector x under mask: it selects the elements
  1240  // indicated by mask and packs them into the lower-indexed elements of the result.
  1241  //
  1242  // Asm: VPCOMPRESSD, CPU Feature: AVX512
  1243  func (x Uint32x4) Compress(mask Mask32x4) Uint32x4
  1244  
  1245  // Compress compresses vector x under mask: it selects the elements
  1246  // indicated by mask and packs them into the lower-indexed elements of the result.
  1247  //
  1248  // Asm: VPCOMPRESSD, CPU Feature: AVX512
  1249  func (x Uint32x8) Compress(mask Mask32x8) Uint32x8
  1250  
  1251  // Compress compresses vector x under mask: it selects the elements
  1252  // indicated by mask and packs them into the lower-indexed elements of the result.
  1253  //
  1254  // Asm: VPCOMPRESSD, CPU Feature: AVX512
  1255  func (x Uint32x16) Compress(mask Mask32x16) Uint32x16
  1256  
  1257  // Compress compresses vector x under mask: it selects the elements
  1258  // indicated by mask and packs them into the lower-indexed elements of the result.
  1259  //
  1260  // Asm: VPCOMPRESSQ, CPU Feature: AVX512
  1261  func (x Uint64x2) Compress(mask Mask64x2) Uint64x2
  1262  
  1263  // Compress compresses vector x under mask: it selects the elements
  1264  // indicated by mask and packs them into the lower-indexed elements of the result.
  1265  //
  1266  // Asm: VPCOMPRESSQ, CPU Feature: AVX512
  1267  func (x Uint64x4) Compress(mask Mask64x4) Uint64x4
  1268  
  1269  // Compress compresses vector x under mask: it selects the elements
  1270  // indicated by mask and packs them into the lower-indexed elements of the result.
  1271  //
  1272  // Asm: VPCOMPRESSQ, CPU Feature: AVX512
  1273  func (x Uint64x8) Compress(mask Mask64x8) Uint64x8
  1274  
  1275  /* ConcatPermute */
  1276  
  1277  // ConcatPermute performs a full permutation of the vectors x and y using indices:
  1278  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
  1279  // where xy is the concatenation of x (lower half) and y (upper half) and n is the index of the last element of indices.
  1280  // Of each element of indices, only the bits needed to represent an index into xy are used.
  1281  //
  1282  // Asm: VPERMI2B, CPU Feature: AVX512VBMI
  1283  func (x Int8x16) ConcatPermute(y Int8x16, indices Uint8x16) Int8x16
  1284  
  1285  // ConcatPermute performs a full permutation of the vectors x and y using indices:
  1286  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
  1287  // where xy is the concatenation of x (lower half) and y (upper half) and n is the index of the last element of indices.
  1288  // Of each element of indices, only the bits needed to represent an index into xy are used.
  1289  //
  1290  // Asm: VPERMI2B, CPU Feature: AVX512VBMI
  1291  func (x Uint8x16) ConcatPermute(y Uint8x16, indices Uint8x16) Uint8x16
  1292  
  1293  // ConcatPermute performs a full permutation of the vectors x and y using indices:
  1294  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
  1295  // where xy is the concatenation of x (lower half) and y (upper half) and n is the index of the last element of indices.
  1296  // Of each element of indices, only the bits needed to represent an index into xy are used.
  1297  //
  1298  // Asm: VPERMI2B, CPU Feature: AVX512VBMI
  1299  func (x Int8x32) ConcatPermute(y Int8x32, indices Uint8x32) Int8x32
  1300  
  1301  // ConcatPermute performs a full permutation of the vectors x and y using indices:
  1302  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
  1303  // where xy is the concatenation of x (lower half) and y (upper half) and n is the index of the last element of indices.
  1304  // Of each element of indices, only the bits needed to represent an index into xy are used.
  1305  //
  1306  // Asm: VPERMI2B, CPU Feature: AVX512VBMI
  1307  func (x Uint8x32) ConcatPermute(y Uint8x32, indices Uint8x32) Uint8x32
  1308  
  1309  // ConcatPermute performs a full permutation of the vectors x and y using indices:
  1310  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
  1311  // where xy is the concatenation of x (lower half) and y (upper half) and n is the index of the last element of indices.
  1312  // Of each element of indices, only the bits needed to represent an index into xy are used.
  1313  //
  1314  // Asm: VPERMI2B, CPU Feature: AVX512VBMI
  1315  func (x Int8x64) ConcatPermute(y Int8x64, indices Uint8x64) Int8x64
  1316  
  1317  // ConcatPermute performs a full permutation of the vectors x and y using indices:
  1318  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
  1319  // where xy is the concatenation of x (lower half) and y (upper half) and n is the index of the last element of indices.
  1320  // Of each element of indices, only the bits needed to represent an index into xy are used.
  1321  //
  1322  // Asm: VPERMI2B, CPU Feature: AVX512VBMI
  1323  func (x Uint8x64) ConcatPermute(y Uint8x64, indices Uint8x64) Uint8x64
  1324  
  1325  // ConcatPermute performs a full permutation of the vectors x and y using indices:
  1326  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
  1327  // where xy is the concatenation of x (lower half) and y (upper half) and n is the index of the last element of indices.
  1328  // Of each element of indices, only the bits needed to represent an index into xy are used.
  1329  //
  1330  // Asm: VPERMI2W, CPU Feature: AVX512
  1331  func (x Int16x8) ConcatPermute(y Int16x8, indices Uint16x8) Int16x8
  1332  
  1333  // ConcatPermute performs a full permutation of the vectors x and y using indices:
  1334  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
  1335  // where xy is the concatenation of x (lower half) and y (upper half) and n is the index of the last element of indices.
  1336  // Of each element of indices, only the bits needed to represent an index into xy are used.
  1337  //
  1338  // Asm: VPERMI2W, CPU Feature: AVX512
  1339  func (x Uint16x8) ConcatPermute(y Uint16x8, indices Uint16x8) Uint16x8
  1340  
  1341  // ConcatPermute performs a full permutation of the vectors x and y using indices:
  1342  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
  1343  // where xy is the concatenation of x (lower half) and y (upper half) and n is the index of the last element of indices.
  1344  // Of each element of indices, only the bits needed to represent an index into xy are used.
  1345  //
  1346  // Asm: VPERMI2W, CPU Feature: AVX512
  1347  func (x Int16x16) ConcatPermute(y Int16x16, indices Uint16x16) Int16x16
  1348  
  1349  // ConcatPermute performs a full permutation of the vectors x and y using indices:
  1350  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
  1351  // where xy is the concatenation of x (lower half) and y (upper half) and n is the index of the last element of indices.
  1352  // Of each element of indices, only the bits needed to represent an index into xy are used.
  1353  //
  1354  // Asm: VPERMI2W, CPU Feature: AVX512
  1355  func (x Uint16x16) ConcatPermute(y Uint16x16, indices Uint16x16) Uint16x16
  1356  
  1357  // ConcatPermute performs a full permutation of the vectors x and y using indices:
  1358  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
  1359  // where xy is the concatenation of x (lower half) and y (upper half) and n is the index of the last element of indices.
  1360  // Of each element of indices, only the bits needed to represent an index into xy are used.
  1361  //
  1362  // Asm: VPERMI2W, CPU Feature: AVX512
  1363  func (x Int16x32) ConcatPermute(y Int16x32, indices Uint16x32) Int16x32
  1364  
  1365  // ConcatPermute performs a full permutation of the vectors x and y using indices:
  1366  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
  1367  // where xy is the concatenation of x (lower half) and y (upper half) and n is the index of the last element of indices.
  1368  // Of each element of indices, only the bits needed to represent an index into xy are used.
  1369  //
  1370  // Asm: VPERMI2W, CPU Feature: AVX512
  1371  func (x Uint16x32) ConcatPermute(y Uint16x32, indices Uint16x32) Uint16x32
  1372  
  1373  // ConcatPermute performs a full permutation of the vectors x and y using indices:
  1374  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
  1375  // where xy is the concatenation of x (lower half) and y (upper half) and n is the index of the last element of indices.
  1376  // Of each element of indices, only the bits needed to represent an index into xy are used.
  1377  //
  1378  // Asm: VPERMI2PS, CPU Feature: AVX512
  1379  func (x Float32x4) ConcatPermute(y Float32x4, indices Uint32x4) Float32x4
  1380  
  1381  // ConcatPermute performs a full permutation of the vectors x and y using indices:
  1382  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
  1383  // where xy is the concatenation of x (lower half) and y (upper half) and n is the index of the last element of indices.
  1384  // Of each element of indices, only the bits needed to represent an index into xy are used.
  1385  //
  1386  // Asm: VPERMI2D, CPU Feature: AVX512
  1387  func (x Int32x4) ConcatPermute(y Int32x4, indices Uint32x4) Int32x4
  1388  
  1389  // ConcatPermute performs a full permutation of the vectors x and y using indices:
  1390  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
  1391  // where xy is the concatenation of x (lower half) and y (upper half) and n is the index of the last element of indices.
  1392  // Of each element of indices, only the bits needed to represent an index into xy are used.
  1393  //
  1394  // Asm: VPERMI2D, CPU Feature: AVX512
  1395  func (x Uint32x4) ConcatPermute(y Uint32x4, indices Uint32x4) Uint32x4
  1396  
  1397  // ConcatPermute performs a full permutation of the vectors x and y using indices:
  1398  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
  1399  // where xy is the concatenation of x (lower half) and y (upper half) and n is the index of the last element of indices.
  1400  // Of each element of indices, only the bits needed to represent an index into xy are used.
  1401  //
  1402  // Asm: VPERMI2PS, CPU Feature: AVX512
  1403  func (x Float32x8) ConcatPermute(y Float32x8, indices Uint32x8) Float32x8
  1404  
  1405  // ConcatPermute performs a full permutation of the vectors x and y using indices:
  1406  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
  1407  // where xy is the concatenation of x (lower half) and y (upper half) and n is the index of the last element of indices.
  1408  // Of each element of indices, only the bits needed to represent an index into xy are used.
  1409  //
  1410  // Asm: VPERMI2D, CPU Feature: AVX512
  1411  func (x Int32x8) ConcatPermute(y Int32x8, indices Uint32x8) Int32x8
  1412  
  1413  // ConcatPermute performs a full permutation of the vectors x and y using indices:
  1414  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
  1415  // where xy is the concatenation of x (lower half) and y (upper half) and n is the index of the last element of indices.
  1416  // Of each element of indices, only the bits needed to represent an index into xy are used.
  1417  //
  1418  // Asm: VPERMI2D, CPU Feature: AVX512
  1419  func (x Uint32x8) ConcatPermute(y Uint32x8, indices Uint32x8) Uint32x8
  1420  
  1421  // ConcatPermute performs a full permutation of the vectors x and y using indices:
  1422  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
  1423  // where xy is the concatenation of x (lower half) and y (upper half) and n is the index of the last element of indices.
  1424  // Of each element of indices, only the bits needed to represent an index into xy are used.
  1425  //
  1426  // Asm: VPERMI2PS, CPU Feature: AVX512
  1427  func (x Float32x16) ConcatPermute(y Float32x16, indices Uint32x16) Float32x16
  1428  
  1429  // ConcatPermute performs a full permutation of the vectors x and y using indices:
  1430  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
  1431  // where xy is the concatenation of x (lower half) and y (upper half) and n is the index of the last element of indices.
  1432  // Of each element of indices, only the bits needed to represent an index into xy are used.
  1433  //
  1434  // Asm: VPERMI2D, CPU Feature: AVX512
  1435  func (x Int32x16) ConcatPermute(y Int32x16, indices Uint32x16) Int32x16
  1436  
  1437  // ConcatPermute performs a full permutation of the vectors x and y using indices:
  1438  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
  1439  // where xy is the concatenation of x (lower half) and y (upper half) and n is the index of the last element of indices.
  1440  // Of each element of indices, only the bits needed to represent an index into xy are used.
  1441  //
  1442  // Asm: VPERMI2D, CPU Feature: AVX512
  1443  func (x Uint32x16) ConcatPermute(y Uint32x16, indices Uint32x16) Uint32x16
  1444  
  1445  // ConcatPermute performs a full permutation of the vectors x and y using indices:
  1446  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
  1447  // where xy is the concatenation of x (lower half) and y (upper half) and n is the index of the last element of indices.
  1448  // Of each element of indices, only the bits needed to represent an index into xy are used.
  1449  //
  1450  // Asm: VPERMI2PD, CPU Feature: AVX512
  1451  func (x Float64x2) ConcatPermute(y Float64x2, indices Uint64x2) Float64x2
  1452  
  1453  // ConcatPermute performs a full permutation of the vectors x and y using indices:
  1454  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
  1455  // where xy is the concatenation of x (lower half) and y (upper half) and n is the index of the last element of indices.
  1456  // Of each element of indices, only the bits needed to represent an index into xy are used.
  1457  //
  1458  // Asm: VPERMI2Q, CPU Feature: AVX512
  1459  func (x Int64x2) ConcatPermute(y Int64x2, indices Uint64x2) Int64x2
  1460  
  1461  // ConcatPermute performs a full permutation of the vectors x and y using indices:
  1462  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
  1463  // where xy is the concatenation of x (lower half) and y (upper half) and n is the index of the last element of indices.
  1464  // Of each element of indices, only the bits needed to represent an index into xy are used.
  1465  //
  1466  // Asm: VPERMI2Q, CPU Feature: AVX512
  1467  func (x Uint64x2) ConcatPermute(y Uint64x2, indices Uint64x2) Uint64x2
  1468  
  1469  // ConcatPermute performs a full permutation of the vectors x and y using indices:
  1470  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n-1]]}
  1471  // where n is the number of elements in indices and xy is the concatenation of x (lower half) and y (upper half).
  1472  // Only the bits needed to represent an index into xy are used from each element of indices.
  1473  //
  1474  // Asm: VPERMI2PD, CPU Feature: AVX512
  1475  func (x Float64x4) ConcatPermute(y Float64x4, indices Uint64x4) Float64x4
  1476  
  1477  // ConcatPermute performs a full permutation of the vectors x and y using indices:
  1478  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n-1]]}
  1479  // where n is the number of elements in indices and xy is the concatenation of x (lower half) and y (upper half).
  1480  // Only the bits needed to represent an index into xy are used from each element of indices.
  1481  //
  1482  // Asm: VPERMI2Q, CPU Feature: AVX512
  1483  func (x Int64x4) ConcatPermute(y Int64x4, indices Uint64x4) Int64x4
  1484  
  1485  // ConcatPermute performs a full permutation of the vectors x and y using indices:
  1486  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n-1]]}
  1487  // where n is the number of elements in indices and xy is the concatenation of x (lower half) and y (upper half).
  1488  // Only the bits needed to represent an index into xy are used from each element of indices.
  1489  //
  1490  // Asm: VPERMI2Q, CPU Feature: AVX512
  1491  func (x Uint64x4) ConcatPermute(y Uint64x4, indices Uint64x4) Uint64x4
  1492  
  1493  // ConcatPermute performs a full permutation of the vectors x and y using indices:
  1494  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n-1]]}
  1495  // where n is the number of elements in indices and xy is the concatenation of x (lower half) and y (upper half).
  1496  // Only the bits needed to represent an index into xy are used from each element of indices.
  1497  //
  1498  // Asm: VPERMI2PD, CPU Feature: AVX512
  1499  func (x Float64x8) ConcatPermute(y Float64x8, indices Uint64x8) Float64x8
  1500  
  1501  // ConcatPermute performs a full permutation of the vectors x and y using indices:
  1502  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n-1]]}
  1503  // where n is the number of elements in indices and xy is the concatenation of x (lower half) and y (upper half).
  1504  // Only the bits needed to represent an index into xy are used from each element of indices.
  1505  //
  1506  // Asm: VPERMI2Q, CPU Feature: AVX512
  1507  func (x Int64x8) ConcatPermute(y Int64x8, indices Uint64x8) Int64x8
  1508  
  1509  // ConcatPermute performs a full permutation of the vectors x and y using indices:
  1510  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n-1]]}
  1511  // where n is the number of elements in indices and xy is the concatenation of x (lower half) and y (upper half).
  1512  // Only the bits needed to represent an index into xy are used from each element of indices.
  1513  //
  1514  // Asm: VPERMI2Q, CPU Feature: AVX512
  1515  func (x Uint64x8) ConcatPermute(y Uint64x8, indices Uint64x8) Uint64x8
  1516  
  1517  /* ConcatShiftBytesRight */
  1518  
  1519  // ConcatShiftBytesRight concatenates x and y and shifts the concatenation right by constant bytes.
  1520  // The result vector is the lower half of the concatenated vector.
  1521  //
  1522  // constant gives better performance when it is a constant; a non-constant value is translated into a jump table.
  1523  //
  1524  // Asm: VPALIGNR, CPU Feature: AVX
  1525  func (x Uint8x16) ConcatShiftBytesRight(constant uint8, y Uint8x16) Uint8x16
  1526  
  1527  /* ConcatShiftBytesRightGrouped */
  1528  
  1529  // ConcatShiftBytesRightGrouped concatenates x and y and shifts the concatenation right by constant bytes.
  1530  // The result vector is the lower half of the concatenated vector.
  1531  // This operation is performed in groups of 16 bytes.
  1532  //
  1533  // constant gives better performance when it is a constant; a non-constant value is translated into a jump table.
  1534  //
  1535  // Asm: VPALIGNR, CPU Feature: AVX2
  1536  func (x Uint8x32) ConcatShiftBytesRightGrouped(constant uint8, y Uint8x32) Uint8x32
  1537  
  1538  // ConcatShiftBytesRightGrouped concatenates x and y and shifts the concatenation right by constant bytes.
  1539  // The result vector is the lower half of the concatenated vector.
  1540  // This operation is performed in groups of 16 bytes.
  1541  //
  1542  // constant gives better performance when it is a constant; a non-constant value is translated into a jump table.
  1543  //
  1544  // Asm: VPALIGNR, CPU Feature: AVX512
  1545  func (x Uint8x64) ConcatShiftBytesRightGrouped(constant uint8, y Uint8x64) Uint8x64
  1546  
  1547  /* ConvertToFloat32 */
  1548  
  1549  // ConvertToFloat32 converts element values to float32.
  1550  // The result vector's elements are rounded to the nearest value.
  1551  //
  1552  // Asm: VCVTPD2PSX, CPU Feature: AVX
  1553  func (x Float64x2) ConvertToFloat32() Float32x4
  1554  
  1555  // ConvertToFloat32 converts element values to float32.
  1556  // The result vector's elements are rounded to the nearest value.
  1557  //
  1558  // Asm: VCVTPD2PSY, CPU Feature: AVX
  1559  func (x Float64x4) ConvertToFloat32() Float32x4
  1560  
  1561  // ConvertToFloat32 converts element values to float32.
  1562  // The result vector's elements are rounded to the nearest value.
  1563  //
  1564  // Asm: VCVTPD2PS, CPU Feature: AVX512
  1565  func (x Float64x8) ConvertToFloat32() Float32x8
  1566  
  1567  // ConvertToFloat32 converts element values to float32.
  1568  //
  1569  // Asm: VCVTDQ2PS, CPU Feature: AVX
  1570  func (x Int32x4) ConvertToFloat32() Float32x4
  1571  
  1572  // ConvertToFloat32 converts element values to float32.
  1573  //
  1574  // Asm: VCVTDQ2PS, CPU Feature: AVX
  1575  func (x Int32x8) ConvertToFloat32() Float32x8
  1576  
  1577  // ConvertToFloat32 converts element values to float32.
  1578  //
  1579  // Asm: VCVTDQ2PS, CPU Feature: AVX512
  1580  func (x Int32x16) ConvertToFloat32() Float32x16
  1581  
  1582  // ConvertToFloat32 converts element values to float32.
  1583  //
  1584  // Asm: VCVTQQ2PSX, CPU Feature: AVX512
  1585  func (x Int64x2) ConvertToFloat32() Float32x4
  1586  
  1587  // ConvertToFloat32 converts element values to float32.
  1588  //
  1589  // Asm: VCVTQQ2PSY, CPU Feature: AVX512
  1590  func (x Int64x4) ConvertToFloat32() Float32x4
  1591  
  1592  // ConvertToFloat32 converts element values to float32.
  1593  //
  1594  // Asm: VCVTQQ2PS, CPU Feature: AVX512
  1595  func (x Int64x8) ConvertToFloat32() Float32x8
  1596  
  1597  // ConvertToFloat32 converts element values to float32.
  1598  //
  1599  // Asm: VCVTUDQ2PS, CPU Feature: AVX512
  1600  func (x Uint32x4) ConvertToFloat32() Float32x4
  1601  
  1602  // ConvertToFloat32 converts element values to float32.
  1603  //
  1604  // Asm: VCVTUDQ2PS, CPU Feature: AVX512
  1605  func (x Uint32x8) ConvertToFloat32() Float32x8
  1606  
  1607  // ConvertToFloat32 converts element values to float32.
  1608  //
  1609  // Asm: VCVTUDQ2PS, CPU Feature: AVX512
  1610  func (x Uint32x16) ConvertToFloat32() Float32x16
  1611  
  1612  // ConvertToFloat32 converts element values to float32.
  1613  //
  1614  // Asm: VCVTUQQ2PSX, CPU Feature: AVX512
  1615  func (x Uint64x2) ConvertToFloat32() Float32x4
  1616  
  1617  // ConvertToFloat32 converts element values to float32.
  1618  //
  1619  // Asm: VCVTUQQ2PSY, CPU Feature: AVX512
  1620  func (x Uint64x4) ConvertToFloat32() Float32x4
  1621  
  1622  // ConvertToFloat32 converts element values to float32.
  1623  //
  1624  // Asm: VCVTUQQ2PS, CPU Feature: AVX512
  1625  func (x Uint64x8) ConvertToFloat32() Float32x8
  1626  
  1627  /* ConvertToFloat64 */
  1628  
  1629  // ConvertToFloat64 converts element values to float64.
  1630  //
  1631  // Asm: VCVTPS2PD, CPU Feature: AVX
  1632  func (x Float32x4) ConvertToFloat64() Float64x4
  1633  
  1634  // ConvertToFloat64 converts element values to float64.
  1635  //
  1636  // Asm: VCVTPS2PD, CPU Feature: AVX512
  1637  func (x Float32x8) ConvertToFloat64() Float64x8
  1638  
  1639  // ConvertToFloat64 converts element values to float64.
  1640  //
  1641  // Asm: VCVTDQ2PD, CPU Feature: AVX
  1642  func (x Int32x4) ConvertToFloat64() Float64x4
  1643  
  1644  // ConvertToFloat64 converts element values to float64.
  1645  //
  1646  // Asm: VCVTDQ2PD, CPU Feature: AVX512
  1647  func (x Int32x8) ConvertToFloat64() Float64x8
  1648  
  1649  // ConvertToFloat64 converts element values to float64.
  1650  //
  1651  // Asm: VCVTQQ2PD, CPU Feature: AVX512
  1652  func (x Int64x2) ConvertToFloat64() Float64x2
  1653  
  1654  // ConvertToFloat64 converts element values to float64.
  1655  //
  1656  // Asm: VCVTQQ2PD, CPU Feature: AVX512
  1657  func (x Int64x4) ConvertToFloat64() Float64x4
  1658  
  1659  // ConvertToFloat64 converts element values to float64.
  1660  //
  1661  // Asm: VCVTQQ2PD, CPU Feature: AVX512
  1662  func (x Int64x8) ConvertToFloat64() Float64x8
  1663  
  1664  // ConvertToFloat64 converts element values to float64.
  1665  //
  1666  // Asm: VCVTUDQ2PD, CPU Feature: AVX512
  1667  func (x Uint32x4) ConvertToFloat64() Float64x4
  1668  
  1669  // ConvertToFloat64 converts element values to float64.
  1670  //
  1671  // Asm: VCVTUDQ2PD, CPU Feature: AVX512
  1672  func (x Uint32x8) ConvertToFloat64() Float64x8
  1673  
  1674  // ConvertToFloat64 converts element values to float64.
  1675  //
  1676  // Asm: VCVTUQQ2PD, CPU Feature: AVX512
  1677  func (x Uint64x2) ConvertToFloat64() Float64x2
  1678  
  1679  // ConvertToFloat64 converts element values to float64.
  1680  //
  1681  // Asm: VCVTUQQ2PD, CPU Feature: AVX512
  1682  func (x Uint64x4) ConvertToFloat64() Float64x4
  1683  
  1684  // ConvertToFloat64 converts element values to float64.
  1685  //
  1686  // Asm: VCVTUQQ2PD, CPU Feature: AVX512
  1687  func (x Uint64x8) ConvertToFloat64() Float64x8
  1688  
  1689  /* ConvertToInt32 */
  1690  
  1691  // ConvertToInt32 converts element values to int32.
  1692  // When a conversion is inexact, a truncated (round toward zero) value is returned.
  1693  // If a converted result cannot be represented in int32, an implementation-defined
  1694  // architecture-specific value is returned.
  1695  //
  1696  // Asm: VCVTTPS2DQ, CPU Feature: AVX
  1697  func (x Float32x4) ConvertToInt32() Int32x4
  1698  
  1699  // ConvertToInt32 converts element values to int32.
  1700  // When a conversion is inexact, a truncated (round toward zero) value is returned.
  1701  // If a converted result cannot be represented in int32, an implementation-defined
  1702  // architecture-specific value is returned.
  1703  //
  1704  // Asm: VCVTTPS2DQ, CPU Feature: AVX
  1705  func (x Float32x8) ConvertToInt32() Int32x8
  1706  
  1707  // ConvertToInt32 converts element values to int32.
  1708  // When a conversion is inexact, a truncated (round toward zero) value is returned.
  1709  // If a converted result cannot be represented in int32, an implementation-defined
  1710  // architecture-specific value is returned.
  1711  //
  1712  // Asm: VCVTTPS2DQ, CPU Feature: AVX512
  1713  func (x Float32x16) ConvertToInt32() Int32x16
  1714  
  1715  // ConvertToInt32 converts element values to int32.
  1716  // When a conversion is inexact, a truncated (round toward zero) value is returned.
  1717  // If a converted result cannot be represented in int32, an implementation-defined
  1718  // architecture-specific value is returned.
  1719  //
  1720  // Asm: VCVTTPD2DQX, CPU Feature: AVX
  1721  func (x Float64x2) ConvertToInt32() Int32x4
  1722  
  1723  // ConvertToInt32 converts element values to int32.
  1724  // When a conversion is inexact, a truncated (round toward zero) value is returned.
  1725  // If a converted result cannot be represented in int32, an implementation-defined
  1726  // architecture-specific value is returned.
  1727  //
  1728  // Asm: VCVTTPD2DQY, CPU Feature: AVX
  1729  func (x Float64x4) ConvertToInt32() Int32x4
  1730  
  1731  // ConvertToInt32 converts element values to int32.
  1732  // When a conversion is inexact, a truncated (round toward zero) value is returned.
  1733  // If a converted result cannot be represented in int32, an implementation-defined
  1734  // architecture-specific value is returned.
  1735  //
  1736  // Asm: VCVTTPD2DQ, CPU Feature: AVX512
  1737  func (x Float64x8) ConvertToInt32() Int32x8
  1738  
  1739  /* ConvertToInt64 */
  1740  
  1741  // ConvertToInt64 converts element values to int64.
  1742  // When a conversion is inexact, a truncated (round toward zero) value is returned.
  1743  // If a converted result cannot be represented in int64, an implementation-defined
  1744  // architecture-specific value is returned.
  1745  //
  1746  // Asm: VCVTTPS2QQ, CPU Feature: AVX512
  1747  func (x Float32x4) ConvertToInt64() Int64x4
  1748  
  1749  // ConvertToInt64 converts element values to int64.
  1750  // When a conversion is inexact, a truncated (round toward zero) value is returned.
  1751  // If a converted result cannot be represented in int64, an implementation-defined
  1752  // architecture-specific value is returned.
  1753  //
  1754  // Asm: VCVTTPS2QQ, CPU Feature: AVX512
  1755  func (x Float32x8) ConvertToInt64() Int64x8
  1756  
  1757  // ConvertToInt64 converts element values to int64.
  1758  // When a conversion is inexact, a truncated (round toward zero) value is returned.
  1759  // If a converted result cannot be represented in int64, an implementation-defined
  1760  // architecture-specific value is returned.
  1761  //
  1762  // Asm: VCVTTPD2QQ, CPU Feature: AVX512
  1763  func (x Float64x2) ConvertToInt64() Int64x2
  1764  
  1765  // ConvertToInt64 converts element values to int64.
  1766  // When a conversion is inexact, a truncated (round toward zero) value is returned.
  1767  // If a converted result cannot be represented in int64, an implementation-defined
  1768  // architecture-specific value is returned.
  1769  //
  1770  // Asm: VCVTTPD2QQ, CPU Feature: AVX512
  1771  func (x Float64x4) ConvertToInt64() Int64x4
  1772  
  1773  // ConvertToInt64 converts element values to int64.
  1774  // When a conversion is inexact, a truncated (round toward zero) value is returned.
  1775  // If a converted result cannot be represented in int64, an implementation-defined
  1776  // architecture-specific value is returned.
  1777  //
  1778  // Asm: VCVTTPD2QQ, CPU Feature: AVX512
  1779  func (x Float64x8) ConvertToInt64() Int64x8
  1780  
  1781  /* ConvertToUint32 */
  1782  
  1783  // ConvertToUint32 converts element values to uint32.
  1784  // When a conversion is inexact, a truncated (round toward zero) value is returned.
  1785  // If a converted result cannot be represented in uint32, an implementation-defined
  1786  // architecture-specific value is returned.
  1787  //
  1788  // Asm: VCVTTPS2UDQ, CPU Feature: AVX512
  1789  func (x Float32x4) ConvertToUint32() Uint32x4
  1790  
  1791  // ConvertToUint32 converts element values to uint32.
  1792  // When a conversion is inexact, a truncated (round toward zero) value is returned.
  1793  // If a converted result cannot be represented in uint32, an implementation-defined
  1794  // architecture-specific value is returned.
  1795  //
  1796  // Asm: VCVTTPS2UDQ, CPU Feature: AVX512
  1797  func (x Float32x8) ConvertToUint32() Uint32x8
  1798  
  1799  // ConvertToUint32 converts element values to uint32.
  1800  // When a conversion is inexact, a truncated (round toward zero) value is returned.
  1801  // If a converted result cannot be represented in uint32, an implementation-defined
  1802  // architecture-specific value is returned.
  1803  //
  1804  // Asm: VCVTTPS2UDQ, CPU Feature: AVX512
  1805  func (x Float32x16) ConvertToUint32() Uint32x16
  1806  
  1807  // ConvertToUint32 converts element values to uint32.
  1808  // When a conversion is inexact, a truncated (round toward zero) value is returned.
  1809  // If a converted result cannot be represented in uint32, an implementation-defined
  1810  // architecture-specific value is returned.
  1811  //
  1812  // Asm: VCVTTPD2UDQX, CPU Feature: AVX512
  1813  func (x Float64x2) ConvertToUint32() Uint32x4
  1814  
  1815  // ConvertToUint32 converts element values to uint32.
  1816  // When a conversion is inexact, a truncated (round toward zero) value is returned.
  1817  // If a converted result cannot be represented in uint32, an implementation-defined
  1818  // architecture-specific value is returned.
  1819  //
  1820  // Asm: VCVTTPD2UDQY, CPU Feature: AVX512
  1821  func (x Float64x4) ConvertToUint32() Uint32x4
  1822  
  1823  // ConvertToUint32 converts element values to uint32.
  1824  // When a conversion is inexact, a truncated (round toward zero) value is returned.
  1825  // If a converted result cannot be represented in uint32, an implementation-defined
  1826  // architecture-specific value is returned.
  1827  //
  1828  // Asm: VCVTTPD2UDQ, CPU Feature: AVX512
  1829  func (x Float64x8) ConvertToUint32() Uint32x8
  1830  
  1831  /* ConvertToUint64 */
  1832  
  1833  // ConvertToUint64 converts element values to uint64.
  1834  // When a conversion is inexact, a truncated (round toward zero) value is returned.
  1835  // If a converted result cannot be represented in uint64, an implementation-defined
  1836  // architecture-specific value is returned.
  1837  //
  1838  // Asm: VCVTTPS2UQQ, CPU Feature: AVX512
  1839  func (x Float32x4) ConvertToUint64() Uint64x4
  1840  
  1841  // ConvertToUint64 converts element values to uint64.
  1842  // When a conversion is inexact, a truncated (round toward zero) value is returned.
  1843  // If a converted result cannot be represented in uint64, an implementation-defined
  1844  // architecture-specific value is returned.
  1845  //
  1846  // Asm: VCVTTPS2UQQ, CPU Feature: AVX512
  1847  func (x Float32x8) ConvertToUint64() Uint64x8
  1848  
  1849  // ConvertToUint64 converts element values to uint64.
  1850  // When a conversion is inexact, a truncated (round toward zero) value is returned.
  1851  // If a converted result cannot be represented in uint64, an implementation-defined
  1852  // architecture-specific value is returned.
  1853  //
  1854  // Asm: VCVTTPD2UQQ, CPU Feature: AVX512
  1855  func (x Float64x2) ConvertToUint64() Uint64x2
  1856  
  1857  // ConvertToUint64 converts element values to uint64.
  1858  // When a conversion is inexact, a truncated (round toward zero) value is returned.
  1859  // If a converted result cannot be represented in uint64, an implementation-defined
  1860  // architecture-specific value is returned.
  1861  //
  1862  // Asm: VCVTTPD2UQQ, CPU Feature: AVX512
  1863  func (x Float64x4) ConvertToUint64() Uint64x4
  1864  
  1865  // ConvertToUint64 converts element values to uint64.
  1866  // When a conversion is inexact, a truncated (round toward zero) value is returned.
  1867  // If a converted result cannot be represented in uint64, an implementation-defined
  1868  // architecture-specific value is returned.
  1869  //
  1870  // Asm: VCVTTPD2UQQ, CPU Feature: AVX512
  1871  func (x Float64x8) ConvertToUint64() Uint64x8
  1872  
  1873  /* CopySign */
  1874  
  1875  // CopySign returns the product of the first operand with -1, 0, or 1,
  1876  // whichever constant is nearest to the value of the second operand.
  1877  //
  1878  // Asm: VPSIGNB, CPU Feature: AVX
  1879  func (x Int8x16) CopySign(y Int8x16) Int8x16
  1880  
  1881  // CopySign returns the product of the first operand with -1, 0, or 1,
  1882  // whichever constant is nearest to the value of the second operand.
  1883  //
  1884  // Asm: VPSIGNB, CPU Feature: AVX2
  1885  func (x Int8x32) CopySign(y Int8x32) Int8x32
  1886  
  1887  // CopySign returns the product of the first operand with -1, 0, or 1,
  1888  // whichever constant is nearest to the value of the second operand.
  1889  //
  1890  // Asm: VPSIGNW, CPU Feature: AVX
  1891  func (x Int16x8) CopySign(y Int16x8) Int16x8
  1892  
  1893  // CopySign returns the product of the first operand with -1, 0, or 1,
  1894  // whichever constant is nearest to the value of the second operand.
  1895  //
  1896  // Asm: VPSIGNW, CPU Feature: AVX2
  1897  func (x Int16x16) CopySign(y Int16x16) Int16x16
  1898  
  1899  // CopySign returns the product of the first operand with -1, 0, or 1,
  1900  // whichever constant is nearest to the value of the second operand.
  1901  //
  1902  // Asm: VPSIGND, CPU Feature: AVX
  1903  func (x Int32x4) CopySign(y Int32x4) Int32x4
  1904  
  1905  // CopySign returns the product of the first operand with -1, 0, or 1,
  1906  // whichever constant is nearest to the value of the second operand.
  1907  //
  1908  // Asm: VPSIGND, CPU Feature: AVX2
  1909  func (x Int32x8) CopySign(y Int32x8) Int32x8
  1910  
  1911  /* Div */
  1912  
  1913  // Div divides elements of two vectors.
  1914  //
  1915  // Asm: VDIVPS, CPU Feature: AVX
  1916  func (x Float32x4) Div(y Float32x4) Float32x4
  1917  
  1918  // Div divides elements of two vectors.
  1919  //
  1920  // Asm: VDIVPS, CPU Feature: AVX
  1921  func (x Float32x8) Div(y Float32x8) Float32x8
  1922  
  1923  // Div divides elements of two vectors.
  1924  //
  1925  // Asm: VDIVPS, CPU Feature: AVX512
  1926  func (x Float32x16) Div(y Float32x16) Float32x16
  1927  
  1928  // Div divides elements of two vectors.
  1929  //
  1930  // Asm: VDIVPD, CPU Feature: AVX
  1931  func (x Float64x2) Div(y Float64x2) Float64x2
  1932  
  1933  // Div divides elements of two vectors.
  1934  //
  1935  // Asm: VDIVPD, CPU Feature: AVX
  1936  func (x Float64x4) Div(y Float64x4) Float64x4
  1937  
  1938  // Div divides elements of two vectors.
  1939  //
  1940  // Asm: VDIVPD, CPU Feature: AVX512
  1941  func (x Float64x8) Div(y Float64x8) Float64x8
  1942  
  1943  /* DotProductPairs */
  1944  
  1945  // DotProductPairs multiplies the elements and adds the pairs together,
  1946  // yielding a vector of half as many elements with twice the input element size.
  1947  //
  1948  // Asm: VPMADDWD, CPU Feature: AVX
  1949  func (x Int16x8) DotProductPairs(y Int16x8) Int32x4
  1950  
  1951  // DotProductPairs multiplies the elements and adds the pairs together,
  1952  // yielding a vector of half as many elements with twice the input element size.
  1953  //
  1954  // Asm: VPMADDWD, CPU Feature: AVX2
  1955  func (x Int16x16) DotProductPairs(y Int16x16) Int32x8
  1956  
  1957  // DotProductPairs multiplies the elements and adds the pairs together,
  1958  // yielding a vector of half as many elements with twice the input element size.
  1959  //
  1960  // Asm: VPMADDWD, CPU Feature: AVX512
  1961  func (x Int16x32) DotProductPairs(y Int16x32) Int32x16
  1962  
  1963  /* DotProductPairsSaturated */
  1964  
  1965  // DotProductPairsSaturated multiplies the elements and adds the pairs together with saturation,
  1966  // yielding a vector of half as many elements with twice the input element size.
  1967  //
  1968  // Asm: VPMADDUBSW, CPU Feature: AVX
  1969  func (x Uint8x16) DotProductPairsSaturated(y Int8x16) Int16x8
  1970  
  1971  // DotProductPairsSaturated multiplies the elements and adds the pairs together with saturation,
  1972  // yielding a vector of half as many elements with twice the input element size.
  1973  //
  1974  // Asm: VPMADDUBSW, CPU Feature: AVX2
  1975  func (x Uint8x32) DotProductPairsSaturated(y Int8x32) Int16x16
  1976  
  1977  // DotProductPairsSaturated multiplies the elements and adds the pairs together with saturation,
  1978  // yielding a vector of half as many elements with twice the input element size.
  1979  //
  1980  // Asm: VPMADDUBSW, CPU Feature: AVX512
  1981  func (x Uint8x64) DotProductPairsSaturated(y Int8x64) Int16x32
  1982  
  1983  /* DotProductQuadruple */
  1984  
  1985  // DotProductQuadruple performs dot products on groups of 4 elements of x and y.
  1986  // x.DotProductQuadruple(y).Add(z) will be optimized to the full form of the underlying instruction.
  1987  //
  1988  // Asm: VPDPBUSD, CPU Feature: AVXVNNI
  1989  func (x Int8x16) DotProductQuadruple(y Uint8x16) Int32x4
  1990  
  1991  // DotProductQuadruple performs dot products on groups of 4 elements of x and y.
  1992  // x.DotProductQuadruple(y).Add(z) will be optimized to the full form of the underlying instruction.
  1993  //
  1994  // Asm: VPDPBUSD, CPU Feature: AVXVNNI
  1995  func (x Int8x32) DotProductQuadruple(y Uint8x32) Int32x8
  1996  
  1997  // DotProductQuadruple performs dot products on groups of 4 elements of x and y.
  1998  // x.DotProductQuadruple(y).Add(z) will be optimized to the full form of the underlying instruction.
  1999  //
  2000  // Asm: VPDPBUSD, CPU Feature: AVX512VNNI
  2001  func (x Int8x64) DotProductQuadruple(y Uint8x64) Int32x16
  2002  
  2003  /* DotProductQuadrupleSaturated */
  2004  
  2005  // DotProductQuadrupleSaturated performs dot products on groups of 4 elements of x and y.
  2006  // x.DotProductQuadrupleSaturated(y).Add(z) will be optimized to the full form of the underlying instruction.
  2007  //
  2008  // Asm: VPDPBUSDS, CPU Feature: AVXVNNI
  2009  func (x Int8x16) DotProductQuadrupleSaturated(y Uint8x16) Int32x4
  2010  
  2011  // DotProductQuadrupleSaturated performs dot products on groups of 4 elements of x and y.
  2012  // x.DotProductQuadrupleSaturated(y).Add(z) will be optimized to the full form of the underlying instruction.
  2013  //
  2014  // Asm: VPDPBUSDS, CPU Feature: AVXVNNI
  2015  func (x Int8x32) DotProductQuadrupleSaturated(y Uint8x32) Int32x8
  2016  
  2017  // DotProductQuadrupleSaturated performs dot products on groups of 4 elements of x and y.
  2018  // x.DotProductQuadrupleSaturated(y).Add(z) will be optimized to the full form of the underlying instruction.
  2019  //
  2020  // Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
  2021  func (x Int8x64) DotProductQuadrupleSaturated(y Uint8x64) Int32x16
  2022  
  2023  /* Equal */
  2024  
  2025  // Equal returns x equals y, elementwise.
  2026  //
  2027  // Asm: VPCMPEQB, CPU Feature: AVX
  2028  func (x Int8x16) Equal(y Int8x16) Mask8x16
  2029  
  2030  // Equal returns x equals y, elementwise.
  2031  //
  2032  // Asm: VPCMPEQB, CPU Feature: AVX2
  2033  func (x Int8x32) Equal(y Int8x32) Mask8x32
  2034  
  2035  // Equal returns x equals y, elementwise.
  2036  //
  2037  // Asm: VPCMPEQB, CPU Feature: AVX512
  2038  func (x Int8x64) Equal(y Int8x64) Mask8x64
  2039  
  2040  // Equal returns x equals y, elementwise.
  2041  //
  2042  // Asm: VPCMPEQW, CPU Feature: AVX
  2043  func (x Int16x8) Equal(y Int16x8) Mask16x8
  2044  
  2045  // Equal returns x equals y, elementwise.
  2046  //
  2047  // Asm: VPCMPEQW, CPU Feature: AVX2
  2048  func (x Int16x16) Equal(y Int16x16) Mask16x16
  2049  
  2050  // Equal returns x equals y, elementwise.
  2051  //
  2052  // Asm: VPCMPEQW, CPU Feature: AVX512
  2053  func (x Int16x32) Equal(y Int16x32) Mask16x32
  2054  
  2055  // Equal returns x equals y, elementwise.
  2056  //
  2057  // Asm: VPCMPEQD, CPU Feature: AVX
  2058  func (x Int32x4) Equal(y Int32x4) Mask32x4
  2059  
  2060  // Equal returns x equals y, elementwise.
  2061  //
  2062  // Asm: VPCMPEQD, CPU Feature: AVX2
  2063  func (x Int32x8) Equal(y Int32x8) Mask32x8
  2064  
  2065  // Equal returns x equals y, elementwise.
  2066  //
  2067  // Asm: VPCMPEQD, CPU Feature: AVX512
  2068  func (x Int32x16) Equal(y Int32x16) Mask32x16
  2069  
  2070  // Equal returns x equals y, elementwise.
  2071  //
  2072  // Asm: VPCMPEQQ, CPU Feature: AVX
  2073  func (x Int64x2) Equal(y Int64x2) Mask64x2
  2074  
  2075  // Equal returns x equals y, elementwise.
  2076  //
  2077  // Asm: VPCMPEQQ, CPU Feature: AVX2
  2078  func (x Int64x4) Equal(y Int64x4) Mask64x4
  2079  
  2080  // Equal returns x equals y, elementwise.
  2081  //
  2082  // Asm: VPCMPEQQ, CPU Feature: AVX512
  2083  func (x Int64x8) Equal(y Int64x8) Mask64x8
  2084  
  2085  // Equal returns x equals y, elementwise.
  2086  //
  2087  // Asm: VPCMPEQB, CPU Feature: AVX
  2088  func (x Uint8x16) Equal(y Uint8x16) Mask8x16
  2089  
  2090  // Equal returns x equals y, elementwise.
  2091  //
  2092  // Asm: VPCMPEQB, CPU Feature: AVX2
  2093  func (x Uint8x32) Equal(y Uint8x32) Mask8x32
  2094  
  2095  // Equal returns x equals y, elementwise.
  2096  //
  2097  // Asm: VPCMPEQB, CPU Feature: AVX512
  2098  func (x Uint8x64) Equal(y Uint8x64) Mask8x64
  2099  
  2100  // Equal returns x equals y, elementwise.
  2101  //
  2102  // Asm: VPCMPEQW, CPU Feature: AVX
  2103  func (x Uint16x8) Equal(y Uint16x8) Mask16x8
  2104  
  2105  // Equal returns x equals y, elementwise.
  2106  //
  2107  // Asm: VPCMPEQW, CPU Feature: AVX2
  2108  func (x Uint16x16) Equal(y Uint16x16) Mask16x16
  2109  
  2110  // Equal returns x equals y, elementwise.
  2111  //
  2112  // Asm: VPCMPEQW, CPU Feature: AVX512
  2113  func (x Uint16x32) Equal(y Uint16x32) Mask16x32
  2114  
  2115  // Equal returns x equals y, elementwise.
  2116  //
  2117  // Asm: VPCMPEQD, CPU Feature: AVX
  2118  func (x Uint32x4) Equal(y Uint32x4) Mask32x4
  2119  
  2120  // Equal returns x equals y, elementwise.
  2121  //
  2122  // Asm: VPCMPEQD, CPU Feature: AVX2
  2123  func (x Uint32x8) Equal(y Uint32x8) Mask32x8
  2124  
  2125  // Equal returns x equals y, elementwise.
  2126  //
  2127  // Asm: VPCMPEQD, CPU Feature: AVX512
  2128  func (x Uint32x16) Equal(y Uint32x16) Mask32x16
  2129  
  2130  // Equal returns x equals y, elementwise.
  2131  //
  2132  // Asm: VPCMPEQQ, CPU Feature: AVX
  2133  func (x Uint64x2) Equal(y Uint64x2) Mask64x2
  2134  
  2135  // Equal returns x equals y, elementwise.
  2136  //
  2137  // Asm: VPCMPEQQ, CPU Feature: AVX2
  2138  func (x Uint64x4) Equal(y Uint64x4) Mask64x4
  2139  
  2140  // Equal returns x equals y, elementwise.
  2141  //
  2142  // Asm: VPCMPEQQ, CPU Feature: AVX512
  2143  func (x Uint64x8) Equal(y Uint64x8) Mask64x8
  2144  
  2145  // Equal returns x equals y, elementwise.
  2146  //
  2147  // Asm: VCMPPS, CPU Feature: AVX
  2148  func (x Float32x4) Equal(y Float32x4) Mask32x4
  2149  
  2150  // Equal returns x equals y, elementwise.
  2151  //
  2152  // Asm: VCMPPS, CPU Feature: AVX
  2153  func (x Float32x8) Equal(y Float32x8) Mask32x8
  2154  
  2155  // Equal returns x equals y, elementwise.
  2156  //
  2157  // Asm: VCMPPS, CPU Feature: AVX512
  2158  func (x Float32x16) Equal(y Float32x16) Mask32x16
  2159  
  2160  // Equal returns x equals y, elementwise.
  2161  //
  2162  // Asm: VCMPPD, CPU Feature: AVX
  2163  func (x Float64x2) Equal(y Float64x2) Mask64x2
  2164  
  2165  // Equal returns x equals y, elementwise.
  2166  //
  2167  // Asm: VCMPPD, CPU Feature: AVX
  2168  func (x Float64x4) Equal(y Float64x4) Mask64x4
  2169  
  2170  // Equal returns x equals y, elementwise.
  2171  //
  2172  // Asm: VCMPPD, CPU Feature: AVX512
  2173  func (x Float64x8) Equal(y Float64x8) Mask64x8
  2174  
  2175  /* Expand */
  2176  
  2177  // Expand performs an expansion on a vector x whose elements are packed to lower parts.
  2178  // The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order.
  2179  //
  2180  // Asm: VEXPANDPS, CPU Feature: AVX512
  2181  func (x Float32x4) Expand(mask Mask32x4) Float32x4
  2182  
  2183  // Expand performs an expansion on a vector x whose elements are packed to lower parts.
  2184  // The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order.
  2185  //
  2186  // Asm: VEXPANDPS, CPU Feature: AVX512
  2187  func (x Float32x8) Expand(mask Mask32x8) Float32x8
  2188  
  2189  // Expand performs an expansion on a vector x whose elements are packed to lower parts.
  2190  // The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order.
  2191  //
  2192  // Asm: VEXPANDPS, CPU Feature: AVX512
  2193  func (x Float32x16) Expand(mask Mask32x16) Float32x16
  2194  
  2195  // Expand takes a vector x whose elements are packed into its lower part and spreads them out across the result.
  2196  // The elements are distributed to the positions indexed by mask, in order from the lowest mask element to the highest.
  2197  //
  2198  // Asm: VEXPANDPD, CPU Feature: AVX512
  2199  func (x Float64x2) Expand(mask Mask64x2) Float64x2
  2200  
  2201  // Expand takes a vector x whose elements are packed into its lower part and spreads them out across the result.
  2202  // The elements are distributed to the positions indexed by mask, in order from the lowest mask element to the highest.
  2203  //
  2204  // Asm: VEXPANDPD, CPU Feature: AVX512
  2205  func (x Float64x4) Expand(mask Mask64x4) Float64x4
  2206  
  2207  // Expand takes a vector x whose elements are packed into its lower part and spreads them out across the result.
  2208  // The elements are distributed to the positions indexed by mask, in order from the lowest mask element to the highest.
  2209  //
  2210  // Asm: VEXPANDPD, CPU Feature: AVX512
  2211  func (x Float64x8) Expand(mask Mask64x8) Float64x8
  2212  
  2213  // Expand takes a vector x whose elements are packed into its lower part and spreads them out across the result.
  2214  // The elements are distributed to the positions indexed by mask, in order from the lowest mask element to the highest.
  2215  //
  2216  // Asm: VPEXPANDB, CPU Feature: AVX512VBMI2
  2217  func (x Int8x16) Expand(mask Mask8x16) Int8x16
  2218  
  2219  // Expand takes a vector x whose elements are packed into its lower part and spreads them out across the result.
  2220  // The elements are distributed to the positions indexed by mask, in order from the lowest mask element to the highest.
  2221  //
  2222  // Asm: VPEXPANDB, CPU Feature: AVX512VBMI2
  2223  func (x Int8x32) Expand(mask Mask8x32) Int8x32
  2224  
  2225  // Expand takes a vector x whose elements are packed into its lower part and spreads them out across the result.
  2226  // The elements are distributed to the positions indexed by mask, in order from the lowest mask element to the highest.
  2227  //
  2228  // Asm: VPEXPANDB, CPU Feature: AVX512VBMI2
  2229  func (x Int8x64) Expand(mask Mask8x64) Int8x64
  2230  
  2231  // Expand takes a vector x whose elements are packed into its lower part and spreads them out across the result.
  2232  // The elements are distributed to the positions indexed by mask, in order from the lowest mask element to the highest.
  2233  //
  2234  // Asm: VPEXPANDW, CPU Feature: AVX512VBMI2
  2235  func (x Int16x8) Expand(mask Mask16x8) Int16x8
  2236  
  2237  // Expand takes a vector x whose elements are packed into its lower part and spreads them out across the result.
  2238  // The elements are distributed to the positions indexed by mask, in order from the lowest mask element to the highest.
  2239  //
  2240  // Asm: VPEXPANDW, CPU Feature: AVX512VBMI2
  2241  func (x Int16x16) Expand(mask Mask16x16) Int16x16
  2242  
  2243  // Expand takes a vector x whose elements are packed into its lower part and spreads them out across the result.
  2244  // The elements are distributed to the positions indexed by mask, in order from the lowest mask element to the highest.
  2245  //
  2246  // Asm: VPEXPANDW, CPU Feature: AVX512VBMI2
  2247  func (x Int16x32) Expand(mask Mask16x32) Int16x32
  2248  
  2249  // Expand takes a vector x whose elements are packed into its lower part and spreads them out across the result.
  2250  // The elements are distributed to the positions indexed by mask, in order from the lowest mask element to the highest.
  2251  //
  2252  // Asm: VPEXPANDD, CPU Feature: AVX512
  2253  func (x Int32x4) Expand(mask Mask32x4) Int32x4
  2254  
  2255  // Expand takes a vector x whose elements are packed into its lower part and spreads them out across the result.
  2256  // The elements are distributed to the positions indexed by mask, in order from the lowest mask element to the highest.
  2257  //
  2258  // Asm: VPEXPANDD, CPU Feature: AVX512
  2259  func (x Int32x8) Expand(mask Mask32x8) Int32x8
  2260  
  2261  // Expand takes a vector x whose elements are packed into its lower part and spreads them out across the result.
  2262  // The elements are distributed to the positions indexed by mask, in order from the lowest mask element to the highest.
  2263  //
  2264  // Asm: VPEXPANDD, CPU Feature: AVX512
  2265  func (x Int32x16) Expand(mask Mask32x16) Int32x16
  2266  
  2267  // Expand takes a vector x whose elements are packed into its lower part and spreads them out across the result.
  2268  // The elements are distributed to the positions indexed by mask, in order from the lowest mask element to the highest.
  2269  //
  2270  // Asm: VPEXPANDQ, CPU Feature: AVX512
  2271  func (x Int64x2) Expand(mask Mask64x2) Int64x2
  2272  
  2273  // Expand takes a vector x whose elements are packed into its lower part and spreads them out across the result.
  2274  // The elements are distributed to the positions indexed by mask, in order from the lowest mask element to the highest.
  2275  //
  2276  // Asm: VPEXPANDQ, CPU Feature: AVX512
  2277  func (x Int64x4) Expand(mask Mask64x4) Int64x4
  2278  
  2279  // Expand takes a vector x whose elements are packed into its lower part and spreads them out across the result.
  2280  // The elements are distributed to the positions indexed by mask, in order from the lowest mask element to the highest.
  2281  //
  2282  // Asm: VPEXPANDQ, CPU Feature: AVX512
  2283  func (x Int64x8) Expand(mask Mask64x8) Int64x8
  2284  
  2285  // Expand takes a vector x whose elements are packed into its lower part and spreads them out across the result.
  2286  // The elements are distributed to the positions indexed by mask, in order from the lowest mask element to the highest.
  2287  //
  2288  // Asm: VPEXPANDB, CPU Feature: AVX512VBMI2
  2289  func (x Uint8x16) Expand(mask Mask8x16) Uint8x16
  2290  
  2291  // Expand takes a vector x whose elements are packed into its lower part and spreads them out across the result.
  2292  // The elements are distributed to the positions indexed by mask, in order from the lowest mask element to the highest.
  2293  //
  2294  // Asm: VPEXPANDB, CPU Feature: AVX512VBMI2
  2295  func (x Uint8x32) Expand(mask Mask8x32) Uint8x32
  2296  
  2297  // Expand takes a vector x whose elements are packed into its lower part and spreads them out across the result.
  2298  // The elements are distributed to the positions indexed by mask, in order from the lowest mask element to the highest.
  2299  //
  2300  // Asm: VPEXPANDB, CPU Feature: AVX512VBMI2
  2301  func (x Uint8x64) Expand(mask Mask8x64) Uint8x64
  2302  
  2303  // Expand takes a vector x whose elements are packed into its lower part and spreads them out across the result.
  2304  // The elements are distributed to the positions indexed by mask, in order from the lowest mask element to the highest.
  2305  //
  2306  // Asm: VPEXPANDW, CPU Feature: AVX512VBMI2
  2307  func (x Uint16x8) Expand(mask Mask16x8) Uint16x8
  2308  
  2309  // Expand takes a vector x whose elements are packed into its lower part and spreads them out across the result.
  2310  // The elements are distributed to the positions indexed by mask, in order from the lowest mask element to the highest.
  2311  //
  2312  // Asm: VPEXPANDW, CPU Feature: AVX512VBMI2
  2313  func (x Uint16x16) Expand(mask Mask16x16) Uint16x16
  2314  
  2315  // Expand takes a vector x whose elements are packed into its lower part and spreads them out across the result.
  2316  // The elements are distributed to the positions indexed by mask, in order from the lowest mask element to the highest.
  2317  //
  2318  // Asm: VPEXPANDW, CPU Feature: AVX512VBMI2
  2319  func (x Uint16x32) Expand(mask Mask16x32) Uint16x32
  2320  
  2321  // Expand takes a vector x whose elements are packed into its lower part and spreads them out across the result.
  2322  // The elements are distributed to the positions indexed by mask, in order from the lowest mask element to the highest.
  2323  //
  2324  // Asm: VPEXPANDD, CPU Feature: AVX512
  2325  func (x Uint32x4) Expand(mask Mask32x4) Uint32x4
  2326  
  2327  // Expand takes a vector x whose elements are packed into its lower part and spreads them out across the result.
  2328  // The elements are distributed to the positions indexed by mask, in order from the lowest mask element to the highest.
  2329  //
  2330  // Asm: VPEXPANDD, CPU Feature: AVX512
  2331  func (x Uint32x8) Expand(mask Mask32x8) Uint32x8
  2332  
  2333  // Expand takes a vector x whose elements are packed into its lower part and spreads them out across the result.
  2334  // The elements are distributed to the positions indexed by mask, in order from the lowest mask element to the highest.
  2335  //
  2336  // Asm: VPEXPANDD, CPU Feature: AVX512
  2337  func (x Uint32x16) Expand(mask Mask32x16) Uint32x16
  2338  
  2339  // Expand takes a vector x whose elements are packed into its lower part and spreads them out across the result.
  2340  // The elements are distributed to the positions indexed by mask, in order from the lowest mask element to the highest.
  2341  //
  2342  // Asm: VPEXPANDQ, CPU Feature: AVX512
  2343  func (x Uint64x2) Expand(mask Mask64x2) Uint64x2
  2344  
  2345  // Expand takes a vector x whose elements are packed into its lower part and spreads them out across the result.
  2346  // The elements are distributed to the positions indexed by mask, in order from the lowest mask element to the highest.
  2347  //
  2348  // Asm: VPEXPANDQ, CPU Feature: AVX512
  2349  func (x Uint64x4) Expand(mask Mask64x4) Uint64x4
  2350  
  2351  // Expand takes a vector x whose elements are packed into its lower part and spreads them out across the result.
  2352  // The elements are distributed to the positions indexed by mask, in order from the lowest mask element to the highest.
  2353  //
  2354  // Asm: VPEXPANDQ, CPU Feature: AVX512
  2355  func (x Uint64x8) Expand(mask Mask64x8) Uint64x8
  2356  
  2357  /* ExtendLo2ToInt64x2 */
  2358  
  2359  // ExtendLo2ToInt64x2 converts the 2 lowest elements of x to int64, returning them as an Int64x2.
  2360  // Each converted value is sign-extended.
  2361  //
  2362  // Asm: VPMOVSXBQ, CPU Feature: AVX
  2363  func (x Int8x16) ExtendLo2ToInt64x2() Int64x2
  2364  
  2365  // ExtendLo2ToInt64x2 converts the 2 lowest elements of x to int64, returning them as an Int64x2.
  2366  // Each converted value is sign-extended.
  2367  //
  2368  // Asm: VPMOVSXWQ, CPU Feature: AVX
  2369  func (x Int16x8) ExtendLo2ToInt64x2() Int64x2
  2370  
  2371  // ExtendLo2ToInt64x2 converts the 2 lowest elements of x to int64, returning them as an Int64x2.
  2372  // Each converted value is sign-extended.
  2373  //
  2374  // Asm: VPMOVSXDQ, CPU Feature: AVX
  2375  func (x Int32x4) ExtendLo2ToInt64x2() Int64x2
  2376  
  2377  /* ExtendLo2ToUint64x2 */
  2378  
  2379  // ExtendLo2ToUint64x2 converts the 2 lowest elements of x to uint64, returning them as a Uint64x2.
  2380  // Each converted value is zero-extended.
  2381  //
  2382  // Asm: VPMOVZXBQ, CPU Feature: AVX
  2383  func (x Uint8x16) ExtendLo2ToUint64x2() Uint64x2
  2384  
  2385  // ExtendLo2ToUint64x2 converts the 2 lowest elements of x to uint64, returning them as a Uint64x2.
  2386  // Each converted value is zero-extended.
  2387  //
  2388  // Asm: VPMOVZXWQ, CPU Feature: AVX
  2389  func (x Uint16x8) ExtendLo2ToUint64x2() Uint64x2
  2390  
  2391  // ExtendLo2ToUint64x2 converts the 2 lowest elements of x to uint64, returning them as a Uint64x2.
  2392  // Each converted value is zero-extended.
  2393  //
  2394  // Asm: VPMOVZXDQ, CPU Feature: AVX
  2395  func (x Uint32x4) ExtendLo2ToUint64x2() Uint64x2
  2396  
  2397  /* ExtendLo4ToInt32x4 */
  2398  
  2399  // ExtendLo4ToInt32x4 converts the 4 lowest elements of x to int32, returning them as an Int32x4.
  2400  // Each converted value is sign-extended.
  2401  //
  2402  // Asm: VPMOVSXBD, CPU Feature: AVX
  2403  func (x Int8x16) ExtendLo4ToInt32x4() Int32x4
  2404  
  2405  // ExtendLo4ToInt32x4 converts the 4 lowest elements of x to int32, returning them as an Int32x4.
  2406  // Each converted value is sign-extended.
  2407  //
  2408  // Asm: VPMOVSXWD, CPU Feature: AVX
  2409  func (x Int16x8) ExtendLo4ToInt32x4() Int32x4
  2410  
  2411  /* ExtendLo4ToInt64x4 */
  2412  
  2413  // ExtendLo4ToInt64x4 converts the 4 lowest elements of x to int64, returning them as an Int64x4.
  2414  // Each converted value is sign-extended.
  2415  //
  2416  // Asm: VPMOVSXBQ, CPU Feature: AVX2
  2417  func (x Int8x16) ExtendLo4ToInt64x4() Int64x4
  2418  
  2419  // ExtendLo4ToInt64x4 converts the 4 lowest elements of x to int64, returning them as an Int64x4.
  2420  // Each converted value is sign-extended.
  2421  //
  2422  // Asm: VPMOVSXWQ, CPU Feature: AVX2
  2423  func (x Int16x8) ExtendLo4ToInt64x4() Int64x4
  2424  
  2425  /* ExtendLo4ToUint32x4 */
  2426  
  2427  // ExtendLo4ToUint32x4 converts the 4 lowest elements of x to uint32, returning them as a Uint32x4.
  2428  // Each converted value is zero-extended.
  2429  //
  2430  // Asm: VPMOVZXBD, CPU Feature: AVX
  2431  func (x Uint8x16) ExtendLo4ToUint32x4() Uint32x4
  2432  
  2433  // ExtendLo4ToUint32x4 converts the 4 lowest elements of x to uint32, returning them as a Uint32x4.
  2434  // Each converted value is zero-extended.
  2435  //
  2436  // Asm: VPMOVZXWD, CPU Feature: AVX
  2437  func (x Uint16x8) ExtendLo4ToUint32x4() Uint32x4
  2438  
  2439  /* ExtendLo4ToUint64x4 */
  2440  
  2441  // ExtendLo4ToUint64x4 converts the 4 lowest elements of x to uint64, returning them as a Uint64x4.
  2442  // Each converted value is zero-extended.
  2443  //
  2444  // Asm: VPMOVZXBQ, CPU Feature: AVX2
  2445  func (x Uint8x16) ExtendLo4ToUint64x4() Uint64x4
  2446  
  2447  // ExtendLo4ToUint64x4 converts the 4 lowest elements of x to uint64, returning them as a Uint64x4.
  2448  // Each converted value is zero-extended.
  2449  //
  2450  // Asm: VPMOVZXWQ, CPU Feature: AVX2
  2451  func (x Uint16x8) ExtendLo4ToUint64x4() Uint64x4
  2452  
  2453  /* ExtendLo8ToInt16x8 */
  2454  
  2455  // ExtendLo8ToInt16x8 converts the 8 lowest elements of x to int16, returning them as an Int16x8.
  2456  // Each converted value is sign-extended.
  2457  //
  2458  // Asm: VPMOVSXBW, CPU Feature: AVX
  2459  func (x Int8x16) ExtendLo8ToInt16x8() Int16x8
  2460  
  2461  /* ExtendLo8ToInt32x8 */
  2462  
  2463  // ExtendLo8ToInt32x8 converts the 8 lowest elements of x to int32, returning them as an Int32x8.
  2464  // Each converted value is sign-extended.
  2465  //
  2466  // Asm: VPMOVSXBD, CPU Feature: AVX2
  2467  func (x Int8x16) ExtendLo8ToInt32x8() Int32x8
  2468  
  2469  /* ExtendLo8ToInt64x8 */
  2470  
  2471  // ExtendLo8ToInt64x8 converts the 8 lowest elements of x to int64, returning them as an Int64x8.
  2472  // Each converted value is sign-extended.
  2473  //
  2474  // Asm: VPMOVSXBQ, CPU Feature: AVX512
  2475  func (x Int8x16) ExtendLo8ToInt64x8() Int64x8
  2476  
  2477  /* ExtendLo8ToUint16x8 */
  2478  
  2479  // ExtendLo8ToUint16x8 converts the 8 lowest elements of x to uint16, returning them as a Uint16x8.
  2480  // Each converted value is zero-extended.
  2481  //
  2482  // Asm: VPMOVZXBW, CPU Feature: AVX
  2483  func (x Uint8x16) ExtendLo8ToUint16x8() Uint16x8
  2484  
  2485  /* ExtendLo8ToUint32x8 */
  2486  
  2487  // ExtendLo8ToUint32x8 converts the 8 lowest elements of x to uint32, returning them as a Uint32x8.
  2488  // Each converted value is zero-extended.
  2489  //
  2490  // Asm: VPMOVZXBD, CPU Feature: AVX2
  2491  func (x Uint8x16) ExtendLo8ToUint32x8() Uint32x8
  2492  
  2493  /* ExtendLo8ToUint64x8 */
  2494  
  2495  // ExtendLo8ToUint64x8 converts the 8 lowest elements of x to uint64, returning them as a Uint64x8.
  2496  // Each converted value is zero-extended.
  2497  //
  2498  // Asm: VPMOVZXBQ, CPU Feature: AVX512
  2499  func (x Uint8x16) ExtendLo8ToUint64x8() Uint64x8
  2500  
  2501  /* ExtendToInt16 */
  2502  
  2503  // ExtendToInt16 converts the elements of x to int16, returning them as an Int16x16.
  2504  // Each converted value is sign-extended.
  2505  //
  2506  // Asm: VPMOVSXBW, CPU Feature: AVX2
  2507  func (x Int8x16) ExtendToInt16() Int16x16
  2508  
  2509  // ExtendToInt16 converts the elements of x to int16, returning them as an Int16x32.
  2510  // Each converted value is sign-extended.
  2511  //
  2512  // Asm: VPMOVSXBW, CPU Feature: AVX512
  2513  func (x Int8x32) ExtendToInt16() Int16x32
  2514  
  2515  /* ExtendToInt32 */
  2516  
  2517  // ExtendToInt32 converts the elements of x to int32, returning them as an Int32x16.
  2518  // Each converted value is sign-extended.
  2519  //
  2520  // Asm: VPMOVSXBD, CPU Feature: AVX512
  2521  func (x Int8x16) ExtendToInt32() Int32x16
  2522  
  2523  // ExtendToInt32 converts the elements of x to int32, returning them as an Int32x8.
  2524  // Each converted value is sign-extended.
  2525  //
  2526  // Asm: VPMOVSXWD, CPU Feature: AVX2
  2527  func (x Int16x8) ExtendToInt32() Int32x8
  2528  
  2529  // ExtendToInt32 converts the elements of x to int32, returning them as an Int32x16.
  2530  // Each converted value is sign-extended.
  2531  //
  2532  // Asm: VPMOVSXWD, CPU Feature: AVX512
  2533  func (x Int16x16) ExtendToInt32() Int32x16
  2534  
  2535  /* ExtendToInt64 */
  2536  
  2537  // ExtendToInt64 converts the elements of x to int64, returning them as an Int64x8.
  2538  // Each converted value is sign-extended.
  2539  //
  2540  // Asm: VPMOVSXWQ, CPU Feature: AVX512
  2541  func (x Int16x8) ExtendToInt64() Int64x8
  2542  
  2543  // ExtendToInt64 converts the elements of x to int64, returning them as an Int64x4.
  2544  // Each converted value is sign-extended.
  2545  //
  2546  // Asm: VPMOVSXDQ, CPU Feature: AVX2
  2547  func (x Int32x4) ExtendToInt64() Int64x4
  2548  
  2549  // ExtendToInt64 converts the elements of x to int64, returning them as an Int64x8.
  2550  // Each converted value is sign-extended.
  2551  //
  2552  // Asm: VPMOVSXDQ, CPU Feature: AVX512
  2553  func (x Int32x8) ExtendToInt64() Int64x8
  2554  
  2555  /* ExtendToUint16 */
  2556  
  2557  // ExtendToUint16 converts the elements of x to uint16, returning them as a Uint16x16.
  2558  // Each converted value is zero-extended.
  2559  //
  2560  // Asm: VPMOVZXBW, CPU Feature: AVX2
  2561  func (x Uint8x16) ExtendToUint16() Uint16x16
  2562  
  2563  // ExtendToUint16 converts the elements of x to uint16, returning them as a Uint16x32.
  2564  // Each converted value is zero-extended.
  2565  //
  2566  // Asm: VPMOVZXBW, CPU Feature: AVX512
  2567  func (x Uint8x32) ExtendToUint16() Uint16x32
  2568  
  2569  /* ExtendToUint32 */
  2570  
  2571  // ExtendToUint32 converts the elements of x to uint32, returning them as a Uint32x16.
  2572  // Each converted value is zero-extended.
  2573  //
  2574  // Asm: VPMOVZXBD, CPU Feature: AVX512
  2575  func (x Uint8x16) ExtendToUint32() Uint32x16
  2576  
  2577  // ExtendToUint32 converts the elements of x to uint32, returning them as a Uint32x8.
  2578  // Each converted value is zero-extended.
  2579  //
  2580  // Asm: VPMOVZXWD, CPU Feature: AVX2
  2581  func (x Uint16x8) ExtendToUint32() Uint32x8
  2582  
  2583  // ExtendToUint32 converts the elements of x to uint32, returning them as a Uint32x16.
  2584  // Each converted value is zero-extended.
  2585  //
  2586  // Asm: VPMOVZXWD, CPU Feature: AVX512
  2587  func (x Uint16x16) ExtendToUint32() Uint32x16
  2588  
  2589  /* ExtendToUint64 */
  2590  
  2591  // ExtendToUint64 converts the elements of x to uint64, returning them as a Uint64x8.
  2592  // Each converted value is zero-extended.
  2593  //
  2594  // Asm: VPMOVZXWQ, CPU Feature: AVX512
  2595  func (x Uint16x8) ExtendToUint64() Uint64x8
  2596  
  2597  // ExtendToUint64 converts the elements of x to uint64, returning them as a Uint64x4.
  2598  // Each converted value is zero-extended.
  2599  //
  2600  // Asm: VPMOVZXDQ, CPU Feature: AVX2
  2601  func (x Uint32x4) ExtendToUint64() Uint64x4
  2602  
  2603  // ExtendToUint64 converts the elements of x to uint64, returning them as a Uint64x8.
  2604  // Each converted value is zero-extended.
  2605  //
  2606  // Asm: VPMOVZXDQ, CPU Feature: AVX512
  2607  func (x Uint32x8) ExtendToUint64() Uint64x8
  2608  
  2609  /* Floor */
  2610  
  2611  // Floor rounds each element of x down to the nearest integer, returning the results as a Float32x4.
  2612  //
  2613  // Asm: VROUNDPS, CPU Feature: AVX
  2614  func (x Float32x4) Floor() Float32x4
  2615  
  2616  // Floor rounds each element of x down to the nearest integer, returning the results as a Float32x8.
  2617  //
  2618  // Asm: VROUNDPS, CPU Feature: AVX
  2619  func (x Float32x8) Floor() Float32x8
  2620  
  2621  // Floor rounds each element of x down to the nearest integer, returning the results as a Float64x2.
  2622  //
  2623  // Asm: VROUNDPD, CPU Feature: AVX
  2624  func (x Float64x2) Floor() Float64x2
  2625  
  2626  // Floor rounds each element of x down to the nearest integer, returning the results as a Float64x4.
  2627  //
  2628  // Asm: VROUNDPD, CPU Feature: AVX
  2629  func (x Float64x4) Floor() Float64x4
  2630  
  2631  /* FloorScaled */
  2632  
  2633  // FloorScaled rounds each element of x down, using the precision specified by prec.
  2634  //
  2635  // Passing a constant prec gives better performance; a non-constant prec is translated into a jump table.
  2636  //
  2637  // Asm: VRNDSCALEPS, CPU Feature: AVX512
  2638  func (x Float32x4) FloorScaled(prec uint8) Float32x4
  2639  
  2640  // FloorScaled rounds each element of x down, using the precision specified by prec.
  2641  //
  2642  // Passing a constant prec gives better performance; a non-constant prec is translated into a jump table.
  2643  //
  2644  // Asm: VRNDSCALEPS, CPU Feature: AVX512
  2645  func (x Float32x8) FloorScaled(prec uint8) Float32x8
  2646  
  2647  // FloorScaled rounds each element of x down, using the precision specified by prec.
  2648  //
  2649  // Passing a constant prec gives better performance; a non-constant prec is translated into a jump table.
  2650  //
  2651  // Asm: VRNDSCALEPS, CPU Feature: AVX512
  2652  func (x Float32x16) FloorScaled(prec uint8) Float32x16
  2653  
  2654  // FloorScaled rounds each element of x down, using the precision specified by prec.
  2655  //
  2656  // Passing a constant prec gives better performance; a non-constant prec is translated into a jump table.
  2657  //
  2658  // Asm: VRNDSCALEPD, CPU Feature: AVX512
  2659  func (x Float64x2) FloorScaled(prec uint8) Float64x2
  2660  
  2661  // FloorScaled rounds each element of x down, using the precision specified by prec.
  2662  //
  2663  // Passing a constant prec gives better performance; a non-constant prec is translated into a jump table.
  2664  //
  2665  // Asm: VRNDSCALEPD, CPU Feature: AVX512
  2666  func (x Float64x4) FloorScaled(prec uint8) Float64x4
  2667  
  2668  // FloorScaled rounds each element of x down, using the precision specified by prec.
  2669  //
  2670  // Passing a constant prec gives better performance; a non-constant prec is translated into a jump table.
  2671  //
  2672  // Asm: VRNDSCALEPD, CPU Feature: AVX512
  2673  func (x Float64x8) FloorScaled(prec uint8) Float64x8
  2674  
  2675  /* FloorScaledResidue */
  2676  
  2677  // FloorScaledResidue computes the difference that remains after flooring the elements of x with the precision specified by prec.
  2678  //
  2679  // Passing a constant prec gives better performance; a non-constant prec is translated into a jump table.
  2680  //
  2681  // Asm: VREDUCEPS, CPU Feature: AVX512
  2682  func (x Float32x4) FloorScaledResidue(prec uint8) Float32x4
  2683  
  2684  // FloorScaledResidue computes the difference that remains after flooring the elements of x with the precision specified by prec.
  2685  //
  2686  // Passing a constant prec gives better performance; a non-constant prec is translated into a jump table.
  2687  //
  2688  // Asm: VREDUCEPS, CPU Feature: AVX512
  2689  func (x Float32x8) FloorScaledResidue(prec uint8) Float32x8
  2690  
  2691  // FloorScaledResidue computes the difference that remains after flooring the elements of x with the precision specified by prec.
  2692  //
  2693  // Passing a constant prec gives better performance; a non-constant prec is translated into a jump table.
  2694  //
  2695  // Asm: VREDUCEPS, CPU Feature: AVX512
  2696  func (x Float32x16) FloorScaledResidue(prec uint8) Float32x16
  2697  
  2698  // FloorScaledResidue computes the difference that remains after flooring the elements of x with the precision specified by prec.
  2699  //
  2700  // Passing a constant prec gives better performance; a non-constant prec is translated into a jump table.
  2701  //
  2702  // Asm: VREDUCEPD, CPU Feature: AVX512
  2703  func (x Float64x2) FloorScaledResidue(prec uint8) Float64x2
  2704  
  2705  // FloorScaledResidue computes the difference that remains after flooring the elements of x with the precision specified by prec.
  2706  //
  2707  // Passing a constant prec gives better performance; a non-constant prec is translated into a jump table.
  2708  //
  2709  // Asm: VREDUCEPD, CPU Feature: AVX512
  2710  func (x Float64x4) FloorScaledResidue(prec uint8) Float64x4
  2711  
  2712  // FloorScaledResidue computes the difference that remains after flooring the elements of x with the precision specified by prec.
  2713  //
  2714  // Passing a constant prec gives better performance; a non-constant prec is translated into a jump table.
  2715  //
  2716  // Asm: VREDUCEPD, CPU Feature: AVX512
  2717  func (x Float64x8) FloorScaledResidue(prec uint8) Float64x8
  2718  
  2719  /* GaloisFieldAffineTransform */
  2720  
  2721  // GaloisFieldAffineTransform applies the affine transformation y * x + b in GF(2^8).
  2722  // x is a vector of 8-bit vectors, taken in groups of 8 adjacent elements; y is a vector of 8x8 1-bit matrices;
  2723  // b is an 8-bit vector. Each element of y is the matrix that corresponds to
  2724  // one group of 8 elements in x.
  2725  //
  2726  // Passing a constant b gives better performance; a non-constant b is translated into a jump table.
  2727  //
  2728  // Asm: VGF2P8AFFINEQB, CPU Feature: AVX512GFNI
  2729  func (x Uint8x16) GaloisFieldAffineTransform(y Uint64x2, b uint8) Uint8x16
  2730  
  2731  // GaloisFieldAffineTransform applies the affine transformation y * x + b in GF(2^8).
  2732  // x is a vector of 8-bit vectors, taken in groups of 8 adjacent elements; y is a vector of 8x8 1-bit matrices;
  2733  // b is an 8-bit vector. Each element of y is the matrix that corresponds to
  2734  // one group of 8 elements in x.
  2735  //
  2736  // Passing a constant b gives better performance; a non-constant b is translated into a jump table.
  2737  //
  2738  // Asm: VGF2P8AFFINEQB, CPU Feature: AVX512GFNI
  2739  func (x Uint8x32) GaloisFieldAffineTransform(y Uint64x4, b uint8) Uint8x32
  2740  
  2741  // GaloisFieldAffineTransform applies the affine transformation y * x + b in GF(2^8).
  2742  // x is a vector of 8-bit vectors, taken in groups of 8 adjacent elements; y is a vector of 8x8 1-bit matrices;
  2743  // b is an 8-bit vector. Each element of y is the matrix that corresponds to
  2744  // one group of 8 elements in x.
  2745  //
  2746  // Passing a constant b gives better performance; a non-constant b is translated into a jump table.
  2747  //
  2748  // Asm: VGF2P8AFFINEQB, CPU Feature: AVX512GFNI
  2749  func (x Uint8x64) GaloisFieldAffineTransform(y Uint64x8, b uint8) Uint8x64
  2750  
  2751  /* GaloisFieldAffineTransformInverse */
  2752  
  2753  // GaloisFieldAffineTransformInverse applies the affine transformation y * x + b in GF(2^8),
  2754  // with x inverted with respect to the reduction polynomial x^8 + x^4 + x^3 + x + 1.
  2755  // x is a vector of 8-bit vectors, taken in groups of 8 adjacent elements; y is a vector of 8x8 1-bit matrices;
  2756  // b is an 8-bit vector. Each element of y is the matrix that corresponds to
  2757  // one group of 8 elements in x.
  2758  //
  2759  // Passing a constant b gives better performance; a non-constant b is translated into a jump table.
  2760  //
  2761  // Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512GFNI
  2762  func (x Uint8x16) GaloisFieldAffineTransformInverse(y Uint64x2, b uint8) Uint8x16
  2763  
  2764  // GaloisFieldAffineTransformInverse applies the affine transformation y * x + b in GF(2^8),
  2765  // with x inverted with respect to the reduction polynomial x^8 + x^4 + x^3 + x + 1.
  2766  // x is a vector of 8-bit vectors, taken in groups of 8 adjacent elements; y is a vector of 8x8 1-bit matrices;
  2767  // b is an 8-bit vector. Each element of y is the matrix that corresponds to
  2768  // one group of 8 elements in x.
  2769  //
  2770  // Passing a constant b gives better performance; a non-constant b is translated into a jump table.
  2771  //
  2772  // Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512GFNI
  2773  func (x Uint8x32) GaloisFieldAffineTransformInverse(y Uint64x4, b uint8) Uint8x32
  2774  
  2775  // GaloisFieldAffineTransformInverse applies the affine transformation y * x + b in GF(2^8),
  2776  // with x inverted with respect to the reduction polynomial x^8 + x^4 + x^3 + x + 1.
  2777  // x is a vector of 8-bit vectors, taken in groups of 8 adjacent elements; y is a vector of 8x8 1-bit matrices;
  2778  // b is an 8-bit vector. Each element of y is the matrix that corresponds to
  2779  // one group of 8 elements in x.
  2780  //
  2781  // Passing a constant b gives better performance; a non-constant b is translated into a jump table.
  2782  //
  2783  // Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512GFNI
  2784  func (x Uint8x64) GaloisFieldAffineTransformInverse(y Uint64x8, b uint8) Uint8x64
  2785  
  2786  /* GaloisFieldMul */
  2787  
  2788  // GaloisFieldMul multiplies the elements of x by the corresponding elements of y in GF(2^8),
  2789  // using the reduction polynomial x^8 + x^4 + x^3 + x + 1.
  2790  //
  2791  // Asm: VGF2P8MULB, CPU Feature: AVX512GFNI
  2792  func (x Uint8x16) GaloisFieldMul(y Uint8x16) Uint8x16
  2793  
  2794  // GaloisFieldMul multiplies the elements of x by the corresponding elements of y in GF(2^8),
  2795  // using the reduction polynomial x^8 + x^4 + x^3 + x + 1.
  2796  //
  2797  // Asm: VGF2P8MULB, CPU Feature: AVX512GFNI
  2798  func (x Uint8x32) GaloisFieldMul(y Uint8x32) Uint8x32
  2799  
  2800  // GaloisFieldMul multiplies the elements of x by the corresponding elements of y in GF(2^8),
  2801  // using the reduction polynomial x^8 + x^4 + x^3 + x + 1.
  2802  //
  2803  // Asm: VGF2P8MULB, CPU Feature: AVX512GFNI
  2804  func (x Uint8x64) GaloisFieldMul(y Uint8x64) Uint8x64
  2805  
  2806  /* GetElem */
  2807  
  2808  // GetElem returns the value of the single element of x selected by index.
  2809  //
  2810  // Passing a constant index gives better performance; a non-constant index is translated into a jump table.
  2811  //
  2812  // Asm: VPEXTRD, CPU Feature: AVX
  2813  func (x Float32x4) GetElem(index uint8) float32
  2814  
  2815  // GetElem returns the value of the single element of x selected by index.
  2816  //
  2817  // Passing a constant index gives better performance; a non-constant index is translated into a jump table.
  2818  //
  2819  // Asm: VPEXTRQ, CPU Feature: AVX
  2820  func (x Float64x2) GetElem(index uint8) float64
  2821  
  2822  // GetElem returns the value of the single element of x selected by index.
  2823  //
  2824  // Passing a constant index gives better performance; a non-constant index is translated into a jump table.
  2825  //
  2826  // Asm: VPEXTRB, CPU Feature: AVX512
  2827  func (x Int8x16) GetElem(index uint8) int8
  2828  
  2829  // GetElem returns the value of the single element of x selected by index.
  2830  //
  2831  // Passing a constant index gives better performance; a non-constant index is translated into a jump table.
  2832  //
  2833  // Asm: VPEXTRW, CPU Feature: AVX512
  2834  func (x Int16x8) GetElem(index uint8) int16
  2835  
  2836  // GetElem returns the value of the single element of x selected by index.
  2837  //
  2838  // Passing a constant index gives better performance; a non-constant index is translated into a jump table.
  2839  //
  2840  // Asm: VPEXTRD, CPU Feature: AVX
  2841  func (x Int32x4) GetElem(index uint8) int32
  2842  
  2843  // GetElem returns the value of the single element of x selected by index.
  2844  //
  2845  // Passing a constant index gives better performance; a non-constant index is translated into a jump table.
  2846  //
  2847  // Asm: VPEXTRQ, CPU Feature: AVX
  2848  func (x Int64x2) GetElem(index uint8) int64
  2849  
  2850  // GetElem returns the value of the single element of x selected by index.
  2851  //
  2852  // Passing a constant index gives better performance; a non-constant index is translated into a jump table.
  2853  //
  2854  // Asm: VPEXTRB, CPU Feature: AVX512
  2855  func (x Uint8x16) GetElem(index uint8) uint8
  2856  
  2857  // GetElem returns the value of the single element of x selected by index.
  2858  //
  2859  // Passing a constant index gives better performance; a non-constant index is translated into a jump table.
  2860  //
  2861  // Asm: VPEXTRW, CPU Feature: AVX512
  2862  func (x Uint16x8) GetElem(index uint8) uint16
  2863  
  2864  // GetElem returns the value of the single element of x selected by index.
  2865  //
  2866  // Passing a constant index gives better performance; a non-constant index is translated into a jump table.
  2867  //
  2868  // Asm: VPEXTRD, CPU Feature: AVX
  2869  func (x Uint32x4) GetElem(index uint8) uint32
  2870  
  2871  // GetElem returns the value of the single element of x selected by index.
  2872  //
  2873  // Passing a constant index gives better performance; a non-constant index is translated into a jump table.
  2874  //
  2875  // Asm: VPEXTRQ, CPU Feature: AVX
  2876  func (x Uint64x2) GetElem(index uint8) uint64
  2877  
  2878  /* GetHi */
  2879  
  2880  // GetHi returns the upper half of x.
  2881  //
  2882  // Asm: VEXTRACTF128, CPU Feature: AVX
  2883  func (x Float32x8) GetHi() Float32x4
  2884  
  2885  // GetHi returns the upper half of x.
  2886  //
  2887  // Asm: VEXTRACTF64X4, CPU Feature: AVX512
  2888  func (x Float32x16) GetHi() Float32x8
  2889  
  2890  // GetHi returns the upper half of x.
  2891  //
  2892  // Asm: VEXTRACTF128, CPU Feature: AVX
  2893  func (x Float64x4) GetHi() Float64x2
  2894  
  2895  // GetHi returns the upper half of x.
  2896  //
  2897  // Asm: VEXTRACTF64X4, CPU Feature: AVX512
  2898  func (x Float64x8) GetHi() Float64x4
  2899  
  2900  // GetHi returns the upper half of x.
  2901  //
  2902  // Asm: VEXTRACTI128, CPU Feature: AVX2
  2903  func (x Int8x32) GetHi() Int8x16
  2904  
  2905  // GetHi returns the upper half of x.
  2906  //
  2907  // Asm: VEXTRACTI64X4, CPU Feature: AVX512
  2908  func (x Int8x64) GetHi() Int8x32
  2909  
  2910  // GetHi returns the upper half of x.
  2911  //
  2912  // Asm: VEXTRACTI128, CPU Feature: AVX2
  2913  func (x Int16x16) GetHi() Int16x8
  2914  
  2915  // GetHi returns the upper half of x.
  2916  //
  2917  // Asm: VEXTRACTI64X4, CPU Feature: AVX512
  2918  func (x Int16x32) GetHi() Int16x16
  2919  
  2920  // GetHi returns the upper half of x.
  2921  //
  2922  // Asm: VEXTRACTI128, CPU Feature: AVX2
  2923  func (x Int32x8) GetHi() Int32x4
  2924  
  2925  // GetHi returns the upper half of x.
  2926  //
  2927  // Asm: VEXTRACTI64X4, CPU Feature: AVX512
  2928  func (x Int32x16) GetHi() Int32x8
  2929  
  2930  // GetHi returns the upper half of x.
  2931  //
  2932  // Asm: VEXTRACTI128, CPU Feature: AVX2
  2933  func (x Int64x4) GetHi() Int64x2
  2934  
  2935  // GetHi returns the upper half of x.
  2936  //
  2937  // Asm: VEXTRACTI64X4, CPU Feature: AVX512
  2938  func (x Int64x8) GetHi() Int64x4
  2939  
  2940  // GetHi returns the upper half of x.
  2941  //
  2942  // Asm: VEXTRACTI128, CPU Feature: AVX2
  2943  func (x Uint8x32) GetHi() Uint8x16
  2944  
  2945  // GetHi returns the upper half of x.
  2946  //
  2947  // Asm: VEXTRACTI64X4, CPU Feature: AVX512
  2948  func (x Uint8x64) GetHi() Uint8x32
  2949  
  2950  // GetHi returns the upper half of x.
  2951  //
  2952  // Asm: VEXTRACTI128, CPU Feature: AVX2
  2953  func (x Uint16x16) GetHi() Uint16x8
  2954  
  2955  // GetHi returns the upper half of x.
  2956  //
  2957  // Asm: VEXTRACTI64X4, CPU Feature: AVX512
  2958  func (x Uint16x32) GetHi() Uint16x16
  2959  
  2960  // GetHi returns the upper half of x.
  2961  //
  2962  // Asm: VEXTRACTI128, CPU Feature: AVX2
  2963  func (x Uint32x8) GetHi() Uint32x4
  2964  
  2965  // GetHi returns the upper half of x.
  2966  //
  2967  // Asm: VEXTRACTI64X4, CPU Feature: AVX512
  2968  func (x Uint32x16) GetHi() Uint32x8
  2969  
  2970  // GetHi returns the upper half of x.
  2971  //
  2972  // Asm: VEXTRACTI128, CPU Feature: AVX2
  2973  func (x Uint64x4) GetHi() Uint64x2
  2974  
  2975  // GetHi returns the upper half of x.
  2976  //
  2977  // Asm: VEXTRACTI64X4, CPU Feature: AVX512
  2978  func (x Uint64x8) GetHi() Uint64x4
  2979  
  2980  /* GetLo */
  2981  
  2982  // GetLo returns the lower half of x.
  2983  //
  2984  // Asm: VEXTRACTF128, CPU Feature: AVX
  2985  func (x Float32x8) GetLo() Float32x4
  2986  
  2987  // GetLo returns the lower half of x.
  2988  //
  2989  // Asm: VEXTRACTF64X4, CPU Feature: AVX512
  2990  func (x Float32x16) GetLo() Float32x8
  2991  
  2992  // GetLo returns the lower half of x.
  2993  //
  2994  // Asm: VEXTRACTF128, CPU Feature: AVX
  2995  func (x Float64x4) GetLo() Float64x2
  2996  
  2997  // GetLo returns the lower half of x.
  2998  //
  2999  // Asm: VEXTRACTF64X4, CPU Feature: AVX512
  3000  func (x Float64x8) GetLo() Float64x4
  3001  
  3002  // GetLo returns the lower half of x.
  3003  //
  3004  // Asm: VEXTRACTI128, CPU Feature: AVX2
  3005  func (x Int8x32) GetLo() Int8x16
  3006  
  3007  // GetLo returns the lower half of x.
  3008  //
  3009  // Asm: VEXTRACTI64X4, CPU Feature: AVX512
  3010  func (x Int8x64) GetLo() Int8x32
  3011  
  3012  // GetLo returns the lower half of x.
  3013  //
  3014  // Asm: VEXTRACTI128, CPU Feature: AVX2
  3015  func (x Int16x16) GetLo() Int16x8
  3016  
  3017  // GetLo returns the lower half of x.
  3018  //
  3019  // Asm: VEXTRACTI64X4, CPU Feature: AVX512
  3020  func (x Int16x32) GetLo() Int16x16
  3021  
  3022  // GetLo returns the lower half of x.
  3023  //
  3024  // Asm: VEXTRACTI128, CPU Feature: AVX2
  3025  func (x Int32x8) GetLo() Int32x4
  3026  
  3027  // GetLo returns the lower half of x.
  3028  //
  3029  // Asm: VEXTRACTI64X4, CPU Feature: AVX512
  3030  func (x Int32x16) GetLo() Int32x8
  3031  
  3032  // GetLo returns the lower half of x.
  3033  //
  3034  // Asm: VEXTRACTI128, CPU Feature: AVX2
  3035  func (x Int64x4) GetLo() Int64x2
  3036  
  3037  // GetLo returns the lower half of x.
  3038  //
  3039  // Asm: VEXTRACTI64X4, CPU Feature: AVX512
  3040  func (x Int64x8) GetLo() Int64x4
  3041  
  3042  // GetLo returns the lower half of x.
  3043  //
  3044  // Asm: VEXTRACTI128, CPU Feature: AVX2
  3045  func (x Uint8x32) GetLo() Uint8x16
  3046  
  3047  // GetLo returns the lower half of x.
  3048  //
  3049  // Asm: VEXTRACTI64X4, CPU Feature: AVX512
  3050  func (x Uint8x64) GetLo() Uint8x32
  3051  
  3052  // GetLo returns the lower half of x.
  3053  //
  3054  // Asm: VEXTRACTI128, CPU Feature: AVX2
  3055  func (x Uint16x16) GetLo() Uint16x8
  3056  
  3057  // GetLo returns the lower half of x.
  3058  //
  3059  // Asm: VEXTRACTI64X4, CPU Feature: AVX512
  3060  func (x Uint16x32) GetLo() Uint16x16
  3061  
  3062  // GetLo returns the lower half of x.
  3063  //
  3064  // Asm: VEXTRACTI128, CPU Feature: AVX2
  3065  func (x Uint32x8) GetLo() Uint32x4
  3066  
  3067  // GetLo returns the lower half of x.
  3068  //
  3069  // Asm: VEXTRACTI64X4, CPU Feature: AVX512
  3070  func (x Uint32x16) GetLo() Uint32x8
  3071  
  3072  // GetLo returns the lower half of x.
  3073  //
  3074  // Asm: VEXTRACTI128, CPU Feature: AVX2
  3075  func (x Uint64x4) GetLo() Uint64x2
  3076  
  3077  // GetLo returns the lower half of x.
  3078  //
  3079  // Asm: VEXTRACTI64X4, CPU Feature: AVX512
  3080  func (x Uint64x8) GetLo() Uint64x4
  3081  
  3082  /* Greater */
  3083  
  3084  // Greater returns a mask whose elements indicate whether x > y, elementwise.
  3085  //
  3086  // Asm: VPCMPGTB, CPU Feature: AVX
  3087  func (x Int8x16) Greater(y Int8x16) Mask8x16
  3088  
  3089  // Greater returns a mask whose elements indicate whether x > y, elementwise.
  3090  //
  3091  // Asm: VPCMPGTB, CPU Feature: AVX2
  3092  func (x Int8x32) Greater(y Int8x32) Mask8x32
  3093  
  3094  // Greater returns a mask whose elements indicate whether x > y, elementwise.
  3095  //
  3096  // Asm: VPCMPGTB, CPU Feature: AVX512
  3097  func (x Int8x64) Greater(y Int8x64) Mask8x64
  3098  
  3099  // Greater returns a mask whose elements indicate whether x > y, elementwise.
  3100  //
  3101  // Asm: VPCMPGTW, CPU Feature: AVX
  3102  func (x Int16x8) Greater(y Int16x8) Mask16x8
  3103  
  3104  // Greater returns a mask whose elements indicate whether x > y, elementwise.
  3105  //
  3106  // Asm: VPCMPGTW, CPU Feature: AVX2
  3107  func (x Int16x16) Greater(y Int16x16) Mask16x16
  3108  
  3109  // Greater returns a mask whose elements indicate whether x > y, elementwise.
  3110  //
  3111  // Asm: VPCMPGTW, CPU Feature: AVX512
  3112  func (x Int16x32) Greater(y Int16x32) Mask16x32
  3113  
  3114  // Greater returns a mask whose elements indicate whether x > y, elementwise.
  3115  //
  3116  // Asm: VPCMPGTD, CPU Feature: AVX
  3117  func (x Int32x4) Greater(y Int32x4) Mask32x4
  3118  
  3119  // Greater returns a mask whose elements indicate whether x > y, elementwise.
  3120  //
  3121  // Asm: VPCMPGTD, CPU Feature: AVX2
  3122  func (x Int32x8) Greater(y Int32x8) Mask32x8
  3123  
  3124  // Greater returns a mask whose elements indicate whether x > y, elementwise.
  3125  //
  3126  // Asm: VPCMPGTD, CPU Feature: AVX512
  3127  func (x Int32x16) Greater(y Int32x16) Mask32x16
  3128  
  3129  // Greater returns a mask whose elements indicate whether x > y, elementwise.
  3130  //
  3131  // Asm: VPCMPGTQ, CPU Feature: AVX
  3132  func (x Int64x2) Greater(y Int64x2) Mask64x2
  3133  
  3134  // Greater returns a mask whose elements indicate whether x > y, elementwise.
  3135  //
  3136  // Asm: VPCMPGTQ, CPU Feature: AVX2
  3137  func (x Int64x4) Greater(y Int64x4) Mask64x4
  3138  
  3139  // Greater returns a mask whose elements indicate whether x > y, elementwise.
  3140  //
  3141  // Asm: VPCMPGTQ, CPU Feature: AVX512
  3142  func (x Int64x8) Greater(y Int64x8) Mask64x8
  3143  
  3144  // Greater returns a mask whose elements indicate whether x > y, elementwise.
  3145  //
  3146  // Asm: VCMPPS, CPU Feature: AVX
  3147  func (x Float32x4) Greater(y Float32x4) Mask32x4
  3148  
  3149  // Greater returns a mask whose elements indicate whether x > y, elementwise.
  3150  //
  3151  // Asm: VCMPPS, CPU Feature: AVX
  3152  func (x Float32x8) Greater(y Float32x8) Mask32x8
  3153  
  3154  // Greater returns a mask whose elements indicate whether x > y, elementwise.
  3155  //
  3156  // Asm: VCMPPS, CPU Feature: AVX512
  3157  func (x Float32x16) Greater(y Float32x16) Mask32x16
  3158  
  3159  // Greater returns a mask whose elements indicate whether x > y, elementwise.
  3160  //
  3161  // Asm: VCMPPD, CPU Feature: AVX
  3162  func (x Float64x2) Greater(y Float64x2) Mask64x2
  3163  
  3164  // Greater returns a mask whose elements indicate whether x > y, elementwise.
  3165  //
  3166  // Asm: VCMPPD, CPU Feature: AVX
  3167  func (x Float64x4) Greater(y Float64x4) Mask64x4
  3168  
  3169  // Greater returns a mask whose elements indicate whether x > y, elementwise.
  3170  //
  3171  // Asm: VCMPPD, CPU Feature: AVX512
  3172  func (x Float64x8) Greater(y Float64x8) Mask64x8
  3173  
  3174  // Greater returns a mask whose elements indicate whether x > y, elementwise.
  3175  //
  3176  // Asm: VPCMPUB, CPU Feature: AVX512
  3177  func (x Uint8x64) Greater(y Uint8x64) Mask8x64
  3178  
  3179  // Greater returns a mask whose elements indicate whether x > y, elementwise.
  3180  //
  3181  // Asm: VPCMPUW, CPU Feature: AVX512
  3182  func (x Uint16x32) Greater(y Uint16x32) Mask16x32
  3183  
  3184  // Greater returns a mask whose elements indicate whether x > y, elementwise.
  3185  //
  3186  // Asm: VPCMPUD, CPU Feature: AVX512
  3187  func (x Uint32x16) Greater(y Uint32x16) Mask32x16
  3188  
  3189  // Greater returns a mask whose elements indicate whether x > y, elementwise.
  3190  //
  3191  // Asm: VPCMPUQ, CPU Feature: AVX512
  3192  func (x Uint64x8) Greater(y Uint64x8) Mask64x8
  3193  
  3194  /* GreaterEqual */
  3195  
  3196  // GreaterEqual returns a mask whose elements indicate whether x >= y, elementwise.
  3197  //
  3198  // Asm: VCMPPS, CPU Feature: AVX
  3199  func (x Float32x4) GreaterEqual(y Float32x4) Mask32x4
  3200  
  3201  // GreaterEqual returns a mask whose elements indicate whether x >= y, elementwise.
  3202  //
  3203  // Asm: VCMPPS, CPU Feature: AVX
  3204  func (x Float32x8) GreaterEqual(y Float32x8) Mask32x8
  3205  
  3206  // GreaterEqual returns a mask whose elements indicate whether x >= y, elementwise.
  3207  //
  3208  // Asm: VCMPPS, CPU Feature: AVX512
  3209  func (x Float32x16) GreaterEqual(y Float32x16) Mask32x16
  3210  
  3211  // GreaterEqual returns a mask whose elements indicate whether x >= y, elementwise.
  3212  //
  3213  // Asm: VCMPPD, CPU Feature: AVX
  3214  func (x Float64x2) GreaterEqual(y Float64x2) Mask64x2
  3215  
  3216  // GreaterEqual returns a mask whose elements indicate whether x >= y, elementwise.
  3217  //
  3218  // Asm: VCMPPD, CPU Feature: AVX
  3219  func (x Float64x4) GreaterEqual(y Float64x4) Mask64x4
  3220  
  3221  // GreaterEqual returns a mask whose elements indicate whether x >= y, elementwise.
  3222  //
  3223  // Asm: VCMPPD, CPU Feature: AVX512
  3224  func (x Float64x8) GreaterEqual(y Float64x8) Mask64x8
  3225  
  3226  // GreaterEqual returns a mask whose elements indicate whether x >= y, elementwise.
  3227  //
  3228  // Asm: VPCMPB, CPU Feature: AVX512
  3229  func (x Int8x64) GreaterEqual(y Int8x64) Mask8x64
  3230  
  3231  // GreaterEqual returns a mask whose elements indicate whether x >= y, elementwise.
  3232  //
  3233  // Asm: VPCMPW, CPU Feature: AVX512
  3234  func (x Int16x32) GreaterEqual(y Int16x32) Mask16x32
  3235  
  3236  // GreaterEqual returns a mask whose elements indicate whether x >= y, elementwise.
  3237  //
  3238  // Asm: VPCMPD, CPU Feature: AVX512
  3239  func (x Int32x16) GreaterEqual(y Int32x16) Mask32x16
  3240  
  3241  // GreaterEqual returns a mask whose elements indicate whether x >= y, elementwise.
  3242  //
  3243  // Asm: VPCMPQ, CPU Feature: AVX512
  3244  func (x Int64x8) GreaterEqual(y Int64x8) Mask64x8
  3245  
  3246  // GreaterEqual returns a mask whose elements indicate whether x >= y, elementwise.
  3247  //
  3248  // Asm: VPCMPUB, CPU Feature: AVX512
  3249  func (x Uint8x64) GreaterEqual(y Uint8x64) Mask8x64
  3250  
  3251  // GreaterEqual returns a mask whose elements indicate whether x >= y, elementwise.
  3252  //
  3253  // Asm: VPCMPUW, CPU Feature: AVX512
  3254  func (x Uint16x32) GreaterEqual(y Uint16x32) Mask16x32
  3255  
  3256  // GreaterEqual returns a mask whose elements indicate whether x >= y, elementwise.
  3257  //
  3258  // Asm: VPCMPUD, CPU Feature: AVX512
  3259  func (x Uint32x16) GreaterEqual(y Uint32x16) Mask32x16
  3260  
  3261  // GreaterEqual returns a mask whose elements indicate whether x >= y, elementwise.
  3262  //
  3263  // Asm: VPCMPUQ, CPU Feature: AVX512
  3264  func (x Uint64x8) GreaterEqual(y Uint64x8) Mask64x8
  3265  
  3266  /* InterleaveHi */
  3267  
  3268  // InterleaveHi interleaves the elements of the high halves of x and y.
  3269  //
  3270  // Asm: VPUNPCKHWD, CPU Feature: AVX
  3271  func (x Int16x8) InterleaveHi(y Int16x8) Int16x8
  3272  
  3273  // InterleaveHi interleaves the elements of the high halves of x and y.
  3274  //
  3275  // Asm: VPUNPCKHDQ, CPU Feature: AVX
  3276  func (x Int32x4) InterleaveHi(y Int32x4) Int32x4
  3277  
  3278  // InterleaveHi interleaves the elements of the high halves of x and y.
  3279  //
  3280  // Asm: VPUNPCKHQDQ, CPU Feature: AVX
  3281  func (x Int64x2) InterleaveHi(y Int64x2) Int64x2
  3282  
  3283  // InterleaveHi interleaves the elements of the high halves of x and y.
  3284  //
  3285  // Asm: VPUNPCKHWD, CPU Feature: AVX
  3286  func (x Uint16x8) InterleaveHi(y Uint16x8) Uint16x8
  3287  
  3288  // InterleaveHi interleaves the elements of the high halves of x and y.
  3289  //
  3290  // Asm: VPUNPCKHDQ, CPU Feature: AVX
  3291  func (x Uint32x4) InterleaveHi(y Uint32x4) Uint32x4
  3292  
  3293  // InterleaveHi interleaves the elements of the high halves of x and y.
  3294  //
  3295  // Asm: VPUNPCKHQDQ, CPU Feature: AVX
  3296  func (x Uint64x2) InterleaveHi(y Uint64x2) Uint64x2
  3297  
  3298  /* InterleaveHiGrouped */
  3299  
  3300  // InterleaveHiGrouped interleaves the elements of the high half of each 128-bit subvector of x and y.
  3301  //
  3302  // Asm: VPUNPCKHWD, CPU Feature: AVX2
  3303  func (x Int16x16) InterleaveHiGrouped(y Int16x16) Int16x16
  3304  
  3305  // InterleaveHiGrouped interleaves the elements of the high half of each 128-bit subvector of x and y.
  3306  //
  3307  // Asm: VPUNPCKHWD, CPU Feature: AVX512
  3308  func (x Int16x32) InterleaveHiGrouped(y Int16x32) Int16x32
  3309  
  3310  // InterleaveHiGrouped interleaves the elements of the high half of each 128-bit subvector of x and y.
  3311  //
  3312  // Asm: VPUNPCKHDQ, CPU Feature: AVX2
  3313  func (x Int32x8) InterleaveHiGrouped(y Int32x8) Int32x8
  3314  
  3315  // InterleaveHiGrouped interleaves the elements of the high half of each 128-bit subvector of x and y.
  3316  //
  3317  // Asm: VPUNPCKHDQ, CPU Feature: AVX512
  3318  func (x Int32x16) InterleaveHiGrouped(y Int32x16) Int32x16
  3319  
  3320  // InterleaveHiGrouped interleaves the elements of the high half of each 128-bit subvector of x and y.
  3321  //
  3322  // Asm: VPUNPCKHQDQ, CPU Feature: AVX2
  3323  func (x Int64x4) InterleaveHiGrouped(y Int64x4) Int64x4
  3324  
  3325  // InterleaveHiGrouped interleaves the elements of the high half of each 128-bit subvector of x and y.
  3326  //
  3327  // Asm: VPUNPCKHQDQ, CPU Feature: AVX512
  3328  func (x Int64x8) InterleaveHiGrouped(y Int64x8) Int64x8
  3329  
  3330  // InterleaveHiGrouped interleaves the elements of the high half of each 128-bit subvector of x and y.
  3331  //
  3332  // Asm: VPUNPCKHWD, CPU Feature: AVX2
  3333  func (x Uint16x16) InterleaveHiGrouped(y Uint16x16) Uint16x16
  3334  
  3335  // InterleaveHiGrouped interleaves the elements of the high half of each 128-bit subvector of x and y.
  3336  //
  3337  // Asm: VPUNPCKHWD, CPU Feature: AVX512
  3338  func (x Uint16x32) InterleaveHiGrouped(y Uint16x32) Uint16x32
  3339  
  3340  // InterleaveHiGrouped interleaves the elements of the high half of each 128-bit subvector of x and y.
  3341  //
  3342  // Asm: VPUNPCKHDQ, CPU Feature: AVX2
  3343  func (x Uint32x8) InterleaveHiGrouped(y Uint32x8) Uint32x8
  3344  
  3345  // InterleaveHiGrouped interleaves the elements of the high half of each 128-bit subvector of x and y.
  3346  //
  3347  // Asm: VPUNPCKHDQ, CPU Feature: AVX512
  3348  func (x Uint32x16) InterleaveHiGrouped(y Uint32x16) Uint32x16
  3349  
  3350  // InterleaveHiGrouped interleaves the elements of the high half of each 128-bit subvector of x and y.
  3351  //
  3352  // Asm: VPUNPCKHQDQ, CPU Feature: AVX2
  3353  func (x Uint64x4) InterleaveHiGrouped(y Uint64x4) Uint64x4
  3354  
  3355  // InterleaveHiGrouped interleaves the elements of the high half of each 128-bit subvector of x and y.
  3356  //
  3357  // Asm: VPUNPCKHQDQ, CPU Feature: AVX512
  3358  func (x Uint64x8) InterleaveHiGrouped(y Uint64x8) Uint64x8
  3359  
  3360  /* InterleaveLo */
  3361  
  3362  // InterleaveLo interleaves the elements of the low halves of x and y.
  3363  //
  3364  // Asm: VPUNPCKLWD, CPU Feature: AVX
  3365  func (x Int16x8) InterleaveLo(y Int16x8) Int16x8
  3366  
  3367  // InterleaveLo interleaves the elements of the low halves of x and y.
  3368  //
  3369  // Asm: VPUNPCKLDQ, CPU Feature: AVX
  3370  func (x Int32x4) InterleaveLo(y Int32x4) Int32x4
  3371  
  3372  // InterleaveLo interleaves the elements of the low halves of x and y.
  3373  //
  3374  // Asm: VPUNPCKLQDQ, CPU Feature: AVX
  3375  func (x Int64x2) InterleaveLo(y Int64x2) Int64x2
  3376  
  3377  // InterleaveLo interleaves the elements of the low halves of x and y.
  3378  //
  3379  // Asm: VPUNPCKLWD, CPU Feature: AVX
  3380  func (x Uint16x8) InterleaveLo(y Uint16x8) Uint16x8
  3381  
  3382  // InterleaveLo interleaves the elements of the low halves of x and y.
  3383  //
  3384  // Asm: VPUNPCKLDQ, CPU Feature: AVX
  3385  func (x Uint32x4) InterleaveLo(y Uint32x4) Uint32x4
  3386  
  3387  // InterleaveLo interleaves the elements of the low halves of x and y.
  3388  //
  3389  // Asm: VPUNPCKLQDQ, CPU Feature: AVX
  3390  func (x Uint64x2) InterleaveLo(y Uint64x2) Uint64x2
  3391  
  3392  /* InterleaveLoGrouped */
  3393  
  3394  // InterleaveLoGrouped interleaves the elements of the low half of each 128-bit subvector of x and y.
  3395  //
  3396  // Asm: VPUNPCKLWD, CPU Feature: AVX2
  3397  func (x Int16x16) InterleaveLoGrouped(y Int16x16) Int16x16
  3398  
  3399  // InterleaveLoGrouped interleaves the elements of the low half of each 128-bit subvector of x and y.
  3400  //
  3401  // Asm: VPUNPCKLWD, CPU Feature: AVX512
  3402  func (x Int16x32) InterleaveLoGrouped(y Int16x32) Int16x32
  3403  
  3404  // InterleaveLoGrouped interleaves the elements of the low half of each 128-bit subvector of x and y.
  3405  //
  3406  // Asm: VPUNPCKLDQ, CPU Feature: AVX2
  3407  func (x Int32x8) InterleaveLoGrouped(y Int32x8) Int32x8
  3408  
  3409  // InterleaveLoGrouped interleaves the elements of the low half of each 128-bit subvector of x and y.
  3410  //
  3411  // Asm: VPUNPCKLDQ, CPU Feature: AVX512
  3412  func (x Int32x16) InterleaveLoGrouped(y Int32x16) Int32x16
  3413  
  3414  // InterleaveLoGrouped interleaves the elements of the low half of each 128-bit subvector of x and y.
  3415  //
  3416  // Asm: VPUNPCKLQDQ, CPU Feature: AVX2
  3417  func (x Int64x4) InterleaveLoGrouped(y Int64x4) Int64x4
  3418  
  3419  // InterleaveLoGrouped interleaves the elements of the low half of each 128-bit subvector of x and y.
  3420  //
  3421  // Asm: VPUNPCKLQDQ, CPU Feature: AVX512
  3422  func (x Int64x8) InterleaveLoGrouped(y Int64x8) Int64x8
  3423  
  3424  // InterleaveLoGrouped interleaves the elements of the low half of each 128-bit subvector of x and y.
  3425  //
  3426  // Asm: VPUNPCKLWD, CPU Feature: AVX2
  3427  func (x Uint16x16) InterleaveLoGrouped(y Uint16x16) Uint16x16
  3428  
  3429  // InterleaveLoGrouped interleaves the elements of the low half of each 128-bit subvector of x and y.
  3430  //
  3431  // Asm: VPUNPCKLWD, CPU Feature: AVX512
  3432  func (x Uint16x32) InterleaveLoGrouped(y Uint16x32) Uint16x32
  3433  
  3434  // InterleaveLoGrouped interleaves the elements of the low half of each 128-bit subvector of x and y.
  3435  //
  3436  // Asm: VPUNPCKLDQ, CPU Feature: AVX2
  3437  func (x Uint32x8) InterleaveLoGrouped(y Uint32x8) Uint32x8
  3438  
  3439  // InterleaveLoGrouped interleaves the elements of the low half of each 128-bit subvector of x and y.
  3440  //
  3441  // Asm: VPUNPCKLDQ, CPU Feature: AVX512
  3442  func (x Uint32x16) InterleaveLoGrouped(y Uint32x16) Uint32x16
  3443  
  3444  // InterleaveLoGrouped interleaves the elements of the low half of each 128-bit subvector of x and y.
  3445  //
  3446  // Asm: VPUNPCKLQDQ, CPU Feature: AVX2
  3447  func (x Uint64x4) InterleaveLoGrouped(y Uint64x4) Uint64x4
  3448  
  3449  // InterleaveLoGrouped interleaves the elements of the low half of each 128-bit subvector of x and y.
  3450  //
  3451  // Asm: VPUNPCKLQDQ, CPU Feature: AVX512
  3452  func (x Uint64x8) InterleaveLoGrouped(y Uint64x8) Uint64x8
  3453  
  3454  /* IsNan */
  3455  
  3456  // IsNan returns a mask whose elements indicate which elements are NaN. Use as x.IsNan(x).
  3457  //
  3458  // Asm: VCMPPS, CPU Feature: AVX
  3459  func (x Float32x4) IsNan(y Float32x4) Mask32x4
  3460  
  3461  // IsNan returns a mask whose elements indicate which elements are NaN. Use as x.IsNan(x).
  3462  //
  3463  // Asm: VCMPPS, CPU Feature: AVX
  3464  func (x Float32x8) IsNan(y Float32x8) Mask32x8
  3465  
  3466  // IsNan returns a mask whose elements indicate which elements are NaN. Use as x.IsNan(x).
  3467  //
  3468  // Asm: VCMPPS, CPU Feature: AVX512
  3469  func (x Float32x16) IsNan(y Float32x16) Mask32x16
  3470  
  3471  // IsNan returns a mask whose elements indicate which elements are NaN. Use as x.IsNan(x).
  3472  //
  3473  // Asm: VCMPPD, CPU Feature: AVX
  3474  func (x Float64x2) IsNan(y Float64x2) Mask64x2
  3475  
  3476  // IsNan returns a mask whose elements indicate which elements are NaN. Use as x.IsNan(x).
  3477  //
  3478  // Asm: VCMPPD, CPU Feature: AVX
  3479  func (x Float64x4) IsNan(y Float64x4) Mask64x4
  3480  
  3481  // IsNan returns a mask whose elements indicate which elements are NaN. Use as x.IsNan(x).
  3482  //
  3483  // Asm: VCMPPD, CPU Feature: AVX512
  3484  func (x Float64x8) IsNan(y Float64x8) Mask64x8
  3485  
  3486  /* LeadingZeros */
  3487  
  3488  // LeadingZeros counts the leading zeros of each element in x.
  3489  //
  3490  // Asm: VPLZCNTD, CPU Feature: AVX512
  3491  func (x Int32x4) LeadingZeros() Int32x4
  3492  
  3493  // LeadingZeros counts the leading zeros of each element in x.
  3494  //
  3495  // Asm: VPLZCNTD, CPU Feature: AVX512
  3496  func (x Int32x8) LeadingZeros() Int32x8
  3497  
  3498  // LeadingZeros counts the leading zeros of each element in x.
  3499  //
  3500  // Asm: VPLZCNTD, CPU Feature: AVX512
  3501  func (x Int32x16) LeadingZeros() Int32x16
  3502  
  3503  // LeadingZeros counts the leading zeros of each element in x.
  3504  //
  3505  // Asm: VPLZCNTQ, CPU Feature: AVX512
  3506  func (x Int64x2) LeadingZeros() Int64x2
  3507  
  3508  // LeadingZeros counts the leading zeros of each element in x.
  3509  //
  3510  // Asm: VPLZCNTQ, CPU Feature: AVX512
  3511  func (x Int64x4) LeadingZeros() Int64x4
  3512  
  3513  // LeadingZeros counts the leading zeros of each element in x.
  3514  //
  3515  // Asm: VPLZCNTQ, CPU Feature: AVX512
  3516  func (x Int64x8) LeadingZeros() Int64x8
  3517  
  3518  // LeadingZeros counts the leading zeros of each element in x.
  3519  //
  3520  // Asm: VPLZCNTD, CPU Feature: AVX512
  3521  func (x Uint32x4) LeadingZeros() Uint32x4
  3522  
  3523  // LeadingZeros counts the leading zeros of each element in x.
  3524  //
  3525  // Asm: VPLZCNTD, CPU Feature: AVX512
  3526  func (x Uint32x8) LeadingZeros() Uint32x8
  3527  
  3528  // LeadingZeros counts the leading zeros of each element in x.
  3529  //
  3530  // Asm: VPLZCNTD, CPU Feature: AVX512
  3531  func (x Uint32x16) LeadingZeros() Uint32x16
  3532  
  3533  // LeadingZeros counts the leading zeros of each element in x.
  3534  //
  3535  // Asm: VPLZCNTQ, CPU Feature: AVX512
  3536  func (x Uint64x2) LeadingZeros() Uint64x2
  3537  
  3538  // LeadingZeros counts the leading zeros of each element in x.
  3539  //
  3540  // Asm: VPLZCNTQ, CPU Feature: AVX512
  3541  func (x Uint64x4) LeadingZeros() Uint64x4
  3542  
  3543  // LeadingZeros counts the leading zeros of each element in x.
  3544  //
  3545  // Asm: VPLZCNTQ, CPU Feature: AVX512
  3546  func (x Uint64x8) LeadingZeros() Uint64x8
  3547  
  3548  /* Less */
  3549  
  3550  // Less returns a mask whose elements indicate whether x < y, elementwise.
  3551  //
  3552  // Asm: VCMPPS, CPU Feature: AVX
  3553  func (x Float32x4) Less(y Float32x4) Mask32x4
  3554  
  3555  // Less returns a mask whose elements indicate whether x < y, elementwise.
  3556  //
  3557  // Asm: VCMPPS, CPU Feature: AVX
  3558  func (x Float32x8) Less(y Float32x8) Mask32x8
  3559  
  3560  // Less returns a mask whose elements indicate whether x < y, elementwise.
  3561  //
  3562  // Asm: VCMPPS, CPU Feature: AVX512
  3563  func (x Float32x16) Less(y Float32x16) Mask32x16
  3564  
  3565  // Less returns a mask whose elements indicate whether x < y, elementwise.
  3566  //
  3567  // Asm: VCMPPD, CPU Feature: AVX
  3568  func (x Float64x2) Less(y Float64x2) Mask64x2
  3569  
  3570  // Less returns a mask whose elements indicate whether x < y, elementwise.
  3571  //
  3572  // Asm: VCMPPD, CPU Feature: AVX
  3573  func (x Float64x4) Less(y Float64x4) Mask64x4
  3574  
  3575  // Less returns a mask whose elements indicate whether x < y, elementwise.
  3576  //
  3577  // Asm: VCMPPD, CPU Feature: AVX512
  3578  func (x Float64x8) Less(y Float64x8) Mask64x8
  3579  
  3580  // Less returns a mask whose elements indicate whether x < y, elementwise.
  3581  //
  3582  // Asm: VPCMPB, CPU Feature: AVX512
  3583  func (x Int8x64) Less(y Int8x64) Mask8x64
  3584  
  3585  // Less returns a mask whose elements indicate whether x < y, elementwise.
  3586  //
  3587  // Asm: VPCMPW, CPU Feature: AVX512
  3588  func (x Int16x32) Less(y Int16x32) Mask16x32
  3589  
  3590  // Less returns a mask whose elements indicate whether x < y, elementwise.
  3591  //
  3592  // Asm: VPCMPD, CPU Feature: AVX512
  3593  func (x Int32x16) Less(y Int32x16) Mask32x16
  3594  
  3595  // Less returns a mask whose elements indicate whether x < y, elementwise.
  3596  //
  3597  // Asm: VPCMPQ, CPU Feature: AVX512
  3598  func (x Int64x8) Less(y Int64x8) Mask64x8
  3599  
  3600  // Less returns a mask whose elements indicate whether x < y, elementwise.
  3601  //
  3602  // Asm: VPCMPUB, CPU Feature: AVX512
  3603  func (x Uint8x64) Less(y Uint8x64) Mask8x64
  3604  
  3605  // Less returns a mask whose elements indicate whether x < y, elementwise.
  3606  //
  3607  // Asm: VPCMPUW, CPU Feature: AVX512
  3608  func (x Uint16x32) Less(y Uint16x32) Mask16x32
  3609  
  3610  // Less returns a mask whose elements indicate whether x < y, elementwise.
  3611  //
  3612  // Asm: VPCMPUD, CPU Feature: AVX512
  3613  func (x Uint32x16) Less(y Uint32x16) Mask32x16
  3614  
  3615  // Less returns a mask whose elements indicate whether x < y, elementwise.
  3616  //
  3617  // Asm: VPCMPUQ, CPU Feature: AVX512
  3618  func (x Uint64x8) Less(y Uint64x8) Mask64x8
  3619  
  3620  /* LessEqual */
  3621  
  3622  // LessEqual returns a mask whose elements indicate whether x <= y, elementwise.
  3623  //
  3624  // Asm: VCMPPS, CPU Feature: AVX
  3625  func (x Float32x4) LessEqual(y Float32x4) Mask32x4
  3626  
  3627  // LessEqual returns a mask whose elements indicate whether x <= y, elementwise.
  3628  //
  3629  // Asm: VCMPPS, CPU Feature: AVX
  3630  func (x Float32x8) LessEqual(y Float32x8) Mask32x8
  3631  
  3632  // LessEqual returns a mask whose elements indicate whether x <= y, elementwise.
  3633  //
  3634  // Asm: VCMPPS, CPU Feature: AVX512
  3635  func (x Float32x16) LessEqual(y Float32x16) Mask32x16
  3636  
  3637  // LessEqual returns a mask whose elements indicate whether x <= y, elementwise.
  3638  //
  3639  // Asm: VCMPPD, CPU Feature: AVX
  3640  func (x Float64x2) LessEqual(y Float64x2) Mask64x2
  3641  
  3642  // LessEqual returns a mask whose elements indicate whether x <= y, elementwise.
  3643  //
  3644  // Asm: VCMPPD, CPU Feature: AVX
  3645  func (x Float64x4) LessEqual(y Float64x4) Mask64x4
  3646  
  3647  // LessEqual returns a mask whose elements indicate whether x <= y, elementwise.
  3648  //
  3649  // Asm: VCMPPD, CPU Feature: AVX512
  3650  func (x Float64x8) LessEqual(y Float64x8) Mask64x8
  3651  
  3652  // LessEqual returns a mask whose elements indicate whether x <= y, elementwise.
  3653  //
  3654  // Asm: VPCMPB, CPU Feature: AVX512
  3655  func (x Int8x64) LessEqual(y Int8x64) Mask8x64
  3656  
  3657  // LessEqual returns a mask whose elements indicate whether x <= y, elementwise.
  3658  //
  3659  // Asm: VPCMPW, CPU Feature: AVX512
  3660  func (x Int16x32) LessEqual(y Int16x32) Mask16x32
  3661  
  3662  // LessEqual returns a mask whose elements indicate whether x <= y, elementwise.
  3663  //
  3664  // Asm: VPCMPD, CPU Feature: AVX512
  3665  func (x Int32x16) LessEqual(y Int32x16) Mask32x16
  3666  
  3667  // LessEqual returns a mask whose elements indicate whether x <= y, elementwise.
  3668  //
  3669  // Asm: VPCMPQ, CPU Feature: AVX512
  3670  func (x Int64x8) LessEqual(y Int64x8) Mask64x8
  3671  
  3672  // LessEqual returns a mask whose elements indicate whether x <= y, elementwise.
  3673  //
  3674  // Asm: VPCMPUB, CPU Feature: AVX512
  3675  func (x Uint8x64) LessEqual(y Uint8x64) Mask8x64
  3676  
  3677  // LessEqual returns a mask whose elements indicate whether x <= y, elementwise.
  3678  //
  3679  // Asm: VPCMPUW, CPU Feature: AVX512
  3680  func (x Uint16x32) LessEqual(y Uint16x32) Mask16x32
  3681  
  3682  // LessEqual returns a mask whose elements indicate whether x <= y, elementwise.
  3683  //
  3684  // Asm: VPCMPUD, CPU Feature: AVX512
  3685  func (x Uint32x16) LessEqual(y Uint32x16) Mask32x16
  3686  
  3687  // LessEqual returns a mask whose elements indicate whether x <= y, elementwise.
  3688  //
  3689  // Asm: VPCMPUQ, CPU Feature: AVX512
  3690  func (x Uint64x8) LessEqual(y Uint64x8) Mask64x8
  3691  
  3692  /* Max */
  3693  
  3694  // Max returns the elementwise maximum of x and y.
  3695  //
  3696  // Asm: VMAXPS, CPU Feature: AVX
  3697  func (x Float32x4) Max(y Float32x4) Float32x4
  3698  
  3699  // Max returns the elementwise maximum of x and y.
  3700  //
  3701  // Asm: VMAXPS, CPU Feature: AVX
  3702  func (x Float32x8) Max(y Float32x8) Float32x8
  3703  
  3704  // Max returns the elementwise maximum of x and y.
  3705  //
  3706  // Asm: VMAXPS, CPU Feature: AVX512
  3707  func (x Float32x16) Max(y Float32x16) Float32x16
  3708  
  3709  // Max returns the elementwise maximum of x and y.
  3710  //
  3711  // Asm: VMAXPD, CPU Feature: AVX
  3712  func (x Float64x2) Max(y Float64x2) Float64x2
  3713  
  3714  // Max returns the elementwise maximum of x and y.
  3715  //
  3716  // Asm: VMAXPD, CPU Feature: AVX
  3717  func (x Float64x4) Max(y Float64x4) Float64x4
  3718  
  3719  // Max returns the elementwise maximum of x and y.
  3720  //
  3721  // Asm: VMAXPD, CPU Feature: AVX512
  3722  func (x Float64x8) Max(y Float64x8) Float64x8
  3723  
  3724  // Max returns the elementwise maximum of x and y.
  3725  //
  3726  // Asm: VPMAXSB, CPU Feature: AVX
  3727  func (x Int8x16) Max(y Int8x16) Int8x16
  3728  
  3729  // Max returns the elementwise maximum of x and y.
  3730  //
  3731  // Asm: VPMAXSB, CPU Feature: AVX2
  3732  func (x Int8x32) Max(y Int8x32) Int8x32
  3733  
  3734  // Max returns the elementwise maximum of x and y.
  3735  //
  3736  // Asm: VPMAXSB, CPU Feature: AVX512
  3737  func (x Int8x64) Max(y Int8x64) Int8x64
  3738  
  3739  // Max returns the elementwise maximum of x and y.
  3740  //
  3741  // Asm: VPMAXSW, CPU Feature: AVX
  3742  func (x Int16x8) Max(y Int16x8) Int16x8
  3743  
  3744  // Max returns the elementwise maximum of x and y.
  3745  //
  3746  // Asm: VPMAXSW, CPU Feature: AVX2
  3747  func (x Int16x16) Max(y Int16x16) Int16x16
  3748  
  3749  // Max returns the elementwise maximum of x and y.
  3750  //
  3751  // Asm: VPMAXSW, CPU Feature: AVX512
  3752  func (x Int16x32) Max(y Int16x32) Int16x32
  3753  
  3754  // Max returns the elementwise maximum of x and y.
  3755  //
  3756  // Asm: VPMAXSD, CPU Feature: AVX
  3757  func (x Int32x4) Max(y Int32x4) Int32x4
  3758  
  3759  // Max returns the elementwise maximum of x and y.
  3760  //
  3761  // Asm: VPMAXSD, CPU Feature: AVX2
  3762  func (x Int32x8) Max(y Int32x8) Int32x8
  3763  
  3764  // Max returns the elementwise maximum of x and y.
  3765  //
  3766  // Asm: VPMAXSD, CPU Feature: AVX512
  3767  func (x Int32x16) Max(y Int32x16) Int32x16
  3768  
  3769  // Max returns the elementwise maximum of x and y.
  3770  //
  3771  // Asm: VPMAXSQ, CPU Feature: AVX512
  3772  func (x Int64x2) Max(y Int64x2) Int64x2
  3773  
  3774  // Max returns the elementwise maximum of x and y.
  3775  //
  3776  // Asm: VPMAXSQ, CPU Feature: AVX512
  3777  func (x Int64x4) Max(y Int64x4) Int64x4
  3778  
  3779  // Max returns the elementwise maximum of x and y.
  3780  //
  3781  // Asm: VPMAXSQ, CPU Feature: AVX512
  3782  func (x Int64x8) Max(y Int64x8) Int64x8
  3783  
  3784  // Max returns the elementwise maximum of x and y.
  3785  //
  3786  // Asm: VPMAXUB, CPU Feature: AVX
  3787  func (x Uint8x16) Max(y Uint8x16) Uint8x16
  3788  
  3789  // Max returns the elementwise maximum of x and y.
  3790  //
  3791  // Asm: VPMAXUB, CPU Feature: AVX2
  3792  func (x Uint8x32) Max(y Uint8x32) Uint8x32
  3793  
  3794  // Max returns the elementwise maximum of x and y.
  3795  //
  3796  // Asm: VPMAXUB, CPU Feature: AVX512
  3797  func (x Uint8x64) Max(y Uint8x64) Uint8x64
  3798  
  3799  // Max returns the elementwise maximum of x and y.
  3800  //
  3801  // Asm: VPMAXUW, CPU Feature: AVX
  3802  func (x Uint16x8) Max(y Uint16x8) Uint16x8
  3803  
  3804  // Max returns the elementwise maximum of x and y.
  3805  //
  3806  // Asm: VPMAXUW, CPU Feature: AVX2
  3807  func (x Uint16x16) Max(y Uint16x16) Uint16x16
  3808  
  3809  // Max returns the elementwise maximum of x and y.
  3810  //
  3811  // Asm: VPMAXUW, CPU Feature: AVX512
  3812  func (x Uint16x32) Max(y Uint16x32) Uint16x32
  3813  
  3814  // Max returns the elementwise maximum of x and y.
  3815  //
  3816  // Asm: VPMAXUD, CPU Feature: AVX
  3817  func (x Uint32x4) Max(y Uint32x4) Uint32x4
  3818  
  3819  // Max returns the elementwise maximum of x and y.
  3820  //
  3821  // Asm: VPMAXUD, CPU Feature: AVX2
  3822  func (x Uint32x8) Max(y Uint32x8) Uint32x8
  3823  
  3824  // Max returns the elementwise maximum of x and y.
  3825  //
  3826  // Asm: VPMAXUD, CPU Feature: AVX512
  3827  func (x Uint32x16) Max(y Uint32x16) Uint32x16
  3828  
  3829  // Max returns the elementwise maximum of x and y.
  3830  //
  3831  // Asm: VPMAXUQ, CPU Feature: AVX512
  3832  func (x Uint64x2) Max(y Uint64x2) Uint64x2
  3833  
  3834  // Max returns the elementwise maximum of x and y.
  3835  //
  3836  // Asm: VPMAXUQ, CPU Feature: AVX512
  3837  func (x Uint64x4) Max(y Uint64x4) Uint64x4
  3838  
  3839  // Max returns the elementwise maximum of x and y.
  3840  //
  3841  // Asm: VPMAXUQ, CPU Feature: AVX512
  3842  func (x Uint64x8) Max(y Uint64x8) Uint64x8
  3843  
  3844  /* Min */
  3845  
  3846  // Min returns the elementwise minimum of x and y.
  3847  //
  3848  // Asm: VMINPS, CPU Feature: AVX
  3849  func (x Float32x4) Min(y Float32x4) Float32x4
  3850  
  3851  // Min returns the elementwise minimum of x and y.
  3852  //
  3853  // Asm: VMINPS, CPU Feature: AVX
  3854  func (x Float32x8) Min(y Float32x8) Float32x8
  3855  
  3856  // Min returns the elementwise minimum of x and y.
  3857  //
  3858  // Asm: VMINPS, CPU Feature: AVX512
  3859  func (x Float32x16) Min(y Float32x16) Float32x16
  3860  
  3861  // Min returns the elementwise minimum of x and y.
  3862  //
  3863  // Asm: VMINPD, CPU Feature: AVX
  3864  func (x Float64x2) Min(y Float64x2) Float64x2
  3865  
  3866  // Min returns the elementwise minimum of x and y.
  3867  //
  3868  // Asm: VMINPD, CPU Feature: AVX
  3869  func (x Float64x4) Min(y Float64x4) Float64x4
  3870  
  3871  // Min returns the elementwise minimum of x and y.
  3872  //
  3873  // Asm: VMINPD, CPU Feature: AVX512
  3874  func (x Float64x8) Min(y Float64x8) Float64x8
  3875  
  3876  // Min returns the elementwise minimum of x and y.
  3877  //
  3878  // Asm: VPMINSB, CPU Feature: AVX
  3879  func (x Int8x16) Min(y Int8x16) Int8x16
  3880  
  3881  // Min returns the elementwise minimum of x and y.
  3882  //
  3883  // Asm: VPMINSB, CPU Feature: AVX2
  3884  func (x Int8x32) Min(y Int8x32) Int8x32
  3885  
  3886  // Min returns the elementwise minimum of x and y.
  3887  //
  3888  // Asm: VPMINSB, CPU Feature: AVX512
  3889  func (x Int8x64) Min(y Int8x64) Int8x64
  3890  
  3891  // Min returns the elementwise minimum of x and y.
  3892  //
  3893  // Asm: VPMINSW, CPU Feature: AVX
  3894  func (x Int16x8) Min(y Int16x8) Int16x8
  3895  
  3896  // Min returns the elementwise minimum of x and y.
  3897  //
  3898  // Asm: VPMINSW, CPU Feature: AVX2
  3899  func (x Int16x16) Min(y Int16x16) Int16x16
  3900  
  3901  // Min returns the elementwise minimum of x and y.
  3902  //
  3903  // Asm: VPMINSW, CPU Feature: AVX512
  3904  func (x Int16x32) Min(y Int16x32) Int16x32
  3905  
  3906  // Min returns the elementwise minimum of x and y.
  3907  //
  3908  // Asm: VPMINSD, CPU Feature: AVX
  3909  func (x Int32x4) Min(y Int32x4) Int32x4
  3910  
  3911  // Min returns the elementwise minimum of x and y.
  3912  //
  3913  // Asm: VPMINSD, CPU Feature: AVX2
  3914  func (x Int32x8) Min(y Int32x8) Int32x8
  3915  
  3916  // Min returns the elementwise minimum of x and y.
  3917  //
  3918  // Asm: VPMINSD, CPU Feature: AVX512
  3919  func (x Int32x16) Min(y Int32x16) Int32x16
  3920  
  3921  // Min returns the elementwise minimum of x and y.
  3922  //
  3923  // Asm: VPMINSQ, CPU Feature: AVX512
  3924  func (x Int64x2) Min(y Int64x2) Int64x2
  3925  
  3926  // Min returns the elementwise minimum of x and y.
  3927  //
  3928  // Asm: VPMINSQ, CPU Feature: AVX512
  3929  func (x Int64x4) Min(y Int64x4) Int64x4
  3930  
  3931  // Min returns the elementwise minimum of x and y.
  3932  //
  3933  // Asm: VPMINSQ, CPU Feature: AVX512
  3934  func (x Int64x8) Min(y Int64x8) Int64x8
  3935  
  3936  // Min returns the elementwise minimum of x and y.
  3937  //
  3938  // Asm: VPMINUB, CPU Feature: AVX
  3939  func (x Uint8x16) Min(y Uint8x16) Uint8x16
  3940  
  3941  // Min returns the elementwise minimum of x and y.
  3942  //
  3943  // Asm: VPMINUB, CPU Feature: AVX2
  3944  func (x Uint8x32) Min(y Uint8x32) Uint8x32
  3945  
  3946  // Min returns the elementwise minimum of x and y.
  3947  //
  3948  // Asm: VPMINUB, CPU Feature: AVX512
  3949  func (x Uint8x64) Min(y Uint8x64) Uint8x64
  3950  
  3951  // Min returns the elementwise minimum of x and y.
  3952  //
  3953  // Asm: VPMINUW, CPU Feature: AVX
  3954  func (x Uint16x8) Min(y Uint16x8) Uint16x8
  3955  
  3956  // Min returns the elementwise minimum of x and y.
  3957  //
  3958  // Asm: VPMINUW, CPU Feature: AVX2
  3959  func (x Uint16x16) Min(y Uint16x16) Uint16x16
  3960  
  3961  // Min returns the elementwise minimum of x and y.
  3962  //
  3963  // Asm: VPMINUW, CPU Feature: AVX512
  3964  func (x Uint16x32) Min(y Uint16x32) Uint16x32
  3965  
  3966  // Min returns the elementwise minimum of x and y.
  3967  //
  3968  // Asm: VPMINUD, CPU Feature: AVX
  3969  func (x Uint32x4) Min(y Uint32x4) Uint32x4
  3970  
  3971  // Min returns the elementwise minimum of x and y.
  3972  //
  3973  // Asm: VPMINUD, CPU Feature: AVX2
  3974  func (x Uint32x8) Min(y Uint32x8) Uint32x8
  3975  
  3976  // Min returns the elementwise minimum of x and y.
  3977  //
  3978  // Asm: VPMINUD, CPU Feature: AVX512
  3979  func (x Uint32x16) Min(y Uint32x16) Uint32x16
  3980  
  3981  // Min returns the elementwise minimum of x and y.
  3982  //
  3983  // Asm: VPMINUQ, CPU Feature: AVX512
  3984  func (x Uint64x2) Min(y Uint64x2) Uint64x2
  3985  
  3986  // Min returns the elementwise minimum of x and y.
  3987  //
  3988  // Asm: VPMINUQ, CPU Feature: AVX512
  3989  func (x Uint64x4) Min(y Uint64x4) Uint64x4
  3990  
  3991  // Min returns the elementwise minimum of x and y.
  3992  //
  3993  // Asm: VPMINUQ, CPU Feature: AVX512
  3994  func (x Uint64x8) Min(y Uint64x8) Uint64x8
  3995  
  3996  /* Mul */
  3997  
  3998  // Mul returns the elementwise product of x and y.
  3999  //
  4000  // Asm: VMULPS, CPU Feature: AVX
  4001  func (x Float32x4) Mul(y Float32x4) Float32x4
  4002  
  4003  // Mul returns the elementwise product of x and y.
  4004  //
  4005  // Asm: VMULPS, CPU Feature: AVX
  4006  func (x Float32x8) Mul(y Float32x8) Float32x8
  4007  
  4008  // Mul returns the elementwise product of x and y.
  4009  //
  4010  // Asm: VMULPS, CPU Feature: AVX512
  4011  func (x Float32x16) Mul(y Float32x16) Float32x16
  4012  
  4013  // Mul returns the elementwise product of x and y.
  4014  //
  4015  // Asm: VMULPD, CPU Feature: AVX
  4016  func (x Float64x2) Mul(y Float64x2) Float64x2
  4017  
  4018  // Mul returns the elementwise product of x and y.
  4019  //
  4020  // Asm: VMULPD, CPU Feature: AVX
  4021  func (x Float64x4) Mul(y Float64x4) Float64x4
  4022  
  4023  // Mul returns the elementwise product of x and y.
  4024  //
  4025  // Asm: VMULPD, CPU Feature: AVX512
  4026  func (x Float64x8) Mul(y Float64x8) Float64x8
  4027  
  4028  // Mul returns the elementwise product of x and y.
  4029  //
  4030  // Asm: VPMULLW, CPU Feature: AVX
  4031  func (x Int16x8) Mul(y Int16x8) Int16x8
  4032  
  4033  // Mul returns the elementwise product of x and y.
  4034  //
  4035  // Asm: VPMULLW, CPU Feature: AVX2
  4036  func (x Int16x16) Mul(y Int16x16) Int16x16
  4037  
  4038  // Mul returns the elementwise product of x and y.
  4039  //
  4040  // Asm: VPMULLW, CPU Feature: AVX512
  4041  func (x Int16x32) Mul(y Int16x32) Int16x32
  4042  
  4043  // Mul returns the elementwise product of x and y.
  4044  //
  4045  // Asm: VPMULLD, CPU Feature: AVX
  4046  func (x Int32x4) Mul(y Int32x4) Int32x4
  4047  
  4048  // Mul returns the elementwise product of x and y.
  4049  //
  4050  // Asm: VPMULLD, CPU Feature: AVX2
  4051  func (x Int32x8) Mul(y Int32x8) Int32x8
  4052  
  4053  // Mul returns the elementwise product of x and y.
  4054  //
  4055  // Asm: VPMULLD, CPU Feature: AVX512
  4056  func (x Int32x16) Mul(y Int32x16) Int32x16
  4057  
  4058  // Mul returns the elementwise product of x and y.
  4059  //
  4060  // Asm: VPMULLQ, CPU Feature: AVX512
  4061  func (x Int64x2) Mul(y Int64x2) Int64x2
  4062  
  4063  // Mul returns the elementwise product of x and y.
  4064  //
  4065  // Asm: VPMULLQ, CPU Feature: AVX512
  4066  func (x Int64x4) Mul(y Int64x4) Int64x4
  4067  
  4068  // Mul returns the elementwise product of x and y.
  4069  //
  4070  // Asm: VPMULLQ, CPU Feature: AVX512
  4071  func (x Int64x8) Mul(y Int64x8) Int64x8
  4072  
  4073  // Mul returns the elementwise product of x and y.
  4074  //
  4075  // Asm: VPMULLW, CPU Feature: AVX
  4076  func (x Uint16x8) Mul(y Uint16x8) Uint16x8
  4077  
  4078  // Mul returns the elementwise product of x and y.
  4079  //
  4080  // Asm: VPMULLW, CPU Feature: AVX2
  4081  func (x Uint16x16) Mul(y Uint16x16) Uint16x16
  4082  
  4083  // Mul returns the elementwise product of x and y.
  4084  //
  4085  // Asm: VPMULLW, CPU Feature: AVX512
  4086  func (x Uint16x32) Mul(y Uint16x32) Uint16x32
  4087  
  4088  // Mul returns the elementwise product of x and y.
  4089  //
  4090  // Asm: VPMULLD, CPU Feature: AVX
  4091  func (x Uint32x4) Mul(y Uint32x4) Uint32x4
  4092  
  4093  // Mul returns the elementwise product of x and y.
  4094  //
  4095  // Asm: VPMULLD, CPU Feature: AVX2
  4096  func (x Uint32x8) Mul(y Uint32x8) Uint32x8
  4097  
  4098  // Mul returns the elementwise product of x and y.
  4099  //
  4100  // Asm: VPMULLD, CPU Feature: AVX512
  4101  func (x Uint32x16) Mul(y Uint32x16) Uint32x16
  4102  
  4103  // Mul returns the elementwise product of x and y.
  4104  //
  4105  // Asm: VPMULLQ, CPU Feature: AVX512
  4106  func (x Uint64x2) Mul(y Uint64x2) Uint64x2
  4107  
  4108  // Mul returns the elementwise product of x and y.
  4109  //
  4110  // Asm: VPMULLQ, CPU Feature: AVX512
  4111  func (x Uint64x4) Mul(y Uint64x4) Uint64x4
  4112  
  4113  // Mul returns the elementwise product of x and y.
  4114  //
  4115  // Asm: VPMULLQ, CPU Feature: AVX512
  4116  func (x Uint64x8) Mul(y Uint64x8) Uint64x8
  4117  
  4118  /* MulAdd */
  4119  
  4120  // MulAdd returns the fused multiply-add (x * y) + z, computed elementwise.
  4121  //
  4122  // Asm: VFMADD213PS, CPU Feature: AVX512
  4123  func (x Float32x4) MulAdd(y Float32x4, z Float32x4) Float32x4
  4124  
  4125  // MulAdd returns the fused multiply-add (x * y) + z, computed elementwise.
  4126  //
  4127  // Asm: VFMADD213PS, CPU Feature: AVX512
  4128  func (x Float32x8) MulAdd(y Float32x8, z Float32x8) Float32x8
  4129  
  4130  // MulAdd returns the fused multiply-add (x * y) + z, computed elementwise.
  4131  //
  4132  // Asm: VFMADD213PS, CPU Feature: AVX512
  4133  func (x Float32x16) MulAdd(y Float32x16, z Float32x16) Float32x16
  4134  
  4135  // MulAdd returns the fused multiply-add (x * y) + z, computed elementwise.
  4136  //
  4137  // Asm: VFMADD213PD, CPU Feature: AVX512
  4138  func (x Float64x2) MulAdd(y Float64x2, z Float64x2) Float64x2
  4139  
  4140  // MulAdd returns the fused multiply-add (x * y) + z, computed elementwise.
  4141  //
  4142  // Asm: VFMADD213PD, CPU Feature: AVX512
  4143  func (x Float64x4) MulAdd(y Float64x4, z Float64x4) Float64x4
  4144  
  4145  // MulAdd returns the fused multiply-add (x * y) + z, computed elementwise.
  4146  //
  4147  // Asm: VFMADD213PD, CPU Feature: AVX512
  4148  func (x Float64x8) MulAdd(y Float64x8, z Float64x8) Float64x8
  4149  
  4150  /* MulAddSub (NOTE(review): Intel's VFMADDSUB pseudocode appears to subtract z at even and add z at odd 0-based element indices, the opposite of the parity stated in the comments below; confirm and correct in the generator if so) */
  4151  
  4152  // MulAddSub performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
  4153  //
  4154  // Asm: VFMADDSUB213PS, CPU Feature: AVX512
  4155  func (x Float32x4) MulAddSub(y Float32x4, z Float32x4) Float32x4
  4156  
  4157  // MulAddSub performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
  4158  //
  4159  // Asm: VFMADDSUB213PS, CPU Feature: AVX512
  4160  func (x Float32x8) MulAddSub(y Float32x8, z Float32x8) Float32x8
  4161  
  4162  // MulAddSub performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
  4163  //
  4164  // Asm: VFMADDSUB213PS, CPU Feature: AVX512
  4165  func (x Float32x16) MulAddSub(y Float32x16, z Float32x16) Float32x16
  4166  
  4167  // MulAddSub performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
  4168  //
  4169  // Asm: VFMADDSUB213PD, CPU Feature: AVX512
  4170  func (x Float64x2) MulAddSub(y Float64x2, z Float64x2) Float64x2
  4171  
  4172  // MulAddSub performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
  4173  //
  4174  // Asm: VFMADDSUB213PD, CPU Feature: AVX512
  4175  func (x Float64x4) MulAddSub(y Float64x4, z Float64x4) Float64x4
  4176  
  4177  // MulAddSub performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
  4178  //
  4179  // Asm: VFMADDSUB213PD, CPU Feature: AVX512
  4180  func (x Float64x8) MulAddSub(y Float64x8, z Float64x8) Float64x8
  4181  
  4182  /* MulEvenWiden */
  4183  
  4184  // MulEvenWiden multiplies the even-indexed elements of x and y, widening each product.
  4185  // result[i] = x[2*i] * y[2*i].
  4186  //
  4187  // Asm: VPMULDQ, CPU Feature: AVX
  4188  func (x Int32x4) MulEvenWiden(y Int32x4) Int64x2
  4189  
  4190  // MulEvenWiden multiplies the even-indexed elements of x and y, widening each product.
  4191  // result[i] = x[2*i] * y[2*i].
  4192  //
  4193  // Asm: VPMULDQ, CPU Feature: AVX2
  4194  func (x Int32x8) MulEvenWiden(y Int32x8) Int64x4
  4195  
  4196  // MulEvenWiden multiplies the even-indexed elements of x and y, widening each product.
  4197  // result[i] = x[2*i] * y[2*i].
  4198  //
  4199  // Asm: VPMULUDQ, CPU Feature: AVX
  4200  func (x Uint32x4) MulEvenWiden(y Uint32x4) Uint64x2
  4201  
  4202  // MulEvenWiden multiplies the even-indexed elements of x and y, widening each product.
  4203  // result[i] = x[2*i] * y[2*i].
  4204  //
  4205  // Asm: VPMULUDQ, CPU Feature: AVX2
  4206  func (x Uint32x8) MulEvenWiden(y Uint32x8) Uint64x4
  4207  
  4208  /* MulHigh */
  4209  
  4210  // MulHigh multiplies corresponding elements of x and y and returns the high part of each product.
  4211  //
  4212  // Asm: VPMULHW, CPU Feature: AVX
  4213  func (x Int16x8) MulHigh(y Int16x8) Int16x8
  4214  
  4215  // MulHigh multiplies corresponding elements of x and y and returns the high part of each product.
  4216  //
  4217  // Asm: VPMULHW, CPU Feature: AVX2
  4218  func (x Int16x16) MulHigh(y Int16x16) Int16x16
  4219  
  4220  // MulHigh multiplies corresponding elements of x and y and returns the high part of each product.
  4221  //
  4222  // Asm: VPMULHW, CPU Feature: AVX512
  4223  func (x Int16x32) MulHigh(y Int16x32) Int16x32
  4224  
  4225  // MulHigh multiplies corresponding elements of x and y and returns the high part of each product.
  4226  //
  4227  // Asm: VPMULHUW, CPU Feature: AVX
  4228  func (x Uint16x8) MulHigh(y Uint16x8) Uint16x8
  4229  
  4230  // MulHigh multiplies corresponding elements of x and y and returns the high part of each product.
  4231  //
  4232  // Asm: VPMULHUW, CPU Feature: AVX2
  4233  func (x Uint16x16) MulHigh(y Uint16x16) Uint16x16
  4234  
  4235  // MulHigh multiplies corresponding elements of x and y and returns the high part of each product.
  4236  //
  4237  // Asm: VPMULHUW, CPU Feature: AVX512
  4238  func (x Uint16x32) MulHigh(y Uint16x32) Uint16x32
  4239  
  4240  /* MulSubAdd (NOTE(review): Intel's VFMSUBADD pseudocode appears to add z at even and subtract z at odd 0-based element indices, the opposite of the parity stated in the comments below; confirm and correct in the generator if so) */
  4241  
  4242  // MulSubAdd performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
  4243  //
  4244  // Asm: VFMSUBADD213PS, CPU Feature: AVX512
  4245  func (x Float32x4) MulSubAdd(y Float32x4, z Float32x4) Float32x4
  4246  
  4247  // MulSubAdd performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
  4248  //
  4249  // Asm: VFMSUBADD213PS, CPU Feature: AVX512
  4250  func (x Float32x8) MulSubAdd(y Float32x8, z Float32x8) Float32x8
  4251  
  4252  // MulSubAdd performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
  4253  //
  4254  // Asm: VFMSUBADD213PS, CPU Feature: AVX512
  4255  func (x Float32x16) MulSubAdd(y Float32x16, z Float32x16) Float32x16
  4256  
  4257  // MulSubAdd performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
  4258  //
  4259  // Asm: VFMSUBADD213PD, CPU Feature: AVX512
  4260  func (x Float64x2) MulSubAdd(y Float64x2, z Float64x2) Float64x2
  4261  
  4262  // MulSubAdd performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
  4263  //
  4264  // Asm: VFMSUBADD213PD, CPU Feature: AVX512
  4265  func (x Float64x4) MulSubAdd(y Float64x4, z Float64x4) Float64x4
  4266  
  4267  // MulSubAdd performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
  4268  //
  4269  // Asm: VFMSUBADD213PD, CPU Feature: AVX512
  4270  func (x Float64x8) MulSubAdd(y Float64x8, z Float64x8) Float64x8
  4271  
  4272  /* NotEqual */
  4273  
  4274  // NotEqual returns a mask reporting, elementwise, whether x is not equal to y.
  4275  //
  4276  // Asm: VCMPPS, CPU Feature: AVX
  4277  func (x Float32x4) NotEqual(y Float32x4) Mask32x4
  4278  
  4279  // NotEqual returns a mask reporting, elementwise, whether x is not equal to y.
  4280  //
  4281  // Asm: VCMPPS, CPU Feature: AVX
  4282  func (x Float32x8) NotEqual(y Float32x8) Mask32x8
  4283  
  4284  // NotEqual returns a mask reporting, elementwise, whether x is not equal to y.
  4285  //
  4286  // Asm: VCMPPS, CPU Feature: AVX512
  4287  func (x Float32x16) NotEqual(y Float32x16) Mask32x16
  4288  
  4289  // NotEqual returns a mask reporting, elementwise, whether x is not equal to y.
  4290  //
  4291  // Asm: VCMPPD, CPU Feature: AVX
  4292  func (x Float64x2) NotEqual(y Float64x2) Mask64x2
  4293  
  4294  // NotEqual returns a mask reporting, elementwise, whether x is not equal to y.
  4295  //
  4296  // Asm: VCMPPD, CPU Feature: AVX
  4297  func (x Float64x4) NotEqual(y Float64x4) Mask64x4
  4298  
  4299  // NotEqual returns a mask reporting, elementwise, whether x is not equal to y.
  4300  //
  4301  // Asm: VCMPPD, CPU Feature: AVX512
  4302  func (x Float64x8) NotEqual(y Float64x8) Mask64x8
  4303  
  4304  // NotEqual returns a mask reporting, elementwise, whether x is not equal to y.
  4305  //
  4306  // Asm: VPCMPB, CPU Feature: AVX512
  4307  func (x Int8x64) NotEqual(y Int8x64) Mask8x64
  4308  
  4309  // NotEqual returns a mask reporting, elementwise, whether x is not equal to y.
  4310  //
  4311  // Asm: VPCMPW, CPU Feature: AVX512
  4312  func (x Int16x32) NotEqual(y Int16x32) Mask16x32
  4313  
  4314  // NotEqual returns a mask reporting, elementwise, whether x is not equal to y.
  4315  //
  4316  // Asm: VPCMPD, CPU Feature: AVX512
  4317  func (x Int32x16) NotEqual(y Int32x16) Mask32x16
  4318  
  4319  // NotEqual returns a mask reporting, elementwise, whether x is not equal to y.
  4320  //
  4321  // Asm: VPCMPQ, CPU Feature: AVX512
  4322  func (x Int64x8) NotEqual(y Int64x8) Mask64x8
  4323  
  4324  // NotEqual returns a mask reporting, elementwise, whether x is not equal to y.
  4325  //
  4326  // Asm: VPCMPUB, CPU Feature: AVX512
  4327  func (x Uint8x64) NotEqual(y Uint8x64) Mask8x64
  4328  
  4329  // NotEqual returns a mask reporting, elementwise, whether x is not equal to y.
  4330  //
  4331  // Asm: VPCMPUW, CPU Feature: AVX512
  4332  func (x Uint16x32) NotEqual(y Uint16x32) Mask16x32
  4333  
  4334  // NotEqual returns a mask reporting, elementwise, whether x is not equal to y.
  4335  //
  4336  // Asm: VPCMPUD, CPU Feature: AVX512
  4337  func (x Uint32x16) NotEqual(y Uint32x16) Mask32x16
  4338  
  4339  // NotEqual returns a mask reporting, elementwise, whether x is not equal to y.
  4340  //
  4341  // Asm: VPCMPUQ, CPU Feature: AVX512
  4342  func (x Uint64x8) NotEqual(y Uint64x8) Mask64x8
  4343  
  4344  /* OnesCount */
  4345  
  4346  // OnesCount returns the number of set bits in each element of x.
  4347  //
  4348  // Asm: VPOPCNTB, CPU Feature: AVX512BITALG
  4349  func (x Int8x16) OnesCount() Int8x16
  4350  
  4351  // OnesCount returns the number of set bits in each element of x.
  4352  //
  4353  // Asm: VPOPCNTB, CPU Feature: AVX512BITALG
  4354  func (x Int8x32) OnesCount() Int8x32
  4355  
  4356  // OnesCount returns the number of set bits in each element of x.
  4357  //
  4358  // Asm: VPOPCNTB, CPU Feature: AVX512BITALG
  4359  func (x Int8x64) OnesCount() Int8x64
  4360  
  4361  // OnesCount returns the number of set bits in each element of x.
  4362  //
  4363  // Asm: VPOPCNTW, CPU Feature: AVX512BITALG
  4364  func (x Int16x8) OnesCount() Int16x8
  4365  
  4366  // OnesCount returns the number of set bits in each element of x.
  4367  //
  4368  // Asm: VPOPCNTW, CPU Feature: AVX512BITALG
  4369  func (x Int16x16) OnesCount() Int16x16
  4370  
  4371  // OnesCount returns the number of set bits in each element of x.
  4372  //
  4373  // Asm: VPOPCNTW, CPU Feature: AVX512BITALG
  4374  func (x Int16x32) OnesCount() Int16x32
  4375  
  4376  // OnesCount returns the number of set bits in each element of x.
  4377  //
  4378  // Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
  4379  func (x Int32x4) OnesCount() Int32x4
  4380  
  4381  // OnesCount returns the number of set bits in each element of x.
  4382  //
  4383  // Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
  4384  func (x Int32x8) OnesCount() Int32x8
  4385  
  4386  // OnesCount returns the number of set bits in each element of x.
  4387  //
  4388  // Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
  4389  func (x Int32x16) OnesCount() Int32x16
  4390  
  4391  // OnesCount returns the number of set bits in each element of x.
  4392  //
  4393  // Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
  4394  func (x Int64x2) OnesCount() Int64x2
  4395  
  4396  // OnesCount returns the number of set bits in each element of x.
  4397  //
  4398  // Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
  4399  func (x Int64x4) OnesCount() Int64x4
  4400  
  4401  // OnesCount returns the number of set bits in each element of x.
  4402  //
  4403  // Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
  4404  func (x Int64x8) OnesCount() Int64x8
  4405  
  4406  // OnesCount returns the number of set bits in each element of x.
  4407  //
  4408  // Asm: VPOPCNTB, CPU Feature: AVX512BITALG
  4409  func (x Uint8x16) OnesCount() Uint8x16
  4410  
  4411  // OnesCount returns the number of set bits in each element of x.
  4412  //
  4413  // Asm: VPOPCNTB, CPU Feature: AVX512BITALG
  4414  func (x Uint8x32) OnesCount() Uint8x32
  4415  
  4416  // OnesCount returns the number of set bits in each element of x.
  4417  //
  4418  // Asm: VPOPCNTB, CPU Feature: AVX512BITALG
  4419  func (x Uint8x64) OnesCount() Uint8x64
  4420  
  4421  // OnesCount returns the number of set bits in each element of x.
  4422  //
  4423  // Asm: VPOPCNTW, CPU Feature: AVX512BITALG
  4424  func (x Uint16x8) OnesCount() Uint16x8
  4425  
  4426  // OnesCount returns the number of set bits in each element of x.
  4427  //
  4428  // Asm: VPOPCNTW, CPU Feature: AVX512BITALG
  4429  func (x Uint16x16) OnesCount() Uint16x16
  4430  
  4431  // OnesCount returns the number of set bits in each element of x.
  4432  //
  4433  // Asm: VPOPCNTW, CPU Feature: AVX512BITALG
  4434  func (x Uint16x32) OnesCount() Uint16x32
  4435  
  4436  // OnesCount returns the number of set bits in each element of x.
  4437  //
  4438  // Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
  4439  func (x Uint32x4) OnesCount() Uint32x4
  4440  
  4441  // OnesCount returns the number of set bits in each element of x.
  4442  //
  4443  // Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
  4444  func (x Uint32x8) OnesCount() Uint32x8
  4445  
  4446  // OnesCount returns the number of set bits in each element of x.
  4447  //
  4448  // Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
  4449  func (x Uint32x16) OnesCount() Uint32x16
  4450  
  4451  // OnesCount returns the number of set bits in each element of x.
  4452  //
  4453  // Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
  4454  func (x Uint64x2) OnesCount() Uint64x2
  4455  
  4456  // OnesCount returns the number of set bits in each element of x.
  4457  //
  4458  // Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
  4459  func (x Uint64x4) OnesCount() Uint64x4
  4460  
  4461  // OnesCount returns the number of set bits in each element of x.
  4462  //
  4463  // Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
  4464  func (x Uint64x8) OnesCount() Uint64x8
  4465  
  4466  /* Or */
  4467  
  4468  // Or returns the bitwise OR of x and y.
  4469  //
  4470  // Asm: VPOR, CPU Feature: AVX
  4471  func (x Int8x16) Or(y Int8x16) Int8x16
  4472  
  4473  // Or returns the bitwise OR of x and y.
  4474  //
  4475  // Asm: VPOR, CPU Feature: AVX2
  4476  func (x Int8x32) Or(y Int8x32) Int8x32
  4477  
  4478  // Or returns the bitwise OR of x and y.
  4479  //
  4480  // Asm: VPORD, CPU Feature: AVX512
  4481  func (x Int8x64) Or(y Int8x64) Int8x64
  4482  
  4483  // Or returns the bitwise OR of x and y.
  4484  //
  4485  // Asm: VPOR, CPU Feature: AVX
  4486  func (x Int16x8) Or(y Int16x8) Int16x8
  4487  
  4488  // Or returns the bitwise OR of x and y.
  4489  //
  4490  // Asm: VPOR, CPU Feature: AVX2
  4491  func (x Int16x16) Or(y Int16x16) Int16x16
  4492  
  4493  // Or returns the bitwise OR of x and y.
  4494  //
  4495  // Asm: VPORD, CPU Feature: AVX512
  4496  func (x Int16x32) Or(y Int16x32) Int16x32
  4497  
  4498  // Or returns the bitwise OR of x and y.
  4499  //
  4500  // Asm: VPOR, CPU Feature: AVX
  4501  func (x Int32x4) Or(y Int32x4) Int32x4
  4502  
  4503  // Or returns the bitwise OR of x and y.
  4504  //
  4505  // Asm: VPOR, CPU Feature: AVX2
  4506  func (x Int32x8) Or(y Int32x8) Int32x8
  4507  
  4508  // Or returns the bitwise OR of x and y.
  4509  //
  4510  // Asm: VPORD, CPU Feature: AVX512
  4511  func (x Int32x16) Or(y Int32x16) Int32x16
  4512  
  4513  // Or returns the bitwise OR of x and y.
  4514  //
  4515  // Asm: VPOR, CPU Feature: AVX
  4516  func (x Int64x2) Or(y Int64x2) Int64x2
  4517  
  4518  // Or returns the bitwise OR of x and y.
  4519  //
  4520  // Asm: VPOR, CPU Feature: AVX2
  4521  func (x Int64x4) Or(y Int64x4) Int64x4
  4522  
  4523  // Or returns the bitwise OR of x and y.
  4524  //
  4525  // Asm: VPORQ, CPU Feature: AVX512
  4526  func (x Int64x8) Or(y Int64x8) Int64x8
  4527  
  4528  // Or returns the bitwise OR of x and y.
  4529  //
  4530  // Asm: VPOR, CPU Feature: AVX
  4531  func (x Uint8x16) Or(y Uint8x16) Uint8x16
  4532  
  4533  // Or returns the bitwise OR of x and y.
  4534  //
  4535  // Asm: VPOR, CPU Feature: AVX2
  4536  func (x Uint8x32) Or(y Uint8x32) Uint8x32
  4537  
  4538  // Or returns the bitwise OR of x and y.
  4539  //
  4540  // Asm: VPORD, CPU Feature: AVX512
  4541  func (x Uint8x64) Or(y Uint8x64) Uint8x64
  4542  
  4543  // Or returns the bitwise OR of x and y.
  4544  //
  4545  // Asm: VPOR, CPU Feature: AVX
  4546  func (x Uint16x8) Or(y Uint16x8) Uint16x8
  4547  
  4548  // Or returns the bitwise OR of x and y.
  4549  //
  4550  // Asm: VPOR, CPU Feature: AVX2
  4551  func (x Uint16x16) Or(y Uint16x16) Uint16x16
  4552  
  4553  // Or returns the bitwise OR of x and y.
  4554  //
  4555  // Asm: VPORD, CPU Feature: AVX512
  4556  func (x Uint16x32) Or(y Uint16x32) Uint16x32
  4557  
  4558  // Or returns the bitwise OR of x and y.
  4559  //
  4560  // Asm: VPOR, CPU Feature: AVX
  4561  func (x Uint32x4) Or(y Uint32x4) Uint32x4
  4562  
  4563  // Or returns the bitwise OR of x and y.
  4564  //
  4565  // Asm: VPOR, CPU Feature: AVX2
  4566  func (x Uint32x8) Or(y Uint32x8) Uint32x8
  4567  
  4568  // Or returns the bitwise OR of x and y.
  4569  //
  4570  // Asm: VPORD, CPU Feature: AVX512
  4571  func (x Uint32x16) Or(y Uint32x16) Uint32x16
  4572  
  4573  // Or returns the bitwise OR of x and y.
  4574  //
  4575  // Asm: VPOR, CPU Feature: AVX
  4576  func (x Uint64x2) Or(y Uint64x2) Uint64x2
  4577  
  4578  // Or returns the bitwise OR of x and y.
  4579  //
  4580  // Asm: VPOR, CPU Feature: AVX2
  4581  func (x Uint64x4) Or(y Uint64x4) Uint64x4
  4582  
  4583  // Or returns the bitwise OR of x and y.
  4584  //
  4585  // Asm: VPORQ, CPU Feature: AVX512
  4586  func (x Uint64x8) Or(y Uint64x8) Uint64x8
  4587  
  4588  /* Permute */
  4589  
  4590  // Permute performs a full permutation of vector x using indices:
  4591  // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
  4592  // The low 4 bits (values 0-15) of each element of indices is used
  4593  //
  4594  // Asm: VPERMB, CPU Feature: AVX512VBMI
  4595  func (x Int8x16) Permute(indices Uint8x16) Int8x16
  4596  
  4597  // Permute rearranges the elements of x, picking each result element with the matching entry of indices:
  4598  // result = {x[indices[0]], x[indices[1]], ..., x[indices[15]]}
  4599  // Only the low 4 bits (values 0-15) of each element of indices are used.
  4600  //
  4601  // Asm: VPERMB, CPU Feature: AVX512VBMI
  4602  func (x Uint8x16) Permute(indices Uint8x16) Uint8x16
  4603  
  4604  // Permute rearranges the elements of x, picking each result element with the matching entry of indices:
  4605  // result = {x[indices[0]], x[indices[1]], ..., x[indices[31]]}
  4606  // Only the low 5 bits (values 0-31) of each element of indices are used.
  4607  //
  4608  // Asm: VPERMB, CPU Feature: AVX512VBMI
  4609  func (x Int8x32) Permute(indices Uint8x32) Int8x32
  4610  
  4611  // Permute rearranges the elements of x, picking each result element with the matching entry of indices:
  4612  // result = {x[indices[0]], x[indices[1]], ..., x[indices[31]]}
  4613  // Only the low 5 bits (values 0-31) of each element of indices are used.
  4614  //
  4615  // Asm: VPERMB, CPU Feature: AVX512VBMI
  4616  func (x Uint8x32) Permute(indices Uint8x32) Uint8x32
  4617  
  4618  // Permute rearranges the elements of x, picking each result element with the matching entry of indices:
  4619  // result = {x[indices[0]], x[indices[1]], ..., x[indices[63]]}
  4620  // Only the low 6 bits (values 0-63) of each element of indices are used.
  4621  //
  4622  // Asm: VPERMB, CPU Feature: AVX512VBMI
  4623  func (x Int8x64) Permute(indices Uint8x64) Int8x64
  4624  
  4625  // Permute rearranges the elements of x, picking each result element with the matching entry of indices:
  4626  // result = {x[indices[0]], x[indices[1]], ..., x[indices[63]]}
  4627  // Only the low 6 bits (values 0-63) of each element of indices are used.
  4628  //
  4629  // Asm: VPERMB, CPU Feature: AVX512VBMI
  4630  func (x Uint8x64) Permute(indices Uint8x64) Uint8x64
  4631  
  4632  // Permute rearranges the elements of x, picking each result element with the matching entry of indices:
  4633  // result = {x[indices[0]], x[indices[1]], ..., x[indices[7]]}
  4634  // Only the low 3 bits (values 0-7) of each element of indices are used.
  4635  //
  4636  // Asm: VPERMW, CPU Feature: AVX512
  4637  func (x Int16x8) Permute(indices Uint16x8) Int16x8
  4638  
  4639  // Permute rearranges the elements of x, picking each result element with the matching entry of indices:
  4640  // result = {x[indices[0]], x[indices[1]], ..., x[indices[7]]}
  4641  // Only the low 3 bits (values 0-7) of each element of indices are used.
  4642  //
  4643  // Asm: VPERMW, CPU Feature: AVX512
  4644  func (x Uint16x8) Permute(indices Uint16x8) Uint16x8
  4645  
  4646  // Permute rearranges the elements of x, picking each result element with the matching entry of indices:
  4647  // result = {x[indices[0]], x[indices[1]], ..., x[indices[15]]}
  4648  // Only the low 4 bits (values 0-15) of each element of indices are used.
  4649  //
  4650  // Asm: VPERMW, CPU Feature: AVX512
  4651  func (x Int16x16) Permute(indices Uint16x16) Int16x16
  4652  
  4653  // Permute rearranges the elements of x, picking each result element with the matching entry of indices:
  4654  // result = {x[indices[0]], x[indices[1]], ..., x[indices[15]]}
  4655  // Only the low 4 bits (values 0-15) of each element of indices are used.
  4656  //
  4657  // Asm: VPERMW, CPU Feature: AVX512
  4658  func (x Uint16x16) Permute(indices Uint16x16) Uint16x16
  4659  
  4660  // Permute rearranges the elements of x, picking each result element with the matching entry of indices:
  4661  // result = {x[indices[0]], x[indices[1]], ..., x[indices[31]]}
  4662  // Only the low 5 bits (values 0-31) of each element of indices are used.
  4663  //
  4664  // Asm: VPERMW, CPU Feature: AVX512
  4665  func (x Int16x32) Permute(indices Uint16x32) Int16x32
  4666  
  4667  // Permute rearranges the elements of x, picking each result element with the matching entry of indices:
  4668  // result = {x[indices[0]], x[indices[1]], ..., x[indices[31]]}
  4669  // Only the low 5 bits (values 0-31) of each element of indices are used.
  4670  //
  4671  // Asm: VPERMW, CPU Feature: AVX512
  4672  func (x Uint16x32) Permute(indices Uint16x32) Uint16x32
  4673  
  4674  // Permute rearranges the elements of x, picking each result element with the matching entry of indices:
  4675  // result = {x[indices[0]], x[indices[1]], ..., x[indices[7]]}
  4676  // Only the low 3 bits (values 0-7) of each element of indices are used.
  4677  //
  4678  // Asm: VPERMPS, CPU Feature: AVX2
  4679  func (x Float32x8) Permute(indices Uint32x8) Float32x8
  4680  
  4681  // Permute rearranges the elements of x, picking each result element with the matching entry of indices:
  4682  // result = {x[indices[0]], x[indices[1]], ..., x[indices[7]]}
  4683  // Only the low 3 bits (values 0-7) of each element of indices are used.
  4684  //
  4685  // Asm: VPERMD, CPU Feature: AVX2
  4686  func (x Int32x8) Permute(indices Uint32x8) Int32x8
  4687  
  4688  // Permute rearranges the elements of x, picking each result element with the matching entry of indices:
  4689  // result = {x[indices[0]], x[indices[1]], ..., x[indices[7]]}
  4690  // Only the low 3 bits (values 0-7) of each element of indices are used.
  4691  //
  4692  // Asm: VPERMD, CPU Feature: AVX2
  4693  func (x Uint32x8) Permute(indices Uint32x8) Uint32x8
  4694  
  4695  // Permute rearranges the elements of x, picking each result element with the matching entry of indices:
  4696  // result = {x[indices[0]], x[indices[1]], ..., x[indices[15]]}
  4697  // Only the low 4 bits (values 0-15) of each element of indices are used.
  4698  //
  4699  // Asm: VPERMPS, CPU Feature: AVX512
  4700  func (x Float32x16) Permute(indices Uint32x16) Float32x16
  4701  
  4702  // Permute rearranges the elements of x, picking each result element with the matching entry of indices:
  4703  // result = {x[indices[0]], x[indices[1]], ..., x[indices[15]]}
  4704  // Only the low 4 bits (values 0-15) of each element of indices are used.
  4705  //
  4706  // Asm: VPERMD, CPU Feature: AVX512
  4707  func (x Int32x16) Permute(indices Uint32x16) Int32x16
  4708  
  4709  // Permute rearranges the elements of x, picking each result element with the matching entry of indices:
  4710  // result = {x[indices[0]], x[indices[1]], ..., x[indices[15]]}
  4711  // Only the low 4 bits (values 0-15) of each element of indices are used.
  4712  //
  4713  // Asm: VPERMD, CPU Feature: AVX512
  4714  func (x Uint32x16) Permute(indices Uint32x16) Uint32x16
  4715  
  4716  // Permute rearranges the elements of x, picking each result element with the matching entry of indices:
  4717  // result = {x[indices[0]], x[indices[1]], x[indices[2]], x[indices[3]]}
  4718  // Only the low 2 bits (values 0-3) of each element of indices are used.
  4719  //
  4720  // Asm: VPERMPD, CPU Feature: AVX512
  4721  func (x Float64x4) Permute(indices Uint64x4) Float64x4
  4722  
  4723  // Permute rearranges the elements of x, picking each result element with the matching entry of indices:
  4724  // result = {x[indices[0]], x[indices[1]], x[indices[2]], x[indices[3]]}
  4725  // Only the low 2 bits (values 0-3) of each element of indices are used.
  4726  //
  4727  // Asm: VPERMQ, CPU Feature: AVX512
  4728  func (x Int64x4) Permute(indices Uint64x4) Int64x4
  4729  
  4730  // Permute rearranges the elements of x, picking each result element with the matching entry of indices:
  4731  // result = {x[indices[0]], x[indices[1]], x[indices[2]], x[indices[3]]}
  4732  // Only the low 2 bits (values 0-3) of each element of indices are used.
  4733  //
  4734  // Asm: VPERMQ, CPU Feature: AVX512
  4735  func (x Uint64x4) Permute(indices Uint64x4) Uint64x4
  4736  
  4737  // Permute rearranges the elements of x, picking each result element with the matching entry of indices:
  4738  // result = {x[indices[0]], x[indices[1]], ..., x[indices[7]]}
  4739  // Only the low 3 bits (values 0-7) of each element of indices are used.
  4740  //
  4741  // Asm: VPERMPD, CPU Feature: AVX512
  4742  func (x Float64x8) Permute(indices Uint64x8) Float64x8
  4743  
  4744  // Permute rearranges the elements of x, picking each result element with the matching entry of indices:
  4745  // result = {x[indices[0]], x[indices[1]], ..., x[indices[7]]}
  4746  // Only the low 3 bits (values 0-7) of each element of indices are used.
  4747  //
  4748  // Asm: VPERMQ, CPU Feature: AVX512
  4749  func (x Int64x8) Permute(indices Uint64x8) Int64x8
  4750  
  4751  // Permute rearranges the elements of x, picking each result element with the matching entry of indices:
  4752  // result = {x[indices[0]], x[indices[1]], ..., x[indices[7]]}
  4753  // Only the low 3 bits (values 0-7) of each element of indices are used.
  4754  //
  4755  // Asm: VPERMQ, CPU Feature: AVX512
  4756  func (x Uint64x8) Permute(indices Uint64x8) Uint64x8
  4757  
  4758  /* PermuteOrZero */
  4759  
  4760  // PermuteOrZero permutes the elements of x using indices, producing zero wherever an index is negative:
  4761  // result = {x[indices[0]], x[indices[1]], ..., x[indices[15]]}
  4762  // The lower four bits of each byte-sized index in indices select an element from x,
  4763  // unless the index's sign bit is set, in which case zero is used instead.
  4764  //
  4765  // Asm: VPSHUFB, CPU Feature: AVX
  4766  func (x Int8x16) PermuteOrZero(indices Int8x16) Int8x16
  4767  
  4768  // PermuteOrZero permutes the elements of x using indices, producing zero wherever an index is negative:
  4769  // result = {x[indices[0]], x[indices[1]], ..., x[indices[15]]}
  4770  // The lower four bits of each byte-sized index in indices select an element from x,
  4771  // unless the index's sign bit is set, in which case zero is used instead.
  4772  //
  4773  // Asm: VPSHUFB, CPU Feature: AVX
  4774  func (x Uint8x16) PermuteOrZero(indices Int8x16) Uint8x16
  4775  
  4776  /* PermuteOrZeroGrouped */
  4777  
  4778  // PermuteOrZeroGrouped permutes the elements of x independently within each 128-bit group, using indices:
  4779  // result = {x_group0[indices[0]], x_group0[indices[1]], ..., x_group1[indices[16]], x_group1[indices[17]], ...}
  4780  // The lower four bits of each byte-sized index in indices select an element from its corresponding group in x,
  4781  // unless the index's sign bit is set, in which case zero is used instead.
  4782  // Each group is 128 bits (16 byte-sized elements) wide.
  4783  //
  4784  // Asm: VPSHUFB, CPU Feature: AVX2
  4785  func (x Int8x32) PermuteOrZeroGrouped(indices Int8x32) Int8x32
  4786  
  4787  // PermuteOrZeroGrouped permutes the elements of x independently within each 128-bit group, using indices:
  4788  // result = {x_group0[indices[0]], x_group0[indices[1]], ..., x_group1[indices[16]], x_group1[indices[17]], ...}
  4789  // The lower four bits of each byte-sized index in indices select an element from its corresponding group in x,
  4790  // unless the index's sign bit is set, in which case zero is used instead.
  4791  // Each group is 128 bits (16 byte-sized elements) wide.
  4792  //
  4793  // Asm: VPSHUFB, CPU Feature: AVX512
  4794  func (x Int8x64) PermuteOrZeroGrouped(indices Int8x64) Int8x64
  4795  
  4796  // PermuteOrZeroGrouped permutes the elements of x independently within each 128-bit group, using indices:
  4797  // result = {x_group0[indices[0]], x_group0[indices[1]], ..., x_group1[indices[16]], x_group1[indices[17]], ...}
  4798  // The lower four bits of each byte-sized index in indices select an element from its corresponding group in x,
  4799  // unless the index's sign bit is set, in which case zero is used instead.
  4800  // Each group is 128 bits (16 byte-sized elements) wide.
  4801  //
  4802  // Asm: VPSHUFB, CPU Feature: AVX2
  4803  func (x Uint8x32) PermuteOrZeroGrouped(indices Int8x32) Uint8x32
  4804  
  4805  // PermuteOrZeroGrouped permutes the elements of x independently within each 128-bit group, using indices:
  4806  // result = {x_group0[indices[0]], x_group0[indices[1]], ..., x_group1[indices[16]], x_group1[indices[17]], ...}
  4807  // The lower four bits of each byte-sized index in indices select an element from its corresponding group in x,
  4808  // unless the index's sign bit is set, in which case zero is used instead.
  4809  // Each group is 128 bits (16 byte-sized elements) wide.
  4810  //
  4811  // Asm: VPSHUFB, CPU Feature: AVX512
  4812  func (x Uint8x64) PermuteOrZeroGrouped(indices Int8x64) Uint8x64
  4813  
  4814  /* Reciprocal */
  4815  
  4816  // Reciprocal returns a vector holding an approximate reciprocal (1/v) of each element v of x.
  4817  //
  4818  // Asm: VRCPPS, CPU Feature: AVX
  4819  func (x Float32x4) Reciprocal() Float32x4
  4820  
  4821  // Reciprocal returns a vector holding an approximate reciprocal (1/v) of each element v of x.
  4822  //
  4823  // Asm: VRCPPS, CPU Feature: AVX
  4824  func (x Float32x8) Reciprocal() Float32x8
  4825  
  4826  // Reciprocal returns a vector holding an approximate reciprocal (1/v) of each element v of x.
  4827  //
  4828  // Asm: VRCP14PS, CPU Feature: AVX512
  4829  func (x Float32x16) Reciprocal() Float32x16
  4830  
  4831  // Reciprocal returns a vector holding an approximate reciprocal (1/v) of each element v of x.
  4832  //
  4833  // Asm: VRCP14PD, CPU Feature: AVX512
  4834  func (x Float64x2) Reciprocal() Float64x2
  4835  
  4836  // Reciprocal returns a vector holding an approximate reciprocal (1/v) of each element v of x.
  4837  //
  4838  // Asm: VRCP14PD, CPU Feature: AVX512
  4839  func (x Float64x4) Reciprocal() Float64x4
  4840  
  4841  // Reciprocal returns a vector holding an approximate reciprocal (1/v) of each element v of x.
  4842  //
  4843  // Asm: VRCP14PD, CPU Feature: AVX512
  4844  func (x Float64x8) Reciprocal() Float64x8
  4845  
  4846  /* ReciprocalSqrt */
  4847  
  4848  // ReciprocalSqrt returns a vector holding an approximate reciprocal of the square root (1/sqrt(v)) of each element v of x.
  4849  //
  4850  // Asm: VRSQRTPS, CPU Feature: AVX
  4851  func (x Float32x4) ReciprocalSqrt() Float32x4
  4852  
  4853  // ReciprocalSqrt returns a vector holding an approximate reciprocal of the square root (1/sqrt(v)) of each element v of x.
  4854  //
  4855  // Asm: VRSQRTPS, CPU Feature: AVX
  4856  func (x Float32x8) ReciprocalSqrt() Float32x8
  4857  
  4858  // ReciprocalSqrt returns a vector holding an approximate reciprocal of the square root (1/sqrt(v)) of each element v of x.
  4859  //
  4860  // Asm: VRSQRT14PS, CPU Feature: AVX512
  4861  func (x Float32x16) ReciprocalSqrt() Float32x16
  4862  
  4863  // ReciprocalSqrt returns a vector holding an approximate reciprocal of the square root (1/sqrt(v)) of each element v of x.
  4864  //
  4865  // Asm: VRSQRT14PD, CPU Feature: AVX512
  4866  func (x Float64x2) ReciprocalSqrt() Float64x2
  4867  
  4868  // ReciprocalSqrt returns a vector holding an approximate reciprocal of the square root (1/sqrt(v)) of each element v of x.
  4869  //
  4870  // Asm: VRSQRT14PD, CPU Feature: AVX512
  4871  func (x Float64x4) ReciprocalSqrt() Float64x4
  4872  
  4873  // ReciprocalSqrt returns a vector holding an approximate reciprocal of the square root (1/sqrt(v)) of each element v of x.
  4874  //
  4875  // Asm: VRSQRT14PD, CPU Feature: AVX512
  4876  func (x Float64x8) ReciprocalSqrt() Float64x8
  4877  
  4878  /* RotateAllLeft */
  4879  
  4880  // RotateAllLeft rotates every element of x to the left by the same number of bits, given by shift.
  4881  //
  4882  // shift should be a constant for best performance; a non-constant value will be translated into a jump table.
  4883  //
  4884  // Asm: VPROLD, CPU Feature: AVX512
  4885  func (x Int32x4) RotateAllLeft(shift uint8) Int32x4
  4886  
  4887  // RotateAllLeft rotates every element of x to the left by the same number of bits, given by shift.
  4888  //
  4889  // shift should be a constant for best performance; a non-constant value will be translated into a jump table.
  4890  //
  4891  // Asm: VPROLD, CPU Feature: AVX512
  4892  func (x Int32x8) RotateAllLeft(shift uint8) Int32x8
  4893  
  4894  // RotateAllLeft rotates every element of x to the left by the same number of bits, given by shift.
  4895  //
  4896  // shift should be a constant for best performance; a non-constant value will be translated into a jump table.
  4897  //
  4898  // Asm: VPROLD, CPU Feature: AVX512
  4899  func (x Int32x16) RotateAllLeft(shift uint8) Int32x16
  4900  
  4901  // RotateAllLeft rotates every element of x to the left by the same number of bits, given by shift.
  4902  //
  4903  // shift should be a constant for best performance; a non-constant value will be translated into a jump table.
  4904  //
  4905  // Asm: VPROLQ, CPU Feature: AVX512
  4906  func (x Int64x2) RotateAllLeft(shift uint8) Int64x2
  4907  
  4908  // RotateAllLeft rotates every element of x to the left by the same number of bits, given by shift.
  4909  //
  4910  // shift should be a constant for best performance; a non-constant value will be translated into a jump table.
  4911  //
  4912  // Asm: VPROLQ, CPU Feature: AVX512
  4913  func (x Int64x4) RotateAllLeft(shift uint8) Int64x4
  4914  
  4915  // RotateAllLeft rotates every element of x to the left by the same number of bits, given by shift.
  4916  //
  4917  // shift should be a constant for best performance; a non-constant value will be translated into a jump table.
  4918  //
  4919  // Asm: VPROLQ, CPU Feature: AVX512
  4920  func (x Int64x8) RotateAllLeft(shift uint8) Int64x8
  4921  
  4922  // RotateAllLeft rotates every element of x to the left by the same number of bits, given by shift.
  4923  //
  4924  // shift should be a constant for best performance; a non-constant value will be translated into a jump table.
  4925  //
  4926  // Asm: VPROLD, CPU Feature: AVX512
  4927  func (x Uint32x4) RotateAllLeft(shift uint8) Uint32x4
  4928  
  4929  // RotateAllLeft rotates every element of x to the left by the same number of bits, given by shift.
  4930  //
  4931  // shift should be a constant for best performance; a non-constant value will be translated into a jump table.
  4932  //
  4933  // Asm: VPROLD, CPU Feature: AVX512
  4934  func (x Uint32x8) RotateAllLeft(shift uint8) Uint32x8
  4935  
  4936  // RotateAllLeft rotates every element of x to the left by the same number of bits, given by shift.
  4937  //
  4938  // shift should be a constant for best performance; a non-constant value will be translated into a jump table.
  4939  //
  4940  // Asm: VPROLD, CPU Feature: AVX512
  4941  func (x Uint32x16) RotateAllLeft(shift uint8) Uint32x16
  4942  
  4943  // RotateAllLeft rotates every element of x to the left by the same number of bits, given by shift.
  4944  //
  4945  // shift should be a constant for best performance; a non-constant value will be translated into a jump table.
  4946  //
  4947  // Asm: VPROLQ, CPU Feature: AVX512
  4948  func (x Uint64x2) RotateAllLeft(shift uint8) Uint64x2
  4949  
  4950  // RotateAllLeft rotates every element of x to the left by the same number of bits, given by shift.
  4951  //
  4952  // shift should be a constant for best performance; a non-constant value will be translated into a jump table.
  4953  //
  4954  // Asm: VPROLQ, CPU Feature: AVX512
  4955  func (x Uint64x4) RotateAllLeft(shift uint8) Uint64x4
  4956  
  4957  // RotateAllLeft rotates every element of x to the left by the same number of bits, given by shift.
  4958  //
  4959  // shift should be a constant for best performance; a non-constant value will be translated into a jump table.
  4960  //
  4961  // Asm: VPROLQ, CPU Feature: AVX512
  4962  func (x Uint64x8) RotateAllLeft(shift uint8) Uint64x8
  4963  
  4964  /* RotateAllRight */
  4965  
  4966  // RotateAllRight rotates every element of x to the right by the same number of bits, given by shift.
  4967  //
  4968  // shift should be a constant for best performance; a non-constant value will be translated into a jump table.
  4969  //
  4970  // Asm: VPRORD, CPU Feature: AVX512
  4971  func (x Int32x4) RotateAllRight(shift uint8) Int32x4
  4972  
  4973  // RotateAllRight rotates every element of x to the right by the same number of bits, given by shift.
  4974  //
  4975  // shift should be a constant for best performance; a non-constant value will be translated into a jump table.
  4976  //
  4977  // Asm: VPRORD, CPU Feature: AVX512
  4978  func (x Int32x8) RotateAllRight(shift uint8) Int32x8
  4979  
  4980  // RotateAllRight rotates every element of x to the right by the same number of bits, given by shift.
  4981  //
  4982  // shift should be a constant for best performance; a non-constant value will be translated into a jump table.
  4983  //
  4984  // Asm: VPRORD, CPU Feature: AVX512
  4985  func (x Int32x16) RotateAllRight(shift uint8) Int32x16
  4986  
  4987  // RotateAllRight rotates every element of x to the right by the same number of bits, given by shift.
  4988  //
  4989  // shift should be a constant for best performance; a non-constant value will be translated into a jump table.
  4990  //
  4991  // Asm: VPRORQ, CPU Feature: AVX512
  4992  func (x Int64x2) RotateAllRight(shift uint8) Int64x2
  4993  
  4994  // RotateAllRight rotates every element of x to the right by the same number of bits, given by shift.
  4995  //
  4996  // shift should be a constant for best performance; a non-constant value will be translated into a jump table.
  4997  //
  4998  // Asm: VPRORQ, CPU Feature: AVX512
  4999  func (x Int64x4) RotateAllRight(shift uint8) Int64x4
  5000  
  5001  // RotateAllRight rotates every element of x to the right by the same number of bits, given by shift.
  5002  //
  5003  // shift should be a constant for best performance; a non-constant value will be translated into a jump table.
  5004  //
  5005  // Asm: VPRORQ, CPU Feature: AVX512
  5006  func (x Int64x8) RotateAllRight(shift uint8) Int64x8
  5007  
  5008  // RotateAllRight rotates every element of x to the right by the same number of bits, given by shift.
  5009  //
  5010  // shift should be a constant for best performance; a non-constant value will be translated into a jump table.
  5011  //
  5012  // Asm: VPRORD, CPU Feature: AVX512
  5013  func (x Uint32x4) RotateAllRight(shift uint8) Uint32x4
  5014  
  5015  // RotateAllRight rotates every element of x to the right by the same number of bits, given by shift.
  5016  //
  5017  // shift should be a constant for best performance; a non-constant value will be translated into a jump table.
  5018  //
  5019  // Asm: VPRORD, CPU Feature: AVX512
  5020  func (x Uint32x8) RotateAllRight(shift uint8) Uint32x8
  5021  
  5022  // RotateAllRight rotates every element of x to the right by the same number of bits, given by shift.
  5023  //
  5024  // shift should be a constant for best performance; a non-constant value will be translated into a jump table.
  5025  //
  5026  // Asm: VPRORD, CPU Feature: AVX512
  5027  func (x Uint32x16) RotateAllRight(shift uint8) Uint32x16
  5028  
  5029  // RotateAllRight rotates every element of x to the right by the same number of bits, given by shift.
  5030  //
  5031  // shift should be a constant for best performance; a non-constant value will be translated into a jump table.
  5032  //
  5033  // Asm: VPRORQ, CPU Feature: AVX512
  5034  func (x Uint64x2) RotateAllRight(shift uint8) Uint64x2
  5035  
  5036  // RotateAllRight rotates every element of x to the right by the same number of bits, given by shift.
  5037  //
  5038  // shift should be a constant for best performance; a non-constant value will be translated into a jump table.
  5039  //
  5040  // Asm: VPRORQ, CPU Feature: AVX512
  5041  func (x Uint64x4) RotateAllRight(shift uint8) Uint64x4
  5042  
  5043  // RotateAllRight rotates every element of x to the right by the same number of bits, given by shift.
  5044  //
  5045  // shift should be a constant for best performance; a non-constant value will be translated into a jump table.
  5046  //
  5047  // Asm: VPRORQ, CPU Feature: AVX512
  5048  func (x Uint64x8) RotateAllRight(shift uint8) Uint64x8
  5049  
  5050  /* RotateLeft */
  5051  
  5052  // RotateLeft rotates each element of x to the left by the number of bits given in the corresponding element of y.
  5053  //
  5054  // Asm: VPROLVD, CPU Feature: AVX512
  5055  func (x Int32x4) RotateLeft(y Int32x4) Int32x4
  5056  
  5057  // RotateLeft rotates each element of x to the left by the number of bits given in the corresponding element of y.
  5058  //
  5059  // Asm: VPROLVD, CPU Feature: AVX512
  5060  func (x Int32x8) RotateLeft(y Int32x8) Int32x8
  5061  
  5062  // RotateLeft rotates each element of x to the left by the number of bits given in the corresponding element of y.
  5063  //
  5064  // Asm: VPROLVD, CPU Feature: AVX512
  5065  func (x Int32x16) RotateLeft(y Int32x16) Int32x16
  5066  
  5067  // RotateLeft rotates each element of x to the left by the number of bits given in the corresponding element of y.
  5068  //
  5069  // Asm: VPROLVQ, CPU Feature: AVX512
  5070  func (x Int64x2) RotateLeft(y Int64x2) Int64x2
  5071  
  5072  // RotateLeft rotates each element of x to the left by the number of bits given in the corresponding element of y.
  5073  //
  5074  // Asm: VPROLVQ, CPU Feature: AVX512
  5075  func (x Int64x4) RotateLeft(y Int64x4) Int64x4
  5076  
  5077  // RotateLeft rotates each element of x to the left by the number of bits given in the corresponding element of y.
  5078  //
  5079  // Asm: VPROLVQ, CPU Feature: AVX512
  5080  func (x Int64x8) RotateLeft(y Int64x8) Int64x8
  5081  
  5082  // RotateLeft rotates each element of x to the left by the number of bits given in the corresponding element of y.
  5083  //
  5084  // Asm: VPROLVD, CPU Feature: AVX512
  5085  func (x Uint32x4) RotateLeft(y Uint32x4) Uint32x4
  5086  
  5087  // RotateLeft rotates each element of x to the left by the number of bits given in the corresponding element of y.
  5088  //
  5089  // Asm: VPROLVD, CPU Feature: AVX512
  5090  func (x Uint32x8) RotateLeft(y Uint32x8) Uint32x8
  5091  
  5092  // RotateLeft rotates each element of x to the left by the number of bits given in the corresponding element of y.
  5093  //
  5094  // Asm: VPROLVD, CPU Feature: AVX512
  5095  func (x Uint32x16) RotateLeft(y Uint32x16) Uint32x16
  5096  
  5097  // RotateLeft rotates each element of x to the left by the number of bits given in the corresponding element of y.
  5098  //
  5099  // Asm: VPROLVQ, CPU Feature: AVX512
  5100  func (x Uint64x2) RotateLeft(y Uint64x2) Uint64x2
  5101  
  5102  // RotateLeft rotates each element of x to the left by the number of bits given in the corresponding element of y.
  5103  //
  5104  // Asm: VPROLVQ, CPU Feature: AVX512
  5105  func (x Uint64x4) RotateLeft(y Uint64x4) Uint64x4
  5106  
  5107  // RotateLeft rotates each element of x to the left by the number of bits given in the corresponding element of y.
  5108  //
  5109  // Asm: VPROLVQ, CPU Feature: AVX512
  5110  func (x Uint64x8) RotateLeft(y Uint64x8) Uint64x8
  5111  
  5112  /* RotateRight */
  5113  
  5114  // RotateRight rotates each element of x to the right by the number of bits given in the corresponding element of y.
  5115  //
  5116  // Asm: VPRORVD, CPU Feature: AVX512
  5117  func (x Int32x4) RotateRight(y Int32x4) Int32x4
  5118  
  5119  // RotateRight rotates each element of x to the right by the number of bits given in the corresponding element of y.
  5120  //
  5121  // Asm: VPRORVD, CPU Feature: AVX512
  5122  func (x Int32x8) RotateRight(y Int32x8) Int32x8
  5123  
  5124  // RotateRight rotates each element of x to the right by the number of bits given in the corresponding element of y.
  5125  //
  5126  // Asm: VPRORVD, CPU Feature: AVX512
  5127  func (x Int32x16) RotateRight(y Int32x16) Int32x16
  5128  
  5129  // RotateRight rotates each element of x to the right by the number of bits given in the corresponding element of y.
  5130  //
  5131  // Asm: VPRORVQ, CPU Feature: AVX512
  5132  func (x Int64x2) RotateRight(y Int64x2) Int64x2
  5133  
  5134  // RotateRight rotates each element of x to the right by the number of bits given in the corresponding element of y.
  5135  //
  5136  // Asm: VPRORVQ, CPU Feature: AVX512
  5137  func (x Int64x4) RotateRight(y Int64x4) Int64x4
  5138  
  5139  // RotateRight rotates each element of x to the right by the number of bits given in the corresponding element of y.
  5140  //
  5141  // Asm: VPRORVQ, CPU Feature: AVX512
  5142  func (x Int64x8) RotateRight(y Int64x8) Int64x8
  5143  
  5144  // RotateRight rotates each element of x to the right by the number of bits given in the corresponding element of y.
  5145  //
  5146  // Asm: VPRORVD, CPU Feature: AVX512
  5147  func (x Uint32x4) RotateRight(y Uint32x4) Uint32x4
  5148  
  5149  // RotateRight rotates each element of x to the right by the number of bits given in the corresponding element of y.
  5150  //
  5151  // Asm: VPRORVD, CPU Feature: AVX512
  5152  func (x Uint32x8) RotateRight(y Uint32x8) Uint32x8
  5153  
  5154  // RotateRight rotates each element of x to the right by the number of bits given in the corresponding element of y.
  5155  //
  5156  // Asm: VPRORVD, CPU Feature: AVX512
  5157  func (x Uint32x16) RotateRight(y Uint32x16) Uint32x16
  5158  
  5159  // RotateRight rotates each element of x to the right by the number of bits given in the corresponding element of y.
  5160  //
  5161  // Asm: VPRORVQ, CPU Feature: AVX512
  5162  func (x Uint64x2) RotateRight(y Uint64x2) Uint64x2
  5163  
  5164  // RotateRight rotates each element of x to the right by the number of bits given in the corresponding element of y.
  5165  //
  5166  // Asm: VPRORVQ, CPU Feature: AVX512
  5167  func (x Uint64x4) RotateRight(y Uint64x4) Uint64x4
  5168  
  5169  // RotateRight rotates each element of x to the right by the number of bits given in the corresponding element of y.
  5170  //
  5171  // Asm: VPRORVQ, CPU Feature: AVX512
  5172  func (x Uint64x8) RotateRight(y Uint64x8) Uint64x8
  5173  
  5174  /* RoundToEven */
  5175  
  5176  // RoundToEven rounds each element of x to the nearest integer, with ties going to the even neighbor as the name indicates.
  5177  //
  5178  // Asm: VROUNDPS, CPU Feature: AVX
  5179  func (x Float32x4) RoundToEven() Float32x4
  5180  
  5181  // RoundToEven rounds each element of x to the nearest integer, with ties going to the even neighbor as the name indicates.
  5182  //
  5183  // Asm: VROUNDPS, CPU Feature: AVX
  5184  func (x Float32x8) RoundToEven() Float32x8
  5185  
  5186  // RoundToEven rounds each element of x to the nearest integer, with ties going to the even neighbor as the name indicates.
  5187  //
  5188  // Asm: VROUNDPD, CPU Feature: AVX
  5189  func (x Float64x2) RoundToEven() Float64x2
  5190  
  5191  // RoundToEven rounds each element of x to the nearest integer, with ties going to the even neighbor as the name indicates.
  5192  //
  5193  // Asm: VROUNDPD, CPU Feature: AVX
  5194  func (x Float64x4) RoundToEven() Float64x4
  5195  
  5196  /* RoundToEvenScaled */
  5197  
  5198  // RoundToEvenScaled rounds each element of x with the precision specified by prec.
  5199  //
  5200  // prec should be a constant for best performance; a non-constant value will be translated into a jump table.
  5201  //
  5202  // Asm: VRNDSCALEPS, CPU Feature: AVX512
  5203  func (x Float32x4) RoundToEvenScaled(prec uint8) Float32x4
  5204  
  5205  // RoundToEvenScaled rounds each element of x with the precision specified by prec.
  5206  //
  5207  // prec should be a constant for best performance; a non-constant value will be translated into a jump table.
  5208  //
  5209  // Asm: VRNDSCALEPS, CPU Feature: AVX512
  5210  func (x Float32x8) RoundToEvenScaled(prec uint8) Float32x8
  5211  
  5212  // RoundToEvenScaled rounds each element of x with the precision specified by prec.
  5213  //
  5214  // prec should be a constant for best performance; a non-constant value will be translated into a jump table.
  5215  //
  5216  // Asm: VRNDSCALEPS, CPU Feature: AVX512
  5217  func (x Float32x16) RoundToEvenScaled(prec uint8) Float32x16
  5218  
  5219  // RoundToEvenScaled rounds each element of x with the precision specified by prec.
  5220  //
  5221  // prec should be a constant for best performance; a non-constant value will be translated into a jump table.
  5222  //
  5223  // Asm: VRNDSCALEPD, CPU Feature: AVX512
  5224  func (x Float64x2) RoundToEvenScaled(prec uint8) Float64x2
  5225  
  5226  // RoundToEvenScaled rounds each element of x with the precision specified by prec.
  5227  //
  5228  // prec should be a constant for best performance; a non-constant value will be translated into a jump table.
  5229  //
  5230  // Asm: VRNDSCALEPD, CPU Feature: AVX512
  5231  func (x Float64x4) RoundToEvenScaled(prec uint8) Float64x4
  5232  
  5233  // RoundToEvenScaled rounds each element of x with the precision specified by prec.
  5234  //
  5235  // prec should be a constant for best performance; a non-constant value will be translated into a jump table.
  5236  //
  5237  // Asm: VRNDSCALEPD, CPU Feature: AVX512
  5238  func (x Float64x8) RoundToEvenScaled(prec uint8) Float64x8
  5239  
  5240  /* RoundToEvenScaledResidue */
  5241  
  5242  // RoundToEvenScaledResidue computes, for each element of x, the difference after rounding with the precision specified by prec.
  5243  //
  5244  // prec should be a constant for best performance; a non-constant value will be translated into a jump table.
  5245  //
  5246  // Asm: VREDUCEPS, CPU Feature: AVX512
  5247  func (x Float32x4) RoundToEvenScaledResidue(prec uint8) Float32x4
  5248  
  5249  // RoundToEvenScaledResidue computes, for each element of x, the difference after rounding with the precision specified by prec.
  5250  //
  5251  // prec should be a constant for best performance; a non-constant value will be translated into a jump table.
  5252  //
  5253  // Asm: VREDUCEPS, CPU Feature: AVX512
  5254  func (x Float32x8) RoundToEvenScaledResidue(prec uint8) Float32x8
  5255  
  5256  // RoundToEvenScaledResidue computes the difference after rounding with specified precision.
  5257  //
  5258  // prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
  5259  //
  5260  // Asm: VREDUCEPS, CPU Feature: AVX512
  5261  func (x Float32x16) RoundToEvenScaledResidue(prec uint8) Float32x16
  5262  
  5263  // RoundToEvenScaledResidue computes the difference after rounding with specified precision.
  5264  //
  5265  // prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
  5266  //
  5267  // Asm: VREDUCEPD, CPU Feature: AVX512
  5268  func (x Float64x2) RoundToEvenScaledResidue(prec uint8) Float64x2
  5269  
  5270  // RoundToEvenScaledResidue computes the difference after rounding with specified precision.
  5271  //
  5272  // prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
  5273  //
  5274  // Asm: VREDUCEPD, CPU Feature: AVX512
  5275  func (x Float64x4) RoundToEvenScaledResidue(prec uint8) Float64x4
  5276  
  5277  // RoundToEvenScaledResidue computes the difference after rounding with specified precision.
  5278  //
  5279  // prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
  5280  //
  5281  // Asm: VREDUCEPD, CPU Feature: AVX512
  5282  func (x Float64x8) RoundToEvenScaledResidue(prec uint8) Float64x8
  5283  
  5284  /* SHA1FourRounds */
  5285  
  5286  // SHA1FourRounds performs 4 rounds of the B loop in the SHA1 algorithm defined in FIPS 180-4.
  5287  // x contains the state variables a, b, c and d from upper to lower order.
  5288  // y contains the W array elements (with the state variable e added to the upper element) from upper to lower order.
  5289  // result = the state variables a', b', c', d' updated after 4 rounds.
  5290  // constant = 0 for the first 20 rounds of the loop, 1 for the next 20 rounds, and so on up to 3 for the last 20 rounds of the loop.
  5291  //
  5292  // constant should be a constant value for best performance; a non-constant value is translated into a jump table.
  5293  //
  5294  // Asm: SHA1RNDS4, CPU Feature: SHA
  5295  func (x Uint32x4) SHA1FourRounds(constant uint8, y Uint32x4) Uint32x4
  5296  
  5297  /* SHA1Message1 */
  5298  
  5299  // SHA1Message1 performs the XORing of 1 in the SHA1 algorithm defined in FIPS 180-4.
  5300  // x = {W3, W2, W1, W0}
  5301  // y = {0, 0, W5, W4}
  5302  // result = {W3^W5, W2^W4, W1^W3, W0^W2}
  5303  //
  5304  // Asm: SHA1MSG1, CPU Feature: SHA
  5305  func (x Uint32x4) SHA1Message1(y Uint32x4) Uint32x4
  5306  
  5307  /* SHA1Message2 */
  5308  
  5309  // SHA1Message2 performs the calculation of 3 and 4 in the SHA1 algorithm defined in FIPS 180-4.
  5310  // x = result of 2
  5311  // y = {W15, W14, W13} (NOTE(review): only three of the four elements of y are listed; confirm the remaining one is unused)
  5312  // result = {W19, W18, W17, W16}
  5313  //
  5314  // Asm: SHA1MSG2, CPU Feature: SHA
  5315  func (x Uint32x4) SHA1Message2(y Uint32x4) Uint32x4
  5316  
  5317  /* SHA1NextE */
  5318  
  5319  // SHA1NextE calculates the state variable e' updated after 4 rounds in the SHA1 algorithm defined in FIPS 180-4.
  5320  // x contains the state variable a (before the 4 rounds), placed in the upper element.
  5321  // y holds the elements of the W array for the next 4 rounds, from upper to lower order.
  5322  // result = the elements of the W array for the next 4 rounds, with the updated state variable e' added to the upper element,
  5323  // from upper to lower order.
  5324  // For the last round of the loop, you can pass zero for y to obtain the e' value itself or, better, pass H4:0:0:0
  5325  // for y to get e' added to H4. (Note that e' is computed only from x; the values of y do not affect the
  5326  // computation of e'.)
  5327  //
  5328  // Asm: SHA1NEXTE, CPU Feature: SHA
  5329  func (x Uint32x4) SHA1NextE(y Uint32x4) Uint32x4
  5330  
  5331  /* SHA256Message1 */
  5332  
  5333  // SHA256Message1 performs the sigma and addition of 1 in the SHA256 algorithm defined in FIPS 180-4.
  5334  // x = {W0, W1, W2, W3}
  5335  // y = {W4, 0, 0, 0}
  5336  // result = {W0+σ(W1), W1+σ(W2), W2+σ(W3), W3+σ(W4)}
  5337  //
  5338  // Asm: SHA256MSG1, CPU Feature: SHA
  5339  func (x Uint32x4) SHA256Message1(y Uint32x4) Uint32x4
  5340  
  5341  /* SHA256Message2 */
  5342  
  5343  // SHA256Message2 performs the sigma and addition of 3 in the SHA256 algorithm defined in FIPS 180-4.
  5344  // x = result of 2
  5345  // y = {0, 0, W14, W15}
  5346  // result = {W16, W17, W18, W19}
  5347  //
  5348  // Asm: SHA256MSG2, CPU Feature: SHA
  5349  func (x Uint32x4) SHA256Message2(y Uint32x4) Uint32x4
  5350  
  5351  /* SHA256TwoRounds */
  5352  
  5353  // SHA256TwoRounds performs 2 rounds of the B loop to calculate updated state variables in the SHA256 algorithm defined in FIPS 180-4.
  5354  // x = {h, g, d, c}
  5355  // y = {f, e, b, a}
  5356  // z = {W0+K0, W1+K1}
  5357  // result = {f', e', b', a'}
  5358  // The K array is a 64-DWORD constant array defined on page 11 of FIPS 180-4. Each element of the K array is to be added to
  5359  // the corresponding element of the W array to make the input data z.
  5360  // The updated state variables c', d', g', h' are not returned by this instruction, because they are equal to the input data
  5361  // y (the state variables a, b, e, f before the 2 rounds).
  5362  //
  5363  // Asm: SHA256RNDS2, CPU Feature: SHA
  5364  func (x Uint32x4) SHA256TwoRounds(y Uint32x4, z Uint32x4) Uint32x4
  5365  
  5366  /* SaturateToInt8 */
  5367  
  5368  // SaturateToInt8 converts each element of x to int8.
  5369  // The conversion saturates each vector element.
  5370  // Results are packed into the low elements of the returned vector; its upper elements are zero-cleared.
  5371  //
  5372  // Asm: VPMOVSWB, CPU Feature: AVX512
  5373  func (x Int16x8) SaturateToInt8() Int8x16
  5374  
  5375  // SaturateToInt8 converts each element of x to int8.
  5376  // The conversion saturates each vector element.
  5377  // Results are packed into the low elements of the returned vector; its upper elements are zero-cleared.
  5378  //
  5379  // Asm: VPMOVSWB, CPU Feature: AVX512
  5380  func (x Int16x16) SaturateToInt8() Int8x16
  5381  
  5382  // SaturateToInt8 converts each element of x to int8.
  5383  // The conversion saturates each vector element.
  5384  //
  5385  // Asm: VPMOVSWB, CPU Feature: AVX512
  5386  func (x Int16x32) SaturateToInt8() Int8x32
  5387  
  5388  // SaturateToInt8 converts each element of x to int8.
  5389  // The conversion saturates each vector element.
  5390  // Results are packed into the low elements of the returned vector; its upper elements are zero-cleared.
  5391  //
  5392  // Asm: VPMOVSDB, CPU Feature: AVX512
  5393  func (x Int32x4) SaturateToInt8() Int8x16
  5394  
  5395  // SaturateToInt8 converts each element of x to int8.
  5396  // The conversion saturates each vector element.
  5397  // Results are packed into the low elements of the returned vector; its upper elements are zero-cleared.
  5398  //
  5399  // Asm: VPMOVSDB, CPU Feature: AVX512
  5400  func (x Int32x8) SaturateToInt8() Int8x16
  5401  
  5402  // SaturateToInt8 converts each element of x to int8.
  5403  // The conversion saturates each vector element.
  5404  // Results are packed into the low elements of the returned vector; its upper elements are zero-cleared.
  5405  //
  5406  // Asm: VPMOVSDB, CPU Feature: AVX512
  5407  func (x Int32x16) SaturateToInt8() Int8x16
  5408  
  5409  // SaturateToInt8 converts each element of x to int8.
  5410  // The conversion saturates each vector element.
  5411  // Results are packed into the low elements of the returned vector; its upper elements are zero-cleared.
  5412  //
  5413  // Asm: VPMOVSQB, CPU Feature: AVX512
  5414  func (x Int64x2) SaturateToInt8() Int8x16
  5415  
  5416  // SaturateToInt8 converts each element of x to int8.
  5417  // The conversion saturates each vector element.
  5418  // Results are packed into the low elements of the returned vector; its upper elements are zero-cleared.
  5419  //
  5420  // Asm: VPMOVSQB, CPU Feature: AVX512
  5421  func (x Int64x4) SaturateToInt8() Int8x16
  5422  
  5423  // SaturateToInt8 converts each element of x to int8.
  5424  // The conversion saturates each vector element.
  5425  // Results are packed into the low elements of the returned vector; its upper elements are zero-cleared.
  5426  //
  5427  // Asm: VPMOVSQB, CPU Feature: AVX512
  5428  func (x Int64x8) SaturateToInt8() Int8x16
  5429  
  5430  /* SaturateToInt16 */
  5431  
  5432  // SaturateToInt16 converts each element of x to int16.
  5433  // The conversion saturates each vector element.
  5434  //
  5435  // Asm: VPMOVSDW, CPU Feature: AVX512
  5436  func (x Int32x4) SaturateToInt16() Int16x8
  5437  
  5438  // SaturateToInt16 converts each element of x to int16.
  5439  // The conversion saturates each vector element.
  5440  //
  5441  // Asm: VPMOVSDW, CPU Feature: AVX512
  5442  func (x Int32x8) SaturateToInt16() Int16x8
  5443  
  5444  // SaturateToInt16 converts each element of x to int16.
  5445  // The conversion saturates each vector element.
  5446  //
  5447  // Asm: VPMOVSDW, CPU Feature: AVX512
  5448  func (x Int32x16) SaturateToInt16() Int16x16
  5449  
  5450  // SaturateToInt16 converts each element of x to int16.
  5451  // The conversion saturates each vector element.
  5452  //
  5453  // Asm: VPMOVSQW, CPU Feature: AVX512
  5454  func (x Int64x2) SaturateToInt16() Int16x8
  5455  
  5456  // SaturateToInt16 converts each element of x to int16.
  5457  // The conversion saturates each vector element.
  5458  //
  5459  // Asm: VPMOVSQW, CPU Feature: AVX512
  5460  func (x Int64x4) SaturateToInt16() Int16x8
  5461  
  5462  // SaturateToInt16 converts each element of x to int16.
  5463  // The conversion saturates each vector element.
  5464  //
  5465  // Asm: VPMOVSQW, CPU Feature: AVX512
  5466  func (x Int64x8) SaturateToInt16() Int16x8
  5467  
  5468  /* SaturateToInt16Concat */
  5469  
  5470  // SaturateToInt16Concat converts the element values of both input vectors to int16.
  5471  // Each 128-bit group is handled as a unit:
  5472  // the converted group from the first input vector is packed into the lower part of the result vector, and
  5473  // the converted group from the second input vector is packed into the upper part of the result vector.
  5474  // The conversion saturates each vector element.
  5475  //
  5476  // Asm: VPACKSSDW, CPU Feature: AVX
  5477  func (x Int32x4) SaturateToInt16Concat(y Int32x4) Int16x8
  5478  
  5479  // SaturateToInt16Concat converts the element values of both input vectors to int16.
  5480  // Each 128-bit group is handled as a unit:
  5481  // the converted group from the first input vector is packed into the lower part of the result vector, and
  5482  // the converted group from the second input vector is packed into the upper part of the result vector.
  5483  // The conversion saturates each vector element.
  5484  //
  5485  // Asm: VPACKSSDW, CPU Feature: AVX2
  5486  func (x Int32x8) SaturateToInt16Concat(y Int32x8) Int16x16
  5487  
  5488  // SaturateToInt16Concat converts the element values of both input vectors to int16.
  5489  // Each 128-bit group is handled as a unit:
  5490  // the converted group from the first input vector is packed into the lower part of the result vector, and
  5491  // the converted group from the second input vector is packed into the upper part of the result vector.
  5492  // The conversion saturates each vector element.
  5493  //
  5494  // Asm: VPACKSSDW, CPU Feature: AVX512
  5495  func (x Int32x16) SaturateToInt16Concat(y Int32x16) Int16x32
  5496  
  5497  /* SaturateToInt32 */
  5498  
  5499  // SaturateToInt32 converts each element of x to int32.
  5500  // The conversion saturates each vector element.
  5501  //
  5502  // Asm: VPMOVSQD, CPU Feature: AVX512
  5503  func (x Int64x2) SaturateToInt32() Int32x4
  5504  
  5505  // SaturateToInt32 converts each element of x to int32.
  5506  // The conversion saturates each vector element.
  5507  //
  5508  // Asm: VPMOVSQD, CPU Feature: AVX512
  5509  func (x Int64x4) SaturateToInt32() Int32x4
  5510  
  5511  // SaturateToInt32 converts each element of x to int32.
  5512  // The conversion saturates each vector element.
  5513  //
  5514  // Asm: VPMOVSQD, CPU Feature: AVX512
  5515  func (x Int64x8) SaturateToInt32() Int32x8
  5516  
  5517  /* SaturateToUint8 */
  5518  
  5519  // SaturateToUint8 converts each element of x to uint8.
  5520  // The conversion saturates each vector element. NOTE(review): the result type is Int8x16 and the Asm is VPMOVSWB; confirm the uint8 wording.
  5521  // Results are packed into the low elements of the returned vector; its upper elements are zero-cleared.
  5522  //
  5523  // Asm: VPMOVSWB, CPU Feature: AVX512
  5524  func (x Int16x8) SaturateToUint8() Int8x16
  5525  
  5526  // SaturateToUint8 converts each element of x to uint8.
  5527  // The conversion saturates each vector element. NOTE(review): the result type is Int8x16 and the Asm is VPMOVSWB; confirm the uint8 wording.
  5528  // Results are packed into the low elements of the returned vector; its upper elements are zero-cleared.
  5529  //
  5530  // Asm: VPMOVSWB, CPU Feature: AVX512
  5531  func (x Int16x16) SaturateToUint8() Int8x16
  5532  
  5533  // SaturateToUint8 converts each element of x to uint8.
  5534  // The conversion saturates each vector element. NOTE(review): the result type is Int8x16 and the Asm is VPMOVSDB; confirm the uint8 wording.
  5535  // Results are packed into the low elements of the returned vector; its upper elements are zero-cleared.
  5536  //
  5537  // Asm: VPMOVSDB, CPU Feature: AVX512
  5538  func (x Int32x4) SaturateToUint8() Int8x16
  5539  
  5540  // SaturateToUint8 converts each element of x to uint8.
  5541  // The conversion saturates each vector element. NOTE(review): the result type is Int8x16 and the Asm is VPMOVSDB; confirm the uint8 wording.
  5542  // Results are packed into the low elements of the returned vector; its upper elements are zero-cleared.
  5543  //
  5544  // Asm: VPMOVSDB, CPU Feature: AVX512
  5545  func (x Int32x8) SaturateToUint8() Int8x16
  5546  
  5547  // SaturateToUint8 converts each element of x to uint8.
  5548  // The conversion saturates each vector element. NOTE(review): the result type is Int8x16 and the Asm is VPMOVSDB; confirm the uint8 wording.
  5549  // Results are packed into the low elements of the returned vector; its upper elements are zero-cleared.
  5550  //
  5551  // Asm: VPMOVSDB, CPU Feature: AVX512
  5552  func (x Int32x16) SaturateToUint8() Int8x16
  5553  
  5554  // SaturateToUint8 converts each element of x to uint8.
  5555  // The conversion saturates each vector element. NOTE(review): the result type is Int8x16 and the Asm is VPMOVSQB; confirm the uint8 wording.
  5556  // Results are packed into the low elements of the returned vector; its upper elements are zero-cleared.
  5557  //
  5558  // Asm: VPMOVSQB, CPU Feature: AVX512
  5559  func (x Int64x2) SaturateToUint8() Int8x16
  5560  
  5561  // SaturateToUint8 converts each element of x to uint8.
  5562  // The conversion saturates each vector element. NOTE(review): the result type is Int8x16 and the Asm is VPMOVSQB; confirm the uint8 wording.
  5563  // Results are packed into the low elements of the returned vector; its upper elements are zero-cleared.
  5564  //
  5565  // Asm: VPMOVSQB, CPU Feature: AVX512
  5566  func (x Int64x4) SaturateToUint8() Int8x16
  5567  
  5568  // SaturateToUint8 converts each element of x to uint8.
  5569  // The conversion saturates each vector element. NOTE(review): the result type is Int8x16 and the Asm is VPMOVSQB; confirm the uint8 wording.
  5570  // Results are packed into the low elements of the returned vector; its upper elements are zero-cleared.
  5571  //
  5572  // Asm: VPMOVSQB, CPU Feature: AVX512
  5573  func (x Int64x8) SaturateToUint8() Int8x16
  5574  
  5575  // SaturateToUint8 converts each element of x to uint8.
  5576  // The conversion saturates each vector element.
  5577  //
  5578  // Asm: VPMOVUSWB, CPU Feature: AVX512
  5579  func (x Uint16x32) SaturateToUint8() Uint8x32
  5580  
  5581  /* SaturateToUint16 */
  5582  
  5583  // SaturateToUint16 converts each element of x to uint16.
  5584  // The conversion saturates each vector element.
  5585  //
  5586  // Asm: VPMOVUSDW, CPU Feature: AVX512
  5587  func (x Uint32x4) SaturateToUint16() Uint16x8
  5588  
  5589  // SaturateToUint16 converts each element of x to uint16.
  5590  // The conversion saturates each vector element.
  5591  //
  5592  // Asm: VPMOVUSDW, CPU Feature: AVX512
  5593  func (x Uint32x8) SaturateToUint16() Uint16x8
  5594  
  5595  // SaturateToUint16 converts each element of x to uint16.
  5596  // The conversion saturates each vector element.
  5597  //
  5598  // Asm: VPMOVUSDW, CPU Feature: AVX512
  5599  func (x Uint32x16) SaturateToUint16() Uint16x16
  5600  
  5601  // SaturateToUint16 converts each element of x to uint16.
  5602  // The conversion saturates each vector element.
  5603  //
  5604  // Asm: VPMOVUSQW, CPU Feature: AVX512
  5605  func (x Uint64x2) SaturateToUint16() Uint16x8
  5606  
  5607  // SaturateToUint16 converts each element of x to uint16.
  5608  // The conversion saturates each vector element.
  5609  //
  5610  // Asm: VPMOVUSQW, CPU Feature: AVX512
  5611  func (x Uint64x4) SaturateToUint16() Uint16x8
  5612  
  5613  // SaturateToUint16 converts each element of x to uint16.
  5614  // The conversion saturates each vector element.
  5615  //
  5616  // Asm: VPMOVUSQW, CPU Feature: AVX512
  5617  func (x Uint64x8) SaturateToUint16() Uint16x8
  5618  
  5619  /* SaturateToUint16Concat */
  5620  
  5621  // SaturateToUint16Concat converts the element values of both input vectors to uint16.
  5622  // Each 128-bit group is handled as a unit:
  5623  // the converted group from the first input vector is packed into the lower part of the result vector, and
  5624  // the converted group from the second input vector is packed into the upper part of the result vector.
  5625  // The conversion saturates each vector element.
  5626  //
  5627  // Asm: VPACKUSDW, CPU Feature: AVX
  5628  func (x Uint32x4) SaturateToUint16Concat(y Uint32x4) Uint16x8
  5629  
  5630  // SaturateToUint16Concat converts the element values of both input vectors to uint16.
  5631  // Each 128-bit group is handled as a unit:
  5632  // the converted group from the first input vector is packed into the lower part of the result vector, and
  5633  // the converted group from the second input vector is packed into the upper part of the result vector.
  5634  // The conversion saturates each vector element.
  5635  //
  5636  // Asm: VPACKUSDW, CPU Feature: AVX2
  5637  func (x Uint32x8) SaturateToUint16Concat(y Uint32x8) Uint16x16
  5638  
  5639  // SaturateToUint16Concat converts the element values of both input vectors to uint16.
  5640  // Each 128-bit group is handled as a unit:
  5641  // the converted group from the first input vector is packed into the lower part of the result vector, and
  5642  // the converted group from the second input vector is packed into the upper part of the result vector.
  5643  // The conversion saturates each vector element.
  5644  //
  5645  // Asm: VPACKUSDW, CPU Feature: AVX512
  5646  func (x Uint32x16) SaturateToUint16Concat(y Uint32x16) Uint16x32
  5647  
  5648  /* SaturateToUint32 */
  5649  
  5650  // SaturateToUint32 converts each element of x to uint32.
  5651  // The conversion saturates each vector element.
  5652  //
  5653  // Asm: VPMOVUSQD, CPU Feature: AVX512
  5654  func (x Uint64x2) SaturateToUint32() Uint32x4
  5655  
  5656  // SaturateToUint32 converts each element of x to uint32.
  5657  // The conversion saturates each vector element.
  5658  //
  5659  // Asm: VPMOVUSQD, CPU Feature: AVX512
  5660  func (x Uint64x4) SaturateToUint32() Uint32x4
  5661  
  5662  // SaturateToUint32 converts each element of x to uint32.
  5663  // The conversion saturates each vector element.
  5664  //
  5665  // Asm: VPMOVUSQD, CPU Feature: AVX512
  5666  func (x Uint64x8) SaturateToUint32() Uint32x8
  5667  
  5668  /* Scale */
  5669  
  5670  // Scale multiplies each element of x by a power of 2 (presumably derived from the corresponding element of y; confirm).
  5671  //
  5672  // Asm: VSCALEFPS, CPU Feature: AVX512
  5673  func (x Float32x4) Scale(y Float32x4) Float32x4
  5674  
  5675  // Scale multiplies each element of x by a power of 2 (presumably derived from the corresponding element of y; confirm).
  5676  //
  5677  // Asm: VSCALEFPS, CPU Feature: AVX512
  5678  func (x Float32x8) Scale(y Float32x8) Float32x8
  5679  
  5680  // Scale multiplies each element of x by a power of 2 (presumably derived from the corresponding element of y; confirm).
  5681  //
  5682  // Asm: VSCALEFPS, CPU Feature: AVX512
  5683  func (x Float32x16) Scale(y Float32x16) Float32x16
  5684  
  5685  // Scale multiplies each element of x by a power of 2 (presumably derived from the corresponding element of y; confirm).
  5686  //
  5687  // Asm: VSCALEFPD, CPU Feature: AVX512
  5688  func (x Float64x2) Scale(y Float64x2) Float64x2
  5689  
  5690  // Scale multiplies each element of x by a power of 2 (presumably derived from the corresponding element of y; confirm).
  5691  //
  5692  // Asm: VSCALEFPD, CPU Feature: AVX512
  5693  func (x Float64x4) Scale(y Float64x4) Float64x4
  5694  
  5695  // Scale multiplies each element of x by a power of 2 (presumably derived from the corresponding element of y; confirm).
  5696  //
  5697  // Asm: VSCALEFPD, CPU Feature: AVX512
  5698  func (x Float64x8) Scale(y Float64x8) Float64x8
  5699  
  5700  /* Select128FromPair */
  5701  
  5702  // Select128FromPair treats the 256-bit vectors x and y as one vector of four
  5703  // 128-bit elements (x holds elements 0 and 1, y holds elements 2 and 3) and returns the
  5704  // 256-bit result formed by concatenating the element chosen by lo, then the one chosen by hi.
  5705  // For example,
  5706  //
  5707  //	{40, 41, 42, 43, 50, 51, 52, 53}.Select128FromPair(3, 0, {60, 61, 62, 63, 70, 71, 72, 73})
  5708  //
  5709  // returns {70, 71, 72, 73, 40, 41, 42, 43}.
  5710  //
  5711  // lo and hi should be constants for best performance; non-constant values are translated into a jump table.
  5712  // lo and hi should each be between 0 and 3, inclusive; other values may result in a runtime panic.
  5713  //
  5714  // Asm: VPERM2F128, CPU Feature: AVX
  5715  func (x Float32x8) Select128FromPair(lo, hi uint8, y Float32x8) Float32x8
  5716  
  5717  // Select128FromPair treats the 256-bit vectors x and y as one vector of four
  5718  // 128-bit elements (x holds elements 0 and 1, y holds elements 2 and 3) and returns the
  5719  // 256-bit result formed by concatenating the element chosen by lo, then the one chosen by hi.
  5720  // For example,
  5721  //
  5722  //	{40, 41, 50, 51}.Select128FromPair(3, 0, {60, 61, 70, 71})
  5723  //
  5724  // returns {70, 71, 40, 41}.
  5725  //
  5726  // lo and hi should be constants for best performance; non-constant values are translated into a jump table.
  5727  // lo and hi should each be between 0 and 3, inclusive; other values may result in a runtime panic.
  5728  //
  5729  // Asm: VPERM2F128, CPU Feature: AVX
  5730  func (x Float64x4) Select128FromPair(lo, hi uint8, y Float64x4) Float64x4
  5731  
  5732  // Select128FromPair treats the 256-bit vectors x and y as one vector of four
  5733  // 128-bit elements (x holds elements 0 and 1, y holds elements 2 and 3) and returns the
  5734  // 256-bit result formed by concatenating the element chosen by lo, then the one chosen by hi.
  5735  // For example,
  5736  //
  5737  //	{0x40, 0x41, ..., 0x4f, 0x50, 0x51, ..., 0x5f}.Select128FromPair(3, 0,
  5738  //	     {0x60, 0x61, ..., 0x6f, 0x70, 0x71, ..., 0x7f})
  5739  //
  5740  // returns {0x70, 0x71, ..., 0x7f, 0x40, 0x41, ..., 0x4f}.
  5741  //
  5742  // lo and hi should be constants for best performance; non-constant values are translated into a jump table.
  5743  // lo and hi should each be between 0 and 3, inclusive; other values may result in a runtime panic.
  5744  //
  5745  // Asm: VPERM2I128, CPU Feature: AVX2
  5746  func (x Int8x32) Select128FromPair(lo, hi uint8, y Int8x32) Int8x32
  5747  
  5748  // Select128FromPair treats the 256-bit vectors x and y as one vector of four
  5749  // 128-bit elements (x holds elements 0 and 1, y holds elements 2 and 3) and returns the
  5750  // 256-bit result formed by concatenating the element chosen by lo, then the one chosen by hi.
  5751  // For example,
  5752  //
  5753  //	{40, 41, 42, 43, 44, 45, 46, 47, 50, 51, 52, 53, 54, 55, 56, 57}.Select128FromPair(3, 0,
  5754  //	 {60, 61, 62, 63, 64, 65, 66, 67, 70, 71, 72, 73, 74, 75, 76, 77})
  5755  //
  5756  // returns {70, 71, 72, 73, 74, 75, 76, 77, 40, 41, 42, 43, 44, 45, 46, 47}.
  5757  //
  5758  // lo and hi should be constants for best performance; non-constant values are translated into a jump table.
  5759  // lo and hi should each be between 0 and 3, inclusive; other values may result in a runtime panic.
  5760  //
  5761  // Asm: VPERM2I128, CPU Feature: AVX2
  5762  func (x Int16x16) Select128FromPair(lo, hi uint8, y Int16x16) Int16x16
  5763  
  5764  // Select128FromPair treats the 256-bit vectors x and y as one vector of four
  5765  // 128-bit elements (x holds elements 0 and 1, y holds elements 2 and 3) and returns the
  5766  // 256-bit result formed by concatenating the element chosen by lo, then the one chosen by hi.
  5767  // For example,
  5768  //
  5769  //	{40, 41, 42, 43, 50, 51, 52, 53}.Select128FromPair(3, 0, {60, 61, 62, 63, 70, 71, 72, 73})
  5770  //
  5771  // returns {70, 71, 72, 73, 40, 41, 42, 43}.
  5772  //
  5773  // lo and hi should be constants for best performance; non-constant values are translated into a jump table.
  5774  // lo and hi should each be between 0 and 3, inclusive; other values may result in a runtime panic.
  5775  //
  5776  // Asm: VPERM2I128, CPU Feature: AVX2
  5777  func (x Int32x8) Select128FromPair(lo, hi uint8, y Int32x8) Int32x8
  5778  
  5779  // Select128FromPair treats the 256-bit vectors x and y as one vector of four
  5780  // 128-bit elements (x holds elements 0 and 1, y holds elements 2 and 3) and returns the
  5781  // 256-bit result formed by concatenating the element chosen by lo, then the one chosen by hi.
  5782  // For example,
  5783  //
  5784  //	{40, 41, 50, 51}.Select128FromPair(3, 0, {60, 61, 70, 71})
  5785  //
  5786  // returns {70, 71, 40, 41}.
  5787  //
  5788  // lo and hi should be constants for best performance; non-constant values are translated into a jump table.
  5789  // lo and hi should each be between 0 and 3, inclusive; other values may result in a runtime panic.
  5790  //
  5791  // Asm: VPERM2I128, CPU Feature: AVX2
  5792  func (x Int64x4) Select128FromPair(lo, hi uint8, y Int64x4) Int64x4
  5793  
  5794  // Select128FromPair treats the 256-bit vectors x and y as one vector of four
  5795  // 128-bit elements (x holds elements 0 and 1, y holds elements 2 and 3) and returns the
  5796  // 256-bit result formed by concatenating the element chosen by lo, then the one chosen by hi.
  5797  // For example,
  5798  //
  5799  //	{0x40, 0x41, ..., 0x4f, 0x50, 0x51, ..., 0x5f}.Select128FromPair(3, 0,
  5800  //	     {0x60, 0x61, ..., 0x6f, 0x70, 0x71, ..., 0x7f})
  5801  //
  5802  // returns {0x70, 0x71, ..., 0x7f, 0x40, 0x41, ..., 0x4f}.
  5803  //
  5804  // lo and hi should be constants for best performance; non-constant values are translated into a jump table.
  5805  // lo and hi should each be between 0 and 3, inclusive; other values may result in a runtime panic.
  5806  //
  5807  // Asm: VPERM2I128, CPU Feature: AVX2
  5808  func (x Uint8x32) Select128FromPair(lo, hi uint8, y Uint8x32) Uint8x32
  5809  
  5810  // Select128FromPair treats the 256-bit vectors x and y as one vector of four
  5811  // 128-bit elements (x holds elements 0 and 1, y holds elements 2 and 3) and returns the
  5812  // 256-bit result formed by concatenating the element chosen by lo, then the one chosen by hi.
  5813  // For example,
  5814  //
  5815  //	{40, 41, 42, 43, 44, 45, 46, 47, 50, 51, 52, 53, 54, 55, 56, 57}.Select128FromPair(3, 0,
  5816  //	 {60, 61, 62, 63, 64, 65, 66, 67, 70, 71, 72, 73, 74, 75, 76, 77})
  5817  //
  5818  // returns {70, 71, 72, 73, 74, 75, 76, 77, 40, 41, 42, 43, 44, 45, 46, 47}.
  5819  //
  5820  // lo and hi should be constants for best performance; non-constant values are translated into a jump table.
  5821  // lo and hi should each be between 0 and 3, inclusive; other values may result in a runtime panic.
  5822  //
  5823  // Asm: VPERM2I128, CPU Feature: AVX2
  5824  func (x Uint16x16) Select128FromPair(lo, hi uint8, y Uint16x16) Uint16x16
  5825  
  5826  // Select128FromPair treats the 256-bit vectors x and y as one vector of four
  5827  // 128-bit elements (x holds elements 0 and 1, y holds elements 2 and 3) and returns the
  5828  // 256-bit result formed by concatenating the element chosen by lo, then the one chosen by hi.
  5829  // For example,
  5830  //
  5831  //	{40, 41, 42, 43, 50, 51, 52, 53}.Select128FromPair(3, 0, {60, 61, 62, 63, 70, 71, 72, 73})
  5832  //
  5833  // returns {70, 71, 72, 73, 40, 41, 42, 43}.
  5834  //
  5835  // lo and hi should be constants for best performance; non-constant values are translated into a jump table.
  5836  // lo and hi should each be between 0 and 3, inclusive; other values may result in a runtime panic.
  5837  //
  5838  // Asm: VPERM2I128, CPU Feature: AVX2
  5839  func (x Uint32x8) Select128FromPair(lo, hi uint8, y Uint32x8) Uint32x8
  5840  
  5841  // Select128FromPair treats the 256-bit vectors x and y as one vector of four
  5842  // 128-bit elements (x holds elements 0 and 1, y holds elements 2 and 3) and returns the
  5843  // 256-bit result formed by concatenating the element chosen by lo, then the one chosen by hi.
  5844  // For example,
  5845  //
  5846  //	{40, 41, 50, 51}.Select128FromPair(3, 0, {60, 61, 70, 71})
  5847  //
  5848  // returns {70, 71, 40, 41}.
  5849  //
  5850  // lo and hi should be constants for best performance; non-constant values are translated into a jump table.
  5851  // lo and hi should each be between 0 and 3, inclusive; other values may result in a runtime panic.
  5852  //
  5853  // Asm: VPERM2I128, CPU Feature: AVX2
  5854  func (x Uint64x4) Select128FromPair(lo, hi uint8, y Uint64x4) Uint64x4
  5855  
  5856  /* SetElem */
  5857  
  5858  // SetElem sets a single constant-indexed element's value.
  5859  //
  5860  // index results in better performance when it's a constant, a non-constant value will be translated into a jump table.
  5861  //
  5862  // Asm: VPINSRD, CPU Feature: AVX
  5863  func (x Float32x4) SetElem(index uint8, y float32) Float32x4
  5864  
  5865  // SetElem sets a single constant-indexed element's value.
  5866  //
  5867  // index results in better performance when it's a constant, a non-constant value will be translated into a jump table.
  5868  //
  5869  // Asm: VPINSRQ, CPU Feature: AVX
  5870  func (x Float64x2) SetElem(index uint8, y float64) Float64x2
  5871  
  5872  // SetElem sets a single constant-indexed element's value.
  5873  //
  5874  // index results in better performance when it's a constant, a non-constant value will be translated into a jump table.
  5875  //
  5876  // Asm: VPINSRB, CPU Feature: AVX
  5877  func (x Int8x16) SetElem(index uint8, y int8) Int8x16
  5878  
  5879  // SetElem sets a single constant-indexed element's value.
  5880  //
  5881  // index results in better performance when it's a constant, a non-constant value will be translated into a jump table.
  5882  //
  5883  // Asm: VPINSRW, CPU Feature: AVX
  5884  func (x Int16x8) SetElem(index uint8, y int16) Int16x8
  5885  
  5886  // SetElem sets a single constant-indexed element's value.
  5887  //
  5888  // index results in better performance when it's a constant, a non-constant value will be translated into a jump table.
  5889  //
  5890  // Asm: VPINSRD, CPU Feature: AVX
  5891  func (x Int32x4) SetElem(index uint8, y int32) Int32x4
  5892  
  5893  // SetElem sets a single constant-indexed element's value.
  5894  //
  5895  // index results in better performance when it's a constant, a non-constant value will be translated into a jump table.
  5896  //
  5897  // Asm: VPINSRQ, CPU Feature: AVX
  5898  func (x Int64x2) SetElem(index uint8, y int64) Int64x2
  5899  
  5900  // SetElem sets a single constant-indexed element's value.
  5901  //
  5902  // index results in better performance when it's a constant, a non-constant value will be translated into a jump table.
  5903  //
  5904  // Asm: VPINSRB, CPU Feature: AVX
  5905  func (x Uint8x16) SetElem(index uint8, y uint8) Uint8x16
  5906  
  5907  // SetElem returns x with the element at position index set to y.
  5908  //
  5909  // index results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  5910  //
  5911  // Asm: VPINSRW, CPU Feature: AVX
  5912  func (x Uint16x8) SetElem(index uint8, y uint16) Uint16x8
  5913  
  5914  // SetElem returns x with the element at position index set to y.
  5915  //
  5916  // index results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  5917  //
  5918  // Asm: VPINSRD, CPU Feature: AVX
  5919  func (x Uint32x4) SetElem(index uint8, y uint32) Uint32x4
  5920  
  5921  // SetElem returns x with the element at position index set to y.
  5922  //
  5923  // index results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  5924  //
  5925  // Asm: VPINSRQ, CPU Feature: AVX
  5926  func (x Uint64x2) SetElem(index uint8, y uint64) Uint64x2
  5927  
  5928  /* SetHi */
  5929  
  5930  // SetHi returns x with its upper half replaced by y; the lower half of x is left unchanged.
  5931  //
  5932  // Asm: VINSERTF128, CPU Feature: AVX
  5933  func (x Float32x8) SetHi(y Float32x4) Float32x8
  5934  
  5935  // SetHi returns x with its upper half replaced by y; the lower half of x is left unchanged.
  5936  //
  5937  // Asm: VINSERTF64X4, CPU Feature: AVX512
  5938  func (x Float32x16) SetHi(y Float32x8) Float32x16
  5939  
  5940  // SetHi returns x with its upper half replaced by y; the lower half of x is left unchanged.
  5941  //
  5942  // Asm: VINSERTF128, CPU Feature: AVX
  5943  func (x Float64x4) SetHi(y Float64x2) Float64x4
  5944  
  5945  // SetHi returns x with its upper half replaced by y; the lower half of x is left unchanged.
  5946  //
  5947  // Asm: VINSERTF64X4, CPU Feature: AVX512
  5948  func (x Float64x8) SetHi(y Float64x4) Float64x8
  5949  
  5950  // SetHi returns x with its upper half replaced by y; the lower half of x is left unchanged.
  5951  //
  5952  // Asm: VINSERTI128, CPU Feature: AVX2
  5953  func (x Int8x32) SetHi(y Int8x16) Int8x32
  5954  
  5955  // SetHi returns x with its upper half replaced by y; the lower half of x is left unchanged.
  5956  //
  5957  // Asm: VINSERTI64X4, CPU Feature: AVX512
  5958  func (x Int8x64) SetHi(y Int8x32) Int8x64
  5959  
  5960  // SetHi returns x with its upper half replaced by y; the lower half of x is left unchanged.
  5961  //
  5962  // Asm: VINSERTI128, CPU Feature: AVX2
  5963  func (x Int16x16) SetHi(y Int16x8) Int16x16
  5964  
  5965  // SetHi returns x with its upper half replaced by y; the lower half of x is left unchanged.
  5966  //
  5967  // Asm: VINSERTI64X4, CPU Feature: AVX512
  5968  func (x Int16x32) SetHi(y Int16x16) Int16x32
  5969  
  5970  // SetHi returns x with its upper half replaced by y; the lower half of x is left unchanged.
  5971  //
  5972  // Asm: VINSERTI128, CPU Feature: AVX2
  5973  func (x Int32x8) SetHi(y Int32x4) Int32x8
  5974  
  5975  // SetHi returns x with its upper half replaced by y; the lower half of x is left unchanged.
  5976  //
  5977  // Asm: VINSERTI64X4, CPU Feature: AVX512
  5978  func (x Int32x16) SetHi(y Int32x8) Int32x16
  5979  
  5980  // SetHi returns x with its upper half replaced by y; the lower half of x is left unchanged.
  5981  //
  5982  // Asm: VINSERTI128, CPU Feature: AVX2
  5983  func (x Int64x4) SetHi(y Int64x2) Int64x4
  5984  
  5985  // SetHi returns x with its upper half replaced by y; the lower half of x is left unchanged.
  5986  //
  5987  // Asm: VINSERTI64X4, CPU Feature: AVX512
  5988  func (x Int64x8) SetHi(y Int64x4) Int64x8
  5989  
  5990  // SetHi returns x with its upper half replaced by y; the lower half of x is left unchanged.
  5991  //
  5992  // Asm: VINSERTI128, CPU Feature: AVX2
  5993  func (x Uint8x32) SetHi(y Uint8x16) Uint8x32
  5994  
  5995  // SetHi returns x with its upper half replaced by y; the lower half of x is left unchanged.
  5996  //
  5997  // Asm: VINSERTI64X4, CPU Feature: AVX512
  5998  func (x Uint8x64) SetHi(y Uint8x32) Uint8x64
  5999  
  6000  // SetHi returns x with its upper half replaced by y; the lower half of x is left unchanged.
  6001  //
  6002  // Asm: VINSERTI128, CPU Feature: AVX2
  6003  func (x Uint16x16) SetHi(y Uint16x8) Uint16x16
  6004  
  6005  // SetHi returns x with its upper half replaced by y; the lower half of x is left unchanged.
  6006  //
  6007  // Asm: VINSERTI64X4, CPU Feature: AVX512
  6008  func (x Uint16x32) SetHi(y Uint16x16) Uint16x32
  6009  
  6010  // SetHi returns x with its upper half replaced by y; the lower half of x is left unchanged.
  6011  //
  6012  // Asm: VINSERTI128, CPU Feature: AVX2
  6013  func (x Uint32x8) SetHi(y Uint32x4) Uint32x8
  6014  
  6015  // SetHi returns x with its upper half replaced by y; the lower half of x is left unchanged.
  6016  //
  6017  // Asm: VINSERTI64X4, CPU Feature: AVX512
  6018  func (x Uint32x16) SetHi(y Uint32x8) Uint32x16
  6019  
  6020  // SetHi returns x with its upper half replaced by y; the lower half of x is left unchanged.
  6021  //
  6022  // Asm: VINSERTI128, CPU Feature: AVX2
  6023  func (x Uint64x4) SetHi(y Uint64x2) Uint64x4
  6024  
  6025  // SetHi returns x with its upper half replaced by y; the lower half of x is left unchanged.
  6026  //
  6027  // Asm: VINSERTI64X4, CPU Feature: AVX512
  6028  func (x Uint64x8) SetHi(y Uint64x4) Uint64x8
  6029  
  6030  /* SetLo */
  6031  
  6032  // SetLo returns x with its lower half replaced by y; the upper half of x is left unchanged.
  6033  //
  6034  // Asm: VINSERTF128, CPU Feature: AVX
  6035  func (x Float32x8) SetLo(y Float32x4) Float32x8
  6036  
  6037  // SetLo returns x with its lower half replaced by y; the upper half of x is left unchanged.
  6038  //
  6039  // Asm: VINSERTF64X4, CPU Feature: AVX512
  6040  func (x Float32x16) SetLo(y Float32x8) Float32x16
  6041  
  6042  // SetLo returns x with its lower half replaced by y; the upper half of x is left unchanged.
  6043  //
  6044  // Asm: VINSERTF128, CPU Feature: AVX
  6045  func (x Float64x4) SetLo(y Float64x2) Float64x4
  6046  
  6047  // SetLo returns x with its lower half replaced by y; the upper half of x is left unchanged.
  6048  //
  6049  // Asm: VINSERTF64X4, CPU Feature: AVX512
  6050  func (x Float64x8) SetLo(y Float64x4) Float64x8
  6051  
  6052  // SetLo returns x with its lower half replaced by y; the upper half of x is left unchanged.
  6053  //
  6054  // Asm: VINSERTI128, CPU Feature: AVX2
  6055  func (x Int8x32) SetLo(y Int8x16) Int8x32
  6056  
  6057  // SetLo returns x with its lower half replaced by y; the upper half of x is left unchanged.
  6058  //
  6059  // Asm: VINSERTI64X4, CPU Feature: AVX512
  6060  func (x Int8x64) SetLo(y Int8x32) Int8x64
  6061  
  6062  // SetLo returns x with its lower half replaced by y; the upper half of x is left unchanged.
  6063  //
  6064  // Asm: VINSERTI128, CPU Feature: AVX2
  6065  func (x Int16x16) SetLo(y Int16x8) Int16x16
  6066  
  6067  // SetLo returns x with its lower half replaced by y; the upper half of x is left unchanged.
  6068  //
  6069  // Asm: VINSERTI64X4, CPU Feature: AVX512
  6070  func (x Int16x32) SetLo(y Int16x16) Int16x32
  6071  
  6072  // SetLo returns x with its lower half replaced by y; the upper half of x is left unchanged.
  6073  //
  6074  // Asm: VINSERTI128, CPU Feature: AVX2
  6075  func (x Int32x8) SetLo(y Int32x4) Int32x8
  6076  
  6077  // SetLo returns x with its lower half replaced by y; the upper half of x is left unchanged.
  6078  //
  6079  // Asm: VINSERTI64X4, CPU Feature: AVX512
  6080  func (x Int32x16) SetLo(y Int32x8) Int32x16
  6081  
  6082  // SetLo returns x with its lower half replaced by y; the upper half of x is left unchanged.
  6083  //
  6084  // Asm: VINSERTI128, CPU Feature: AVX2
  6085  func (x Int64x4) SetLo(y Int64x2) Int64x4
  6086  
  6087  // SetLo returns x with its lower half replaced by y; the upper half of x is left unchanged.
  6088  //
  6089  // Asm: VINSERTI64X4, CPU Feature: AVX512
  6090  func (x Int64x8) SetLo(y Int64x4) Int64x8
  6091  
  6092  // SetLo returns x with its lower half replaced by y; the upper half of x is left unchanged.
  6093  //
  6094  // Asm: VINSERTI128, CPU Feature: AVX2
  6095  func (x Uint8x32) SetLo(y Uint8x16) Uint8x32
  6096  
  6097  // SetLo returns x with its lower half replaced by y; the upper half of x is left unchanged.
  6098  //
  6099  // Asm: VINSERTI64X4, CPU Feature: AVX512
  6100  func (x Uint8x64) SetLo(y Uint8x32) Uint8x64
  6101  
  6102  // SetLo returns x with its lower half replaced by y; the upper half of x is left unchanged.
  6103  //
  6104  // Asm: VINSERTI128, CPU Feature: AVX2
  6105  func (x Uint16x16) SetLo(y Uint16x8) Uint16x16
  6106  
  6107  // SetLo returns x with its lower half replaced by y; the upper half of x is left unchanged.
  6108  //
  6109  // Asm: VINSERTI64X4, CPU Feature: AVX512
  6110  func (x Uint16x32) SetLo(y Uint16x16) Uint16x32
  6111  
  6112  // SetLo returns x with its lower half replaced by y; the upper half of x is left unchanged.
  6113  //
  6114  // Asm: VINSERTI128, CPU Feature: AVX2
  6115  func (x Uint32x8) SetLo(y Uint32x4) Uint32x8
  6116  
  6117  // SetLo returns x with its lower half replaced by y; the upper half of x is left unchanged.
  6118  //
  6119  // Asm: VINSERTI64X4, CPU Feature: AVX512
  6120  func (x Uint32x16) SetLo(y Uint32x8) Uint32x16
  6121  
  6122  // SetLo returns x with its lower half replaced by y; the upper half of x is left unchanged.
  6123  //
  6124  // Asm: VINSERTI128, CPU Feature: AVX2
  6125  func (x Uint64x4) SetLo(y Uint64x2) Uint64x4
  6126  
  6127  // SetLo returns x with its lower half replaced by y; the upper half of x is left unchanged.
  6128  //
  6129  // Asm: VINSERTI64X4, CPU Feature: AVX512
  6130  func (x Uint64x8) SetLo(y Uint64x4) Uint64x8
  6131  
  6132  /* ShiftAllLeft */
  6133  
  6134  // ShiftAllLeft shifts each element of x to the left by y bits (one shift count shared by all elements). Emptied lower bits are zeroed.
  6135  //
  6136  // Asm: VPSLLW, CPU Feature: AVX
  6137  func (x Int16x8) ShiftAllLeft(y uint64) Int16x8
  6138  
  6139  // ShiftAllLeft shifts each element of x to the left by y bits (one shift count shared by all elements). Emptied lower bits are zeroed.
  6140  //
  6141  // Asm: VPSLLW, CPU Feature: AVX2
  6142  func (x Int16x16) ShiftAllLeft(y uint64) Int16x16
  6143  
  6144  // ShiftAllLeft shifts each element of x to the left by y bits (one shift count shared by all elements). Emptied lower bits are zeroed.
  6145  //
  6146  // Asm: VPSLLW, CPU Feature: AVX512
  6147  func (x Int16x32) ShiftAllLeft(y uint64) Int16x32
  6148  
  6149  // ShiftAllLeft shifts each element of x to the left by y bits (one shift count shared by all elements). Emptied lower bits are zeroed.
  6150  //
  6151  // Asm: VPSLLD, CPU Feature: AVX
  6152  func (x Int32x4) ShiftAllLeft(y uint64) Int32x4
  6153  
  6154  // ShiftAllLeft shifts each element of x to the left by y bits (one shift count shared by all elements). Emptied lower bits are zeroed.
  6155  //
  6156  // Asm: VPSLLD, CPU Feature: AVX2
  6157  func (x Int32x8) ShiftAllLeft(y uint64) Int32x8
  6158  
  6159  // ShiftAllLeft shifts each element of x to the left by y bits (one shift count shared by all elements). Emptied lower bits are zeroed.
  6160  //
  6161  // Asm: VPSLLD, CPU Feature: AVX512
  6162  func (x Int32x16) ShiftAllLeft(y uint64) Int32x16
  6163  
  6164  // ShiftAllLeft shifts each element of x to the left by y bits (one shift count shared by all elements). Emptied lower bits are zeroed.
  6165  //
  6166  // Asm: VPSLLQ, CPU Feature: AVX
  6167  func (x Int64x2) ShiftAllLeft(y uint64) Int64x2
  6168  
  6169  // ShiftAllLeft shifts each element of x to the left by y bits (one shift count shared by all elements). Emptied lower bits are zeroed.
  6170  //
  6171  // Asm: VPSLLQ, CPU Feature: AVX2
  6172  func (x Int64x4) ShiftAllLeft(y uint64) Int64x4
  6173  
  6174  // ShiftAllLeft shifts each element of x to the left by y bits (one shift count shared by all elements). Emptied lower bits are zeroed.
  6175  //
  6176  // Asm: VPSLLQ, CPU Feature: AVX512
  6177  func (x Int64x8) ShiftAllLeft(y uint64) Int64x8
  6178  
  6179  // ShiftAllLeft shifts each element of x to the left by y bits (one shift count shared by all elements). Emptied lower bits are zeroed.
  6180  //
  6181  // Asm: VPSLLW, CPU Feature: AVX
  6182  func (x Uint16x8) ShiftAllLeft(y uint64) Uint16x8
  6183  
  6184  // ShiftAllLeft shifts each element of x to the left by y bits (one shift count shared by all elements). Emptied lower bits are zeroed.
  6185  //
  6186  // Asm: VPSLLW, CPU Feature: AVX2
  6187  func (x Uint16x16) ShiftAllLeft(y uint64) Uint16x16
  6188  
  6189  // ShiftAllLeft shifts each element of x to the left by y bits (one shift count shared by all elements). Emptied lower bits are zeroed.
  6190  //
  6191  // Asm: VPSLLW, CPU Feature: AVX512
  6192  func (x Uint16x32) ShiftAllLeft(y uint64) Uint16x32
  6193  
  6194  // ShiftAllLeft shifts each element of x to the left by y bits (one shift count shared by all elements). Emptied lower bits are zeroed.
  6195  //
  6196  // Asm: VPSLLD, CPU Feature: AVX
  6197  func (x Uint32x4) ShiftAllLeft(y uint64) Uint32x4
  6198  
  6199  // ShiftAllLeft shifts each element of x to the left by y bits (one shift count shared by all elements). Emptied lower bits are zeroed.
  6200  //
  6201  // Asm: VPSLLD, CPU Feature: AVX2
  6202  func (x Uint32x8) ShiftAllLeft(y uint64) Uint32x8
  6203  
  6204  // ShiftAllLeft shifts each element of x to the left by y bits (one shift count shared by all elements). Emptied lower bits are zeroed.
  6205  //
  6206  // Asm: VPSLLD, CPU Feature: AVX512
  6207  func (x Uint32x16) ShiftAllLeft(y uint64) Uint32x16
  6208  
  6209  // ShiftAllLeft shifts each element of x to the left by y bits (one shift count shared by all elements). Emptied lower bits are zeroed.
  6210  //
  6211  // Asm: VPSLLQ, CPU Feature: AVX
  6212  func (x Uint64x2) ShiftAllLeft(y uint64) Uint64x2
  6213  
  6214  // ShiftAllLeft shifts each element of x to the left by y bits (one shift count shared by all elements). Emptied lower bits are zeroed.
  6215  //
  6216  // Asm: VPSLLQ, CPU Feature: AVX2
  6217  func (x Uint64x4) ShiftAllLeft(y uint64) Uint64x4
  6218  
  6219  // ShiftAllLeft shifts each element of x to the left by y bits (one shift count shared by all elements). Emptied lower bits are zeroed.
  6220  //
  6221  // Asm: VPSLLQ, CPU Feature: AVX512
  6222  func (x Uint64x8) ShiftAllLeft(y uint64) Uint64x8
  6223  
  6224  /* ShiftAllLeftConcat */
  6225  
  6226  // ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by
  6227  // shift (only the lower 4 bits are used, i.e. shift modulo 16), and then copies the upper bits of y to the emptied lower bits of the shifted x.
  6228  //
  6229  // shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  6230  //
  6231  // Asm: VPSHLDW, CPU Feature: AVX512VBMI2
  6232  func (x Int16x8) ShiftAllLeftConcat(shift uint8, y Int16x8) Int16x8
  6233  
  6234  // ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by
  6235  // shift (only the lower 4 bits are used, i.e. shift modulo 16), and then copies the upper bits of y to the emptied lower bits of the shifted x.
  6236  //
  6237  // shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  6238  //
  6239  // Asm: VPSHLDW, CPU Feature: AVX512VBMI2
  6240  func (x Int16x16) ShiftAllLeftConcat(shift uint8, y Int16x16) Int16x16
  6241  
  6242  // ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by
  6243  // shift (only the lower 4 bits are used, i.e. shift modulo 16), and then copies the upper bits of y to the emptied lower bits of the shifted x.
  6244  //
  6245  // shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  6246  //
  6247  // Asm: VPSHLDW, CPU Feature: AVX512VBMI2
  6248  func (x Int16x32) ShiftAllLeftConcat(shift uint8, y Int16x32) Int16x32
  6249  
  6250  // ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by
  6251  // shift (only the lower 5 bits are used, i.e. shift modulo 32), and then copies the upper bits of y to the emptied lower bits of the shifted x.
  6252  //
  6253  // shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  6254  //
  6255  // Asm: VPSHLDD, CPU Feature: AVX512VBMI2
  6256  func (x Int32x4) ShiftAllLeftConcat(shift uint8, y Int32x4) Int32x4
  6257  
  6258  // ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by
  6259  // shift (only the lower 5 bits are used, i.e. shift modulo 32), and then copies the upper bits of y to the emptied lower bits of the shifted x.
  6260  //
  6261  // shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  6262  //
  6263  // Asm: VPSHLDD, CPU Feature: AVX512VBMI2
  6264  func (x Int32x8) ShiftAllLeftConcat(shift uint8, y Int32x8) Int32x8
  6265  
  6266  // ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by
  6267  // shift (only the lower 5 bits are used, i.e. shift modulo 32), and then copies the upper bits of y to the emptied lower bits of the shifted x.
  6268  //
  6269  // shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  6270  //
  6271  // Asm: VPSHLDD, CPU Feature: AVX512VBMI2
  6272  func (x Int32x16) ShiftAllLeftConcat(shift uint8, y Int32x16) Int32x16
  6273  
  6274  // ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by
  6275  // shift (only the lower 6 bits are used, i.e. shift modulo 64), and then copies the upper bits of y to the emptied lower bits of the shifted x.
  6276  //
  6277  // shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  6278  //
  6279  // Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
  6280  func (x Int64x2) ShiftAllLeftConcat(shift uint8, y Int64x2) Int64x2
  6281  
  6282  // ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by
  6283  // shift (only the lower 6 bits are used, i.e. shift modulo 64), and then copies the upper bits of y to the emptied lower bits of the shifted x.
  6284  //
  6285  // shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  6286  //
  6287  // Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
  6288  func (x Int64x4) ShiftAllLeftConcat(shift uint8, y Int64x4) Int64x4
  6289  
  6290  // ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by
  6291  // shift (only the lower 6 bits are used, i.e. shift modulo 64), and then copies the upper bits of y to the emptied lower bits of the shifted x.
  6292  //
  6293  // shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  6294  //
  6295  // Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
  6296  func (x Int64x8) ShiftAllLeftConcat(shift uint8, y Int64x8) Int64x8
  6297  
  6298  // ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by
  6299  // shift (only the lower 4 bits are used, i.e. shift modulo 16), and then copies the upper bits of y to the emptied lower bits of the shifted x.
  6300  //
  6301  // shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  6302  //
  6303  // Asm: VPSHLDW, CPU Feature: AVX512VBMI2
  6304  func (x Uint16x8) ShiftAllLeftConcat(shift uint8, y Uint16x8) Uint16x8
  6305  
  6306  // ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by
  6307  // shift (only the lower 4 bits are used, i.e. shift modulo 16), and then copies the upper bits of y to the emptied lower bits of the shifted x.
  6308  //
  6309  // shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  6310  //
  6311  // Asm: VPSHLDW, CPU Feature: AVX512VBMI2
  6312  func (x Uint16x16) ShiftAllLeftConcat(shift uint8, y Uint16x16) Uint16x16
  6313  
  6314  // ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by
  6315  // shift (only the lower 4 bits are used, i.e. shift modulo 16), and then copies the upper bits of y to the emptied lower bits of the shifted x.
  6316  //
  6317  // shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  6318  //
  6319  // Asm: VPSHLDW, CPU Feature: AVX512VBMI2
  6320  func (x Uint16x32) ShiftAllLeftConcat(shift uint8, y Uint16x32) Uint16x32
  6321  
  6322  // ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by
  6323  // shift (only the lower 5 bits are used, i.e. shift modulo 32), and then copies the upper bits of y to the emptied lower bits of the shifted x.
  6324  //
  6325  // shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  6326  //
  6327  // Asm: VPSHLDD, CPU Feature: AVX512VBMI2
  6328  func (x Uint32x4) ShiftAllLeftConcat(shift uint8, y Uint32x4) Uint32x4
  6329  
  6330  // ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by
  6331  // shift (only the lower 5 bits are used, i.e. shift modulo 32), and then copies the upper bits of y to the emptied lower bits of the shifted x.
  6332  //
  6333  // shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  6334  //
  6335  // Asm: VPSHLDD, CPU Feature: AVX512VBMI2
  6336  func (x Uint32x8) ShiftAllLeftConcat(shift uint8, y Uint32x8) Uint32x8
  6337  
  6338  // ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by
  6339  // shift (only the lower 5 bits are used, i.e. shift modulo 32), and then copies the upper bits of y to the emptied lower bits of the shifted x.
  6340  //
  6341  // shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  6342  //
  6343  // Asm: VPSHLDD, CPU Feature: AVX512VBMI2
  6344  func (x Uint32x16) ShiftAllLeftConcat(shift uint8, y Uint32x16) Uint32x16
  6345  
  6346  // ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by
  6347  // shift (only the lower 6 bits are used, i.e. shift modulo 64), and then copies the upper bits of y to the emptied lower bits of the shifted x.
  6348  //
  6349  // shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  6350  //
  6351  // Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
  6352  func (x Uint64x2) ShiftAllLeftConcat(shift uint8, y Uint64x2) Uint64x2
  6353  
  6354  // ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by
  6355  // shift (only the lower 6 bits are used, i.e. shift modulo 64), and then copies the upper bits of y to the emptied lower bits of the shifted x.
  6356  //
  6357  // shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  6358  //
  6359  // Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
  6360  func (x Uint64x4) ShiftAllLeftConcat(shift uint8, y Uint64x4) Uint64x4
  6361  
  6362  // ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by
  6363  // shift (only the lower 6 bits are used, i.e. shift modulo 64), and then copies the upper bits of y to the emptied lower bits of the shifted x.
  6364  //
  6365  // shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  6366  //
  6367  // Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
  6368  func (x Uint64x8) ShiftAllLeftConcat(shift uint8, y Uint64x8) Uint64x8
  6369  
  6370  /* ShiftAllRight */
  6371  
  6372  // ShiftAllRight shifts each element of x to the right by y bits (one shift count shared by all elements). Emptied upper bits are filled with the sign bit.
  6373  //
  6374  // Asm: VPSRAW, CPU Feature: AVX
  6375  func (x Int16x8) ShiftAllRight(y uint64) Int16x8
  6376  
  6377  // ShiftAllRight shifts each element of x to the right by y bits (one shift count shared by all elements). Emptied upper bits are filled with the sign bit.
  6378  //
  6379  // Asm: VPSRAW, CPU Feature: AVX2
  6380  func (x Int16x16) ShiftAllRight(y uint64) Int16x16
  6381  
  6382  // ShiftAllRight shifts each element of x to the right by y bits (one shift count shared by all elements). Emptied upper bits are filled with the sign bit.
  6383  //
  6384  // Asm: VPSRAW, CPU Feature: AVX512
  6385  func (x Int16x32) ShiftAllRight(y uint64) Int16x32
  6386  
  6387  // ShiftAllRight shifts each element of x to the right by y bits (one shift count shared by all elements). Emptied upper bits are filled with the sign bit.
  6388  //
  6389  // Asm: VPSRAD, CPU Feature: AVX
  6390  func (x Int32x4) ShiftAllRight(y uint64) Int32x4
  6391  
  6392  // ShiftAllRight shifts each element of x to the right by y bits (one shift count shared by all elements). Emptied upper bits are filled with the sign bit.
  6393  //
  6394  // Asm: VPSRAD, CPU Feature: AVX2
  6395  func (x Int32x8) ShiftAllRight(y uint64) Int32x8
  6396  
  6397  // ShiftAllRight shifts each element of x to the right by y bits (one shift count shared by all elements). Emptied upper bits are filled with the sign bit.
  6398  //
  6399  // Asm: VPSRAD, CPU Feature: AVX512
  6400  func (x Int32x16) ShiftAllRight(y uint64) Int32x16
  6401  
  6402  // ShiftAllRight shifts each element of x to the right by y bits (one shift count shared by all elements). Emptied upper bits are filled with the sign bit.
  6403  //
  6404  // Asm: VPSRAQ, CPU Feature: AVX512
  6405  func (x Int64x2) ShiftAllRight(y uint64) Int64x2
  6406  
  6407  // ShiftAllRight shifts each element of x to the right by y bits (one shift count shared by all elements). Emptied upper bits are filled with the sign bit.
  6408  //
  6409  // Asm: VPSRAQ, CPU Feature: AVX512
  6410  func (x Int64x4) ShiftAllRight(y uint64) Int64x4
  6411  
  6412  // ShiftAllRight shifts each element of x to the right by y bits (one shift count shared by all elements). Emptied upper bits are filled with the sign bit.
  6413  //
  6414  // Asm: VPSRAQ, CPU Feature: AVX512
  6415  func (x Int64x8) ShiftAllRight(y uint64) Int64x8
  6416  
  6417  // ShiftAllRight shifts each element of x to the right by y bits (one shift count shared by all elements). Emptied upper bits are zeroed.
  6418  //
  6419  // Asm: VPSRLW, CPU Feature: AVX
  6420  func (x Uint16x8) ShiftAllRight(y uint64) Uint16x8
  6421  
  6422  // ShiftAllRight shifts each element of x to the right by y bits (one shift count shared by all elements). Emptied upper bits are zeroed.
  6423  //
  6424  // Asm: VPSRLW, CPU Feature: AVX2
  6425  func (x Uint16x16) ShiftAllRight(y uint64) Uint16x16
  6426  
  6427  // ShiftAllRight shifts each element of x to the right by y bits (one shift count shared by all elements). Emptied upper bits are zeroed.
  6428  //
  6429  // Asm: VPSRLW, CPU Feature: AVX512
  6430  func (x Uint16x32) ShiftAllRight(y uint64) Uint16x32
  6431  
  6432  // ShiftAllRight shifts each element of x to the right by y bits (one shift count shared by all elements). Emptied upper bits are zeroed.
  6433  //
  6434  // Asm: VPSRLD, CPU Feature: AVX
  6435  func (x Uint32x4) ShiftAllRight(y uint64) Uint32x4
  6436  
  6437  // ShiftAllRight shifts each element of x to the right by y bits (one shift count shared by all elements). Emptied upper bits are zeroed.
  6438  //
  6439  // Asm: VPSRLD, CPU Feature: AVX2
  6440  func (x Uint32x8) ShiftAllRight(y uint64) Uint32x8
  6441  
  6442  // ShiftAllRight shifts each element of x to the right by y bits (one shift count shared by all elements). Emptied upper bits are zeroed.
  6443  //
  6444  // Asm: VPSRLD, CPU Feature: AVX512
  6445  func (x Uint32x16) ShiftAllRight(y uint64) Uint32x16
  6446  
  6447  // ShiftAllRight shifts each element of x to the right by y bits (one shift count shared by all elements). Emptied upper bits are zeroed.
  6448  //
  6449  // Asm: VPSRLQ, CPU Feature: AVX
  6450  func (x Uint64x2) ShiftAllRight(y uint64) Uint64x2
  6451  
  6452  // ShiftAllRight shifts each element of x to the right by y bits (one shift count shared by all elements). Emptied upper bits are zeroed.
  6453  //
  6454  // Asm: VPSRLQ, CPU Feature: AVX2
  6455  func (x Uint64x4) ShiftAllRight(y uint64) Uint64x4
  6456  
  6457  // ShiftAllRight shifts each element of x to the right by y bits (one shift count shared by all elements). Emptied upper bits are zeroed.
  6458  //
  6459  // Asm: VPSRLQ, CPU Feature: AVX512
  6460  func (x Uint64x8) ShiftAllRight(y uint64) Uint64x8
  6461  
  6462  /* ShiftAllRightConcat */
  6463  
  6464  // ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by
  6465  // shift (only the lower 4 bits are used, i.e. shift modulo 16), and then copies the lower bits of y to the emptied upper bits of the shifted x.
  6466  //
  6467  // shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  6468  //
  6469  // Asm: VPSHRDW, CPU Feature: AVX512VBMI2
  6470  func (x Int16x8) ShiftAllRightConcat(shift uint8, y Int16x8) Int16x8
  6471  
  6472  // ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by
  6473  // shift (only the lower 4 bits are used, i.e. shift modulo 16), and then copies the lower bits of y to the emptied upper bits of the shifted x.
  6474  //
  6475  // shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  6476  //
  6477  // Asm: VPSHRDW, CPU Feature: AVX512VBMI2
  6478  func (x Int16x16) ShiftAllRightConcat(shift uint8, y Int16x16) Int16x16
  6479  
  6480  // ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by
  6481  // shift (only the lower 4 bits are used, i.e. shift modulo 16), and then copies the lower bits of y to the emptied upper bits of the shifted x.
  6482  //
  6483  // shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  6484  //
  6485  // Asm: VPSHRDW, CPU Feature: AVX512VBMI2
  6486  func (x Int16x32) ShiftAllRightConcat(shift uint8, y Int16x32) Int16x32
  6487  
  6488  // ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by
  6489  // shift (only the lower 5 bits are used, i.e. shift modulo 32), and then copies the lower bits of y to the emptied upper bits of the shifted x.
  6490  //
  6491  // shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  6492  //
  6493  // Asm: VPSHRDD, CPU Feature: AVX512VBMI2
  6494  func (x Int32x4) ShiftAllRightConcat(shift uint8, y Int32x4) Int32x4
  6495  
  6496  // ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by
  6497  // shift (only the lower 5 bits are used, i.e. shift modulo 32), and then copies the lower bits of y to the emptied upper bits of the shifted x.
  6498  //
  6499  // shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  6500  //
  6501  // Asm: VPSHRDD, CPU Feature: AVX512VBMI2
  6502  func (x Int32x8) ShiftAllRightConcat(shift uint8, y Int32x8) Int32x8
  6503  
  6504  // ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by
  6505  // shift (only the lower 5 bits are used, i.e. shift modulo 32), and then copies the lower bits of y to the emptied upper bits of the shifted x.
  6506  //
  6507  // shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  6508  //
  6509  // Asm: VPSHRDD, CPU Feature: AVX512VBMI2
  6510  func (x Int32x16) ShiftAllRightConcat(shift uint8, y Int32x16) Int32x16
  6511  
  6512  // ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by
  6513  // shift (only the lower 6 bits are used, i.e. shift modulo 64), and then copies the lower bits of y to the emptied upper bits of the shifted x.
  6514  //
  6515  // shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  6516  //
  6517  // Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
  6518  func (x Int64x2) ShiftAllRightConcat(shift uint8, y Int64x2) Int64x2
  6519  
  6520  // ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by
  6521  // shift (only the lower 6 bits are used, i.e. shift modulo 64), and then copies the lower bits of y to the emptied upper bits of the shifted x.
  6522  //
  6523  // shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  6524  //
  6525  // Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
  6526  func (x Int64x4) ShiftAllRightConcat(shift uint8, y Int64x4) Int64x4
  6527  
  6528  // ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by
  6529  // shift (only the lower 6 bits are used, i.e. shift modulo 64), and then copies the lower bits of y to the emptied upper bits of the shifted x.
  6530  //
  6531  // shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  6532  //
  6533  // Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
  6534  func (x Int64x8) ShiftAllRightConcat(shift uint8, y Int64x8) Int64x8
  6535  
  6536  // ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by
  6537  // shift (only the lower 4 bits are used, i.e. shift modulo 16), and then copies the lower bits of y to the emptied upper bits of the shifted x.
  6538  //
  6539  // shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  6540  //
  6541  // Asm: VPSHRDW, CPU Feature: AVX512VBMI2
  6542  func (x Uint16x8) ShiftAllRightConcat(shift uint8, y Uint16x8) Uint16x8
  6543  
  6544  // ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by
  6545  // shift (only the lower 4 bits are used, i.e. shift modulo 16), and then copies the lower bits of y to the emptied upper bits of the shifted x.
  6546  //
  6547  // shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  6548  //
  6549  // Asm: VPSHRDW, CPU Feature: AVX512VBMI2
  6550  func (x Uint16x16) ShiftAllRightConcat(shift uint8, y Uint16x16) Uint16x16
  6551  
  6552  // ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
  6553  // immediate (only the lower 4 bits are used for 16-bit elements), and then copies the lower bits of y to the emptied upper bits of the shifted x.
  6554  //
  6555  // shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  6556  //
  6557  // Asm: VPSHRDW, CPU Feature: AVX512VBMI2
  6558  func (x Uint16x32) ShiftAllRightConcat(shift uint8, y Uint16x32) Uint16x32
  6559  
  6560  // ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
  6561  // immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
  6562  //
  6563  // shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  6564  //
  6565  // Asm: VPSHRDD, CPU Feature: AVX512VBMI2
  6566  func (x Uint32x4) ShiftAllRightConcat(shift uint8, y Uint32x4) Uint32x4
  6567  
  6568  // ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
  6569  // immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
  6570  //
  6571  // shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  6572  //
  6573  // Asm: VPSHRDD, CPU Feature: AVX512VBMI2
  6574  func (x Uint32x8) ShiftAllRightConcat(shift uint8, y Uint32x8) Uint32x8
  6575  
  6576  // ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
  6577  // immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
  6578  //
  6579  // shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  6580  //
  6581  // Asm: VPSHRDD, CPU Feature: AVX512VBMI2
  6582  func (x Uint32x16) ShiftAllRightConcat(shift uint8, y Uint32x16) Uint32x16
  6583  
  6584  // ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
  6585  // immediate (only the lower 6 bits are used for 64-bit elements), and then copies the lower bits of y to the emptied upper bits of the shifted x.
  6586  //
  6587  // shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  6588  //
  6589  // Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
  6590  func (x Uint64x2) ShiftAllRightConcat(shift uint8, y Uint64x2) Uint64x2
  6591  
  6592  // ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
  6593  // immediate (only the lower 6 bits are used for 64-bit elements), and then copies the lower bits of y to the emptied upper bits of the shifted x.
  6594  //
  6595  // shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  6596  //
  6597  // Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
  6598  func (x Uint64x4) ShiftAllRightConcat(shift uint8, y Uint64x4) Uint64x4
  6599  
  6600  // ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
  6601  // immediate (only the lower 6 bits are used for 64-bit elements), and then copies the lower bits of y to the emptied upper bits of the shifted x.
  6602  //
  6603  // shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  6604  //
  6605  // Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
  6606  func (x Uint64x8) ShiftAllRightConcat(shift uint8, y Uint64x8) Uint64x8
  6607  
  6608  /* ShiftLeft */
  6609  
  6610  // ShiftLeft shifts each element of x to the left by the number of bits given in the corresponding element of y. Emptied lower bits are zeroed.
  6611  //
  6612  // Asm: VPSLLVW, CPU Feature: AVX512
  6613  func (x Int16x8) ShiftLeft(y Int16x8) Int16x8
  6614  
  6615  // ShiftLeft shifts each element of x to the left by the number of bits given in the corresponding element of y. Emptied lower bits are zeroed.
  6616  //
  6617  // Asm: VPSLLVW, CPU Feature: AVX512
  6618  func (x Int16x16) ShiftLeft(y Int16x16) Int16x16
  6619  
  6620  // ShiftLeft shifts each element of x to the left by the number of bits given in the corresponding element of y. Emptied lower bits are zeroed.
  6621  //
  6622  // Asm: VPSLLVW, CPU Feature: AVX512
  6623  func (x Int16x32) ShiftLeft(y Int16x32) Int16x32
  6624  
  6625  // ShiftLeft shifts each element of x to the left by the number of bits given in the corresponding element of y. Emptied lower bits are zeroed.
  6626  //
  6627  // Asm: VPSLLVD, CPU Feature: AVX2
  6628  func (x Int32x4) ShiftLeft(y Int32x4) Int32x4
  6629  
  6630  // ShiftLeft shifts each element of x to the left by the number of bits given in the corresponding element of y. Emptied lower bits are zeroed.
  6631  //
  6632  // Asm: VPSLLVD, CPU Feature: AVX2
  6633  func (x Int32x8) ShiftLeft(y Int32x8) Int32x8
  6634  
  6635  // ShiftLeft shifts each element of x to the left by the number of bits given in the corresponding element of y. Emptied lower bits are zeroed.
  6636  //
  6637  // Asm: VPSLLVD, CPU Feature: AVX512
  6638  func (x Int32x16) ShiftLeft(y Int32x16) Int32x16
  6639  
  6640  // ShiftLeft shifts each element of x to the left by the number of bits given in the corresponding element of y. Emptied lower bits are zeroed.
  6641  //
  6642  // Asm: VPSLLVQ, CPU Feature: AVX2
  6643  func (x Int64x2) ShiftLeft(y Int64x2) Int64x2
  6644  
  6645  // ShiftLeft shifts each element of x to the left by the number of bits given in the corresponding element of y. Emptied lower bits are zeroed.
  6646  //
  6647  // Asm: VPSLLVQ, CPU Feature: AVX2
  6648  func (x Int64x4) ShiftLeft(y Int64x4) Int64x4
  6649  
  6650  // ShiftLeft shifts each element of x to the left by the number of bits given in the corresponding element of y. Emptied lower bits are zeroed.
  6651  //
  6652  // Asm: VPSLLVQ, CPU Feature: AVX512
  6653  func (x Int64x8) ShiftLeft(y Int64x8) Int64x8
  6654  
  6655  // ShiftLeft shifts each element of x to the left by the number of bits given in the corresponding element of y. Emptied lower bits are zeroed.
  6656  //
  6657  // Asm: VPSLLVW, CPU Feature: AVX512
  6658  func (x Uint16x8) ShiftLeft(y Uint16x8) Uint16x8
  6659  
  6660  // ShiftLeft shifts each element of x to the left by the number of bits given in the corresponding element of y. Emptied lower bits are zeroed.
  6661  //
  6662  // Asm: VPSLLVW, CPU Feature: AVX512
  6663  func (x Uint16x16) ShiftLeft(y Uint16x16) Uint16x16
  6664  
  6665  // ShiftLeft shifts each element of x to the left by the number of bits given in the corresponding element of y. Emptied lower bits are zeroed.
  6666  //
  6667  // Asm: VPSLLVW, CPU Feature: AVX512
  6668  func (x Uint16x32) ShiftLeft(y Uint16x32) Uint16x32
  6669  
  6670  // ShiftLeft shifts each element of x to the left by the number of bits given in the corresponding element of y. Emptied lower bits are zeroed.
  6671  //
  6672  // Asm: VPSLLVD, CPU Feature: AVX2
  6673  func (x Uint32x4) ShiftLeft(y Uint32x4) Uint32x4
  6674  
  6675  // ShiftLeft shifts each element of x to the left by the number of bits given in the corresponding element of y. Emptied lower bits are zeroed.
  6676  //
  6677  // Asm: VPSLLVD, CPU Feature: AVX2
  6678  func (x Uint32x8) ShiftLeft(y Uint32x8) Uint32x8
  6679  
  6680  // ShiftLeft shifts each element of x to the left by the number of bits given in the corresponding element of y. Emptied lower bits are zeroed.
  6681  //
  6682  // Asm: VPSLLVD, CPU Feature: AVX512
  6683  func (x Uint32x16) ShiftLeft(y Uint32x16) Uint32x16
  6684  
  6685  // ShiftLeft shifts each element of x to the left by the number of bits given in the corresponding element of y. Emptied lower bits are zeroed.
  6686  //
  6687  // Asm: VPSLLVQ, CPU Feature: AVX2
  6688  func (x Uint64x2) ShiftLeft(y Uint64x2) Uint64x2
  6689  
  6690  // ShiftLeft shifts each element of x to the left by the number of bits given in the corresponding element of y. Emptied lower bits are zeroed.
  6691  //
  6692  // Asm: VPSLLVQ, CPU Feature: AVX2
  6693  func (x Uint64x4) ShiftLeft(y Uint64x4) Uint64x4
  6694  
  6695  // ShiftLeft shifts each element of x to the left by the number of bits given in the corresponding element of y. Emptied lower bits are zeroed.
  6696  //
  6697  // Asm: VPSLLVQ, CPU Feature: AVX512
  6698  func (x Uint64x8) ShiftLeft(y Uint64x8) Uint64x8
  6699  
  6700  /* ShiftLeftConcat */
  6701  
  6702  // ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
  6703  // corresponding elements in y (only the lower 4 bits are used for 16-bit elements), and then copies the upper bits of z to the emptied lower bits of the shifted x.
  6704  //
  6705  // Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
  6706  func (x Int16x8) ShiftLeftConcat(y Int16x8, z Int16x8) Int16x8
  6707  
  6708  // ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
  6709  // corresponding elements in y (only the lower 4 bits are used for 16-bit elements), and then copies the upper bits of z to the emptied lower bits of the shifted x.
  6710  //
  6711  // Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
  6712  func (x Int16x16) ShiftLeftConcat(y Int16x16, z Int16x16) Int16x16
  6713  
  6714  // ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
  6715  // corresponding elements in y (only the lower 4 bits are used for 16-bit elements), and then copies the upper bits of z to the emptied lower bits of the shifted x.
  6716  //
  6717  // Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
  6718  func (x Int16x32) ShiftLeftConcat(y Int16x32, z Int16x32) Int16x32
  6719  
  6720  // ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
  6721  // corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
  6722  //
  6723  // Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
  6724  func (x Int32x4) ShiftLeftConcat(y Int32x4, z Int32x4) Int32x4
  6725  
  6726  // ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
  6727  // corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
  6728  //
  6729  // Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
  6730  func (x Int32x8) ShiftLeftConcat(y Int32x8, z Int32x8) Int32x8
  6731  
  6732  // ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
  6733  // corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
  6734  //
  6735  // Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
  6736  func (x Int32x16) ShiftLeftConcat(y Int32x16, z Int32x16) Int32x16
  6737  
  6738  // ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
  6739  // corresponding elements in y (only the lower 6 bits are used for 64-bit elements), and then copies the upper bits of z to the emptied lower bits of the shifted x.
  6740  //
  6741  // Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
  6742  func (x Int64x2) ShiftLeftConcat(y Int64x2, z Int64x2) Int64x2
  6743  
  6744  // ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
  6745  // corresponding elements in y (only the lower 6 bits are used for 64-bit elements), and then copies the upper bits of z to the emptied lower bits of the shifted x.
  6746  //
  6747  // Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
  6748  func (x Int64x4) ShiftLeftConcat(y Int64x4, z Int64x4) Int64x4
  6749  
  6750  // ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
  6751  // corresponding elements in y (only the lower 6 bits are used for 64-bit elements), and then copies the upper bits of z to the emptied lower bits of the shifted x.
  6752  //
  6753  // Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
  6754  func (x Int64x8) ShiftLeftConcat(y Int64x8, z Int64x8) Int64x8
  6755  
  6756  // ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
  6757  // corresponding elements in y (only the lower 4 bits are used for 16-bit elements), and then copies the upper bits of z to the emptied lower bits of the shifted x.
  6758  //
  6759  // Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
  6760  func (x Uint16x8) ShiftLeftConcat(y Uint16x8, z Uint16x8) Uint16x8
  6761  
  6762  // ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
  6763  // corresponding elements in y (only the lower 4 bits are used for 16-bit elements), and then copies the upper bits of z to the emptied lower bits of the shifted x.
  6764  //
  6765  // Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
  6766  func (x Uint16x16) ShiftLeftConcat(y Uint16x16, z Uint16x16) Uint16x16
  6767  
  6768  // ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
  6769  // corresponding elements in y (only the lower 4 bits are used for 16-bit elements), and then copies the upper bits of z to the emptied lower bits of the shifted x.
  6770  //
  6771  // Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
  6772  func (x Uint16x32) ShiftLeftConcat(y Uint16x32, z Uint16x32) Uint16x32
  6773  
  6774  // ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
  6775  // corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
  6776  //
  6777  // Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
  6778  func (x Uint32x4) ShiftLeftConcat(y Uint32x4, z Uint32x4) Uint32x4
  6779  
  6780  // ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
  6781  // corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
  6782  //
  6783  // Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
  6784  func (x Uint32x8) ShiftLeftConcat(y Uint32x8, z Uint32x8) Uint32x8
  6785  
  6786  // ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
  6787  // corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
  6788  //
  6789  // Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
  6790  func (x Uint32x16) ShiftLeftConcat(y Uint32x16, z Uint32x16) Uint32x16
  6791  
  6792  // ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
  6793  // corresponding elements in y (only the lower 6 bits are used for 64-bit elements), and then copies the upper bits of z to the emptied lower bits of the shifted x.
  6794  //
  6795  // Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
  6796  func (x Uint64x2) ShiftLeftConcat(y Uint64x2, z Uint64x2) Uint64x2
  6797  
  6798  // ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
  6799  // corresponding elements in y (only the lower 6 bits are used for 64-bit elements), and then copies the upper bits of z to the emptied lower bits of the shifted x.
  6800  //
  6801  // Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
  6802  func (x Uint64x4) ShiftLeftConcat(y Uint64x4, z Uint64x4) Uint64x4
  6803  
  6804  // ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
  6805  // corresponding elements in y (only the lower 6 bits are used for 64-bit elements), and then copies the upper bits of z to the emptied lower bits of the shifted x.
  6806  //
  6807  // Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
  6808  func (x Uint64x8) ShiftLeftConcat(y Uint64x8, z Uint64x8) Uint64x8
  6809  
  6810  /* ShiftRight */
  6811  
  6812  // ShiftRight shifts each element of x to the right by the number of bits given in the corresponding element of y. Emptied upper bits are filled with the sign bit (an arithmetic shift).
  6813  //
  6814  // Asm: VPSRAVW, CPU Feature: AVX512
  6815  func (x Int16x8) ShiftRight(y Int16x8) Int16x8
  6816  
  6817  // ShiftRight shifts each element of x to the right by the number of bits given in the corresponding element of y. Emptied upper bits are filled with the sign bit (an arithmetic shift).
  6818  //
  6819  // Asm: VPSRAVW, CPU Feature: AVX512
  6820  func (x Int16x16) ShiftRight(y Int16x16) Int16x16
  6821  
  6822  // ShiftRight shifts each element of x to the right by the number of bits given in the corresponding element of y. Emptied upper bits are filled with the sign bit (an arithmetic shift).
  6823  //
  6824  // Asm: VPSRAVW, CPU Feature: AVX512
  6825  func (x Int16x32) ShiftRight(y Int16x32) Int16x32
  6826  
  6827  // ShiftRight shifts each element of x to the right by the number of bits given in the corresponding element of y. Emptied upper bits are filled with the sign bit (an arithmetic shift).
  6828  //
  6829  // Asm: VPSRAVD, CPU Feature: AVX2
  6830  func (x Int32x4) ShiftRight(y Int32x4) Int32x4
  6831  
  6832  // ShiftRight shifts each element of x to the right by the number of bits given in the corresponding element of y. Emptied upper bits are filled with the sign bit (an arithmetic shift).
  6833  //
  6834  // Asm: VPSRAVD, CPU Feature: AVX2
  6835  func (x Int32x8) ShiftRight(y Int32x8) Int32x8
  6836  
  6837  // ShiftRight shifts each element of x to the right by the number of bits given in the corresponding element of y. Emptied upper bits are filled with the sign bit (an arithmetic shift).
  6838  //
  6839  // Asm: VPSRAVD, CPU Feature: AVX512
  6840  func (x Int32x16) ShiftRight(y Int32x16) Int32x16
  6841  
  6842  // ShiftRight shifts each element of x to the right by the number of bits given in the corresponding element of y. Emptied upper bits are filled with the sign bit (an arithmetic shift).
  6843  //
  6844  // Asm: VPSRAVQ, CPU Feature: AVX512
  6845  func (x Int64x2) ShiftRight(y Int64x2) Int64x2
  6846  
  6847  // ShiftRight shifts each element of x to the right by the number of bits given in the corresponding element of y. Emptied upper bits are filled with the sign bit (an arithmetic shift).
  6848  //
  6849  // Asm: VPSRAVQ, CPU Feature: AVX512
  6850  func (x Int64x4) ShiftRight(y Int64x4) Int64x4
  6851  
  6852  // ShiftRight shifts each element of x to the right by the number of bits given in the corresponding element of y. Emptied upper bits are filled with the sign bit (an arithmetic shift).
  6853  //
  6854  // Asm: VPSRAVQ, CPU Feature: AVX512
  6855  func (x Int64x8) ShiftRight(y Int64x8) Int64x8
  6856  
  6857  // ShiftRight shifts each element of x to the right by the number of bits given in the corresponding element of y. Emptied upper bits are zeroed (a logical shift).
  6858  //
  6859  // Asm: VPSRLVW, CPU Feature: AVX512
  6860  func (x Uint16x8) ShiftRight(y Uint16x8) Uint16x8
  6861  
  6862  // ShiftRight shifts each element of x to the right by the number of bits given in the corresponding element of y. Emptied upper bits are zeroed (a logical shift).
  6863  //
  6864  // Asm: VPSRLVW, CPU Feature: AVX512
  6865  func (x Uint16x16) ShiftRight(y Uint16x16) Uint16x16
  6866  
  6867  // ShiftRight shifts each element of x to the right by the number of bits given in the corresponding element of y. Emptied upper bits are zeroed (a logical shift).
  6868  //
  6869  // Asm: VPSRLVW, CPU Feature: AVX512
  6870  func (x Uint16x32) ShiftRight(y Uint16x32) Uint16x32
  6871  
  6872  // ShiftRight shifts each element of x to the right by the number of bits given in the corresponding element of y. Emptied upper bits are zeroed (a logical shift).
  6873  //
  6874  // Asm: VPSRLVD, CPU Feature: AVX2
  6875  func (x Uint32x4) ShiftRight(y Uint32x4) Uint32x4
  6876  
  6877  // ShiftRight shifts each element of x to the right by the number of bits given in the corresponding element of y. Emptied upper bits are zeroed (a logical shift).
  6878  //
  6879  // Asm: VPSRLVD, CPU Feature: AVX2
  6880  func (x Uint32x8) ShiftRight(y Uint32x8) Uint32x8
  6881  
  6882  // ShiftRight shifts each element of x to the right by the number of bits given in the corresponding element of y. Emptied upper bits are zeroed (a logical shift).
  6883  //
  6884  // Asm: VPSRLVD, CPU Feature: AVX512
  6885  func (x Uint32x16) ShiftRight(y Uint32x16) Uint32x16
  6886  
  6887  // ShiftRight shifts each element of x to the right by the number of bits given in the corresponding element of y. Emptied upper bits are zeroed (a logical shift).
  6888  //
  6889  // Asm: VPSRLVQ, CPU Feature: AVX2
  6890  func (x Uint64x2) ShiftRight(y Uint64x2) Uint64x2
  6891  
  6892  // ShiftRight shifts each element of x to the right by the number of bits given in the corresponding element of y. Emptied upper bits are zeroed (a logical shift).
  6893  //
  6894  // Asm: VPSRLVQ, CPU Feature: AVX2
  6895  func (x Uint64x4) ShiftRight(y Uint64x4) Uint64x4
  6896  
  6897  // ShiftRight shifts each element of x to the right by the number of bits given in the corresponding element of y. Emptied upper bits are zeroed (a logical shift).
  6898  //
  6899  // Asm: VPSRLVQ, CPU Feature: AVX512
  6900  func (x Uint64x8) ShiftRight(y Uint64x8) Uint64x8
  6901  
  6902  /* ShiftRightConcat */
  6903  
  6904  // ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
  6905  // corresponding elements in y (only the lower 4 bits are used for 16-bit elements), and then copies the lower bits of z to the emptied upper bits of the shifted x.
  6906  //
  6907  // Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
  6908  func (x Int16x8) ShiftRightConcat(y Int16x8, z Int16x8) Int16x8
  6909  
  6910  // ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
  6911  // corresponding elements in y (only the lower 4 bits are used for 16-bit elements), and then copies the lower bits of z to the emptied upper bits of the shifted x.
  6912  //
  6913  // Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
  6914  func (x Int16x16) ShiftRightConcat(y Int16x16, z Int16x16) Int16x16
  6915  
  6916  // ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
  6917  // corresponding elements in y (only the lower 4 bits are used for 16-bit elements), and then copies the lower bits of z to the emptied upper bits of the shifted x.
  6918  //
  6919  // Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
  6920  func (x Int16x32) ShiftRightConcat(y Int16x32, z Int16x32) Int16x32
  6921  
  6922  // ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
  6923  // corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
  6924  //
  6925  // Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
  6926  func (x Int32x4) ShiftRightConcat(y Int32x4, z Int32x4) Int32x4
  6927  
  6928  // ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
  6929  // corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
  6930  //
  6931  // Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
  6932  func (x Int32x8) ShiftRightConcat(y Int32x8, z Int32x8) Int32x8
  6933  
  6934  // ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
  6935  // corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
  6936  //
  6937  // Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
  6938  func (x Int32x16) ShiftRightConcat(y Int32x16, z Int32x16) Int32x16
  6939  
  6940  // ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
  6941  // corresponding elements in y (only the lower 6 bits are used for 64-bit elements), and then copies the lower bits of z to the emptied upper bits of the shifted x.
  6942  //
  6943  // Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
  6944  func (x Int64x2) ShiftRightConcat(y Int64x2, z Int64x2) Int64x2
  6945  
  6946  // ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
  6947  // corresponding elements in y (only the lower 6 bits are used for 64-bit elements), and then copies the lower bits of z to the emptied upper bits of the shifted x.
  6948  //
  6949  // Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
  6950  func (x Int64x4) ShiftRightConcat(y Int64x4, z Int64x4) Int64x4
  6951  
  6952  // ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
  6953  // corresponding elements in y (only the lower 6 bits are used for 64-bit elements), and then copies the lower bits of z to the emptied upper bits of the shifted x.
  6954  //
  6955  // Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
  6956  func (x Int64x8) ShiftRightConcat(y Int64x8, z Int64x8) Int64x8
  6957  
  6958  // ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
  6959  // corresponding elements in y (only the lower 4 bits are used for 16-bit elements), and then copies the lower bits of z to the emptied upper bits of the shifted x.
  6960  //
  6961  // Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
  6962  func (x Uint16x8) ShiftRightConcat(y Uint16x8, z Uint16x8) Uint16x8
  6963  
  6964  // ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
  6965  // corresponding elements in y (only the lower 4 bits are used for 16-bit elements), and then copies the lower bits of z to the emptied upper bits of the shifted x.
  6966  //
  6967  // Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
  6968  func (x Uint16x16) ShiftRightConcat(y Uint16x16, z Uint16x16) Uint16x16
  6969  
  6970  // ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
  6971  // corresponding elements in y (only the lower 4 bits are used for 16-bit elements), and then copies the lower bits of z to the emptied upper bits of the shifted x.
  6972  //
  6973  // Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
  6974  func (x Uint16x32) ShiftRightConcat(y Uint16x32, z Uint16x32) Uint16x32
  6975  
  6976  // ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
  6977  // corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
  6978  //
  6979  // Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
  6980  func (x Uint32x4) ShiftRightConcat(y Uint32x4, z Uint32x4) Uint32x4
  6981  
  6982  // ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
  6983  // corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
  6984  //
  6985  // Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
  6986  func (x Uint32x8) ShiftRightConcat(y Uint32x8, z Uint32x8) Uint32x8
  6987  
  6988  // ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
  6989  // corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
  6990  //
  6991  // Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
  6992  func (x Uint32x16) ShiftRightConcat(y Uint32x16, z Uint32x16) Uint32x16
  6993  
  6994  // ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
  6995  // corresponding elements in y (only the lower 6 bits are used for 64-bit elements), and then copies the lower bits of z to the emptied upper bits of the shifted x.
  6996  //
  6997  // Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
  6998  func (x Uint64x2) ShiftRightConcat(y Uint64x2, z Uint64x2) Uint64x2
  6999  
  7000  // ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
  7001  // corresponding elements in y (only the lower 6 bits are used for 64-bit elements), and then copies the lower bits of z to the emptied upper bits of the shifted x.
  7002  //
  7003  // Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
  7004  func (x Uint64x4) ShiftRightConcat(y Uint64x4, z Uint64x4) Uint64x4
  7005  
  7006  // ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
  7007  // corresponding elements in y (only the lower 6 bits are used for 64-bit elements), and then copies the lower bits of z to the emptied upper bits of the shifted x.
  7008  //
  7009  // Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
  7010  func (x Uint64x8) ShiftRightConcat(y Uint64x8, z Uint64x8) Uint64x8
  7011  
  7012  /* Sqrt */
  7013  
  7014  // Sqrt returns a vector holding the square root of each element of x.
  7015  //
  7016  // Asm: VSQRTPS, CPU Feature: AVX
  7017  func (x Float32x4) Sqrt() Float32x4
  7018  
  7019  // Sqrt returns a vector holding the square root of each element of x.
  7020  //
  7021  // Asm: VSQRTPS, CPU Feature: AVX
  7022  func (x Float32x8) Sqrt() Float32x8
  7023  
  7024  // Sqrt returns a vector holding the square root of each element of x.
  7025  //
  7026  // Asm: VSQRTPS, CPU Feature: AVX512
  7027  func (x Float32x16) Sqrt() Float32x16
  7028  
  7029  // Sqrt returns a vector holding the square root of each element of x.
  7030  //
  7031  // Asm: VSQRTPD, CPU Feature: AVX
  7032  func (x Float64x2) Sqrt() Float64x2
  7033  
  7034  // Sqrt returns a vector holding the square root of each element of x.
  7035  //
  7036  // Asm: VSQRTPD, CPU Feature: AVX
  7037  func (x Float64x4) Sqrt() Float64x4
  7038  
  7039  // Sqrt returns a vector holding the square root of each element of x.
  7040  //
  7041  // Asm: VSQRTPD, CPU Feature: AVX512
  7042  func (x Float64x8) Sqrt() Float64x8
  7043  
  7044  /* Sub */
  7045  
  7046  // Sub performs element-wise subtraction on the vectors x and y and returns the result.
  7047  //
  7048  // Asm: VSUBPS, CPU Feature: AVX
  7049  func (x Float32x4) Sub(y Float32x4) Float32x4
  7050  
  7051  // Sub performs element-wise subtraction on the vectors x and y and returns the result.
  7052  //
  7053  // Asm: VSUBPS, CPU Feature: AVX
  7054  func (x Float32x8) Sub(y Float32x8) Float32x8
  7055  
  7056  // Sub performs element-wise subtraction on the vectors x and y and returns the result.
  7057  //
  7058  // Asm: VSUBPS, CPU Feature: AVX512
  7059  func (x Float32x16) Sub(y Float32x16) Float32x16
  7060  
  7061  // Sub performs element-wise subtraction on the vectors x and y and returns the result.
  7062  //
  7063  // Asm: VSUBPD, CPU Feature: AVX
  7064  func (x Float64x2) Sub(y Float64x2) Float64x2
  7065  
  7066  // Sub performs element-wise subtraction on the vectors x and y and returns the result.
  7067  //
  7068  // Asm: VSUBPD, CPU Feature: AVX
  7069  func (x Float64x4) Sub(y Float64x4) Float64x4
  7070  
  7071  // Sub performs element-wise subtraction on the vectors x and y and returns the result.
  7072  //
  7073  // Asm: VSUBPD, CPU Feature: AVX512
  7074  func (x Float64x8) Sub(y Float64x8) Float64x8
  7075  
  7076  // Sub performs element-wise subtraction on the vectors x and y and returns the result.
  7077  //
  7078  // Asm: VPSUBB, CPU Feature: AVX
  7079  func (x Int8x16) Sub(y Int8x16) Int8x16
  7080  
  7081  // Sub performs element-wise subtraction on the vectors x and y and returns the result.
  7082  //
  7083  // Asm: VPSUBB, CPU Feature: AVX2
  7084  func (x Int8x32) Sub(y Int8x32) Int8x32
  7085  
  7086  // Sub performs element-wise subtraction on the vectors x and y and returns the result.
  7087  //
  7088  // Asm: VPSUBB, CPU Feature: AVX512
  7089  func (x Int8x64) Sub(y Int8x64) Int8x64
  7090  
  7091  // Sub performs element-wise subtraction on the vectors x and y and returns the result.
  7092  //
  7093  // Asm: VPSUBW, CPU Feature: AVX
  7094  func (x Int16x8) Sub(y Int16x8) Int16x8
  7095  
  7096  // Sub performs element-wise subtraction on the vectors x and y and returns the result.
  7097  //
  7098  // Asm: VPSUBW, CPU Feature: AVX2
  7099  func (x Int16x16) Sub(y Int16x16) Int16x16
  7100  
  7101  // Sub performs element-wise subtraction on the vectors x and y and returns the result.
  7102  //
  7103  // Asm: VPSUBW, CPU Feature: AVX512
  7104  func (x Int16x32) Sub(y Int16x32) Int16x32
  7105  
  7106  // Sub performs element-wise subtraction on the vectors x and y and returns the result.
  7107  //
  7108  // Asm: VPSUBD, CPU Feature: AVX
  7109  func (x Int32x4) Sub(y Int32x4) Int32x4
  7110  
  7111  // Sub subtracts corresponding elements of two vectors.
  7112  //
  7113  // Asm: VPSUBD, CPU Feature: AVX2
  7114  func (x Int32x8) Sub(y Int32x8) Int32x8
  7115  
  7116  // Sub subtracts corresponding elements of two vectors.
  7117  //
  7118  // Asm: VPSUBD, CPU Feature: AVX512
  7119  func (x Int32x16) Sub(y Int32x16) Int32x16
  7120  
  7121  // Sub subtracts corresponding elements of two vectors.
  7122  //
  7123  // Asm: VPSUBQ, CPU Feature: AVX
  7124  func (x Int64x2) Sub(y Int64x2) Int64x2
  7125  
  7126  // Sub subtracts corresponding elements of two vectors.
  7127  //
  7128  // Asm: VPSUBQ, CPU Feature: AVX2
  7129  func (x Int64x4) Sub(y Int64x4) Int64x4
  7130  
  7131  // Sub subtracts corresponding elements of two vectors.
  7132  //
  7133  // Asm: VPSUBQ, CPU Feature: AVX512
  7134  func (x Int64x8) Sub(y Int64x8) Int64x8
  7135  
  7136  // Sub subtracts corresponding elements of two vectors.
  7137  //
  7138  // Asm: VPSUBB, CPU Feature: AVX
  7139  func (x Uint8x16) Sub(y Uint8x16) Uint8x16
  7140  
  7141  // Sub subtracts corresponding elements of two vectors.
  7142  //
  7143  // Asm: VPSUBB, CPU Feature: AVX2
  7144  func (x Uint8x32) Sub(y Uint8x32) Uint8x32
  7145  
  7146  // Sub subtracts corresponding elements of two vectors.
  7147  //
  7148  // Asm: VPSUBB, CPU Feature: AVX512
  7149  func (x Uint8x64) Sub(y Uint8x64) Uint8x64
  7150  
  7151  // Sub subtracts corresponding elements of two vectors.
  7152  //
  7153  // Asm: VPSUBW, CPU Feature: AVX
  7154  func (x Uint16x8) Sub(y Uint16x8) Uint16x8
  7155  
  7156  // Sub subtracts corresponding elements of two vectors.
  7157  //
  7158  // Asm: VPSUBW, CPU Feature: AVX2
  7159  func (x Uint16x16) Sub(y Uint16x16) Uint16x16
  7160  
  7161  // Sub subtracts corresponding elements of two vectors.
  7162  //
  7163  // Asm: VPSUBW, CPU Feature: AVX512
  7164  func (x Uint16x32) Sub(y Uint16x32) Uint16x32
  7165  
  7166  // Sub subtracts corresponding elements of two vectors.
  7167  //
  7168  // Asm: VPSUBD, CPU Feature: AVX
  7169  func (x Uint32x4) Sub(y Uint32x4) Uint32x4
  7170  
  7171  // Sub subtracts corresponding elements of two vectors.
  7172  //
  7173  // Asm: VPSUBD, CPU Feature: AVX2
  7174  func (x Uint32x8) Sub(y Uint32x8) Uint32x8
  7175  
  7176  // Sub subtracts corresponding elements of two vectors.
  7177  //
  7178  // Asm: VPSUBD, CPU Feature: AVX512
  7179  func (x Uint32x16) Sub(y Uint32x16) Uint32x16
  7180  
  7181  // Sub subtracts corresponding elements of two vectors.
  7182  //
  7183  // Asm: VPSUBQ, CPU Feature: AVX
  7184  func (x Uint64x2) Sub(y Uint64x2) Uint64x2
  7185  
  7186  // Sub subtracts corresponding elements of two vectors.
  7187  //
  7188  // Asm: VPSUBQ, CPU Feature: AVX2
  7189  func (x Uint64x4) Sub(y Uint64x4) Uint64x4
  7190  
  7191  // Sub subtracts corresponding elements of two vectors.
  7192  //
  7193  // Asm: VPSUBQ, CPU Feature: AVX512
  7194  func (x Uint64x8) Sub(y Uint64x8) Uint64x8
  7195  
  7196  /* SubPairs */
  7197  
  7198  // SubPairs horizontally subtracts adjacent pairs of elements: each result element is the difference of one adjacent pair taken from x or from y.
  7199  // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. NOTE(review): confirm this element order against the VHSUBPS instruction reference, which puts the differences from its first source operand in the low elements.
  7200  //
  7201  // Asm: VHSUBPS, CPU Feature: AVX
  7202  func (x Float32x4) SubPairs(y Float32x4) Float32x4
  7203  
  7204  // SubPairs horizontally subtracts adjacent pairs of elements: each result element is the difference of one adjacent pair taken from x or from y.
  7205  // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. NOTE(review): confirm this element order against the VHSUBPS instruction reference, which puts the differences from its first source operand in the low elements and, in its 256-bit form, works within each 128-bit lane.
  7206  //
  7207  // Asm: VHSUBPS, CPU Feature: AVX
  7208  func (x Float32x8) SubPairs(y Float32x8) Float32x8
  7209  
  7210  // SubPairs horizontally subtracts adjacent pairs of elements: each result element is the difference of one adjacent pair taken from x or from y.
  7211  // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. NOTE(review): confirm this element order against the VHSUBPD instruction reference, which puts the differences from its first source operand in the low elements.
  7212  //
  7213  // Asm: VHSUBPD, CPU Feature: AVX
  7214  func (x Float64x2) SubPairs(y Float64x2) Float64x2
  7215  
  7216  // SubPairs horizontally subtracts adjacent pairs of elements: each result element is the difference of one adjacent pair taken from x or from y.
  7217  // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. NOTE(review): confirm this element order against the VHSUBPD instruction reference, which puts the differences from its first source operand in the low elements and, in its 256-bit form, works within each 128-bit lane.
  7218  //
  7219  // Asm: VHSUBPD, CPU Feature: AVX
  7220  func (x Float64x4) SubPairs(y Float64x4) Float64x4
  7221  
  7222  // SubPairs horizontally subtracts adjacent pairs of elements: each result element is the difference of one adjacent pair taken from x or from y.
  7223  // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. NOTE(review): confirm this element order against the VPHSUBW instruction reference, which puts the differences from its first source operand in the low elements.
  7224  //
  7225  // Asm: VPHSUBW, CPU Feature: AVX
  7226  func (x Int16x8) SubPairs(y Int16x8) Int16x8
  7227  
  7228  // SubPairs horizontally subtracts adjacent pairs of elements: each result element is the difference of one adjacent pair taken from x or from y.
  7229  // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. NOTE(review): confirm this element order against the VPHSUBW instruction reference, which puts the differences from its first source operand in the low elements and, in its 256-bit form, works within each 128-bit lane.
  7230  //
  7231  // Asm: VPHSUBW, CPU Feature: AVX2
  7232  func (x Int16x16) SubPairs(y Int16x16) Int16x16
  7233  
  7234  // SubPairs horizontally subtracts adjacent pairs of elements: each result element is the difference of one adjacent pair taken from x or from y.
  7235  // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. NOTE(review): confirm this element order against the VPHSUBD instruction reference, which puts the differences from its first source operand in the low elements.
  7236  //
  7237  // Asm: VPHSUBD, CPU Feature: AVX
  7238  func (x Int32x4) SubPairs(y Int32x4) Int32x4
  7239  
  7240  // SubPairs horizontally subtracts adjacent pairs of elements: each result element is the difference of one adjacent pair taken from x or from y.
  7241  // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. NOTE(review): confirm this element order against the VPHSUBD instruction reference, which puts the differences from its first source operand in the low elements and, in its 256-bit form, works within each 128-bit lane.
  7242  //
  7243  // Asm: VPHSUBD, CPU Feature: AVX2
  7244  func (x Int32x8) SubPairs(y Int32x8) Int32x8
  7245  
  7246  // SubPairs horizontally subtracts adjacent pairs of elements: each result element is the difference of one adjacent pair taken from x or from y.
  7247  // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. NOTE(review): confirm this element order against the VPHSUBW instruction reference, which puts the differences from its first source operand in the low elements.
  7248  //
  7249  // Asm: VPHSUBW, CPU Feature: AVX
  7250  func (x Uint16x8) SubPairs(y Uint16x8) Uint16x8
  7251  
  7252  // SubPairs horizontally subtracts adjacent pairs of elements: each result element is the difference of one adjacent pair taken from x or from y.
  7253  // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. NOTE(review): confirm this element order against the VPHSUBW instruction reference, which puts the differences from its first source operand in the low elements and, in its 256-bit form, works within each 128-bit lane.
  7254  //
  7255  // Asm: VPHSUBW, CPU Feature: AVX2
  7256  func (x Uint16x16) SubPairs(y Uint16x16) Uint16x16
  7257  
  7258  // SubPairs horizontally subtracts adjacent pairs of elements: each result element is the difference of one adjacent pair taken from x or from y.
  7259  // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. NOTE(review): confirm this element order against the VPHSUBD instruction reference, which puts the differences from its first source operand in the low elements.
  7260  //
  7261  // Asm: VPHSUBD, CPU Feature: AVX
  7262  func (x Uint32x4) SubPairs(y Uint32x4) Uint32x4
  7263  
  7264  // SubPairs horizontally subtracts adjacent pairs of elements: each result element is the difference of one adjacent pair taken from x or from y.
  7265  // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. NOTE(review): confirm this element order against the VPHSUBD instruction reference, which puts the differences from its first source operand in the low elements and, in its 256-bit form, works within each 128-bit lane.
  7266  //
  7267  // Asm: VPHSUBD, CPU Feature: AVX2
  7268  func (x Uint32x8) SubPairs(y Uint32x8) Uint32x8
  7269  
  7270  /* SubPairsSaturated */
  7271  
  7272  // SubPairsSaturated horizontally subtracts adjacent pairs of elements with saturation: each result element is the saturated difference of one adjacent pair taken from x or from y.
  7273  // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. NOTE(review): confirm this element order against the VPHSUBSW instruction reference, which puts the differences from its first source operand in the low elements.
  7274  //
  7275  // Asm: VPHSUBSW, CPU Feature: AVX
  7276  func (x Int16x8) SubPairsSaturated(y Int16x8) Int16x8
  7277  
  7278  // SubPairsSaturated horizontally subtracts adjacent pairs of elements with saturation: each result element is the saturated difference of one adjacent pair taken from x or from y.
  7279  // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. NOTE(review): confirm this element order against the VPHSUBSW instruction reference, which puts the differences from its first source operand in the low elements and, in its 256-bit form, works within each 128-bit lane.
  7280  //
  7281  // Asm: VPHSUBSW, CPU Feature: AVX2
  7282  func (x Int16x16) SubPairsSaturated(y Int16x16) Int16x16
  7283  
  7284  /* SubSaturated */
  7285  
  7286  // SubSaturated subtracts corresponding elements of two vectors with saturation.
  7287  //
  7288  // Asm: VPSUBSB, CPU Feature: AVX
  7289  func (x Int8x16) SubSaturated(y Int8x16) Int8x16
  7290  
  7291  // SubSaturated subtracts corresponding elements of two vectors with saturation.
  7292  //
  7293  // Asm: VPSUBSB, CPU Feature: AVX2
  7294  func (x Int8x32) SubSaturated(y Int8x32) Int8x32
  7295  
  7296  // SubSaturated subtracts corresponding elements of two vectors with saturation.
  7297  //
  7298  // Asm: VPSUBSB, CPU Feature: AVX512
  7299  func (x Int8x64) SubSaturated(y Int8x64) Int8x64
  7300  
  7301  // SubSaturated subtracts corresponding elements of two vectors with saturation.
  7302  //
  7303  // Asm: VPSUBSW, CPU Feature: AVX
  7304  func (x Int16x8) SubSaturated(y Int16x8) Int16x8
  7305  
  7306  // SubSaturated subtracts corresponding elements of two vectors with saturation.
  7307  //
  7308  // Asm: VPSUBSW, CPU Feature: AVX2
  7309  func (x Int16x16) SubSaturated(y Int16x16) Int16x16
  7310  
  7311  // SubSaturated subtracts corresponding elements of two vectors with saturation.
  7312  //
  7313  // Asm: VPSUBSW, CPU Feature: AVX512
  7314  func (x Int16x32) SubSaturated(y Int16x32) Int16x32
  7315  
  7316  // SubSaturated subtracts corresponding elements of two vectors with saturation.
  7317  //
  7318  // Asm: VPSUBUSB, CPU Feature: AVX
  7319  func (x Uint8x16) SubSaturated(y Uint8x16) Uint8x16
  7320  
  7321  // SubSaturated subtracts corresponding elements of two vectors with saturation.
  7322  //
  7323  // Asm: VPSUBUSB, CPU Feature: AVX2
  7324  func (x Uint8x32) SubSaturated(y Uint8x32) Uint8x32
  7325  
  7326  // SubSaturated subtracts corresponding elements of two vectors with saturation.
  7327  //
  7328  // Asm: VPSUBUSB, CPU Feature: AVX512
  7329  func (x Uint8x64) SubSaturated(y Uint8x64) Uint8x64
  7330  
  7331  // SubSaturated subtracts corresponding elements of two vectors with saturation.
  7332  //
  7333  // Asm: VPSUBUSW, CPU Feature: AVX
  7334  func (x Uint16x8) SubSaturated(y Uint16x8) Uint16x8
  7335  
  7336  // SubSaturated subtracts corresponding elements of two vectors with saturation.
  7337  //
  7338  // Asm: VPSUBUSW, CPU Feature: AVX2
  7339  func (x Uint16x16) SubSaturated(y Uint16x16) Uint16x16
  7340  
  7341  // SubSaturated subtracts corresponding elements of two vectors with saturation.
  7342  //
  7343  // Asm: VPSUBUSW, CPU Feature: AVX512
  7344  func (x Uint16x32) SubSaturated(y Uint16x32) Uint16x32
  7345  
  7346  /* SumAbsDiff */
  7347  
  7348  // SumAbsDiff sums the absolute differences between the bytes of the two input vectors, treating each 8 adjacent bytes as a group. The result
  7349  // is a vector of word-sized elements in which each 4*n-th element contains the sum of the n-th input group. The other elements in the result vector are zeroed.
  7350  // Each sum can be seen as the L1 distance between the corresponding 8-byte groups of the two input vectors.
  7351  //
  7352  // Asm: VPSADBW, CPU Feature: AVX
  7353  func (x Uint8x16) SumAbsDiff(y Uint8x16) Uint16x8
  7354  
  7355  // SumAbsDiff sums the absolute differences between the bytes of the two input vectors, treating each 8 adjacent bytes as a group. The result
  7356  // is a vector of word-sized elements in which each 4*n-th element contains the sum of the n-th input group. The other elements in the result vector are zeroed.
  7357  // Each sum can be seen as the L1 distance between the corresponding 8-byte groups of the two input vectors.
  7358  //
  7359  // Asm: VPSADBW, CPU Feature: AVX2
  7360  func (x Uint8x32) SumAbsDiff(y Uint8x32) Uint16x16
  7361  
  7362  // SumAbsDiff sums the absolute differences between the bytes of the two input vectors, treating each 8 adjacent bytes as a group. The result
  7363  // is a vector of word-sized elements in which each 4*n-th element contains the sum of the n-th input group. The other elements in the result vector are zeroed.
  7364  // Each sum can be seen as the L1 distance between the corresponding 8-byte groups of the two input vectors.
  7365  //
  7366  // Asm: VPSADBW, CPU Feature: AVX512
  7367  func (x Uint8x64) SumAbsDiff(y Uint8x64) Uint16x32
  7368  
  7369  /* Trunc */
  7370  
  7371  // Trunc truncates elements towards zero.
  7372  //
  7373  // Asm: VROUNDPS, CPU Feature: AVX
  7374  func (x Float32x4) Trunc() Float32x4
  7375  
  7376  // Trunc truncates elements towards zero.
  7377  //
  7378  // Asm: VROUNDPS, CPU Feature: AVX
  7379  func (x Float32x8) Trunc() Float32x8
  7380  
  7381  // Trunc truncates elements towards zero.
  7382  //
  7383  // Asm: VROUNDPD, CPU Feature: AVX
  7384  func (x Float64x2) Trunc() Float64x2
  7385  
  7386  // Trunc truncates elements towards zero.
  7387  //
  7388  // Asm: VROUNDPD, CPU Feature: AVX
  7389  func (x Float64x4) Trunc() Float64x4
  7390  
  7391  /* TruncScaled */
  7392  
  7393  // TruncScaled truncates elements with the specified precision.
  7394  //
  7395  // prec should be a constant for best performance; a non-constant value will be translated into a jump table.
  7396  //
  7397  // Asm: VRNDSCALEPS, CPU Feature: AVX512
  7398  func (x Float32x4) TruncScaled(prec uint8) Float32x4
  7399  
  7400  // TruncScaled truncates elements with the specified precision.
  7401  //
  7402  // prec should be a constant for best performance; a non-constant value will be translated into a jump table.
  7403  //
  7404  // Asm: VRNDSCALEPS, CPU Feature: AVX512
  7405  func (x Float32x8) TruncScaled(prec uint8) Float32x8
  7406  
  7407  // TruncScaled truncates elements with the specified precision.
  7408  //
  7409  // prec should be a constant for best performance; a non-constant value will be translated into a jump table.
  7410  //
  7411  // Asm: VRNDSCALEPS, CPU Feature: AVX512
  7412  func (x Float32x16) TruncScaled(prec uint8) Float32x16
  7413  
  7414  // TruncScaled truncates elements with the specified precision.
  7415  //
  7416  // prec should be a constant for best performance; a non-constant value will be translated into a jump table.
  7417  //
  7418  // Asm: VRNDSCALEPD, CPU Feature: AVX512
  7419  func (x Float64x2) TruncScaled(prec uint8) Float64x2
  7420  
  7421  // TruncScaled truncates elements with the specified precision.
  7422  //
  7423  // prec should be a constant for best performance; a non-constant value will be translated into a jump table.
  7424  //
  7425  // Asm: VRNDSCALEPD, CPU Feature: AVX512
  7426  func (x Float64x4) TruncScaled(prec uint8) Float64x4
  7427  
  7428  // TruncScaled truncates elements with the specified precision.
  7429  //
  7430  // prec should be a constant for best performance; a non-constant value will be translated into a jump table.
  7431  //
  7432  // Asm: VRNDSCALEPD, CPU Feature: AVX512
  7433  func (x Float64x8) TruncScaled(prec uint8) Float64x8
  7434  
  7435  /* TruncScaledResidue */
  7436  
  7437  // TruncScaledResidue computes the difference after truncating elements with the specified precision.
  7438  //
  7439  // prec should be a constant for best performance; a non-constant value will be translated into a jump table.
  7440  //
  7441  // Asm: VREDUCEPS, CPU Feature: AVX512
  7442  func (x Float32x4) TruncScaledResidue(prec uint8) Float32x4
  7443  
  7444  // TruncScaledResidue computes the difference after truncating elements with the specified precision.
  7445  //
  7446  // prec should be a constant for best performance; a non-constant value will be translated into a jump table.
  7447  //
  7448  // Asm: VREDUCEPS, CPU Feature: AVX512
  7449  func (x Float32x8) TruncScaledResidue(prec uint8) Float32x8
  7450  
  7451  // TruncScaledResidue computes the difference after truncating elements with the specified precision.
  7452  //
  7453  // prec should be a constant for best performance; a non-constant value will be translated into a jump table.
  7454  //
  7455  // Asm: VREDUCEPS, CPU Feature: AVX512
  7456  func (x Float32x16) TruncScaledResidue(prec uint8) Float32x16
  7457  
  7458  // TruncScaledResidue computes the difference after truncating elements with the specified precision.
  7459  //
  7460  // prec should be a constant for best performance; a non-constant value will be translated into a jump table.
  7461  //
  7462  // Asm: VREDUCEPD, CPU Feature: AVX512
  7463  func (x Float64x2) TruncScaledResidue(prec uint8) Float64x2
  7464  
  7465  // TruncScaledResidue computes the difference after truncating elements with the specified precision.
  7466  //
  7467  // prec should be a constant for best performance; a non-constant value will be translated into a jump table.
  7468  //
  7469  // Asm: VREDUCEPD, CPU Feature: AVX512
  7470  func (x Float64x4) TruncScaledResidue(prec uint8) Float64x4
  7471  
  7472  // TruncScaledResidue computes the difference after truncating elements with the specified precision.
  7473  //
  7474  // prec should be a constant for best performance; a non-constant value will be translated into a jump table.
  7475  //
  7476  // Asm: VREDUCEPD, CPU Feature: AVX512
  7477  func (x Float64x8) TruncScaledResidue(prec uint8) Float64x8
  7478  
  7479  /* TruncateToInt8 */
  7480  
  7481  // TruncateToInt8 converts element values to int8.
  7482  // Conversion is done with truncation on the vector elements.
  7483  // Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
  7484  //
  7485  // Asm: VPMOVWB, CPU Feature: AVX512
  7486  func (x Int16x8) TruncateToInt8() Int8x16
  7487  
  7488  // TruncateToInt8 converts element values to int8.
  7489  // Conversion is done with truncation on the vector elements.
  7490  // Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
  7491  //
  7492  // Asm: VPMOVWB, CPU Feature: AVX512
  7493  func (x Int16x16) TruncateToInt8() Int8x16
  7494  
  7495  // TruncateToInt8 converts element values to int8.
  7496  // Conversion is done with truncation on the vector elements.
  7497  //
  7498  // Asm: VPMOVWB, CPU Feature: AVX512
  7499  func (x Int16x32) TruncateToInt8() Int8x32
  7500  
  7501  // TruncateToInt8 converts element values to int8.
  7502  // Conversion is done with truncation on the vector elements.
  7503  // Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
  7504  //
  7505  // Asm: VPMOVDB, CPU Feature: AVX512
  7506  func (x Int32x4) TruncateToInt8() Int8x16
  7507  
  7508  // TruncateToInt8 converts element values to int8.
  7509  // Conversion is done with truncation on the vector elements.
  7510  // Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
  7511  //
  7512  // Asm: VPMOVDB, CPU Feature: AVX512
  7513  func (x Int32x8) TruncateToInt8() Int8x16
  7514  
  7515  // TruncateToInt8 converts element values to int8.
  7516  // Conversion is done with truncation on the vector elements.
  7517  // Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
  7518  //
  7519  // Asm: VPMOVDB, CPU Feature: AVX512
  7520  func (x Int32x16) TruncateToInt8() Int8x16
  7521  
  7522  // TruncateToInt8 converts element values to int8.
  7523  // Conversion is done with truncation on the vector elements.
  7524  // Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
  7525  //
  7526  // Asm: VPMOVQB, CPU Feature: AVX512
  7527  func (x Int64x2) TruncateToInt8() Int8x16
  7528  
  7529  // TruncateToInt8 converts element values to int8.
  7530  // Conversion is done with truncation on the vector elements.
  7531  // Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
  7532  //
  7533  // Asm: VPMOVQB, CPU Feature: AVX512
  7534  func (x Int64x4) TruncateToInt8() Int8x16
  7535  
  7536  // TruncateToInt8 converts element values to int8.
  7537  // Conversion is done with truncation on the vector elements.
  7538  // Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
  7539  //
  7540  // Asm: VPMOVQB, CPU Feature: AVX512
  7541  func (x Int64x8) TruncateToInt8() Int8x16
  7542  
  7543  /* TruncateToInt16 */
  7544  
  7545  // TruncateToInt16 converts element values to int16.
  7546  // Conversion is done with truncation on the vector elements. NOTE(review): the result has more elements than x; presumably the results are packed to the low elements and the upper elements are zero-cleared, as documented for TruncateToInt8 - confirm.
  7547  //
  7548  // Asm: VPMOVDW, CPU Feature: AVX512
  7549  func (x Int32x4) TruncateToInt16() Int16x8
  7550  
  7551  // TruncateToInt16 converts element values to int16.
  7552  // Conversion is done with truncation on the vector elements.
  7553  //
  7554  // Asm: VPMOVDW, CPU Feature: AVX512
  7555  func (x Int32x8) TruncateToInt16() Int16x8
  7556  
  7557  // TruncateToInt16 converts element values to int16.
  7558  // Conversion is done with truncation on the vector elements.
  7559  //
  7560  // Asm: VPMOVDW, CPU Feature: AVX512
  7561  func (x Int32x16) TruncateToInt16() Int16x16
  7562  
  7563  // TruncateToInt16 converts element values to int16.
  7564  // Conversion is done with truncation on the vector elements. NOTE(review): the result has more elements than x; presumably the results are packed to the low elements and the upper elements are zero-cleared, as documented for TruncateToInt8 - confirm.
  7565  //
  7566  // Asm: VPMOVQW, CPU Feature: AVX512
  7567  func (x Int64x2) TruncateToInt16() Int16x8
  7568  
  7569  // TruncateToInt16 converts element values to int16.
  7570  // Conversion is done with truncation on the vector elements. NOTE(review): the result has more elements than x; presumably the results are packed to the low elements and the upper elements are zero-cleared, as documented for TruncateToInt8 - confirm.
  7571  //
  7572  // Asm: VPMOVQW, CPU Feature: AVX512
  7573  func (x Int64x4) TruncateToInt16() Int16x8
  7574  
  7575  // TruncateToInt16 converts element values to int16.
  7576  // Conversion is done with truncation on the vector elements.
  7577  //
  7578  // Asm: VPMOVQW, CPU Feature: AVX512
  7579  func (x Int64x8) TruncateToInt16() Int16x8
  7580  
  7581  /* TruncateToInt32 */
  7582  
  7583  // TruncateToInt32 converts element values to int32.
  7584  // Conversion is done with truncation on the vector elements. NOTE(review): the result has more elements than x; presumably the results are packed to the low elements and the upper elements are zero-cleared, as documented for TruncateToInt8 - confirm.
  7585  //
  7586  // Asm: VPMOVQD, CPU Feature: AVX512
  7587  func (x Int64x2) TruncateToInt32() Int32x4
  7588  
  7589  // TruncateToInt32 converts element values to int32.
  7590  // Conversion is done with truncation on the vector elements.
  7591  //
  7592  // Asm: VPMOVQD, CPU Feature: AVX512
  7593  func (x Int64x4) TruncateToInt32() Int32x4
  7594  
  7595  // TruncateToInt32 converts element values to int32.
  7596  // Conversion is done with truncation on the vector elements.
  7597  //
  7598  // Asm: VPMOVQD, CPU Feature: AVX512
  7599  func (x Int64x8) TruncateToInt32() Int32x8
  7600  
  7601  /* TruncateToUint8 */
  7602  
  7603  // TruncateToUint8 converts element values to uint8.
  7604  // Conversion is done with truncation on the vector elements.
  7605  // Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
  7606  //
  7607  // Asm: VPMOVWB, CPU Feature: AVX512
  7608  func (x Uint16x8) TruncateToUint8() Uint8x16
  7609  
  7610  // TruncateToUint8 converts element values to uint8.
  7611  // Conversion is done with truncation on the vector elements.
  7612  // Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
  7613  //
  7614  // Asm: VPMOVWB, CPU Feature: AVX512
  7615  func (x Uint16x16) TruncateToUint8() Uint8x16
  7616  
  7617  // TruncateToUint8 converts element values to uint8.
  7618  // Conversion is done with truncation on the vector elements.
  7619  //
  7620  // Asm: VPMOVWB, CPU Feature: AVX512
  7621  func (x Uint16x32) TruncateToUint8() Uint8x32
  7622  
  7623  // TruncateToUint8 converts element values to uint8.
  7624  // Conversion is done with truncation on the vector elements.
  7625  // Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
  7626  //
  7627  // Asm: VPMOVDB, CPU Feature: AVX512
  7628  func (x Uint32x4) TruncateToUint8() Uint8x16
  7629  
  7630  // TruncateToUint8 converts element values to uint8.
  7631  // Conversion is done with truncation on the vector elements.
  7632  // Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
  7633  //
  7634  // Asm: VPMOVDB, CPU Feature: AVX512
  7635  func (x Uint32x8) TruncateToUint8() Uint8x16
  7636  
  7637  // TruncateToUint8 converts element values to uint8.
  7638  // Conversion is done with truncation on the vector elements.
  7639  // Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
  7640  //
  7641  // Asm: VPMOVDB, CPU Feature: AVX512
  7642  func (x Uint32x16) TruncateToUint8() Uint8x16
  7643  
  7644  // TruncateToUint8 converts element values to uint8.
  7645  // Conversion is done with truncation on the vector elements.
  7646  // Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
  7647  //
  7648  // Asm: VPMOVQB, CPU Feature: AVX512
  7649  func (x Uint64x2) TruncateToUint8() Uint8x16
  7650  
  7651  // TruncateToUint8 converts element values to uint8.
  7652  // Conversion is done with truncation on the vector elements.
  7653  // Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
  7654  //
  7655  // Asm: VPMOVQB, CPU Feature: AVX512
  7656  func (x Uint64x4) TruncateToUint8() Uint8x16
  7657  
  7658  // TruncateToUint8 converts element values to uint8.
  7659  // Conversion is done with truncation on the vector elements.
  7660  // Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
  7661  //
  7662  // Asm: VPMOVQB, CPU Feature: AVX512
  7663  func (x Uint64x8) TruncateToUint8() Uint8x16
  7664  
  7665  /* TruncateToUint16 */
  7666  
  7667  // TruncateToUint16 converts element values to uint16.
  7668  // Conversion is done with truncation on the vector elements. NOTE(review): the result has more elements than x; presumably the results are packed to the low elements and the upper elements are zero-cleared, as documented for TruncateToUint8 - confirm.
  7669  //
  7670  // Asm: VPMOVDW, CPU Feature: AVX512
  7671  func (x Uint32x4) TruncateToUint16() Uint16x8
  7672  
  7673  // TruncateToUint16 converts element values to uint16.
  7674  // Conversion is done with truncation on the vector elements.
  7675  //
  7676  // Asm: VPMOVDW, CPU Feature: AVX512
  7677  func (x Uint32x8) TruncateToUint16() Uint16x8
  7678  
  7679  // TruncateToUint16 converts element values to uint16.
  7680  // Conversion is done with truncation on the vector elements.
  7681  //
  7682  // Asm: VPMOVDW, CPU Feature: AVX512
  7683  func (x Uint32x16) TruncateToUint16() Uint16x16
  7684  
  7685  // TruncateToUint16 converts element values to uint16.
  7686  // Conversion is done with truncation on the vector elements. NOTE(review): the result has more elements than x; presumably the results are packed to the low elements and the upper elements are zero-cleared, as documented for TruncateToUint8 - confirm.
  7687  //
  7688  // Asm: VPMOVQW, CPU Feature: AVX512
  7689  func (x Uint64x2) TruncateToUint16() Uint16x8
  7690  
  7691  // TruncateToUint16 converts element values to uint16.
  7692  // Conversion is done with truncation on the vector elements. NOTE(review): the result has more elements than x; presumably the results are packed to the low elements and the upper elements are zero-cleared, as documented for TruncateToUint8 - confirm.
  7693  //
  7694  // Asm: VPMOVQW, CPU Feature: AVX512
  7695  func (x Uint64x4) TruncateToUint16() Uint16x8
  7696  
  7697  // TruncateToUint16 converts element values to uint16.
  7698  // Conversion is done with truncation on the vector elements.
  7699  //
  7700  // Asm: VPMOVQW, CPU Feature: AVX512
  7701  func (x Uint64x8) TruncateToUint16() Uint16x8
  7702  
  7703  /* TruncateToUint32 */
  7704  
  7705  // TruncateToUint32 converts element values to uint32.
  7706  // Conversion is done with truncation on the vector elements. NOTE(review): the result has more elements than x; presumably the results are packed to the low elements and the upper elements are zero-cleared, as documented for TruncateToUint8 - confirm.
  7707  //
  7708  // Asm: VPMOVQD, CPU Feature: AVX512
  7709  func (x Uint64x2) TruncateToUint32() Uint32x4
  7710  
  7711  // TruncateToUint32 converts element values to uint32.
  7712  // Conversion is done with truncation on the vector elements.
  7713  //
  7714  // Asm: VPMOVQD, CPU Feature: AVX512
  7715  func (x Uint64x4) TruncateToUint32() Uint32x4
  7716  
  7717  // TruncateToUint32 converts element values to uint32.
  7718  // Conversion is done with truncation on the vector elements.
  7719  //
  7720  // Asm: VPMOVQD, CPU Feature: AVX512
  7721  func (x Uint64x8) TruncateToUint32() Uint32x8
  7722  
  7723  /* Xor */
  7724  
  7725  // Xor performs a bitwise XOR operation between two vectors.
  7726  //
  7727  // Asm: VPXOR, CPU Feature: AVX
  7728  func (x Int8x16) Xor(y Int8x16) Int8x16
  7729  
  7730  // Xor performs a bitwise XOR operation between two vectors.
  7731  //
  7732  // Asm: VPXOR, CPU Feature: AVX2
  7733  func (x Int8x32) Xor(y Int8x32) Int8x32
  7734  
  7735  // Xor performs a bitwise XOR operation between two vectors.
  7736  //
  7737  // Asm: VPXORD, CPU Feature: AVX512
  7738  func (x Int8x64) Xor(y Int8x64) Int8x64
  7739  
  7740  // Xor performs a bitwise XOR operation between two vectors.
  7741  //
  7742  // Asm: VPXOR, CPU Feature: AVX
  7743  func (x Int16x8) Xor(y Int16x8) Int16x8
  7744  
  7745  // Xor performs a bitwise XOR operation between two vectors.
  7746  //
  7747  // Asm: VPXOR, CPU Feature: AVX2
  7748  func (x Int16x16) Xor(y Int16x16) Int16x16
  7749  
  7750  // Xor performs a bitwise XOR operation between two vectors.
  7751  //
  7752  // Asm: VPXORD, CPU Feature: AVX512
  7753  func (x Int16x32) Xor(y Int16x32) Int16x32
  7754  
  7755  // Xor performs a bitwise XOR operation between two vectors.
  7756  //
  7757  // Asm: VPXOR, CPU Feature: AVX
  7758  func (x Int32x4) Xor(y Int32x4) Int32x4
  7759  
  7760  // Xor performs a bitwise XOR operation between two vectors.
  7761  //
  7762  // Asm: VPXOR, CPU Feature: AVX2
  7763  func (x Int32x8) Xor(y Int32x8) Int32x8
  7764  
  7765  // Xor performs a bitwise XOR operation between two vectors.
  7766  //
  7767  // Asm: VPXORD, CPU Feature: AVX512
  7768  func (x Int32x16) Xor(y Int32x16) Int32x16
  7769  
  7770  // Xor performs a bitwise XOR operation between two vectors.
  7771  //
  7772  // Asm: VPXOR, CPU Feature: AVX
  7773  func (x Int64x2) Xor(y Int64x2) Int64x2
  7774  
  7775  // Xor performs a bitwise XOR operation between two vectors.
  7776  //
  7777  // Asm: VPXOR, CPU Feature: AVX2
  7778  func (x Int64x4) Xor(y Int64x4) Int64x4
  7779  
  7780  // Xor performs a bitwise XOR operation between two vectors.
  7781  //
  7782  // Asm: VPXORQ, CPU Feature: AVX512
  7783  func (x Int64x8) Xor(y Int64x8) Int64x8
  7784  
  7785  // Xor performs a bitwise XOR operation between two vectors.
  7786  //
  7787  // Asm: VPXOR, CPU Feature: AVX
  7788  func (x Uint8x16) Xor(y Uint8x16) Uint8x16
  7789  
  7790  // Xor performs a bitwise XOR operation between two vectors.
  7791  //
  7792  // Asm: VPXOR, CPU Feature: AVX2
  7793  func (x Uint8x32) Xor(y Uint8x32) Uint8x32
  7794  
  7795  // Xor performs a bitwise XOR operation between two vectors.
  7796  //
  7797  // Asm: VPXORD, CPU Feature: AVX512
  7798  func (x Uint8x64) Xor(y Uint8x64) Uint8x64
  7799  
  7800  // Xor performs a bitwise XOR operation between two vectors.
  7801  //
  7802  // Asm: VPXOR, CPU Feature: AVX
  7803  func (x Uint16x8) Xor(y Uint16x8) Uint16x8
  7804  
  7805  // Xor performs a bitwise XOR operation between two vectors.
  7806  //
  7807  // Asm: VPXOR, CPU Feature: AVX2
  7808  func (x Uint16x16) Xor(y Uint16x16) Uint16x16
  7809  
  7810  // Xor performs a bitwise XOR operation between two vectors.
  7811  //
  7812  // Asm: VPXORD, CPU Feature: AVX512
  7813  func (x Uint16x32) Xor(y Uint16x32) Uint16x32
  7814  
  7815  // Xor performs a bitwise XOR operation between two vectors.
  7816  //
  7817  // Asm: VPXOR, CPU Feature: AVX
  7818  func (x Uint32x4) Xor(y Uint32x4) Uint32x4
  7819  
  7820  // Xor performs a bitwise XOR operation between two vectors.
  7821  //
  7822  // Asm: VPXOR, CPU Feature: AVX2
  7823  func (x Uint32x8) Xor(y Uint32x8) Uint32x8
  7824  
  7825  // Xor performs a bitwise XOR operation between two vectors.
  7826  //
  7827  // Asm: VPXORD, CPU Feature: AVX512
  7828  func (x Uint32x16) Xor(y Uint32x16) Uint32x16
  7829  
  7830  // Xor performs a bitwise XOR operation between two vectors.
  7831  //
  7832  // Asm: VPXOR, CPU Feature: AVX
  7833  func (x Uint64x2) Xor(y Uint64x2) Uint64x2
  7834  
  7835  // Xor performs a bitwise XOR operation between two vectors.
  7836  //
  7837  // Asm: VPXOR, CPU Feature: AVX2
  7838  func (x Uint64x4) Xor(y Uint64x4) Uint64x4
  7839  
  7840  // Xor performs a bitwise XOR operation between the vectors x and y.
  7841  //
  7842  // Asm: VPXORQ, CPU Feature: AVX512
  7843  func (x Uint64x8) Xor(y Uint64x8) Uint64x8
  7844  
  7845  // AsFloat64x2 converts from Float32x4 to Float64x2.
  7846  func (from Float32x4) AsFloat64x2() (to Float64x2)
  7847  
  7848  // AsInt8x16 converts from Float32x4 to Int8x16.
  7849  func (from Float32x4) AsInt8x16() (to Int8x16)
  7850  
  7851  // AsInt16x8 converts from Float32x4 to Int16x8.
  7852  func (from Float32x4) AsInt16x8() (to Int16x8)
  7853  
  7854  // AsInt32x4 converts from Float32x4 to Int32x4.
  7855  func (from Float32x4) AsInt32x4() (to Int32x4)
  7856  
  7857  // AsInt64x2 converts from Float32x4 to Int64x2.
  7858  func (from Float32x4) AsInt64x2() (to Int64x2)
  7859  
  7860  // AsUint8x16 converts from Float32x4 to Uint8x16.
  7861  func (from Float32x4) AsUint8x16() (to Uint8x16)
  7862  
  7863  // AsUint16x8 converts from Float32x4 to Uint16x8.
  7864  func (from Float32x4) AsUint16x8() (to Uint16x8)
  7865  
  7866  // AsUint32x4 converts from Float32x4 to Uint32x4.
  7867  func (from Float32x4) AsUint32x4() (to Uint32x4)
  7868  
  7869  // AsUint64x2 converts from Float32x4 to Uint64x2.
  7870  func (from Float32x4) AsUint64x2() (to Uint64x2)
  7871  
  7872  // AsFloat64x4 converts from Float32x8 to Float64x4.
  7873  func (from Float32x8) AsFloat64x4() (to Float64x4)
  7874  
  7875  // AsInt8x32 converts from Float32x8 to Int8x32.
  7876  func (from Float32x8) AsInt8x32() (to Int8x32)
  7877  
  7878  // AsInt16x16 converts from Float32x8 to Int16x16.
  7879  func (from Float32x8) AsInt16x16() (to Int16x16)
  7880  
  7881  // AsInt32x8 converts from Float32x8 to Int32x8.
  7882  func (from Float32x8) AsInt32x8() (to Int32x8)
  7883  
  7884  // AsInt64x4 converts from Float32x8 to Int64x4.
  7885  func (from Float32x8) AsInt64x4() (to Int64x4)
  7886  
  7887  // AsUint8x32 converts from Float32x8 to Uint8x32.
  7888  func (from Float32x8) AsUint8x32() (to Uint8x32)
  7889  
  7890  // AsUint16x16 converts from Float32x8 to Uint16x16.
  7891  func (from Float32x8) AsUint16x16() (to Uint16x16)
  7892  
  7893  // AsUint32x8 converts from Float32x8 to Uint32x8.
  7894  func (from Float32x8) AsUint32x8() (to Uint32x8)
  7895  
  7896  // AsUint64x4 converts from Float32x8 to Uint64x4.
  7897  func (from Float32x8) AsUint64x4() (to Uint64x4)
  7898  
  7899  // AsFloat64x8 converts from Float32x16 to Float64x8.
  7900  func (from Float32x16) AsFloat64x8() (to Float64x8)
  7901  
  7902  // AsInt8x64 converts from Float32x16 to Int8x64.
  7903  func (from Float32x16) AsInt8x64() (to Int8x64)
  7904  
  7905  // AsInt16x32 converts from Float32x16 to Int16x32.
  7906  func (from Float32x16) AsInt16x32() (to Int16x32)
  7907  
  7908  // AsInt32x16 converts from Float32x16 to Int32x16.
  7909  func (from Float32x16) AsInt32x16() (to Int32x16)
  7910  
  7911  // AsInt64x8 converts from Float32x16 to Int64x8.
  7912  func (from Float32x16) AsInt64x8() (to Int64x8)
  7913  
  7914  // AsUint8x64 converts from Float32x16 to Uint8x64.
  7915  func (from Float32x16) AsUint8x64() (to Uint8x64)
  7916  
  7917  // AsUint16x32 converts from Float32x16 to Uint16x32.
  7918  func (from Float32x16) AsUint16x32() (to Uint16x32)
  7919  
  7920  // AsUint32x16 converts from Float32x16 to Uint32x16.
  7921  func (from Float32x16) AsUint32x16() (to Uint32x16)
  7922  
  7923  // AsUint64x8 converts from Float32x16 to Uint64x8.
  7924  func (from Float32x16) AsUint64x8() (to Uint64x8)
  7925  
  7926  // AsFloat32x4 converts from Float64x2 to Float32x4.
  7927  func (from Float64x2) AsFloat32x4() (to Float32x4)
  7928  
  7929  // AsInt8x16 converts from Float64x2 to Int8x16.
  7930  func (from Float64x2) AsInt8x16() (to Int8x16)
  7931  
  7932  // AsInt16x8 converts from Float64x2 to Int16x8.
  7933  func (from Float64x2) AsInt16x8() (to Int16x8)
  7934  
  7935  // AsInt32x4 converts from Float64x2 to Int32x4.
  7936  func (from Float64x2) AsInt32x4() (to Int32x4)
  7937  
  7938  // AsInt64x2 converts from Float64x2 to Int64x2.
  7939  func (from Float64x2) AsInt64x2() (to Int64x2)
  7940  
  7941  // AsUint8x16 converts from Float64x2 to Uint8x16.
  7942  func (from Float64x2) AsUint8x16() (to Uint8x16)
  7943  
  7944  // AsUint16x8 converts from Float64x2 to Uint16x8.
  7945  func (from Float64x2) AsUint16x8() (to Uint16x8)
  7946  
  7947  // AsUint32x4 converts from Float64x2 to Uint32x4.
  7948  func (from Float64x2) AsUint32x4() (to Uint32x4)
  7949  
  7950  // AsUint64x2 converts from Float64x2 to Uint64x2.
  7951  func (from Float64x2) AsUint64x2() (to Uint64x2)
  7952  
  7953  // AsFloat32x8 converts from Float64x4 to Float32x8.
  7954  func (from Float64x4) AsFloat32x8() (to Float32x8)
  7955  
  7956  // AsInt8x32 converts from Float64x4 to Int8x32.
  7957  func (from Float64x4) AsInt8x32() (to Int8x32)
  7958  
  7959  // AsInt16x16 converts from Float64x4 to Int16x16.
  7960  func (from Float64x4) AsInt16x16() (to Int16x16)
  7961  
  7962  // AsInt32x8 converts from Float64x4 to Int32x8.
  7963  func (from Float64x4) AsInt32x8() (to Int32x8)
  7964  
  7965  // AsInt64x4 converts from Float64x4 to Int64x4.
  7966  func (from Float64x4) AsInt64x4() (to Int64x4)
  7967  
  7968  // AsUint8x32 converts from Float64x4 to Uint8x32.
  7969  func (from Float64x4) AsUint8x32() (to Uint8x32)
  7970  
  7971  // AsUint16x16 converts from Float64x4 to Uint16x16.
  7972  func (from Float64x4) AsUint16x16() (to Uint16x16)
  7973  
  7974  // AsUint32x8 converts from Float64x4 to Uint32x8.
  7975  func (from Float64x4) AsUint32x8() (to Uint32x8)
  7976  
  7977  // AsUint64x4 converts from Float64x4 to Uint64x4.
  7978  func (from Float64x4) AsUint64x4() (to Uint64x4)
  7979  
  7980  // AsFloat32x16 converts from Float64x8 to Float32x16.
  7981  func (from Float64x8) AsFloat32x16() (to Float32x16)
  7982  
  7983  // AsInt8x64 converts from Float64x8 to Int8x64.
  7984  func (from Float64x8) AsInt8x64() (to Int8x64)
  7985  
  7986  // AsInt16x32 converts from Float64x8 to Int16x32.
  7987  func (from Float64x8) AsInt16x32() (to Int16x32)
  7988  
  7989  // AsInt32x16 converts from Float64x8 to Int32x16.
  7990  func (from Float64x8) AsInt32x16() (to Int32x16)
  7991  
  7992  // AsInt64x8 converts from Float64x8 to Int64x8.
  7993  func (from Float64x8) AsInt64x8() (to Int64x8)
  7994  
  7995  // AsUint8x64 converts from Float64x8 to Uint8x64.
  7996  func (from Float64x8) AsUint8x64() (to Uint8x64)
  7997  
  7998  // AsUint16x32 converts from Float64x8 to Uint16x32.
  7999  func (from Float64x8) AsUint16x32() (to Uint16x32)
  8000  
  8001  // AsUint32x16 converts from Float64x8 to Uint32x16.
  8002  func (from Float64x8) AsUint32x16() (to Uint32x16)
  8003  
  8004  // AsUint64x8 converts from Float64x8 to Uint64x8.
  8005  func (from Float64x8) AsUint64x8() (to Uint64x8)
  8006  
  8007  // AsFloat32x4 converts from Int8x16 to Float32x4.
  8008  func (from Int8x16) AsFloat32x4() (to Float32x4)
  8009  
  8010  // AsFloat64x2 converts from Int8x16 to Float64x2.
  8011  func (from Int8x16) AsFloat64x2() (to Float64x2)
  8012  
  8013  // AsInt16x8 converts from Int8x16 to Int16x8.
  8014  func (from Int8x16) AsInt16x8() (to Int16x8)
  8015  
  8016  // AsInt32x4 converts from Int8x16 to Int32x4.
  8017  func (from Int8x16) AsInt32x4() (to Int32x4)
  8018  
  8019  // AsInt64x2 converts from Int8x16 to Int64x2.
  8020  func (from Int8x16) AsInt64x2() (to Int64x2)
  8021  
  8022  // AsUint8x16 converts from Int8x16 to Uint8x16.
  8023  func (from Int8x16) AsUint8x16() (to Uint8x16)
  8024  
  8025  // AsUint16x8 converts from Int8x16 to Uint16x8.
  8026  func (from Int8x16) AsUint16x8() (to Uint16x8)
  8027  
  8028  // AsUint32x4 converts from Int8x16 to Uint32x4.
  8029  func (from Int8x16) AsUint32x4() (to Uint32x4)
  8030  
  8031  // AsUint64x2 converts from Int8x16 to Uint64x2.
  8032  func (from Int8x16) AsUint64x2() (to Uint64x2)
  8033  
  8034  // AsFloat32x8 converts from Int8x32 to Float32x8.
  8035  func (from Int8x32) AsFloat32x8() (to Float32x8)
  8036  
  8037  // AsFloat64x4 converts from Int8x32 to Float64x4.
  8038  func (from Int8x32) AsFloat64x4() (to Float64x4)
  8039  
  8040  // AsInt16x16 converts from Int8x32 to Int16x16.
  8041  func (from Int8x32) AsInt16x16() (to Int16x16)
  8042  
  8043  // AsInt32x8 converts from Int8x32 to Int32x8.
  8044  func (from Int8x32) AsInt32x8() (to Int32x8)
  8045  
  8046  // AsInt64x4 converts from Int8x32 to Int64x4.
  8047  func (from Int8x32) AsInt64x4() (to Int64x4)
  8048  
  8049  // AsUint8x32 converts from Int8x32 to Uint8x32.
  8050  func (from Int8x32) AsUint8x32() (to Uint8x32)
  8051  
  8052  // AsUint16x16 converts from Int8x32 to Uint16x16.
  8053  func (from Int8x32) AsUint16x16() (to Uint16x16)
  8054  
  8055  // AsUint32x8 converts from Int8x32 to Uint32x8.
  8056  func (from Int8x32) AsUint32x8() (to Uint32x8)
  8057  
  8058  // AsUint64x4 converts from Int8x32 to Uint64x4.
  8059  func (from Int8x32) AsUint64x4() (to Uint64x4)
  8060  
  8061  // AsFloat32x16 converts from Int8x64 to Float32x16.
  8062  func (from Int8x64) AsFloat32x16() (to Float32x16)
  8063  
  8064  // AsFloat64x8 converts from Int8x64 to Float64x8.
  8065  func (from Int8x64) AsFloat64x8() (to Float64x8)
  8066  
  8067  // AsInt16x32 converts from Int8x64 to Int16x32.
  8068  func (from Int8x64) AsInt16x32() (to Int16x32)
  8069  
  8070  // AsInt32x16 converts from Int8x64 to Int32x16.
  8071  func (from Int8x64) AsInt32x16() (to Int32x16)
  8072  
  8073  // AsInt64x8 converts from Int8x64 to Int64x8.
  8074  func (from Int8x64) AsInt64x8() (to Int64x8)
  8075  
  8076  // AsUint8x64 converts from Int8x64 to Uint8x64.
  8077  func (from Int8x64) AsUint8x64() (to Uint8x64)
  8078  
  8079  // AsUint16x32 converts from Int8x64 to Uint16x32.
  8080  func (from Int8x64) AsUint16x32() (to Uint16x32)
  8081  
  8082  // AsUint32x16 converts from Int8x64 to Uint32x16.
  8083  func (from Int8x64) AsUint32x16() (to Uint32x16)
  8084  
  8085  // AsUint64x8 converts from Int8x64 to Uint64x8.
  8086  func (from Int8x64) AsUint64x8() (to Uint64x8)
  8087  
  8088  // AsFloat32x4 converts from Int16x8 to Float32x4.
  8089  func (from Int16x8) AsFloat32x4() (to Float32x4)
  8090  
  8091  // AsFloat64x2 converts from Int16x8 to Float64x2.
  8092  func (from Int16x8) AsFloat64x2() (to Float64x2)
  8093  
  8094  // AsInt8x16 converts from Int16x8 to Int8x16.
  8095  func (from Int16x8) AsInt8x16() (to Int8x16)
  8096  
  8097  // AsInt32x4 converts from Int16x8 to Int32x4.
  8098  func (from Int16x8) AsInt32x4() (to Int32x4)
  8099  
  8100  // AsInt64x2 converts from Int16x8 to Int64x2.
  8101  func (from Int16x8) AsInt64x2() (to Int64x2)
  8102  
  8103  // AsUint8x16 converts from Int16x8 to Uint8x16.
  8104  func (from Int16x8) AsUint8x16() (to Uint8x16)
  8105  
  8106  // AsUint16x8 converts from Int16x8 to Uint16x8.
  8107  func (from Int16x8) AsUint16x8() (to Uint16x8)
  8108  
  8109  // AsUint32x4 converts from Int16x8 to Uint32x4.
  8110  func (from Int16x8) AsUint32x4() (to Uint32x4)
  8111  
  8112  // AsUint64x2 converts from Int16x8 to Uint64x2.
  8113  func (from Int16x8) AsUint64x2() (to Uint64x2)
  8114  
  8115  // AsFloat32x8 converts from Int16x16 to Float32x8.
  8116  func (from Int16x16) AsFloat32x8() (to Float32x8)
  8117  
  8118  // AsFloat64x4 converts from Int16x16 to Float64x4.
  8119  func (from Int16x16) AsFloat64x4() (to Float64x4)
  8120  
  8121  // AsInt8x32 converts from Int16x16 to Int8x32.
  8122  func (from Int16x16) AsInt8x32() (to Int8x32)
  8123  
  8124  // AsInt32x8 converts from Int16x16 to Int32x8.
  8125  func (from Int16x16) AsInt32x8() (to Int32x8)
  8126  
  8127  // AsInt64x4 converts from Int16x16 to Int64x4.
  8128  func (from Int16x16) AsInt64x4() (to Int64x4)
  8129  
  8130  // AsUint8x32 converts from Int16x16 to Uint8x32.
  8131  func (from Int16x16) AsUint8x32() (to Uint8x32)
  8132  
  8133  // AsUint16x16 converts from Int16x16 to Uint16x16.
  8134  func (from Int16x16) AsUint16x16() (to Uint16x16)
  8135  
  8136  // AsUint32x8 converts from Int16x16 to Uint32x8.
  8137  func (from Int16x16) AsUint32x8() (to Uint32x8)
  8138  
  8139  // AsUint64x4 converts from Int16x16 to Uint64x4.
  8140  func (from Int16x16) AsUint64x4() (to Uint64x4)
  8141  
  8142  // AsFloat32x16 converts from Int16x32 to Float32x16.
  8143  func (from Int16x32) AsFloat32x16() (to Float32x16)
  8144  
  8145  // AsFloat64x8 converts from Int16x32 to Float64x8.
  8146  func (from Int16x32) AsFloat64x8() (to Float64x8)
  8147  
  8148  // AsInt8x64 converts from Int16x32 to Int8x64.
  8149  func (from Int16x32) AsInt8x64() (to Int8x64)
  8150  
  8151  // AsInt32x16 converts from Int16x32 to Int32x16.
  8152  func (from Int16x32) AsInt32x16() (to Int32x16)
  8153  
  8154  // AsInt64x8 converts from Int16x32 to Int64x8.
  8155  func (from Int16x32) AsInt64x8() (to Int64x8)
  8156  
  8157  // AsUint8x64 converts from Int16x32 to Uint8x64.
  8158  func (from Int16x32) AsUint8x64() (to Uint8x64)
  8159  
  8160  // AsUint16x32 converts from Int16x32 to Uint16x32.
  8161  func (from Int16x32) AsUint16x32() (to Uint16x32)
  8162  
  8163  // AsUint32x16 converts from Int16x32 to Uint32x16.
  8164  func (from Int16x32) AsUint32x16() (to Uint32x16)
  8165  
  8166  // AsUint64x8 converts from Int16x32 to Uint64x8.
  8167  func (from Int16x32) AsUint64x8() (to Uint64x8)
  8168  
  8169  // AsFloat32x4 converts from Int32x4 to Float32x4.
  8170  func (from Int32x4) AsFloat32x4() (to Float32x4)
  8171  
  8172  // AsFloat64x2 converts from Int32x4 to Float64x2.
  8173  func (from Int32x4) AsFloat64x2() (to Float64x2)
  8174  
  8175  // AsInt8x16 converts from Int32x4 to Int8x16.
  8176  func (from Int32x4) AsInt8x16() (to Int8x16)
  8177  
  8178  // AsInt16x8 converts from Int32x4 to Int16x8.
  8179  func (from Int32x4) AsInt16x8() (to Int16x8)
  8180  
  8181  // AsInt64x2 converts from Int32x4 to Int64x2.
  8182  func (from Int32x4) AsInt64x2() (to Int64x2)
  8183  
  8184  // AsUint8x16 converts from Int32x4 to Uint8x16.
  8185  func (from Int32x4) AsUint8x16() (to Uint8x16)
  8186  
  8187  // AsUint16x8 converts from Int32x4 to Uint16x8.
  8188  func (from Int32x4) AsUint16x8() (to Uint16x8)
  8189  
  8190  // AsUint32x4 converts from Int32x4 to Uint32x4.
  8191  func (from Int32x4) AsUint32x4() (to Uint32x4)
  8192  
  8193  // AsUint64x2 converts from Int32x4 to Uint64x2.
  8194  func (from Int32x4) AsUint64x2() (to Uint64x2)
  8195  
  8196  // AsFloat32x8 converts from Int32x8 to Float32x8.
  8197  func (from Int32x8) AsFloat32x8() (to Float32x8)
  8198  
  8199  // AsFloat64x4 converts from Int32x8 to Float64x4.
  8200  func (from Int32x8) AsFloat64x4() (to Float64x4)
  8201  
  8202  // AsInt8x32 converts from Int32x8 to Int8x32.
  8203  func (from Int32x8) AsInt8x32() (to Int8x32)
  8204  
  8205  // AsInt16x16 converts from Int32x8 to Int16x16.
  8206  func (from Int32x8) AsInt16x16() (to Int16x16)
  8207  
  8208  // AsInt64x4 converts from Int32x8 to Int64x4.
  8209  func (from Int32x8) AsInt64x4() (to Int64x4)
  8210  
  8211  // AsUint8x32 converts from Int32x8 to Uint8x32.
  8212  func (from Int32x8) AsUint8x32() (to Uint8x32)
  8213  
  8214  // AsUint16x16 converts from Int32x8 to Uint16x16.
  8215  func (from Int32x8) AsUint16x16() (to Uint16x16)
  8216  
  8217  // AsUint32x8 converts from Int32x8 to Uint32x8.
  8218  func (from Int32x8) AsUint32x8() (to Uint32x8)
  8219  
  8220  // AsUint64x4 converts from Int32x8 to Uint64x4.
  8221  func (from Int32x8) AsUint64x4() (to Uint64x4)
  8222  
  8223  // AsFloat32x16 converts from Int32x16 to Float32x16.
  8224  func (from Int32x16) AsFloat32x16() (to Float32x16)
  8225  
  8226  // AsFloat64x8 converts from Int32x16 to Float64x8.
  8227  func (from Int32x16) AsFloat64x8() (to Float64x8)
  8228  
  8229  // AsInt8x64 converts from Int32x16 to Int8x64.
  8230  func (from Int32x16) AsInt8x64() (to Int8x64)
  8231  
  8232  // AsInt16x32 converts from Int32x16 to Int16x32.
  8233  func (from Int32x16) AsInt16x32() (to Int16x32)
  8234  
  8235  // AsInt64x8 converts from Int32x16 to Int64x8.
  8236  func (from Int32x16) AsInt64x8() (to Int64x8)
  8237  
  8238  // AsUint8x64 converts from Int32x16 to Uint8x64.
  8239  func (from Int32x16) AsUint8x64() (to Uint8x64)
  8240  
  8241  // AsUint16x32 converts from Int32x16 to Uint16x32.
  8242  func (from Int32x16) AsUint16x32() (to Uint16x32)
  8243  
  8244  // AsUint32x16 converts from Int32x16 to Uint32x16.
  8245  func (from Int32x16) AsUint32x16() (to Uint32x16)
  8246  
  8247  // AsUint64x8 converts from Int32x16 to Uint64x8.
  8248  func (from Int32x16) AsUint64x8() (to Uint64x8)
  8249  
  8250  // AsFloat32x4 converts from Int64x2 to Float32x4.
  8251  func (from Int64x2) AsFloat32x4() (to Float32x4)
  8252  
  8253  // AsFloat64x2 converts from Int64x2 to Float64x2.
  8254  func (from Int64x2) AsFloat64x2() (to Float64x2)
  8255  
  8256  // AsInt8x16 converts from Int64x2 to Int8x16.
  8257  func (from Int64x2) AsInt8x16() (to Int8x16)
  8258  
  8259  // AsInt16x8 converts from Int64x2 to Int16x8.
  8260  func (from Int64x2) AsInt16x8() (to Int16x8)
  8261  
  8262  // AsInt32x4 converts from Int64x2 to Int32x4.
  8263  func (from Int64x2) AsInt32x4() (to Int32x4)
  8264  
  8265  // AsUint8x16 converts from Int64x2 to Uint8x16.
  8266  func (from Int64x2) AsUint8x16() (to Uint8x16)
  8267  
  8268  // AsUint16x8 converts from Int64x2 to Uint16x8.
  8269  func (from Int64x2) AsUint16x8() (to Uint16x8)
  8270  
  8271  // AsUint32x4 converts from Int64x2 to Uint32x4.
  8272  func (from Int64x2) AsUint32x4() (to Uint32x4)
  8273  
  8274  // AsUint64x2 converts from Int64x2 to Uint64x2.
  8275  func (from Int64x2) AsUint64x2() (to Uint64x2)
  8276  
  8277  // AsFloat32x8 converts from Int64x4 to Float32x8.
  8278  func (from Int64x4) AsFloat32x8() (to Float32x8)
  8279  
  8280  // AsFloat64x4 converts from Int64x4 to Float64x4.
  8281  func (from Int64x4) AsFloat64x4() (to Float64x4)
  8282  
  8283  // AsInt8x32 converts from Int64x4 to Int8x32.
  8284  func (from Int64x4) AsInt8x32() (to Int8x32)
  8285  
  8286  // AsInt16x16 converts from Int64x4 to Int16x16.
  8287  func (from Int64x4) AsInt16x16() (to Int16x16)
  8288  
  8289  // AsInt32x8 converts from Int64x4 to Int32x8.
  8290  func (from Int64x4) AsInt32x8() (to Int32x8)
  8291  
  8292  // AsUint8x32 converts from Int64x4 to Uint8x32.
  8293  func (from Int64x4) AsUint8x32() (to Uint8x32)
  8294  
  8295  // AsUint16x16 converts from Int64x4 to Uint16x16.
  8296  func (from Int64x4) AsUint16x16() (to Uint16x16)
  8297  
  8298  // AsUint32x8 converts from Int64x4 to Uint32x8.
  8299  func (from Int64x4) AsUint32x8() (to Uint32x8)
  8300  
  8301  // AsUint64x4 converts from Int64x4 to Uint64x4.
  8302  func (from Int64x4) AsUint64x4() (to Uint64x4)
  8303  
  8304  // AsFloat32x16 converts from Int64x8 to Float32x16.
  8305  func (from Int64x8) AsFloat32x16() (to Float32x16)
  8306  
  8307  // AsFloat64x8 converts from Int64x8 to Float64x8.
  8308  func (from Int64x8) AsFloat64x8() (to Float64x8)
  8309  
  8310  // AsInt8x64 converts from Int64x8 to Int8x64.
  8311  func (from Int64x8) AsInt8x64() (to Int8x64)
  8312  
  8313  // AsInt16x32 converts from Int64x8 to Int16x32.
  8314  func (from Int64x8) AsInt16x32() (to Int16x32)
  8315  
  8316  // AsInt32x16 converts from Int64x8 to Int32x16.
  8317  func (from Int64x8) AsInt32x16() (to Int32x16)
  8318  
  8319  // AsUint8x64 converts from Int64x8 to Uint8x64.
  8320  func (from Int64x8) AsUint8x64() (to Uint8x64)
  8321  
  8322  // AsUint16x32 converts from Int64x8 to Uint16x32.
  8323  func (from Int64x8) AsUint16x32() (to Uint16x32)
  8324  
  8325  // AsUint32x16 converts from Int64x8 to Uint32x16.
  8326  func (from Int64x8) AsUint32x16() (to Uint32x16)
  8327  
  8328  // AsUint64x8 converts from Int64x8 to Uint64x8.
  8329  func (from Int64x8) AsUint64x8() (to Uint64x8)
  8330  
  8331  // AsFloat32x4 converts from Uint8x16 to Float32x4.
  8332  func (from Uint8x16) AsFloat32x4() (to Float32x4)
  8333  
  8334  // AsFloat64x2 converts from Uint8x16 to Float64x2.
  8335  func (from Uint8x16) AsFloat64x2() (to Float64x2)
  8336  
  8337  // AsInt8x16 converts from Uint8x16 to Int8x16.
  8338  func (from Uint8x16) AsInt8x16() (to Int8x16)
  8339  
  8340  // AsInt16x8 converts from Uint8x16 to Int16x8.
  8341  func (from Uint8x16) AsInt16x8() (to Int16x8)
  8342  
  8343  // AsInt32x4 converts from Uint8x16 to Int32x4.
  8344  func (from Uint8x16) AsInt32x4() (to Int32x4)
  8345  
  8346  // AsInt64x2 converts from Uint8x16 to Int64x2.
  8347  func (from Uint8x16) AsInt64x2() (to Int64x2)
  8348  
  8349  // AsUint16x8 converts from Uint8x16 to Uint16x8.
  8350  func (from Uint8x16) AsUint16x8() (to Uint16x8)
  8351  
  8352  // AsUint32x4 converts from Uint8x16 to Uint32x4.
  8353  func (from Uint8x16) AsUint32x4() (to Uint32x4)
  8354  
  8355  // AsUint64x2 converts from Uint8x16 to Uint64x2.
  8356  func (from Uint8x16) AsUint64x2() (to Uint64x2)
  8357  
  8358  // AsFloat32x8 converts from Uint8x32 to Float32x8.
  8359  func (from Uint8x32) AsFloat32x8() (to Float32x8)
  8360  
  8361  // AsFloat64x4 converts from Uint8x32 to Float64x4.
  8362  func (from Uint8x32) AsFloat64x4() (to Float64x4)
  8363  
  8364  // AsInt8x32 converts from Uint8x32 to Int8x32.
  8365  func (from Uint8x32) AsInt8x32() (to Int8x32)
  8366  
  8367  // AsInt16x16 converts from Uint8x32 to Int16x16.
  8368  func (from Uint8x32) AsInt16x16() (to Int16x16)
  8369  
  8370  // AsInt32x8 converts from Uint8x32 to Int32x8.
  8371  func (from Uint8x32) AsInt32x8() (to Int32x8)
  8372  
  8373  // AsInt64x4 converts from Uint8x32 to Int64x4.
  8374  func (from Uint8x32) AsInt64x4() (to Int64x4)
  8375  
  8376  // AsUint16x16 converts from Uint8x32 to Uint16x16.
  8377  func (from Uint8x32) AsUint16x16() (to Uint16x16)
  8378  
  8379  // AsUint32x8 converts from Uint8x32 to Uint32x8.
  8380  func (from Uint8x32) AsUint32x8() (to Uint32x8)
  8381  
  8382  // AsUint64x4 converts from Uint8x32 to Uint64x4.
  8383  func (from Uint8x32) AsUint64x4() (to Uint64x4)
  8384  
  8385  // AsFloat32x16 converts from Uint8x64 to Float32x16.
  8386  func (from Uint8x64) AsFloat32x16() (to Float32x16)
  8387  
  8388  // AsFloat64x8 converts from Uint8x64 to Float64x8.
  8389  func (from Uint8x64) AsFloat64x8() (to Float64x8)
  8390  
  8391  // AsInt8x64 converts from Uint8x64 to Int8x64.
  8392  func (from Uint8x64) AsInt8x64() (to Int8x64)
  8393  
  8394  // AsInt16x32 converts from Uint8x64 to Int16x32.
  8395  func (from Uint8x64) AsInt16x32() (to Int16x32)
  8396  
  8397  // AsInt32x16 converts from Uint8x64 to Int32x16.
  8398  func (from Uint8x64) AsInt32x16() (to Int32x16)
  8399  
  8400  // AsInt64x8 converts from Uint8x64 to Int64x8.
  8401  func (from Uint8x64) AsInt64x8() (to Int64x8)
  8402  
  8403  // AsUint16x32 converts from Uint8x64 to Uint16x32.
  8404  func (from Uint8x64) AsUint16x32() (to Uint16x32)
  8405  
  8406  // AsUint32x16 converts from Uint8x64 to Uint32x16.
  8407  func (from Uint8x64) AsUint32x16() (to Uint32x16)
  8408  
  8409  // AsUint64x8 converts from Uint8x64 to Uint64x8.
  8410  func (from Uint8x64) AsUint64x8() (to Uint64x8)
  8411  
  8412  // AsFloat32x4 converts from Uint16x8 to Float32x4.
  8413  func (from Uint16x8) AsFloat32x4() (to Float32x4)
  8414  
  8415  // AsFloat64x2 converts from Uint16x8 to Float64x2.
  8416  func (from Uint16x8) AsFloat64x2() (to Float64x2)
  8417  
  8418  // AsInt8x16 converts from Uint16x8 to Int8x16.
  8419  func (from Uint16x8) AsInt8x16() (to Int8x16)
  8420  
  8421  // AsInt16x8 converts from Uint16x8 to Int16x8.
  8422  func (from Uint16x8) AsInt16x8() (to Int16x8)
  8423  
  8424  // AsInt32x4 converts from Uint16x8 to Int32x4.
  8425  func (from Uint16x8) AsInt32x4() (to Int32x4)
  8426  
  8427  // AsInt64x2 converts from Uint16x8 to Int64x2.
  8428  func (from Uint16x8) AsInt64x2() (to Int64x2)
  8429  
  8430  // AsUint8x16 converts from Uint16x8 to Uint8x16.
  8431  func (from Uint16x8) AsUint8x16() (to Uint8x16)
  8432  
  8433  // AsUint32x4 converts from Uint16x8 to Uint32x4.
  8434  func (from Uint16x8) AsUint32x4() (to Uint32x4)
  8435  
  8436  // AsUint64x2 converts from Uint16x8 to Uint64x2.
  8437  func (from Uint16x8) AsUint64x2() (to Uint64x2)
  8438  
  8439  // AsFloat32x8 converts from Uint16x16 to Float32x8.
  8440  func (from Uint16x16) AsFloat32x8() (to Float32x8)
  8441  
  8442  // AsFloat64x4 converts from Uint16x16 to Float64x4.
  8443  func (from Uint16x16) AsFloat64x4() (to Float64x4)
  8444  
  8445  // AsInt8x32 converts from Uint16x16 to Int8x32.
  8446  func (from Uint16x16) AsInt8x32() (to Int8x32)
  8447  
  8448  // AsInt16x16 converts from Uint16x16 to Int16x16.
  8449  func (from Uint16x16) AsInt16x16() (to Int16x16)
  8450  
  8451  // AsInt32x8 converts from Uint16x16 to Int32x8.
  8452  func (from Uint16x16) AsInt32x8() (to Int32x8)
  8453  
  8454  // AsInt64x4 converts from Uint16x16 to Int64x4.
  8455  func (from Uint16x16) AsInt64x4() (to Int64x4)
  8456  
  8457  // AsUint8x32 converts from Uint16x16 to Uint8x32.
  8458  func (from Uint16x16) AsUint8x32() (to Uint8x32)
  8459  
  8460  // AsUint32x8 converts from Uint16x16 to Uint32x8.
  8461  func (from Uint16x16) AsUint32x8() (to Uint32x8)
  8462  
  8463  // AsUint64x4 converts from Uint16x16 to Uint64x4.
  8464  func (from Uint16x16) AsUint64x4() (to Uint64x4)
  8465  
  8466  // AsFloat32x16 converts from Uint16x32 to Float32x16.
  8467  func (from Uint16x32) AsFloat32x16() (to Float32x16)
  8468  
  8469  // AsFloat64x8 converts from Uint16x32 to Float64x8.
  8470  func (from Uint16x32) AsFloat64x8() (to Float64x8)
  8471  
  8472  // AsInt8x64 converts from Uint16x32 to Int8x64.
  8473  func (from Uint16x32) AsInt8x64() (to Int8x64)
  8474  
  8475  // AsInt16x32 converts from Uint16x32 to Int16x32.
  8476  func (from Uint16x32) AsInt16x32() (to Int16x32)
  8477  
  8478  // AsInt32x16 converts from Uint16x32 to Int32x16.
  8479  func (from Uint16x32) AsInt32x16() (to Int32x16)
  8480  
  8481  // AsInt64x8 converts from Uint16x32 to Int64x8.
  8482  func (from Uint16x32) AsInt64x8() (to Int64x8)
  8483  
  8484  // AsUint8x64 converts from Uint16x32 to Uint8x64.
  8485  func (from Uint16x32) AsUint8x64() (to Uint8x64)
  8486  
  8487  // AsUint32x16 converts from Uint16x32 to Uint32x16.
  8488  func (from Uint16x32) AsUint32x16() (to Uint32x16)
  8489  
  8490  // AsUint64x8 converts from Uint16x32 to Uint64x8.
  8491  func (from Uint16x32) AsUint64x8() (to Uint64x8)
  8492  
  8493  // AsFloat32x4 converts from Uint32x4 to Float32x4.
  8494  func (from Uint32x4) AsFloat32x4() (to Float32x4)
  8495  
  8496  // AsFloat64x2 converts from Uint32x4 to Float64x2.
  8497  func (from Uint32x4) AsFloat64x2() (to Float64x2)
  8498  
  8499  // AsInt8x16 converts from Uint32x4 to Int8x16.
  8500  func (from Uint32x4) AsInt8x16() (to Int8x16)
  8501  
  8502  // AsInt16x8 converts from Uint32x4 to Int16x8.
  8503  func (from Uint32x4) AsInt16x8() (to Int16x8)
  8504  
  8505  // AsInt32x4 converts from Uint32x4 to Int32x4.
  8506  func (from Uint32x4) AsInt32x4() (to Int32x4)
  8507  
  8508  // AsInt64x2 converts from Uint32x4 to Int64x2.
  8509  func (from Uint32x4) AsInt64x2() (to Int64x2)
  8510  
  8511  // AsUint8x16 converts from Uint32x4 to Uint8x16.
  8512  func (from Uint32x4) AsUint8x16() (to Uint8x16)
  8513  
  8514  // AsUint16x8 converts from Uint32x4 to Uint16x8.
  8515  func (from Uint32x4) AsUint16x8() (to Uint16x8)
  8516  
  8517  // AsUint64x2 converts from Uint32x4 to Uint64x2.
  8518  func (from Uint32x4) AsUint64x2() (to Uint64x2)
  8519  
  8520  // AsFloat32x8 converts from Uint32x8 to Float32x8.
  8521  func (from Uint32x8) AsFloat32x8() (to Float32x8)
  8522  
  8523  // AsFloat64x4 converts from Uint32x8 to Float64x4.
  8524  func (from Uint32x8) AsFloat64x4() (to Float64x4)
  8525  
  8526  // AsInt8x32 converts from Uint32x8 to Int8x32.
  8527  func (from Uint32x8) AsInt8x32() (to Int8x32)
  8528  
  8529  // AsInt16x16 converts from Uint32x8 to Int16x16.
  8530  func (from Uint32x8) AsInt16x16() (to Int16x16)
  8531  
  8532  // AsInt32x8 converts from Uint32x8 to Int32x8.
  8533  func (from Uint32x8) AsInt32x8() (to Int32x8)
  8534  
  8535  // AsInt64x4 converts from Uint32x8 to Int64x4.
  8536  func (from Uint32x8) AsInt64x4() (to Int64x4)
  8537  
  8538  // AsUint8x32 converts from Uint32x8 to Uint8x32.
  8539  func (from Uint32x8) AsUint8x32() (to Uint8x32)
  8540  
  8541  // AsUint16x16 converts from Uint32x8 to Uint16x16.
  8542  func (from Uint32x8) AsUint16x16() (to Uint16x16)
  8543  
  8544  // AsUint64x4 converts from Uint32x8 to Uint64x4.
  8545  func (from Uint32x8) AsUint64x4() (to Uint64x4)
  8546  
  8547  // AsFloat32x16 converts from Uint32x16 to Float32x16.
  8548  func (from Uint32x16) AsFloat32x16() (to Float32x16)
  8549  
  8550  // AsFloat64x8 converts from Uint32x16 to Float64x8.
  8551  func (from Uint32x16) AsFloat64x8() (to Float64x8)
  8552  
  8553  // AsInt8x64 converts from Uint32x16 to Int8x64.
  8554  func (from Uint32x16) AsInt8x64() (to Int8x64)
  8555  
  8556  // AsInt16x32 converts from Uint32x16 to Int16x32.
  8557  func (from Uint32x16) AsInt16x32() (to Int16x32)
  8558  
  8559  // AsInt32x16 converts from Uint32x16 to Int32x16.
  8560  func (from Uint32x16) AsInt32x16() (to Int32x16)
  8561  
  8562  // AsInt64x8 converts from Uint32x16 to Int64x8.
  8563  func (from Uint32x16) AsInt64x8() (to Int64x8)
  8564  
  8565  // AsUint8x64 converts from Uint32x16 to Uint8x64.
  8566  func (from Uint32x16) AsUint8x64() (to Uint8x64)
  8567  
  8568  // AsUint16x32 converts from Uint32x16 to Uint16x32.
  8569  func (from Uint32x16) AsUint16x32() (to Uint16x32)
  8570  
  8571  // AsUint64x8 converts from Uint32x16 to Uint64x8.
  8572  func (from Uint32x16) AsUint64x8() (to Uint64x8)
  8573  
  8574  // AsFloat32x4 converts from Uint64x2 to Float32x4.
  8575  func (from Uint64x2) AsFloat32x4() (to Float32x4)
  8576  
  8577  // AsFloat64x2 converts from Uint64x2 to Float64x2.
  8578  func (from Uint64x2) AsFloat64x2() (to Float64x2)
  8579  
  8580  // AsInt8x16 converts from Uint64x2 to Int8x16.
  8581  func (from Uint64x2) AsInt8x16() (to Int8x16)
  8582  
  8583  // AsInt16x8 converts from Uint64x2 to Int16x8.
  8584  func (from Uint64x2) AsInt16x8() (to Int16x8)
  8585  
  8586  // AsInt32x4 converts from Uint64x2 to Int32x4.
  8587  func (from Uint64x2) AsInt32x4() (to Int32x4)
  8588  
  8589  // AsInt64x2 converts from Uint64x2 to Int64x2.
  8590  func (from Uint64x2) AsInt64x2() (to Int64x2)
  8591  
  8592  // AsUint8x16 converts from Uint64x2 to Uint8x16.
  8593  func (from Uint64x2) AsUint8x16() (to Uint8x16)
  8594  
  8595  // AsUint16x8 converts from Uint64x2 to Uint16x8.
  8596  func (from Uint64x2) AsUint16x8() (to Uint16x8)
  8597  
  8598  // AsUint32x4 converts from Uint64x2 to Uint32x4.
  8599  func (from Uint64x2) AsUint32x4() (to Uint32x4)
  8600  
  8601  // AsFloat32x8 converts from Uint64x4 to Float32x8.
  8602  func (from Uint64x4) AsFloat32x8() (to Float32x8)
  8603  
  8604  // AsFloat64x4 converts from Uint64x4 to Float64x4.
  8605  func (from Uint64x4) AsFloat64x4() (to Float64x4)
  8606  
  8607  // AsInt8x32 converts from Uint64x4 to Int8x32.
  8608  func (from Uint64x4) AsInt8x32() (to Int8x32)
  8609  
  8610  // AsInt16x16 converts from Uint64x4 to Int16x16.
  8611  func (from Uint64x4) AsInt16x16() (to Int16x16)
  8612  
  8613  // AsInt32x8 converts from Uint64x4 to Int32x8.
  8614  func (from Uint64x4) AsInt32x8() (to Int32x8)
  8615  
  8616  // AsInt64x4 converts from Uint64x4 to Int64x4.
  8617  func (from Uint64x4) AsInt64x4() (to Int64x4)
  8618  
  8619  // AsUint8x32 converts from Uint64x4 to Uint8x32.
  8620  func (from Uint64x4) AsUint8x32() (to Uint8x32)
  8621  
  8622  // AsUint16x16 converts from Uint64x4 to Uint16x16.
  8623  func (from Uint64x4) AsUint16x16() (to Uint16x16)
  8624  
  8625  // AsUint32x8 converts from Uint64x4 to Uint32x8.
  8626  func (from Uint64x4) AsUint32x8() (to Uint32x8)
  8627  
  8628  // Float32x16 converts from Uint64x8 to Float32x16
  8629  func (from Uint64x8) AsFloat32x16() (to Float32x16)
  8630  
  8631  // Float64x8 converts from Uint64x8 to Float64x8
  8632  func (from Uint64x8) AsFloat64x8() (to Float64x8)
  8633  
  8634  // Int8x64 converts from Uint64x8 to Int8x64
  8635  func (from Uint64x8) AsInt8x64() (to Int8x64)
  8636  
  8637  // Int16x32 converts from Uint64x8 to Int16x32
  8638  func (from Uint64x8) AsInt16x32() (to Int16x32)
  8639  
  8640  // Int32x16 converts from Uint64x8 to Int32x16
  8641  func (from Uint64x8) AsInt32x16() (to Int32x16)
  8642  
  8643  // AsInt64x8 converts from Uint64x8 to Int64x8.
  8644  func (from Uint64x8) AsInt64x8() (to Int64x8)
  8645  
  8646  // AsUint8x64 converts from Uint64x8 to Uint8x64.
  8647  func (from Uint64x8) AsUint8x64() (to Uint8x64)
  8648  
  8649  // AsUint16x32 converts from Uint64x8 to Uint16x32.
  8650  func (from Uint64x8) AsUint16x32() (to Uint16x32)
  8651  
  8652  // AsUint32x16 converts from Uint64x8 to Uint32x16.
  8653  func (from Uint64x8) AsUint32x16() (to Uint32x16)
  8654  
  8655  // ToInt8x16 converts from Mask8x16 to Int8x16.
  8656  func (from Mask8x16) ToInt8x16() (to Int8x16)
  8657  
  8658  // asMask converts from Int8x16 to Mask8x16, the opposite direction to Mask8x16.ToInt8x16.
  8659  func (from Int8x16) asMask() (to Mask8x16)
  8660  
  8661  // And combines the masks x and y and returns the result as a Mask8x16.
        // NOTE(review): presumably the element-wise logical AND of the two masks — confirm.
        func (x Mask8x16) And(y Mask8x16) Mask8x16
  8662  
  8663  // Or combines the masks x and y and returns the result as a Mask8x16.
        // NOTE(review): presumably the element-wise logical OR of the two masks — confirm.
        func (x Mask8x16) Or(y Mask8x16) Mask8x16
  8664  
  8665  // ToInt8x32 converts from Mask8x32 to Int8x32.
  8666  func (from Mask8x32) ToInt8x32() (to Int8x32)
  8667  
  8668  // asMask converts from Int8x32 to Mask8x32, the opposite direction to Mask8x32.ToInt8x32.
  8669  func (from Int8x32) asMask() (to Mask8x32)
  8670  
  8671  // And combines the masks x and y and returns the result as a Mask8x32.
        // NOTE(review): presumably the element-wise logical AND of the two masks — confirm.
        func (x Mask8x32) And(y Mask8x32) Mask8x32
  8672  
  8673  // Or combines the masks x and y and returns the result as a Mask8x32.
        // NOTE(review): presumably the element-wise logical OR of the two masks — confirm.
        func (x Mask8x32) Or(y Mask8x32) Mask8x32
  8674  
  8675  // ToInt8x64 converts from Mask8x64 to Int8x64.
  8676  func (from Mask8x64) ToInt8x64() (to Int8x64)
  8677  
  8678  // asMask converts from Int8x64 to Mask8x64, the opposite direction to Mask8x64.ToInt8x64.
  8679  func (from Int8x64) asMask() (to Mask8x64)
  8680  
  8681  // And combines the masks x and y and returns the result as a Mask8x64.
        // NOTE(review): presumably the element-wise logical AND of the two masks — confirm.
        func (x Mask8x64) And(y Mask8x64) Mask8x64
  8682  
  8683  // Or combines the masks x and y and returns the result as a Mask8x64.
        // NOTE(review): presumably the element-wise logical OR of the two masks — confirm.
        func (x Mask8x64) Or(y Mask8x64) Mask8x64
  8684  
  8685  // ToInt16x8 converts from Mask16x8 to Int16x8.
  8686  func (from Mask16x8) ToInt16x8() (to Int16x8)
  8687  
  8688  // asMask converts from Int16x8 to Mask16x8, the opposite direction to Mask16x8.ToInt16x8.
  8689  func (from Int16x8) asMask() (to Mask16x8)
  8690  
  8691  // And combines the masks x and y and returns the result as a Mask16x8.
        // NOTE(review): presumably the element-wise logical AND of the two masks — confirm.
        func (x Mask16x8) And(y Mask16x8) Mask16x8
  8692  
  8693  // Or combines the masks x and y and returns the result as a Mask16x8.
        // NOTE(review): presumably the element-wise logical OR of the two masks — confirm.
        func (x Mask16x8) Or(y Mask16x8) Mask16x8
  8694  
  8695  // ToInt16x16 converts from Mask16x16 to Int16x16.
  8696  func (from Mask16x16) ToInt16x16() (to Int16x16)
  8697  
  8698  // asMask converts from Int16x16 to Mask16x16, the opposite direction to Mask16x16.ToInt16x16.
  8699  func (from Int16x16) asMask() (to Mask16x16)
  8700  
  8701  // And combines the masks x and y and returns the result as a Mask16x16.
        // NOTE(review): presumably the element-wise logical AND of the two masks — confirm.
        func (x Mask16x16) And(y Mask16x16) Mask16x16
  8702  
  8703  // Or combines the masks x and y and returns the result as a Mask16x16.
        // NOTE(review): presumably the element-wise logical OR of the two masks — confirm.
        func (x Mask16x16) Or(y Mask16x16) Mask16x16
  8704  
  8705  // ToInt16x32 converts from Mask16x32 to Int16x32.
  8706  func (from Mask16x32) ToInt16x32() (to Int16x32)
  8707  
  8708  // asMask converts from Int16x32 to Mask16x32, the opposite direction to Mask16x32.ToInt16x32.
  8709  func (from Int16x32) asMask() (to Mask16x32)
  8710  
  8711  // And combines the masks x and y and returns the result as a Mask16x32.
        // NOTE(review): presumably the element-wise logical AND of the two masks — confirm.
        func (x Mask16x32) And(y Mask16x32) Mask16x32
  8712  
  8713  // Or combines the masks x and y and returns the result as a Mask16x32.
        // NOTE(review): presumably the element-wise logical OR of the two masks — confirm.
        func (x Mask16x32) Or(y Mask16x32) Mask16x32
  8714  
  8715  // ToInt32x4 converts from Mask32x4 to Int32x4.
  8716  func (from Mask32x4) ToInt32x4() (to Int32x4)
  8717  
  8718  // asMask converts from Int32x4 to Mask32x4, the opposite direction to Mask32x4.ToInt32x4.
  8719  func (from Int32x4) asMask() (to Mask32x4)
  8720  
  8721  // And combines the masks x and y and returns the result as a Mask32x4.
        // NOTE(review): presumably the element-wise logical AND of the two masks — confirm.
        func (x Mask32x4) And(y Mask32x4) Mask32x4
  8722  
  8723  // Or combines the masks x and y and returns the result as a Mask32x4.
        // NOTE(review): presumably the element-wise logical OR of the two masks — confirm.
        func (x Mask32x4) Or(y Mask32x4) Mask32x4
  8724  
  8725  // ToInt32x8 converts from Mask32x8 to Int32x8.
  8726  func (from Mask32x8) ToInt32x8() (to Int32x8)
  8727  
  8728  // asMask converts from Int32x8 to Mask32x8, the opposite direction to Mask32x8.ToInt32x8.
  8729  func (from Int32x8) asMask() (to Mask32x8)
  8730  
  8731  // And combines the masks x and y and returns the result as a Mask32x8.
        // NOTE(review): presumably the element-wise logical AND of the two masks — confirm.
        func (x Mask32x8) And(y Mask32x8) Mask32x8
  8732  
  8733  // Or combines the masks x and y and returns the result as a Mask32x8.
        // NOTE(review): presumably the element-wise logical OR of the two masks — confirm.
        func (x Mask32x8) Or(y Mask32x8) Mask32x8
  8734  
  8735  // ToInt32x16 converts from Mask32x16 to Int32x16.
  8736  func (from Mask32x16) ToInt32x16() (to Int32x16)
  8737  
  8738  // asMask converts from Int32x16 to Mask32x16, the opposite direction to Mask32x16.ToInt32x16.
  8739  func (from Int32x16) asMask() (to Mask32x16)
  8740  
  8741  // And combines the masks x and y and returns the result as a Mask32x16.
        // NOTE(review): presumably the element-wise logical AND of the two masks — confirm.
        func (x Mask32x16) And(y Mask32x16) Mask32x16
  8742  
  8743  // Or combines the masks x and y and returns the result as a Mask32x16.
        // NOTE(review): presumably the element-wise logical OR of the two masks — confirm.
        func (x Mask32x16) Or(y Mask32x16) Mask32x16
  8744  
  8745  // ToInt64x2 converts from Mask64x2 to Int64x2.
  8746  func (from Mask64x2) ToInt64x2() (to Int64x2)
  8747  
  8748  // asMask converts from Int64x2 to Mask64x2, the opposite direction to Mask64x2.ToInt64x2.
  8749  func (from Int64x2) asMask() (to Mask64x2)
  8750  
  8751  // And combines the masks x and y and returns the result as a Mask64x2.
        // NOTE(review): presumably the element-wise logical AND of the two masks — confirm.
        func (x Mask64x2) And(y Mask64x2) Mask64x2
  8752  
  8753  // Or combines the masks x and y and returns the result as a Mask64x2.
        // NOTE(review): presumably the element-wise logical OR of the two masks — confirm.
        func (x Mask64x2) Or(y Mask64x2) Mask64x2
  8754  
  8755  // ToInt64x4 converts from Mask64x4 to Int64x4.
  8756  func (from Mask64x4) ToInt64x4() (to Int64x4)
  8757  
  8758  // asMask converts from Int64x4 to Mask64x4, the opposite direction to Mask64x4.ToInt64x4.
  8759  func (from Int64x4) asMask() (to Mask64x4)
  8760  
  8761  // And combines the masks x and y and returns the result as a Mask64x4.
        // NOTE(review): presumably the element-wise logical AND of the two masks — confirm.
        func (x Mask64x4) And(y Mask64x4) Mask64x4
  8762  
  8763  // Or combines the masks x and y and returns the result as a Mask64x4.
        // NOTE(review): presumably the element-wise logical OR of the two masks — confirm.
        func (x Mask64x4) Or(y Mask64x4) Mask64x4
  8764  
  8765  // ToInt64x8 converts from Mask64x8 to Int64x8.
  8766  func (from Mask64x8) ToInt64x8() (to Int64x8)
  8767  
  8768  // asMask converts from Int64x8 to Mask64x8, the opposite direction to Mask64x8.ToInt64x8.
  8769  func (from Int64x8) asMask() (to Mask64x8)
  8770  
  8771  // And combines the masks x and y and returns the result as a Mask64x8.
        // NOTE(review): presumably the element-wise logical AND of the two masks — confirm.
        func (x Mask64x8) And(y Mask64x8) Mask64x8
  8772  
  8773  // Or combines the masks x and y and returns the result as a Mask64x8.
        // NOTE(review): presumably the element-wise logical OR of the two masks — confirm.
        func (x Mask64x8) Or(y Mask64x8) Mask64x8
  8774  

View as plain text