Source file src/cmd/compile/internal/ssa/rewrite.go

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package ssa
     6  
     7  import (
     8  	"cmd/compile/internal/base"
     9  	"cmd/compile/internal/logopt"
    10  	"cmd/compile/internal/reflectdata"
    11  	"cmd/compile/internal/types"
    12  	"cmd/internal/obj"
    13  	"cmd/internal/obj/s390x"
    14  	"cmd/internal/objabi"
    15  	"cmd/internal/src"
    16  	"encoding/binary"
    17  	"fmt"
    18  	"internal/buildcfg"
    19  	"io"
    20  	"math"
    21  	"math/bits"
    22  	"os"
    23  	"path/filepath"
    24  	"strings"
    25  )
    26  
    27  type deadValueChoice bool
    28  
    29  const (
    30  	leaveDeadValues  deadValueChoice = false
    31  	removeDeadValues                 = true
    32  )
    33  
    34  // deadcode indicates whether rewrite should try to remove any values that become dead.
    35  func applyRewrite(f *Func, rb blockRewriter, rv valueRewriter, deadcode deadValueChoice) {
    36  	// repeat rewrites until we find no more rewrites
    37  	pendingLines := f.cachedLineStarts // Holds statement boundaries that need to be moved to a new value/block
    38  	pendingLines.clear()
    39  	debug := f.pass.debug
    40  	if debug > 1 {
    41  		fmt.Printf("%s: rewriting for %s\n", f.pass.name, f.Name)
    42  	}
     43  	// If the number of rewrite iterations reaches itersLimit, we turn on
     44  	// cycle detection at that point. Instead of a fixed limit,
    45  	// size the limit according to func size to allow for cases such
    46  	// as the one in issue #66773.
    47  	itersLimit := f.NumBlocks()
    48  	if itersLimit < 20 {
    49  		itersLimit = 20
    50  	}
    51  	var iters int
    52  	var states map[string]bool
    53  	for {
    54  		change := false
    55  		deadChange := false
    56  		for _, b := range f.Blocks {
    57  			var b0 *Block
    58  			if debug > 1 {
    59  				b0 = new(Block)
    60  				*b0 = *b
    61  				b0.Succs = append([]Edge{}, b.Succs...) // make a new copy, not aliasing
    62  			}
    63  			for i, c := range b.ControlValues() {
    64  				for c.Op == OpCopy {
    65  					c = c.Args[0]
    66  					b.ReplaceControl(i, c)
    67  				}
    68  			}
    69  			if rb(b) {
    70  				change = true
    71  				if debug > 1 {
    72  					fmt.Printf("rewriting %s  ->  %s\n", b0.LongString(), b.LongString())
    73  				}
    74  			}
    75  			for j, v := range b.Values {
    76  				var v0 *Value
    77  				if debug > 1 {
    78  					v0 = new(Value)
    79  					*v0 = *v
    80  					v0.Args = append([]*Value{}, v.Args...) // make a new copy, not aliasing
    81  				}
    82  				if v.Uses == 0 && v.removeable() {
    83  					if v.Op != OpInvalid && deadcode == removeDeadValues {
    84  						// Reset any values that are now unused, so that we decrement
     85  					// the use count of all of their arguments.
    86  						// Not quite a deadcode pass, because it does not handle cycles.
    87  						// But it should help Uses==1 rules to fire.
    88  						v.reset(OpInvalid)
    89  						deadChange = true
    90  					}
    91  					// No point rewriting values which aren't used.
    92  					continue
    93  				}
    94  
    95  				vchange := phielimValue(v)
    96  				if vchange && debug > 1 {
    97  					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
    98  				}
    99  
   100  				// Eliminate copy inputs.
   101  				// If any copy input becomes unused, mark it
   102  				// as invalid and discard its argument. Repeat
   103  				// recursively on the discarded argument.
   104  				// This phase helps remove phantom "dead copy" uses
   105  				// of a value so that a x.Uses==1 rule condition
   106  				// fires reliably.
   107  				for i, a := range v.Args {
   108  					if a.Op != OpCopy {
   109  						continue
   110  					}
   111  					aa := copySource(a)
   112  					v.SetArg(i, aa)
   113  					// If a, a copy, has a line boundary indicator, attempt to find a new value
   114  					// to hold it.  The first candidate is the value that will replace a (aa),
   115  					// if it shares the same block and line and is eligible.
   116  					// The second option is v, which has a as an input.  Because aa is earlier in
   117  					// the data flow, it is the better choice.
   118  					if a.Pos.IsStmt() == src.PosIsStmt {
   119  						if aa.Block == a.Block && aa.Pos.Line() == a.Pos.Line() && aa.Pos.IsStmt() != src.PosNotStmt {
   120  							aa.Pos = aa.Pos.WithIsStmt()
   121  						} else if v.Block == a.Block && v.Pos.Line() == a.Pos.Line() && v.Pos.IsStmt() != src.PosNotStmt {
   122  							v.Pos = v.Pos.WithIsStmt()
   123  						} else {
   124  							// Record the lost line and look for a new home after all rewrites are complete.
   125  							// TODO: it's possible (in FOR loops, in particular) for statement boundaries for the same
   126  							// line to appear in more than one block, but only one block is stored, so if both end
   127  							// up here, then one will be lost.
   128  							pendingLines.set(a.Pos, int32(a.Block.ID))
   129  						}
   130  						a.Pos = a.Pos.WithNotStmt()
   131  					}
   132  					vchange = true
   133  					for a.Uses == 0 {
   134  						b := a.Args[0]
   135  						a.reset(OpInvalid)
   136  						a = b
   137  					}
   138  				}
   139  				if vchange && debug > 1 {
   140  					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
   141  				}
   142  
   143  				// apply rewrite function
   144  				if rv(v) {
   145  					vchange = true
   146  					// If value changed to a poor choice for a statement boundary, move the boundary
   147  					if v.Pos.IsStmt() == src.PosIsStmt {
   148  						if k := nextGoodStatementIndex(v, j, b); k != j {
   149  							v.Pos = v.Pos.WithNotStmt()
   150  							b.Values[k].Pos = b.Values[k].Pos.WithIsStmt()
   151  						}
   152  					}
   153  				}
   154  
   155  				change = change || vchange
   156  				if vchange && debug > 1 {
   157  					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
   158  				}
   159  			}
   160  		}
   161  		if !change && !deadChange {
   162  			break
   163  		}
   164  		iters++
   165  		if (iters > itersLimit || debug >= 2) && change {
   166  			// We've done a suspiciously large number of rewrites (or we're in debug mode).
   167  			// As of Sep 2021, 90% of rewrites complete in 4 iterations or fewer
   168  			// and the maximum value encountered during make.bash is 12.
   169  			// Start checking for cycles. (This is too expensive to do routinely.)
   170  			// Note: we avoid this path for deadChange-only iterations, to fix #51639.
   171  			if states == nil {
   172  				states = make(map[string]bool)
   173  			}
   174  			h := f.rewriteHash()
   175  			if _, ok := states[h]; ok {
   176  				// We've found a cycle.
   177  				// To diagnose it, set debug to 2 and start again,
   178  				// so that we'll print all rules applied until we complete another cycle.
   179  				// If debug is already >= 2, we've already done that, so it's time to crash.
   180  				if debug < 2 {
   181  					debug = 2
   182  					states = make(map[string]bool)
   183  				} else {
   184  					f.Fatalf("rewrite cycle detected")
   185  				}
   186  			}
   187  			states[h] = true
   188  		}
   189  	}
   190  	// remove clobbered values
   191  	for _, b := range f.Blocks {
   192  		j := 0
   193  		for i, v := range b.Values {
   194  			vl := v.Pos
   195  			if v.Op == OpInvalid {
   196  				if v.Pos.IsStmt() == src.PosIsStmt {
   197  					pendingLines.set(vl, int32(b.ID))
   198  				}
   199  				f.freeValue(v)
   200  				continue
   201  			}
   202  			if v.Pos.IsStmt() != src.PosNotStmt && !notStmtBoundary(v.Op) {
   203  				if pl, ok := pendingLines.get(vl); ok && pl == int32(b.ID) {
   204  					pendingLines.remove(vl)
   205  					v.Pos = v.Pos.WithIsStmt()
   206  				}
   207  			}
   208  			if i != j {
   209  				b.Values[j] = v
   210  			}
   211  			j++
   212  		}
   213  		if pl, ok := pendingLines.get(b.Pos); ok && pl == int32(b.ID) {
   214  			b.Pos = b.Pos.WithIsStmt()
   215  			pendingLines.remove(b.Pos)
   216  		}
   217  		b.truncateValues(j)
   218  	}
   219  }
   220  
   221  // Common functions called from rewriting rules
   222  
   223  func is64BitFloat(t *types.Type) bool {
   224  	return t.Size() == 8 && t.IsFloat()
   225  }
   226  
   227  func is32BitFloat(t *types.Type) bool {
   228  	return t.Size() == 4 && t.IsFloat()
   229  }
   230  
   231  func is64BitInt(t *types.Type) bool {
   232  	return t.Size() == 8 && t.IsInteger()
   233  }
   234  
   235  func is32BitInt(t *types.Type) bool {
   236  	return t.Size() == 4 && t.IsInteger()
   237  }
   238  
   239  func is16BitInt(t *types.Type) bool {
   240  	return t.Size() == 2 && t.IsInteger()
   241  }
   242  
   243  func is8BitInt(t *types.Type) bool {
   244  	return t.Size() == 1 && t.IsInteger()
   245  }
   246  
   247  func isPtr(t *types.Type) bool {
   248  	return t.IsPtrShaped()
   249  }
   250  
   251  func copyCompatibleType(t1, t2 *types.Type) bool {
   252  	if t1.Size() != t2.Size() {
   253  		return false
   254  	}
   255  	if t1.IsInteger() {
   256  		return t2.IsInteger()
   257  	}
   258  	if isPtr(t1) {
   259  		return isPtr(t2)
   260  	}
   261  	return t1.Compare(t2) == types.CMPeq
   262  }
   263  
   264  // mergeSym merges two symbolic offsets. There is no real merging of
    265  // offsets; we just pick the non-nil one.
   266  func mergeSym(x, y Sym) Sym {
   267  	if x == nil {
   268  		return y
   269  	}
   270  	if y == nil {
   271  		return x
   272  	}
   273  	panic(fmt.Sprintf("mergeSym with two non-nil syms %v %v", x, y))
   274  }
   275  
   276  func canMergeSym(x, y Sym) bool {
   277  	return x == nil || y == nil
   278  }
   279  
   280  // canMergeLoadClobber reports whether the load can be merged into target without
   281  // invalidating the schedule.
   282  // It also checks that the other non-load argument x is something we
   283  // are ok with clobbering.
   284  func canMergeLoadClobber(target, load, x *Value) bool {
   285  	// The register containing x is going to get clobbered.
   286  	// Don't merge if we still need the value of x.
   287  	// We don't have liveness information here, but we can
   288  	// approximate x dying with:
   289  	//  1) target is x's only use.
   290  	//  2) target is not in a deeper loop than x.
   291  	switch {
   292  	case x.Uses == 2 && x.Op == OpPhi && len(x.Args) == 2 && (x.Args[0] == target || x.Args[1] == target) && target.Uses == 1:
   293  		// This is a simple detector to determine that x is probably
   294  		// not live after target. (It does not need to be perfect,
   295  		// regalloc will issue a reg-reg move to save it if we are wrong.)
   296  		// We have:
   297  		//   x = Phi(?, target)
   298  		//   target = Op(load, x)
   299  		// Because target has only one use as a Phi argument, we can schedule it
   300  		// very late. Hopefully, later than the other use of x. (The other use died
   301  		// between x and target, or exists on another branch entirely).
   302  	case x.Uses > 1:
   303  		return false
   304  	}
   305  	loopnest := x.Block.Func.loopnest()
   306  	if loopnest.depth(target.Block.ID) > loopnest.depth(x.Block.ID) {
   307  		return false
   308  	}
   309  	return canMergeLoad(target, load)
   310  }
   311  
   312  // canMergeLoad reports whether the load can be merged into target without
   313  // invalidating the schedule.
   314  func canMergeLoad(target, load *Value) bool {
   315  	if target.Block.ID != load.Block.ID {
   316  		// If the load is in a different block do not merge it.
   317  		return false
   318  	}
   319  
   320  	// We can't merge the load into the target if the load
   321  	// has more than one use.
   322  	if load.Uses != 1 {
   323  		return false
   324  	}
   325  
   326  	mem := load.MemoryArg()
   327  
   328  	// We need the load's memory arg to still be alive at target. That
   329  	// can't be the case if one of target's args depends on a memory
   330  	// state that is a successor of load's memory arg.
   331  	//
   332  	// For example, it would be invalid to merge load into target in
   333  	// the following situation because newmem has killed oldmem
   334  	// before target is reached:
   335  	//     load = read ... oldmem
   336  	//   newmem = write ... oldmem
   337  	//     arg0 = read ... newmem
   338  	//   target = add arg0 load
   339  	//
   340  	// If the argument comes from a different block then we can exclude
   341  	// it immediately because it must dominate load (which is in the
   342  	// same block as target).
   343  	var args []*Value
   344  	for _, a := range target.Args {
   345  		if a != load && a.Block.ID == target.Block.ID {
   346  			args = append(args, a)
   347  		}
   348  	}
   349  
   350  	// memPreds contains memory states known to be predecessors of load's
   351  	// memory state. It is lazily initialized.
   352  	var memPreds map[*Value]bool
   353  	for i := 0; len(args) > 0; i++ {
   354  		const limit = 100
   355  		if i >= limit {
   356  			// Give up if we have done a lot of iterations.
   357  			return false
   358  		}
   359  		v := args[len(args)-1]
   360  		args = args[:len(args)-1]
   361  		if target.Block.ID != v.Block.ID {
   362  			// Since target and load are in the same block
   363  			// we can stop searching when we leave the block.
   364  			continue
   365  		}
   366  		if v.Op == OpPhi {
   367  			// A Phi implies we have reached the top of the block.
   368  			// The memory phi, if it exists, is always
   369  			// the first logical store in the block.
   370  			continue
   371  		}
   372  		if v.Type.IsTuple() && v.Type.FieldType(1).IsMemory() {
    373  			// We could handle this situation, but it is likely
   374  			// to be very rare.
   375  			return false
   376  		}
   377  		if v.Op.SymEffect()&SymAddr != 0 {
   378  			// This case prevents an operation that calculates the
   379  			// address of a local variable from being forced to schedule
   380  			// before its corresponding VarDef.
   381  			// See issue 28445.
   382  			//   v1 = LOAD ...
   383  			//   v2 = VARDEF
   384  			//   v3 = LEAQ
   385  			//   v4 = CMPQ v1 v3
   386  			// We don't want to combine the CMPQ with the load, because
   387  			// that would force the CMPQ to schedule before the VARDEF, which
   388  			// in turn requires the LEAQ to schedule before the VARDEF.
   389  			return false
   390  		}
   391  		if v.Type.IsMemory() {
   392  			if memPreds == nil {
   393  				// Initialise a map containing memory states
   394  				// known to be predecessors of load's memory
   395  				// state.
   396  				memPreds = make(map[*Value]bool)
   397  				m := mem
   398  				const limit = 50
   399  				for i := 0; i < limit; i++ {
   400  					if m.Op == OpPhi {
   401  						// The memory phi, if it exists, is always
   402  						// the first logical store in the block.
   403  						break
   404  					}
   405  					if m.Block.ID != target.Block.ID {
   406  						break
   407  					}
   408  					if !m.Type.IsMemory() {
   409  						break
   410  					}
   411  					memPreds[m] = true
   412  					if len(m.Args) == 0 {
   413  						break
   414  					}
   415  					m = m.MemoryArg()
   416  				}
   417  			}
   418  
   419  			// We can merge if v is a predecessor of mem.
   420  			//
   421  			// For example, we can merge load into target in the
   422  			// following scenario:
   423  			//      x = read ... v
   424  			//    mem = write ... v
   425  			//   load = read ... mem
   426  			// target = add x load
   427  			if memPreds[v] {
   428  				continue
   429  			}
   430  			return false
   431  		}
   432  		if len(v.Args) > 0 && v.Args[len(v.Args)-1] == mem {
   433  			// If v takes mem as an input then we know mem
   434  			// is valid at this point.
   435  			continue
   436  		}
   437  		for _, a := range v.Args {
   438  			if target.Block.ID == a.Block.ID {
   439  				args = append(args, a)
   440  			}
   441  		}
   442  	}
   443  
   444  	return true
   445  }
   446  
    447  // isSameCall reports whether the call described by aux targets the given named symbol.
   448  func isSameCall(aux Aux, name string) bool {
   449  	fn := aux.(*AuxCall).Fn
   450  	return fn != nil && fn.String() == name
   451  }
   452  
    453  // canLoadUnaligned reports whether the architecture supports unaligned load operations.
   454  func canLoadUnaligned(c *Config) bool {
   455  	return c.ctxt.Arch.Alignment == 1
   456  }
   457  
   458  // nlzX returns the number of leading zeros.
   459  func nlz64(x int64) int { return bits.LeadingZeros64(uint64(x)) }
   460  func nlz32(x int32) int { return bits.LeadingZeros32(uint32(x)) }
   461  func nlz16(x int16) int { return bits.LeadingZeros16(uint16(x)) }
   462  func nlz8(x int8) int   { return bits.LeadingZeros8(uint8(x)) }
   463  
   464  // ntzX returns the number of trailing zeros.
   465  func ntz64(x int64) int { return bits.TrailingZeros64(uint64(x)) }
   466  func ntz32(x int32) int { return bits.TrailingZeros32(uint32(x)) }
   467  func ntz16(x int16) int { return bits.TrailingZeros16(uint16(x)) }
   468  func ntz8(x int8) int   { return bits.TrailingZeros8(uint8(x)) }
   469  
   470  func oneBit(x int64) bool   { return x&(x-1) == 0 && x != 0 }
   471  func oneBit8(x int8) bool   { return x&(x-1) == 0 && x != 0 }
   472  func oneBit16(x int16) bool { return x&(x-1) == 0 && x != 0 }
   473  func oneBit32(x int32) bool { return x&(x-1) == 0 && x != 0 }
   474  func oneBit64(x int64) bool { return x&(x-1) == 0 && x != 0 }
   475  
   476  // nto returns the number of trailing ones.
   477  func nto(x int64) int64 {
   478  	return int64(ntz64(^x))
   479  }
   480  
    481  // logX returns the base-2 logarithm of n.
    482  // n must be a positive power of 2 (isPowerOfTwo returns true).
   483  func log8(n int8) int64 {
   484  	return int64(bits.Len8(uint8(n))) - 1
   485  }
   486  func log16(n int16) int64 {
   487  	return int64(bits.Len16(uint16(n))) - 1
   488  }
   489  func log32(n int32) int64 {
   490  	return int64(bits.Len32(uint32(n))) - 1
   491  }
   492  func log64(n int64) int64 {
   493  	return int64(bits.Len64(uint64(n))) - 1
   494  }
   495  
    496  // log2uint32 returns the base-2 logarithm of uint32(n), with log2(0) = -1.
   497  // Rounds down.
   498  func log2uint32(n int64) int64 {
   499  	return int64(bits.Len32(uint32(n))) - 1
   500  }
   501  
    502  // isPowerOfTwo reports whether n is a power of 2.
   503  func isPowerOfTwo[T int8 | int16 | int32 | int64](n T) bool {
   504  	return n > 0 && n&(n-1) == 0
   505  }
   506  
   507  // isUint64PowerOfTwo reports whether uint64(n) is a power of 2.
   508  func isUint64PowerOfTwo(in int64) bool {
   509  	n := uint64(in)
   510  	return n > 0 && n&(n-1) == 0
   511  }
   512  
   513  // isUint32PowerOfTwo reports whether uint32(n) is a power of 2.
   514  func isUint32PowerOfTwo(in int64) bool {
   515  	n := uint64(uint32(in))
   516  	return n > 0 && n&(n-1) == 0
   517  }
   518  
   519  // is32Bit reports whether n can be represented as a signed 32 bit integer.
   520  func is32Bit(n int64) bool {
   521  	return n == int64(int32(n))
   522  }
   523  
   524  // is16Bit reports whether n can be represented as a signed 16 bit integer.
   525  func is16Bit(n int64) bool {
   526  	return n == int64(int16(n))
   527  }
   528  
   529  // is8Bit reports whether n can be represented as a signed 8 bit integer.
   530  func is8Bit(n int64) bool {
   531  	return n == int64(int8(n))
   532  }
   533  
   534  // isU8Bit reports whether n can be represented as an unsigned 8 bit integer.
   535  func isU8Bit(n int64) bool {
   536  	return n == int64(uint8(n))
   537  }
   538  
   539  // is12Bit reports whether n can be represented as a signed 12 bit integer.
   540  func is12Bit(n int64) bool {
   541  	return -(1<<11) <= n && n < (1<<11)
   542  }
   543  
   544  // isU12Bit reports whether n can be represented as an unsigned 12 bit integer.
   545  func isU12Bit(n int64) bool {
   546  	return 0 <= n && n < (1<<12)
   547  }
   548  
   549  // isU16Bit reports whether n can be represented as an unsigned 16 bit integer.
   550  func isU16Bit(n int64) bool {
   551  	return n == int64(uint16(n))
   552  }
   553  
   554  // isU32Bit reports whether n can be represented as an unsigned 32 bit integer.
   555  func isU32Bit(n int64) bool {
   556  	return n == int64(uint32(n))
   557  }
   558  
   559  // is20Bit reports whether n can be represented as a signed 20 bit integer.
   560  func is20Bit(n int64) bool {
   561  	return -(1<<19) <= n && n < (1<<19)
   562  }
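         // For example, the signed helpers accept the usual two's-complement
         // ranges: is12Bit accepts -2048 through 2047 and is20Bit accepts
         // -524288 through 524287, while isU12Bit accepts 0 through 4095.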
   563  
   564  // b2i translates a boolean value to 0 or 1 for assigning to auxInt.
   565  func b2i(b bool) int64 {
   566  	if b {
   567  		return 1
   568  	}
   569  	return 0
   570  }
   571  
   572  // b2i32 translates a boolean value to 0 or 1.
   573  func b2i32(b bool) int32 {
   574  	if b {
   575  		return 1
   576  	}
   577  	return 0
   578  }
   579  
   580  func canMulStrengthReduce(config *Config, x int64) bool {
   581  	_, ok := config.mulRecipes[x]
   582  	return ok
   583  }
   584  func canMulStrengthReduce32(config *Config, x int32) bool {
   585  	_, ok := config.mulRecipes[int64(x)]
   586  	return ok
   587  }
   588  
   589  // mulStrengthReduce returns v*x evaluated at the location
   590  // (block and source position) of m.
   591  // canMulStrengthReduce must have returned true.
   592  func mulStrengthReduce(m *Value, v *Value, x int64) *Value {
   593  	return v.Block.Func.Config.mulRecipes[x].build(m, v)
   594  }
   595  
   596  // mulStrengthReduce32 returns v*x evaluated at the location
   597  // (block and source position) of m.
   598  // canMulStrengthReduce32 must have returned true.
   599  // The upper 32 bits of m might be set to junk.
   600  func mulStrengthReduce32(m *Value, v *Value, x int32) *Value {
   601  	return v.Block.Func.Config.mulRecipes[int64(x)].build(m, v)
   602  }
   603  
   604  // shiftIsBounded reports whether (left/right) shift Value v is known to be bounded.
   605  // A shift is bounded if it is shifting by less than the width of the shifted value.
   606  func shiftIsBounded(v *Value) bool {
   607  	return v.AuxInt != 0
   608  }
   609  
   610  // canonLessThan returns whether x is "ordered" less than y, for purposes of normalizing
   611  // generated code as much as possible.
   612  func canonLessThan(x, y *Value) bool {
   613  	if x.Op != y.Op {
   614  		return x.Op < y.Op
   615  	}
   616  	if !x.Pos.SameFileAndLine(y.Pos) {
   617  		return x.Pos.Before(y.Pos)
   618  	}
   619  	return x.ID < y.ID
   620  }
   621  
   622  // truncate64Fto32F converts a float64 value to a float32 preserving the bit pattern
   623  // of the mantissa. It will panic if the truncation results in lost information.
   624  func truncate64Fto32F(f float64) float32 {
   625  	if !isExactFloat32(f) {
   626  		panic("truncate64Fto32F: truncation is not exact")
   627  	}
   628  	if !math.IsNaN(f) {
   629  		return float32(f)
   630  	}
   631  	// NaN bit patterns aren't necessarily preserved across conversion
   632  	// instructions so we need to do the conversion manually.
   633  	b := math.Float64bits(f)
   634  	m := b & ((1 << 52) - 1) // mantissa (a.k.a. significand)
   635  	//          | sign                  | exponent   | mantissa       |
   636  	r := uint32(((b >> 32) & (1 << 31)) | 0x7f800000 | (m >> (52 - 23)))
   637  	return math.Float32frombits(r)
   638  }
   639  
   640  // extend32Fto64F converts a float32 value to a float64 value preserving the bit
   641  // pattern of the mantissa.
   642  func extend32Fto64F(f float32) float64 {
   643  	if !math.IsNaN(float64(f)) {
   644  		return float64(f)
   645  	}
   646  	// NaN bit patterns aren't necessarily preserved across conversion
   647  	// instructions so we need to do the conversion manually.
   648  	b := uint64(math.Float32bits(f))
   649  	//   | sign                  | exponent      | mantissa                    |
   650  	r := ((b << 32) & (1 << 63)) | (0x7ff << 52) | ((b & 0x7fffff) << (52 - 23))
   651  	return math.Float64frombits(r)
   652  }
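         // Worked example of the bit manipulation above: the quiet NaN float32
         // pattern 0x7fc00000 maps to the float64 pattern 0x7ff8000000000000 --
         // sign 0, exponent all ones, and the 23-bit mantissa 0x400000 shifted
         // left by 52-23 = 29 bits.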
   653  
   654  // DivisionNeedsFixUp reports whether the division needs fix-up code.
   655  func DivisionNeedsFixUp(v *Value) bool {
   656  	return v.AuxInt == 0
   657  }
   658  
   659  // auxFrom64F encodes a float64 value so it can be stored in an AuxInt.
   660  func auxFrom64F(f float64) int64 {
   661  	if f != f {
   662  		panic("can't encode a NaN in AuxInt field")
   663  	}
   664  	return int64(math.Float64bits(f))
   665  }
   666  
   667  // auxFrom32F encodes a float32 value so it can be stored in an AuxInt.
   668  func auxFrom32F(f float32) int64 {
   669  	if f != f {
   670  		panic("can't encode a NaN in AuxInt field")
   671  	}
   672  	return int64(math.Float64bits(extend32Fto64F(f)))
   673  }
   674  
   675  // auxTo32F decodes a float32 from the AuxInt value provided.
   676  func auxTo32F(i int64) float32 {
   677  	return truncate64Fto32F(math.Float64frombits(uint64(i)))
   678  }
   679  
   680  // auxTo64F decodes a float64 from the AuxInt value provided.
   681  func auxTo64F(i int64) float64 {
   682  	return math.Float64frombits(uint64(i))
   683  }
   684  
   685  func auxIntToBool(i int64) bool {
   686  	if i == 0 {
   687  		return false
   688  	}
   689  	return true
   690  }
   691  func auxIntToInt8(i int64) int8 {
   692  	return int8(i)
   693  }
   694  func auxIntToInt16(i int64) int16 {
   695  	return int16(i)
   696  }
   697  func auxIntToInt32(i int64) int32 {
   698  	return int32(i)
   699  }
   700  func auxIntToInt64(i int64) int64 {
   701  	return i
   702  }
   703  func auxIntToUint8(i int64) uint8 {
   704  	return uint8(i)
   705  }
   706  func auxIntToFloat32(i int64) float32 {
   707  	return float32(math.Float64frombits(uint64(i)))
   708  }
   709  func auxIntToFloat64(i int64) float64 {
   710  	return math.Float64frombits(uint64(i))
   711  }
   712  func auxIntToValAndOff(i int64) ValAndOff {
   713  	return ValAndOff(i)
   714  }
   715  func auxIntToArm64BitField(i int64) arm64BitField {
   716  	return arm64BitField(i)
   717  }
   718  func auxIntToInt128(x int64) int128 {
   719  	if x != 0 {
   720  		panic("nonzero int128 not allowed")
   721  	}
   722  	return 0
   723  }
   724  func auxIntToFlagConstant(x int64) flagConstant {
   725  	return flagConstant(x)
   726  }
   727  
   728  func auxIntToOp(cc int64) Op {
   729  	return Op(cc)
   730  }
   731  
   732  func boolToAuxInt(b bool) int64 {
   733  	if b {
   734  		return 1
   735  	}
   736  	return 0
   737  }
   738  func int8ToAuxInt(i int8) int64 {
   739  	return int64(i)
   740  }
   741  func int16ToAuxInt(i int16) int64 {
   742  	return int64(i)
   743  }
   744  func int32ToAuxInt(i int32) int64 {
   745  	return int64(i)
   746  }
   747  func int64ToAuxInt(i int64) int64 {
   748  	return int64(i)
   749  }
   750  func uint8ToAuxInt(i uint8) int64 {
   751  	return int64(int8(i))
   752  }
   753  func float32ToAuxInt(f float32) int64 {
   754  	return int64(math.Float64bits(float64(f)))
   755  }
   756  func float64ToAuxInt(f float64) int64 {
   757  	return int64(math.Float64bits(f))
   758  }
   759  func valAndOffToAuxInt(v ValAndOff) int64 {
   760  	return int64(v)
   761  }
   762  func arm64BitFieldToAuxInt(v arm64BitField) int64 {
   763  	return int64(v)
   764  }
   765  func int128ToAuxInt(x int128) int64 {
   766  	if x != 0 {
   767  		panic("nonzero int128 not allowed")
   768  	}
   769  	return 0
   770  }
   771  func flagConstantToAuxInt(x flagConstant) int64 {
   772  	return int64(x)
   773  }
   774  
   775  func opToAuxInt(o Op) int64 {
   776  	return int64(o)
   777  }
   778  
   779  // Aux is an interface to hold miscellaneous data in Blocks and Values.
   780  type Aux interface {
   781  	CanBeAnSSAAux()
   782  }
   783  
   784  // for now only used to mark moves that need to avoid clobbering flags
   785  type auxMark bool
   786  
   787  func (auxMark) CanBeAnSSAAux() {}
   788  
   789  var AuxMark auxMark
   790  
   791  // stringAux wraps string values for use in Aux.
   792  type stringAux string
   793  
   794  func (stringAux) CanBeAnSSAAux() {}
   795  
   796  func auxToString(i Aux) string {
   797  	return string(i.(stringAux))
   798  }
   799  func auxToSym(i Aux) Sym {
   800  	// TODO: kind of a hack - allows nil interface through
   801  	s, _ := i.(Sym)
   802  	return s
   803  }
   804  func auxToType(i Aux) *types.Type {
   805  	return i.(*types.Type)
   806  }
   807  func auxToCall(i Aux) *AuxCall {
   808  	return i.(*AuxCall)
   809  }
   810  func auxToS390xCCMask(i Aux) s390x.CCMask {
   811  	return i.(s390x.CCMask)
   812  }
   813  func auxToS390xRotateParams(i Aux) s390x.RotateParams {
   814  	return i.(s390x.RotateParams)
   815  }
   816  
   817  func StringToAux(s string) Aux {
   818  	return stringAux(s)
   819  }
   820  func symToAux(s Sym) Aux {
   821  	return s
   822  }
   823  func callToAux(s *AuxCall) Aux {
   824  	return s
   825  }
   826  func typeToAux(t *types.Type) Aux {
   827  	return t
   828  }
   829  func s390xCCMaskToAux(c s390x.CCMask) Aux {
   830  	return c
   831  }
   832  func s390xRotateParamsToAux(r s390x.RotateParams) Aux {
   833  	return r
   834  }
   835  
   836  // uaddOvf reports whether unsigned a+b would overflow.
   837  func uaddOvf(a, b int64) bool {
   838  	return uint64(a)+uint64(b) < uint64(a)
   839  }
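         // For example, uaddOvf(-1, 1) is true: uint64(-1)+1 wraps around to 0,
         // which is less than uint64(-1).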
   840  
   841  // loadLSymOffset simulates reading a word at an offset into a
   842  // read-only symbol's runtime memory. If it would read a pointer to
   843  // another symbol, that symbol is returned. Otherwise, it returns nil.
   844  func loadLSymOffset(lsym *obj.LSym, offset int64) *obj.LSym {
   845  	if lsym.Type != objabi.SRODATA {
   846  		return nil
   847  	}
   848  
   849  	for _, r := range lsym.R {
   850  		if int64(r.Off) == offset && r.Type&^objabi.R_WEAK == objabi.R_ADDR && r.Add == 0 {
   851  			return r.Sym
   852  		}
   853  	}
   854  
   855  	return nil
   856  }
   857  
   858  func devirtLECall(v *Value, sym *obj.LSym) *Value {
   859  	v.Op = OpStaticLECall
   860  	auxcall := v.Aux.(*AuxCall)
   861  	auxcall.Fn = sym
   862  	// Remove first arg
   863  	v.Args[0].Uses--
   864  	copy(v.Args[0:], v.Args[1:])
   865  	v.Args[len(v.Args)-1] = nil // aid GC
   866  	v.Args = v.Args[:len(v.Args)-1]
   867  	if f := v.Block.Func; f.pass.debug > 0 {
   868  		f.Warnl(v.Pos, "de-virtualizing call")
   869  	}
   870  	return v
   871  }
   872  
   873  // isSamePtr reports whether p1 and p2 point to the same address.
   874  func isSamePtr(p1, p2 *Value) bool {
   875  	if p1 == p2 {
   876  		return true
   877  	}
   878  	if p1.Op != p2.Op {
   879  		for p1.Op == OpOffPtr && p1.AuxInt == 0 {
   880  			p1 = p1.Args[0]
   881  		}
   882  		for p2.Op == OpOffPtr && p2.AuxInt == 0 {
   883  			p2 = p2.Args[0]
   884  		}
   885  		if p1 == p2 {
   886  			return true
   887  		}
   888  		if p1.Op != p2.Op {
   889  			return false
   890  		}
   891  	}
   892  	switch p1.Op {
   893  	case OpOffPtr:
   894  		return p1.AuxInt == p2.AuxInt && isSamePtr(p1.Args[0], p2.Args[0])
   895  	case OpAddr, OpLocalAddr:
   896  		return p1.Aux == p2.Aux
   897  	case OpAddPtr:
   898  		return p1.Args[1] == p2.Args[1] && isSamePtr(p1.Args[0], p2.Args[0])
   899  	}
   900  	return false
   901  }
   902  
   903  func isStackPtr(v *Value) bool {
   904  	for v.Op == OpOffPtr || v.Op == OpAddPtr {
   905  		v = v.Args[0]
   906  	}
   907  	return v.Op == OpSP || v.Op == OpLocalAddr
   908  }
   909  
   910  // disjoint reports whether the memory region specified by [p1:p1+n1)
   911  // does not overlap with [p2:p2+n2).
   912  // A return value of false does not imply the regions overlap.
   913  func disjoint(p1 *Value, n1 int64, p2 *Value, n2 int64) bool {
   914  	if n1 == 0 || n2 == 0 {
   915  		return true
   916  	}
   917  	if p1 == p2 {
   918  		return false
   919  	}
   920  	baseAndOffset := func(ptr *Value) (base *Value, offset int64) {
   921  		base, offset = ptr, 0
   922  		for base.Op == OpOffPtr {
   923  			offset += base.AuxInt
   924  			base = base.Args[0]
   925  		}
   926  		if opcodeTable[base.Op].nilCheck {
   927  			base = base.Args[0]
   928  		}
   929  		return base, offset
   930  	}
   931  
   932  	// Run types-based analysis
   933  	if disjointTypes(p1.Type, p2.Type) {
   934  		return true
   935  	}
   936  
   937  	p1, off1 := baseAndOffset(p1)
   938  	p2, off2 := baseAndOffset(p2)
   939  	if isSamePtr(p1, p2) {
   940  		return !overlap(off1, n1, off2, n2)
   941  	}
   942  	// p1 and p2 are not the same, so if they are both OpAddrs then
   943  	// they point to different variables.
   944  	// If one pointer is on the stack and the other is an argument
   945  	// then they can't overlap.
   946  	switch p1.Op {
   947  	case OpAddr, OpLocalAddr:
   948  		if p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpSP {
   949  			return true
   950  		}
   951  		return (p2.Op == OpArg || p2.Op == OpArgIntReg) && p1.Args[0].Op == OpSP
   952  	case OpArg, OpArgIntReg:
   953  		if p2.Op == OpSP || p2.Op == OpLocalAddr {
   954  			return true
   955  		}
   956  	case OpSP:
   957  		return p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpArg || p2.Op == OpArgIntReg || p2.Op == OpSP
   958  	}
   959  	return false
   960  }
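         // For example, two OpOffPtr values off the same base at offsets 0 and 8,
         // each of size 8, are reported as disjoint: once the offsets are folded
         // the check reduces to !overlap(0, 8, 8, 8), which is true.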
   961  
   962  // disjointTypes reports whether a memory region pointed to by a pointer of type
   963  // t1 does not overlap with a memory region pointed to by a pointer of type t2 --
   964  // based on type aliasing rules.
   965  func disjointTypes(t1 *types.Type, t2 *types.Type) bool {
   966  	// Unsafe pointer can alias with anything.
   967  	if t1.IsUnsafePtr() || t2.IsUnsafePtr() {
   968  		return false
   969  	}
   970  
   971  	if !t1.IsPtr() || !t2.IsPtr() {
   972  		panic("disjointTypes: one of arguments is not a pointer")
   973  	}
   974  
   975  	t1 = t1.Elem()
   976  	t2 = t2.Elem()
   977  
    978  	// Not-in-heap types are not supported -- they are rare and unimportant; also,
    979  	// the type.HasPointers check doesn't work correctly for them.
   980  	if t1.NotInHeap() || t2.NotInHeap() {
   981  		return false
   982  	}
   983  
   984  	isPtrShaped := func(t *types.Type) bool { return int(t.Size()) == types.PtrSize && t.HasPointers() }
   985  
   986  	// Pointers and non-pointers are disjoint (https://pkg.go.dev/unsafe#Pointer).
   987  	if (isPtrShaped(t1) && !t2.HasPointers()) ||
   988  		(isPtrShaped(t2) && !t1.HasPointers()) {
   989  		return true
   990  	}
   991  
   992  	return false
   993  }
   994  
   995  // moveSize returns the number of bytes an aligned MOV instruction moves.
   996  func moveSize(align int64, c *Config) int64 {
   997  	switch {
   998  	case align%8 == 0 && c.PtrSize == 8:
   999  		return 8
  1000  	case align%4 == 0:
  1001  		return 4
  1002  	case align%2 == 0:
  1003  		return 2
  1004  	}
  1005  	return 1
  1006  }
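         // For example, with 8-byte pointers moveSize(8, c) is 8, moveSize(4, c)
         // is 4, and moveSize(1, c) is 1.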
  1007  
  1008  // mergePoint finds a block among a's blocks which dominates b and is itself
  1009  // dominated by all of a's blocks. Returns nil if it can't find one.
  1010  // Might return nil even if one does exist.
  1011  func mergePoint(b *Block, a ...*Value) *Block {
  1012  	// Walk backward from b looking for one of the a's blocks.
  1013  
  1014  	// Max distance
  1015  	d := 100
  1016  
  1017  	for d > 0 {
  1018  		for _, x := range a {
  1019  			if b == x.Block {
  1020  				goto found
  1021  			}
  1022  		}
  1023  		if len(b.Preds) > 1 {
  1024  			// Don't know which way to go back. Abort.
  1025  			return nil
  1026  		}
  1027  		b = b.Preds[0].b
  1028  		d--
  1029  	}
  1030  	return nil // too far away
  1031  found:
  1032  	// At this point, r is the first value in a that we find by walking backwards.
  1033  	// if we return anything, r will be it.
  1034  	r := b
  1035  
  1036  	// Keep going, counting the other a's that we find. They must all dominate r.
  1037  	na := 0
  1038  	for d > 0 {
  1039  		for _, x := range a {
  1040  			if b == x.Block {
  1041  				na++
  1042  			}
  1043  		}
  1044  		if na == len(a) {
  1045  			// Found all of a in a backwards walk. We can return r.
  1046  			return r
  1047  		}
  1048  		if len(b.Preds) > 1 {
  1049  			return nil
  1050  		}
  1051  		b = b.Preds[0].b
  1052  		d--
  1053  
  1054  	}
  1055  	return nil // too far away
  1056  }
  1057  
  1058  // clobber invalidates values. Returns true.
  1059  // clobber is used by rewrite rules to:
  1060  //
  1061  //	A) make sure the values are really dead and never used again.
  1062  //	B) decrement use counts of the values' args.
  1063  func clobber(vv ...*Value) bool {
  1064  	for _, v := range vv {
  1065  		v.reset(OpInvalid)
  1066  		// Note: leave v.Block intact.  The Block field is used after clobber.
  1067  	}
  1068  	return true
  1069  }
  1070  
  1071  // resetCopy resets v to be a copy of arg.
  1072  // Always returns true.
  1073  func resetCopy(v *Value, arg *Value) bool {
  1074  	v.reset(OpCopy)
  1075  	v.AddArg(arg)
  1076  	return true
  1077  }
  1078  
  1079  // clobberIfDead resets v when use count is 1. Returns true.
  1080  // clobberIfDead is used by rewrite rules to decrement
  1081  // use counts of v's args when v is dead and never used.
  1082  func clobberIfDead(v *Value) bool {
  1083  	if v.Uses == 1 {
  1084  		v.reset(OpInvalid)
  1085  	}
  1086  	// Note: leave v.Block intact.  The Block field is used after clobberIfDead.
  1087  	return true
  1088  }
  1089  
  1090  // noteRule is an easy way to track if a rule is matched when writing
  1091  // new ones.  Make the rule of interest also conditional on
  1092  //
  1093  //	noteRule("note to self: rule of interest matched")
  1094  //
  1095  // and that message will print when the rule matches.
  1096  func noteRule(s string) bool {
  1097  	fmt.Println(s)
  1098  	return true
  1099  }
  1100  
  1101  // countRule increments Func.ruleMatches[key].
  1102  // If Func.ruleMatches is non-nil at the end
  1103  // of compilation, it will be printed to stdout.
  1104  // This is intended to make it easier to find which functions
   1105  // contain lots of rule matches when developing new rules.
  1106  func countRule(v *Value, key string) bool {
  1107  	f := v.Block.Func
  1108  	if f.ruleMatches == nil {
  1109  		f.ruleMatches = make(map[string]int)
  1110  	}
  1111  	f.ruleMatches[key]++
  1112  	return true
  1113  }
  1114  
  1115  // warnRule generates compiler debug output with string s when
  1116  // v is not in autogenerated code, cond is true and the rule has fired.
  1117  func warnRule(cond bool, v *Value, s string) bool {
  1118  	if pos := v.Pos; pos.Line() > 1 && cond {
  1119  		v.Block.Func.Warnl(pos, s)
  1120  	}
  1121  	return true
  1122  }
  1123  
  1124  // for a pseudo-op like (LessThan x), extract x.
  1125  func flagArg(v *Value) *Value {
  1126  	if len(v.Args) != 1 || !v.Args[0].Type.IsFlags() {
  1127  		return nil
  1128  	}
  1129  	return v.Args[0]
  1130  }
  1131  
  1132  // arm64Negate finds the complement to an ARM64 condition code,
  1133  // for example !Equal -> NotEqual or !LessThan -> GreaterEqual
  1134  //
  1135  // For floating point, it's more subtle because NaN is unordered. We do
  1136  // !LessThanF -> NotLessThanF, the latter takes care of NaNs.
  1137  func arm64Negate(op Op) Op {
  1138  	switch op {
  1139  	case OpARM64LessThan:
  1140  		return OpARM64GreaterEqual
  1141  	case OpARM64LessThanU:
  1142  		return OpARM64GreaterEqualU
  1143  	case OpARM64GreaterThan:
  1144  		return OpARM64LessEqual
  1145  	case OpARM64GreaterThanU:
  1146  		return OpARM64LessEqualU
  1147  	case OpARM64LessEqual:
  1148  		return OpARM64GreaterThan
  1149  	case OpARM64LessEqualU:
  1150  		return OpARM64GreaterThanU
  1151  	case OpARM64GreaterEqual:
  1152  		return OpARM64LessThan
  1153  	case OpARM64GreaterEqualU:
  1154  		return OpARM64LessThanU
  1155  	case OpARM64Equal:
  1156  		return OpARM64NotEqual
  1157  	case OpARM64NotEqual:
  1158  		return OpARM64Equal
  1159  	case OpARM64LessThanF:
  1160  		return OpARM64NotLessThanF
  1161  	case OpARM64NotLessThanF:
  1162  		return OpARM64LessThanF
  1163  	case OpARM64LessEqualF:
  1164  		return OpARM64NotLessEqualF
  1165  	case OpARM64NotLessEqualF:
  1166  		return OpARM64LessEqualF
  1167  	case OpARM64GreaterThanF:
  1168  		return OpARM64NotGreaterThanF
  1169  	case OpARM64NotGreaterThanF:
  1170  		return OpARM64GreaterThanF
  1171  	case OpARM64GreaterEqualF:
  1172  		return OpARM64NotGreaterEqualF
  1173  	case OpARM64NotGreaterEqualF:
  1174  		return OpARM64GreaterEqualF
  1175  	default:
  1176  		panic("unreachable")
  1177  	}
  1178  }
  1179  
  1180  // arm64Invert evaluates (InvertFlags op), which
  1181  // is the same as altering the condition codes such
  1182  // that the same result would be produced if the arguments
  1183  // to the flag-generating instruction were reversed, e.g.
  1184  // (InvertFlags (CMP x y)) -> (CMP y x)
  1185  func arm64Invert(op Op) Op {
  1186  	switch op {
  1187  	case OpARM64LessThan:
  1188  		return OpARM64GreaterThan
  1189  	case OpARM64LessThanU:
  1190  		return OpARM64GreaterThanU
  1191  	case OpARM64GreaterThan:
  1192  		return OpARM64LessThan
  1193  	case OpARM64GreaterThanU:
  1194  		return OpARM64LessThanU
  1195  	case OpARM64LessEqual:
  1196  		return OpARM64GreaterEqual
  1197  	case OpARM64LessEqualU:
  1198  		return OpARM64GreaterEqualU
  1199  	case OpARM64GreaterEqual:
  1200  		return OpARM64LessEqual
  1201  	case OpARM64GreaterEqualU:
  1202  		return OpARM64LessEqualU
  1203  	case OpARM64Equal, OpARM64NotEqual:
  1204  		return op
  1205  	case OpARM64LessThanF:
  1206  		return OpARM64GreaterThanF
  1207  	case OpARM64GreaterThanF:
  1208  		return OpARM64LessThanF
  1209  	case OpARM64LessEqualF:
  1210  		return OpARM64GreaterEqualF
  1211  	case OpARM64GreaterEqualF:
  1212  		return OpARM64LessEqualF
  1213  	case OpARM64NotLessThanF:
  1214  		return OpARM64NotGreaterThanF
  1215  	case OpARM64NotGreaterThanF:
  1216  		return OpARM64NotLessThanF
  1217  	case OpARM64NotLessEqualF:
  1218  		return OpARM64NotGreaterEqualF
  1219  	case OpARM64NotGreaterEqualF:
  1220  		return OpARM64NotLessEqualF
  1221  	default:
  1222  		panic("unreachable")
  1223  	}
  1224  }
  1225  
  1226  // evaluate an ARM64 op against a flags value
  1227  // that is potentially constant; return 1 for true,
  1228  // -1 for false, and 0 for not constant.
  1229  func ccARM64Eval(op Op, flags *Value) int {
  1230  	fop := flags.Op
  1231  	if fop == OpARM64InvertFlags {
  1232  		return -ccARM64Eval(op, flags.Args[0])
  1233  	}
  1234  	if fop != OpARM64FlagConstant {
  1235  		return 0
  1236  	}
  1237  	fc := flagConstant(flags.AuxInt)
  1238  	b2i := func(b bool) int {
  1239  		if b {
  1240  			return 1
  1241  		}
  1242  		return -1
  1243  	}
  1244  	switch op {
  1245  	case OpARM64Equal:
  1246  		return b2i(fc.eq())
  1247  	case OpARM64NotEqual:
  1248  		return b2i(fc.ne())
  1249  	case OpARM64LessThan:
  1250  		return b2i(fc.lt())
  1251  	case OpARM64LessThanU:
  1252  		return b2i(fc.ult())
  1253  	case OpARM64GreaterThan:
  1254  		return b2i(fc.gt())
  1255  	case OpARM64GreaterThanU:
  1256  		return b2i(fc.ugt())
  1257  	case OpARM64LessEqual:
  1258  		return b2i(fc.le())
  1259  	case OpARM64LessEqualU:
  1260  		return b2i(fc.ule())
  1261  	case OpARM64GreaterEqual:
  1262  		return b2i(fc.ge())
  1263  	case OpARM64GreaterEqualU:
  1264  		return b2i(fc.uge())
  1265  	}
  1266  	return 0
  1267  }
  1268  
  1269  // logRule logs the use of the rule s. This will only be enabled if
  1270  // rewrite rules were generated with the -log option, see _gen/rulegen.go.
  1271  func logRule(s string) {
  1272  	if ruleFile == nil {
  1273  		// Open a log file to write log to. We open in append
  1274  		// mode because all.bash runs the compiler lots of times,
  1275  		// and we want the concatenation of all of those logs.
  1276  		// This means, of course, that users need to rm the old log
  1277  		// to get fresh data.
  1278  		// TODO: all.bash runs compilers in parallel. Need to synchronize logging somehow?
  1279  		w, err := os.OpenFile(filepath.Join(os.Getenv("GOROOT"), "src", "rulelog"),
  1280  			os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
  1281  		if err != nil {
  1282  			panic(err)
  1283  		}
  1284  		ruleFile = w
  1285  	}
  1286  	_, err := fmt.Fprintln(ruleFile, s)
  1287  	if err != nil {
  1288  		panic(err)
  1289  	}
  1290  }
  1291  
  1292  var ruleFile io.Writer
  1293  
  1294  func isConstZero(v *Value) bool {
  1295  	switch v.Op {
  1296  	case OpConstNil:
  1297  		return true
  1298  	case OpConst64, OpConst32, OpConst16, OpConst8, OpConstBool, OpConst32F, OpConst64F:
  1299  		return v.AuxInt == 0
  1300  	case OpStringMake, OpIMake, OpComplexMake:
  1301  		return isConstZero(v.Args[0]) && isConstZero(v.Args[1])
  1302  	case OpSliceMake:
  1303  		return isConstZero(v.Args[0]) && isConstZero(v.Args[1]) && isConstZero(v.Args[2])
  1304  	case OpStringPtr, OpStringLen, OpSlicePtr, OpSliceLen, OpSliceCap, OpITab, OpIData, OpComplexReal, OpComplexImag:
  1305  		return isConstZero(v.Args[0])
  1306  	}
  1307  	return false
  1308  }
  1309  
  1310  // reciprocalExact64 reports whether 1/c is exactly representable.
  1311  func reciprocalExact64(c float64) bool {
  1312  	b := math.Float64bits(c)
  1313  	man := b & (1<<52 - 1)
  1314  	if man != 0 {
  1315  		return false // not a power of 2, denormal, or NaN
  1316  	}
  1317  	exp := b >> 52 & (1<<11 - 1)
  1318  	// exponent bias is 0x3ff.  So taking the reciprocal of a number
  1319  	// changes the exponent to 0x7fe-exp.
  1320  	switch exp {
  1321  	case 0:
  1322  		return false // ±0
  1323  	case 0x7ff:
  1324  		return false // ±inf
  1325  	case 0x7fe:
  1326  		return false // exponent is not representable
  1327  	default:
  1328  		return true
  1329  	}
  1330  }
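         // For example, reciprocalExact64(4) is true (1/4 is exactly 0.25), while
         // reciprocalExact64(3) is false because 3 has a nonzero mantissa.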
  1331  
  1332  // reciprocalExact32 reports whether 1/c is exactly representable.
  1333  func reciprocalExact32(c float32) bool {
  1334  	b := math.Float32bits(c)
  1335  	man := b & (1<<23 - 1)
  1336  	if man != 0 {
  1337  		return false // not a power of 2, denormal, or NaN
  1338  	}
  1339  	exp := b >> 23 & (1<<8 - 1)
  1340  	// exponent bias is 0x7f.  So taking the reciprocal of a number
  1341  	// changes the exponent to 0xfe-exp.
  1342  	switch exp {
  1343  	case 0:
  1344  		return false // ±0
  1345  	case 0xff:
  1346  		return false // ±inf
  1347  	case 0xfe:
  1348  		return false // exponent is not representable
  1349  	default:
  1350  		return true
  1351  	}
  1352  }
  1353  
   1354  // check if an immediate can be directly encoded into an ARM instruction.
  1355  func isARMImmRot(v uint32) bool {
  1356  	for i := 0; i < 16; i++ {
  1357  		if v&^0xff == 0 {
  1358  			return true
  1359  		}
  1360  		v = v<<2 | v>>30
  1361  	}
  1362  
  1363  	return false
  1364  }
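         // For example, 0xff000000 is encodable (0xff rotated into place), while
         // 0x101 is not: its set bits are 8 positions apart, so they never fit
         // within a single byte under an even rotation.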
  1365  
   1366  // overlap reports whether the ranges specified by the given offset and
  1367  // size pairs overlap.
  1368  func overlap(offset1, size1, offset2, size2 int64) bool {
  1369  	if offset1 >= offset2 && offset2+size2 > offset1 {
  1370  		return true
  1371  	}
  1372  	if offset2 >= offset1 && offset1+size1 > offset2 {
  1373  		return true
  1374  	}
  1375  	return false
  1376  }
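         // For example, overlap(0, 8, 4, 4) is true (the second range lies inside
         // the first), while overlap(0, 4, 4, 4) is false (the ranges are adjacent).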
  1377  
   1378  // check if value zeroes out the upper 32 bits of a 64-bit register.
   1379  // depth limits recursion depth. In AMD64.rules 3 is used as the limit,
   1380  // because it catches the same number of cases as 4.
  1381  func zeroUpper32Bits(x *Value, depth int) bool {
  1382  	if x.Type.IsSigned() && x.Type.Size() < 8 {
  1383  		// If the value is signed, it might get re-sign-extended
  1384  		// during spill and restore. See issue 68227.
  1385  		return false
  1386  	}
  1387  	switch x.Op {
  1388  	case OpAMD64MOVLconst, OpAMD64MOVLload, OpAMD64MOVLQZX, OpAMD64MOVLloadidx1,
  1389  		OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVBload, OpAMD64MOVBloadidx1,
  1390  		OpAMD64MOVLloadidx4, OpAMD64ADDLload, OpAMD64SUBLload, OpAMD64ANDLload,
  1391  		OpAMD64ORLload, OpAMD64XORLload, OpAMD64CVTTSD2SL,
  1392  		OpAMD64ADDL, OpAMD64ADDLconst, OpAMD64SUBL, OpAMD64SUBLconst,
  1393  		OpAMD64ANDL, OpAMD64ANDLconst, OpAMD64ORL, OpAMD64ORLconst,
  1394  		OpAMD64XORL, OpAMD64XORLconst, OpAMD64NEGL, OpAMD64NOTL,
  1395  		OpAMD64SHRL, OpAMD64SHRLconst, OpAMD64SARL, OpAMD64SARLconst,
  1396  		OpAMD64SHLL, OpAMD64SHLLconst:
  1397  		return true
  1398  	case OpARM64REV16W, OpARM64REVW, OpARM64RBITW, OpARM64CLZW, OpARM64EXTRWconst,
  1399  		OpARM64MULW, OpARM64MNEGW, OpARM64UDIVW, OpARM64DIVW, OpARM64UMODW,
  1400  		OpARM64MADDW, OpARM64MSUBW, OpARM64RORW, OpARM64RORWconst:
  1401  		return true
  1402  	case OpArg: // note: but not ArgIntReg
  1403  		// amd64 always loads args from the stack unsigned.
  1404  		// most other architectures load them sign/zero extended based on the type.
  1405  		return x.Type.Size() == 4 && x.Block.Func.Config.arch == "amd64"
  1406  	case OpPhi, OpSelect0, OpSelect1:
   1407  		// Phis can use each other as arguments; instead of tracking visited values,
  1408  		// just limit recursion depth.
  1409  		if depth <= 0 {
  1410  			return false
  1411  		}
  1412  		for i := range x.Args {
  1413  			if !zeroUpper32Bits(x.Args[i], depth-1) {
  1414  				return false
  1415  			}
  1416  		}
  1417  		return true
  1418  
  1419  	}
  1420  	return false
  1421  }
  1422  
  1423  // zeroUpper48Bits is similar to zeroUpper32Bits, but for upper 48 bits.
  1424  func zeroUpper48Bits(x *Value, depth int) bool {
  1425  	if x.Type.IsSigned() && x.Type.Size() < 8 {
  1426  		return false
  1427  	}
  1428  	switch x.Op {
  1429  	case OpAMD64MOVWQZX, OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVWloadidx2:
  1430  		return true
  1431  	case OpArg: // note: but not ArgIntReg
  1432  		return x.Type.Size() == 2 && x.Block.Func.Config.arch == "amd64"
  1433  	case OpPhi, OpSelect0, OpSelect1:
   1434  		// Phis can use each other as arguments; instead of tracking visited values,
  1435  		// just limit recursion depth.
  1436  		if depth <= 0 {
  1437  			return false
  1438  		}
  1439  		for i := range x.Args {
  1440  			if !zeroUpper48Bits(x.Args[i], depth-1) {
  1441  				return false
  1442  			}
  1443  		}
  1444  		return true
  1445  
  1446  	}
  1447  	return false
  1448  }
  1449  
  1450  // zeroUpper56Bits is similar to zeroUpper32Bits, but for upper 56 bits.
  1451  func zeroUpper56Bits(x *Value, depth int) bool {
  1452  	if x.Type.IsSigned() && x.Type.Size() < 8 {
  1453  		return false
  1454  	}
  1455  	switch x.Op {
  1456  	case OpAMD64MOVBQZX, OpAMD64MOVBload, OpAMD64MOVBloadidx1:
  1457  		return true
  1458  	case OpArg: // note: but not ArgIntReg
  1459  		return x.Type.Size() == 1 && x.Block.Func.Config.arch == "amd64"
  1460  	case OpPhi, OpSelect0, OpSelect1:
   1461  		// Phis can use each other as arguments; instead of tracking visited values,
  1462  		// just limit recursion depth.
  1463  		if depth <= 0 {
  1464  			return false
  1465  		}
  1466  		for i := range x.Args {
  1467  			if !zeroUpper56Bits(x.Args[i], depth-1) {
  1468  				return false
  1469  			}
  1470  		}
  1471  		return true
  1472  
  1473  	}
  1474  	return false
  1475  }
  1476  
  1477  func isInlinableMemclr(c *Config, sz int64) bool {
  1478  	if sz < 0 {
  1479  		return false
  1480  	}
  1481  	// TODO: expand this check to allow other architectures
  1482  	// see CL 454255 and issue 56997
  1483  	switch c.arch {
  1484  	case "amd64", "arm64":
  1485  		return true
  1486  	case "ppc64le", "ppc64", "loong64":
  1487  		return sz < 512
  1488  	}
  1489  	return false
  1490  }
  1491  
  1492  // isInlinableMemmove reports whether the given arch performs a Move of the given size
  1493  // faster than memmove. It will only return true if replacing the memmove with a Move is
  1494  // safe, either because Move will do all of its loads before any of its stores, or
  1495  // because the arguments are known to be disjoint.
  1496  // This is used as a check for replacing memmove with Move ops.
  1497  func isInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
  1498  	// It is always safe to convert memmove into Move when its arguments are disjoint.
  1499  	// Move ops may or may not be faster for large sizes depending on how the platform
  1500  	// lowers them, so we only perform this optimization on platforms that we know to
  1501  	// have fast Move ops.
  1502  	switch c.arch {
  1503  	case "amd64":
  1504  		return sz <= 16 || (sz < 1024 && disjoint(dst, sz, src, sz))
  1505  	case "arm64":
  1506  		return sz <= 64 || (sz <= 1024 && disjoint(dst, sz, src, sz))
  1507  	case "386":
  1508  		return sz <= 8
  1509  	case "s390x", "ppc64", "ppc64le":
  1510  		return sz <= 8 || disjoint(dst, sz, src, sz)
  1511  	case "arm", "loong64", "mips", "mips64", "mipsle", "mips64le":
  1512  		return sz <= 4
  1513  	}
  1514  	return false
  1515  }
  1516  func IsInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
  1517  	return isInlinableMemmove(dst, src, sz, c)
  1518  }
  1519  
  1520  // logLargeCopy logs the occurrence of a large copy.
  1521  // The best place to do this is in the rewrite rules where the size of the move is easy to find.
  1522  // "Large" is arbitrarily chosen to be 128 bytes; this may change.
  1523  func logLargeCopy(v *Value, s int64) bool {
  1524  	if s < 128 {
  1525  		return true
  1526  	}
  1527  	if logopt.Enabled() {
  1528  		logopt.LogOpt(v.Pos, "copy", "lower", v.Block.Func.Name, fmt.Sprintf("%d bytes", s))
  1529  	}
  1530  	return true
  1531  }
  1532  func LogLargeCopy(funcName string, pos src.XPos, s int64) {
  1533  	if s < 128 {
  1534  		return
  1535  	}
  1536  	if logopt.Enabled() {
  1537  		logopt.LogOpt(pos, "copy", "lower", funcName, fmt.Sprintf("%d bytes", s))
  1538  	}
  1539  }
  1540  
  1541  // hasSmallRotate reports whether the architecture has rotate instructions
  1542  // for sizes < 32-bit.  This is used to decide whether to promote some rotations.
  1543  func hasSmallRotate(c *Config) bool {
  1544  	switch c.arch {
  1545  	case "amd64", "386":
  1546  		return true
  1547  	default:
  1548  		return false
  1549  	}
  1550  }
  1551  
  1552  func supportsPPC64PCRel() bool {
  1553  	// PCRel is currently supported for >= power10, linux only
  1554  	// Internal and external linking supports this on ppc64le; internal linking on ppc64.
  1555  	return buildcfg.GOPPC64 >= 10 && buildcfg.GOOS == "linux"
  1556  }
  1557  
  1558  func newPPC64ShiftAuxInt(sh, mb, me, sz int64) int32 {
  1559  	if sh < 0 || sh >= sz {
  1560  		panic("PPC64 shift arg sh out of range")
  1561  	}
  1562  	if mb < 0 || mb >= sz {
  1563  		panic("PPC64 shift arg mb out of range")
  1564  	}
  1565  	if me < 0 || me >= sz {
  1566  		panic("PPC64 shift arg me out of range")
  1567  	}
  1568  	return int32(sh<<16 | mb<<8 | me)
  1569  }
  1570  
  1571  func GetPPC64Shiftsh(auxint int64) int64 {
  1572  	return int64(int8(auxint >> 16))
  1573  }
  1574  
  1575  func GetPPC64Shiftmb(auxint int64) int64 {
  1576  	return int64(int8(auxint >> 8))
  1577  }
  1578  
  1579  func GetPPC64Shiftme(auxint int64) int64 {
  1580  	return int64(int8(auxint))
  1581  }
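         // For example, newPPC64ShiftAuxInt(2, 4, 28, 32) packs into 0x2041c, from
         // which GetPPC64Shiftsh, GetPPC64Shiftmb and GetPPC64Shiftme recover
         // 2, 4 and 28 respectively.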
  1582  
   1583  // Test if this value can be encoded as a mask for an rlwinm-like
   1584  // operation.  Masks can also extend from the msb and wrap to
   1585  // the lsb.  That is, the valid masks are 32 bit strings
  1586  // of the form: 0..01..10..0 or 1..10..01..1 or 1...1
  1587  //
  1588  // Note: This ignores the upper 32 bits of the input. When a
   1589  // zero-extended result is desired (e.g. a 64 bit result), the
  1590  // user must verify the upper 32 bits are 0 and the mask is
  1591  // contiguous (that is, non-wrapping).
  1592  func isPPC64WordRotateMask(v64 int64) bool {
  1593  	// Isolate rightmost 1 (if none 0) and add.
  1594  	v := uint32(v64)
  1595  	vp := (v & -v) + v
  1596  	// Likewise, for the wrapping case.
  1597  	vn := ^v
  1598  	vpn := (vn & -vn) + vn
  1599  	return (v&vp == 0 || vn&vpn == 0) && v != 0
  1600  }
  1601  
  1602  // Test if this mask is a valid, contiguous bitmask which can be
  1603  // represented by a RLWNM mask and also clears the upper 32 bits
  1604  // of the register.
  1605  func isPPC64WordRotateMaskNonWrapping(v64 int64) bool {
  1606  	// Isolate the rightmost 1 bit (0 if there is none) and add it to v.
  1607  	v := uint32(v64)
  1608  	vp := (v & -v) + v
  1609  	return (v&vp == 0) && v != 0 && uint64(uint32(v64)) == uint64(v64)
  1610  }
  1611  
  1612  // Compress mask and shift into a single value of the form
  1613  // me | mb<<8 | rotate<<16 | nbits<<24 where me and mb can
  1614  // be used to regenerate the input mask.
  1615  func encodePPC64RotateMask(rotate, mask, nbits int64) int64 {
  1616  	var mb, me, mbn, men int
  1617  
  1618  	// Determine boundaries and then decode them
  1619  	if mask == 0 || ^mask == 0 || rotate >= nbits {
  1620  		panic(fmt.Sprintf("invalid PPC64 rotate mask: %x %d %d", uint64(mask), rotate, nbits))
  1621  	} else if nbits == 32 {
  1622  		mb = bits.LeadingZeros32(uint32(mask))
  1623  		me = 32 - bits.TrailingZeros32(uint32(mask))
  1624  		mbn = bits.LeadingZeros32(^uint32(mask))
  1625  		men = 32 - bits.TrailingZeros32(^uint32(mask))
  1626  	} else {
  1627  		mb = bits.LeadingZeros64(uint64(mask))
  1628  		me = 64 - bits.TrailingZeros64(uint64(mask))
  1629  		mbn = bits.LeadingZeros64(^uint64(mask))
  1630  		men = 64 - bits.TrailingZeros64(^uint64(mask))
  1631  	}
  1632  	// Check for a wrapping mask (e.g. bits set at 0 and 63)
  1633  	if mb == 0 && me == int(nbits) {
  1634  		// swap the inverted values
  1635  		mb, me = men, mbn
  1636  	}
  1637  
  1638  	return int64(me) | int64(mb<<8) | int64(rotate<<16) | int64(nbits<<24)
  1639  }
  1640  
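        // exampleEncodePPC64RotateMask is an illustrative sketch (hypothetical helper,
        // not used by the compiler). For the contiguous 32-bit mask 0x00FF0000 with no
        // rotation, the encoding stores me=16, mb=8, rotate=0 and nbits=32 in
        // successive bytes:
        func exampleEncodePPC64RotateMask() int64 {
        	return encodePPC64RotateMask(0, 0x00FF0000, 32) // 0x20000810
        }
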
  1641  // Merge (RLDICL [encoded] (SRDconst [s] x)) into (RLDICL [new_encoded] x)
  1642  // SRDconst on PPC64 is an extended mnemonic of RLDICL. If the input to an
  1643  // RLDICL is an SRDconst, and the RLDICL does not rotate its value, the two
  1644  // operations can be combined. This functions assumes the two opcodes can
  1645  // be merged, and returns an encoded rotate+mask value of the combined RLDICL.
  1646  func mergePPC64RLDICLandSRDconst(encoded, s int64) int64 {
  1647  	mb := s
  1648  	r := 64 - s
  1649  	// A larger mb is a smaller mask.
  1650  	if (encoded>>8)&0xFF < mb {
  1651  		encoded = (encoded &^ 0xFF00) | mb<<8
  1652  	}
  1653  	// The rotate is expected to be 0.
  1654  	if (encoded & 0xFF0000) != 0 {
  1655  		panic("non-zero rotate")
  1656  	}
  1657  	return encoded | r<<16
  1658  }
  1659  
  1660  // DecodePPC64RotateMask is the inverse operation of encodePPC64RotateMask.  The values returned as
  1661  // mb and me satisfy the POWER ISA definition of MASK(x,y) where MASK(mb,me) = mask.
  1662  func DecodePPC64RotateMask(sauxint int64) (rotate, mb, me int64, mask uint64) {
  1663  	auxint := uint64(sauxint)
  1664  	rotate = int64((auxint >> 16) & 0xFF)
  1665  	mb = int64((auxint >> 8) & 0xFF)
  1666  	me = int64((auxint >> 0) & 0xFF)
  1667  	nbits := int64((auxint >> 24) & 0xFF)
  1668  	mask = ((1 << uint(nbits-mb)) - 1) ^ ((1 << uint(nbits-me)) - 1)
  1669  	if mb > me {
  1670  		mask = ^mask
  1671  	}
  1672  	if nbits == 32 {
  1673  		mask = uint64(uint32(mask))
  1674  	}
  1675  
  1676  	// Fixup ME to match ISA definition.  The second argument to MASK(..,me)
  1677  	// is inclusive.
  1678  	me = (me - 1) & (nbits - 1)
  1679  	return
  1680  }
  1681  
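        // exampleDecodePPC64RotateMask is an illustrative sketch (hypothetical helper,
        // not used by the compiler). Decoding 0x20000810 (the encoding of mask
        // 0x00FF0000 shown above) recovers the mask, with me converted to the
        // inclusive ISA form, so MASK(mb=8, me=15) == 0x00FF0000:
        func exampleDecodePPC64RotateMask() (rotate, mb, me int64, mask uint64) {
        	return DecodePPC64RotateMask(0x20000810) // 0, 8, 15, 0x00FF0000
        }
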
  1682  // This verifies that the mask is a set of
  1683  // consecutive 1 bits including the least
  1684  // significant bit.
  1685  func isPPC64ValidShiftMask(v int64) bool {
  1686  	if (v != 0) && ((v+1)&v) == 0 {
  1687  		return true
  1688  	}
  1689  	return false
  1690  }
  1691  
  1692  func getPPC64ShiftMaskLength(v int64) int64 {
  1693  	return int64(bits.Len64(uint64(v)))
  1694  }
  1695  
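        // examplePPC64ShiftMask is an illustrative sketch (hypothetical helper, not
        // used by the compiler). A valid shift mask is a run of consecutive ones that
        // includes the least significant bit:
        func examplePPC64ShiftMask() (ok1, ok2 bool, length int64) {
        	ok1 = isPPC64ValidShiftMask(0x00FF)      // true: contiguous ones from bit 0
        	ok2 = isPPC64ValidShiftMask(0x0FF0)      // false: does not include the lsb
        	length = getPPC64ShiftMaskLength(0x00FF) // 8
        	return ok1, ok2, length
        }
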
  1696  // Decompose a shift right into an equivalent rotate/mask,
  1697  // and return mask & m.
  1698  func mergePPC64RShiftMask(m, s, nbits int64) int64 {
  1699  	smask := uint64((1<<uint(nbits))-1) >> uint(s)
  1700  	return m & int64(smask)
  1701  }
  1702  
  1703  // Combine (ANDconst [m] (SRWconst [s])) into (RLWINM [y]) or return 0
  1704  func mergePPC64AndSrwi(m, s int64) int64 {
  1705  	mask := mergePPC64RShiftMask(m, s, 32)
  1706  	if !isPPC64WordRotateMask(mask) {
  1707  		return 0
  1708  	}
  1709  	return encodePPC64RotateMask((32-s)&31, mask, 32)
  1710  }
  1711  
  1712  // Combine (ANDconst [m] (SRDconst [s])) into (RLWINM [y]) or return 0
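        // exampleMergePPC64AndSrwi is an illustrative sketch (hypothetical helper, not
        // used by the compiler). (ANDconst [0xFF] (SRWconst [8] x)) extracts bits 8..15
        // of x; the merge expresses that as a single RLWINM with rotate 24 and mask 0xFF:
        func exampleMergePPC64AndSrwi() (rotate, mb, me int64, mask uint64) {
        	return DecodePPC64RotateMask(mergePPC64AndSrwi(0xFF, 8)) // 24, 24, 31, 0xFF
        }
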
  1713  func mergePPC64AndSrdi(m, s int64) int64 {
  1714  	mask := mergePPC64RShiftMask(m, s, 64)
  1715  
  1716  	// Verify the rotate and mask result only uses the lower 32 bits.
  1717  	rv := bits.RotateLeft64(0xFFFFFFFF00000000, -int(s))
  1718  	if rv&uint64(mask) != 0 {
  1719  		return 0
  1720  	}
  1721  	if !isPPC64WordRotateMaskNonWrapping(mask) {
  1722  		return 0
  1723  	}
  1724  	return encodePPC64RotateMask((32-s)&31, mask, 32)
  1725  }
  1726  
  1727  // Combine (ANDconst [m] (SLDconst [s])) into (RLWINM [y]) or return 0
  1728  func mergePPC64AndSldi(m, s int64) int64 {
  1729  	mask := -1 << s & m
  1730  
  1731  	// Verify the rotate and mask result only uses the lower 32 bits.
  1732  	rv := bits.RotateLeft64(0xFFFFFFFF00000000, int(s))
  1733  	if rv&uint64(mask) != 0 {
  1734  		return 0
  1735  	}
  1736  	if !isPPC64WordRotateMaskNonWrapping(mask) {
  1737  		return 0
  1738  	}
  1739  	return encodePPC64RotateMask(s&31, mask, 32)
  1740  }
  1741  
  1742  // Test if a word shift right feeding into a CLRLSLDI can be merged into RLWINM.
  1743  // Return the encoded RLWINM constant, or 0 if they cannot be merged.
  1744  func mergePPC64ClrlsldiSrw(sld, srw int64) int64 {
  1745  	mask_1 := uint64(0xFFFFFFFF >> uint(srw))
  1746  	// For CLRLSLDI, it's more convenient to think of it as masking the leftmost bits, then rotating left.
  1747  	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
  1748  
  1749  	// Rewrite mask to apply after the final left shift.
  1750  	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld))
  1751  
  1752  	r_1 := 32 - srw
  1753  	r_2 := GetPPC64Shiftsh(sld)
  1754  	r_3 := (r_1 + r_2) & 31 // This can wrap.
  1755  
  1756  	if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 {
  1757  		return 0
  1758  	}
  1759  	return encodePPC64RotateMask(int64(r_3), int64(mask_3), 32)
  1760  }
  1761  
  1762  // Test if a doubleword shift right feeding into a CLRLSLDI can be merged into RLWINM.
  1763  // Return the encoded RLWINM constant, or 0 if they cannot be merged.
  1764  func mergePPC64ClrlsldiSrd(sld, srd int64) int64 {
  1765  	mask_1 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(srd)
  1766  	// For CLRLSLDI, it's more convenient to think of it as masking the leftmost bits, then rotating left.
  1767  	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
  1768  
  1769  	// Rewrite mask to apply after the final left shift.
  1770  	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld))
  1771  
  1772  	r_1 := 64 - srd
  1773  	r_2 := GetPPC64Shiftsh(sld)
  1774  	r_3 := (r_1 + r_2) & 63 // This can wrap.
  1775  
  1776  	if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 {
  1777  		return 0
  1778  	}
  1779  	// This combine only works when selecting and shifting the lower 32 bits.
  1780  	v1 := bits.RotateLeft64(0xFFFFFFFF00000000, int(r_3))
  1781  	if v1&mask_3 != 0 {
  1782  		return 0
  1783  	}
  1784  	return encodePPC64RotateMask(int64(r_3&31), int64(mask_3), 32)
  1785  }
  1786  
  1787  // Test if a RLWINM feeding into a CLRLSLDI can be merged into RLWINM.  Return
  1788  // the encoded RLWINM constant, or 0 if they cannot be merged.
  1789  func mergePPC64ClrlsldiRlwinm(sld int32, rlw int64) int64 {
  1790  	r_1, _, _, mask_1 := DecodePPC64RotateMask(rlw)
  1791  	// For CLRLSLDI, it's more convenient to think of it as masking the leftmost bits, then rotating left.
  1792  	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
  1793  
  1794  	// combine the masks, and adjust for the final left shift.
  1795  	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(int64(sld)))
  1796  	r_2 := GetPPC64Shiftsh(int64(sld))
  1797  	r_3 := (r_1 + r_2) & 31 // This can wrap.
  1798  
  1799  	// Verify the result is still a valid bitmask of <= 32 bits.
  1800  	if !isPPC64WordRotateMask(int64(mask_3)) || uint64(uint32(mask_3)) != mask_3 {
  1801  		return 0
  1802  	}
  1803  	return encodePPC64RotateMask(r_3, int64(mask_3), 32)
  1804  }
  1805  
  1806  // Test if RLWINM feeding into an ANDconst can be merged. Return the encoded RLWINM constant,
  1807  // or 0 if they cannot be merged.
  1808  func mergePPC64AndRlwinm(mask uint32, rlw int64) int64 {
  1809  	r, _, _, mask_rlw := DecodePPC64RotateMask(rlw)
  1810  	mask_out := (mask_rlw & uint64(mask))
  1811  
  1812  	// Verify the result is still a valid bitmask of <= 32 bits.
  1813  	if !isPPC64WordRotateMask(int64(mask_out)) {
  1814  		return 0
  1815  	}
  1816  	return encodePPC64RotateMask(r, int64(mask_out), 32)
  1817  }
  1818  
  1819  // Test if RLWINM opcode rlw clears the upper 32 bits of the
  1820  // result. Return rlw if it does, 0 otherwise.
  1821  func mergePPC64MovwzregRlwinm(rlw int64) int64 {
  1822  	_, mb, me, _ := DecodePPC64RotateMask(rlw)
  1823  	if mb > me {
  1824  		return 0
  1825  	}
  1826  	return rlw
  1827  }
  1828  
  1829  // Test if a RLWINM feeding into an AND with a constant mask can be merged.
  1830  // Return the encoded RLWINM constant, or 0 if they cannot be merged.
  1831  func mergePPC64RlwinmAnd(rlw int64, mask uint32) int64 {
  1832  	r, _, _, mask_rlw := DecodePPC64RotateMask(rlw)
  1833  
  1834  	// Rotate the input mask, combine with the rlwinm mask, and test if it is still a valid rlwinm mask.
  1835  	r_mask := bits.RotateLeft32(mask, int(r))
  1836  
  1837  	mask_out := (mask_rlw & uint64(r_mask))
  1838  
  1839  	// Verify the result is still a valid bitmask of <= 32 bits.
  1840  	if !isPPC64WordRotateMask(int64(mask_out)) {
  1841  		return 0
  1842  	}
  1843  	return encodePPC64RotateMask(r, int64(mask_out), 32)
  1844  }
  1845  
  1846  // Test if a RLWINM feeding into a SLDconst can be merged. Return the encoded
  1847  // RLWINM constant, or 0 if they cannot be merged.
  1848  func mergePPC64SldiRlwinm(sldi, rlw int64) int64 {
  1849  	r_1, mb, me, mask_1 := DecodePPC64RotateMask(rlw)
  1850  	if mb > me || mb < sldi {
  1851  		// Wrapping masks cannot be merged as the upper 32 bits are effectively undefined in this case.
  1852  		// Likewise, if mb is less than the shift amount, it cannot be merged.
  1853  		return 0
  1854  	}
  1855  	// combine the masks, and adjust for the final left shift.
  1856  	mask_3 := mask_1 << sldi
  1857  	r_3 := (r_1 + sldi) & 31 // This can wrap.
  1858  
  1859  	// Verify the result is still a valid bitmask of <= 32 bits.
  1860  	if uint64(uint32(mask_3)) != mask_3 {
  1861  		return 0
  1862  	}
  1863  	return encodePPC64RotateMask(r_3, int64(mask_3), 32)
  1864  }
  1865  
  1866  // Compute the encoded RLWINM constant from combining (SLDconst [sld] (SRWconst [srw] x)),
  1867  // or return 0 if they cannot be combined.
  1868  func mergePPC64SldiSrw(sld, srw int64) int64 {
  1869  	if sld > srw || srw >= 32 {
  1870  		return 0
  1871  	}
  1872  	mask_r := uint32(0xFFFFFFFF) >> uint(srw)
  1873  	mask_l := uint32(0xFFFFFFFF) >> uint(sld)
  1874  	mask := (mask_r & mask_l) << uint(sld)
  1875  	return encodePPC64RotateMask((32-srw+sld)&31, int64(mask), 32)
  1876  }
  1877  
  1878  // Convert a PPC64 opcode from the Op to OpCC form. This converts (op x y)
  1879  // to (Select0 (opCC x y)) without having to explicitly fixup every user
  1880  // of op.
  1881  //
  1882  // E.g. consider the case:
  1883  // a = (ADD x y)
  1884  // b = (CMPconst [0] a)
  1885  // c = (OR a z)
  1886  //
  1887  // A rule like (CMPconst [0] (ADD x y)) => (CMPconst [0] (Select0 (ADDCC x y)))
  1888  // would produce:
  1889  // a  = (ADD x y)
  1890  // a' = (ADDCC x y)
  1891  // a” = (Select0 a')
  1892  // b  = (CMPconst [0] a”)
  1893  // c  = (OR a z)
  1894  //
  1895  // which makes it impossible to rewrite the second user. Instead the result
  1896  // of this conversion is:
  1897  // a' = (ADDCC x y)
  1898  // a  = (Select0 a')
  1899  // b  = (CMPconst [0] a)
  1900  // c  = (OR a z)
  1901  //
  1902  // Which makes it trivial to rewrite b using a lowering rule.
  1903  func convertPPC64OpToOpCC(op *Value) *Value {
  1904  	ccOpMap := map[Op]Op{
  1905  		OpPPC64ADD:      OpPPC64ADDCC,
  1906  		OpPPC64ADDconst: OpPPC64ADDCCconst,
  1907  		OpPPC64AND:      OpPPC64ANDCC,
  1908  		OpPPC64ANDN:     OpPPC64ANDNCC,
  1909  		OpPPC64ANDconst: OpPPC64ANDCCconst,
  1910  		OpPPC64CNTLZD:   OpPPC64CNTLZDCC,
  1911  		OpPPC64MULHDU:   OpPPC64MULHDUCC,
  1912  		OpPPC64NEG:      OpPPC64NEGCC,
  1913  		OpPPC64NOR:      OpPPC64NORCC,
  1914  		OpPPC64OR:       OpPPC64ORCC,
  1915  		OpPPC64RLDICL:   OpPPC64RLDICLCC,
  1916  		OpPPC64SUB:      OpPPC64SUBCC,
  1917  		OpPPC64XOR:      OpPPC64XORCC,
  1918  	}
  1919  	b := op.Block
  1920  	opCC := b.NewValue0I(op.Pos, ccOpMap[op.Op], types.NewTuple(op.Type, types.TypeFlags), op.AuxInt)
  1921  	opCC.AddArgs(op.Args...)
  1922  	op.reset(OpSelect0)
  1923  	op.AddArgs(opCC)
  1924  	return op
  1925  }
  1926  
  1927  // Try converting a RLDICL to ANDCC. If successful, return the mask; otherwise return 0.
  1928  func convertPPC64RldiclAndccconst(sauxint int64) int64 {
  1929  	r, _, _, mask := DecodePPC64RotateMask(sauxint)
  1930  	if r != 0 || mask&0xFFFF != mask {
  1931  		return 0
  1932  	}
  1933  	return int64(mask)
  1934  }
  1935  
  1936  // Convenience function to rotate a 32 bit constant value by another constant.
  1937  func rotateLeft32(v, rotate int64) int64 {
  1938  	return int64(bits.RotateLeft32(uint32(v), int(rotate)))
  1939  }
  1940  
  1941  func rotateRight64(v, rotate int64) int64 {
  1942  	return int64(bits.RotateLeft64(uint64(v), int(-rotate)))
  1943  }
  1944  
  1945  // encodes the lsb and width for arm(64) bitfield ops into the expected auxInt format.
  1946  func armBFAuxInt(lsb, width int64) arm64BitField {
  1947  	if lsb < 0 || lsb > 63 {
  1948  		panic("ARM(64) bit field lsb constant out of range")
  1949  	}
  1950  	if width < 1 || lsb+width > 64 {
  1951  		panic("ARM(64) bit field width constant out of range")
  1952  	}
  1953  	return arm64BitField(width | lsb<<8)
  1954  }
  1955  
  1956  // returns the lsb part of the auxInt field of arm64 bitfield ops.
  1957  func (bfc arm64BitField) lsb() int64 {
  1958  	return int64(uint64(bfc) >> 8)
  1959  }
  1960  
  1961  // returns the width part of the auxInt field of arm64 bitfield ops.
  1962  func (bfc arm64BitField) width() int64 {
  1963  	return int64(bfc) & 0xff
  1964  }
  1965  
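        // exampleARM64BitFieldRoundTrip is an illustrative sketch (hypothetical helper,
        // not used by the compiler). armBFAuxInt packs the width in the low byte and
        // the lsb above it, so a 16-bit field starting at bit 8 round-trips:
        func exampleARM64BitFieldRoundTrip() (lsb, width int64) {
        	bf := armBFAuxInt(8, 16)    // 0x0810
        	return bf.lsb(), bf.width() // 8, 16
        }
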
  1966  // checks if mask >> rshift applied at lsb is a valid arm64 bitfield op mask.
  1967  func isARM64BFMask(lsb, mask, rshift int64) bool {
  1968  	shiftedMask := int64(uint64(mask) >> uint64(rshift))
  1969  	return shiftedMask != 0 && isPowerOfTwo(shiftedMask+1) && nto(shiftedMask)+lsb < 64
  1970  }
  1971  
  1972  // returns the bitfield width of mask >> rshift for arm64 bitfield ops.
  1973  func arm64BFWidth(mask, rshift int64) int64 {
  1974  	shiftedMask := int64(uint64(mask) >> uint64(rshift))
  1975  	if shiftedMask == 0 {
  1976  		panic("ARM64 BF mask is zero")
  1977  	}
  1978  	return nto(shiftedMask)
  1979  }
  1980  
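        // exampleARM64BFMask is an illustrative sketch (hypothetical helper, not used
        // by the compiler). The mask 0xFF00 shifted right by 8 is 0xFF, a contiguous
        // run of 8 ones, so it is a valid bitfield mask at lsb 8 with width 8:
        func exampleARM64BFMask() (ok bool, width int64) {
        	ok = isARM64BFMask(8, 0xFF00, 8) // true
        	width = arm64BFWidth(0xFF00, 8)  // 8
        	return ok, width
        }
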
  1981  // registerizable reports whether t is a primitive type that fits in
  1982  // a register. It assumes float64 values will always fit into registers
  1983  // even if that isn't strictly true.
  1984  func registerizable(b *Block, typ *types.Type) bool {
  1985  	if typ.IsPtrShaped() || typ.IsFloat() || typ.IsBoolean() {
  1986  		return true
  1987  	}
  1988  	if typ.IsInteger() {
  1989  		return typ.Size() <= b.Func.Config.RegSize
  1990  	}
  1991  	return false
  1992  }
  1993  
  1994  // needRaceCleanup reports whether this call to racefuncenter/exit isn't needed.
  1995  func needRaceCleanup(sym *AuxCall, v *Value) bool {
  1996  	f := v.Block.Func
  1997  	if !f.Config.Race {
  1998  		return false
  1999  	}
  2000  	if !isSameCall(sym, "runtime.racefuncenter") && !isSameCall(sym, "runtime.racefuncexit") {
  2001  		return false
  2002  	}
  2003  	for _, b := range f.Blocks {
  2004  		for _, v := range b.Values {
  2005  			switch v.Op {
  2006  			case OpStaticCall, OpStaticLECall:
  2007  				// The check for racefuncenter will also encounter racefuncexit and vice versa.
  2008  				// Allow calls to panic*
  2009  				s := v.Aux.(*AuxCall).Fn.String()
  2010  				switch s {
  2011  				case "runtime.racefuncenter", "runtime.racefuncexit",
  2012  					"runtime.panicdivide", "runtime.panicwrap",
  2013  					"runtime.panicshift":
  2014  					continue
  2015  				}
  2016  				// If we encountered any call, we need to keep racefunc*,
  2017  				// for accurate stacktraces.
  2018  				return false
  2019  			case OpPanicBounds, OpPanicExtend:
  2020  				// Note: these are panic generators that are ok (like the static calls above).
  2021  			case OpClosureCall, OpInterCall, OpClosureLECall, OpInterLECall:
  2022  				// We must keep the race functions if there are any other call types.
  2023  				return false
  2024  			}
  2025  		}
  2026  	}
  2027  	if isSameCall(sym, "runtime.racefuncenter") {
  2028  		// TODO REGISTER ABI this needs to be cleaned up.
  2029  		// If we're removing racefuncenter, remove its argument as well.
  2030  		if v.Args[0].Op != OpStore {
  2031  			if v.Op == OpStaticLECall {
  2032  				// there is no store, yet.
  2033  				return true
  2034  			}
  2035  			return false
  2036  		}
  2037  		mem := v.Args[0].Args[2]
  2038  		v.Args[0].reset(OpCopy)
  2039  		v.Args[0].AddArg(mem)
  2040  	}
  2041  	return true
  2042  }
  2043  
  2044  // symIsRO reports whether sym is a read-only global.
  2045  func symIsRO(sym Sym) bool {
  2046  	lsym := sym.(*obj.LSym)
  2047  	return lsym.Type == objabi.SRODATA && len(lsym.R) == 0
  2048  }
  2049  
  2050  // symIsROZero reports whether sym is a read-only global whose data contains all zeros.
  2051  func symIsROZero(sym Sym) bool {
  2052  	lsym := sym.(*obj.LSym)
  2053  	if lsym.Type != objabi.SRODATA || len(lsym.R) != 0 {
  2054  		return false
  2055  	}
  2056  	for _, b := range lsym.P {
  2057  		if b != 0 {
  2058  			return false
  2059  		}
  2060  	}
  2061  	return true
  2062  }
  2063  
  2064  // isFixed32 returns true if the int32 at offset off in symbol sym
  2065  // is known and constant.
  2066  func isFixed32(c *Config, sym Sym, off int64) bool {
  2067  	return isFixed(c, sym, off, 4)
  2068  }
  2069  
  2070  // isFixed returns true if the range [off,off+size] of the symbol sym
  2071  // is known and constant.
  2072  func isFixed(c *Config, sym Sym, off, size int64) bool {
  2073  	lsym := sym.(*obj.LSym)
  2074  	if lsym.Extra == nil {
  2075  		return false
  2076  	}
  2077  	if _, ok := (*lsym.Extra).(*obj.TypeInfo); ok {
  2078  		if off == 2*c.PtrSize && size == 4 {
  2079  			return true // type hash field
  2080  		}
  2081  	}
  2082  	return false
  2083  }
  2084  func fixed32(c *Config, sym Sym, off int64) int32 {
  2085  	lsym := sym.(*obj.LSym)
  2086  	if ti, ok := (*lsym.Extra).(*obj.TypeInfo); ok {
  2087  		if off == 2*c.PtrSize {
  2088  			return int32(types.TypeHash(ti.Type.(*types.Type)))
  2089  		}
  2090  	}
  2091  	base.Fatalf("fixed32 data not known for %s:%d", sym, off)
  2092  	return 0
  2093  }
  2094  
  2095  // isFixedSym returns true if the content of sym at the given offset
  2096  // is known and is the constant address of another symbol.
  2097  func isFixedSym(sym Sym, off int64) bool {
  2098  	lsym := sym.(*obj.LSym)
  2099  	switch {
  2100  	case lsym.Type == objabi.SRODATA:
  2101  		// itabs, dictionaries
  2102  	default:
  2103  		return false
  2104  	}
  2105  	for _, r := range lsym.R {
  2106  		if (r.Type == objabi.R_ADDR || r.Type == objabi.R_WEAKADDR) && int64(r.Off) == off && r.Add == 0 {
  2107  			return true
  2108  		}
  2109  	}
  2110  	return false
  2111  }
  2112  func fixedSym(f *Func, sym Sym, off int64) Sym {
  2113  	lsym := sym.(*obj.LSym)
  2114  	for _, r := range lsym.R {
  2115  		if (r.Type == objabi.R_ADDR || r.Type == objabi.R_WEAKADDR) && int64(r.Off) == off {
  2116  			if strings.HasPrefix(r.Sym.Name, "type:") {
  2117  				// In case we're loading a type out of a dictionary, we need to record
  2118  				// that the containing function might put that type in an interface.
  2119  				// That information is currently recorded in relocations in the dictionary,
  2120  				// but if we perform this load at compile time then the dictionary
  2121  				// might be dead.
  2122  				reflectdata.MarkTypeSymUsedInInterface(r.Sym, f.fe.Func().Linksym())
  2123  			} else if strings.HasPrefix(r.Sym.Name, "go:itab") {
  2124  				// Same, but if we're using an itab we need to record that the
  2125  				// itab._type might be put in an interface.
  2126  				reflectdata.MarkTypeSymUsedInInterface(r.Sym, f.fe.Func().Linksym())
  2127  			}
  2128  			return r.Sym
  2129  		}
  2130  	}
  2131  	base.Fatalf("fixedSym data not known for %s:%d", sym, off)
  2132  	return nil
  2133  }
  2134  
  2135  // read8 reads one byte from the read-only global sym at offset off.
  2136  func read8(sym Sym, off int64) uint8 {
  2137  	lsym := sym.(*obj.LSym)
  2138  	if off >= int64(len(lsym.P)) || off < 0 {
  2139  		// Invalid index into the global sym.
  2140  		// This can happen in dead code, so we don't want to panic.
  2141  		// Just return any value, it will eventually get ignored.
  2142  		// See issue 29215.
  2143  		return 0
  2144  	}
  2145  	return lsym.P[off]
  2146  }
  2147  
  2148  // read16 reads two bytes from the read-only global sym at offset off.
  2149  func read16(sym Sym, off int64, byteorder binary.ByteOrder) uint16 {
  2150  	lsym := sym.(*obj.LSym)
  2151  	// lsym.P is written lazily.
  2152  	// Bytes requested after the end of lsym.P are 0.
  2153  	var src []byte
  2154  	if 0 <= off && off < int64(len(lsym.P)) {
  2155  		src = lsym.P[off:]
  2156  	}
  2157  	buf := make([]byte, 2)
  2158  	copy(buf, src)
  2159  	return byteorder.Uint16(buf)
  2160  }
  2161  
  2162  // read32 reads four bytes from the read-only global sym at offset off.
  2163  func read32(sym Sym, off int64, byteorder binary.ByteOrder) uint32 {
  2164  	lsym := sym.(*obj.LSym)
  2165  	var src []byte
  2166  	if 0 <= off && off < int64(len(lsym.P)) {
  2167  		src = lsym.P[off:]
  2168  	}
  2169  	buf := make([]byte, 4)
  2170  	copy(buf, src)
  2171  	return byteorder.Uint32(buf)
  2172  }
  2173  
  2174  // read64 reads eight bytes from the read-only global sym at offset off.
  2175  func read64(sym Sym, off int64, byteorder binary.ByteOrder) uint64 {
  2176  	lsym := sym.(*obj.LSym)
  2177  	var src []byte
  2178  	if 0 <= off && off < int64(len(lsym.P)) {
  2179  		src = lsym.P[off:]
  2180  	}
  2181  	buf := make([]byte, 8)
  2182  	copy(buf, src)
  2183  	return byteorder.Uint64(buf)
  2184  }
  2185  
  2186  // sequentialAddresses reports true if it can prove that x + n == y
  2187  func sequentialAddresses(x, y *Value, n int64) bool {
  2188  	if x == y && n == 0 {
  2189  		return true
  2190  	}
  2191  	if x.Op == Op386ADDL && y.Op == Op386LEAL1 && y.AuxInt == n && y.Aux == nil &&
  2192  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  2193  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  2194  		return true
  2195  	}
  2196  	if x.Op == Op386LEAL1 && y.Op == Op386LEAL1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
  2197  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  2198  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  2199  		return true
  2200  	}
  2201  	if x.Op == OpAMD64ADDQ && y.Op == OpAMD64LEAQ1 && y.AuxInt == n && y.Aux == nil &&
  2202  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  2203  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  2204  		return true
  2205  	}
  2206  	if x.Op == OpAMD64LEAQ1 && y.Op == OpAMD64LEAQ1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
  2207  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  2208  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  2209  		return true
  2210  	}
  2211  	return false
  2212  }
  2213  
  2214  // flagConstant represents the result of a compile-time comparison.
  2215  // The sense of these flags does not necessarily represent the hardware's notion
  2216  // of a flags register - these are just a compile-time construct.
  2217  // We happen to match the semantics to those of arm/arm64.
  2218  // Note that these semantics differ from x86: the carry flag has the opposite
  2219  // sense on a subtraction!
  2220  //
  2221  //	On amd64, C=1 represents a borrow, e.g. SBB on amd64 does x - y - C.
  2222  //	On arm64, C=0 represents a borrow, e.g. SBC on arm64 does x - y - ^C.
  2223  //	 (because it does x + ^y + C).
  2224  //
  2225  // See https://en.wikipedia.org/wiki/Carry_flag#Vs._borrow_flag
  2226  type flagConstant uint8
  2227  
  2228  // N reports whether the result of an operation is negative (high bit set).
  2229  func (fc flagConstant) N() bool {
  2230  	return fc&1 != 0
  2231  }
  2232  
  2233  // Z reports whether the result of an operation is 0.
  2234  func (fc flagConstant) Z() bool {
  2235  	return fc&2 != 0
  2236  }
  2237  
  2238  // C reports whether an unsigned add overflowed (carry), or an
  2239  // unsigned subtract did not underflow (borrow).
  2240  func (fc flagConstant) C() bool {
  2241  	return fc&4 != 0
  2242  }
  2243  
  2244  // V reports whether a signed operation overflowed or underflowed.
  2245  func (fc flagConstant) V() bool {
  2246  	return fc&8 != 0
  2247  }
  2248  
  2249  func (fc flagConstant) eq() bool {
  2250  	return fc.Z()
  2251  }
  2252  func (fc flagConstant) ne() bool {
  2253  	return !fc.Z()
  2254  }
  2255  func (fc flagConstant) lt() bool {
  2256  	return fc.N() != fc.V()
  2257  }
  2258  func (fc flagConstant) le() bool {
  2259  	return fc.Z() || fc.lt()
  2260  }
  2261  func (fc flagConstant) gt() bool {
  2262  	return !fc.Z() && fc.ge()
  2263  }
  2264  func (fc flagConstant) ge() bool {
  2265  	return fc.N() == fc.V()
  2266  }
  2267  func (fc flagConstant) ult() bool {
  2268  	return !fc.C()
  2269  }
  2270  func (fc flagConstant) ule() bool {
  2271  	return fc.Z() || fc.ult()
  2272  }
  2273  func (fc flagConstant) ugt() bool {
  2274  	return !fc.Z() && fc.uge()
  2275  }
  2276  func (fc flagConstant) uge() bool {
  2277  	return fc.C()
  2278  }
  2279  
  2280  func (fc flagConstant) ltNoov() bool {
  2281  	return fc.lt() && !fc.V()
  2282  }
  2283  func (fc flagConstant) leNoov() bool {
  2284  	return fc.le() && !fc.V()
  2285  }
  2286  func (fc flagConstant) gtNoov() bool {
  2287  	return fc.gt() && !fc.V()
  2288  }
  2289  func (fc flagConstant) geNoov() bool {
  2290  	return fc.ge() && !fc.V()
  2291  }
  2292  
  2293  func (fc flagConstant) String() string {
  2294  	return fmt.Sprintf("N=%v,Z=%v,C=%v,V=%v", fc.N(), fc.Z(), fc.C(), fc.V())
  2295  }
  2296  
  2297  type flagConstantBuilder struct {
  2298  	N bool
  2299  	Z bool
  2300  	C bool
  2301  	V bool
  2302  }
  2303  
  2304  func (fcs flagConstantBuilder) encode() flagConstant {
  2305  	var fc flagConstant
  2306  	if fcs.N {
  2307  		fc |= 1
  2308  	}
  2309  	if fcs.Z {
  2310  		fc |= 2
  2311  	}
  2312  	if fcs.C {
  2313  		fc |= 4
  2314  	}
  2315  	if fcs.V {
  2316  		fc |= 8
  2317  	}
  2318  	return fc
  2319  }
  2320  
  2321  // Note: addFlags(x,y) != subFlags(x,-y) in some situations:
  2322  //  - the results of the C flag are different
  2323  //  - the results of the V flag when y==minint are different
  2324  
  2325  // addFlags64 returns the flags that would be set from computing x+y.
  2326  func addFlags64(x, y int64) flagConstant {
  2327  	var fcb flagConstantBuilder
  2328  	fcb.Z = x+y == 0
  2329  	fcb.N = x+y < 0
  2330  	fcb.C = uint64(x+y) < uint64(x)
  2331  	fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
  2332  	return fcb.encode()
  2333  }
  2334  
  2335  // subFlags64 returns the flags that would be set from computing x-y.
  2336  func subFlags64(x, y int64) flagConstant {
  2337  	var fcb flagConstantBuilder
  2338  	fcb.Z = x-y == 0
  2339  	fcb.N = x-y < 0
  2340  	fcb.C = uint64(y) <= uint64(x) // This code follows the arm carry flag model.
  2341  	fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
  2342  	return fcb.encode()
  2343  }
  2344  
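        // exampleSubFlags64 is an illustrative sketch (hypothetical helper, not used by
        // the compiler). Computing 1-2 sets N (negative result) and clears C (a borrow
        // occurred, arm convention), so both signed and unsigned "less than" hold:
        func exampleSubFlags64() (lt, ult bool) {
        	fc := subFlags64(1, 2)
        	return fc.lt(), fc.ult() // true, true
        }
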
  2345  // addFlags32 returns the flags that would be set from computing x+y.
  2346  func addFlags32(x, y int32) flagConstant {
  2347  	var fcb flagConstantBuilder
  2348  	fcb.Z = x+y == 0
  2349  	fcb.N = x+y < 0
  2350  	fcb.C = uint32(x+y) < uint32(x)
  2351  	fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
  2352  	return fcb.encode()
  2353  }
  2354  
  2355  // subFlags32 returns the flags that would be set from computing x-y.
  2356  func subFlags32(x, y int32) flagConstant {
  2357  	var fcb flagConstantBuilder
  2358  	fcb.Z = x-y == 0
  2359  	fcb.N = x-y < 0
  2360  	fcb.C = uint32(y) <= uint32(x) // This code follows the arm carry flag model.
  2361  	fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
  2362  	return fcb.encode()
  2363  }
  2364  
  2365  // logicFlags64 returns flags set to the sign/zeroness of x.
  2366  // C and V are set to false.
  2367  func logicFlags64(x int64) flagConstant {
  2368  	var fcb flagConstantBuilder
  2369  	fcb.Z = x == 0
  2370  	fcb.N = x < 0
  2371  	return fcb.encode()
  2372  }
  2373  
  2374  // logicFlags32 returns flags set to the sign/zeroness of x.
  2375  // C and V are set to false.
  2376  func logicFlags32(x int32) flagConstant {
  2377  	var fcb flagConstantBuilder
  2378  	fcb.Z = x == 0
  2379  	fcb.N = x < 0
  2380  	return fcb.encode()
  2381  }
  2382  
  2383  func makeJumpTableSym(b *Block) *obj.LSym {
  2384  	s := base.Ctxt.Lookup(fmt.Sprintf("%s.jump%d", b.Func.fe.Func().LSym.Name, b.ID))
  2385  	// The jump table symbol is accessed only from the function symbol.
  2386  	s.Set(obj.AttrStatic, true)
  2387  	return s
  2388  }
  2389  
  2390  // canRotate reports whether the architecture supports
  2391  // rotates of integer registers with the given number of bits.
  2392  func canRotate(c *Config, bits int64) bool {
  2393  	if bits > c.PtrSize*8 {
  2394  		// Don't rewrite to rotates bigger than the machine word.
  2395  		return false
  2396  	}
  2397  	switch c.arch {
  2398  	case "386", "amd64", "arm64", "loong64", "riscv64":
  2399  		return true
  2400  	case "arm", "s390x", "ppc64", "ppc64le", "wasm":
  2401  		return bits >= 32
  2402  	default:
  2403  		return false
  2404  	}
  2405  }
  2406  
  2407  // isARM64bitcon reports whether a constant can be encoded into a logical instruction.
  2408  func isARM64bitcon(x uint64) bool {
  2409  	if x == 1<<64-1 || x == 0 {
  2410  		return false
  2411  	}
  2412  	// determine the period and sign-extend a unit to 64 bits
  2413  	switch {
  2414  	case x != x>>32|x<<32:
  2415  		// period is 64
  2416  		// nothing to do
  2417  	case x != x>>16|x<<48:
  2418  		// period is 32
  2419  		x = uint64(int64(int32(x)))
  2420  	case x != x>>8|x<<56:
  2421  		// period is 16
  2422  		x = uint64(int64(int16(x)))
  2423  	case x != x>>4|x<<60:
  2424  		// period is 8
  2425  		x = uint64(int64(int8(x)))
  2426  	default:
  2427  		// period is 4 or 2, always true
  2428  		// 0001, 0010, 0100, 1000 -- 0001 rotate
  2429  		// 0011, 0110, 1100, 1001 -- 0011 rotate
  2430  		// 0111, 1011, 1101, 1110 -- 0111 rotate
  2431  		// 0101, 1010             -- 01   rotate, repeat
  2432  		return true
  2433  	}
  2434  	return sequenceOfOnes(x) || sequenceOfOnes(^x)
  2435  }
  2436  
  2437  // sequenceOfOnes tests whether a constant is a sequence of ones in binary, with leading and trailing zeros.
  2438  func sequenceOfOnes(x uint64) bool {
  2439  	y := x & -x // lowest set bit of x. x is good iff x+y is a power of 2
  2440  	y += x
  2441  	return (y-1)&y == 0
  2442  }
  2443  
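        // exampleARM64bitcon is an illustrative sketch (hypothetical helper, not used
        // by the compiler). 0x00FF00FF00FF00FF repeats a contiguous run of ones with
        // period 16, so it can be encoded as a logical immediate; 0b101 is not a
        // contiguous run even after rotation, so it cannot:
        func exampleARM64bitcon() (ok, bad bool) {
        	ok = isARM64bitcon(0x00FF00FF00FF00FF) // true
        	bad = isARM64bitcon(5)                 // false
        	return ok, bad
        }
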
  2444  // isARM64addcon reports whether x can be encoded as the immediate value in an ADD or SUB instruction.
  2445  func isARM64addcon(v int64) bool {
  2446  	/* uimm12, or uimm12 shifted left by 12? */
  2447  	if v < 0 {
  2448  		return false
  2449  	}
  2450  	if (v & 0xFFF) == 0 {
  2451  		v >>= 12
  2452  	}
  2453  	return v <= 0xFFF
  2454  }
  2455  
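        // exampleARM64addcon is an illustrative sketch (hypothetical helper, not used
        // by the compiler). ADD/SUB immediates are a 12-bit value, optionally shifted
        // left by 12:
        func exampleARM64addcon() (a, b, c bool) {
        	a = isARM64addcon(0xFFF)    // true: plain uimm12
        	b = isARM64addcon(0x123000) // true: 0x123 << 12
        	c = isARM64addcon(0x1001)   // false: needs 13 bits, not a shifted uimm12
        	return a, b, c
        }
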
  2456  // setPos sets the position of v to pos, then returns true.
  2457  // Useful for setting the result of a rewrite's position to
  2458  // something other than the default.
  2459  func setPos(v *Value, pos src.XPos) bool {
  2460  	v.Pos = pos
  2461  	return true
  2462  }
  2463  
  2464  // isNonNegative reports whether v is known to be greater or equal to zero.
  2465  // Note that this is pretty simplistic. The prove pass generates more detailed
  2466  // nonnegative information about values.
  2467  func isNonNegative(v *Value) bool {
  2468  	if !v.Type.IsInteger() {
  2469  		v.Fatalf("isNonNegative bad type: %v", v.Type)
  2470  	}
  2471  	// TODO: return true if !v.Type.IsSigned()
  2472  	// SSA isn't type-safe enough to do that now (issue 37753).
  2473  	// The checks below depend only on the pattern of bits.
  2474  
  2475  	switch v.Op {
  2476  	case OpConst64:
  2477  		return v.AuxInt >= 0
  2478  
  2479  	case OpConst32:
  2480  		return int32(v.AuxInt) >= 0
  2481  
  2482  	case OpConst16:
  2483  		return int16(v.AuxInt) >= 0
  2484  
  2485  	case OpConst8:
  2486  		return int8(v.AuxInt) >= 0
  2487  
  2488  	case OpStringLen, OpSliceLen, OpSliceCap,
  2489  		OpZeroExt8to64, OpZeroExt16to64, OpZeroExt32to64,
  2490  		OpZeroExt8to32, OpZeroExt16to32, OpZeroExt8to16,
  2491  		OpCtz64, OpCtz32, OpCtz16, OpCtz8,
  2492  		OpCtz64NonZero, OpCtz32NonZero, OpCtz16NonZero, OpCtz8NonZero,
  2493  		OpBitLen64, OpBitLen32, OpBitLen16, OpBitLen8:
  2494  		return true
  2495  
  2496  	case OpRsh64Ux64, OpRsh32Ux64:
  2497  		by := v.Args[1]
  2498  		return by.Op == OpConst64 && by.AuxInt > 0
  2499  
  2500  	case OpRsh64x64, OpRsh32x64, OpRsh8x64, OpRsh16x64, OpRsh32x32, OpRsh64x32,
  2501  		OpSignExt32to64, OpSignExt16to64, OpSignExt8to64, OpSignExt16to32, OpSignExt8to32:
  2502  		return isNonNegative(v.Args[0])
  2503  
  2504  	case OpAnd64, OpAnd32, OpAnd16, OpAnd8:
  2505  		return isNonNegative(v.Args[0]) || isNonNegative(v.Args[1])
  2506  
  2507  	case OpMod64, OpMod32, OpMod16, OpMod8,
  2508  		OpDiv64, OpDiv32, OpDiv16, OpDiv8,
  2509  		OpOr64, OpOr32, OpOr16, OpOr8,
  2510  		OpXor64, OpXor32, OpXor16, OpXor8:
  2511  		return isNonNegative(v.Args[0]) && isNonNegative(v.Args[1])
  2512  
  2513  		// We could handle OpPhi here, but the improvements from doing
  2514  		// so are very minor, and it is neither simple nor cheap.
  2515  	}
  2516  	return false
  2517  }
  2518  
  2519  func rewriteStructLoad(v *Value) *Value {
  2520  	b := v.Block
  2521  	ptr := v.Args[0]
  2522  	mem := v.Args[1]
  2523  
  2524  	t := v.Type
  2525  	args := make([]*Value, t.NumFields())
  2526  	for i := range args {
  2527  		ft := t.FieldType(i)
  2528  		addr := b.NewValue1I(v.Pos, OpOffPtr, ft.PtrTo(), t.FieldOff(i), ptr)
  2529  		args[i] = b.NewValue2(v.Pos, OpLoad, ft, addr, mem)
  2530  	}
  2531  
  2532  	v.reset(OpStructMake)
  2533  	v.AddArgs(args...)
  2534  	return v
  2535  }
  2536  
  2537  func rewriteStructStore(v *Value) *Value {
  2538  	b := v.Block
  2539  	dst := v.Args[0]
  2540  	x := v.Args[1]
  2541  	if x.Op != OpStructMake {
  2542  		base.Fatalf("invalid struct store: %v", x)
  2543  	}
  2544  	mem := v.Args[2]
  2545  
  2546  	t := x.Type
  2547  	for i, arg := range x.Args {
  2548  		ft := t.FieldType(i)
  2549  
  2550  		addr := b.NewValue1I(v.Pos, OpOffPtr, ft.PtrTo(), t.FieldOff(i), dst)
  2551  		mem = b.NewValue3A(v.Pos, OpStore, types.TypeMem, typeToAux(ft), addr, arg, mem)
  2552  	}
  2553  
  2554  	return mem
  2555  }
  2556  
  2557  // isDirectType reports whether v represents a type
  2558  // (a *runtime._type) whose value is stored directly in an
  2559  // interface (i.e., is pointer or pointer-like).
  2560  func isDirectType(v *Value) bool {
  2561  	return isDirectType1(v)
  2562  }
  2563  
  2564  // v is a type
  2565  func isDirectType1(v *Value) bool {
  2566  	switch v.Op {
  2567  	case OpITab:
  2568  		return isDirectType2(v.Args[0])
  2569  	case OpAddr:
  2570  		lsym := v.Aux.(*obj.LSym)
  2571  		if lsym.Extra == nil {
  2572  			return false
  2573  		}
  2574  		if ti, ok := (*lsym.Extra).(*obj.TypeInfo); ok {
  2575  			return types.IsDirectIface(ti.Type.(*types.Type))
  2576  		}
  2577  	}
  2578  	return false
  2579  }
  2580  
  2581  // v is an empty interface
  2582  func isDirectType2(v *Value) bool {
  2583  	switch v.Op {
  2584  	case OpIMake:
  2585  		return isDirectType1(v.Args[0])
  2586  	}
  2587  	return false
  2588  }
  2589  
  2590  // isDirectIface reports whether v represents an itab
  2591  // (a *runtime._itab) for a type whose value is stored directly
  2592  // in an interface (i.e., is pointer or pointer-like).
  2593  func isDirectIface(v *Value) bool {
  2594  	return isDirectIface1(v, 9)
  2595  }
  2596  
  2597  // v is an itab
  2598  func isDirectIface1(v *Value, depth int) bool {
  2599  	if depth == 0 {
  2600  		return false
  2601  	}
  2602  	switch v.Op {
  2603  	case OpITab:
  2604  		return isDirectIface2(v.Args[0], depth-1)
  2605  	case OpAddr:
  2606  		lsym := v.Aux.(*obj.LSym)
  2607  		if lsym.Extra == nil {
  2608  			return false
  2609  		}
  2610  		if ii, ok := (*lsym.Extra).(*obj.ItabInfo); ok {
  2611  			return types.IsDirectIface(ii.Type.(*types.Type))
  2612  		}
  2613  	case OpConstNil:
  2614  		// We can treat this as direct, because if the itab is
  2615  		// nil, the data field must be nil also.
  2616  		return true
  2617  	}
  2618  	return false
  2619  }
  2620  
  2621  // v is an interface
  2622  func isDirectIface2(v *Value, depth int) bool {
  2623  	if depth == 0 {
  2624  		return false
  2625  	}
  2626  	switch v.Op {
  2627  	case OpIMake:
  2628  		return isDirectIface1(v.Args[0], depth-1)
  2629  	case OpPhi:
  2630  		for _, a := range v.Args {
  2631  			if !isDirectIface2(a, depth-1) {
  2632  				return false
  2633  			}
  2634  		}
  2635  		return true
  2636  	}
  2637  	return false
  2638  }
  2639  
  2640  func bitsAdd64(x, y, carry int64) (r struct{ sum, carry int64 }) {
  2641  	s, c := bits.Add64(uint64(x), uint64(y), uint64(carry))
  2642  	r.sum, r.carry = int64(s), int64(c)
  2643  	return
  2644  }
  2645  
  2646  func bitsMulU64(x, y int64) (r struct{ hi, lo int64 }) {
  2647  	hi, lo := bits.Mul64(uint64(x), uint64(y))
  2648  	r.hi, r.lo = int64(hi), int64(lo)
  2649  	return
  2650  }
  2651  func bitsMulU32(x, y int32) (r struct{ hi, lo int32 }) {
  2652  	hi, lo := bits.Mul32(uint32(x), uint32(y))
  2653  	r.hi, r.lo = int32(hi), int32(lo)
  2654  	return
  2655  }
  2656  
  2657  // flagify rewrites v which is (X ...) to (Select0 (Xflags ...)).
  2658  func flagify(v *Value) bool {
  2659  	var flagVersion Op
  2660  	switch v.Op {
  2661  	case OpAMD64ADDQconst:
  2662  		flagVersion = OpAMD64ADDQconstflags
  2663  	case OpAMD64ADDLconst:
  2664  		flagVersion = OpAMD64ADDLconstflags
  2665  	default:
  2666  		base.Fatalf("can't flagify op %s", v.Op)
  2667  	}
  2668  	inner := v.copyInto(v.Block)
  2669  	inner.Op = flagVersion
  2670  	inner.Type = types.NewTuple(v.Type, types.TypeFlags)
  2671  	v.reset(OpSelect0)
  2672  	v.AddArg(inner)
  2673  	return true
  2674  }
  2675  
  2676  // PanicBoundsC contains a constant for a bounds failure.
  2677  type PanicBoundsC struct {
  2678  	C int64
  2679  }
  2680  
  2681  // PanicBoundsCC contains 2 constants for a bounds failure.
  2682  type PanicBoundsCC struct {
  2683  	Cx int64
  2684  	Cy int64
  2685  }
  2686  
  2687  func (p PanicBoundsC) CanBeAnSSAAux() {
  2688  }
  2689  func (p PanicBoundsCC) CanBeAnSSAAux() {
  2690  }
  2691  
  2692  func auxToPanicBoundsC(i Aux) PanicBoundsC {
  2693  	return i.(PanicBoundsC)
  2694  }
  2695  func auxToPanicBoundsCC(i Aux) PanicBoundsCC {
  2696  	return i.(PanicBoundsCC)
  2697  }
  2698  func panicBoundsCToAux(p PanicBoundsC) Aux {
  2699  	return p
  2700  }
  2701  func panicBoundsCCToAux(p PanicBoundsCC) Aux {
  2702  	return p
  2703  }
  2704  
