regalloc.go

   1// Package regalloc performs register allocation. The algorithm can work on any ISA by implementing the interfaces in
   2// api.go.
   3//
   4// References:
   5//   - https://web.stanford.edu/class/archive/cs/cs143/cs143.1128/lectures/17/Slides17.pdf
   6//   - https://en.wikipedia.org/wiki/Chaitin%27s_algorithm
   7//   - https://llvm.org/ProjectsWithLLVM/2004-Fall-CS426-LS.pdf
   8//   - https://pfalcon.github.io/ssabook/latest/book-full.pdf: Chapter 9. for liveness analysis.
   9//   - https://github.com/golang/go/blob/release-branch.go1.21/src/cmd/compile/internal/ssa/regalloc.go
  10package regalloc
  11
  12import (
  13	"fmt"
  14	"math"
  15	"strings"
  16
  17	"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
  18)
  19
  20// NewAllocator returns a new Allocator.
  21func NewAllocator[I Instr, B Block[I], F Function[I, B]](allocatableRegs *RegisterInfo) Allocator[I, B, F] {
  22	a := Allocator[I, B, F]{
  23		regInfo:            allocatableRegs,
  24		phiDefInstListPool: wazevoapi.NewPool[phiDefInstList[I]](resetPhiDefInstList[I]),
  25		blockStates:        wazevoapi.NewIDedPool[blockState[I, B, F]](resetBlockState[I, B, F]),
  26	}
  27	a.state.vrStates = wazevoapi.NewIDedPool[vrState[I, B, F]](resetVrState[I, B, F])
  28	a.state.reset()
  29	for _, regs := range allocatableRegs.AllocatableRegisters {
  30		for _, r := range regs {
  31			a.allocatableSet = a.allocatableSet.add(r)
  32		}
  33	}
  34	return a
  35}
  36
  37type (
  38	// RegisterInfo holds the statically-known ISA-specific register information.
  39	RegisterInfo struct {
  40		// AllocatableRegisters is a 2D array of allocatable RealReg, indexed by regTypeNum and regNum.
  41		// The order matters: the first element is the most preferred one when allocating.
  42		AllocatableRegisters [NumRegType][]RealReg
  43		CalleeSavedRegisters RegSet
  44		CallerSavedRegisters RegSet
  45		RealRegToVReg        []VReg
  46		// RealRegName returns the name of the given RealReg for debugging.
  47		RealRegName func(r RealReg) string
  48		RealRegType func(r RealReg) RegType
  49	}
  50
  51	// Allocator is a register allocator.
  52	Allocator[I Instr, B Block[I], F Function[I, B]] struct {
  53		// regInfo is static per ABI/ISA, and is initialized by the machine during Machine.PrepareRegisterAllocator.
  54		regInfo *RegisterInfo
  55		// allocatableSet is a set of allocatable RealReg derived from regInfo. Static per ABI/ISA.
  56		allocatableSet           RegSet
  57		allocatedCalleeSavedRegs []VReg
  58		vs                       []VReg
  59		ss                       []*vrState[I, B, F]
  60		copies                   []_copy[I, B, F]
  61		phiDefInstListPool       wazevoapi.Pool[phiDefInstList[I]]
  62
  63		// Followings are re-used during various places.
  64		blks  []B
  65		reals []RealReg
  66
  67		// Following two fields are updated while iterating the blocks in the reverse postorder.
  68		state       state[I, B, F]
  69		blockStates wazevoapi.IDedPool[blockState[I, B, F]]
  70	}
  71
  72	// _copy represents a source and destination pair of a copy instruction.
  73	_copy[I Instr, B Block[I], F Function[I, B]] struct {
  74		src   *vrState[I, B, F]
  75		dstID VRegID
  76	}
  77
  78	// programCounter represents an opaque index into the program which is used to represents a LiveInterval of a VReg.
  79	programCounter int32
  80
  81	state[I Instr, B Block[I], F Function[I, B]] struct {
  82		argRealRegs []VReg
  83		regsInUse   regInUseSet[I, B, F]
  84		vrStates    wazevoapi.IDedPool[vrState[I, B, F]]
  85
  86		currentBlockID int32
  87
  88		// allocatedRegSet is a set of RealReg that are allocated during the allocation phase. This is reset per function.
  89		allocatedRegSet RegSet
  90	}
  91
  92	blockState[I Instr, B Block[I], F Function[I, B]] struct {
  93		// liveIns is a list of VReg that are live at the beginning of the block.
  94		liveIns []*vrState[I, B, F]
  95		// seen is true if the block is visited during the liveness analysis.
  96		seen bool
  97		// visited is true if the block is visited during the allocation phase.
  98		visited            bool
  99		startFromPredIndex int
 100		// startRegs is a list of RealReg that are used at the beginning of the block. This is used to fix the merge edges.
 101		startRegs regInUseSet[I, B, F]
 102		// endRegs is a list of RealReg that are used at the end of the block. This is used to fix the merge edges.
 103		endRegs regInUseSet[I, B, F]
 104	}
 105
 106	vrState[I Instr, B Block[I], f Function[I, B]] struct {
 107		v VReg
 108		r RealReg
 109		// defInstr is the instruction that defines this value. If this is the phi value and not the entry block, this is nil.
 110		defInstr I
 111		// defBlk is the block that defines this value. If this is the phi value, this is the block whose arguments contain this value.
 112		defBlk B
 113		// lca = lowest common ancestor. This is the block that is the lowest common ancestor of all the blocks that
 114		// reloads this value. This is used to determine the spill location. Only valid if spilled=true.
 115		lca B
 116		// lastUse is the program counter of the last use of this value. This changes while iterating the block, and
 117		// should not be used across the blocks as it becomes invalid. To check the validity, use lastUseUpdatedAtBlockID.
 118		lastUse                 programCounter
 119		lastUseUpdatedAtBlockID int32
 120		// spilled is true if this value is spilled i.e. the value is reload from the stack somewhere in the program.
 121		//
 122		// Note that this field is used during liveness analysis for different purpose. This is used to determine the
 123		// value is live-in or not.
 124		spilled bool
 125		// isPhi is true if this is a phi value.
 126		isPhi      bool
 127		desiredLoc desiredLoc
 128		// phiDefInstList is a list of instructions that defines this phi value.
 129		// This is used to determine the spill location, and only valid if isPhi=true.
 130		*phiDefInstList[I]
 131	}
 132
 133	// phiDefInstList is a linked list of instructions that defines a phi value.
 134	phiDefInstList[I Instr] struct {
 135		instr I
 136		v     VReg
 137		next  *phiDefInstList[I]
 138	}
 139
 140	// desiredLoc represents a desired location for a VReg.
 141	desiredLoc uint16
 142	// desiredLocKind is a kind of desired location for a VReg.
 143	desiredLocKind uint16
 144)
 145
 146const (
 147	// desiredLocKindUnspecified is a kind of desired location for a VReg that is not specified.
 148	desiredLocKindUnspecified desiredLocKind = iota
 149	// desiredLocKindStack is a kind of desired location for a VReg that is on the stack, only used for the phi values.
 150	desiredLocKindStack
 151	// desiredLocKindReg is a kind of desired location for a VReg that is in a register.
 152	desiredLocKindReg
 153	desiredLocUnspecified = desiredLoc(desiredLocKindUnspecified)
 154	desiredLocStack       = desiredLoc(desiredLocKindStack)
 155)
 156
 157func newDesiredLocReg(r RealReg) desiredLoc {
 158	return desiredLoc(desiredLocKindReg) | desiredLoc(r<<2)
 159}
 160
 161func (d desiredLoc) realReg() RealReg {
 162	return RealReg(d >> 2)
 163}
 164
 165func (d desiredLoc) stack() bool {
 166	return d&3 == desiredLoc(desiredLocKindStack)
 167}
 168
 169func resetPhiDefInstList[I Instr](l *phiDefInstList[I]) {
 170	var nilInstr I
 171	l.instr = nilInstr
 172	l.next = nil
 173	l.v = VRegInvalid
 174}
 175
 176func (s *state[I, B, F]) dump(info *RegisterInfo) { //nolint:unused
 177	fmt.Println("\t\tstate:")
 178	fmt.Println("\t\t\targRealRegs:", s.argRealRegs)
 179	fmt.Println("\t\t\tregsInUse", s.regsInUse.format(info))
 180	fmt.Println("\t\t\tallocatedRegSet:", s.allocatedRegSet.format(info))
 181	fmt.Println("\t\t\tused:", s.regsInUse.format(info))
 182	var strs []string
 183	for i := 0; i <= s.vrStates.MaxIDEncountered(); i++ {
 184		vs := s.vrStates.Get(i)
 185		if vs == nil {
 186			continue
 187		}
 188		if vs.r != RealRegInvalid {
 189			strs = append(strs, fmt.Sprintf("(v%d: %s)", vs.v.ID(), info.RealRegName(vs.r)))
 190		}
 191	}
 192	fmt.Println("\t\t\tvrStates:", strings.Join(strs, ", "))
 193}
 194
 195func (s *state[I, B, F]) reset() {
 196	s.argRealRegs = s.argRealRegs[:0]
 197	s.vrStates.Reset()
 198	s.allocatedRegSet = RegSet(0)
 199	s.regsInUse.reset()
 200	s.currentBlockID = -1
 201}
 202
 203func resetVrState[I Instr, B Block[I], F Function[I, B]](vs *vrState[I, B, F]) {
 204	vs.v = VRegInvalid
 205	vs.r = RealRegInvalid
 206	var nilInstr I
 207	vs.defInstr = nilInstr
 208	var nilBlk B
 209	vs.defBlk = nilBlk
 210	vs.spilled = false
 211	vs.lastUse = -1
 212	vs.lastUseUpdatedAtBlockID = -1
 213	vs.lca = nilBlk
 214	vs.isPhi = false
 215	vs.phiDefInstList = nil
 216	vs.desiredLoc = desiredLocUnspecified
 217}
 218
 219func (s *state[I, B, F]) getOrAllocateVRegState(v VReg) *vrState[I, B, F] {
 220	st := s.vrStates.GetOrAllocate(int(v.ID()))
 221	if st.v == VRegInvalid {
 222		st.v = v
 223	}
 224	return st
 225}
 226
 227func (s *state[I, B, F]) getVRegState(v VRegID) *vrState[I, B, F] {
 228	return s.vrStates.Get(int(v))
 229}
 230
 231func (s *state[I, B, F]) useRealReg(r RealReg, vr *vrState[I, B, F]) {
 232	s.regsInUse.add(r, vr)
 233	vr.r = r
 234	s.allocatedRegSet = s.allocatedRegSet.add(r)
 235}
 236
 237func (s *state[I, B, F]) releaseRealReg(r RealReg) {
 238	current := s.regsInUse.get(r)
 239	if current != nil {
 240		s.regsInUse.remove(r)
 241		current.r = RealRegInvalid
 242	}
 243}
 244
 245// recordReload records that the given VReg is reloaded in the given block.
 246// This is used to determine the spill location by tracking the lowest common ancestor of all the blocks that reloads the value.
 247func (vs *vrState[I, B, F]) recordReload(f F, blk B) {
 248	vs.spilled = true
 249	var nilBlk B
 250	if lca := vs.lca; lca == nilBlk {
 251		if wazevoapi.RegAllocLoggingEnabled {
 252			fmt.Printf("\t\tv%d is reloaded in blk%d,\n", vs.v.ID(), blk.ID())
 253		}
 254		vs.lca = blk
 255	} else if lca != blk {
 256		if wazevoapi.RegAllocLoggingEnabled {
 257			fmt.Printf("\t\tv%d is reloaded in blk%d, lca=%d\n", vs.v.ID(), blk.ID(), vs.lca.ID())
 258		}
 259		vs.lca = f.LowestCommonAncestor(lca, blk)
 260		if wazevoapi.RegAllocLoggingEnabled {
 261			fmt.Printf("updated lca=%d\n", vs.lca.ID())
 262		}
 263	}
 264}
 265
 266func (a *Allocator[I, B, F]) findOrSpillAllocatable(s *state[I, B, F], allocatable []RealReg, forbiddenMask RegSet, preferred RealReg) (r RealReg) {
 267	r = RealRegInvalid
 268	// First, check if the preferredMask has any allocatable register.
 269	if preferred != RealRegInvalid && !forbiddenMask.has(preferred) && !s.regsInUse.has(preferred) {
 270		return preferred
 271	}
 272
 273	var lastUseAt programCounter
 274	var spillVReg VReg
 275	for _, candidateReal := range allocatable {
 276		if forbiddenMask.has(candidateReal) {
 277			continue
 278		}
 279
 280		using := s.regsInUse.get(candidateReal)
 281		if using == nil {
 282			// This is not used at this point.
 283			return candidateReal
 284		}
 285
 286		// Real registers in use should not be spilled, so we skip them.
 287		// For example, if the register is used as an argument register, and it might be
 288		// spilled and not reloaded when it ends up being used as a temporary to pass
 289		// stack based argument.
 290		if using.v.IsRealReg() {
 291			continue
 292		}
 293
 294		isPreferred := candidateReal == preferred
 295
 296		// last == -1 means the value won't be used anymore.
 297		if last := using.lastUse; r == RealRegInvalid || isPreferred || last == -1 || (lastUseAt != -1 && last > lastUseAt) {
 298			lastUseAt = last
 299			r = candidateReal
 300			spillVReg = using.v
 301			if isPreferred {
 302				break
 303			}
 304		}
 305	}
 306
 307	if r == RealRegInvalid {
 308		panic("not found any allocatable register")
 309	}
 310
 311	if wazevoapi.RegAllocLoggingEnabled {
 312		fmt.Printf("\tspilling v%d when lastUseAt=%d and regsInUse=%s\n", spillVReg.ID(), lastUseAt, s.regsInUse.format(a.regInfo))
 313	}
 314	s.releaseRealReg(r)
 315	return r
 316}
 317
 318func (s *state[I, B, F]) findAllocatable(allocatable []RealReg, forbiddenMask RegSet) RealReg {
 319	for _, r := range allocatable {
 320		if !s.regsInUse.has(r) && !forbiddenMask.has(r) {
 321			return r
 322		}
 323	}
 324	return RealRegInvalid
 325}
 326
 327func (s *state[I, B, F]) resetAt(bs *blockState[I, B, F]) {
 328	s.regsInUse.range_(func(_ RealReg, vs *vrState[I, B, F]) {
 329		vs.r = RealRegInvalid
 330	})
 331	s.regsInUse.reset()
 332	bs.endRegs.range_(func(r RealReg, vs *vrState[I, B, F]) {
 333		if vs.lastUseUpdatedAtBlockID == s.currentBlockID && vs.lastUse == programCounterLiveIn {
 334			s.regsInUse.add(r, vs)
 335			vs.r = r
 336		}
 337	})
 338}
 339
 340func resetBlockState[I Instr, B Block[I], F Function[I, B]](b *blockState[I, B, F]) {
 341	b.seen = false
 342	b.visited = false
 343	b.endRegs.reset()
 344	b.startRegs.reset()
 345	b.startFromPredIndex = -1
 346	b.liveIns = b.liveIns[:0]
 347}
 348
 349func (b *blockState[I, B, F]) dump(a *RegisterInfo) {
 350	fmt.Println("\t\tblockState:")
 351	fmt.Println("\t\t\tstartRegs:", b.startRegs.format(a))
 352	fmt.Println("\t\t\tendRegs:", b.endRegs.format(a))
 353	fmt.Println("\t\t\tstartFromPredIndex:", b.startFromPredIndex)
 354	fmt.Println("\t\t\tvisited:", b.visited)
 355}
 356
 357// DoAllocation performs register allocation on the given Function.
 358func (a *Allocator[I, B, F]) DoAllocation(f F) {
 359	a.livenessAnalysis(f)
 360	a.alloc(f)
 361	a.determineCalleeSavedRealRegs(f)
 362}
 363
 364func (a *Allocator[I, B, F]) determineCalleeSavedRealRegs(f F) {
 365	a.allocatedCalleeSavedRegs = a.allocatedCalleeSavedRegs[:0]
 366	a.state.allocatedRegSet.Range(func(allocatedRealReg RealReg) {
 367		if a.regInfo.CalleeSavedRegisters.has(allocatedRealReg) {
 368			a.allocatedCalleeSavedRegs = append(a.allocatedCalleeSavedRegs, a.regInfo.RealRegToVReg[allocatedRealReg])
 369		}
 370	})
 371	f.ClobberedRegisters(a.allocatedCalleeSavedRegs)
 372}
 373
 374func (a *Allocator[I, B, F]) getOrAllocateBlockState(blockID int32) *blockState[I, B, F] {
 375	return a.blockStates.GetOrAllocate(int(blockID))
 376}
 377
 378// phiBlk returns the block that defines the given phi value, nil otherwise.
 379func (vs *vrState[I, B, F]) phiBlk() B {
 380	if vs.isPhi {
 381		return vs.defBlk
 382	}
 383	var nilBlk B
 384	return nilBlk
 385}
 386
 387const (
 388	programCounterLiveIn  = math.MinInt32
 389	programCounterLiveOut = math.MaxInt32
 390)
 391
 392// liveAnalysis constructs Allocator.blockLivenessData.
 393// The algorithm here is described in https://pfalcon.github.io/ssabook/latest/book-full.pdf Chapter 9.2.
 394func (a *Allocator[I, B, F]) livenessAnalysis(f F) {
 395	s := &a.state
 396
 397	for i := VRegID(0); i < vRegIDReservedForRealNum; i++ {
 398		s.getOrAllocateVRegState(VReg(i).SetRealReg(RealReg(i)))
 399	}
 400
 401	var nilBlk B
 402	var nilInstr I
 403	for blk := f.PostOrderBlockIteratorBegin(); blk != nilBlk; blk = f.PostOrderBlockIteratorNext() {
 404		// We should gather phi value data.
 405		for _, p := range f.BlockParams(blk, &a.vs) {
 406			vs := s.getOrAllocateVRegState(p)
 407			vs.isPhi = true
 408			vs.defBlk = blk
 409		}
 410
 411		blkID := blk.ID()
 412		info := a.getOrAllocateBlockState(blkID)
 413
 414		a.ss = a.ss[:0]
 415		const (
 416			flagDeleted = false
 417			flagLive    = true
 418		)
 419		ns := blk.Succs()
 420		for i := 0; i < ns; i++ {
 421			succ := f.Succ(blk, i)
 422			if succ == nilBlk {
 423				continue
 424			}
 425
 426			succID := succ.ID()
 427			succInfo := a.getOrAllocateBlockState(succID)
 428			if !succInfo.seen { // This means the back edge.
 429				continue
 430			}
 431
 432			for _, st := range succInfo.liveIns {
 433				if st.phiBlk() != succ && st.spilled != flagLive { //nolint:gosimple
 434					// We use .spilled field to store the flag.
 435					st.spilled = flagLive
 436					a.ss = append(a.ss, st)
 437				}
 438			}
 439		}
 440
 441		for instr := blk.InstrRevIteratorBegin(); instr != nilInstr; instr = blk.InstrRevIteratorNext() {
 442
 443			var use, def VReg
 444			var defIsPhi bool
 445			for _, def = range instr.Defs(&a.vs) {
 446				if !def.IsRealReg() {
 447					st := s.getOrAllocateVRegState(def)
 448					defIsPhi = st.isPhi
 449					// Note: We use .spilled field to store the flag.
 450					st.spilled = flagDeleted
 451				}
 452			}
 453			for _, use = range instr.Uses(&a.vs) {
 454				if !use.IsRealReg() {
 455					st := s.getOrAllocateVRegState(use)
 456					// Note: We use .spilled field to store the flag.
 457					if st.spilled != flagLive { //nolint:gosimple
 458						st.spilled = flagLive
 459						a.ss = append(a.ss, st)
 460					}
 461				}
 462			}
 463
 464			if defIsPhi {
 465				if use.Valid() && use.IsRealReg() {
 466					// If the destination is a phi value, and the source is a real register, this is the beginning of the function.
 467					a.state.argRealRegs = append(a.state.argRealRegs, use)
 468				}
 469			}
 470		}
 471
 472		for _, st := range a.ss {
 473			// We use .spilled field to store the flag.
 474			if st.spilled == flagLive { //nolint:gosimple
 475				info.liveIns = append(info.liveIns, st)
 476				st.spilled = false
 477			}
 478		}
 479
 480		info.seen = true
 481	}
 482
 483	nrs := f.LoopNestingForestRoots()
 484	for i := 0; i < nrs; i++ {
 485		root := f.LoopNestingForestRoot(i)
 486		a.loopTreeDFS(f, root)
 487	}
 488}
 489
 490// loopTreeDFS implements the Algorithm 9.3 in the book in an iterative way.
 491func (a *Allocator[I, B, F]) loopTreeDFS(f F, entry B) {
 492	a.blks = a.blks[:0]
 493	a.blks = append(a.blks, entry)
 494
 495	for len(a.blks) > 0 {
 496		tail := len(a.blks) - 1
 497		loop := a.blks[tail]
 498		a.blks = a.blks[:tail]
 499		a.ss = a.ss[:0]
 500		const (
 501			flagDone    = false
 502			flagPending = true
 503		)
 504		info := a.getOrAllocateBlockState(loop.ID())
 505		for _, st := range info.liveIns {
 506			if st.phiBlk() != loop {
 507				a.ss = append(a.ss, st)
 508				// We use .spilled field to store the flag.
 509				st.spilled = flagPending
 510			}
 511		}
 512
 513		var siblingAddedView []*vrState[I, B, F]
 514		cn := loop.LoopNestingForestChildren()
 515		for i := 0; i < cn; i++ {
 516			child := f.LoopNestingForestChild(loop, i)
 517			childID := child.ID()
 518			childInfo := a.getOrAllocateBlockState(childID)
 519
 520			if i == 0 {
 521				begin := len(childInfo.liveIns)
 522				for _, st := range a.ss {
 523					// We use .spilled field to store the flag.
 524					if st.spilled == flagPending { //nolint:gosimple
 525						st.spilled = flagDone
 526						// TODO: deduplicate, though I don't think it has much impact.
 527						childInfo.liveIns = append(childInfo.liveIns, st)
 528					}
 529				}
 530				siblingAddedView = childInfo.liveIns[begin:]
 531			} else {
 532				// TODO: deduplicate, though I don't think it has much impact.
 533				childInfo.liveIns = append(childInfo.liveIns, siblingAddedView...)
 534			}
 535
 536			if child.LoopHeader() {
 537				a.blks = append(a.blks, child)
 538			}
 539		}
 540
 541		if cn == 0 {
 542			// If there's no forest child, we haven't cleared the .spilled field at this point.
 543			for _, st := range a.ss {
 544				st.spilled = false
 545			}
 546		}
 547	}
 548}
 549
 550// alloc allocates registers for the given function by iterating the blocks in the reverse postorder.
 551// The algorithm here is derived from the Go compiler's allocator https://github.com/golang/go/blob/release-branch.go1.21/src/cmd/compile/internal/ssa/regalloc.go
 552// In short, this is a simply linear scan register allocation where each block inherits the register allocation state from
 553// one of its predecessors. Each block inherits the selected state and starts allocation from there.
 554// If there's a discrepancy in the end states between predecessors, the adjustments are made to ensure consistency after allocation is done (which we call "fixing merge state").
 555// The spill instructions (store into the dedicated slots) are inserted after all the allocations and fixing merge states. That is because
 556// at the point, we all know where the reloads happen, and therefore we can know the best place to spill the values. More precisely,
 557// the spill happens in the block that is the lowest common ancestor of all the blocks that reloads the value.
 558//
 559// All of these logics are almost the same as Go's compiler which has a dedicated description in the source file ^^.
 560func (a *Allocator[I, B, F]) alloc(f F) {
 561	// First we allocate each block in the reverse postorder (at least one predecessor should be allocated for each block).
 562	var nilBlk B
 563	for blk := f.ReversePostOrderBlockIteratorBegin(); blk != nilBlk; blk = f.ReversePostOrderBlockIteratorNext() {
 564		if wazevoapi.RegAllocLoggingEnabled {
 565			fmt.Printf("========== allocating blk%d ========\n", blk.ID())
 566		}
 567		if blk.Entry() {
 568			a.finalizeStartReg(f, blk)
 569		}
 570		a.allocBlock(f, blk)
 571	}
 572	// After the allocation, we all know the start and end state of each block. So we can fix the merge states.
 573	for blk := f.ReversePostOrderBlockIteratorBegin(); blk != nilBlk; blk = f.ReversePostOrderBlockIteratorNext() {
 574		a.fixMergeState(f, blk)
 575	}
 576	// Finally, we insert the spill instructions as we know all the places where the reloads happen.
 577	a.scheduleSpills(f)
 578}
 579
 580func (a *Allocator[I, B, F]) updateLiveInVRState(liveness *blockState[I, B, F]) {
 581	currentBlockID := a.state.currentBlockID
 582	for _, vs := range liveness.liveIns {
 583		vs.lastUse = programCounterLiveIn
 584		vs.lastUseUpdatedAtBlockID = currentBlockID
 585	}
 586}
 587
 588func (a *Allocator[I, B, F]) finalizeStartReg(f F, blk B) {
 589	bID := blk.ID()
 590	s := &a.state
 591	currentBlkState := a.getOrAllocateBlockState(bID)
 592	if currentBlkState.startFromPredIndex > -1 {
 593		return
 594	}
 595
 596	s.currentBlockID = bID
 597	a.updateLiveInVRState(currentBlkState)
 598
 599	preds := blk.Preds()
 600	var predState *blockState[I, B, F]
 601	switch preds {
 602	case 0: // This is the entry block.
 603	case 1:
 604		predID := f.Pred(blk, 0).ID()
 605		predState = a.getOrAllocateBlockState(predID)
 606		currentBlkState.startFromPredIndex = 0
 607	default:
 608		// TODO: there should be some better heuristic to choose the predecessor.
 609		for i := 0; i < preds; i++ {
 610			predID := f.Pred(blk, i).ID()
 611			if _predState := a.getOrAllocateBlockState(predID); _predState.visited {
 612				predState = _predState
 613				currentBlkState.startFromPredIndex = i
 614				break
 615			}
 616		}
 617	}
 618	if predState == nil {
 619		if !blk.Entry() {
 620			panic(fmt.Sprintf("BUG: at lease one predecessor should be visited for blk%d", blk.ID()))
 621		}
 622		for _, u := range s.argRealRegs {
 623			s.useRealReg(u.RealReg(), s.getVRegState(u.ID()))
 624		}
 625		currentBlkState.startFromPredIndex = 0
 626	} else {
 627		if wazevoapi.RegAllocLoggingEnabled {
 628			fmt.Printf("allocating blk%d starting from blk%d (on index=%d) \n",
 629				bID, f.Pred(blk, currentBlkState.startFromPredIndex).ID(), currentBlkState.startFromPredIndex)
 630		}
 631		s.resetAt(predState)
 632	}
 633
 634	s.regsInUse.range_(func(allocated RealReg, v *vrState[I, B, F]) {
 635		currentBlkState.startRegs.add(allocated, v)
 636	})
 637	if wazevoapi.RegAllocLoggingEnabled {
 638		fmt.Printf("finalized start reg for blk%d: %s\n", blk.ID(), currentBlkState.startRegs.format(a.regInfo))
 639	}
 640}
 641
 642func (a *Allocator[I, B, F]) allocBlock(f F, blk B) {
 643	bID := blk.ID()
 644	s := &a.state
 645	currentBlkState := a.getOrAllocateBlockState(bID)
 646	s.currentBlockID = bID
 647
 648	if currentBlkState.startFromPredIndex < 0 {
 649		panic("BUG: startFromPredIndex should be set in finalizeStartReg prior to allocBlock")
 650	}
 651
 652	// Clears the previous state.
 653	s.regsInUse.range_(func(allocatedRealReg RealReg, vr *vrState[I, B, F]) { vr.r = RealRegInvalid })
 654	s.regsInUse.reset()
 655	// Then set the start state.
 656	currentBlkState.startRegs.range_(func(allocatedRealReg RealReg, vr *vrState[I, B, F]) { s.useRealReg(allocatedRealReg, vr) })
 657
 658	desiredUpdated := a.ss[:0]
 659
 660	// Update the last use of each VReg.
 661	a.copies = a.copies[:0] // Stores the copy instructions.
 662	var pc programCounter
 663	var nilInstr I
 664	for instr := blk.InstrIteratorBegin(); instr != nilInstr; instr = blk.InstrIteratorNext() {
 665		var useState *vrState[I, B, F]
 666		for _, use := range instr.Uses(&a.vs) {
 667			useState = s.getVRegState(use.ID())
 668			if !use.IsRealReg() {
 669				useState.lastUse = pc
 670			}
 671		}
 672
 673		if instr.IsCopy() {
 674			def := instr.Defs(&a.vs)[0]
 675			a.copies = append(a.copies, _copy[I, B, F]{src: useState, dstID: def.ID()})
 676			r := def.RealReg()
 677			if r != RealRegInvalid {
 678				if !useState.isPhi { // TODO: no idea why do we need this.
 679					useState.desiredLoc = newDesiredLocReg(r)
 680					desiredUpdated = append(desiredUpdated, useState)
 681				}
 682			}
 683		}
 684		pc++
 685	}
 686
 687	// Mark all live-out values by checking live-in of the successors.
 688	// While doing so, we also update the desired register values.
 689	var succ B
 690	var nilBlk B
 691	for i, ns := 0, blk.Succs(); i < ns; i++ {
 692		succ = f.Succ(blk, i)
 693		if succ == nilBlk {
 694			continue
 695		}
 696
 697		succID := succ.ID()
 698		succState := a.getOrAllocateBlockState(succID)
 699		for _, st := range succState.liveIns {
 700			if st.phiBlk() != succ {
 701				st.lastUse = programCounterLiveOut
 702			}
 703		}
 704
 705		if succState.startFromPredIndex > -1 {
 706			if wazevoapi.RegAllocLoggingEnabled {
 707				fmt.Printf("blk%d -> blk%d: start_regs: %s\n", bID, succID, succState.startRegs.format(a.regInfo))
 708			}
 709			succState.startRegs.range_(func(allocatedRealReg RealReg, vs *vrState[I, B, F]) {
 710				vs.desiredLoc = newDesiredLocReg(allocatedRealReg)
 711				desiredUpdated = append(desiredUpdated, vs)
 712			})
 713			for _, p := range f.BlockParams(succ, &a.vs) {
 714				vs := s.getVRegState(p.ID())
 715				if vs.desiredLoc.realReg() == RealRegInvalid {
 716					vs.desiredLoc = desiredLocStack
 717					desiredUpdated = append(desiredUpdated, vs)
 718				}
 719			}
 720		}
 721	}
 722
 723	// Propagate the desired register values from the end of the block to the beginning.
 724	for _, instr := range a.copies {
 725		defState := s.getVRegState(instr.dstID)
 726		desired := defState.desiredLoc.realReg()
 727		useState := instr.src
 728		if useState.phiBlk() != succ && useState.desiredLoc == desiredLocUnspecified {
 729			useState.desiredLoc = newDesiredLocReg(desired)
 730			desiredUpdated = append(desiredUpdated, useState)
 731		}
 732	}
 733
 734	pc = 0
 735	for instr := blk.InstrIteratorBegin(); instr != nilInstr; instr = blk.InstrIteratorNext() {
 736		if wazevoapi.RegAllocLoggingEnabled {
 737			fmt.Println(instr)
 738		}
 739
 740		var currentUsedSet RegSet
 741		killSet := a.reals[:0]
 742
 743		// Gather the set of registers that will be used in the current instruction.
 744		uses := instr.Uses(&a.vs)
 745		for _, use := range uses {
 746			if use.IsRealReg() {
 747				r := use.RealReg()
 748				currentUsedSet = currentUsedSet.add(r)
 749				if a.allocatableSet.has(r) {
 750					killSet = append(killSet, r)
 751				}
 752			} else {
 753				vs := s.getVRegState(use.ID())
 754				if r := vs.r; r != RealRegInvalid {
 755					currentUsedSet = currentUsedSet.add(r)
 756				}
 757			}
 758		}
 759
 760		for i, use := range uses {
 761			if !use.IsRealReg() {
 762				vs := s.getVRegState(use.ID())
 763				killed := vs.lastUse == pc
 764				r := vs.r
 765
 766				if r == RealRegInvalid {
 767					r = a.findOrSpillAllocatable(s, a.regInfo.AllocatableRegisters[use.RegType()], currentUsedSet,
 768						// Prefer the desired register if it's available.
 769						vs.desiredLoc.realReg())
 770					vs.recordReload(f, blk)
 771					f.ReloadRegisterBefore(use.SetRealReg(r), instr)
 772					s.useRealReg(r, vs)
 773				}
 774				if wazevoapi.RegAllocLoggingEnabled {
 775					fmt.Printf("\ttrying to use v%v on %s\n", use.ID(), a.regInfo.RealRegName(r))
 776				}
 777				instr.AssignUse(i, use.SetRealReg(r))
 778				currentUsedSet = currentUsedSet.add(r)
 779				if killed {
 780					if wazevoapi.RegAllocLoggingEnabled {
 781						fmt.Printf("\tkill v%d with %s\n", use.ID(), a.regInfo.RealRegName(r))
 782					}
 783					killSet = append(killSet, r)
 784				}
 785			}
 786		}
 787
 788		isIndirect := instr.IsIndirectCall()
 789		if instr.IsCall() || isIndirect {
 790			addr := RealRegInvalid
 791			if isIndirect {
 792				addr = a.vs[0].RealReg()
 793			}
 794			a.releaseCallerSavedRegs(addr)
 795		}
 796
 797		for _, r := range killSet {
 798			s.releaseRealReg(r)
 799		}
 800		a.reals = killSet
 801
 802		defs := instr.Defs(&a.vs)
 803		switch len(defs) {
 804		default:
 805			// Some instructions define multiple values on real registers.
 806			// E.g. call instructions (following calling convention) / div instruction on x64 that defines both rax and rdx.
 807			//
 808			// Note that currently I assume that such instructions define only the pre colored real registers, not the VRegs
 809			// that require allocations. If we need to support such case, we need to add the logic to handle it here,
 810			// though is there any such instruction?
 811			for _, def := range defs {
 812				if !def.IsRealReg() {
 813					panic("BUG: multiple defs should be on real registers")
 814				}
 815				r := def.RealReg()
 816				if s.regsInUse.has(r) {
 817					s.releaseRealReg(r)
 818				}
 819				s.useRealReg(r, s.getVRegState(def.ID()))
 820			}
 821		case 0:
 822		case 1:
 823			def := defs[0]
 824			vState := s.getVRegState(def.ID())
 825			if def.IsRealReg() {
 826				r := def.RealReg()
 827				if a.allocatableSet.has(r) {
 828					if s.regsInUse.has(r) {
 829						s.releaseRealReg(r)
 830					}
 831					s.useRealReg(r, vState)
 832				}
 833			} else {
 834				r := vState.r
 835
 836				if desired := vState.desiredLoc.realReg(); desired != RealRegInvalid {
 837					if r != desired {
 838						if (vState.isPhi && vState.defBlk == succ) ||
 839							// If this is not a phi and it's already assigned a real reg,
 840							// this value has multiple definitions, hence we cannot assign the desired register.
 841							(!s.regsInUse.has(desired) && r == RealRegInvalid) {
 842							// If the phi value is passed via a real register, we force the value to be in the desired register.
 843							if wazevoapi.RegAllocLoggingEnabled {
 844								fmt.Printf("\t\tv%d is phi and desiredReg=%s\n", def.ID(), a.regInfo.RealRegName(desired))
 845							}
 846							if r != RealRegInvalid {
 847								// If the value is already in a different real register, we release it to change the state.
 848								// Otherwise, multiple registers might have the same values at the end, which results in
 849								// messing up the merge state reconciliation.
 850								s.releaseRealReg(r)
 851							}
 852							r = desired
 853							s.releaseRealReg(r)
 854							s.useRealReg(r, vState)
 855						}
 856					}
 857				}
 858
 859				// Allocate a new real register if `def` is not currently assigned one.
 860				// It can happen when multiple instructions define the same VReg (e.g. const loads).
 861				if r == RealRegInvalid {
 862					if instr.IsCopy() {
 863						copySrc := instr.Uses(&a.vs)[0].RealReg()
 864						if a.allocatableSet.has(copySrc) && !s.regsInUse.has(copySrc) {
 865							r = copySrc
 866						}
 867					}
 868					if r == RealRegInvalid {
 869						typ := def.RegType()
 870						r = a.findOrSpillAllocatable(s, a.regInfo.AllocatableRegisters[typ], RegSet(0), RealRegInvalid)
 871					}
 872					s.useRealReg(r, vState)
 873				}
 874				dr := def.SetRealReg(r)
 875				instr.AssignDef(dr)
 876				if wazevoapi.RegAllocLoggingEnabled {
 877					fmt.Printf("\tdefining v%d with %s\n", def.ID(), a.regInfo.RealRegName(r))
 878				}
 879				if vState.isPhi {
 880					if vState.desiredLoc.stack() { // Stack based phi value.
 881						f.StoreRegisterAfter(dr, instr)
 882						// Release the real register as it's not used anymore.
 883						s.releaseRealReg(r)
 884					} else {
 885						// Only the register based phis are necessary to track the defining instructions
 886						// since the stack-based phis are already having stores inserted ^.
 887						n := a.phiDefInstListPool.Allocate()
 888						n.instr = instr
 889						n.next = vState.phiDefInstList
 890						n.v = dr
 891						vState.phiDefInstList = n
 892					}
 893				} else {
 894					vState.defInstr = instr
 895					vState.defBlk = blk
 896				}
 897			}
 898		}
 899		if wazevoapi.RegAllocLoggingEnabled {
 900			fmt.Println(instr)
 901		}
 902		pc++
 903	}
 904
 905	s.regsInUse.range_(func(allocated RealReg, v *vrState[I, B, F]) { currentBlkState.endRegs.add(allocated, v) })
 906
 907	currentBlkState.visited = true
 908	if wazevoapi.RegAllocLoggingEnabled {
 909		currentBlkState.dump(a.regInfo)
 910	}
 911
 912	// Reset the desired end location.
 913	for _, vs := range desiredUpdated {
 914		vs.desiredLoc = desiredLocUnspecified
 915	}
 916	a.ss = desiredUpdated[:0]
 917
 918	for i := 0; i < blk.Succs(); i++ {
 919		succ := f.Succ(blk, i)
 920		if succ == nilBlk {
 921			continue
 922		}
 923		// If the successor is not visited yet, finalize the start state.
 924		a.finalizeStartReg(f, succ)
 925	}
 926}
 927
 928func (a *Allocator[I, B, F]) releaseCallerSavedRegs(addrReg RealReg) {
 929	s := &a.state
 930
 931	for allocated := RealReg(0); allocated < 64; allocated++ {
 932		if allocated == addrReg { // If this is the call indirect, we should not touch the addr register.
 933			continue
 934		}
 935		if vs := s.regsInUse.get(allocated); vs != nil {
 936			if vs.v.IsRealReg() {
 937				continue // This is the argument register as it's already used by VReg backed by the corresponding RealReg.
 938			}
 939			if !a.regInfo.CallerSavedRegisters.has(allocated) {
 940				// If this is not a caller-saved register, it is safe to keep it across the call.
 941				continue
 942			}
 943			s.releaseRealReg(allocated)
 944		}
 945	}
 946}
 947
 948func (a *Allocator[I, B, F]) fixMergeState(f F, blk B) {
 949	preds := blk.Preds()
 950	if preds <= 1 {
 951		return
 952	}
 953
 954	s := &a.state
 955
 956	// Restores the state at the beginning of the block.
 957	bID := blk.ID()
 958	blkSt := a.getOrAllocateBlockState(bID)
 959	desiredOccupants := &blkSt.startRegs
 960	var desiredOccupantsSet RegSet
 961	for i, v := range desiredOccupants {
 962		if v != nil {
 963			desiredOccupantsSet = desiredOccupantsSet.add(RealReg(i))
 964		}
 965	}
 966
 967	if wazevoapi.RegAllocLoggingEnabled {
 968		fmt.Println("fixMergeState", blk.ID(), ":", desiredOccupants.format(a.regInfo))
 969	}
 970
 971	s.currentBlockID = bID
 972	a.updateLiveInVRState(blkSt)
 973
 974	for i := 0; i < preds; i++ {
 975		if i == blkSt.startFromPredIndex {
 976			continue
 977		}
 978
 979		pred := f.Pred(blk, i)
 980		predSt := a.getOrAllocateBlockState(pred.ID())
 981
 982		s.resetAt(predSt)
 983
 984		// Finds the free registers if any.
 985		intTmp, floatTmp := VRegInvalid, VRegInvalid
 986		if intFree := s.findAllocatable(
 987			a.regInfo.AllocatableRegisters[RegTypeInt], desiredOccupantsSet,
 988		); intFree != RealRegInvalid {
 989			intTmp = FromRealReg(intFree, RegTypeInt)
 990		}
 991		if floatFree := s.findAllocatable(
 992			a.regInfo.AllocatableRegisters[RegTypeFloat], desiredOccupantsSet,
 993		); floatFree != RealRegInvalid {
 994			floatTmp = FromRealReg(floatFree, RegTypeFloat)
 995		}
 996
 997		for r := RealReg(0); r < 64; r++ {
 998			desiredVReg := desiredOccupants.get(r)
 999			if desiredVReg == nil {
1000				continue
1001			}
1002
1003			currentVReg := s.regsInUse.get(r)
1004			if currentVReg != nil && desiredVReg.v.ID() == currentVReg.v.ID() {
1005				continue
1006			}
1007
1008			typ := desiredVReg.v.RegType()
1009			var tmpRealReg VReg
1010			if typ == RegTypeInt {
1011				tmpRealReg = intTmp
1012			} else {
1013				tmpRealReg = floatTmp
1014			}
1015			a.reconcileEdge(f, r, pred, currentVReg, desiredVReg, tmpRealReg, typ)
1016		}
1017	}
1018}
1019
1020// reconcileEdge reconciles the register state between the current block and the predecessor for the real register `r`.
1021//
1022//   - currentVReg is the current VReg value that sits on the register `r`. This can be VRegInvalid if the register is not used at the end of the predecessor.
1023//   - desiredVReg is the desired VReg value that should be on the register `r`.
1024//   - freeReg is the temporary register that can be used to swap the values, which may or may not be used.
1025//   - typ is the register type of the `r`.
1026func (a *Allocator[I, B, F]) reconcileEdge(f F,
1027	r RealReg,
1028	pred B,
1029	currentState, desiredState *vrState[I, B, F],
1030	freeReg VReg,
1031	typ RegType,
1032) {
1033	desiredVReg := desiredState.v
1034	currentVReg := VRegInvalid
1035	if currentState != nil {
1036		currentVReg = currentState.v
1037	}
1038	// There are four cases to consider:
1039	// 1. currentVReg is valid, but desiredVReg is on the stack.
1040	// 2. Both currentVReg and desiredVReg are valid.
1041	// 3. Desired is on a different register than `r` and currentReg is not valid.
1042	// 4. Desired is on the stack and currentReg is not valid.
1043
1044	s := &a.state
1045	if currentVReg.Valid() {
1046		er := desiredState.r
1047		if er == RealRegInvalid {
1048			// Case 1: currentVReg is valid, but desiredVReg is on the stack.
1049			if wazevoapi.RegAllocLoggingEnabled {
1050				fmt.Printf("\t\tv%d is desired to be on %s, but currently on the stack\n",
1051					desiredVReg.ID(), a.regInfo.RealRegName(r),
1052				)
1053			}
1054			// We need to move the current value to the stack, and reload the desired value into the register.
1055			// TODO: we can do better here.
1056			f.StoreRegisterBefore(currentVReg.SetRealReg(r), pred.LastInstrForInsertion())
1057			s.releaseRealReg(r)
1058
1059			desiredState.recordReload(f, pred)
1060			f.ReloadRegisterBefore(desiredVReg.SetRealReg(r), pred.LastInstrForInsertion())
1061			s.useRealReg(r, desiredState)
1062			return
1063		} else {
1064			// Case 2: Both currentVReg and desiredVReg are valid.
1065			if wazevoapi.RegAllocLoggingEnabled {
1066				fmt.Printf("\t\tv%d is desired to be on %s, but currently on %s\n",
1067					desiredVReg.ID(), a.regInfo.RealRegName(r), a.regInfo.RealRegName(er),
1068				)
1069			}
1070			// This case, we need to swap the values between the current and desired values.
1071			f.SwapBefore(
1072				currentVReg.SetRealReg(r),
1073				desiredVReg.SetRealReg(er),
1074				freeReg,
1075				pred.LastInstrForInsertion(),
1076			)
1077			s.allocatedRegSet = s.allocatedRegSet.add(freeReg.RealReg())
1078			s.releaseRealReg(r)
1079			s.releaseRealReg(er)
1080			s.useRealReg(r, desiredState)
1081			s.useRealReg(er, currentState)
1082			if wazevoapi.RegAllocLoggingEnabled {
1083				fmt.Printf("\t\tv%d previously on %s moved to %s\n", currentVReg.ID(), a.regInfo.RealRegName(r), a.regInfo.RealRegName(er))
1084			}
1085		}
1086	} else {
1087		if wazevoapi.RegAllocLoggingEnabled {
1088			fmt.Printf("\t\tv%d is desired to be on %s, current not used\n",
1089				desiredVReg.ID(), a.regInfo.RealRegName(r),
1090			)
1091		}
1092		if currentReg := desiredState.r; currentReg != RealRegInvalid {
1093			// Case 3: Desired is on a different register than `r` and currentReg is not valid.
1094			// We simply need to move the desired value to the register.
1095			f.InsertMoveBefore(
1096				FromRealReg(r, typ),
1097				desiredVReg.SetRealReg(currentReg),
1098				pred.LastInstrForInsertion(),
1099			)
1100			s.releaseRealReg(currentReg)
1101		} else {
1102			// Case 4: Both currentVReg and desiredVReg are not valid.
1103			// We simply need to reload the desired value into the register.
1104			desiredState.recordReload(f, pred)
1105			f.ReloadRegisterBefore(desiredVReg.SetRealReg(r), pred.LastInstrForInsertion())
1106		}
1107		s.useRealReg(r, desiredState)
1108	}
1109}
1110
1111func (a *Allocator[I, B, F]) scheduleSpills(f F) {
1112	states := a.state.vrStates
1113	for i := 0; i <= states.MaxIDEncountered(); i++ {
1114		vs := states.Get(i)
1115		if vs == nil {
1116			continue
1117		}
1118		if vs.spilled {
1119			a.scheduleSpill(f, vs)
1120		}
1121	}
1122}
1123
1124func (a *Allocator[I, B, F]) scheduleSpill(f F, vs *vrState[I, B, F]) {
1125	v := vs.v
1126	// If the value is the phi value, we need to insert a spill after each phi definition.
1127	if vs.isPhi {
1128		for defInstr := vs.phiDefInstList; defInstr != nil; defInstr = defInstr.next {
1129			f.StoreRegisterAfter(defInstr.v, defInstr.instr)
1130		}
1131		return
1132	}
1133
1134	pos := vs.lca
1135	definingBlk := vs.defBlk
1136	r := RealRegInvalid
1137	var nilBlk B
1138	if definingBlk == nilBlk {
1139		panic(fmt.Sprintf("BUG: definingBlk should not be nil for %s. This is likley a bug in backend lowering logic", vs.v.String()))
1140	}
1141	if pos == nilBlk {
1142		panic(fmt.Sprintf("BUG: pos should not be nil for %s. This is likley a bug in backend lowering logic", vs.v.String()))
1143	}
1144
1145	if wazevoapi.RegAllocLoggingEnabled {
1146		fmt.Printf("v%d is spilled in blk%d, lca=blk%d\n", v.ID(), definingBlk.ID(), pos.ID())
1147	}
1148	for pos != definingBlk {
1149		st := a.getOrAllocateBlockState(pos.ID())
1150		for rr := RealReg(0); rr < 64; rr++ {
1151			if vs := st.startRegs.get(rr); vs != nil && vs.v == v {
1152				r = rr
1153				// Already in the register, so we can place the spill at the beginning of the block.
1154				break
1155			}
1156		}
1157
1158		if r != RealRegInvalid {
1159			break
1160		}
1161
1162		pos = f.Idom(pos)
1163	}
1164
1165	if pos == definingBlk {
1166		defInstr := vs.defInstr
1167		defInstr.Defs(&a.vs)
1168		if wazevoapi.RegAllocLoggingEnabled {
1169			fmt.Printf("schedule spill v%d after %v\n", v.ID(), defInstr)
1170		}
1171		f.StoreRegisterAfter(a.vs[0], defInstr)
1172	} else {
1173		// Found an ancestor block that holds the value in the register at the beginning of the block.
1174		// We need to insert a spill before the last use.
1175		first := pos.FirstInstr()
1176		if wazevoapi.RegAllocLoggingEnabled {
1177			fmt.Printf("schedule spill v%d before %v\n", v.ID(), first)
1178		}
1179		f.StoreRegisterAfter(v.SetRealReg(r), first)
1180	}
1181}
1182
1183// Reset resets the allocator's internal state so that it can be reused.
1184func (a *Allocator[I, B, F]) Reset() {
1185	a.state.reset()
1186	a.blockStates.Reset()
1187	a.phiDefInstListPool.Reset()
1188	a.vs = a.vs[:0]
1189}