1// Package regalloc performs register allocation. The algorithm can work on any ISA by implementing the interfaces in
2// api.go.
3//
4// References:
5// - https://web.stanford.edu/class/archive/cs/cs143/cs143.1128/lectures/17/Slides17.pdf
6// - https://en.wikipedia.org/wiki/Chaitin%27s_algorithm
7// - https://llvm.org/ProjectsWithLLVM/2004-Fall-CS426-LS.pdf
8// - https://pfalcon.github.io/ssabook/latest/book-full.pdf: Chapter 9. for liveness analysis.
9// - https://github.com/golang/go/blob/release-branch.go1.21/src/cmd/compile/internal/ssa/regalloc.go
10package regalloc
11
12import (
13 "fmt"
14 "math"
15 "strings"
16
17 "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
18)
19
20// NewAllocator returns a new Allocator.
21func NewAllocator[I Instr, B Block[I], F Function[I, B]](allocatableRegs *RegisterInfo) Allocator[I, B, F] {
22 a := Allocator[I, B, F]{
23 regInfo: allocatableRegs,
24 phiDefInstListPool: wazevoapi.NewPool[phiDefInstList[I]](resetPhiDefInstList[I]),
25 blockStates: wazevoapi.NewIDedPool[blockState[I, B, F]](resetBlockState[I, B, F]),
26 }
27 a.state.vrStates = wazevoapi.NewIDedPool[vrState[I, B, F]](resetVrState[I, B, F])
28 a.state.reset()
29 for _, regs := range allocatableRegs.AllocatableRegisters {
30 for _, r := range regs {
31 a.allocatableSet = a.allocatableSet.add(r)
32 }
33 }
34 return a
35}
36
37type (
38 // RegisterInfo holds the statically-known ISA-specific register information.
39 RegisterInfo struct {
40 // AllocatableRegisters is a 2D array of allocatable RealReg, indexed by regTypeNum and regNum.
41 // The order matters: the first element is the most preferred one when allocating.
42 AllocatableRegisters [NumRegType][]RealReg
43 CalleeSavedRegisters RegSet
44 CallerSavedRegisters RegSet
45 RealRegToVReg []VReg
46 // RealRegName returns the name of the given RealReg for debugging.
47 RealRegName func(r RealReg) string
48 RealRegType func(r RealReg) RegType
49 }
50
51 // Allocator is a register allocator.
52 Allocator[I Instr, B Block[I], F Function[I, B]] struct {
53 // regInfo is static per ABI/ISA, and is initialized by the machine during Machine.PrepareRegisterAllocator.
54 regInfo *RegisterInfo
55 // allocatableSet is a set of allocatable RealReg derived from regInfo. Static per ABI/ISA.
56 allocatableSet RegSet
57 allocatedCalleeSavedRegs []VReg
58 vs []VReg
59 ss []*vrState[I, B, F]
60 copies []_copy[I, B, F]
61 phiDefInstListPool wazevoapi.Pool[phiDefInstList[I]]
62
63 // Followings are re-used during various places.
64 blks []B
65 reals []RealReg
66
67 // Following two fields are updated while iterating the blocks in the reverse postorder.
68 state state[I, B, F]
69 blockStates wazevoapi.IDedPool[blockState[I, B, F]]
70 }
71
72 // _copy represents a source and destination pair of a copy instruction.
73 _copy[I Instr, B Block[I], F Function[I, B]] struct {
74 src *vrState[I, B, F]
75 dstID VRegID
76 }
77
78 // programCounter represents an opaque index into the program which is used to represents a LiveInterval of a VReg.
79 programCounter int32
80
81 state[I Instr, B Block[I], F Function[I, B]] struct {
82 argRealRegs []VReg
83 regsInUse regInUseSet[I, B, F]
84 vrStates wazevoapi.IDedPool[vrState[I, B, F]]
85
86 currentBlockID int32
87
88 // allocatedRegSet is a set of RealReg that are allocated during the allocation phase. This is reset per function.
89 allocatedRegSet RegSet
90 }
91
92 blockState[I Instr, B Block[I], F Function[I, B]] struct {
93 // liveIns is a list of VReg that are live at the beginning of the block.
94 liveIns []*vrState[I, B, F]
95 // seen is true if the block is visited during the liveness analysis.
96 seen bool
97 // visited is true if the block is visited during the allocation phase.
98 visited bool
99 startFromPredIndex int
100 // startRegs is a list of RealReg that are used at the beginning of the block. This is used to fix the merge edges.
101 startRegs regInUseSet[I, B, F]
102 // endRegs is a list of RealReg that are used at the end of the block. This is used to fix the merge edges.
103 endRegs regInUseSet[I, B, F]
104 }
105
106 vrState[I Instr, B Block[I], f Function[I, B]] struct {
107 v VReg
108 r RealReg
109 // defInstr is the instruction that defines this value. If this is the phi value and not the entry block, this is nil.
110 defInstr I
111 // defBlk is the block that defines this value. If this is the phi value, this is the block whose arguments contain this value.
112 defBlk B
113 // lca = lowest common ancestor. This is the block that is the lowest common ancestor of all the blocks that
114 // reloads this value. This is used to determine the spill location. Only valid if spilled=true.
115 lca B
116 // lastUse is the program counter of the last use of this value. This changes while iterating the block, and
117 // should not be used across the blocks as it becomes invalid. To check the validity, use lastUseUpdatedAtBlockID.
118 lastUse programCounter
119 lastUseUpdatedAtBlockID int32
120 // spilled is true if this value is spilled i.e. the value is reload from the stack somewhere in the program.
121 //
122 // Note that this field is used during liveness analysis for different purpose. This is used to determine the
123 // value is live-in or not.
124 spilled bool
125 // isPhi is true if this is a phi value.
126 isPhi bool
127 desiredLoc desiredLoc
128 // phiDefInstList is a list of instructions that defines this phi value.
129 // This is used to determine the spill location, and only valid if isPhi=true.
130 *phiDefInstList[I]
131 }
132
133 // phiDefInstList is a linked list of instructions that defines a phi value.
134 phiDefInstList[I Instr] struct {
135 instr I
136 v VReg
137 next *phiDefInstList[I]
138 }
139
140 // desiredLoc represents a desired location for a VReg.
141 desiredLoc uint16
142 // desiredLocKind is a kind of desired location for a VReg.
143 desiredLocKind uint16
144)
145
// A desiredLoc packs a desiredLocKind into its low two bits; for the register
// kind, the RealReg payload lives in the remaining bits (see newDesiredLocReg).
const (
	// desiredLocKindUnspecified is a kind of desired location for a VReg that is not specified.
	desiredLocKindUnspecified desiredLocKind = iota
	// desiredLocKindStack is a kind of desired location for a VReg that is on the stack, only used for the phi values.
	desiredLocKindStack
	// desiredLocKindReg is a kind of desired location for a VReg that is in a register.
	desiredLocKindReg
	// desiredLocUnspecified is the desiredLoc value carrying desiredLocKindUnspecified (no payload).
	desiredLocUnspecified = desiredLoc(desiredLocKindUnspecified)
	// desiredLocStack is the desiredLoc value carrying desiredLocKindStack (no payload).
	desiredLocStack = desiredLoc(desiredLocKindStack)
)
156
157func newDesiredLocReg(r RealReg) desiredLoc {
158 return desiredLoc(desiredLocKindReg) | desiredLoc(r<<2)
159}
160
161func (d desiredLoc) realReg() RealReg {
162 return RealReg(d >> 2)
163}
164
165func (d desiredLoc) stack() bool {
166 return d&3 == desiredLoc(desiredLocKindStack)
167}
168
169func resetPhiDefInstList[I Instr](l *phiDefInstList[I]) {
170 var nilInstr I
171 l.instr = nilInstr
172 l.next = nil
173 l.v = VRegInvalid
174}
175
// dump prints the current allocation state to stdout for debugging.
// Note: regsInUse is intentionally left printed twice ("regsInUse" and "used:") —
// this is debug-only output, kept as-is.
func (s *state[I, B, F]) dump(info *RegisterInfo) { //nolint:unused
	fmt.Println("\t\tstate:")
	fmt.Println("\t\t\targRealRegs:", s.argRealRegs)
	fmt.Println("\t\t\tregsInUse", s.regsInUse.format(info))
	fmt.Println("\t\t\tallocatedRegSet:", s.allocatedRegSet.format(info))
	fmt.Println("\t\t\tused:", s.regsInUse.format(info))
	var strs []string
	// List only the VRegs currently bound to a real register.
	for i := 0; i <= s.vrStates.MaxIDEncountered(); i++ {
		vs := s.vrStates.Get(i)
		if vs == nil {
			continue
		}
		if vs.r != RealRegInvalid {
			strs = append(strs, fmt.Sprintf("(v%d: %s)", vs.v.ID(), info.RealRegName(vs.r)))
		}
	}
	fmt.Println("\t\t\tvrStates:", strings.Join(strs, ", "))
}
194
195func (s *state[I, B, F]) reset() {
196 s.argRealRegs = s.argRealRegs[:0]
197 s.vrStates.Reset()
198 s.allocatedRegSet = RegSet(0)
199 s.regsInUse.reset()
200 s.currentBlockID = -1
201}
202
203func resetVrState[I Instr, B Block[I], F Function[I, B]](vs *vrState[I, B, F]) {
204 vs.v = VRegInvalid
205 vs.r = RealRegInvalid
206 var nilInstr I
207 vs.defInstr = nilInstr
208 var nilBlk B
209 vs.defBlk = nilBlk
210 vs.spilled = false
211 vs.lastUse = -1
212 vs.lastUseUpdatedAtBlockID = -1
213 vs.lca = nilBlk
214 vs.isPhi = false
215 vs.phiDefInstList = nil
216 vs.desiredLoc = desiredLocUnspecified
217}
218
219func (s *state[I, B, F]) getOrAllocateVRegState(v VReg) *vrState[I, B, F] {
220 st := s.vrStates.GetOrAllocate(int(v.ID()))
221 if st.v == VRegInvalid {
222 st.v = v
223 }
224 return st
225}
226
227func (s *state[I, B, F]) getVRegState(v VRegID) *vrState[I, B, F] {
228 return s.vrStates.Get(int(v))
229}
230
231func (s *state[I, B, F]) useRealReg(r RealReg, vr *vrState[I, B, F]) {
232 s.regsInUse.add(r, vr)
233 vr.r = r
234 s.allocatedRegSet = s.allocatedRegSet.add(r)
235}
236
237func (s *state[I, B, F]) releaseRealReg(r RealReg) {
238 current := s.regsInUse.get(r)
239 if current != nil {
240 s.regsInUse.remove(r)
241 current.r = RealRegInvalid
242 }
243}
244
245// recordReload records that the given VReg is reloaded in the given block.
246// This is used to determine the spill location by tracking the lowest common ancestor of all the blocks that reloads the value.
247func (vs *vrState[I, B, F]) recordReload(f F, blk B) {
248 vs.spilled = true
249 var nilBlk B
250 if lca := vs.lca; lca == nilBlk {
251 if wazevoapi.RegAllocLoggingEnabled {
252 fmt.Printf("\t\tv%d is reloaded in blk%d,\n", vs.v.ID(), blk.ID())
253 }
254 vs.lca = blk
255 } else if lca != blk {
256 if wazevoapi.RegAllocLoggingEnabled {
257 fmt.Printf("\t\tv%d is reloaded in blk%d, lca=%d\n", vs.v.ID(), blk.ID(), vs.lca.ID())
258 }
259 vs.lca = f.LowestCommonAncestor(lca, blk)
260 if wazevoapi.RegAllocLoggingEnabled {
261 fmt.Printf("updated lca=%d\n", vs.lca.ID())
262 }
263 }
264}
265
// findOrSpillAllocatable picks a register from `allocatable`, evicting (spilling) an
// occupant if none is free. forbiddenMask lists registers that must not be returned
// (e.g. already used by the current instruction). preferred, when not RealRegInvalid,
// is returned immediately if free; otherwise the occupant with the furthest next use
// (or no next use at all) is chosen for eviction. The returned register has been
// released via releaseRealReg, so the caller can bind it right away.
func (a *Allocator[I, B, F]) findOrSpillAllocatable(s *state[I, B, F], allocatable []RealReg, forbiddenMask RegSet, preferred RealReg) (r RealReg) {
	r = RealRegInvalid
	// First, check if the preferredMask has any allocatable register.
	if preferred != RealRegInvalid && !forbiddenMask.has(preferred) && !s.regsInUse.has(preferred) {
		return preferred
	}

	var lastUseAt programCounter
	var spillVReg VReg
	for _, candidateReal := range allocatable {
		if forbiddenMask.has(candidateReal) {
			continue
		}

		using := s.regsInUse.get(candidateReal)
		if using == nil {
			// This is not used at this point.
			return candidateReal
		}

		// Real registers in use should not be spilled, so we skip them.
		// For example, if the register is used as an argument register, and it might be
		// spilled and not reloaded when it ends up being used as a temporary to pass
		// stack based argument.
		if using.v.IsRealReg() {
			continue
		}

		isPreferred := candidateReal == preferred

		// last == -1 means the value won't be used anymore.
		// Pick the candidate whose occupant is used furthest in the future (or never again);
		// once lastUseAt settles at -1 no later-use candidate can displace it.
		if last := using.lastUse; r == RealRegInvalid || isPreferred || last == -1 || (lastUseAt != -1 && last > lastUseAt) {
			lastUseAt = last
			r = candidateReal
			spillVReg = using.v
			if isPreferred {
				break
			}
		}
	}

	if r == RealRegInvalid {
		panic("not found any allocatable register")
	}

	if wazevoapi.RegAllocLoggingEnabled {
		fmt.Printf("\tspilling v%d when lastUseAt=%d and regsInUse=%s\n", spillVReg.ID(), lastUseAt, s.regsInUse.format(a.regInfo))
	}
	s.releaseRealReg(r)
	return r
}
317
318func (s *state[I, B, F]) findAllocatable(allocatable []RealReg, forbiddenMask RegSet) RealReg {
319 for _, r := range allocatable {
320 if !s.regsInUse.has(r) && !forbiddenMask.has(r) {
321 return r
322 }
323 }
324 return RealRegInvalid
325}
326
// resetAt restores the in-use register state to the end state of the given
// (predecessor) block. Only values known to be live into the current block —
// i.e. whose lastUse sentinel is programCounterLiveIn and was stamped with the
// current block ID (see updateLiveInVRState) — are re-bound; the rest are dropped.
func (s *state[I, B, F]) resetAt(bs *blockState[I, B, F]) {
	s.regsInUse.range_(func(_ RealReg, vs *vrState[I, B, F]) {
		vs.r = RealRegInvalid
	})
	s.regsInUse.reset()
	bs.endRegs.range_(func(r RealReg, vs *vrState[I, B, F]) {
		if vs.lastUseUpdatedAtBlockID == s.currentBlockID && vs.lastUse == programCounterLiveIn {
			s.regsInUse.add(r, vs)
			vs.r = r
		}
	})
}
339
340func resetBlockState[I Instr, B Block[I], F Function[I, B]](b *blockState[I, B, F]) {
341 b.seen = false
342 b.visited = false
343 b.endRegs.reset()
344 b.startRegs.reset()
345 b.startFromPredIndex = -1
346 b.liveIns = b.liveIns[:0]
347}
348
349func (b *blockState[I, B, F]) dump(a *RegisterInfo) {
350 fmt.Println("\t\tblockState:")
351 fmt.Println("\t\t\tstartRegs:", b.startRegs.format(a))
352 fmt.Println("\t\t\tendRegs:", b.endRegs.format(a))
353 fmt.Println("\t\t\tstartFromPredIndex:", b.startFromPredIndex)
354 fmt.Println("\t\t\tvisited:", b.visited)
355}
356
// DoAllocation performs register allocation on the given Function.
// It runs in three phases, in order: liveness analysis, the allocation itself
// (including merge-state fixing and spill scheduling), and finally reporting
// the callee-saved registers that were used back to the function.
func (a *Allocator[I, B, F]) DoAllocation(f F) {
	a.livenessAnalysis(f)
	a.alloc(f)
	a.determineCalleeSavedRealRegs(f)
}
363
364func (a *Allocator[I, B, F]) determineCalleeSavedRealRegs(f F) {
365 a.allocatedCalleeSavedRegs = a.allocatedCalleeSavedRegs[:0]
366 a.state.allocatedRegSet.Range(func(allocatedRealReg RealReg) {
367 if a.regInfo.CalleeSavedRegisters.has(allocatedRealReg) {
368 a.allocatedCalleeSavedRegs = append(a.allocatedCalleeSavedRegs, a.regInfo.RealRegToVReg[allocatedRealReg])
369 }
370 })
371 f.ClobberedRegisters(a.allocatedCalleeSavedRegs)
372}
373
374func (a *Allocator[I, B, F]) getOrAllocateBlockState(blockID int32) *blockState[I, B, F] {
375 return a.blockStates.GetOrAllocate(int(blockID))
376}
377
378// phiBlk returns the block that defines the given phi value, nil otherwise.
379func (vs *vrState[I, B, F]) phiBlk() B {
380 if vs.isPhi {
381 return vs.defBlk
382 }
383 var nilBlk B
384 return nilBlk
385}
386
// Sentinel lastUse values placed outside the valid program-counter range:
const (
	// programCounterLiveIn marks a value as live into the current block.
	programCounterLiveIn = math.MinInt32
	// programCounterLiveOut marks a value as live out of the current block.
	programCounterLiveOut = math.MaxInt32
)
391
// livenessAnalysis computes the live-in set of each block (blockState.liveIns).
// The algorithm here is described in https://pfalcon.github.io/ssabook/latest/book-full.pdf Chapter 9.2.
// Throughout this phase, vrState.spilled is repurposed as a temporary "live" flag.
func (a *Allocator[I, B, F]) livenessAnalysis(f F) {
	s := &a.state

	// Pre-allocate the states for the pre-colored VRegs backed by the real registers.
	for i := VRegID(0); i < vRegIDReservedForRealNum; i++ {
		s.getOrAllocateVRegState(VReg(i).SetRealReg(RealReg(i)))
	}

	var nilBlk B
	var nilInstr I
	// Post order: except across back edges, successors are processed before their predecessors.
	for blk := f.PostOrderBlockIteratorBegin(); blk != nilBlk; blk = f.PostOrderBlockIteratorNext() {
		// We should gather phi value data.
		for _, p := range f.BlockParams(blk, &a.vs) {
			vs := s.getOrAllocateVRegState(p)
			vs.isPhi = true
			vs.defBlk = blk
		}

		blkID := blk.ID()
		info := a.getOrAllocateBlockState(blkID)

		a.ss = a.ss[:0]
		const (
			flagDeleted = false
			flagLive    = true
		)
		ns := blk.Succs()
		for i := 0; i < ns; i++ {
			succ := f.Succ(blk, i)
			if succ == nilBlk {
				continue
			}

			succID := succ.ID()
			succInfo := a.getOrAllocateBlockState(succID)
			if !succInfo.seen { // This means the back edge.
				continue
			}

			// Everything live into a successor (except the successor's own phis) is live out of this block.
			for _, st := range succInfo.liveIns {
				if st.phiBlk() != succ && st.spilled != flagLive { //nolint:gosimple
					// We use .spilled field to store the flag.
					st.spilled = flagLive
					a.ss = append(a.ss, st)
				}
			}
		}

		// Walk the instructions backwards: a def kills liveness, a use creates it.
		for instr := blk.InstrRevIteratorBegin(); instr != nilInstr; instr = blk.InstrRevIteratorNext() {

			var use, def VReg
			var defIsPhi bool
			for _, def = range instr.Defs(&a.vs) {
				if !def.IsRealReg() {
					st := s.getOrAllocateVRegState(def)
					defIsPhi = st.isPhi
					// Note: We use .spilled field to store the flag.
					st.spilled = flagDeleted
				}
			}
			for _, use = range instr.Uses(&a.vs) {
				if !use.IsRealReg() {
					st := s.getOrAllocateVRegState(use)
					// Note: We use .spilled field to store the flag.
					if st.spilled != flagLive { //nolint:gosimple
						st.spilled = flagLive
						a.ss = append(a.ss, st)
					}
				}
			}

			if defIsPhi {
				if use.Valid() && use.IsRealReg() {
					// If the destination is a phi value, and the source is a real register, this is the beginning of the function.
					a.state.argRealRegs = append(a.state.argRealRegs, use)
				}
			}
		}

		// Whatever is still flagged live at the top of the block is live-in.
		for _, st := range a.ss {
			// We use .spilled field to store the flag.
			if st.spilled == flagLive { //nolint:gosimple
				info.liveIns = append(info.liveIns, st)
				st.spilled = false
			}
		}

		info.seen = true
	}

	// Propagate liveness into loop bodies: values live at a loop header stay live throughout the loop.
	nrs := f.LoopNestingForestRoots()
	for i := 0; i < nrs; i++ {
		root := f.LoopNestingForestRoot(i)
		a.loopTreeDFS(f, root)
	}
}
489
// loopTreeDFS implements the Algorithm 9.3 in the book in an iterative way.
// It pushes the live-ins of each loop header down to all of the header's children
// in the loop nesting forest, so a value live across a loop remains live inside it.
// vrState.spilled is again repurposed as a temporary "pending" flag here.
func (a *Allocator[I, B, F]) loopTreeDFS(f F, entry B) {
	// Explicit work stack instead of recursion.
	a.blks = a.blks[:0]
	a.blks = append(a.blks, entry)

	for len(a.blks) > 0 {
		tail := len(a.blks) - 1
		loop := a.blks[tail]
		a.blks = a.blks[:tail]
		a.ss = a.ss[:0]
		const (
			flagDone    = false
			flagPending = true
		)
		info := a.getOrAllocateBlockState(loop.ID())
		for _, st := range info.liveIns {
			if st.phiBlk() != loop {
				a.ss = append(a.ss, st)
				// We use .spilled field to store the flag.
				st.spilled = flagPending
			}
		}

		var siblingAddedView []*vrState[I, B, F]
		cn := loop.LoopNestingForestChildren()
		for i := 0; i < cn; i++ {
			child := f.LoopNestingForestChild(loop, i)
			childID := child.ID()
			childInfo := a.getOrAllocateBlockState(childID)

			if i == 0 {
				// For the first child, compute the additions once and remember the
				// appended slice view so the remaining siblings can reuse it.
				begin := len(childInfo.liveIns)
				for _, st := range a.ss {
					// We use .spilled field to store the flag.
					if st.spilled == flagPending { //nolint:gosimple
						st.spilled = flagDone
						// TODO: deduplicate, though I don't think it has much impact.
						childInfo.liveIns = append(childInfo.liveIns, st)
					}
				}
				siblingAddedView = childInfo.liveIns[begin:]
			} else {
				// TODO: deduplicate, though I don't think it has much impact.
				childInfo.liveIns = append(childInfo.liveIns, siblingAddedView...)
			}

			if child.LoopHeader() {
				a.blks = append(a.blks, child)
			}
		}

		if cn == 0 {
			// If there's no forest child, we haven't cleared the .spilled field at this point.
			for _, st := range a.ss {
				st.spilled = false
			}
		}
	}
}
549
// alloc allocates registers for the given function by iterating the blocks in the reverse postorder.
// The algorithm here is derived from the Go compiler's allocator https://github.com/golang/go/blob/release-branch.go1.21/src/cmd/compile/internal/ssa/regalloc.go
// In short, this is simply a linear scan register allocation where each block inherits the register allocation state from
// one of its predecessors. Each block inherits the selected state and starts allocation from there.
// If there's a discrepancy in the end states between predecessors, the adjustments are made to ensure consistency after allocation is done (which we call "fixing merge state").
// The spill instructions (store into the dedicated slots) are inserted after all the allocations and fixing merge states. That is because
// at that point, we know all the places where the reloads happen, and therefore we can know the best place to spill the values. More precisely,
// the spill happens in the block that is the lowest common ancestor of all the blocks that reloads the value.
//
// All of these logics are almost the same as Go's compiler which has a dedicated description in the source file ^^.
func (a *Allocator[I, B, F]) alloc(f F) {
	// First we allocate each block in the reverse postorder (at least one predecessor should be allocated for each block).
	var nilBlk B
	for blk := f.ReversePostOrderBlockIteratorBegin(); blk != nilBlk; blk = f.ReversePostOrderBlockIteratorNext() {
		if wazevoapi.RegAllocLoggingEnabled {
			fmt.Printf("========== allocating blk%d ========\n", blk.ID())
		}
		if blk.Entry() {
			// The entry block has no predecessor, so its start state must be finalized explicitly.
			a.finalizeStartReg(f, blk)
		}
		a.allocBlock(f, blk)
	}
	// After the allocation, we know the start and end state of each block. So we can fix the merge states.
	for blk := f.ReversePostOrderBlockIteratorBegin(); blk != nilBlk; blk = f.ReversePostOrderBlockIteratorNext() {
		a.fixMergeState(f, blk)
	}
	// Finally, we insert the spill instructions as we know all the places where the reloads happen.
	a.scheduleSpills(f)
}
579
580func (a *Allocator[I, B, F]) updateLiveInVRState(liveness *blockState[I, B, F]) {
581 currentBlockID := a.state.currentBlockID
582 for _, vs := range liveness.liveIns {
583 vs.lastUse = programCounterLiveIn
584 vs.lastUseUpdatedAtBlockID = currentBlockID
585 }
586}
587
// finalizeStartReg decides the register state at the beginning of blk and records it in
// blockState.startRegs. The state is inherited from one already-visited predecessor
// (recorded in startFromPredIndex), or from the ABI argument registers for the entry
// block. No-op if the start state has already been finalized.
func (a *Allocator[I, B, F]) finalizeStartReg(f F, blk B) {
	bID := blk.ID()
	s := &a.state
	currentBlkState := a.getOrAllocateBlockState(bID)
	if currentBlkState.startFromPredIndex > -1 {
		return
	}

	s.currentBlockID = bID
	a.updateLiveInVRState(currentBlkState)

	preds := blk.Preds()
	var predState *blockState[I, B, F]
	switch preds {
	case 0: // This is the entry block.
	case 1:
		predID := f.Pred(blk, 0).ID()
		predState = a.getOrAllocateBlockState(predID)
		currentBlkState.startFromPredIndex = 0
	default:
		// TODO: there should be some better heuristic to choose the predecessor.
		for i := 0; i < preds; i++ {
			predID := f.Pred(blk, i).ID()
			if _predState := a.getOrAllocateBlockState(predID); _predState.visited {
				predState = _predState
				currentBlkState.startFromPredIndex = i
				break
			}
		}
	}
	if predState == nil {
		if !blk.Entry() {
			panic(fmt.Sprintf("BUG: at lease one predecessor should be visited for blk%d", blk.ID()))
		}
		// Entry block: the arguments arrive in the ABI-designated real registers.
		for _, u := range s.argRealRegs {
			s.useRealReg(u.RealReg(), s.getVRegState(u.ID()))
		}
		currentBlkState.startFromPredIndex = 0
	} else {
		if wazevoapi.RegAllocLoggingEnabled {
			fmt.Printf("allocating blk%d starting from blk%d (on index=%d) \n",
				bID, f.Pred(blk, currentBlkState.startFromPredIndex).ID(), currentBlkState.startFromPredIndex)
		}
		// Inherit the chosen predecessor's end state, restricted to this block's live-ins.
		s.resetAt(predState)
	}

	s.regsInUse.range_(func(allocated RealReg, v *vrState[I, B, F]) {
		currentBlkState.startRegs.add(allocated, v)
	})
	if wazevoapi.RegAllocLoggingEnabled {
		fmt.Printf("finalized start reg for blk%d: %s\n", blk.ID(), currentBlkState.startRegs.format(a.regInfo))
	}
}
641
// allocBlock performs the actual allocation for one block: it restores the start
// state decided by finalizeStartReg, pre-computes last-use positions and desired
// locations, then walks the instructions forward assigning real registers to every
// use and def (inserting reloads as needed). At the end it records endRegs and
// eagerly finalizes the start state of the successors.
func (a *Allocator[I, B, F]) allocBlock(f F, blk B) {
	bID := blk.ID()
	s := &a.state
	currentBlkState := a.getOrAllocateBlockState(bID)
	s.currentBlockID = bID

	if currentBlkState.startFromPredIndex < 0 {
		panic("BUG: startFromPredIndex should be set in finalizeStartReg prior to allocBlock")
	}

	// Clears the previous state.
	s.regsInUse.range_(func(allocatedRealReg RealReg, vr *vrState[I, B, F]) { vr.r = RealRegInvalid })
	s.regsInUse.reset()
	// Then set the start state.
	currentBlkState.startRegs.range_(func(allocatedRealReg RealReg, vr *vrState[I, B, F]) { s.useRealReg(allocatedRealReg, vr) })

	// desiredUpdated tracks every vrState whose desiredLoc is set below, so it can be cleared at the end.
	desiredUpdated := a.ss[:0]

	// Update the last use of each VReg.
	a.copies = a.copies[:0] // Stores the copy instructions.
	var pc programCounter
	var nilInstr I
	for instr := blk.InstrIteratorBegin(); instr != nilInstr; instr = blk.InstrIteratorNext() {
		var useState *vrState[I, B, F]
		for _, use := range instr.Uses(&a.vs) {
			useState = s.getVRegState(use.ID())
			if !use.IsRealReg() {
				useState.lastUse = pc
			}
		}

		if instr.IsCopy() {
			def := instr.Defs(&a.vs)[0]
			a.copies = append(a.copies, _copy[I, B, F]{src: useState, dstID: def.ID()})
			r := def.RealReg()
			if r != RealRegInvalid {
				if !useState.isPhi { // TODO: no idea why do we need this.
					useState.desiredLoc = newDesiredLocReg(r)
					desiredUpdated = append(desiredUpdated, useState)
				}
			}
		}
		pc++
	}

	// Mark all live-out values by checking live-in of the successors.
	// While doing so, we also update the desired register values.
	var succ B
	var nilBlk B
	for i, ns := 0, blk.Succs(); i < ns; i++ {
		succ = f.Succ(blk, i)
		if succ == nilBlk {
			continue
		}

		succID := succ.ID()
		succState := a.getOrAllocateBlockState(succID)
		for _, st := range succState.liveIns {
			if st.phiBlk() != succ {
				st.lastUse = programCounterLiveOut
			}
		}

		if succState.startFromPredIndex > -1 {
			if wazevoapi.RegAllocLoggingEnabled {
				fmt.Printf("blk%d -> blk%d: start_regs: %s\n", bID, succID, succState.startRegs.format(a.regInfo))
			}
			succState.startRegs.range_(func(allocatedRealReg RealReg, vs *vrState[I, B, F]) {
				vs.desiredLoc = newDesiredLocReg(allocatedRealReg)
				desiredUpdated = append(desiredUpdated, vs)
			})
			for _, p := range f.BlockParams(succ, &a.vs) {
				vs := s.getVRegState(p.ID())
				if vs.desiredLoc.realReg() == RealRegInvalid {
					vs.desiredLoc = desiredLocStack
					desiredUpdated = append(desiredUpdated, vs)
				}
			}
		}
	}

	// Propagate the desired register values from the end of the block to the beginning.
	// Note: `succ` deliberately still holds the last successor iterated above.
	for _, instr := range a.copies {
		defState := s.getVRegState(instr.dstID)
		desired := defState.desiredLoc.realReg()
		useState := instr.src
		if useState.phiBlk() != succ && useState.desiredLoc == desiredLocUnspecified {
			useState.desiredLoc = newDesiredLocReg(desired)
			desiredUpdated = append(desiredUpdated, useState)
		}
	}

	pc = 0
	for instr := blk.InstrIteratorBegin(); instr != nilInstr; instr = blk.InstrIteratorNext() {
		if wazevoapi.RegAllocLoggingEnabled {
			fmt.Println(instr)
		}

		var currentUsedSet RegSet
		killSet := a.reals[:0]

		// Gather the set of registers that will be used in the current instruction.
		uses := instr.Uses(&a.vs)
		for _, use := range uses {
			if use.IsRealReg() {
				r := use.RealReg()
				currentUsedSet = currentUsedSet.add(r)
				if a.allocatableSet.has(r) {
					killSet = append(killSet, r)
				}
			} else {
				vs := s.getVRegState(use.ID())
				if r := vs.r; r != RealRegInvalid {
					currentUsedSet = currentUsedSet.add(r)
				}
			}
		}

		// Assign a real register to every virtual use, reloading spilled values as needed.
		for i, use := range uses {
			if !use.IsRealReg() {
				vs := s.getVRegState(use.ID())
				killed := vs.lastUse == pc
				r := vs.r

				if r == RealRegInvalid {
					r = a.findOrSpillAllocatable(s, a.regInfo.AllocatableRegisters[use.RegType()], currentUsedSet,
						// Prefer the desired register if it's available.
						vs.desiredLoc.realReg())
					vs.recordReload(f, blk)
					f.ReloadRegisterBefore(use.SetRealReg(r), instr)
					s.useRealReg(r, vs)
				}
				if wazevoapi.RegAllocLoggingEnabled {
					fmt.Printf("\ttrying to use v%v on %s\n", use.ID(), a.regInfo.RealRegName(r))
				}
				instr.AssignUse(i, use.SetRealReg(r))
				currentUsedSet = currentUsedSet.add(r)
				if killed {
					if wazevoapi.RegAllocLoggingEnabled {
						fmt.Printf("\tkill v%d with %s\n", use.ID(), a.regInfo.RealRegName(r))
					}
					killSet = append(killSet, r)
				}
			}
		}

		isIndirect := instr.IsIndirectCall()
		if instr.IsCall() || isIndirect {
			addr := RealRegInvalid
			if isIndirect {
				addr = a.vs[0].RealReg()
			}
			a.releaseCallerSavedRegs(addr)
		}

		// Registers whose values die at this instruction become free after it.
		for _, r := range killSet {
			s.releaseRealReg(r)
		}
		a.reals = killSet

		defs := instr.Defs(&a.vs)
		switch len(defs) {
		default:
			// Some instructions define multiple values on real registers.
			// E.g. call instructions (following calling convention) / div instruction on x64 that defines both rax and rdx.
			//
			// Note that currently I assume that such instructions define only the pre colored real registers, not the VRegs
			// that require allocations. If we need to support such case, we need to add the logic to handle it here,
			// though is there any such instruction?
			for _, def := range defs {
				if !def.IsRealReg() {
					panic("BUG: multiple defs should be on real registers")
				}
				r := def.RealReg()
				if s.regsInUse.has(r) {
					s.releaseRealReg(r)
				}
				s.useRealReg(r, s.getVRegState(def.ID()))
			}
		case 0:
		case 1:
			def := defs[0]
			vState := s.getVRegState(def.ID())
			if def.IsRealReg() {
				r := def.RealReg()
				if a.allocatableSet.has(r) {
					if s.regsInUse.has(r) {
						s.releaseRealReg(r)
					}
					s.useRealReg(r, vState)
				}
			} else {
				r := vState.r

				if desired := vState.desiredLoc.realReg(); desired != RealRegInvalid {
					if r != desired {
						// Note: `succ` here is still the last successor iterated above.
						if (vState.isPhi && vState.defBlk == succ) ||
							// If this is not a phi and it's already assigned a real reg,
							// this value has multiple definitions, hence we cannot assign the desired register.
							(!s.regsInUse.has(desired) && r == RealRegInvalid) {
							// If the phi value is passed via a real register, we force the value to be in the desired register.
							if wazevoapi.RegAllocLoggingEnabled {
								fmt.Printf("\t\tv%d is phi and desiredReg=%s\n", def.ID(), a.regInfo.RealRegName(desired))
							}
							if r != RealRegInvalid {
								// If the value is already in a different real register, we release it to change the state.
								// Otherwise, multiple registers might have the same values at the end, which results in
								// messing up the merge state reconciliation.
								s.releaseRealReg(r)
							}
							r = desired
							s.releaseRealReg(r)
							s.useRealReg(r, vState)
						}
					}
				}

				// Allocate a new real register if `def` is not currently assigned one.
				// It can happen when multiple instructions define the same VReg (e.g. const loads).
				if r == RealRegInvalid {
					if instr.IsCopy() {
						copySrc := instr.Uses(&a.vs)[0].RealReg()
						if a.allocatableSet.has(copySrc) && !s.regsInUse.has(copySrc) {
							r = copySrc
						}
					}
					if r == RealRegInvalid {
						typ := def.RegType()
						r = a.findOrSpillAllocatable(s, a.regInfo.AllocatableRegisters[typ], RegSet(0), RealRegInvalid)
					}
					s.useRealReg(r, vState)
				}
				dr := def.SetRealReg(r)
				instr.AssignDef(dr)
				if wazevoapi.RegAllocLoggingEnabled {
					fmt.Printf("\tdefining v%d with %s\n", def.ID(), a.regInfo.RealRegName(r))
				}
				if vState.isPhi {
					if vState.desiredLoc.stack() { // Stack based phi value.
						f.StoreRegisterAfter(dr, instr)
						// Release the real register as it's not used anymore.
						s.releaseRealReg(r)
					} else {
						// Only the register based phis are necessary to track the defining instructions
						// since the stack-based phis are already having stores inserted ^.
						n := a.phiDefInstListPool.Allocate()
						n.instr = instr
						n.next = vState.phiDefInstList
						n.v = dr
						vState.phiDefInstList = n
					}
				} else {
					vState.defInstr = instr
					vState.defBlk = blk
				}
			}
		}
		if wazevoapi.RegAllocLoggingEnabled {
			fmt.Println(instr)
		}
		pc++
	}

	s.regsInUse.range_(func(allocated RealReg, v *vrState[I, B, F]) { currentBlkState.endRegs.add(allocated, v) })

	currentBlkState.visited = true
	if wazevoapi.RegAllocLoggingEnabled {
		currentBlkState.dump(a.regInfo)
	}

	// Reset the desired end location.
	for _, vs := range desiredUpdated {
		vs.desiredLoc = desiredLocUnspecified
	}
	a.ss = desiredUpdated[:0]

	for i := 0; i < blk.Succs(); i++ {
		succ := f.Succ(blk, i)
		if succ == nilBlk {
			continue
		}
		// If the successor is not visited yet, finalize the start state.
		a.finalizeStartReg(f, succ)
	}
}
927
928func (a *Allocator[I, B, F]) releaseCallerSavedRegs(addrReg RealReg) {
929 s := &a.state
930
931 for allocated := RealReg(0); allocated < 64; allocated++ {
932 if allocated == addrReg { // If this is the call indirect, we should not touch the addr register.
933 continue
934 }
935 if vs := s.regsInUse.get(allocated); vs != nil {
936 if vs.v.IsRealReg() {
937 continue // This is the argument register as it's already used by VReg backed by the corresponding RealReg.
938 }
939 if !a.regInfo.CallerSavedRegisters.has(allocated) {
940 // If this is not a caller-saved register, it is safe to keep it across the call.
941 continue
942 }
943 s.releaseRealReg(allocated)
944 }
945 }
946}
947
// fixMergeState reconciles, for every predecessor of blk other than the one the block
// inherited its start state from, that predecessor's end register state with blk's
// startRegs, delegating the per-register moves/swaps/reloads to reconcileEdge.
func (a *Allocator[I, B, F]) fixMergeState(f F, blk B) {
	preds := blk.Preds()
	if preds <= 1 {
		return
	}

	s := &a.state

	// Restores the state at the beginning of the block.
	bID := blk.ID()
	blkSt := a.getOrAllocateBlockState(bID)
	desiredOccupants := &blkSt.startRegs
	var desiredOccupantsSet RegSet
	for i, v := range desiredOccupants {
		if v != nil {
			desiredOccupantsSet = desiredOccupantsSet.add(RealReg(i))
		}
	}

	if wazevoapi.RegAllocLoggingEnabled {
		fmt.Println("fixMergeState", blk.ID(), ":", desiredOccupants.format(a.regInfo))
	}

	s.currentBlockID = bID
	a.updateLiveInVRState(blkSt)

	for i := 0; i < preds; i++ {
		if i == blkSt.startFromPredIndex {
			// The inherited predecessor already matches by construction.
			continue
		}

		pred := f.Pred(blk, i)
		predSt := a.getOrAllocateBlockState(pred.ID())

		s.resetAt(predSt)

		// Finds the free registers if any.
		intTmp, floatTmp := VRegInvalid, VRegInvalid
		if intFree := s.findAllocatable(
			a.regInfo.AllocatableRegisters[RegTypeInt], desiredOccupantsSet,
		); intFree != RealRegInvalid {
			intTmp = FromRealReg(intFree, RegTypeInt)
		}
		if floatFree := s.findAllocatable(
			a.regInfo.AllocatableRegisters[RegTypeFloat], desiredOccupantsSet,
		); floatFree != RealRegInvalid {
			floatTmp = FromRealReg(floatFree, RegTypeFloat)
		}

		for r := RealReg(0); r < 64; r++ {
			desiredVReg := desiredOccupants.get(r)
			if desiredVReg == nil {
				continue
			}

			currentVReg := s.regsInUse.get(r)
			if currentVReg != nil && desiredVReg.v.ID() == currentVReg.v.ID() {
				// The right value is already in the right register on this edge.
				continue
			}

			typ := desiredVReg.v.RegType()
			var tmpRealReg VReg
			if typ == RegTypeInt {
				tmpRealReg = intTmp
			} else {
				tmpRealReg = floatTmp
			}
			a.reconcileEdge(f, r, pred, currentVReg, desiredVReg, tmpRealReg, typ)
		}
	}
}
1019
// reconcileEdge reconciles the register state between the current block and the predecessor for the real register `r`,
// inserting the necessary store/swap/move/reload instructions at the end of the predecessor `pred`.
//
// - currentState is the state of the VReg that currently sits on the register `r`. This can be nil if the register is not used at the end of the predecessor.
// - desiredState is the state of the VReg that should be on the register `r` at the beginning of the current block.
// - freeReg is the temporary register that can be used to swap the values, which may or may not be used.
// - typ is the register type of the `r`.
func (a *Allocator[I, B, F]) reconcileEdge(f F,
	r RealReg,
	pred B,
	currentState, desiredState *vrState[I, B, F],
	freeReg VReg,
	typ RegType,
) {
	desiredVReg := desiredState.v
	currentVReg := VRegInvalid
	if currentState != nil {
		currentVReg = currentState.v
	}
	// There are four cases to consider:
	// 1. currentVReg is valid, but desiredVReg is on the stack.
	// 2. Both currentVReg and desiredVReg are valid.
	// 3. Desired is on a different register than `r` and currentReg is not valid.
	// 4. Desired is on the stack and currentReg is not valid.

	s := &a.state
	if currentVReg.Valid() {
		er := desiredState.r
		if er == RealRegInvalid {
			// Case 1: currentVReg is valid, but desiredVReg is on the stack.
			if wazevoapi.RegAllocLoggingEnabled {
				fmt.Printf("\t\tv%d is desired to be on %s, but currently on the stack\n",
					desiredVReg.ID(), a.regInfo.RealRegName(r),
				)
			}
			// We need to move the current value to the stack, and reload the desired value into the register.
			// TODO: we can do better here.
			f.StoreRegisterBefore(currentVReg.SetRealReg(r), pred.LastInstrForInsertion())
			s.releaseRealReg(r)

			desiredState.recordReload(f, pred)
			f.ReloadRegisterBefore(desiredVReg.SetRealReg(r), pred.LastInstrForInsertion())
			s.useRealReg(r, desiredState)
			return
		} else {
			// Case 2: Both currentVReg and desiredVReg are valid.
			if wazevoapi.RegAllocLoggingEnabled {
				fmt.Printf("\t\tv%d is desired to be on %s, but currently on %s\n",
					desiredVReg.ID(), a.regInfo.RealRegName(r), a.regInfo.RealRegName(er),
				)
			}
			// This case, we need to swap the values between the current and desired values.
			f.SwapBefore(
				currentVReg.SetRealReg(r),
				desiredVReg.SetRealReg(er),
				freeReg,
				pred.LastInstrForInsertion(),
			)
			// The scratch register (if any) was clobbered by the swap; mark it allocated.
			s.allocatedRegSet = s.allocatedRegSet.add(freeReg.RealReg())
			// The two values traded places: desired now sits on `r`, current on `er`.
			s.releaseRealReg(r)
			s.releaseRealReg(er)
			s.useRealReg(r, desiredState)
			s.useRealReg(er, currentState)
			if wazevoapi.RegAllocLoggingEnabled {
				fmt.Printf("\t\tv%d previously on %s moved to %s\n", currentVReg.ID(), a.regInfo.RealRegName(r), a.regInfo.RealRegName(er))
			}
		}
	} else {
		if wazevoapi.RegAllocLoggingEnabled {
			fmt.Printf("\t\tv%d is desired to be on %s, current not used\n",
				desiredVReg.ID(), a.regInfo.RealRegName(r),
			)
		}
		if currentReg := desiredState.r; currentReg != RealRegInvalid {
			// Case 3: Desired is on a different register than `r` and currentReg is not valid.
			// We simply need to move the desired value to the register.
			f.InsertMoveBefore(
				FromRealReg(r, typ),
				desiredVReg.SetRealReg(currentReg),
				pred.LastInstrForInsertion(),
			)
			s.releaseRealReg(currentReg)
		} else {
			// Case 4: Desired is on the stack and currentReg is not valid.
			// We simply need to reload the desired value into the register.
			desiredState.recordReload(f, pred)
			f.ReloadRegisterBefore(desiredVReg.SetRealReg(r), pred.LastInstrForInsertion())
		}
		s.useRealReg(r, desiredState)
	}
}
1110
1111func (a *Allocator[I, B, F]) scheduleSpills(f F) {
1112 states := a.state.vrStates
1113 for i := 0; i <= states.MaxIDEncountered(); i++ {
1114 vs := states.Get(i)
1115 if vs == nil {
1116 continue
1117 }
1118 if vs.spilled {
1119 a.scheduleSpill(f, vs)
1120 }
1121 }
1122}
1123
1124func (a *Allocator[I, B, F]) scheduleSpill(f F, vs *vrState[I, B, F]) {
1125 v := vs.v
1126 // If the value is the phi value, we need to insert a spill after each phi definition.
1127 if vs.isPhi {
1128 for defInstr := vs.phiDefInstList; defInstr != nil; defInstr = defInstr.next {
1129 f.StoreRegisterAfter(defInstr.v, defInstr.instr)
1130 }
1131 return
1132 }
1133
1134 pos := vs.lca
1135 definingBlk := vs.defBlk
1136 r := RealRegInvalid
1137 var nilBlk B
1138 if definingBlk == nilBlk {
1139 panic(fmt.Sprintf("BUG: definingBlk should not be nil for %s. This is likley a bug in backend lowering logic", vs.v.String()))
1140 }
1141 if pos == nilBlk {
1142 panic(fmt.Sprintf("BUG: pos should not be nil for %s. This is likley a bug in backend lowering logic", vs.v.String()))
1143 }
1144
1145 if wazevoapi.RegAllocLoggingEnabled {
1146 fmt.Printf("v%d is spilled in blk%d, lca=blk%d\n", v.ID(), definingBlk.ID(), pos.ID())
1147 }
1148 for pos != definingBlk {
1149 st := a.getOrAllocateBlockState(pos.ID())
1150 for rr := RealReg(0); rr < 64; rr++ {
1151 if vs := st.startRegs.get(rr); vs != nil && vs.v == v {
1152 r = rr
1153 // Already in the register, so we can place the spill at the beginning of the block.
1154 break
1155 }
1156 }
1157
1158 if r != RealRegInvalid {
1159 break
1160 }
1161
1162 pos = f.Idom(pos)
1163 }
1164
1165 if pos == definingBlk {
1166 defInstr := vs.defInstr
1167 defInstr.Defs(&a.vs)
1168 if wazevoapi.RegAllocLoggingEnabled {
1169 fmt.Printf("schedule spill v%d after %v\n", v.ID(), defInstr)
1170 }
1171 f.StoreRegisterAfter(a.vs[0], defInstr)
1172 } else {
1173 // Found an ancestor block that holds the value in the register at the beginning of the block.
1174 // We need to insert a spill before the last use.
1175 first := pos.FirstInstr()
1176 if wazevoapi.RegAllocLoggingEnabled {
1177 fmt.Printf("schedule spill v%d before %v\n", v.ID(), first)
1178 }
1179 f.StoreRegisterAfter(v.SetRealReg(r), first)
1180 }
1181}
1182
1183// Reset resets the allocator's internal state so that it can be reused.
1184func (a *Allocator[I, B, F]) Reset() {
1185 a.state.reset()
1186 a.blockStates.Reset()
1187 a.phiDefInstListPool.Reset()
1188 a.vs = a.vs[:0]
1189}