pass.go

  1package ssa
  2
  3import (
  4	"fmt"
  5
  6	"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
  7)
  8
// RunPasses implements Builder.RunPasses.
//
// The order here matters; some passes depend on the results of the previous ones.
//
// Note that passes suffixed with "Opt" are the optimization passes, meaning that they edit the instructions and blocks
// while the other passes are not, like passEstimateBranchProbabilities does not edit them, but only calculates the additional information.
func (b *builder) RunPasses() {
	b.runPreBlockLayoutPasses()
	b.runBlockLayoutPass()
	b.runPostBlockLayoutPasses()
	b.runFinalizingPasses()
}
 21
// runPreBlockLayoutPasses runs the SSA-level passes that must happen before the block
// layout is decided. These passes may freely add, remove, or rewrite instructions and
// blocks. It sets donePreBlockLayoutPasses so runBlockLayoutPass can enforce ordering.
func (b *builder) runPreBlockLayoutPasses() {
	passSortSuccessors(b)
	passDeadBlockEliminationOpt(b)
	// The result of passCalculateImmediateDominators will be used by various passes below.
	passCalculateImmediateDominators(b)
	passRedundantPhiEliminationOpt(b)
	passNopInstElimination(b)

	// TODO: implement either conversion of irreducible CFG into reducible one, or irreducible CFG detection where we panic.
	// 	WebAssembly programs shouldn't result in an irreducible CFG, but we should handle it properly just in case.
	// 	See FixIrreducible pass in LLVM: https://llvm.org/doxygen/FixIrreducible_8cpp_source.html

	// TODO: implement more optimization passes like:
	// 	block coalescing.
	// 	Copy-propagation.
	// 	Constant folding.
	// 	Common subexpression elimination.
	// 	Arithmetic simplifications.
	// 	and more!

	// passDeadCodeEliminationOpt could be more accurate if we do this after other optimizations.
	passDeadCodeEliminationOpt(b)
	b.donePreBlockLayoutPasses = true
}
 46
// runBlockLayoutPass decides the final ordering of the basic blocks (and splits
// critical edges as a side effect of passLayoutBlocks). It panics if the pre-layout
// passes have not run yet, and sets doneBlockLayout for the post-layout passes.
func (b *builder) runBlockLayoutPass() {
	if !b.donePreBlockLayoutPasses {
		panic("runBlockLayoutPass must be called after all pre passes are done")
	}
	passLayoutBlocks(b)
	b.doneBlockLayout = true
}
 54
// runPostBlockLayoutPasses runs the post block layout passes. After this point, CFG is somewhat stable,
// but still can be modified before finalizing passes. At this point, critical edges are split by passLayoutBlocks.
// Currently this only flips donePostBlockLayoutPasses; real passes are TODO below.
func (b *builder) runPostBlockLayoutPasses() {
	if !b.doneBlockLayout {
		panic("runPostBlockLayoutPasses must be called after block layout pass is done")
	}
	// TODO: Do more. e.g. tail duplication, loop unrolling, etc.

	b.donePostBlockLayoutPasses = true
}
 65
// runFinalizingPasses runs the finalizing passes. After this point, CFG should not be modified.
// It rebuilds the loop nesting forest and dominator tree (both invalidated by the
// critical-edge splitting done during block layout), then marks fallthrough jumps.
func (b *builder) runFinalizingPasses() {
	if !b.donePostBlockLayoutPasses {
		panic("runFinalizingPasses must be called after post block layout passes are done")
	}
	// Critical edges are split, so we fix the loop nesting forest.
	passBuildLoopNestingForest(b)
	passBuildDominatorTree(b)
	// Now that we know the final placement of the blocks, we can explicitly mark the fallthrough jumps.
	b.markFallthroughJumps()
}
 77
 78// passDeadBlockEliminationOpt searches the unreachable blocks, and sets the basicBlock.invalid flag true if so.
 79func passDeadBlockEliminationOpt(b *builder) {
 80	entryBlk := b.entryBlk()
 81	b.blkStack = append(b.blkStack, entryBlk)
 82	for len(b.blkStack) > 0 {
 83		reachableBlk := b.blkStack[len(b.blkStack)-1]
 84		b.blkStack = b.blkStack[:len(b.blkStack)-1]
 85		reachableBlk.visited = 1
 86
 87		if !reachableBlk.sealed && !reachableBlk.ReturnBlock() {
 88			panic(fmt.Sprintf("%s is not sealed", reachableBlk))
 89		}
 90
 91		if wazevoapi.SSAValidationEnabled {
 92			reachableBlk.validate(b)
 93		}
 94
 95		for _, succ := range reachableBlk.success {
 96			if succ.visited == 1 {
 97				continue
 98			}
 99			b.blkStack = append(b.blkStack, succ)
100		}
101	}
102
103	for blk := b.blockIteratorBegin(); blk != nil; blk = b.blockIteratorNext() {
104		if blk.visited != 1 {
105			blk.invalid = true
106		}
107		blk.visited = 0
108	}
109}
110
// passRedundantPhiEliminationOpt eliminates the redundant PHIs (in our terminology, parameters of a block).
// A block parameter is redundant when every predecessor passes either the PHI value itself
// (self-reference) or one single other value; in that case the parameter can be replaced
// by an alias to that unique value. The pass iterates to a fixed point because removing
// one PHI can make another one redundant.
// This requires the reverse post-order traversal to be calculated before calling this function,
// hence passCalculateImmediateDominators must be called before this.
func passRedundantPhiEliminationOpt(b *builder) {
	redundantParams := b.redundantParams[:0] // reuse the slice from previous iterations.

	// TODO: this might be costly for large programs, but at least, as far as I did the experiment, it's almost the
	//  same as the single iteration version in terms of the overall compilation time. That *might be* mostly thanks to the fact
	//  that removing many PHIs results in the reduction of the total instructions, not because of this indefinite iteration is
	//  relatively small. For example, sqlite speedtest binary results in the large number of redundant PHIs,
	//  the maximum number of iteration was 22, which seems to be acceptable but not that small either since the
	//  complexity here is O(BlockNum * Iterations) at the worst case where BlockNum might be the order of thousands.
	//  -- Note --
	// 	Currently, each iteration can run in any order of blocks, but it empirically converges quickly in practice when
	// 	running on the reverse post-order. It might be possible to optimize this further by using the dominator tree.
	for {
		changed := false
		_ = b.blockIteratorReversePostOrderBegin() // skip entry block!
		// Below, we intentionally use the named iteration variable name, as this comes with inevitable nested for loops!
		for blk := b.blockIteratorReversePostOrderNext(); blk != nil; blk = b.blockIteratorReversePostOrderNext() {
			params := blk.params.View()
			paramNum := len(params)

			// Step 1: collect the redundant parameters of this block into redundantParams
			// (in ascending index order, which the compaction loops below rely on).
			for paramIndex := 0; paramIndex < paramNum; paramIndex++ {
				phiValue := params[paramIndex]
				redundant := true

				nonSelfReferencingValue := ValueInvalid
				for predIndex := range blk.preds {
					br := blk.preds[predIndex].branch
					// Resolve the alias in the arguments so that we could use the previous iteration's result.
					b.resolveArgumentAlias(br)
					pred := br.vs.View()[paramIndex]
					if pred == phiValue {
						// This is self-referencing: PHI from the same PHI.
						continue
					}

					if !nonSelfReferencingValue.Valid() {
						nonSelfReferencingValue = pred
						continue
					}

					if nonSelfReferencingValue != pred {
						// Two distinct non-self values flow in: the PHI is genuinely needed.
						redundant = false
						break
					}
				}

				if !nonSelfReferencingValue.Valid() {
					// This shouldn't happen, and must be a bug in builder.go.
					panic("BUG: params added but only self-referencing")
				}

				if redundant {
					redundantParams = append(redundantParams, redundantParam{
						index: paramIndex, uniqueValue: nonSelfReferencingValue,
					})
				}
			}

			if len(redundantParams) == 0 {
				continue
			}
			changed = true

			// Step 2: remove the redundant PHIs from the argument list of branching instructions.
			// This is an in-place compaction: arguments whose index is not in redundantParams
			// are shifted left, then the tail is cut off.
			for predIndex := range blk.preds {
				redundantParamsCur, predParamCur := 0, 0
				predBlk := blk.preds[predIndex]
				branchInst := predBlk.branch
				view := branchInst.vs.View()
				for argIndex, value := range view {
					if len(redundantParams) == redundantParamsCur ||
						redundantParams[redundantParamsCur].index != argIndex {
						view[predParamCur] = value
						predParamCur++
					} else {
						redundantParamsCur++
					}
				}
				branchInst.vs.Cut(predParamCur)
			}

			// Step 3: still need to have the definition of the value of the PHI (previously as the parameter).
			for i := range redundantParams {
				redundantValue := &redundantParams[i]
				phiValue := params[redundantValue.index]
				// Create an alias in this block from the only phi argument to the phi value.
				b.alias(phiValue, redundantValue.uniqueValue)
			}

			// Step 4: finally, Remove the param from the blk, compacting in place as above.
			paramsCur, redundantParamsCur := 0, 0
			for paramIndex := 0; paramIndex < paramNum; paramIndex++ {
				param := params[paramIndex]
				if len(redundantParams) == redundantParamsCur || redundantParams[redundantParamsCur].index != paramIndex {
					params[paramsCur] = param
					paramsCur++
				} else {
					redundantParamsCur++
				}
			}
			blk.params.Cut(paramsCur)

			// Clears the map for the next iteration.
			redundantParams = redundantParams[:0]
		}

		if !changed {
			break
		}
	}

	// Reuse the slice for the future passes.
	b.redundantParams = redundantParams
}
228
// passDeadCodeEliminationOpt traverses all the instructions, and calculates the reference count of each Value, and
// eliminates all the unnecessary instructions whose ref count is zero.
// The results are stored at builder.valuesInfo. This also assigns a InstructionGroupID to each Instruction
// during the process. This is the last SSA-level optimization pass and after this,
// the SSA function is ready to be used by backends.
//
// The algorithm is a three-phase mark-and-sweep:
//  1. seed a worklist with every instruction that has a side effect,
//  2. transitively mark the producers of their arguments as live,
//  3. unlink dead instructions and count references for the live ones.
//
// TODO: the algorithm here might not be efficient. Get back to this later.
func passDeadCodeEliminationOpt(b *builder) {
	// Grow valuesInfo to cover every ValueID allocated so far, initializing the new
	// entries' alias field to ValueInvalid (append gives zero values otherwise).
	nvid := int(b.nextValueID)
	if nvid >= len(b.valuesInfo) {
		l := nvid - len(b.valuesInfo) + 1
		b.valuesInfo = append(b.valuesInfo, make([]ValueInfo, l)...)
		view := b.valuesInfo[len(b.valuesInfo)-l:]
		for i := range view {
			view[i].alias = ValueInvalid
		}
	}

	// First, we gather all the instructions with side effects.
	liveInstructions := b.instStack[:0]
	// During the process, we will assign InstructionGroupID to each instruction, which is not
	// relevant to dead code elimination, but we need in the backend.
	var gid InstructionGroupID
	for blk := b.blockIteratorBegin(); blk != nil; blk = b.blockIteratorNext() {
		for cur := blk.rootInstr; cur != nil; cur = cur.next {
			cur.gid = gid
			switch cur.sideEffect() {
			case sideEffectTraps:
				// The trappable should always be alive.
				liveInstructions = append(liveInstructions, cur)
			case sideEffectStrict:
				liveInstructions = append(liveInstructions, cur)
				// The strict side effect should create different instruction groups.
				gid++
			}
		}
	}

	// Find all the instructions referenced by live instructions transitively.
	// liveInstructions doubles as a LIFO worklist here.
	for len(liveInstructions) > 0 {
		tail := len(liveInstructions) - 1
		live := liveInstructions[tail]
		liveInstructions = liveInstructions[:tail]
		if live.live {
			// If it's already marked alive, this is referenced multiple times,
			// so we can skip it.
			continue
		}
		live.live = true

		// Before we walk, we need to resolve the alias first.
		b.resolveArgumentAlias(live)

		// Each argument's producing instruction (nil for block params / function
		// arguments) becomes live too.
		v1, v2, v3, vs := live.Args()
		if v1.Valid() {
			producingInst := b.InstructionOfValue(v1)
			if producingInst != nil {
				liveInstructions = append(liveInstructions, producingInst)
			}
		}

		if v2.Valid() {
			producingInst := b.InstructionOfValue(v2)
			if producingInst != nil {
				liveInstructions = append(liveInstructions, producingInst)
			}
		}

		if v3.Valid() {
			producingInst := b.InstructionOfValue(v3)
			if producingInst != nil {
				liveInstructions = append(liveInstructions, producingInst)
			}
		}

		for _, v := range vs {
			producingInst := b.InstructionOfValue(v)
			if producingInst != nil {
				liveInstructions = append(liveInstructions, producingInst)
			}
		}
	}

	// Now that all the live instructions are flagged as live=true, we eliminate all dead instructions.
	for blk := b.blockIteratorBegin(); blk != nil; blk = b.blockIteratorNext() {
		for cur := blk.rootInstr; cur != nil; cur = cur.next {
			if !cur.live {
				// Remove the instruction from the doubly-linked list of the block.
				if prev := cur.prev; prev != nil {
					prev.next = cur.next
				} else {
					blk.rootInstr = cur.next
				}
				if next := cur.next; next != nil {
					next.prev = cur.prev
				}
				continue
			}

			// If the value alive, we can be sure that arguments are used definitely.
			// Hence, we can increment the value reference counts.
			v1, v2, v3, vs := cur.Args()
			if v1.Valid() {
				b.incRefCount(v1.ID(), cur)
			}
			if v2.Valid() {
				b.incRefCount(v2.ID(), cur)
			}
			if v3.Valid() {
				b.incRefCount(v3.ID(), cur)
			}
			for _, v := range vs {
				b.incRefCount(v.ID(), cur)
			}
		}
	}

	b.instStack = liveInstructions // we reuse the stack for the next iteration.
}
348
349func (b *builder) incRefCount(id ValueID, from *Instruction) {
350	if wazevoapi.SSALoggingEnabled {
351		fmt.Printf("v%d referenced from %v\n", id, from.Format(b))
352	}
353	info := &b.valuesInfo[id]
354	info.RefCount++
355}
356
357// passNopInstElimination eliminates the instructions which is essentially a no-op.
358func passNopInstElimination(b *builder) {
359	for blk := b.blockIteratorBegin(); blk != nil; blk = b.blockIteratorNext() {
360		for cur := blk.rootInstr; cur != nil; cur = cur.next {
361			switch cur.Opcode() {
362			// TODO: add more logics here.
363			case OpcodeIshl, OpcodeSshr, OpcodeUshr:
364				x, amount := cur.Arg2()
365				definingInst := b.InstructionOfValue(amount)
366				if definingInst == nil {
367					// If there's no defining instruction, that means the amount is coming from the parameter.
368					continue
369				}
370				if definingInst.Constant() {
371					v := definingInst.ConstantVal()
372
373					if x.Type().Bits() == 64 {
374						v = v % 64
375					} else {
376						v = v % 32
377					}
378					if v == 0 {
379						b.alias(cur.Return(), x)
380					}
381				}
382			}
383		}
384	}
385}
386
387// passSortSuccessors sorts the successors of each block in the natural program order.
388func passSortSuccessors(b *builder) {
389	for i := 0; i < b.basicBlocksPool.Allocated(); i++ {
390		blk := b.basicBlocksPool.View(i)
391		sortBlocks(blk.success)
392	}
393}