1package ssa
2
3import (
4 "fmt"
5
6 "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
7)
8
9// RunPasses implements Builder.RunPasses.
10//
11// The order here matters; some pass depends on the previous ones.
12//
13// Note that passes suffixed with "Opt" are the optimization passes, meaning that they edit the instructions and blocks
14// while the other passes are not, like passEstimateBranchProbabilities does not edit them, but only calculates the additional information.
15func (b *builder) RunPasses() {
16 b.runPreBlockLayoutPasses()
17 b.runBlockLayoutPass()
18 b.runPostBlockLayoutPasses()
19 b.runFinalizingPasses()
20}
21
22func (b *builder) runPreBlockLayoutPasses() {
23 passSortSuccessors(b)
24 passDeadBlockEliminationOpt(b)
25 // The result of passCalculateImmediateDominators will be used by various passes below.
26 passCalculateImmediateDominators(b)
27 passRedundantPhiEliminationOpt(b)
28 passNopInstElimination(b)
29
30 // TODO: implement either conversion of irreducible CFG into reducible one, or irreducible CFG detection where we panic.
31 // WebAssembly program shouldn't result in irreducible CFG, but we should handle it properly in just in case.
32 // See FixIrreducible pass in LLVM: https://llvm.org/doxygen/FixIrreducible_8cpp_source.html
33
34 // TODO: implement more optimization passes like:
35 // block coalescing.
36 // Copy-propagation.
37 // Constant folding.
38 // Common subexpression elimination.
39 // Arithmetic simplifications.
40 // and more!
41
42 // passDeadCodeEliminationOpt could be more accurate if we do this after other optimizations.
43 passDeadCodeEliminationOpt(b)
44 b.donePreBlockLayoutPasses = true
45}
46
47func (b *builder) runBlockLayoutPass() {
48 if !b.donePreBlockLayoutPasses {
49 panic("runBlockLayoutPass must be called after all pre passes are done")
50 }
51 passLayoutBlocks(b)
52 b.doneBlockLayout = true
53}
54
55// runPostBlockLayoutPasses runs the post block layout passes. After this point, CFG is somewhat stable,
56// but still can be modified before finalizing passes. At this point, critical edges are split by passLayoutBlocks.
57func (b *builder) runPostBlockLayoutPasses() {
58 if !b.doneBlockLayout {
59 panic("runPostBlockLayoutPasses must be called after block layout pass is done")
60 }
61 // TODO: Do more. e.g. tail duplication, loop unrolling, etc.
62
63 b.donePostBlockLayoutPasses = true
64}
65
66// runFinalizingPasses runs the finalizing passes. After this point, CFG should not be modified.
67func (b *builder) runFinalizingPasses() {
68 if !b.donePostBlockLayoutPasses {
69 panic("runFinalizingPasses must be called after post block layout passes are done")
70 }
71 // Critical edges are split, so we fix the loop nesting forest.
72 passBuildLoopNestingForest(b)
73 passBuildDominatorTree(b)
74 // Now that we know the final placement of the blocks, we can explicitly mark the fallthrough jumps.
75 b.markFallthroughJumps()
76}
77
78// passDeadBlockEliminationOpt searches the unreachable blocks, and sets the basicBlock.invalid flag true if so.
79func passDeadBlockEliminationOpt(b *builder) {
80 entryBlk := b.entryBlk()
81 b.blkStack = append(b.blkStack, entryBlk)
82 for len(b.blkStack) > 0 {
83 reachableBlk := b.blkStack[len(b.blkStack)-1]
84 b.blkStack = b.blkStack[:len(b.blkStack)-1]
85 reachableBlk.visited = 1
86
87 if !reachableBlk.sealed && !reachableBlk.ReturnBlock() {
88 panic(fmt.Sprintf("%s is not sealed", reachableBlk))
89 }
90
91 if wazevoapi.SSAValidationEnabled {
92 reachableBlk.validate(b)
93 }
94
95 for _, succ := range reachableBlk.success {
96 if succ.visited == 1 {
97 continue
98 }
99 b.blkStack = append(b.blkStack, succ)
100 }
101 }
102
103 for blk := b.blockIteratorBegin(); blk != nil; blk = b.blockIteratorNext() {
104 if blk.visited != 1 {
105 blk.invalid = true
106 }
107 blk.visited = 0
108 }
109}
110
// passRedundantPhiEliminationOpt eliminates the redundant PHIs (in our terminology, parameters of a block).
// A parameter is "redundant" when every predecessor passes either the parameter itself
// (a self-reference) or one single other value; in that case the parameter can be
// replaced by an alias to that unique value.
// This requires the reverse post-order traversal to be calculated before calling this function,
// hence passCalculateImmediateDominators must be called before this.
func passRedundantPhiEliminationOpt(b *builder) {
	redundantParams := b.redundantParams[:0] // reuse the slice from previous iterations.

	// TODO: this might be costly for large programs, but at least, as far as I did the experiment, it's almost the
	// same as the single iteration version in terms of the overall compilation time. That *might be* mostly thanks to the fact
	// that removing many PHIs results in the reduction of the total instructions, not because of this indefinite iteration is
	// relatively small. For example, sqlite speedtest binary results in the large number of redundant PHIs,
	// the maximum number of iteration was 22, which seems to be acceptable but not that small either since the
	// complexity here is O(BlockNum * Iterations) at the worst case where BlockNum might be the order of thousands.
	// -- Note --
	// Currently, each iteration can run in any order of blocks, but it empirically converges quickly in practice when
	// running on the reverse post-order. It might be possible to optimize this further by using the dominator tree.
	//
	// Fixed-point loop: removing one redundant PHI can expose another, so we repeat
	// until a full sweep makes no change.
	for {
		changed := false
		_ = b.blockIteratorReversePostOrderBegin() // skip entry block!
		// Below, we intentionally use the named iteration variable name, as this comes with inevitable nested for loops!
		for blk := b.blockIteratorReversePostOrderNext(); blk != nil; blk = b.blockIteratorReversePostOrderNext() {
			params := blk.params.View()
			paramNum := len(params)

			// Phase 1: collect the redundant params of this block. Scanning params in
			// ascending index order keeps redundantParams sorted by index, which the
			// compaction loops below rely on.
			for paramIndex := 0; paramIndex < paramNum; paramIndex++ {
				phiValue := params[paramIndex]
				redundant := true

				nonSelfReferencingValue := ValueInvalid
				for predIndex := range blk.preds {
					br := blk.preds[predIndex].branch
					// Resolve the alias in the arguments so that we could use the previous iteration's result.
					b.resolveArgumentAlias(br)
					pred := br.vs.View()[paramIndex]
					if pred == phiValue {
						// This is self-referencing: PHI from the same PHI.
						continue
					}

					if !nonSelfReferencingValue.Valid() {
						nonSelfReferencingValue = pred
						continue
					}

					if nonSelfReferencingValue != pred {
						// Two distinct incoming values: this is a genuine PHI.
						redundant = false
						break
					}
				}

				if !nonSelfReferencingValue.Valid() {
					// This shouldn't happen, and must be a bug in builder.go.
					panic("BUG: params added but only self-referencing")
				}

				if redundant {
					redundantParams = append(redundantParams, redundantParam{
						index: paramIndex, uniqueValue: nonSelfReferencingValue,
					})
				}
			}

			if len(redundantParams) == 0 {
				continue
			}
			changed = true

			// Phase 2: remove the redundant PHIs from the argument list of branching instructions.
			for predIndex := range blk.preds {
				redundantParamsCur, predParamCur := 0, 0
				predBlk := blk.preds[predIndex]
				branchInst := predBlk.branch
				view := branchInst.vs.View()
				// Two-pointer in-place compaction: copy each kept argument forward,
				// skipping the arguments at redundant indexes.
				for argIndex, value := range view {
					if len(redundantParams) == redundantParamsCur ||
						redundantParams[redundantParamsCur].index != argIndex {
						view[predParamCur] = value
						predParamCur++
					} else {
						redundantParamsCur++
					}
				}
				branchInst.vs.Cut(predParamCur)
			}

			// Phase 3: still need to have the definition of the value of the PHI (previously as the parameter).
			for i := range redundantParams {
				redundantValue := &redundantParams[i]
				phiValue := params[redundantValue.index]
				// Create an alias in this block from the only phi argument to the phi value.
				b.alias(phiValue, redundantValue.uniqueValue)
			}

			// Phase 4: finally, Remove the param from the blk, with the same
			// two-pointer compaction as the branch arguments above.
			paramsCur, redundantParamsCur := 0, 0
			for paramIndex := 0; paramIndex < paramNum; paramIndex++ {
				param := params[paramIndex]
				if len(redundantParams) == redundantParamsCur || redundantParams[redundantParamsCur].index != paramIndex {
					params[paramsCur] = param
					paramsCur++
				} else {
					redundantParamsCur++
				}
			}
			blk.params.Cut(paramsCur)

			// Clears the map for the next iteration.
			redundantParams = redundantParams[:0]
		}

		if !changed {
			break
		}
	}

	// Reuse the slice for the future passes.
	b.redundantParams = redundantParams
}
228
// passDeadCodeEliminationOpt traverses all the instructions, and calculates the reference count of each Value, and
// eliminates all the unnecessary instructions whose ref count is zero.
// The results are stored at builder.valueRefCounts. This also assigns a InstructionGroupID to each Instruction
// during the process. This is the last SSA-level optimization pass and after this,
// the SSA function is ready to be used by backends.
//
// The algorithm is a mark-and-sweep: seed a work list with side-effecting
// instructions, transitively mark everything they reference as live, then
// unlink the unmarked instructions and count references among survivors.
//
// TODO: the algorithm here might not be efficient. Get back to this later.
func passDeadCodeEliminationOpt(b *builder) {
	// Grow valuesInfo so that it can be indexed by every ValueID issued so far,
	// initializing the new entries' alias to ValueInvalid.
	nvid := int(b.nextValueID)
	if nvid >= len(b.valuesInfo) {
		l := nvid - len(b.valuesInfo) + 1
		b.valuesInfo = append(b.valuesInfo, make([]ValueInfo, l)...)
		view := b.valuesInfo[len(b.valuesInfo)-l:]
		for i := range view {
			view[i].alias = ValueInvalid
		}
	}

	// First, we gather all the instructions with side effects.
	liveInstructions := b.instStack[:0]
	// During the process, we will assign InstructionGroupID to each instruction, which is not
	// relevant to dead code elimination, but we need in the backend.
	var gid InstructionGroupID
	for blk := b.blockIteratorBegin(); blk != nil; blk = b.blockIteratorNext() {
		for cur := blk.rootInstr; cur != nil; cur = cur.next {
			cur.gid = gid
			switch cur.sideEffect() {
			case sideEffectTraps:
				// The trappable should always be alive.
				liveInstructions = append(liveInstructions, cur)
			case sideEffectStrict:
				liveInstructions = append(liveInstructions, cur)
				// The strict side effect should create different instruction groups.
				gid++
			}
		}
	}

	// Find all the instructions referenced by live instructions transitively,
	// using liveInstructions as a stack-based work list.
	for len(liveInstructions) > 0 {
		tail := len(liveInstructions) - 1
		live := liveInstructions[tail]
		liveInstructions = liveInstructions[:tail]
		if live.live {
			// If it's already marked alive, this is referenced multiple times,
			// so we can skip it.
			continue
		}
		live.live = true

		// Before we walk, we need to resolve the alias first.
		b.resolveArgumentAlias(live)

		// Push the producer of each argument (up to three fixed args plus a
		// variadic list) onto the work list.
		v1, v2, v3, vs := live.Args()
		if v1.Valid() {
			producingInst := b.InstructionOfValue(v1)
			if producingInst != nil {
				liveInstructions = append(liveInstructions, producingInst)
			}
		}

		if v2.Valid() {
			producingInst := b.InstructionOfValue(v2)
			if producingInst != nil {
				liveInstructions = append(liveInstructions, producingInst)
			}
		}

		if v3.Valid() {
			producingInst := b.InstructionOfValue(v3)
			if producingInst != nil {
				liveInstructions = append(liveInstructions, producingInst)
			}
		}

		for _, v := range vs {
			producingInst := b.InstructionOfValue(v)
			if producingInst != nil {
				liveInstructions = append(liveInstructions, producingInst)
			}
		}
	}

	// Now that all the live instructions are flagged as live=true, we eliminate all dead instructions.
	for blk := b.blockIteratorBegin(); blk != nil; blk = b.blockIteratorNext() {
		for cur := blk.rootInstr; cur != nil; cur = cur.next {
			if !cur.live {
				// Remove the instruction from the doubly-linked list of the block.
				if prev := cur.prev; prev != nil {
					prev.next = cur.next
				} else {
					blk.rootInstr = cur.next
				}
				if next := cur.next; next != nil {
					next.prev = cur.prev
				}
				continue
			}

			// If the value alive, we can be sure that arguments are used definitely.
			// Hence, we can increment the value reference counts.
			v1, v2, v3, vs := cur.Args()
			if v1.Valid() {
				b.incRefCount(v1.ID(), cur)
			}
			if v2.Valid() {
				b.incRefCount(v2.ID(), cur)
			}
			if v3.Valid() {
				b.incRefCount(v3.ID(), cur)
			}
			for _, v := range vs {
				b.incRefCount(v.ID(), cur)
			}
		}
	}

	b.instStack = liveInstructions // we reuse the stack for the next iteration.
}
348
349func (b *builder) incRefCount(id ValueID, from *Instruction) {
350 if wazevoapi.SSALoggingEnabled {
351 fmt.Printf("v%d referenced from %v\n", id, from.Format(b))
352 }
353 info := &b.valuesInfo[id]
354 info.RefCount++
355}
356
357// passNopInstElimination eliminates the instructions which is essentially a no-op.
358func passNopInstElimination(b *builder) {
359 for blk := b.blockIteratorBegin(); blk != nil; blk = b.blockIteratorNext() {
360 for cur := blk.rootInstr; cur != nil; cur = cur.next {
361 switch cur.Opcode() {
362 // TODO: add more logics here.
363 case OpcodeIshl, OpcodeSshr, OpcodeUshr:
364 x, amount := cur.Arg2()
365 definingInst := b.InstructionOfValue(amount)
366 if definingInst == nil {
367 // If there's no defining instruction, that means the amount is coming from the parameter.
368 continue
369 }
370 if definingInst.Constant() {
371 v := definingInst.ConstantVal()
372
373 if x.Type().Bits() == 64 {
374 v = v % 64
375 } else {
376 v = v % 32
377 }
378 if v == 0 {
379 b.alias(cur.Return(), x)
380 }
381 }
382 }
383 }
384 }
385}
386
387// passSortSuccessors sorts the successors of each block in the natural program order.
388func passSortSuccessors(b *builder) {
389 for i := 0; i < b.basicBlocksPool.Allocated(); i++ {
390 blk := b.basicBlocksPool.View(i)
391 sortBlocks(blk.success)
392 }
393}