package backend

import (
	"context"
	"fmt"

	"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
	"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
	"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
)

// NewCompiler returns a new Compiler that can generate machine code.
func NewCompiler(ctx context.Context, mach Machine, builder ssa.Builder) Compiler {
	return newCompiler(ctx, mach, builder)
}

func newCompiler(_ context.Context, mach Machine, builder ssa.Builder) *compiler {
	argResultInts, argResultFloats := mach.ArgsResultsRegs()
	c := &compiler{
		mach: mach, ssaBuilder: builder,
		nextVRegID:      regalloc.VRegIDNonReservedBegin,
		argResultInts:   argResultInts,
		argResultFloats: argResultFloats,
	}
	mach.SetCompiler(c)
	return c
}

// Compiler is the backend of wazevo which takes ssa.Builder and Machine,
// and uses the information there to emit the final machine code.
type Compiler interface {
	// SSABuilder returns the ssa.Builder used by this compiler.
	SSABuilder() ssa.Builder

	// Compile executes the following steps:
	// 	1. Lower()
	// 	2. RegAlloc()
	// 	3. Finalize() (which also encodes the machine code)
	//
	// Each step can be called individually for testing purposes, therefore they are exposed in this interface too.
	//
	// The returned byte slices are the machine code and the relocation information for the machine code.
	// The caller is responsible for copying them immediately since the compiler may reuse the buffer.
	Compile(ctx context.Context) (_ []byte, _ []RelocationInfo, _ error)

	// Lower lowers the SSA instructions in the ssa.Builder to the machine-specific instructions.
	Lower()

	// RegAlloc performs the register allocation after Lower is called.
	RegAlloc()

	// Finalize performs the finalization of the compilation, including machine code emission.
	// This must be called after RegAlloc.
	Finalize(ctx context.Context) error

	// Buf returns the buffer of the encoded machine code. This is only used for testing purposes.
	Buf() []byte

	// BufPtr returns a pointer to the underlying buffer of the encoded machine code, so that the
	// caller can append to it directly.
	BufPtr() *[]byte

	// Format returns the debug string of the current state of the compiler.
	Format() string

	// Init initializes the internal state of the compiler for the next compilation.
	Init()

	// AllocateVReg allocates a new virtual register of the given type.
	AllocateVReg(typ ssa.Type) regalloc.VReg

	// ValueDefinition returns the definition of the given value.
	ValueDefinition(ssa.Value) SSAValueDefinition

	// VRegOf returns the virtual register of the given ssa.Value.
	VRegOf(value ssa.Value) regalloc.VReg

	// TypeOf returns the ssa.Type of the given virtual register.
	TypeOf(regalloc.VReg) ssa.Type

	// MatchInstr returns true if the given definition is from an instruction with the given opcode, the current group ID,
	// and a refcount of 1. That means the instruction can be merged/swapped within the current instruction group.
	MatchInstr(def SSAValueDefinition, opcode ssa.Opcode) bool

	// MatchInstrOneOf is the same as MatchInstr but accepts multiple opcodes. If the definition matches one of the given
	// opcodes, this returns that opcode. Otherwise, this returns ssa.OpcodeInvalid.
	//
	// Note: the caller should be careful to avoid excessive allocations for the opcodes slice.
	MatchInstrOneOf(def SSAValueDefinition, opcodes []ssa.Opcode) ssa.Opcode

	// AddRelocationInfo appends the relocation information for the function reference at the current buffer offset.
	AddRelocationInfo(funcRef ssa.FuncRef)

	// AddSourceOffsetInfo appends the source offset information for the given executable offset.
	AddSourceOffsetInfo(executableOffset int64, sourceOffset ssa.SourceOffset)

	// SourceOffsetInfo returns the source offset information appended so far.
	SourceOffsetInfo() []SourceOffsetInfo

	// EmitByte appends a byte to the buffer. Used during the code emission.
	EmitByte(b byte)

	// Emit4Bytes appends 4 bytes to the buffer. Used during the code emission.
	Emit4Bytes(b uint32)

	// Emit8Bytes appends 8 bytes to the buffer. Used during the code emission.
	Emit8Bytes(b uint64)

	// GetFunctionABI returns the ABI information for the given signature.
	GetFunctionABI(sig *ssa.Signature) *FunctionABI
}

// RelocationInfo represents the relocation information for a call instruction.
type RelocationInfo struct {
	// Offset represents the offset from the beginning of the machine code of either a function or the entire module.
	Offset int64
	// FuncRef is the target function of the call instruction.
	FuncRef ssa.FuncRef
}

// compiler implements Compiler.
type compiler struct {
	mach       Machine
	currentGID ssa.InstructionGroupID
	ssaBuilder ssa.Builder
	// nextVRegID is the next virtual register ID to be allocated.
	nextVRegID regalloc.VRegID
	// ssaValueToVRegs maps ssa.ValueID to regalloc.VReg.
	ssaValueToVRegs [] /* ValueID to */ regalloc.VReg
	// ssaValuesInfo caches the ssa.Builder's value information, indexed by ssa.ValueID.
	ssaValuesInfo []ssa.ValueInfo
	// returnVRegs is the list of virtual registers that store the return values.
	returnVRegs  []regalloc.VReg
	varEdges     [][2]regalloc.VReg
	varEdgeTypes []ssa.Type
	constEdges   []struct {
		cInst *ssa.Instruction
		dst   regalloc.VReg
	}
	vRegSet         []bool
	vRegIDs         []regalloc.VRegID
	tempRegs        []regalloc.VReg
	tmpVals         []ssa.Value
	ssaTypeOfVRegID [] /* VRegID to */ ssa.Type
	buf             []byte
	relocations     []RelocationInfo
	sourceOffsets   []SourceOffsetInfo
	// abis maps ssa.SignatureID to the ABI implementation.
	abis []FunctionABI
	argResultInts, argResultFloats []regalloc.RealReg
}

// SourceOffsetInfo associates a source offset with an executable offset.
type SourceOffsetInfo struct {
	// SourceOffset is the source offset in the original source code.
	SourceOffset ssa.SourceOffset
	// ExecutableOffset is the offset in the compiled executable.
	ExecutableOffset int64
}

// Compile implements Compiler.Compile.
func (c *compiler) Compile(ctx context.Context) ([]byte, []RelocationInfo, error) {
	c.Lower()
	if wazevoapi.PrintSSAToBackendIRLowering && wazevoapi.PrintEnabledIndex(ctx) {
		fmt.Printf("[[[after lowering for %s ]]]%s\n", wazevoapi.GetCurrentFunctionName(ctx), c.Format())
	}
	if wazevoapi.DeterministicCompilationVerifierEnabled {
		wazevoapi.VerifyOrSetDeterministicCompilationContextValue(ctx, "After lowering to ISA specific IR", c.Format())
	}
	c.RegAlloc()
	if wazevoapi.PrintRegisterAllocated && wazevoapi.PrintEnabledIndex(ctx) {
		fmt.Printf("[[[after regalloc for %s]]]%s\n", wazevoapi.GetCurrentFunctionName(ctx), c.Format())
	}
	if wazevoapi.DeterministicCompilationVerifierEnabled {
		wazevoapi.VerifyOrSetDeterministicCompilationContextValue(ctx, "After Register Allocation", c.Format())
	}
	if err := c.Finalize(ctx); err != nil {
		return nil, nil, err
	}
	if wazevoapi.PrintFinalizedMachineCode && wazevoapi.PrintEnabledIndex(ctx) {
		fmt.Printf("[[[after finalize for %s]]]%s\n", wazevoapi.GetCurrentFunctionName(ctx), c.Format())
	}
	if wazevoapi.DeterministicCompilationVerifierEnabled {
		wazevoapi.VerifyOrSetDeterministicCompilationContextValue(ctx, "After Finalization", c.Format())
	}
	return c.buf, c.relocations, nil
}

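// compileAndCopy is an illustrative sketch, not part of the original code: it shows how a caller
// might drive Compiler.Compile and copy the results right away, since the returned slices alias the
// compiler's internal buffers, which are reused across compilations (see Init). The function name is
// hypothetical.
func compileAndCopy(ctx context.Context, c Compiler) ([]byte, []RelocationInfo, error) {
	code, relocs, err := c.Compile(ctx)
	if err != nil {
		return nil, nil, err
	}
	// Copy the machine code out of the reusable buffer before the next compilation overwrites it.
	codeCopy := make([]byte, len(code))
	copy(codeCopy, code)
	// The relocation slice is likewise owned by the compiler, so copy it as well.
	relocsCopy := make([]RelocationInfo, len(relocs))
	copy(relocsCopy, relocs)
	return codeCopy, relocsCopy, nil
}
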
// RegAlloc implements Compiler.RegAlloc.
func (c *compiler) RegAlloc() {
	c.mach.RegAlloc()
}

// Finalize implements Compiler.Finalize.
func (c *compiler) Finalize(ctx context.Context) error {
	c.mach.PostRegAlloc()
	return c.mach.Encode(ctx)
}

// setCurrentGroupID sets the current instruction group ID.
func (c *compiler) setCurrentGroupID(gid ssa.InstructionGroupID) {
	c.currentGID = gid
}

// assignVirtualRegisters assigns a virtual register to each valid ssa.Value in the ssa.Builder.
func (c *compiler) assignVirtualRegisters() {
	builder := c.ssaBuilder
	c.ssaValuesInfo = builder.ValuesInfo()

	if diff := len(c.ssaValuesInfo) - len(c.ssaValueToVRegs); diff > 0 {
		c.ssaValueToVRegs = append(c.ssaValueToVRegs, make([]regalloc.VReg, diff+1)...)
	}

	for blk := builder.BlockIteratorReversePostOrderBegin(); blk != nil; blk = builder.BlockIteratorReversePostOrderNext() {
		// First we assign a virtual register to each parameter.
		for i := 0; i < blk.Params(); i++ {
			p := blk.Param(i)
			pid := p.ID()
			typ := p.Type()
			vreg := c.AllocateVReg(typ)
			c.ssaValueToVRegs[pid] = vreg
			c.ssaTypeOfVRegID[vreg.ID()] = typ
		}

		// Then we assign a virtual register to each value produced by the instructions.
		for cur := blk.Root(); cur != nil; cur = cur.Next() {
			r, rs := cur.Returns()
			if r.Valid() {
				id := r.ID()
				typ := r.Type()
				vReg := c.AllocateVReg(typ)
				c.ssaValueToVRegs[id] = vReg
				c.ssaTypeOfVRegID[vReg.ID()] = typ
			}
			for _, r := range rs {
				id := r.ID()
				typ := r.Type()
				vReg := c.AllocateVReg(typ)
				c.ssaValueToVRegs[id] = vReg
				c.ssaTypeOfVRegID[vReg.ID()] = typ
			}
		}
	}

	for i, retBlk := 0, builder.ReturnBlock(); i < retBlk.Params(); i++ {
		typ := retBlk.Param(i).Type()
		vReg := c.AllocateVReg(typ)
		c.returnVRegs = append(c.returnVRegs, vReg)
		c.ssaTypeOfVRegID[vReg.ID()] = typ
	}
}

// AllocateVReg implements Compiler.AllocateVReg.
func (c *compiler) AllocateVReg(typ ssa.Type) regalloc.VReg {
	regType := regalloc.RegTypeOf(typ)
	r := regalloc.VReg(c.nextVRegID).SetRegType(regType)

	id := r.ID()
	if int(id) >= len(c.ssaTypeOfVRegID) {
		c.ssaTypeOfVRegID = append(c.ssaTypeOfVRegID, make([]ssa.Type, id+1)...)
	}
	c.ssaTypeOfVRegID[id] = typ
	c.nextVRegID++
	return r
}

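// allocateVRegExample is an illustrative sketch, not part of the original code: AllocateVReg records
// the ssa.Type of the freshly allocated virtual register, and TypeOf recovers it later (for example
// when deciding how to spill or move the register). ssa.TypeI64 is just a representative choice and
// the function name is hypothetical.
func allocateVRegExample(c Compiler) regalloc.VReg {
	v := c.AllocateVReg(ssa.TypeI64)
	_ = c.TypeOf(v) // returns ssa.TypeI64 for this register.
	return v
}
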
// Init implements Compiler.Init.
func (c *compiler) Init() {
	c.currentGID = 0
	c.nextVRegID = regalloc.VRegIDNonReservedBegin
	c.returnVRegs = c.returnVRegs[:0]
	c.mach.Reset()
	c.varEdges = c.varEdges[:0]
	c.constEdges = c.constEdges[:0]
	c.buf = c.buf[:0]
	c.sourceOffsets = c.sourceOffsets[:0]
	c.relocations = c.relocations[:0]
}

// ValueDefinition implements Compiler.ValueDefinition.
func (c *compiler) ValueDefinition(value ssa.Value) SSAValueDefinition {
	return SSAValueDefinition{
		V:        value,
		Instr:    c.ssaBuilder.InstructionOfValue(value),
		RefCount: c.ssaValuesInfo[value.ID()].RefCount,
	}
}

// VRegOf implements Compiler.VRegOf.
func (c *compiler) VRegOf(value ssa.Value) regalloc.VReg {
	return c.ssaValueToVRegs[value.ID()]
}

// Format implements Compiler.Format.
func (c *compiler) Format() string {
	return c.mach.Format()
}

// TypeOf implements Compiler.TypeOf.
func (c *compiler) TypeOf(v regalloc.VReg) ssa.Type {
	return c.ssaTypeOfVRegID[v.ID()]
}

// MatchInstr implements Compiler.MatchInstr.
func (c *compiler) MatchInstr(def SSAValueDefinition, opcode ssa.Opcode) bool {
	instr := def.Instr
	return def.IsFromInstr() &&
		instr.Opcode() == opcode &&
		instr.GroupID() == c.currentGID &&
		def.RefCount < 2
}

// MatchInstrOneOf implements Compiler.MatchInstrOneOf.
func (c *compiler) MatchInstrOneOf(def SSAValueDefinition, opcodes []ssa.Opcode) ssa.Opcode {
	instr := def.Instr
	if !def.IsFromInstr() {
		return ssa.OpcodeInvalid
	}

	if instr.GroupID() != c.currentGID {
		return ssa.OpcodeInvalid
	}

	if def.RefCount >= 2 {
		return ssa.OpcodeInvalid
	}

	opcode := instr.Opcode()
	for _, op := range opcodes {
		if opcode == op {
			return opcode
		}
	}
	return ssa.OpcodeInvalid
}

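// matchIconstExample is an illustrative sketch, not part of the original code: a machine-specific
// lowering routine might use MatchInstr like this to decide whether an operand's defining instruction
// can be folded into the instruction currently being lowered. ssa.OpcodeIconst is only a
// representative opcode; the function name is hypothetical.
func matchIconstExample(c Compiler, operand ssa.Value) (*ssa.Instruction, bool) {
	def := c.ValueDefinition(operand)
	if c.MatchInstr(def, ssa.OpcodeIconst) {
		// The operand is defined by an Iconst in the current group with a single use,
		// so its definition can safely be merged into the consuming instruction.
		return def.Instr, true
	}
	return nil, false
}
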
// SSABuilder implements Compiler.SSABuilder.
func (c *compiler) SSABuilder() ssa.Builder {
	return c.ssaBuilder
}

// AddSourceOffsetInfo implements Compiler.AddSourceOffsetInfo.
func (c *compiler) AddSourceOffsetInfo(executableOffset int64, sourceOffset ssa.SourceOffset) {
	c.sourceOffsets = append(c.sourceOffsets, SourceOffsetInfo{
		SourceOffset:     sourceOffset,
		ExecutableOffset: executableOffset,
	})
}

// SourceOffsetInfo implements Compiler.SourceOffsetInfo.
func (c *compiler) SourceOffsetInfo() []SourceOffsetInfo {
	return c.sourceOffsets
}

// AddRelocationInfo implements Compiler.AddRelocationInfo.
func (c *compiler) AddRelocationInfo(funcRef ssa.FuncRef) {
	c.relocations = append(c.relocations, RelocationInfo{
		Offset:  int64(len(c.buf)),
		FuncRef: funcRef,
	})
}

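// callRelocationExample is an illustrative sketch, not part of the original code: a machine backend
// typically records the relocation immediately before emitting the call's placeholder encoding, so
// that RelocationInfo.Offset points at the bytes to patch once the target address is known. The
// 4-byte zero placeholder is an assumption for illustration, not a real instruction encoding.
func callRelocationExample(c Compiler, target ssa.FuncRef) {
	c.AddRelocationInfo(target) // Offset is captured as len(buf) at this point.
	c.Emit4Bytes(0)             // hypothetical placeholder to be patched via the relocation.
}
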
// Emit8Bytes implements Compiler.Emit8Bytes.
func (c *compiler) Emit8Bytes(b uint64) {
	c.buf = append(c.buf, byte(b), byte(b>>8), byte(b>>16), byte(b>>24), byte(b>>32), byte(b>>40), byte(b>>48), byte(b>>56))
}

// Emit4Bytes implements Compiler.Emit4Bytes.
func (c *compiler) Emit4Bytes(b uint32) {
	c.buf = append(c.buf, byte(b), byte(b>>8), byte(b>>16), byte(b>>24))
}

// EmitByte implements Compiler.EmitByte.
func (c *compiler) EmitByte(b byte) {
	c.buf = append(c.buf, b)
}

// Buf implements Compiler.Buf.
func (c *compiler) Buf() []byte {
	return c.buf
}

// BufPtr implements Compiler.BufPtr.
func (c *compiler) BufPtr() *[]byte {
	return &c.buf
}

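// emitLittleEndianExample is an illustrative sketch, not part of the original code: the Emit helpers
// append values to the buffer in little-endian byte order. The function name is hypothetical.
func emitLittleEndianExample(c Compiler) {
	c.Emit4Bytes(0x11223344)
	// The last four bytes of c.Buf() are now 0x44, 0x33, 0x22, 0x11.
	_ = c.Buf()
}
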
// GetFunctionABI implements Compiler.GetFunctionABI.
func (c *compiler) GetFunctionABI(sig *ssa.Signature) *FunctionABI {
	if int(sig.ID) >= len(c.abis) {
		c.abis = append(c.abis, make([]FunctionABI, int(sig.ID)+1)...)
	}

	abi := &c.abis[sig.ID]
	if abi.Initialized {
		return abi
	}

	abi.Init(sig, c.argResultInts, c.argResultFloats)
	return abi
}
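
// getFunctionABIExample is an illustrative sketch, not part of the original code: GetFunctionABI
// computes the ABI lazily and caches it per ssa.SignatureID, so repeated calls with the same
// signature return the same entry. The function name is hypothetical.
func getFunctionABIExample(c Compiler, sig *ssa.Signature) bool {
	first := c.GetFunctionABI(sig)
	second := c.GetFunctionABI(sig)
	return first == second // same pointer: the cached FunctionABI is initialized once and reused.
}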