compiler.go

  1package backend
  2
  3import (
  4	"context"
  5	"fmt"
  6
  7	"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
  8	"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
  9	"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
 10)
 11
 12// NewCompiler returns a new Compiler that can generate a machine code.
 13func NewCompiler(ctx context.Context, mach Machine, builder ssa.Builder) Compiler {
 14	return newCompiler(ctx, mach, builder)
 15}
 16
 17func newCompiler(_ context.Context, mach Machine, builder ssa.Builder) *compiler {
 18	argResultInts, argResultFloats := mach.ArgsResultsRegs()
 19	c := &compiler{
 20		mach: mach, ssaBuilder: builder,
 21		nextVRegID:      regalloc.VRegIDNonReservedBegin,
 22		argResultInts:   argResultInts,
 23		argResultFloats: argResultFloats,
 24	}
 25	mach.SetCompiler(c)
 26	return c
 27}
 28
// Compiler is the backend of wazevo which takes ssa.Builder and Machine,
// and uses the information there to emit the final machine code.
type Compiler interface {
	// SSABuilder returns the ssa.Builder used by this compiler.
	SSABuilder() ssa.Builder

	// Compile executes the following steps:
	// 	1. Lower()
	// 	2. RegAlloc()
	// 	3. Finalize(), which also encodes the machine code.
	//
	// Each step can be called individually for testing purposes, therefore they are exposed in this interface too.
	//
	// The returned slices are the machine code and the relocation information for the machine code.
	// The caller is responsible for copying them immediately since the compiler may reuse the buffer.
	Compile(ctx context.Context) (_ []byte, _ []RelocationInfo, _ error)

	// Lower lowers the given ssa.Instruction to the machine-specific instructions.
	Lower()

	// RegAlloc performs the register allocation after Lower is called.
	RegAlloc()

	// Finalize performs the finalization of the compilation, including machine code emission.
	// This must be called after RegAlloc.
	Finalize(ctx context.Context) error

	// Buf returns the buffer of the encoded machine code. This is only used for testing purposes.
	Buf() []byte

	// BufPtr returns a pointer to the buffer of the encoded machine code, i.e. to the same
	// slice that Buf returns.
	BufPtr() *[]byte

	// Format returns the debug string of the current state of the compiler.
	Format() string

	// Init initializes the internal state of the compiler for the next compilation.
	Init()

	// AllocateVReg allocates a new virtual register of the given type.
	AllocateVReg(typ ssa.Type) regalloc.VReg

	// ValueDefinition returns the definition of the given value.
	ValueDefinition(ssa.Value) SSAValueDefinition

	// VRegOf returns the virtual register of the given ssa.Value.
	VRegOf(value ssa.Value) regalloc.VReg

	// TypeOf returns the ssa.Type of the given virtual register.
	TypeOf(regalloc.VReg) ssa.Type

	// MatchInstr returns true if the given definition is from an instruction with the given opcode, the current group ID,
	// and a refcount of less than 2. That means, the instruction can be merged/swapped within the current instruction group.
	MatchInstr(def SSAValueDefinition, opcode ssa.Opcode) bool

	// MatchInstrOneOf is the same as MatchInstr but for multiple opcodes. If it matches one of the given opcodes,
	// this returns the matched opcode. Otherwise, this returns ssa.OpcodeInvalid.
	//
	// Note: callers should be careful to avoid excessive allocation of the opcodes slice.
	MatchInstrOneOf(def SSAValueDefinition, opcodes []ssa.Opcode) ssa.Opcode

	// AddRelocationInfo appends the relocation information for the function reference at the current buffer offset.
	AddRelocationInfo(funcRef ssa.FuncRef)

	// AddSourceOffsetInfo appends the source offset information for the given offset.
	AddSourceOffsetInfo(executableOffset int64, sourceOffset ssa.SourceOffset)

	// SourceOffsetInfo returns the source offset information recorded so far via AddSourceOffsetInfo.
	SourceOffsetInfo() []SourceOffsetInfo

	// EmitByte appends a byte to the buffer. Used during the code emission.
	EmitByte(b byte)

	// Emit4Bytes appends 4 bytes to the buffer in little-endian order. Used during the code emission.
	Emit4Bytes(b uint32)

	// Emit8Bytes appends 8 bytes to the buffer in little-endian order. Used during the code emission.
	Emit8Bytes(b uint64)

	// GetFunctionABI returns the ABI information for the given signature.
	GetFunctionABI(sig *ssa.Signature) *FunctionABI
}
111
// RelocationInfo represents the relocation information for a call instruction.
type RelocationInfo struct {
	// Offset represents the offset from the beginning of the machine code of either a function or the entire module.
	Offset int64
	// FuncRef is the target function of the call instruction.
	FuncRef ssa.FuncRef
}
119
// compiler implements Compiler.
//
// A single instance is reused across compilations: Init truncates the
// per-function slices while keeping their backing storage.
type compiler struct {
	// mach is the Machine that lowering, register allocation, encoding and
	// formatting are delegated to.
	mach Machine
	// currentGID is the instruction group ID that MatchInstr and
	// MatchInstrOneOf compare against; it is set via setCurrentGroupID.
	currentGID ssa.InstructionGroupID
	// ssaBuilder is the ssa.Builder this compiler reads values, blocks and
	// instructions from.
	ssaBuilder ssa.Builder
	// nextVRegID is the next virtual register ID to be allocated.
	nextVRegID regalloc.VRegID
	// ssaValueToVRegs maps ssa.ValueID to regalloc.VReg.
	ssaValueToVRegs [] /* ssa.ValueID to */ regalloc.VReg
	// ssaValuesInfo is the result of ssaBuilder.ValuesInfo(), captured by
	// assignVirtualRegisters and indexed by ssa.ValueID (e.g. for RefCount).
	ssaValuesInfo []ssa.ValueInfo
	// returnVRegs is the list of virtual registers that store the return values.
	returnVRegs []regalloc.VReg
	// NOTE(review): the fields from varEdges to tmpVals are not used in the
	// part of the package shown here apart from Init truncating varEdges and
	// constEdges; presumably they are scratch space for lowering — confirm.
	varEdges     [][2]regalloc.VReg
	varEdgeTypes []ssa.Type
	constEdges   []struct {
		cInst *ssa.Instruction
		dst   regalloc.VReg
	}
	vRegSet  []bool
	vRegIDs  []regalloc.VRegID
	tempRegs []regalloc.VReg
	tmpVals  []ssa.Value
	// ssaTypeOfVRegID maps regalloc.VRegID to the ssa.Type recorded by AllocateVReg.
	ssaTypeOfVRegID [] /* VRegID to */ ssa.Type
	// buf is the machine code buffer that EmitByte/Emit4Bytes/Emit8Bytes append to.
	buf []byte
	// relocations collects the entries added by AddRelocationInfo.
	relocations []RelocationInfo
	// sourceOffsets collects the entries added by AddSourceOffsetInfo.
	sourceOffsets []SourceOffsetInfo
	// abis maps ssa.SignatureID to the ABI implementation.
	abis []FunctionABI
	// argResultInts and argResultFloats are the registers reported by
	// Machine.ArgsResultsRegs; they are passed to FunctionABI.Init.
	argResultInts, argResultFloats []regalloc.RealReg
}
150
// SourceOffsetInfo associates an offset in the original source with an offset
// in the compiled executable.
type SourceOffsetInfo struct {
	// SourceOffset is the source offset in the original source code.
	SourceOffset ssa.SourceOffset
	// ExecutableOffset is the offset in the compiled executable.
	ExecutableOffset int64
}
158
159// Compile implements Compiler.Compile.
160func (c *compiler) Compile(ctx context.Context) ([]byte, []RelocationInfo, error) {
161	c.Lower()
162	if wazevoapi.PrintSSAToBackendIRLowering && wazevoapi.PrintEnabledIndex(ctx) {
163		fmt.Printf("[[[after lowering for %s ]]]%s\n", wazevoapi.GetCurrentFunctionName(ctx), c.Format())
164	}
165	if wazevoapi.DeterministicCompilationVerifierEnabled {
166		wazevoapi.VerifyOrSetDeterministicCompilationContextValue(ctx, "After lowering to ISA specific IR", c.Format())
167	}
168	c.RegAlloc()
169	if wazevoapi.PrintRegisterAllocated && wazevoapi.PrintEnabledIndex(ctx) {
170		fmt.Printf("[[[after regalloc for %s]]]%s\n", wazevoapi.GetCurrentFunctionName(ctx), c.Format())
171	}
172	if wazevoapi.DeterministicCompilationVerifierEnabled {
173		wazevoapi.VerifyOrSetDeterministicCompilationContextValue(ctx, "After Register Allocation", c.Format())
174	}
175	if err := c.Finalize(ctx); err != nil {
176		return nil, nil, err
177	}
178	if wazevoapi.PrintFinalizedMachineCode && wazevoapi.PrintEnabledIndex(ctx) {
179		fmt.Printf("[[[after finalize for %s]]]%s\n", wazevoapi.GetCurrentFunctionName(ctx), c.Format())
180	}
181	if wazevoapi.DeterministicCompilationVerifierEnabled {
182		wazevoapi.VerifyOrSetDeterministicCompilationContextValue(ctx, "After Finalization", c.Format())
183	}
184	return c.buf, c.relocations, nil
185}
186
187// RegAlloc implements Compiler.RegAlloc.
188func (c *compiler) RegAlloc() {
189	c.mach.RegAlloc()
190}
191
192// Finalize implements Compiler.Finalize.
193func (c *compiler) Finalize(ctx context.Context) error {
194	c.mach.PostRegAlloc()
195	return c.mach.Encode(ctx)
196}
197
198// setCurrentGroupID sets the current instruction group ID.
199func (c *compiler) setCurrentGroupID(gid ssa.InstructionGroupID) {
200	c.currentGID = gid
201}
202
203// assignVirtualRegisters assigns a virtual register to each ssa.ValueID Valid in the ssa.Builder.
204func (c *compiler) assignVirtualRegisters() {
205	builder := c.ssaBuilder
206	c.ssaValuesInfo = builder.ValuesInfo()
207
208	if diff := len(c.ssaValuesInfo) - len(c.ssaValueToVRegs); diff > 0 {
209		c.ssaValueToVRegs = append(c.ssaValueToVRegs, make([]regalloc.VReg, diff+1)...)
210	}
211
212	for blk := builder.BlockIteratorReversePostOrderBegin(); blk != nil; blk = builder.BlockIteratorReversePostOrderNext() {
213		// First we assign a virtual register to each parameter.
214		for i := 0; i < blk.Params(); i++ {
215			p := blk.Param(i)
216			pid := p.ID()
217			typ := p.Type()
218			vreg := c.AllocateVReg(typ)
219			c.ssaValueToVRegs[pid] = vreg
220			c.ssaTypeOfVRegID[vreg.ID()] = p.Type()
221		}
222
223		// Assigns each value to a virtual register produced by instructions.
224		for cur := blk.Root(); cur != nil; cur = cur.Next() {
225			r, rs := cur.Returns()
226			if r.Valid() {
227				id := r.ID()
228				ssaTyp := r.Type()
229				typ := r.Type()
230				vReg := c.AllocateVReg(typ)
231				c.ssaValueToVRegs[id] = vReg
232				c.ssaTypeOfVRegID[vReg.ID()] = ssaTyp
233			}
234			for _, r := range rs {
235				id := r.ID()
236				ssaTyp := r.Type()
237				vReg := c.AllocateVReg(ssaTyp)
238				c.ssaValueToVRegs[id] = vReg
239				c.ssaTypeOfVRegID[vReg.ID()] = ssaTyp
240			}
241		}
242	}
243
244	for i, retBlk := 0, builder.ReturnBlock(); i < retBlk.Params(); i++ {
245		typ := retBlk.Param(i).Type()
246		vReg := c.AllocateVReg(typ)
247		c.returnVRegs = append(c.returnVRegs, vReg)
248		c.ssaTypeOfVRegID[vReg.ID()] = typ
249	}
250}
251
252// AllocateVReg implements Compiler.AllocateVReg.
253func (c *compiler) AllocateVReg(typ ssa.Type) regalloc.VReg {
254	regType := regalloc.RegTypeOf(typ)
255	r := regalloc.VReg(c.nextVRegID).SetRegType(regType)
256
257	id := r.ID()
258	if int(id) >= len(c.ssaTypeOfVRegID) {
259		c.ssaTypeOfVRegID = append(c.ssaTypeOfVRegID, make([]ssa.Type, id+1)...)
260	}
261	c.ssaTypeOfVRegID[id] = typ
262	c.nextVRegID++
263	return r
264}
265
266// Init implements Compiler.Init.
267func (c *compiler) Init() {
268	c.currentGID = 0
269	c.nextVRegID = regalloc.VRegIDNonReservedBegin
270	c.returnVRegs = c.returnVRegs[:0]
271	c.mach.Reset()
272	c.varEdges = c.varEdges[:0]
273	c.constEdges = c.constEdges[:0]
274	c.buf = c.buf[:0]
275	c.sourceOffsets = c.sourceOffsets[:0]
276	c.relocations = c.relocations[:0]
277}
278
279// ValueDefinition implements Compiler.ValueDefinition.
280func (c *compiler) ValueDefinition(value ssa.Value) SSAValueDefinition {
281	return SSAValueDefinition{
282		V:        value,
283		Instr:    c.ssaBuilder.InstructionOfValue(value),
284		RefCount: c.ssaValuesInfo[value.ID()].RefCount,
285	}
286}
287
288// VRegOf implements Compiler.VRegOf.
289func (c *compiler) VRegOf(value ssa.Value) regalloc.VReg {
290	return c.ssaValueToVRegs[value.ID()]
291}
292
293// Format implements Compiler.Format.
294func (c *compiler) Format() string {
295	return c.mach.Format()
296}
297
298// TypeOf implements Compiler.Format.
299func (c *compiler) TypeOf(v regalloc.VReg) ssa.Type {
300	return c.ssaTypeOfVRegID[v.ID()]
301}
302
303// MatchInstr implements Compiler.MatchInstr.
304func (c *compiler) MatchInstr(def SSAValueDefinition, opcode ssa.Opcode) bool {
305	instr := def.Instr
306	return def.IsFromInstr() &&
307		instr.Opcode() == opcode &&
308		instr.GroupID() == c.currentGID &&
309		def.RefCount < 2
310}
311
312// MatchInstrOneOf implements Compiler.MatchInstrOneOf.
313func (c *compiler) MatchInstrOneOf(def SSAValueDefinition, opcodes []ssa.Opcode) ssa.Opcode {
314	instr := def.Instr
315	if !def.IsFromInstr() {
316		return ssa.OpcodeInvalid
317	}
318
319	if instr.GroupID() != c.currentGID {
320		return ssa.OpcodeInvalid
321	}
322
323	if def.RefCount >= 2 {
324		return ssa.OpcodeInvalid
325	}
326
327	opcode := instr.Opcode()
328	for _, op := range opcodes {
329		if opcode == op {
330			return opcode
331		}
332	}
333	return ssa.OpcodeInvalid
334}
335
336// SSABuilder implements Compiler .SSABuilder.
337func (c *compiler) SSABuilder() ssa.Builder {
338	return c.ssaBuilder
339}
340
341// AddSourceOffsetInfo implements Compiler.AddSourceOffsetInfo.
342func (c *compiler) AddSourceOffsetInfo(executableOffset int64, sourceOffset ssa.SourceOffset) {
343	c.sourceOffsets = append(c.sourceOffsets, SourceOffsetInfo{
344		SourceOffset:     sourceOffset,
345		ExecutableOffset: executableOffset,
346	})
347}
348
349// SourceOffsetInfo implements Compiler.SourceOffsetInfo.
350func (c *compiler) SourceOffsetInfo() []SourceOffsetInfo {
351	return c.sourceOffsets
352}
353
354// AddRelocationInfo implements Compiler.AddRelocationInfo.
355func (c *compiler) AddRelocationInfo(funcRef ssa.FuncRef) {
356	c.relocations = append(c.relocations, RelocationInfo{
357		Offset:  int64(len(c.buf)),
358		FuncRef: funcRef,
359	})
360}
361
362// Emit8Bytes implements Compiler.Emit8Bytes.
363func (c *compiler) Emit8Bytes(b uint64) {
364	c.buf = append(c.buf, byte(b), byte(b>>8), byte(b>>16), byte(b>>24), byte(b>>32), byte(b>>40), byte(b>>48), byte(b>>56))
365}
366
367// Emit4Bytes implements Compiler.Emit4Bytes.
368func (c *compiler) Emit4Bytes(b uint32) {
369	c.buf = append(c.buf, byte(b), byte(b>>8), byte(b>>16), byte(b>>24))
370}
371
372// EmitByte implements Compiler.EmitByte.
373func (c *compiler) EmitByte(b byte) {
374	c.buf = append(c.buf, b)
375}
376
377// Buf implements Compiler.Buf.
378func (c *compiler) Buf() []byte {
379	return c.buf
380}
381
382// BufPtr implements Compiler.BufPtr.
383func (c *compiler) BufPtr() *[]byte {
384	return &c.buf
385}
386
387func (c *compiler) GetFunctionABI(sig *ssa.Signature) *FunctionABI {
388	if int(sig.ID) >= len(c.abis) {
389		c.abis = append(c.abis, make([]FunctionABI, int(sig.ID)+1)...)
390	}
391
392	abi := &c.abis[sig.ID]
393	if abi.Initialized {
394		return abi
395	}
396
397	abi.Init(sig, c.argResultInts, c.argResultFloats)
398	return abi
399}