1package wasmdebug
2
3import (
4 "debug/dwarf"
5 "errors"
6 "fmt"
7 "io"
8 "sort"
9 "strings"
10 "sync"
11)
12
13// DWARFLines is used to retrieve source code line information from the DWARF data.
14type DWARFLines struct {
15 // d is created by DWARF custom sections.
16 d *dwarf.Data
17 // linesPerEntry maps dwarf.Offset for dwarf.Entry to the list of lines contained by the entry.
18 // The value is sorted in the increasing order by the address.
19 linesPerEntry map[dwarf.Offset][]line
20 mux sync.Mutex
21}
22
23type line struct {
24 addr uint64
25 pos dwarf.LineReaderPos
26}
27
28// NewDWARFLines returns DWARFLines for the given *dwarf.Data.
29func NewDWARFLines(d *dwarf.Data) *DWARFLines {
30 if d == nil {
31 return nil
32 }
33 return &DWARFLines{d: d, linesPerEntry: map[dwarf.Offset][]line{}}
34}
35
36// isTombstoneAddr returns true if the given address is invalid a.k.a tombstone address which was made no longer valid
37// by linker. According to the DWARF spec[1], the value is encoded as 0xffffffff for Wasm (as 32-bit target),
38// but some tools encode it either in -1, -2 [2] or 1<<32 (This might not be by tools, but by debug/dwarf package's bug).
39//
40// [1] https://dwarfstd.org/issues/200609.1.html
41// [2] https://github.com/WebAssembly/binaryen/blob/97178d08d4a20d2a5e3a6be813fc6a7079ef86e1/src/wasm/wasm-debug.cpp#L651-L660
42// [3] https://reviews.llvm.org/D81784
43func isTombstoneAddr(addr uint64) bool {
44 addr32 := int32(addr)
45 return addr32 == -1 || addr32 == -2 ||
46 addr32 == 0 // This covers 1 <<32.
47}
48
49// Line returns the line information for the given instructionOffset which is an offset in
50// the code section of the original Wasm binary. Returns empty string if the info is not found.
51func (d *DWARFLines) Line(instructionOffset uint64) (ret []string) {
52 if d == nil {
53 return
54 }
55
56 // DWARFLines is created per Wasm binary, so there's a possibility that multiple instances
57 // created from a same binary face runtime error at the same time, and that results in
58 // concurrent access to this function.
59 d.mux.Lock()
60 defer d.mux.Unlock()
61
62 r := d.d.Reader()
63
64 var inlinedRoutines []*dwarf.Entry
65 var cu *dwarf.Entry
66 var inlinedDone bool
67entry:
68 for {
69 ent, err := r.Next()
70 if err != nil || ent == nil {
71 break
72 }
73
74 // If we already found the compilation unit and relevant inlined routines, we can stop searching entries.
75 if cu != nil && inlinedDone {
76 break
77 }
78
79 switch ent.Tag {
80 case dwarf.TagCompileUnit, dwarf.TagInlinedSubroutine:
81 default:
82 // Only CompileUnit and InlinedSubroutines are relevant.
83 continue
84 }
85
86 // Check if the entry spans the range which contains the target instruction.
87 ranges, err := d.d.Ranges(ent)
88 if err != nil {
89 continue
90 }
91 for _, pcs := range ranges {
92 start, end := pcs[0], pcs[1]
93 if isTombstoneAddr(start) || isTombstoneAddr(end) {
94 continue
95 }
96 if start <= instructionOffset && instructionOffset < end {
97 switch ent.Tag {
98 case dwarf.TagCompileUnit:
99 cu = ent
100 case dwarf.TagInlinedSubroutine:
101 inlinedRoutines = append(inlinedRoutines, ent)
102 // Search inlined subroutines until all the children.
103 inlinedDone = !ent.Children
104 // Not that "children" in the DWARF spec is defined as the next entry to this entry.
105 // See "2.3 Relationship of Debugging Information Entries" in https://dwarfstd.org/doc/DWARF4.pdf
106 }
107 continue entry
108 }
109 }
110 }
111
112 // If the relevant compilation unit is not found, nothing we can do with this DWARF info.
113 if cu == nil {
114 return
115 }
116
117 lineReader, err := d.d.LineReader(cu)
118 if err != nil || lineReader == nil {
119 return
120 }
121 var lines []line
122 var ok bool
123 var le dwarf.LineEntry
124 // Get the lines inside the entry.
125 if lines, ok = d.linesPerEntry[cu.Offset]; !ok {
126 // If not found, we create the list of lines by reading all the LineEntries in the Entry.
127 //
128 // Note that the dwarf.LineEntry.SeekPC API shouldn't be used because the Go's dwarf package assumes that
129 // all the line entries in an Entry are sorted in increasing order which *might not* be true
130 // for some languages. Such order requirement is not a part of DWARF specification,
131 // and in fact Zig language tends to emit interleaved line information.
132 //
133 // Thus, here we read all line entries here, and sort them in the increasing order wrt addresses.
134 for {
135 pos := lineReader.Tell()
136 err = lineReader.Next(&le)
137 if errors.Is(err, io.EOF) {
138 break
139 } else if err != nil {
140 return
141 }
142 // TODO: Maybe we should ignore tombstone addresses by using isTombstoneAddr,
143 // but not sure if that would be an issue in practice.
144 lines = append(lines, line{addr: le.Address, pos: pos})
145 }
146 sort.Slice(lines, func(i, j int) bool { return lines[i].addr < lines[j].addr })
147 d.linesPerEntry[cu.Offset] = lines // Caches for the future inquiries for the same Entry.
148 }
149
150 // Now we have the lines for this entry. We can find the corresponding source line for instructionOffset
151 // via binary search on the list.
152 n := len(lines)
153 index := sort.Search(n, func(i int) bool { return lines[i].addr >= instructionOffset })
154
155 if index == n { // This case the address is not found. See the doc sort.Search.
156 return
157 }
158
159 ln := lines[index]
160 if ln.addr != instructionOffset {
161 // If the address doesn't match exactly, the previous entry is the one that contains the instruction.
162 // That can happen anytime as the DWARF spec allows it, and other tools can handle it in this way conventionally
163 // https://github.com/gimli-rs/addr2line/blob/3a2dbaf84551a06a429f26e9c96071bb409b371f/src/lib.rs#L236-L242
164 // https://github.com/kateinoigakukun/wasminspect/blob/f29f052f1b03104da9f702508ac0c1bbc3530ae4/crates/debugger/src/dwarf/mod.rs#L453-L459
165 if index-1 < 0 {
166 return
167 }
168 ln = lines[index-1]
169 }
170
171 // Advance the line reader for the found position.
172 lineReader.Seek(ln.pos)
173 err = lineReader.Next(&le)
174 if err != nil {
175 // If we reach this block, that means there's a bug in the []line creation logic above.
176 panic("BUG: stored dwarf.LineReaderPos is invalid")
177 }
178
179 // In the inlined case, the line info is the innermost inlined function call.
180 inlined := len(inlinedRoutines) != 0
181 prefix := fmt.Sprintf("%#x: ", instructionOffset)
182 ret = append(ret, formatLine(prefix, le.File.Name, int64(le.Line), int64(le.Column), inlined))
183
184 if inlined {
185 prefix = strings.Repeat(" ", len(prefix))
186 files := lineReader.Files()
187 // inlinedRoutines contain the inlined call information in the reverse order (children is higher than parent),
188 // so we traverse the reverse order and emit the inlined calls.
189 for i := len(inlinedRoutines) - 1; i >= 0; i-- {
190 inlined := inlinedRoutines[i]
191 fileIndex, ok := inlined.Val(dwarf.AttrCallFile).(int64)
192 if !ok {
193 return
194 } else if fileIndex >= int64(len(files)) {
195 // This in theory shouldn't happen according to the spec, but guard against ill-formed DWARF info.
196 return
197 }
198 fileName := files[fileIndex]
199 line, _ := inlined.Val(dwarf.AttrCallLine).(int64)
200 col, _ := inlined.Val(dwarf.AttrCallColumn).(int64)
201 ret = append(ret, formatLine(prefix, fileName.Name, line, col,
202 // Last one is the origin of the inlined function calls.
203 i != 0))
204 }
205 }
206 return
207}
208
209func formatLine(prefix, fileName string, line, col int64, inlined bool) string {
210 builder := strings.Builder{}
211 builder.WriteString(prefix)
212 builder.WriteString(fileName)
213
214 if line != 0 {
215 builder.WriteString(fmt.Sprintf(":%d", line))
216 if col != 0 {
217 builder.WriteString(fmt.Sprintf(":%d", col))
218 }
219 }
220
221 if inlined {
222 builder.WriteString(" (inlined)")
223 }
224 return builder.String()
225}