enclosing.go

  1// Copyright 2013 The Go Authors. All rights reserved.
  2// Use of this source code is governed by a BSD-style
  3// license that can be found in the LICENSE file.
  4
  5package astutil
  6
  7// This file defines utilities for working with source positions.
  8
  9import (
 10	"fmt"
 11	"go/ast"
 12	"go/token"
 13	"sort"
 14)
 15
 16// PathEnclosingInterval returns the node that encloses the source
 17// interval [start, end), and all its ancestors up to the AST root.
 18//
 19// The definition of "enclosing" used by this function considers
 20// additional whitespace abutting a node to be enclosed by it.
 21// In this example:
 22//
 23//              z := x + y // add them
 24//                   <-A->
 25//                  <----B----->
 26//
 27// the ast.BinaryExpr(+) node is considered to enclose interval B
 28// even though its [Pos()..End()) is actually only interval A.
 29// This behaviour makes user interfaces more tolerant of imperfect
 30// input.
 31//
 32// This function treats tokens as nodes, though they are not included
 33// in the result. e.g. PathEnclosingInterval("+") returns the
 34// enclosing ast.BinaryExpr("x + y").
 35//
 36// If start==end, the 1-char interval following start is used instead.
 37//
 38// The 'exact' result is true if the interval contains only path[0]
 39// and perhaps some adjacent whitespace.  It is false if the interval
 40// overlaps multiple children of path[0], or if it contains only
 41// interior whitespace of path[0].
 42// In this example:
 43//
 44//              z := x + y // add them
 45//                <--C-->     <---E-->
 46//                  ^
 47//                  D
 48//
 49// intervals C, D and E are inexact.  C is contained by the
 50// z-assignment statement, because it spans three of its children (:=,
 51// x, +).  So too is the 1-char interval D, because it contains only
 52// interior whitespace of the assignment.  E is considered interior
 53// whitespace of the BlockStmt containing the assignment.
 54//
 55// Precondition: [start, end) both lie within the same file as root.
 56// TODO(adonovan): return (nil, false) in this case and remove precond.
 57// Requires FileSet; see loader.tokenFileContainsPos.
 58//
 59// Postcondition: path is never nil; it always contains at least 'root'.
 60//
 61func PathEnclosingInterval(root *ast.File, start, end token.Pos) (path []ast.Node, exact bool) {
 62	// fmt.Printf("EnclosingInterval %d %d\n", start, end) // debugging
 63
 64	// Precondition: node.[Pos..End) and adjoining whitespace contain [start, end).
 65	var visit func(node ast.Node) bool
 66	visit = func(node ast.Node) bool {
 67		path = append(path, node)
 68
 69		nodePos := node.Pos()
 70		nodeEnd := node.End()
 71
 72		// fmt.Printf("visit(%T, %d, %d)\n", node, nodePos, nodeEnd) // debugging
 73
 74		// Intersect [start, end) with interval of node.
 75		if start < nodePos {
 76			start = nodePos
 77		}
 78		if end > nodeEnd {
 79			end = nodeEnd
 80		}
 81
 82		// Find sole child that contains [start, end).
 83		children := childrenOf(node)
 84		l := len(children)
 85		for i, child := range children {
 86			// [childPos, childEnd) is unaugmented interval of child.
 87			childPos := child.Pos()
 88			childEnd := child.End()
 89
 90			// [augPos, augEnd) is whitespace-augmented interval of child.
 91			augPos := childPos
 92			augEnd := childEnd
 93			if i > 0 {
 94				augPos = children[i-1].End() // start of preceding whitespace
 95			}
 96			if i < l-1 {
 97				nextChildPos := children[i+1].Pos()
 98				// Does [start, end) lie between child and next child?
 99				if start >= augEnd && end <= nextChildPos {
100					return false // inexact match
101				}
102				augEnd = nextChildPos // end of following whitespace
103			}
104
105			// fmt.Printf("\tchild %d: [%d..%d)\tcontains interval [%d..%d)?\n",
106			// 	i, augPos, augEnd, start, end) // debugging
107
108			// Does augmented child strictly contain [start, end)?
109			if augPos <= start && end <= augEnd {
110				_, isToken := child.(tokenNode)
111				return isToken || visit(child)
112			}
113
114			// Does [start, end) overlap multiple children?
115			// i.e. left-augmented child contains start
116			// but LR-augmented child does not contain end.
117			if start < childEnd && end > augEnd {
118				break
119			}
120		}
121
122		// No single child contained [start, end),
123		// so node is the result.  Is it exact?
124
125		// (It's tempting to put this condition before the
126		// child loop, but it gives the wrong result in the
127		// case where a node (e.g. ExprStmt) and its sole
128		// child have equal intervals.)
129		if start == nodePos && end == nodeEnd {
130			return true // exact match
131		}
132
133		return false // inexact: overlaps multiple children
134	}
135
136	if start > end {
137		start, end = end, start
138	}
139
140	if start < root.End() && end > root.Pos() {
141		if start == end {
142			end = start + 1 // empty interval => interval of size 1
143		}
144		exact = visit(root)
145
146		// Reverse the path:
147		for i, l := 0, len(path); i < l/2; i++ {
148			path[i], path[l-1-i] = path[l-1-i], path[i]
149		}
150	} else {
151		// Selection lies within whitespace preceding the
152		// first (or following the last) declaration in the file.
153		// The result nonetheless always includes the ast.File.
154		path = append(path, root)
155	}
156
157	return
158}
159
160// tokenNode is a dummy implementation of ast.Node for a single token.
161// They are used transiently by PathEnclosingInterval but never escape
162// this package.
163//
164type tokenNode struct {
165	pos token.Pos
166	end token.Pos
167}
168
169func (n tokenNode) Pos() token.Pos {
170	return n.pos
171}
172
173func (n tokenNode) End() token.Pos {
174	return n.end
175}
176
177func tok(pos token.Pos, len int) ast.Node {
178	return tokenNode{pos, pos + token.Pos(len)}
179}
180
181// childrenOf returns the direct non-nil children of ast.Node n.
182// It may include fake ast.Node implementations for bare tokens.
183// it is not safe to call (e.g.) ast.Walk on such nodes.
184//
185func childrenOf(n ast.Node) []ast.Node {
186	var children []ast.Node
187
188	// First add nodes for all true subtrees.
189	ast.Inspect(n, func(node ast.Node) bool {
190		if node == n { // push n
191			return true // recur
192		}
193		if node != nil { // push child
194			children = append(children, node)
195		}
196		return false // no recursion
197	})
198
199	// Then add fake Nodes for bare tokens.
200	switch n := n.(type) {
201	case *ast.ArrayType:
202		children = append(children,
203			tok(n.Lbrack, len("[")),
204			tok(n.Elt.End(), len("]")))
205
206	case *ast.AssignStmt:
207		children = append(children,
208			tok(n.TokPos, len(n.Tok.String())))
209
210	case *ast.BasicLit:
211		children = append(children,
212			tok(n.ValuePos, len(n.Value)))
213
214	case *ast.BinaryExpr:
215		children = append(children, tok(n.OpPos, len(n.Op.String())))
216
217	case *ast.BlockStmt:
218		children = append(children,
219			tok(n.Lbrace, len("{")),
220			tok(n.Rbrace, len("}")))
221
222	case *ast.BranchStmt:
223		children = append(children,
224			tok(n.TokPos, len(n.Tok.String())))
225
226	case *ast.CallExpr:
227		children = append(children,
228			tok(n.Lparen, len("(")),
229			tok(n.Rparen, len(")")))
230		if n.Ellipsis != 0 {
231			children = append(children, tok(n.Ellipsis, len("...")))
232		}
233
234	case *ast.CaseClause:
235		if n.List == nil {
236			children = append(children,
237				tok(n.Case, len("default")))
238		} else {
239			children = append(children,
240				tok(n.Case, len("case")))
241		}
242		children = append(children, tok(n.Colon, len(":")))
243
244	case *ast.ChanType:
245		switch n.Dir {
246		case ast.RECV:
247			children = append(children, tok(n.Begin, len("<-chan")))
248		case ast.SEND:
249			children = append(children, tok(n.Begin, len("chan<-")))
250		case ast.RECV | ast.SEND:
251			children = append(children, tok(n.Begin, len("chan")))
252		}
253
254	case *ast.CommClause:
255		if n.Comm == nil {
256			children = append(children,
257				tok(n.Case, len("default")))
258		} else {
259			children = append(children,
260				tok(n.Case, len("case")))
261		}
262		children = append(children, tok(n.Colon, len(":")))
263
264	case *ast.Comment:
265		// nop
266
267	case *ast.CommentGroup:
268		// nop
269
270	case *ast.CompositeLit:
271		children = append(children,
272			tok(n.Lbrace, len("{")),
273			tok(n.Rbrace, len("{")))
274
275	case *ast.DeclStmt:
276		// nop
277
278	case *ast.DeferStmt:
279		children = append(children,
280			tok(n.Defer, len("defer")))
281
282	case *ast.Ellipsis:
283		children = append(children,
284			tok(n.Ellipsis, len("...")))
285
286	case *ast.EmptyStmt:
287		// nop
288
289	case *ast.ExprStmt:
290		// nop
291
292	case *ast.Field:
293		// TODO(adonovan): Field.{Doc,Comment,Tag}?
294
295	case *ast.FieldList:
296		children = append(children,
297			tok(n.Opening, len("(")),
298			tok(n.Closing, len(")")))
299
300	case *ast.File:
301		// TODO test: Doc
302		children = append(children,
303			tok(n.Package, len("package")))
304
305	case *ast.ForStmt:
306		children = append(children,
307			tok(n.For, len("for")))
308
309	case *ast.FuncDecl:
310		// TODO(adonovan): FuncDecl.Comment?
311
312		// Uniquely, FuncDecl breaks the invariant that
313		// preorder traversal yields tokens in lexical order:
314		// in fact, FuncDecl.Recv precedes FuncDecl.Type.Func.
315		//
316		// As a workaround, we inline the case for FuncType
317		// here and order things correctly.
318		//
319		children = nil // discard ast.Walk(FuncDecl) info subtrees
320		children = append(children, tok(n.Type.Func, len("func")))
321		if n.Recv != nil {
322			children = append(children, n.Recv)
323		}
324		children = append(children, n.Name)
325		if n.Type.Params != nil {
326			children = append(children, n.Type.Params)
327		}
328		if n.Type.Results != nil {
329			children = append(children, n.Type.Results)
330		}
331		if n.Body != nil {
332			children = append(children, n.Body)
333		}
334
335	case *ast.FuncLit:
336		// nop
337
338	case *ast.FuncType:
339		if n.Func != 0 {
340			children = append(children,
341				tok(n.Func, len("func")))
342		}
343
344	case *ast.GenDecl:
345		children = append(children,
346			tok(n.TokPos, len(n.Tok.String())))
347		if n.Lparen != 0 {
348			children = append(children,
349				tok(n.Lparen, len("(")),
350				tok(n.Rparen, len(")")))
351		}
352
353	case *ast.GoStmt:
354		children = append(children,
355			tok(n.Go, len("go")))
356
357	case *ast.Ident:
358		children = append(children,
359			tok(n.NamePos, len(n.Name)))
360
361	case *ast.IfStmt:
362		children = append(children,
363			tok(n.If, len("if")))
364
365	case *ast.ImportSpec:
366		// TODO(adonovan): ImportSpec.{Doc,EndPos}?
367
368	case *ast.IncDecStmt:
369		children = append(children,
370			tok(n.TokPos, len(n.Tok.String())))
371
372	case *ast.IndexExpr:
373		children = append(children,
374			tok(n.Lbrack, len("{")),
375			tok(n.Rbrack, len("}")))
376
377	case *ast.InterfaceType:
378		children = append(children,
379			tok(n.Interface, len("interface")))
380
381	case *ast.KeyValueExpr:
382		children = append(children,
383			tok(n.Colon, len(":")))
384
385	case *ast.LabeledStmt:
386		children = append(children,
387			tok(n.Colon, len(":")))
388
389	case *ast.MapType:
390		children = append(children,
391			tok(n.Map, len("map")))
392
393	case *ast.ParenExpr:
394		children = append(children,
395			tok(n.Lparen, len("(")),
396			tok(n.Rparen, len(")")))
397
398	case *ast.RangeStmt:
399		children = append(children,
400			tok(n.For, len("for")),
401			tok(n.TokPos, len(n.Tok.String())))
402
403	case *ast.ReturnStmt:
404		children = append(children,
405			tok(n.Return, len("return")))
406
407	case *ast.SelectStmt:
408		children = append(children,
409			tok(n.Select, len("select")))
410
411	case *ast.SelectorExpr:
412		// nop
413
414	case *ast.SendStmt:
415		children = append(children,
416			tok(n.Arrow, len("<-")))
417
418	case *ast.SliceExpr:
419		children = append(children,
420			tok(n.Lbrack, len("[")),
421			tok(n.Rbrack, len("]")))
422
423	case *ast.StarExpr:
424		children = append(children, tok(n.Star, len("*")))
425
426	case *ast.StructType:
427		children = append(children, tok(n.Struct, len("struct")))
428
429	case *ast.SwitchStmt:
430		children = append(children, tok(n.Switch, len("switch")))
431
432	case *ast.TypeAssertExpr:
433		children = append(children,
434			tok(n.Lparen-1, len(".")),
435			tok(n.Lparen, len("(")),
436			tok(n.Rparen, len(")")))
437
438	case *ast.TypeSpec:
439		// TODO(adonovan): TypeSpec.{Doc,Comment}?
440
441	case *ast.TypeSwitchStmt:
442		children = append(children, tok(n.Switch, len("switch")))
443
444	case *ast.UnaryExpr:
445		children = append(children, tok(n.OpPos, len(n.Op.String())))
446
447	case *ast.ValueSpec:
448		// TODO(adonovan): ValueSpec.{Doc,Comment}?
449
450	case *ast.BadDecl, *ast.BadExpr, *ast.BadStmt:
451		// nop
452	}
453
454	// TODO(adonovan): opt: merge the logic of ast.Inspect() into
455	// the switch above so we can make interleaved callbacks for
456	// both Nodes and Tokens in the right order and avoid the need
457	// to sort.
458	sort.Sort(byPos(children))
459
460	return children
461}
462
463type byPos []ast.Node
464
465func (sl byPos) Len() int {
466	return len(sl)
467}
468func (sl byPos) Less(i, j int) bool {
469	return sl[i].Pos() < sl[j].Pos()
470}
471func (sl byPos) Swap(i, j int) {
472	sl[i], sl[j] = sl[j], sl[i]
473}
474
475// NodeDescription returns a description of the concrete type of n suitable
476// for a user interface.
477//
478// TODO(adonovan): in some cases (e.g. Field, FieldList, Ident,
479// StarExpr) we could be much more specific given the path to the AST
480// root.  Perhaps we should do that.
481//
482func NodeDescription(n ast.Node) string {
483	switch n := n.(type) {
484	case *ast.ArrayType:
485		return "array type"
486	case *ast.AssignStmt:
487		return "assignment"
488	case *ast.BadDecl:
489		return "bad declaration"
490	case *ast.BadExpr:
491		return "bad expression"
492	case *ast.BadStmt:
493		return "bad statement"
494	case *ast.BasicLit:
495		return "basic literal"
496	case *ast.BinaryExpr:
497		return fmt.Sprintf("binary %s operation", n.Op)
498	case *ast.BlockStmt:
499		return "block"
500	case *ast.BranchStmt:
501		switch n.Tok {
502		case token.BREAK:
503			return "break statement"
504		case token.CONTINUE:
505			return "continue statement"
506		case token.GOTO:
507			return "goto statement"
508		case token.FALLTHROUGH:
509			return "fall-through statement"
510		}
511	case *ast.CallExpr:
512		if len(n.Args) == 1 && !n.Ellipsis.IsValid() {
513			return "function call (or conversion)"
514		}
515		return "function call"
516	case *ast.CaseClause:
517		return "case clause"
518	case *ast.ChanType:
519		return "channel type"
520	case *ast.CommClause:
521		return "communication clause"
522	case *ast.Comment:
523		return "comment"
524	case *ast.CommentGroup:
525		return "comment group"
526	case *ast.CompositeLit:
527		return "composite literal"
528	case *ast.DeclStmt:
529		return NodeDescription(n.Decl) + " statement"
530	case *ast.DeferStmt:
531		return "defer statement"
532	case *ast.Ellipsis:
533		return "ellipsis"
534	case *ast.EmptyStmt:
535		return "empty statement"
536	case *ast.ExprStmt:
537		return "expression statement"
538	case *ast.Field:
539		// Can be any of these:
540		// struct {x, y int}  -- struct field(s)
541		// struct {T}         -- anon struct field
542		// interface {I}      -- interface embedding
543		// interface {f()}    -- interface method
544		// func (A) func(B) C -- receiver, param(s), result(s)
545		return "field/method/parameter"
546	case *ast.FieldList:
547		return "field/method/parameter list"
548	case *ast.File:
549		return "source file"
550	case *ast.ForStmt:
551		return "for loop"
552	case *ast.FuncDecl:
553		return "function declaration"
554	case *ast.FuncLit:
555		return "function literal"
556	case *ast.FuncType:
557		return "function type"
558	case *ast.GenDecl:
559		switch n.Tok {
560		case token.IMPORT:
561			return "import declaration"
562		case token.CONST:
563			return "constant declaration"
564		case token.TYPE:
565			return "type declaration"
566		case token.VAR:
567			return "variable declaration"
568		}
569	case *ast.GoStmt:
570		return "go statement"
571	case *ast.Ident:
572		return "identifier"
573	case *ast.IfStmt:
574		return "if statement"
575	case *ast.ImportSpec:
576		return "import specification"
577	case *ast.IncDecStmt:
578		if n.Tok == token.INC {
579			return "increment statement"
580		}
581		return "decrement statement"
582	case *ast.IndexExpr:
583		return "index expression"
584	case *ast.InterfaceType:
585		return "interface type"
586	case *ast.KeyValueExpr:
587		return "key/value association"
588	case *ast.LabeledStmt:
589		return "statement label"
590	case *ast.MapType:
591		return "map type"
592	case *ast.Package:
593		return "package"
594	case *ast.ParenExpr:
595		return "parenthesized " + NodeDescription(n.X)
596	case *ast.RangeStmt:
597		return "range loop"
598	case *ast.ReturnStmt:
599		return "return statement"
600	case *ast.SelectStmt:
601		return "select statement"
602	case *ast.SelectorExpr:
603		return "selector"
604	case *ast.SendStmt:
605		return "channel send"
606	case *ast.SliceExpr:
607		return "slice expression"
608	case *ast.StarExpr:
609		return "*-operation" // load/store expr or pointer type
610	case *ast.StructType:
611		return "struct type"
612	case *ast.SwitchStmt:
613		return "switch statement"
614	case *ast.TypeAssertExpr:
615		return "type assertion"
616	case *ast.TypeSpec:
617		return "type specification"
618	case *ast.TypeSwitchStmt:
619		return "type switch"
620	case *ast.UnaryExpr:
621		return fmt.Sprintf("unary %s operation", n.Op)
622	case *ast.ValueSpec:
623		return "value specification"
624
625	}
626	panic(fmt.Sprintf("unexpected node type: %T", n))
627}