xml_decoder.go

  1package xml
  2
  3import (
  4	"encoding/xml"
  5	"fmt"
  6	"strings"
  7)
  8
  9// NodeDecoder is a XML decoder wrapper that is responsible to decoding
 10// a single XML Node element and it's nested member elements. This wrapper decoder
 11// takes in the start element of the top level node being decoded.
 12type NodeDecoder struct {
 13	Decoder *xml.Decoder
 14	StartEl xml.StartElement
 15}
 16
 17// WrapNodeDecoder returns an initialized XMLNodeDecoder
 18func WrapNodeDecoder(decoder *xml.Decoder, startEl xml.StartElement) NodeDecoder {
 19	return NodeDecoder{
 20		Decoder: decoder,
 21		StartEl: startEl,
 22	}
 23}
 24
 25// Token on a Node Decoder returns a xml StartElement. It returns a boolean that indicates the
 26// a token is the node decoder's end node token; and an error which indicates any error
 27// that occurred while retrieving the start element
 28func (d NodeDecoder) Token() (t xml.StartElement, done bool, err error) {
 29	for {
 30		token, e := d.Decoder.Token()
 31		if e != nil {
 32			return t, done, e
 33		}
 34
 35		// check if we reach end of the node being decoded
 36		if el, ok := token.(xml.EndElement); ok {
 37			return t, el == d.StartEl.End(), err
 38		}
 39
 40		if t, ok := token.(xml.StartElement); ok {
 41			return restoreAttrNamespaces(t), false, err
 42		}
 43
 44		// skip token if it is a comment or preamble or empty space value due to indentation
 45		// or if it's a value and is not expected
 46	}
 47}
 48
 49// restoreAttrNamespaces update XML attributes to restore the short namespaces found within
 50// the raw XML document.
 51func restoreAttrNamespaces(node xml.StartElement) xml.StartElement {
 52	if len(node.Attr) == 0 {
 53		return node
 54	}
 55
 56	// Generate a mapping of XML namespace values to their short names.
 57	ns := map[string]string{}
 58	for _, a := range node.Attr {
 59		if a.Name.Space == "xmlns" {
 60			ns[a.Value] = a.Name.Local
 61			break
 62		}
 63	}
 64
 65	for i, a := range node.Attr {
 66		if a.Name.Space == "xmlns" {
 67			continue
 68		}
 69		// By default, xml.Decoder will fully resolve these namespaces. So if you had <foo xmlns:bar=baz bar:bin=hi/>
 70		// then by default the second attribute would have the `Name.Space` resolved to `baz`. But we need it to
 71		// continue to resolve as `bar` so we can easily identify it later on.
 72		if v, ok := ns[node.Attr[i].Name.Space]; ok {
 73			node.Attr[i].Name.Space = v
 74		}
 75	}
 76	return node
 77}
 78
 79// GetElement looks for the given tag name at the current level, and returns the element if found, and
 80// skipping over non-matching elements. Returns an error if the node is not found, or if an error occurs while walking
 81// the document.
 82func (d NodeDecoder) GetElement(name string) (t xml.StartElement, err error) {
 83	for {
 84		token, done, err := d.Token()
 85		if err != nil {
 86			return t, err
 87		}
 88		if done {
 89			return t, fmt.Errorf("%s node not found", name)
 90		}
 91		switch {
 92		case strings.EqualFold(name, token.Name.Local):
 93			return token, nil
 94		default:
 95			err = d.Decoder.Skip()
 96			if err != nil {
 97				return t, err
 98			}
 99		}
100	}
101}
102
103// Value provides an abstraction to retrieve char data value within an xml element.
104// The method will return an error if it encounters a nested xml element instead of char data.
105// This method should only be used to retrieve simple type or blob shape values as []byte.
106func (d NodeDecoder) Value() (c []byte, err error) {
107	t, e := d.Decoder.Token()
108	if e != nil {
109		return c, e
110	}
111
112	endElement := d.StartEl.End()
113
114	switch ev := t.(type) {
115	case xml.CharData:
116		c = ev.Copy()
117	case xml.EndElement: // end tag or self-closing
118		if ev == endElement {
119			return []byte{}, err
120		}
121		return c, fmt.Errorf("expected value for %v element, got %T type %v instead", d.StartEl.Name.Local, t, t)
122	default:
123		return c, fmt.Errorf("expected value for %v element, got %T type %v instead", d.StartEl.Name.Local, t, t)
124	}
125
126	t, e = d.Decoder.Token()
127	if e != nil {
128		return c, e
129	}
130
131	if ev, ok := t.(xml.EndElement); ok {
132		if ev == endElement {
133			return c, err
134		}
135	}
136
137	return c, fmt.Errorf("expected end element %v, got %T type %v instead", endElement, t, t)
138}
139
// FetchRootElement reads tokens from decoder and returns the first start
// element it finds. Tokens ahead of it, such as comments and the XML preamble,
// are discarded, which makes it useful for locating the start element of an
// XML response body. An error from the decoder is returned unchanged.
func FetchRootElement(decoder *xml.Decoder) (startElement xml.StartElement, err error) {
	for {
		tok, tokErr := decoder.Token()
		if tokErr != nil {
			return startElement, tokErr
		}

		root, isStart := tok.(xml.StartElement)
		if !isStart {
			continue
		}
		return root, nil
	}
}