Skip to content

Commit 1430f15

Browse files
cristopermmcdole
authored and committed
Keep track of the current xml:base value
Tracks xml:base attributes in a stack of *url.URLs. Consumers of the parser can access the top-level URL through `XMLPullParser.BaseStack.Top()`. This is useful for applications that need to resolve URLs in XML documents relative to the xml:base attributes. To that end, a helper method is provided which will resolve a relative string to an absolute URL according to the current base: `func (p *XMLPullParser) XmlBaseResolveUrl(u string) (*url.URL, error)`. Includes a single test. It is not comprehensive, but it checks for xml:base to two levels, tests resolving a string against the current base, as well as resolution of relative xml:base values.
1 parent 1ba3125 commit 1430f15

File tree

2 files changed

+118
-1
lines changed

2 files changed

+118
-1
lines changed

xpp.go

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,15 @@ import (
55
"errors"
66
"fmt"
77
"io"
8+
"net/url"
89
"strings"
910
)
1011

1112
// XMLEventType identifies the kind of event produced by the parser. Its values
// are the constants declared below, starting with StartDocument.
type XMLEventType int
1213
// CharsetReader is a function that takes a charset name and an input reader
// and returns a reader (or an error).
// NOTE(review): presumably used to convert non-UTF-8 input to UTF-8 before
// parsing — confirm against the decoder setup, which is not visible here.
type CharsetReader func(charset string, input io.Reader) (io.Reader, error)
1314

15+
// xmlNSURI is the namespace URI an attribute must carry, together with the
// local name "base", to be recognised as xml:base by pushBase.
const xmlNSURI = "http://www.w3.org/XML/1998/namespace"
16+
1417
const (
1518
StartDocument XMLEventType = iota
1619
EndDocument
@@ -24,10 +27,33 @@ const (
2427
// TODO: CDSECT ?
2528
)
2629

30+
type urlStack []*url.URL
31+
32+
func (s *urlStack) push(u *url.URL) {
33+
*s = append([]*url.URL{u}, *s...)
34+
}
35+
36+
func (s *urlStack) pop() *url.URL {
37+
if s == nil || len(*s) == 0 {
38+
return nil
39+
}
40+
var top *url.URL
41+
top, *s = (*s)[0], (*s)[1:]
42+
return top
43+
}
44+
45+
func (s *urlStack) Top() *url.URL {
46+
if s == nil || len(*s) == 0 {
47+
return nil
48+
}
49+
return (*s)[0]
50+
}
51+
2752
type XMLPullParser struct {
2853
// Document State
2954
Spaces map[string]string
3055
SpacesStack []map[string]string
56+
BaseStack urlStack
3157

3258
// Token State
3359
Depth int
@@ -214,6 +240,7 @@ func (p *XMLPullParser) DecodeElement(v interface{}) error {
214240
p.Depth--
215241
p.Name = name
216242
p.token = nil
243+
p.popBase()
217244
return nil
218245
}
219246

@@ -263,6 +290,26 @@ func (p *XMLPullParser) EventType(t xml.Token) (event XMLEventType) {
263290
return
264291
}
265292

293+
// resolve the given string as a URL relative to current xml:base
294+
func (p *XMLPullParser) XmlBaseResolveUrl(u string) (*url.URL, error) {
295+
curr := p.BaseStack.Top()
296+
if curr == nil {
297+
return nil, nil
298+
}
299+
300+
relURL, err := url.Parse(u)
301+
if err != nil {
302+
return nil, err
303+
}
304+
if curr.Path != "" && u != "" && curr.Path[len(curr.Path)-1] != '/' {
305+
// There's no reason someone would use a path in xml:base if they
306+
// didn't mean for it to be a directory
307+
curr.Path = curr.Path + "/"
308+
}
309+
absURL := curr.ResolveReference(relURL)
310+
return absURL, nil
311+
}
312+
266313
func (p *XMLPullParser) processToken(t xml.Token) {
267314
switch tt := t.(type) {
268315
case xml.StartElement:
@@ -286,6 +333,7 @@ func (p *XMLPullParser) processStartToken(t xml.StartElement) {
286333
p.Name = t.Name.Local
287334
p.Space = t.Name.Space
288335
p.trackNamespaces(t)
336+
p.pushBase()
289337
}
290338

291339
func (p *XMLPullParser) processEndToken(t xml.EndElement) {
@@ -297,6 +345,7 @@ func (p *XMLPullParser) processEndToken(t xml.EndElement) {
297345
p.Spaces = p.SpacesStack[len(p.SpacesStack)-1]
298346
}
299347
p.Name = t.Name.Local
348+
p.popBase()
300349
}
301350

302351
func (p *XMLPullParser) processCharDataToken(t xml.CharData) {
@@ -340,3 +389,40 @@ func (p *XMLPullParser) trackNamespaces(t xml.StartElement) {
340389
p.Spaces = newSpace
341390
p.SpacesStack = append(p.SpacesStack, newSpace)
342391
}
392+
393+
// returns the popped base URL
394+
func (p *XMLPullParser) popBase() string {
395+
url := p.BaseStack.pop()
396+
if url != nil {
397+
return url.String()
398+
}
399+
return ""
400+
}
401+
402+
// Searches current attributes for xml:base and updates the urlStack
403+
func (p *XMLPullParser) pushBase() error {
404+
var base string
405+
// search list of attrs for "xml:base"
406+
for _, attr := range p.Attrs {
407+
if attr.Name.Local == "base" && attr.Name.Space == xmlNSURI {
408+
base = attr.Value
409+
break
410+
}
411+
}
412+
if base == "" {
413+
// no base attribute found
414+
return nil
415+
}
416+
417+
newURL, err := url.Parse(base)
418+
if err != nil {
419+
return err
420+
}
421+
422+
topURL := p.BaseStack.Top()
423+
if topURL != nil {
424+
newURL = topURL.ResolveReference(newURL)
425+
}
426+
p.BaseStack.push(newURL)
427+
return nil
428+
}

xpp_test.go

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ import (
55
"io"
66
"testing"
77

8-
"github.com/mmcdole/goxpp"
8+
xpp "github.com/mmcdole/goxpp"
99
"github.com/stretchr/testify/assert"
1010
)
1111

@@ -85,6 +85,37 @@ func TestDecodeElementDepth(t *testing.T) {
8585
p.DecodeElement(&v{})
8686
}
8787

88+
func TestXMLBase(t *testing.T) {
89+
crReader := func(charset string, input io.Reader) (io.Reader, error) {
90+
return input, nil
91+
}
92+
r := bytes.NewBufferString(`<root xml:base="https://example.org/"><d2 xml:base="relative">foo</d2><d2>bar</d2></root>`)
93+
p := xpp.NewXMLPullParser(r, false, crReader)
94+
95+
type v struct{}
96+
97+
// move to root
98+
p.NextTag()
99+
assert.Equal(t, "root", p.Name)
100+
assert.Equal(t, "https://example.org/", p.BaseStack.Top().String())
101+
102+
// decode first <d2>
103+
p.NextTag()
104+
assert.Equal(t, "d2", p.Name)
105+
assert.Equal(t, "https://example.org/relative", p.BaseStack.Top().String())
106+
107+
resolved, err := p.XmlBaseResolveUrl("test")
108+
assert.NoError(t, err)
109+
assert.Equal(t, "https://example.org/relative/test", resolved.String())
110+
p.DecodeElement(&v{})
111+
112+
// decode second <d2>
113+
p.NextTag()
114+
assert.Equal(t, "d2", p.Name)
115+
assert.Equal(t, "https://example.org/", p.BaseStack.Top().String())
116+
p.DecodeElement(&v{})
117+
}
118+
88119
func toNextStart(t *testing.T, p *xpp.XMLPullParser) {
89120
for {
90121
tok, err := p.NextToken()

0 commit comments

Comments
 (0)