Skip to content

Commit 11b1a15

Browse files
authored
Merge pull request #10 from ndw/empty-xml-lang
Correct the defects in xml:lang processing
2 parents edea8ad + cb58804 commit 11b1a15

File tree

17 files changed

+272
-68
lines changed

17 files changed

+272
-68
lines changed

README.org

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,13 @@
11
* Saxon XInclude Processor
2+
:PROPERTIES:
3+
:CUSTOM_ID: h-A2085245-5C37-47C0-A798-B9BA74A7E5DD
4+
:END:
25

36
This is an [[http://www.w3.org/TR/xinclude][XInclude]] processor. It operates on the Saxon data model; it
47
is not a streaming processor.
58

9+
At the Java level, it works like this:
10+
611
#+BEGIN_SRC java
712
XdmNode document = … // You got a document from somewhere, right?
813
XInclude xinclude = new XInclude();
@@ -17,6 +22,9 @@ schemes could be added with relative ease, but that may be a bit
1722
aspirational.
1823

1924
* Extension function
25+
:PROPERTIES:
26+
:CUSTOM_ID: h-9CC4637B-261F-45DB-8F6B-241DC0E722AF
27+
:END:
2028

2129
You can use this XInclude implementation from XSLT as an extension
2230
function. Pass the command line option ~-init:com.nwalsh.xslt.Register~
@@ -48,7 +56,45 @@ map is a map from QName keys to values. Only two keys are recognized:
4856
value for each is “true”. You can omit the second argument entirely if
4957
you’re happy with those defaults.
5058

59+
** Command line example
60+
:PROPERTIES:
61+
:CUSTOM_ID: h-38B19AEE-42B0-4828-A637-5E146C8334D7
62+
:END:
63+
64+
You can use the ~xinclude.xsl~ file included in the distribution to
65+
expand the XIncludes in a document.
66+
67+
The trickiest part, as usual with Java applications, is arranging for
68+
the correct classpath. You need to include the Saxon jar file, the
69+
SInclude jar file, and any other jars that are necessary for
70+
dependencies (for example, the XML Resolver jar files for Saxon 11+).
71+
72+
If you have downloaded Saxon HE 11.5 and unzipped it into the
73+
directory ~saxon~, the following classpath will work:
74+
75+
#+BEGIN_SRC
76+
export CLASSPATH=saxon/saxon-he-11.5.jar\
77+
:saxon/lib/xmlresolver-4.6.4.jar\
78+
:build/libs/sinclude-4.2.1.jar
79+
#+END_SRC
80+
81+
On Windows, the syntax is different. And you may find it more
82+
convenient to run from a shell script. The actual transformation is:
83+
84+
#+BEGIN_SRC
85+
java -cp $CLASSPATH net.sf.saxon.Transform \
86+
-init:com.nwalsh.xslt.Register \
87+
-xsl:src/test/resources/xinclude.xsl \
88+
-s:input-document.xml -o:output-document.xml
89+
#+END_SRC
90+
91+
The =-init:= option will make sure that Saxon can find and use the
92+
extension function.
93+
5194
* XPointer schemes
95+
:PROPERTIES:
96+
:CUSTOM_ID: h-38009E5E-7A17-49A7-9857-22D7201BB1D8
97+
:END:
5298

5399
The standard [[https://www.w3.org/TR/xptr-xmlns/][xmlns()]] and [[https://www.w3.org/TR/xptr-element/][element()]] schemes are supported for XML parsing. An
54100
xpath() scheme is also supported. It evaluates an XPath expression against the document.

gradle.properties

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,6 @@ org.gradle.jvmargs=-Xmx4096m
22

33
basename=sinclude
44
sincludeTitle=Saxon XInclude
5-
sincludeVersion=4.2.1
5+
sincludeVersion=5.0.0
66

7-
saxonVersion=11.4
7+
saxonVersion=11.5

src/main/java/com/nwalsh/sinclude/XInclude.java

Lines changed: 77 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,60 @@
11
package com.nwalsh.sinclude;
22

33
import com.nwalsh.DebuggingLogger;
4-
import com.nwalsh.sinclude.exceptions.*;
5-
import com.nwalsh.sinclude.schemes.*;
4+
import com.nwalsh.sinclude.exceptions.XIncludeException;
5+
import com.nwalsh.sinclude.exceptions.XIncludeFallbackException;
6+
import com.nwalsh.sinclude.exceptions.XIncludeLoopException;
7+
import com.nwalsh.sinclude.exceptions.XIncludeNoFragmentException;
8+
import com.nwalsh.sinclude.exceptions.XIncludeSyntaxException;
9+
import com.nwalsh.sinclude.schemes.ElementScheme;
10+
import com.nwalsh.sinclude.schemes.RFC5147Scheme;
11+
import com.nwalsh.sinclude.schemes.SearchScheme;
12+
import com.nwalsh.sinclude.schemes.XPathScheme;
13+
import com.nwalsh.sinclude.schemes.XmlnsScheme;
14+
import com.nwalsh.sinclude.utils.NodeUtils;
615
import com.nwalsh.sinclude.utils.ReceiverUtils;
7-
import com.nwalsh.sinclude.xpointer.*;
16+
import com.nwalsh.sinclude.xpointer.DefaultFragmentIdParser;
17+
import com.nwalsh.sinclude.xpointer.FragmentIdParser;
18+
import com.nwalsh.sinclude.xpointer.ParseType;
19+
import com.nwalsh.sinclude.xpointer.Scheme;
20+
import com.nwalsh.sinclude.xpointer.SchemeData;
21+
import com.nwalsh.sinclude.xpointer.SelectionResult;
822
import net.sf.saxon.event.Receiver;
923
import net.sf.saxon.event.ReceiverOption;
1024
import net.sf.saxon.expr.parser.Loc;
11-
import net.sf.saxon.om.*;
12-
import net.sf.saxon.s9api.*;
25+
import net.sf.saxon.om.AttributeInfo;
26+
import net.sf.saxon.om.AttributeMap;
27+
import net.sf.saxon.om.EmptyAttributeMap;
28+
import net.sf.saxon.om.FingerprintedQName;
29+
import net.sf.saxon.om.NodeInfo;
30+
import net.sf.saxon.om.NodeName;
31+
import net.sf.saxon.s9api.Axis;
32+
import net.sf.saxon.s9api.DocumentBuilder;
33+
import net.sf.saxon.s9api.Processor;
34+
import net.sf.saxon.s9api.QName;
35+
import net.sf.saxon.s9api.SaxonApiException;
36+
import net.sf.saxon.s9api.Serializer;
37+
import net.sf.saxon.s9api.XdmDestination;
38+
import net.sf.saxon.s9api.XdmNode;
39+
import net.sf.saxon.s9api.XdmNodeKind;
40+
import net.sf.saxon.s9api.XdmSequenceIterator;
1341
import net.sf.saxon.trans.XPathException;
1442
import net.sf.saxon.type.BuiltInAtomicType;
1543

16-
import javax.xml.XMLConstants;
1744
import java.io.File;
1845
import java.net.URI;
19-
import java.util.*;
46+
import java.util.Collections;
47+
import java.util.HashMap;
48+
import java.util.HashSet;
49+
import java.util.Stack;
50+
import java.util.Vector;
2051
import java.util.regex.Matcher;
2152
import java.util.regex.Pattern;
2253

54+
import static com.nwalsh.sinclude.utils.NodeUtils.xml_base;
55+
import static com.nwalsh.sinclude.utils.NodeUtils.xml_id;
56+
import static com.nwalsh.sinclude.utils.NodeUtils.xml_lang;
57+
2358
public class XInclude {
2459
private static final String NS_XML = "http://www.w3.org/XML/1998/namespace";
2560
private static final String NS_XINCLUDE = "http://www.w3.org/2001/XInclude";
@@ -28,10 +63,6 @@ public class XInclude {
2863

2964
private static final String localAttrNS = "http://www.w3.org/2001/XInclude/local-attributes";
3065

31-
private static final QName xml_base = new QName("xml", XMLConstants.XML_NS_URI, "base");
32-
private static final QName xml_lang = new QName("xml", XMLConstants.XML_NS_URI, "lang");
33-
private static final QName xml_id = new QName("xml", XMLConstants.XML_NS_URI, "id");
34-
3566
private static final QName _set_xml_id = new QName("", "set-xml-id");
3667
private static final QName _accept = new QName("", "accept");
3768
private static final QName _accept_language = new QName("", "accept-language");
@@ -355,6 +386,10 @@ public XdmNode process(XdmNode node) throws XPathException {
355386
if (xptr != null) {
356387
Exception lastException = null;
357388
boolean success = false;
389+
390+
fragmentIdParser.setProperty(xml_base, node.getParent().getBaseURI().toString());
391+
fragmentIdParser.setProperty(xml_lang, NodeUtils.getLang(node.getParent()));
392+
358393
Scheme[] pointers = fragmentIdParser.parseFragmentIdentifier(parse, xptr);
359394
for (Scheme pointer : pointers) {
360395
if (!success) {
@@ -384,6 +419,10 @@ public XdmNode process(XdmNode node) throws XPathException {
384419
}
385420
}
386421
}
422+
423+
fragmentIdParser.setProperty(xml_base, null);
424+
fragmentIdParser.setProperty(xml_lang, null);
425+
387426
if (!success) {
388427
if (lastException != null) {
389428
throw new XIncludeNoFragmentException("Failed to locate fragment: " + xptr + " (" + lastException.getMessage() + ")", lastException);
@@ -419,14 +458,17 @@ public XdmNode process(XdmNode node) throws XPathException {
419458
}
420459

421460
private XdmNode fixup(XdmNode xinclude, XdmNode document, String setId) {
422-
// Fixing up xml:base is usually handled by the fragid processor.
461+
// Fixup is usually handled by the fragid processor.
462+
423463
// It's only ever true here if we're XIncluding a whole document.
424-
// Consequently, fixupLang never applies here.
425464
if (document.getNodeKind() != XdmNodeKind.DOCUMENT) {
426465
// This is an internal error and should never happen
427466
throw new IllegalArgumentException("XInclude fixup can only be called on a document");
428467
}
429468

469+
String contextLanguage = NodeUtils.getLang(xinclude.getParent());
470+
String contextBaseURI = NodeUtils.getLang(xinclude.getParent());
471+
430472
try {
431473
XdmDestination destination = ReceiverUtils.makeDestination(document);
432474
Receiver receiver = ReceiverUtils.makeReceiver(document, destination);
@@ -473,15 +515,28 @@ private XdmNode fixup(XdmNode xinclude, XdmNode document, String setId) {
473515
}
474516
}
475517

476-
if (getFixupXmlBase()) {
477-
// If fixupXmlBase is true, this nodes base URI will be correct because either:
478-
// 1. The XPathScheme will have already done fixup or
479-
// 2. The whole document is being XIncluded (in which case fixup is still necessary)
480-
AttributeInfo base = new AttributeInfo(fq_xml_base,
481-
BuiltInAtomicType.UNTYPED_ATOMIC,
482-
node.getBaseURI().toASCIIString(),
483-
Loc.NONE, ReceiverOption.NONE);
484-
amap = amap.put(base);
518+
if (getFixupXmlBase() && node.getBaseURI() != null) {
519+
if (contextBaseURI == null || !contextBaseURI.equals(node.getBaseURI().toString())) {
520+
AttributeInfo base = new AttributeInfo(fq_xml_base,
521+
BuiltInAtomicType.UNTYPED_ATOMIC,
522+
node.getBaseURI().toString(),
523+
Loc.NONE, ReceiverOption.NONE);
524+
amap = amap.put(base);
525+
}
526+
}
527+
528+
if (getFixupXmlLang()) {
529+
String lang = NodeUtils.getLang(node);
530+
if (lang == null && contextLanguage != null) {
531+
lang = "";
532+
}
533+
if (lang != null) {
534+
AttributeInfo xml_lang = new AttributeInfo(fq_xml_lang,
535+
BuiltInAtomicType.UNTYPED_ATOMIC,
536+
lang,
537+
Loc.NONE, ReceiverOption.NONE);
538+
amap = amap.put(xml_lang);
539+
}
485540
}
486541

487542
for (AttributeInfo ainfo : attributes) {

src/main/java/com/nwalsh/sinclude/schemes/AbstractXmlScheme.java

Lines changed: 16 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import com.nwalsh.sinclude.XInclude;
44
import com.nwalsh.sinclude.exceptions.FixupException;
5+
import com.nwalsh.sinclude.utils.NodeUtils;
56
import com.nwalsh.sinclude.utils.ReceiverUtils;
67
import net.sf.saxon.event.Receiver;
78
import net.sf.saxon.event.ReceiverOption;
@@ -11,35 +12,25 @@
1112
import net.sf.saxon.om.FingerprintedQName;
1213
import net.sf.saxon.om.NodeInfo;
1314
import net.sf.saxon.s9api.Axis;
14-
import net.sf.saxon.s9api.QName;
1515
import net.sf.saxon.s9api.XdmDestination;
1616
import net.sf.saxon.s9api.XdmNode;
1717
import net.sf.saxon.s9api.XdmNodeKind;
1818
import net.sf.saxon.s9api.XdmSequenceIterator;
1919
import net.sf.saxon.trans.XPathException;
2020
import net.sf.saxon.type.BuiltInAtomicType;
2121

22-
import javax.xml.XMLConstants;
22+
import static com.nwalsh.sinclude.utils.NodeUtils.xml_base;
23+
import static com.nwalsh.sinclude.utils.NodeUtils.xml_lang;
2324

2425
public abstract class AbstractXmlScheme {
25-
public static final QName xml_lang = new QName("xml", XMLConstants.XML_NS_URI, "lang");
26-
public static final QName xml_base = new QName("xml", XMLConstants.XML_NS_URI, "base");
27-
2826
private static final FingerprintedQName fq_xml_lang =
2927
new FingerprintedQName(xml_lang.getPrefix(), xml_lang.getNamespaceURI(), xml_lang.getLocalName());
3028
private static final FingerprintedQName fq_xml_base =
3129
new FingerprintedQName(xml_base.getPrefix(), xml_base.getNamespaceURI(), xml_base.getLocalName());
3230

3331
protected XInclude xinclude = null;
34-
35-
public String getLang(XdmNode node) {
36-
String lang = null;
37-
while (lang == null && node.getNodeKind() == XdmNodeKind.ELEMENT) {
38-
lang = node.getAttributeValue(xml_lang);
39-
node = node.getParent();
40-
}
41-
return lang;
42-
}
32+
protected String contextLanguage = null;
33+
protected String contextBaseURI = null;
4334

4435
protected XdmNode fixup(XdmNode node) {
4536
if (!xinclude.getFixupXmlBase() && !xinclude.getFixupXmlLang()) {
@@ -48,7 +39,10 @@ protected XdmNode fixup(XdmNode node) {
4839

4940
String lang = null;
5041
if (xinclude.getFixupXmlLang() && node.getAttributeValue(xml_lang) == null) {
51-
lang = getLang(node);
42+
lang = NodeUtils.getLang(node);
43+
if (lang == null && contextLanguage != null) {
44+
lang = ""; // Issue #8
45+
}
5246
}
5347

5448
if (node.getNodeKind() != XdmNodeKind.ELEMENT) {
@@ -64,11 +58,13 @@ protected XdmNode fixup(XdmNode node) {
6458
AttributeMap attributes = node.getUnderlyingNode().attributes();
6559

6660
if (xinclude.getFixupXmlBase() && node.getBaseURI() != null) {
67-
AttributeInfo base = new AttributeInfo(fq_xml_base,
68-
BuiltInAtomicType.UNTYPED_ATOMIC,
69-
node.getBaseURI().toASCIIString(),
70-
Loc.NONE, ReceiverOption.NONE);
71-
attributes = attributes.put(base);
61+
if (contextBaseURI == null || !contextBaseURI.equals(node.getBaseURI().toString())) {
62+
AttributeInfo base = new AttributeInfo(fq_xml_base,
63+
BuiltInAtomicType.UNTYPED_ATOMIC,
64+
node.getBaseURI().toString(),
65+
Loc.NONE, ReceiverOption.NONE);
66+
attributes = attributes.put(base);
67+
}
7268
}
7369

7470
if (lang != null) {

src/main/java/com/nwalsh/sinclude/schemes/ElementScheme.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,13 @@ public class ElementScheme extends XPathScheme {
1414
private String fragid = null;
1515

1616
@Override
17-
public ElementScheme newInstance(String fdata, XInclude xinclude) {
17+
public ElementScheme newInstance(String fdata, XInclude xinclude, String contextLanguage, String contextBaseURI) {
1818
ElementScheme scheme = new ElementScheme();
1919
scheme.xinclude = xinclude;
2020
scheme.fragid = fdata;
2121
scheme.xpath = toXPath(fdata);
22+
scheme.contextLanguage = contextLanguage;
23+
scheme.contextBaseURI = contextBaseURI;
2224
return scheme;
2325
}
2426

src/main/java/com/nwalsh/sinclude/schemes/XPathScheme.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,12 @@ public class XPathScheme extends AbstractXmlScheme implements XmlScheme {
2121
protected String xpath = null;
2222

2323
@Override
24-
public XPathScheme newInstance(String fdata, XInclude xinclude) {
24+
public XPathScheme newInstance(String fdata, XInclude xinclude, String contextLanguage, String contextBaseURI) {
2525
XPathScheme scheme = new XPathScheme();
2626
scheme.xpath = fdata;
2727
scheme.xinclude = xinclude;
28+
scheme.contextLanguage = contextLanguage;
29+
scheme.contextBaseURI = contextBaseURI;
2830
return scheme;
2931
}
3032

src/main/java/com/nwalsh/sinclude/schemes/XmlnsScheme.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ public class XmlnsScheme implements XmlScheme {
1717
private XmlnsData data = null;
1818

1919
@Override
20-
public XmlnsScheme newInstance(String fdata, XInclude xinclude) {
20+
public XmlnsScheme newInstance(String fdata, XInclude xinclude, String contextLanguage, String contexBaseURI) {
2121
XmlnsData data = null;
2222
Matcher matcher = nsRE.matcher(fdata);
2323
if (matcher.find()) {
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
package com.nwalsh.sinclude.utils;
2+
3+
import net.sf.saxon.s9api.QName;
4+
import net.sf.saxon.s9api.XdmNode;
5+
import net.sf.saxon.s9api.XdmNodeKind;
6+
7+
import javax.xml.XMLConstants;
8+
9+
public class NodeUtils {
10+
public static final QName xml_id = new QName("xml", XMLConstants.XML_NS_URI, "id");
11+
public static final QName xml_lang = new QName("xml", XMLConstants.XML_NS_URI, "lang");
12+
public static final QName xml_base = new QName("xml", XMLConstants.XML_NS_URI, "base");
13+
14+
public static String getLang(XdmNode node) {
15+
String lang = null;
16+
while (lang == null && node.getNodeKind() == XdmNodeKind.ELEMENT) {
17+
lang = node.getAttributeValue(xml_lang);
18+
node = node.getParent();
19+
}
20+
return lang;
21+
}
22+
}
23+

0 commit comments

Comments
 (0)