Skip to content

Commit f69332a

Browse files
authored
Changed default XML parser to Woodstox (#482)
Changes the default StAX parser to Woodstox. This gives us more control over the parser's behavior, and Woodstox is also a bit faster than Java's default parser.
1 parent 4eecd14 commit f69332a

File tree

5 files changed

+34
-7
lines changed

5 files changed

+34
-7
lines changed

framework/codemodder-base/src/main/java/io/codemodder/DefaultXPathStreamProcessor.java

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -73,9 +73,14 @@ public Optional<XPathStreamProcessChange> process(
7373
XMLEventWriter xmlWriter = outputFactory.createXMLEventWriter(sw);
7474
while (xmlReader.hasNext()) {
7575
final XMLEvent currentEvent = xmlReader.nextEvent();
76-
Location location = currentEvent.getLocation();
77-
if (doesPositionMatch(httpMethodPositions, location)) {
78-
handler.handle(xmlReader, xmlWriter, currentEvent);
76+
// get the position of the last character of the event, that is, the start of the next one
77+
if (xmlReader.hasNext()) {
78+
Location location = xmlReader.peek().getLocation();
79+
if (doesPositionMatch(httpMethodPositions, location)) {
80+
handler.handle(xmlReader, xmlWriter, currentEvent);
81+
} else {
82+
xmlWriter.add(currentEvent);
83+
}
7984
} else {
8085
xmlWriter.add(currentEvent);
8186
}

gradle/libs.versions.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ picocli = "4.7.0"
1515
slf4j = "2.0.6"
1616
guice = "5.1.0"
1717
dom4j = "2.1.4"
18+
woodstox = "7.1.0"
1819

1920
[libraries]
2021
autovalue-annotations = { module = "com.google.auto.value:auto-value-annotations", version.ref = "auto-value" }
@@ -27,6 +28,7 @@ contrast-sarif = "com.contrastsecurity:java-sarif:2.0"
2728
gson = "com.google.code.gson:gson:2.9.0"
2829
guice = { module = "com.google.inject:guice", version.ref = "guice" }
2930
immutables = "org.immutables:value:2.9.0"
31+
woodstox = { module = "com.fasterxml.woodstox:woodstox-core", version.ref = "woodstox" }
3032
jackson-core = { module = "com.fasterxml.jackson.core:jackson-core", version.ref = "jackson" }
3133
jackson-yaml = { module = "com.fasterxml.jackson.dataformat:jackson-dataformat-yaml", version.ref = "jackson" }
3234
javadiff = "io.github.java-diff-utils:java-diff-utils:4.12"

plugins/codemodder-plugin-maven/build.gradle.kts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,4 +31,5 @@ dependencies {
3131
implementation(libs.diff.match.patch)
3232
implementation(libs.slf4j.simple)
3333
implementation(libs.slf4j.api)
34+
implementation(libs.woodstox)
3435
}

plugins/codemodder-plugin-maven/src/main/java/io/codemodder/plugins/maven/MavenProvider.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
*
1919
* <p>a. We skip parent finding if there's not a relativePath declaration (this is by design), so
2020
* sometimes pom finding will fail on purpose b. there are several flags on ProjectModelFactory
21-
* which aren't applied. They relate to verisons, upgrading and particularly: Actives Profiles c. If
21+
* which aren't applied. They relate to versions, upgrading and particularly: Actives Profiles c. If
2222
* you need anything declared in a ~/.m2/settings.xml, we don't support that (e.g., passwords or
2323
* proxies) d. Haven't tested, but I'm almost sure that it wouldn't work on any repo other than
2424
* central e. We allow on this module to do online resolution. HOWEVER by default its offline f. You

plugins/codemodder-plugin-maven/src/main/java/io/codemodder/plugins/maven/operator/FormatCommand.java

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ class FormatCommand extends AbstractCommand {
3838
private static final Logger LOGGER = LoggerFactory.getLogger(FormatCommand.class);
3939

4040
/** StAX InputFactory */
41-
private XMLInputFactory inputFactory = hardenFactory(XMLInputFactory.newInstance());
41+
private XMLInputFactory inputFactory = XMLInputFactory.newInstance().newInstance();
4242

4343
/** StAX OutputFactory */
4444
private XMLOutputFactory outputFactory = XMLOutputFactory.newInstance();
@@ -324,8 +324,27 @@ private void parseXmlAndCharset(POMDocument pomFile) throws XMLStreamException,
324324
String originalPomCharsetString =
325325
new String(pomFile.getOriginalPom(), pomFile.getCharset());
326326

327-
String untrimmedOriginalContent =
328-
originalPomCharsetString.substring(elementStart, offset);
327+
var prev = prevEvents.get(prevEvents.size() - 1);
328+
String untrimmedOriginalContent = "";
329+
// is self-closing element, tag is contained within the offset of the next element
330+
if (prev instanceof StartElement
331+
&& prev.getLocation().getCharacterOffset()
332+
== endElementEvent.getLocation().getCharacterOffset()) {
333+
untrimmedOriginalContent =
334+
originalPomCharsetString.substring(
335+
offset, eventReader.peek().getLocation().getCharacterOffset());
336+
} else {
337+
// is empty tag, the last character event is not in between the tags
338+
if (prev.isStartElement()) {
339+
untrimmedOriginalContent =
340+
originalPomCharsetString.substring(
341+
prev.getLocation().getCharacterOffset(),
342+
eventReader.peek().getLocation().getCharacterOffset());
343+
344+
} else {
345+
untrimmedOriginalContent = originalPomCharsetString.substring(elementStart, offset);
346+
}
347+
}
329348

330349
String trimmedOriginalContent = untrimmedOriginalContent.trim();
331350

0 commit comments

Comments
 (0)