Skip to content

Commit 46bc1a4

Browse files
committed
Fix escaping by using RFC compliant parser (#496)
Configuring the CSVReader with an RFC 4180-compliant parser fixes the escaping of backslashes in quoted fields. This commit also updates the opencsv dependency from version 3.10 to 5.9.
1 parent 71c367c commit 46bc1a4

File tree

3 files changed

+41
-9
lines changed

3 files changed

+41
-9
lines changed

metafacture-csv/build.gradle

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ description = 'Modules for processing comma-separated values'
1919

2020
dependencies {
2121
api project(':metafacture-framework')
22-
implementation 'com.opencsv:opencsv:3.10'
22+
implementation 'com.opencsv:opencsv:5.9'
2323
testImplementation "junit:junit:${versions.junit}"
2424
testImplementation "org.mockito:mockito-core:${versions.mockito}"
2525
}

metafacture-csv/src/main/java/org/metafacture/csv/CsvDecoder.java

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright 2013, 2014 Deutsche Nationalbibliothek
2+
* Copyright 2013-2024 Deutsche Nationalbibliothek and hbz
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -16,15 +16,18 @@
1616

1717
package org.metafacture.csv;
1818

19+
import com.opencsv.CSVReader;
20+
import com.opencsv.CSVReaderBuilder;
21+
import com.opencsv.RFC4180Parser;
22+
import com.opencsv.RFC4180ParserBuilder;
23+
import com.opencsv.exceptions.CsvException;
1924
import org.metafacture.framework.FluxCommand;
2025
import org.metafacture.framework.StreamReceiver;
2126
import org.metafacture.framework.annotations.Description;
2227
import org.metafacture.framework.annotations.In;
2328
import org.metafacture.framework.annotations.Out;
2429
import org.metafacture.framework.helpers.DefaultObjectPipe;
2530

26-
import com.opencsv.CSVReader;
27-
2831
import java.io.IOException;
2932
import java.io.StringReader;
3033
import java.util.List;
@@ -48,6 +51,7 @@ public final class CsvDecoder extends DefaultObjectPipe<String, StreamReceiver>
4851
private String[] header = new String[0];
4952
private int count;
5053
private boolean hasHeader;
54+
private RFC4180Parser parser;
5155

5256
/**
5357
* Creates an instance of {@link CsvDecoder} with a given separator.
@@ -56,6 +60,7 @@ public final class CsvDecoder extends DefaultObjectPipe<String, StreamReceiver>
5660
*/
5761
public CsvDecoder(final String separator) {
5862
this.separator = separator.charAt(0);
63+
initializeCsvParser();
5964
}
6065

6166
/**
@@ -65,13 +70,21 @@ public CsvDecoder(final String separator) {
6570
*/
6671
public CsvDecoder(final char separator) {
6772
this.separator = separator;
73+
initializeCsvParser();
6874
}
6975

7076
/**
7177
* Creates an instance of {@link CsvDecoder}. The default separator is
7278
* {@value #DEFAULT_SEP}.
7379
*/
7480
public CsvDecoder() {
81+
initializeCsvParser();
82+
}
83+
84+
private void initializeCsvParser() {
85+
this.parser = new RFC4180ParserBuilder()
86+
.withSeparator(separator)
87+
.build();
7588
}
7689

7790
@Override
@@ -105,18 +118,18 @@ else if (parts.length == header.length) {
105118
}
106119
}
107120

108-
private String[] parseCsv(final String string) {
121+
private String[] parseCsv(final String csv) {
109122
String[] parts = new String[0];
110123
try {
111-
final CSVReader reader = new CSVReader(new StringReader(string),
112-
separator);
124+
final CSVReader reader = new CSVReaderBuilder(new StringReader(csv))
125+
.withCSVParser(parser)
126+
.build();
113127
final List<String[]> lines = reader.readAll();
114128
if (lines.size() > 0) {
115129
parts = lines.get(0);
116130
}
117131
reader.close();
118-
}
119-
catch (final IOException e) {
132+
} catch (final IOException | CsvException e) {
120133
e.printStackTrace();
121134
}
122135
return parts;
@@ -139,5 +152,6 @@ public void setHasHeader(final boolean hasHeader) {
139152
*/
140153
public void setSeparator(final String separator) {
141154
this.separator = separator.charAt(0);
155+
initializeCsvParser();
142156
}
143157
}

metafacture-csv/src/test/java/org/metafacture/csv/CsvDecoderTest.java

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,4 +89,22 @@ public void testTabSeparated() {
8989
ordered.verify(receiver).endRecord();
9090
}
9191

92+
/**
93+
In: "a","b\t","c\\t","\","\cd\"
94+
Out: a, b , c\\t, \, \cd\
95+
*/
96+
@Test
97+
public void issue496_escaping() {
98+
decoder.setHasHeader(false);
99+
decoder.process("\"a\",\"b\t\",\"c\\t\",\"\\\",\"\\cd\\\"");
100+
final InOrder ordered = inOrder(receiver);
101+
ordered.verify(receiver).startRecord("1");
102+
ordered.verify(receiver).literal("0", "a");
103+
ordered.verify(receiver).literal("1", "b\t");
104+
ordered.verify(receiver).literal("2", "c\\t");
105+
ordered.verify(receiver).literal("3", "\\");
106+
ordered.verify(receiver).literal("4", "\\cd\\");
107+
ordered.verify(receiver).endRecord();
108+
}
109+
92110
}

0 commit comments

Comments
 (0)