Skip to content

Commit b962077

Browse files
committed
Fixing issue with URIs that cannot be parsed
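Previously, a document URI that could not be parsed as a java.net.URI caused the connector to throw an error. MLCP instead just logs the error and uses the unparsed URI as the file path; the connector now does the same, logging at the DEBUG level.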
1 parent 8feae96 commit b962077

File tree

3 files changed

+47
-12
lines changed

3 files changed

+47
-12
lines changed

src/main/java/com/marklogic/spark/writer/file/FileUtil.java

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,26 @@
11
package com.marklogic.spark.writer.file;
22

3-
import com.marklogic.spark.ConnectorException;
3+
import com.marklogic.spark.Util;
44

55
import java.net.URI;
66
import java.net.URISyntaxException;
77

88
abstract class FileUtil {
99

1010
static String makePathFromDocumentURI(String documentURI) {
11-
// Copied from MLCP
12-
URI uri;
11+
// Mostly copied from MLCP.
1312
try {
14-
uri = new URI(documentURI);
13+
URI uri = new URI(documentURI);
14+
// The isOpaque check is made because an opaque URI will not have a path.
15+
return uri.isOpaque() ? uri.getSchemeSpecificPart() : uri.getPath();
1516
} catch (URISyntaxException e) {
16-
throw new ConnectorException(String.format("Unable to construct URI from: %s", documentURI), e);
17+
// MLCP logs errors from parsing the URI at the "WARN" level. That seems noisy, as large numbers of URIs
18+
// could e.g. have spaces in them. So DEBUG is used instead.
19+
if (Util.MAIN_LOGGER.isDebugEnabled()) {
20+
Util.MAIN_LOGGER.debug("Unable to parse document URI: {}; will use unparsed URI as file path.", documentURI);
21+
}
22+
return documentURI;
1723
}
18-
// The isOpaque check is made because an opaque URI will not have a path.
19-
return uri.isOpaque() ? uri.getSchemeSpecificPart() : uri.getPath();
2024
}
2125

2226
private FileUtil() {

src/test/java/com/marklogic/spark/writer/file/FileUtilTest.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,8 @@
11
package com.marklogic.spark.writer.file;
22

3-
import com.marklogic.spark.ConnectorException;
43
import org.junit.jupiter.api.Test;
54

65
import static org.junit.jupiter.api.Assertions.assertEquals;
7-
import static org.junit.jupiter.api.Assertions.assertThrows;
86

97
class FileUtilTest {
108

@@ -22,7 +20,9 @@ void makePathFromOpaqueURI() {
2220

2321
@Test
2422
void makePathWithInvalidURI() {
25-
ConnectorException ex = assertThrows(ConnectorException.class, () -> FileUtil.makePathFromDocumentURI(":::"));
26-
assertEquals("Unable to construct URI from: :::", ex.getMessage());
23+
String uri = FileUtil.makePathFromDocumentURI("has space.json");
24+
assertEquals("has space.json", uri, "If a java.net.URI cannot be constructed - in this case, it's due to " +
25+
"the space in the string - then the error should be logged at the DEBUG level and the original value " +
26+
"should be returned.");
2727
}
2828
}

src/test/java/com/marklogic/spark/writer/file/WriteDocumentFilesTest.java

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
package com.marklogic.spark.writer.file;
22

33
import com.fasterxml.jackson.databind.JsonNode;
4-
import com.fasterxml.jackson.databind.ObjectMapper;
54
import com.marklogic.client.document.DocumentWriteSet;
65
import com.marklogic.client.document.TextDocumentManager;
76
import com.marklogic.client.io.DocumentMetadataHandle;
@@ -85,4 +84,36 @@ void variousURIs(@TempDir Path tempDir) throws Exception {
8584
content = new String(FileCopyUtils.copyToByteArray(files[filenames.indexOf("example2.txt")]));
8685
assertEquals("Opaque URI", content);
8786
}
87+
88+
@Test
89+
void uriHasSpace(@TempDir Path tempDir) {
90+
final String uri = "/has space.json";
91+
92+
newSparkSession().read().format(CONNECTOR_IDENTIFIER)
93+
.load("src/test/resources/spark-json/single-object.json")
94+
.write().format(CONNECTOR_IDENTIFIER)
95+
.option(Options.CLIENT_URI, makeClientUri())
96+
.option(Options.WRITE_COLLECTIONS, "char-test")
97+
.option(Options.WRITE_PERMISSIONS, DEFAULT_PERMISSIONS)
98+
.option(Options.WRITE_URI_TEMPLATE, uri)
99+
.mode(SaveMode.Append)
100+
.save();
101+
102+
sparkSession.read().format(CONNECTOR_IDENTIFIER)
103+
.option(Options.CLIENT_URI, makeClientUri())
104+
.option(Options.READ_DOCUMENTS_URIS, uri)
105+
.load()
106+
.write().format(CONNECTOR_IDENTIFIER)
107+
.mode(SaveMode.Append)
108+
.save(tempDir.toFile().getAbsolutePath());
109+
110+
File dir = tempDir.toFile();
111+
assertEquals(1, dir.listFiles().length);
112+
String filename = dir.listFiles()[0].getName();
113+
System.out.println(filename);
114+
assertEquals("has space.json", filename,
115+
"Just like MLCP, if the connector cannot construct a java.net.URI from the document URI (it will fail " +
116+
"due to a space), the error should be logged and the file should be written with its unaltered " +
117+
"document URI used for the file path.");
118+
}
88119
}

0 commit comments

Comments
 (0)