Skip to content

Commit da6f02b

Browse files
authored
Merge pull request #260 from marklogic/feature/file-fix
Fixing issue with URIs that cannot be parsed
2 parents 8feae96 + b962077 commit da6f02b

File tree

3 files changed

47 additions and 12 deletions (lines changed)

3 files changed

47 additions and 12 deletions (lines changed)

src/main/java/com/marklogic/spark/writer/file/FileUtil.java

Lines changed: 11 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1,22 +1,26 @@
11
package com.marklogic.spark.writer.file;
22

3-
import com.marklogic.spark.ConnectorException;
3+
import com.marklogic.spark.Util;
44

55
import java.net.URI;
66
import java.net.URISyntaxException;
77

88
abstract class FileUtil {
99

1010
static String makePathFromDocumentURI(String documentURI) {
11-
// Copied from MLCP
12-
URI uri;
11+
// Mostly copied from MLCP.
1312
try {
14-
uri = new URI(documentURI);
13+
URI uri = new URI(documentURI);
14+
// The isOpaque check is made because an opaque URI will not have a path.
15+
return uri.isOpaque() ? uri.getSchemeSpecificPart() : uri.getPath();
1516
} catch (URISyntaxException e) {
16-
throw new ConnectorException(String.format("Unable to construct URI from: %s", documentURI), e);
17+
// MLCP logs errors from parsing the URI at the "WARN" level. That seems noisy, as large numbers of URIs
18+
// could e.g. have spaces in them. So DEBUG is used instead.
19+
if (Util.MAIN_LOGGER.isDebugEnabled()) {
20+
Util.MAIN_LOGGER.debug("Unable to parse document URI: {}; will use unparsed URI as file path.", documentURI);
21+
}
22+
return documentURI;
1723
}
18-
// The isOpaque check is made because an opaque URI will not have a path.
19-
return uri.isOpaque() ? uri.getSchemeSpecificPart() : uri.getPath();
2024
}
2125

2226
private FileUtil() {

src/test/java/com/marklogic/spark/writer/file/FileUtilTest.java

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,8 @@
11
package com.marklogic.spark.writer.file;
22

3-
import com.marklogic.spark.ConnectorException;
43
import org.junit.jupiter.api.Test;
54

65
import static org.junit.jupiter.api.Assertions.assertEquals;
7-
import static org.junit.jupiter.api.Assertions.assertThrows;
86

97
class FileUtilTest {
108

@@ -22,7 +20,9 @@ void makePathFromOpaqueURI() {
2220

2321
@Test
2422
void makePathWithInvalidURI() {
25-
ConnectorException ex = assertThrows(ConnectorException.class, () -> FileUtil.makePathFromDocumentURI(":::"));
26-
assertEquals("Unable to construct URI from: :::", ex.getMessage());
23+
String uri = FileUtil.makePathFromDocumentURI("has space.json");
24+
assertEquals("has space.json", uri, "If a java.net.URI cannot be constructed - in this case, it's due to " +
25+
"the space in the string - then the error should be logged at the DEBUG level and the original value " +
26+
"should be returned.");
2727
}
2828
}

src/test/java/com/marklogic/spark/writer/file/WriteDocumentFilesTest.java

Lines changed: 32 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,6 @@
11
package com.marklogic.spark.writer.file;
22

33
import com.fasterxml.jackson.databind.JsonNode;
4-
import com.fasterxml.jackson.databind.ObjectMapper;
54
import com.marklogic.client.document.DocumentWriteSet;
65
import com.marklogic.client.document.TextDocumentManager;
76
import com.marklogic.client.io.DocumentMetadataHandle;
@@ -85,4 +84,36 @@ void variousURIs(@TempDir Path tempDir) throws Exception {
8584
content = new String(FileCopyUtils.copyToByteArray(files[filenames.indexOf("example2.txt")]));
8685
assertEquals("Opaque URI", content);
8786
}
87+
88+
@Test
89+
void uriHasSpace(@TempDir Path tempDir) {
90+
final String uri = "/has space.json";
91+
92+
newSparkSession().read().format(CONNECTOR_IDENTIFIER)
93+
.load("src/test/resources/spark-json/single-object.json")
94+
.write().format(CONNECTOR_IDENTIFIER)
95+
.option(Options.CLIENT_URI, makeClientUri())
96+
.option(Options.WRITE_COLLECTIONS, "char-test")
97+
.option(Options.WRITE_PERMISSIONS, DEFAULT_PERMISSIONS)
98+
.option(Options.WRITE_URI_TEMPLATE, uri)
99+
.mode(SaveMode.Append)
100+
.save();
101+
102+
sparkSession.read().format(CONNECTOR_IDENTIFIER)
103+
.option(Options.CLIENT_URI, makeClientUri())
104+
.option(Options.READ_DOCUMENTS_URIS, uri)
105+
.load()
106+
.write().format(CONNECTOR_IDENTIFIER)
107+
.mode(SaveMode.Append)
108+
.save(tempDir.toFile().getAbsolutePath());
109+
110+
File dir = tempDir.toFile();
111+
assertEquals(1, dir.listFiles().length);
112+
String filename = dir.listFiles()[0].getName();
113+
System.out.println(filename);
114+
assertEquals("has space.json", filename,
115+
"Just like MLCP, if the connector cannot construct a java.net.URI from the document URI (it will fail " +
116+
"due to a space), the error should be logged and the file should be written with its unaltered " +
117+
"document URI used for the file path.");
118+
}
88119
}

0 commit comments

Comments
 (0)