Skip to content

Commit 88cfce3

Browse files
authored
Merge pull request #296 from marklogic/feature/17095-swap-order-of-archive
MLE-17059 Changing order of entries in archive
2 parents 9a4ac2d + 37988bc commit 88cfce3

File tree

3 files changed

+43
-6
lines changed

3 files changed

+43
-6
lines changed

src/main/java/com/marklogic/spark/reader/file/ArchiveFileReader.java

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,9 @@ class ArchiveFileReader implements PartitionReader<InternalRow> {
2929
private int nextFilePathIndex;
3030
private InternalRow nextRowToReturn;
3131

32+
// Legacy = content entry first, then its metadata entry. Stays null until the first zip entry has been inspected.
33+
private Boolean isLegacyFormat;
34+
3235
ArchiveFileReader(FilePartition filePartition, FileContext fileContext) {
3336
this.filePartition = filePartition;
3437
this.fileContext = fileContext;
@@ -45,15 +48,23 @@ class ArchiveFileReader implements PartitionReader<InternalRow> {
4548
@Override
4649
public boolean next() {
4750
try {
48-
ZipEntry contentZipEntry = FileUtil.findNextFileEntry(currentZipInputStream);
49-
if (contentZipEntry == null) {
51+
ZipEntry nextZipEntry = FileUtil.findNextFileEntry(currentZipInputStream);
52+
if (nextZipEntry == null) {
5053
return openNextFileAndReadNextEntry();
5154
}
55+
56+
if (isLegacyFormat == null) {
57+
isLegacyFormat = !nextZipEntry.getName().endsWith(".metadata");
58+
}
59+
if (!isLegacyFormat) {
60+
return readMetadataFollowedByContentEntry();
61+
}
62+
5263
byte[] content = fileContext.readBytes(currentZipInputStream);
5364
if (content == null || content.length == 0) {
5465
return openNextFileAndReadNextEntry();
5566
}
56-
final String zipEntryName = contentZipEntry.getName();
67+
final String zipEntryName = nextZipEntry.getName();
5768

5869
byte[] metadataBytes = readMetadataEntry(zipEntryName);
5970
if (metadataBytes == null || metadataBytes.length == 0) {
@@ -86,6 +97,24 @@ public void close() {
8697
IOUtils.closeQuietly(this.currentZipInputStream);
8798
}
8899

100+
private boolean readMetadataFollowedByContentEntry() throws IOException {
101+
byte[] metadataBytes = fileContext.readBytes(currentZipInputStream);
102+
if (metadataBytes == null || metadataBytes.length == 0) {
103+
return openNextFileAndReadNextEntry();
104+
}
105+
106+
ZipEntry contentZipEntry = FileUtil.findNextFileEntry(currentZipInputStream);
107+
byte[] content = fileContext.readBytes(currentZipInputStream);
108+
109+
DocumentMetadataHandle metadata = new DocumentMetadataHandle();
110+
metadata.fromBuffer(metadataBytes);
111+
this.nextRowToReturn = new DocumentRowBuilder(this.metadataCategories)
112+
.withUri(contentZipEntry.getName())
113+
.withContent(content).withMetadata(metadata)
114+
.buildRow();
115+
return true;
116+
}
117+
89118
private void openNextFile() {
90119
this.currentFilePath = fileContext.decodeFilePath(filePartition.getPaths().get(nextFilePathIndex));
91120
nextFilePathIndex++;

src/main/java/com/marklogic/spark/writer/file/ZipFileWriter.java

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,14 +58,16 @@ public void write(InternalRow row) throws IOException {
5858
}
5959
final String uri = row.getString(0);
6060
final String entryName = FileUtil.makePathFromDocumentURI(uri);
61-
zipOutputStream.putNextEntry(new ZipEntry(entryName));
62-
this.contentWriter.writeContent(row, zipOutputStream);
63-
zipEntryCounter++;
61+
6462
if (hasMetadata(row)) {
6563
zipOutputStream.putNextEntry(new ZipEntry(entryName + ".metadata"));
6664
this.contentWriter.writeMetadata(row, zipOutputStream);
6765
zipEntryCounter++;
6866
}
67+
68+
zipOutputStream.putNextEntry(new ZipEntry(entryName));
69+
this.contentWriter.writeContent(row, zipOutputStream);
70+
zipEntryCounter++;
6971
}
7072

7173
@Override

src/test/java/com/marklogic/spark/writer/file/WriteArchiveTest.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,12 @@ private void verifyMetadataFiles(Path tempDir, String metadataValue) {
6464

6565
assertEquals(4, rows.size(), "Expecting the 2 author JSON entries and 2 entries for metadata.");
6666

67+
for (int i = 0; i < 4; i += 2) {
68+
String entryName = rows.get(i).getString(0);
69+
assertTrue(entryName.endsWith(".metadata"), "The metadata should come before the content entry. " +
70+
"This allows for the content to later be streamed back into MarkLogic. Entry name: " + entryName);
71+
}
72+
6773
final String expectedUriPrefix = "file://" + tempDir.toFile().getAbsolutePath();
6874
for (Row row : rows) {
6975
String uri = row.getString(0);

0 commit comments

Comments
 (0)