@@ -29,6 +29,9 @@ class ArchiveFileReader implements PartitionReader<InternalRow> {
29
29
private int nextFilePathIndex ;
30
30
private InternalRow nextRowToReturn ;
31
31
32
+ // Legacy = content first, then metadata.
33
+ private Boolean isLegacyFormat ;
34
+
32
35
ArchiveFileReader (FilePartition filePartition , FileContext fileContext ) {
33
36
this .filePartition = filePartition ;
34
37
this .fileContext = fileContext ;
@@ -45,15 +48,23 @@ class ArchiveFileReader implements PartitionReader<InternalRow> {
45
48
@ Override
46
49
public boolean next () {
47
50
try {
48
- ZipEntry contentZipEntry = FileUtil .findNextFileEntry (currentZipInputStream );
49
- if (contentZipEntry == null ) {
51
+ ZipEntry nextZipEntry = FileUtil .findNextFileEntry (currentZipInputStream );
52
+ if (nextZipEntry == null ) {
50
53
return openNextFileAndReadNextEntry ();
51
54
}
55
+
56
+ if (isLegacyFormat == null ) {
57
+ isLegacyFormat = !nextZipEntry .getName ().endsWith (".metadata" );
58
+ }
59
+ if (!isLegacyFormat ) {
60
+ return readMetadataFollowedByContentEntry ();
61
+ }
62
+
52
63
byte [] content = fileContext .readBytes (currentZipInputStream );
53
64
if (content == null || content .length == 0 ) {
54
65
return openNextFileAndReadNextEntry ();
55
66
}
56
- final String zipEntryName = contentZipEntry .getName ();
67
+ final String zipEntryName = nextZipEntry .getName ();
57
68
58
69
byte [] metadataBytes = readMetadataEntry (zipEntryName );
59
70
if (metadataBytes == null || metadataBytes .length == 0 ) {
@@ -86,6 +97,24 @@ public void close() {
86
97
IOUtils .closeQuietly (this .currentZipInputStream );
87
98
}
88
99
100
+ private boolean readMetadataFollowedByContentEntry () throws IOException {
101
+ byte [] metadataBytes = fileContext .readBytes (currentZipInputStream );
102
+ if (metadataBytes == null || metadataBytes .length == 0 ) {
103
+ return openNextFileAndReadNextEntry ();
104
+ }
105
+
106
+ ZipEntry contentZipEntry = FileUtil .findNextFileEntry (currentZipInputStream );
107
+ byte [] content = fileContext .readBytes (currentZipInputStream );
108
+
109
+ DocumentMetadataHandle metadata = new DocumentMetadataHandle ();
110
+ metadata .fromBuffer (metadataBytes );
111
+ this .nextRowToReturn = new DocumentRowBuilder (this .metadataCategories )
112
+ .withUri (contentZipEntry .getName ())
113
+ .withContent (content ).withMetadata (metadata )
114
+ .buildRow ();
115
+ return true ;
116
+ }
117
+
89
118
private void openNextFile () {
90
119
this .currentFilePath = fileContext .decodeFilePath (filePartition .getPaths ().get (nextFilePathIndex ));
91
120
nextFilePathIndex ++;
0 commit comments