@@ -134,7 +134,24 @@ public class IndexDatabase {
134
134
135
135
private static final Logger LOGGER = LoggerFactory .getLogger (IndexDatabase .class );
136
136
137
- private static final Comparator <File > FILENAME_COMPARATOR = Comparator .comparing (File ::getName );
137
+ @ VisibleForTesting
138
+ static final Comparator <File > FILENAME_COMPARATOR = Comparator .comparing (File ::getName );
139
+
140
+ @ VisibleForTesting
141
+ static final Comparator <Path > FILEPATH_COMPARATOR = (p1 , p2 ) -> {
142
+ int nameCount = Math .min (p1 .getNameCount (), p2 .getNameCount ());
143
+ int i ;
144
+ for (i = 0 ; i < nameCount ; i ++) {
145
+ var c1 = p1 .getName (i ).toString ();
146
+ var c2 = p2 .getName (i ).toString ();
147
+ if (c1 .equals (c2 )) {
148
+ continue ;
149
+ }
150
+ return c1 .compareTo (c2 );
151
+ }
152
+
153
+ return Integer .compare (p1 .getNameCount (), p2 .getNameCount ());
154
+ };
138
155
139
156
private static final Set <String > CHECK_FIELDS ;
140
157
@@ -197,6 +214,22 @@ public IndexDatabase() throws IOException {
197
214
this (null );
198
215
}
199
216
217
/**
 * Test-only constructor that takes the uid iterator and the index writer from the caller.
 * Instances created this way must never have {@link #update()} called on them.
 * @param project project, passed on to the delegated constructor
 * @param uidIter uid iterator
 * @param writer index writer
 * @throws IOException on error
 */
@VisibleForTesting
IndexDatabase(Project project, TermsEnum uidIter, IndexWriter writer) throws IOException {
    this(project, new IndexDownArgsFactory());
    // Set up the supplied collaborators before running initialize().
    this.completer = new PendingFileCompleter();
    this.writer = writer;
    this.uidIter = uidIter;
    initialize();
}
232
+
200
233
/**
201
234
* Create a new instance of an Index Database for a given project.
202
235
*
@@ -709,8 +742,7 @@ public void update() throws IOException {
709
742
if (stat == TermsEnum .SeekStatus .END ) {
710
743
uidIter = null ;
711
744
LOGGER .log (Level .WARNING ,
712
- "Couldn''t find a start term for {0}, empty u field?" ,
713
- startUid );
745
+ "Couldn''t find a start term for {0}, empty u field?" , startUid );
714
746
}
715
747
}
716
748
@@ -819,19 +851,25 @@ private void setupDeletedUids() throws IOException {
819
851
Statistics stat = new Statistics ();
820
852
LOGGER .log (Level .FINEST , "traversing the documents in {0} to collect uids of deleted documents" ,
821
853
indexDirectory );
854
+ StoredFields storedFields = reader .storedFields ();
822
855
for (int i = 0 ; i < reader .maxDoc (); i ++) {
856
+ Document doc = storedFields .document (i , LIVE_CHECK_FIELDS ); // use limited-field version
857
+ IndexableField field = doc .getField (QueryBuilder .U );
823
858
if (!liveDocs .get (i )) {
824
- StoredFields storedFields = reader .storedFields ();
825
- Document doc = storedFields .document (i , LIVE_CHECK_FIELDS ); // use limited-field version
826
- IndexableField field = doc .getField (QueryBuilder .U );
827
859
if (field != null ) {
828
860
if (LOGGER .isLoggable (Level .FINEST )) {
829
861
String uidString = field .stringValue ();
830
- LOGGER .log (Level .FINEST , "adding ''{0}'' at {1} to deleted uid set" ,
831
- new Object []{Util .uid2url (uidString ), Util .uid2date (uidString )});
862
+ LOGGER .log (Level .FINEST , "adding ''{0}'' ({2}) at {1} to deleted uid set" ,
863
+ new Object []{Util .uid2url (uidString ), Util .uid2date (uidString ), i });
832
864
}
833
865
deletedUids .add (field .stringValue ());
834
866
}
867
+ } else {
868
+ if (field != null ) {
869
+ String uidString = field .stringValue ();
870
+ LOGGER .log (Level .FINEST , "live doc: ''{0}'' ({2}) at {1}" ,
871
+ new Object []{Util .uid2url (uidString ), Util .uid2date (uidString ), i });
872
+ }
835
873
}
836
874
}
837
875
stat .report (LOGGER , Level .FINEST , String .format ("found %s deleted documents in %s" ,
@@ -931,12 +969,17 @@ void indexDownUsingHistory(File sourceRoot, IndexDownArgs args) throws IOExcepti
931
969
932
970
try (Progress progress = new Progress (LOGGER , String .format ("collecting files for %s" , project ),
933
971
fileCollector .getFiles ().size ())) {
934
- for (String path : fileCollector .getFiles ()) {
972
+ List <Path > paths = fileCollector .getFiles ().stream ().
973
+ map (Path ::of ).
974
+ sorted (FILEPATH_COMPARATOR ).
975
+ collect (Collectors .toList ());
976
+ LOGGER .log (Level .FINEST , "collected sorted files: {0}" , paths );
977
+ for (Path path : paths ) {
935
978
if (isInterrupted ()) {
936
979
return ;
937
980
}
938
- File file = new File (sourceRoot , path );
939
- processFileHistoryBased (args , file , path );
981
+ File file = new File (sourceRoot , path . toString () );
982
+ processFileHistoryBased (args , file , path . toString () );
940
983
progress .increment ();
941
984
}
942
985
}
@@ -1096,16 +1139,17 @@ private void removeAnnotationFile(String path) {
1096
1139
* and queue the removal of the associated xref file.
1097
1140
*
1098
1141
* @param removeHistory if false, do not remove history cache for this file
1142
+ * @return deleted uid (as string)
1099
1143
* @throws java.io.IOException if an error occurs
1100
1144
*/
1101
- private void removeFile (boolean removeHistory ) throws IOException {
1145
+ private String removeFile (boolean removeHistory ) throws IOException {
1102
1146
String path = Util .uid2url (uidIter .term ().utf8ToString ());
1103
1147
1104
1148
for (IndexChangedListener listener : listeners ) {
1105
1149
listener .fileRemove (path );
1106
1150
}
1107
1151
1108
- removeFileDocUid (path );
1152
+ String deletedUid = removeFileDocUid (path );
1109
1153
1110
1154
removeXrefFile (path );
1111
1155
@@ -1122,9 +1166,11 @@ private void removeFile(boolean removeHistory) throws IOException {
1122
1166
for (IndexChangedListener listener : listeners ) {
1123
1167
listener .fileRemoved (path );
1124
1168
}
1169
+
1170
+ return deletedUid ;
1125
1171
}
1126
1172
1127
- private void removeFileDocUid (String path ) throws IOException {
1173
+ private String removeFileDocUid (String path ) throws IOException {
1128
1174
1129
1175
// Determine if a reversal of counts is necessary, and execute if so.
1130
1176
if (isCountingDeltas ) {
@@ -1141,6 +1187,8 @@ private void removeFileDocUid(String path) throws IOException {
1141
1187
}
1142
1188
1143
1189
writer .deleteDocuments (new Term (QueryBuilder .U , uidIter .term ()));
1190
+
1191
+ return uidIter .term ().utf8ToString ();
1144
1192
}
1145
1193
1146
1194
private void decrementLOCforDoc (String path , Document doc ) {
@@ -1648,6 +1696,17 @@ void indexDown(File dir, String parent, IndexDownArgs args, Progress progress) t
1648
1696
}
1649
1697
}
1650
1698
1699
+ /**
1700
+ * wrapper for fatal errors during indexing.
1701
+ */
1702
+ public static class IndexerFault extends RuntimeException {
1703
+ private static final long serialVersionUID = -1 ;
1704
+
1705
+ public IndexerFault (String message ) {
1706
+ super (message );
1707
+ }
1708
+ }
1709
+
1651
1710
/**
1652
1711
* Compared with {@link #processFile(IndexDownArgs, File, String)}, this method's file/path arguments
1653
1712
* represent files that have actually changed in some way, while the other method's argument represent
@@ -1660,12 +1719,14 @@ void indexDown(File dir, String parent, IndexDownArgs args, Progress progress) t
1660
1719
@ VisibleForTesting
1661
1720
void processFileHistoryBased (IndexDownArgs args , File file , String path ) throws IOException {
1662
1721
final boolean fileExists = file .exists ();
1663
-
1722
+ final Set < String > deletedUidsHere = new HashSet <>();
1664
1723
path = Util .fixPathIfWindows (path );
1724
+
1665
1725
// Traverse terms until reaching document beyond path of given file.
1666
- while (uidIter != null && uidIter .term () != null
1667
- && uidIter .term ().compareTo (emptyBR ) != 0
1668
- && Util .uid2url (uidIter .term ().utf8ToString ()).compareTo (path ) <= 0 ) {
1726
+ while (uidIter != null && uidIter .term () != null && uidIter .term ().compareTo (emptyBR ) != 0
1727
+ && FILEPATH_COMPARATOR .compare (
1728
+ Path .of (Util .uid2url (uidIter .term ().utf8ToString ())),
1729
+ Path .of (path )) <= 0 ) {
1669
1730
1670
1731
if (deletedUids .contains (uidIter .term ().utf8ToString ())) {
1671
1732
logIgnoredUid (uidIter .term ().utf8ToString ());
@@ -1688,9 +1749,10 @@ void processFileHistoryBased(IndexDownArgs args, File file, String path) throws
1688
1749
if (!matchOK ) {
1689
1750
removeFile (false );
1690
1751
addWorkHistoryBased (args , termFile , termPath );
1752
+ deletedUidsHere .add (removeFile (false ));
1691
1753
}
1692
1754
} else {
1693
- removeFile (!fileExists );
1755
+ deletedUidsHere . add ( removeFile (!fileExists ) );
1694
1756
}
1695
1757
1696
1758
BytesRef next = uidIter .next ();
@@ -1703,6 +1765,18 @@ void processFileHistoryBased(IndexDownArgs args, File file, String path) throws
1703
1765
// That said, it is necessary to check whether the file can be accepted. This is done in the function below.
1704
1766
// Also, allow for broken symbolic links (File.exists() returns false for these).
1705
1767
if (fileExists || Files .isSymbolicLink (file .toPath ())) {
1768
+ // This assumes that the last modified time is indeed what the indexer uses when adding the document.
1769
+ String time = DateTools .timeToString (file .lastModified (), DateTools .Resolution .MILLISECOND );
1770
+ if (deletedUidsHere .contains (Util .path2uid (path , time ))) {
1771
+ //
1772
+ // Adding a document with the same date as a pre-existing document which is being removed
1773
+ // will lead to index corruption (duplicate documents). Hence, make the indexer fail hard.
1774
+ //
1775
+ throw new IndexerFault (
1776
+ String .format ("attempting to add file '%s' with date matching deleted document: %s" ,
1777
+ path , time ));
1778
+ }
1779
+
1706
1780
addWorkHistoryBased (args , file , path );
1707
1781
}
1708
1782
}
0 commit comments