18
18
*/
19
19
20
20
/*
21
- * Copyright (c) 2018, 2023 , Oracle and/or its affiliates. All rights reserved.
21
+ * Copyright (c) 2018, 2024 , Oracle and/or its affiliates. All rights reserved.
22
22
* Portions Copyright (c) 2018, Chris Fraire <cfraire@me.com>.
23
23
*/
24
24
package org .opengrok .indexer .index ;
38
38
import java .util .Map ;
39
39
import java .util .Objects ;
40
40
import java .util .Set ;
41
+ import java .util .TreeSet ;
41
42
import java .util .concurrent .ConcurrentHashMap ;
42
43
import java .util .concurrent .ExecutionException ;
43
44
import java .util .concurrent .ExecutorService ;
63
64
import org .apache .lucene .util .Bits ;
64
65
import org .apache .lucene .util .Version ;
65
66
import org .jetbrains .annotations .NotNull ;
66
- import org .jetbrains .annotations .Nullable ;
67
67
import org .jetbrains .annotations .VisibleForTesting ;
68
68
import org .opengrok .indexer .analysis .Definitions ;
69
69
import org .opengrok .indexer .configuration .Configuration ;
@@ -232,7 +232,9 @@ public void check(IndexCheckMode mode) throws IOException, IndexCheckException {
232
232
/**
233
233
* Perform specified check on given index directory. All exceptions except {@code IOException} are swallowed
234
234
* and result in return value of 1.
235
+ * @param sourcePath source root path
235
236
* @param indexPath directory with index
237
+ * @param mode index check mode
236
238
* @throws IOException on I/O error
237
239
* @throws IndexCheckException if the index failed given check
238
240
*/
@@ -270,7 +272,7 @@ void checkDir(Path sourcePath, Path indexPath, IndexCheckMode mode)
270
272
checkVersion (sourcePath , indexPath );
271
273
break ;
272
274
case DOCUMENTS :
273
- checkDuplicateDocuments (sourcePath , indexPath );
275
+ checkDocuments (sourcePath , indexPath );
274
276
break ;
275
277
case DEFINITIONS :
276
278
checkDefinitions (sourcePath , indexPath );
@@ -410,14 +412,14 @@ private void checkDefinitions(Path sourcePath, Path indexPath) throws IOExceptio
410
412
errors ++;
411
413
}
412
414
} catch (Exception e ) {
413
- LOGGER .log (Level .WARNING , "failure when checking definitions" , e );
415
+ LOGGER .log (Level .WARNING , String . format ( "failure when checking definitions for '%s'" , indexPath ) , e );
414
416
final Throwable cause = e .getCause ();
415
417
if (cause instanceof IOException ) {
416
418
ioException = (IOException ) cause ;
417
419
}
418
420
}
419
421
}
420
- statistics .report (LOGGER , Level .FINE , String .format ("checked %d files" , paths .size ()));
422
+ statistics .report (LOGGER , Level .FINE , String .format ("checked %d files for '%s' " , paths .size (), indexPath ));
421
423
422
424
// If there were multiple cases of IOException, they were logged above.
423
425
// Propagate the last one so that upper layers can properly decide on how to treat the index check.
@@ -426,17 +428,17 @@ private void checkDefinitions(Path sourcePath, Path indexPath) throws IOExceptio
426
428
}
427
429
428
430
if (errors > 0 ) {
429
- throw new IndexDocumentException (String .format ("document check failed for (%d documents out of %d)" ,
430
- errors , paths .size ()), indexPath );
431
+ throw new IndexDocumentException (String .format ("definitions check failed for '%s' (%d documents out of %d)" ,
432
+ indexPath , errors , paths .size ()), sourcePath );
431
433
}
432
434
}
433
435
434
436
/**
435
- * @param sourcePath path to the source
436
- * @param indexPath path to the index directory
437
- * @throws IOException on I/O error
437
+ * @param sourcePath source path
438
+ * @param indexPath path to the index directory
439
+ * @throws IOException on I/O error
438
440
* @throws IndexVersionException if the version stored in the document does not match the version
439
- * used by the running program
441
+ * used by the running program
440
442
*/
441
443
private void checkVersion (Path sourcePath , Path indexPath ) throws IOException , IndexVersionException {
442
444
LockFactory lockFactory = NativeFSLockFactory .INSTANCE ;
@@ -456,7 +458,7 @@ private void checkVersion(Path sourcePath, Path indexPath) throws IOException, I
456
458
new Object []{indexPath , segVersion , Version .LATEST .major });
457
459
if (segVersion != Version .LATEST .major ) {
458
460
throw new IndexVersionException (
459
- String .format ("Index for '%s' has index version discrepancy" , sourcePath ), sourcePath ,
461
+ String .format ("Index in '%s' has index version discrepancy" , indexPath ), sourcePath ,
460
462
Version .LATEST .major , segVersion );
461
463
}
462
464
}
@@ -506,72 +508,88 @@ static Set<String> getDeletedUids(Path indexPath) throws IOException {
506
508
* (never {@code null}; when the index has no deletions, every document counts as live).
507
509
* @throws IOException on I/O error
508
510
*/
509
- @ Nullable
510
511
@ VisibleForTesting
511
- static List <String > getLiveDocumentPaths (Path indexPath ) throws IOException {
512
+ static List <Path > getLiveDocumentPaths (Path indexPath ) throws IOException {
512
513
try (IndexReader indexReader = getIndexReader (indexPath )) {
513
- List <String > livePaths = new ArrayList <>();
514
+ List <Path > livePaths = new ArrayList <>();
514
515
515
516
Bits liveDocs = MultiBits .getLiveDocs (indexReader );
516
- if (liveDocs == null ) { // the index has no deletions
517
- return null ;
518
- }
519
517
520
518
for (int i = 0 ; i < indexReader .maxDoc (); i ++) {
521
519
Document doc = indexReader .storedFields ().document (i );
522
520
523
- if (!liveDocs .get (i )) {
521
+ // liveDocs is null if the index has no deletions.
522
+ if (liveDocs != null && !liveDocs .get (i )) {
524
523
continue ;
525
524
}
526
525
527
526
// This should avoid the special LOC documents.
528
527
IndexableField field = doc .getField (QueryBuilder .U );
529
528
if (field != null ) {
530
529
String uid = field .stringValue ();
531
- livePaths .add (Util .uid2url (uid ));
530
+ livePaths .add (Path . of ( Util .uid2url (uid ) ));
532
531
}
533
532
}
534
533
535
534
return livePaths ;
536
535
}
537
536
}
538
537
539
- private static void checkDuplicateDocuments (Path sourcePath , Path indexPath ) throws IOException , IndexDocumentException {
538
+ /**
539
+ * Check live (not deleted) documents in the index whether they have the following properties.
540
+ * <ul>
541
+ * <li>they have corresponding file under source root</li>
542
+ * <li>there is exactly one document with the same path</li>
543
+ * </ul>
544
+ * @param sourcePath source root path
545
+ * @param indexPath index path
546
+ * @throws IOException on I/O error
547
+ * @throws IndexDocumentException if the index failed the check
548
+ */
549
+ private void checkDocuments (Path sourcePath , Path indexPath ) throws IOException , IndexDocumentException {
540
550
541
- LOGGER .log (Level .FINE , "Checking duplicate documents in ''{0}''" , indexPath );
542
551
Statistics stat = new Statistics ();
543
- List <String > livePaths = getLiveDocumentPaths (indexPath );
544
- if (livePaths == null ) {
545
- throw new IndexDocumentException (String .format ("cannot determine live paths for '%s'" , indexPath ),
546
- indexPath );
552
+ List <Path > livePaths = getLiveDocumentPaths (indexPath );
553
+
554
+ LOGGER .log (Level .FINE , "checking documents in ''{0}}'' have corresponding file under source root ''{1}''" ,
555
+ new Object []{indexPath , sourcePath });
556
+ Set <Path > missingPaths = new TreeSet <>();
557
+ for (Path relativePath : livePaths ) {
558
+ Path absolutePath = Path .of (configuration .getSourceRoot (), relativePath .toString ());
559
+ if (!Files .exists (absolutePath )) {
560
+ LOGGER .log (Level .FINER , "path ''{0}'' does not exist" , absolutePath );
561
+ missingPaths .add (absolutePath );
562
+ }
547
563
}
548
- HashSet <String > pathSet = new HashSet <>(livePaths );
549
- Map <String , Integer > fileMap = new ConcurrentHashMap <>();
564
+
565
+ LOGGER .log (Level .FINE , "Checking duplicate documents in ''{0}''" , indexPath );
566
+ HashSet <Path > pathSet = new HashSet <>(livePaths );
567
+ Map <Path , Integer > duplicatePathMap = new ConcurrentHashMap <>();
550
568
if (pathSet .size () != livePaths .size ()) {
551
569
LOGGER .log (Level .FINE ,
552
570
"index in ''{0}'' has document path set ({1}) vs document list ({2}) discrepancy" ,
553
571
new Object []{indexPath , pathSet .size (), livePaths .size ()});
554
- for (String path : livePaths ) {
572
+ for (Path path : livePaths ) {
555
573
if (pathSet .contains (path )) {
556
- fileMap .putIfAbsent (path , 0 );
557
- fileMap .put (path , fileMap .get (path ) + 1 );
574
+ duplicatePathMap .putIfAbsent (path , 0 );
575
+ duplicatePathMap .put (path , duplicatePathMap .get (path ) + 1 );
558
576
}
559
577
}
560
578
}
561
579
562
580
// Traverse the file map and leave only duplicate entries.
563
- for (String path : fileMap .keySet ()) {
564
- if (fileMap .get (path ) > 1 ) {
581
+ for (Path path : duplicatePathMap .keySet ()) {
582
+ if (duplicatePathMap .get (path ) > 1 ) {
565
583
LOGGER .log (Level .FINER , "duplicate path: ''{0}''" , path );
566
584
} else {
567
- fileMap .remove (path );
585
+ duplicatePathMap .remove (path );
568
586
}
569
587
}
570
588
571
- stat .report (LOGGER , Level .FINE , String .format ("duplicate check in '%s' done" , indexPath ));
572
- if (!fileMap .isEmpty ()) {
573
- throw new IndexDocumentException (String .format ("index for '%s' contains duplicate live documents " ,
574
- sourcePath ), sourcePath , fileMap );
589
+ stat .report (LOGGER , Level .FINE , String .format ("document check in '%s' done" , indexPath ));
590
+ if (!duplicatePathMap . isEmpty () || ! missingPaths .isEmpty ()) {
591
+ throw new IndexDocumentException (String .format ("index '%s' failed document check " ,
592
+ indexPath ), sourcePath , duplicatePathMap , missingPaths );
575
593
}
576
594
}
577
595
}
0 commit comments