Skip to content

Commit b753f02

Browse files
committed
check missing source files for documents
1 parent 8880f41 commit b753f02

File tree

6 files changed

+145
-59
lines changed

6 files changed

+145
-59
lines changed

opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexCheck.java

Lines changed: 56 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
*/
1919

2020
/*
21-
* Copyright (c) 2018, 2023, Oracle and/or its affiliates. All rights reserved.
21+
* Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
2222
* Portions Copyright (c) 2018, Chris Fraire <cfraire@me.com>.
2323
*/
2424
package org.opengrok.indexer.index;
@@ -38,6 +38,7 @@
3838
import java.util.Map;
3939
import java.util.Objects;
4040
import java.util.Set;
41+
import java.util.TreeSet;
4142
import java.util.concurrent.ConcurrentHashMap;
4243
import java.util.concurrent.ExecutionException;
4344
import java.util.concurrent.ExecutorService;
@@ -63,7 +64,6 @@
6364
import org.apache.lucene.util.Bits;
6465
import org.apache.lucene.util.Version;
6566
import org.jetbrains.annotations.NotNull;
66-
import org.jetbrains.annotations.Nullable;
6767
import org.jetbrains.annotations.VisibleForTesting;
6868
import org.opengrok.indexer.analysis.Definitions;
6969
import org.opengrok.indexer.configuration.Configuration;
@@ -232,7 +232,9 @@ public void check(IndexCheckMode mode) throws IOException, IndexCheckException {
232232
/**
233233
* Perform specified check on given index directory. All exceptions except {@code IOException} are swallowed
234234
* and result in return value of 1.
235+
* @param sourcePath source root path
235236
* @param indexPath directory with index
237+
* @param mode index check mode
236238
* @throws IOException on I/O error
237239
* @throws IndexCheckException if the index failed given check
238240
*/
@@ -270,7 +272,7 @@ void checkDir(Path sourcePath, Path indexPath, IndexCheckMode mode)
270272
checkVersion(sourcePath, indexPath);
271273
break;
272274
case DOCUMENTS:
273-
checkDuplicateDocuments(sourcePath, indexPath);
275+
checkDocuments(sourcePath, indexPath);
274276
break;
275277
case DEFINITIONS:
276278
checkDefinitions(sourcePath, indexPath);
@@ -410,14 +412,14 @@ private void checkDefinitions(Path sourcePath, Path indexPath) throws IOExceptio
410412
errors++;
411413
}
412414
} catch (Exception e) {
413-
LOGGER.log(Level.WARNING, "failure when checking definitions", e);
415+
LOGGER.log(Level.WARNING, String.format("failure when checking definitions for '%s'", indexPath), e);
414416
final Throwable cause = e.getCause();
415417
if (cause instanceof IOException) {
416418
ioException = (IOException) cause;
417419
}
418420
}
419421
}
420-
statistics.report(LOGGER, Level.FINE, String.format("checked %d files", paths.size()));
422+
statistics.report(LOGGER, Level.FINE, String.format("checked %d files for '%s'", paths.size(), indexPath));
421423

422424
// If there were multiple cases of IOException, they were logged above.
423425
// Propagate the last one so that upper layers can properly decide on how to treat the index check.
@@ -426,17 +428,17 @@ private void checkDefinitions(Path sourcePath, Path indexPath) throws IOExceptio
426428
}
427429

428430
if (errors > 0) {
429-
throw new IndexDocumentException(String.format("document check failed for (%d documents out of %d)",
430-
errors, paths.size()), indexPath);
431+
throw new IndexDocumentException(String.format("definitions check failed for '%s' (%d documents out of %d)",
432+
indexPath, errors, paths.size()), sourcePath);
431433
}
432434
}
433435

434436
/**
435-
* @param sourcePath path to the source
436-
* @param indexPath path to the index directory
437-
* @throws IOException on I/O error
437+
* @param sourcePath source path
438+
* @param indexPath path to the index directory
439+
* @throws IOException on I/O error
438440
* @throws IndexVersionException if the version stored in the document does not match the version
439-
* used by the running program
441+
* used by the running program
440442
*/
441443
private void checkVersion(Path sourcePath, Path indexPath) throws IOException, IndexVersionException {
442444
LockFactory lockFactory = NativeFSLockFactory.INSTANCE;
@@ -456,7 +458,7 @@ private void checkVersion(Path sourcePath, Path indexPath) throws IOException, I
456458
new Object[]{indexPath, segVersion, Version.LATEST.major});
457459
if (segVersion != Version.LATEST.major) {
458460
throw new IndexVersionException(
459-
String.format("Index for '%s' has index version discrepancy", sourcePath), sourcePath,
461+
String.format("Index in '%s' has index version discrepancy", indexPath), sourcePath,
460462
Version.LATEST.major, segVersion);
461463
}
462464
}
@@ -506,72 +508,88 @@ static Set<String> getDeletedUids(Path indexPath) throws IOException {
506508
* or {@code null} if live documents cannot be retrieved.
507509
* @throws IOException on I/O error
508510
*/
509-
@Nullable
510511
@VisibleForTesting
511-
static List<String> getLiveDocumentPaths(Path indexPath) throws IOException {
512+
static List<Path> getLiveDocumentPaths(Path indexPath) throws IOException {
512513
try (IndexReader indexReader = getIndexReader(indexPath)) {
513-
List<String> livePaths = new ArrayList<>();
514+
List<Path> livePaths = new ArrayList<>();
514515

515516
Bits liveDocs = MultiBits.getLiveDocs(indexReader);
516-
if (liveDocs == null) { // the index has no deletions
517-
return null;
518-
}
519517

520518
for (int i = 0; i < indexReader.maxDoc(); i++) {
521519
Document doc = indexReader.storedFields().document(i);
522520

523-
if (!liveDocs.get(i)) {
521+
// liveDocs is null if the index has no deletions.
522+
if (liveDocs != null && !liveDocs.get(i)) {
524523
continue;
525524
}
526525

527526
// This should avoid the special LOC documents.
528527
IndexableField field = doc.getField(QueryBuilder.U);
529528
if (field != null) {
530529
String uid = field.stringValue();
531-
livePaths.add(Util.uid2url(uid));
530+
livePaths.add(Path.of(Util.uid2url(uid)));
532531
}
533532
}
534533

535534
return livePaths;
536535
}
537536
}
538537

539-
private static void checkDuplicateDocuments(Path sourcePath, Path indexPath) throws IOException, IndexDocumentException {
538+
/**
539+
* Check live (not deleted) documents in the index whether they have the following properties.
540+
* <ul>
541+
* <li>they have corresponding file under source root</li>
542+
* <li>there is exactly one document with the same path</li>
543+
* </ul>
544+
* @param sourcePath source root path
545+
* @param indexPath index path
546+
* @throws IOException on I/O error
547+
* @throws IndexDocumentException if the index failed the check
548+
*/
549+
private void checkDocuments(Path sourcePath, Path indexPath) throws IOException, IndexDocumentException {
540550

541-
LOGGER.log(Level.FINE, "Checking duplicate documents in ''{0}''", indexPath);
542551
Statistics stat = new Statistics();
543-
List<String> livePaths = getLiveDocumentPaths(indexPath);
544-
if (livePaths == null) {
545-
throw new IndexDocumentException(String.format("cannot determine live paths for '%s'", indexPath),
546-
indexPath);
552+
List<Path> livePaths = getLiveDocumentPaths(indexPath);
553+
554+
LOGGER.log(Level.FINE, "checking documents in ''{0}}'' have corresponding file under source root ''{1}''",
555+
new Object[]{indexPath, sourcePath});
556+
Set<Path> missingPaths = new TreeSet<>();
557+
for (Path relativePath : livePaths) {
558+
Path absolutePath = Path.of(configuration.getSourceRoot(), relativePath.toString());
559+
if (!Files.exists(absolutePath)) {
560+
LOGGER.log(Level.FINER, "path ''{0}'' does not exist", absolutePath);
561+
missingPaths.add(absolutePath);
562+
}
547563
}
548-
HashSet<String> pathSet = new HashSet<>(livePaths);
549-
Map<String, Integer> fileMap = new ConcurrentHashMap<>();
564+
565+
LOGGER.log(Level.FINE, "Checking duplicate documents in ''{0}''", indexPath);
566+
HashSet<Path> pathSet = new HashSet<>(livePaths);
567+
Map<Path, Integer> duplicatePathMap = new ConcurrentHashMap<>();
550568
if (pathSet.size() != livePaths.size()) {
551569
LOGGER.log(Level.FINE,
552570
"index in ''{0}'' has document path set ({1}) vs document list ({2}) discrepancy",
553571
new Object[]{indexPath, pathSet.size(), livePaths.size()});
554-
for (String path : livePaths) {
572+
for (Path path : livePaths) {
555573
if (pathSet.contains(path)) {
556-
fileMap.putIfAbsent(path, 0);
557-
fileMap.put(path, fileMap.get(path) + 1);
574+
duplicatePathMap.putIfAbsent(path, 0);
575+
duplicatePathMap.put(path, duplicatePathMap.get(path) + 1);
558576
}
559577
}
560578
}
561579

562580
// Traverse the file map and leave only duplicate entries.
563-
for (String path: fileMap.keySet()) {
564-
if (fileMap.get(path) > 1) {
581+
for (Path path: duplicatePathMap.keySet()) {
582+
if (duplicatePathMap.get(path) > 1) {
565583
LOGGER.log(Level.FINER, "duplicate path: ''{0}''", path);
566584
} else {
567-
fileMap.remove(path);
585+
duplicatePathMap.remove(path);
568586
}
569587
}
570588

571-
stat.report(LOGGER, Level.FINE, String.format("duplicate check in '%s' done", indexPath));
572-
if (!fileMap.isEmpty()) {
573-
throw new IndexDocumentException(String.format("index for '%s' contains duplicate live documents",
574-
sourcePath), sourcePath, fileMap);
589+
stat.report(LOGGER, Level.FINE, String.format("document check in '%s' done", indexPath));
590+
if (!duplicatePathMap.isEmpty() || !missingPaths.isEmpty()) {
591+
throw new IndexDocumentException(String.format("index '%s' failed document check",
592+
indexPath), sourcePath, duplicatePathMap, missingPaths);
575593
}
576594
}
577595
}

opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexCheckException.java

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
*/
1919

2020
/*
21-
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
21+
* Copyright (c) 2023, 2024, Oracle and/or its affiliates. All rights reserved.
2222
*/
2323
package org.opengrok.indexer.index;
2424

@@ -35,16 +35,19 @@ public class IndexCheckException extends Exception {
3535

3636
private final Set<Path> failedPaths = new HashSet<>();
3737

38-
public IndexCheckException(String s, Path path) {
39-
super(s);
38+
public IndexCheckException(String message, Path path) {
39+
super(message);
4040
failedPaths.add(path);
4141
}
4242

43-
public IndexCheckException(String s, Set<Path> paths) {
44-
super(s);
43+
public IndexCheckException(String message, Set<Path> paths) {
44+
super(message);
4545
failedPaths.addAll(paths);
4646
}
4747

48+
/**
49+
* @return set of source paths which failed the check
50+
*/
4851
public Set<Path> getFailedPaths() {
4952
return Collections.unmodifiableSet(failedPaths);
5053
}

opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDocumentException.java

Lines changed: 24 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,33 +18,48 @@
1818
*/
1919

2020
/*
21-
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
21+
* Copyright (c) 2023, 2024, Oracle and/or its affiliates. All rights reserved.
2222
*/
2323
package org.opengrok.indexer.index;
2424

2525
import java.nio.file.Path;
2626
import java.util.Map;
27+
import java.util.Set;
2728

2829
/**
29-
* Exception thrown when index contains duplicate live documents.
30+
* Exception thrown when index document check fails.
3031
*/
3132
public class IndexDocumentException extends IndexCheckException {
32-
private static final long serialVersionUID = 5693446916108385595L;
33+
private static final long serialVersionUID = -277429315137557112L;
3334

34-
private final Map<String, Integer> fileMap;
35+
private final Map<Path, Integer> duplicatePathMap;
36+
private final Set<Path> missingPaths;
3537

3638
public IndexDocumentException(String s, Path path) {
3739
super(s, path);
38-
this.fileMap = null;
40+
this.duplicatePathMap = null;
41+
this.missingPaths = null;
3942
}
4043

41-
public IndexDocumentException(String s, Path path, Map<String, Integer> fileMap) {
42-
super(s, path);
43-
this.fileMap = fileMap;
44+
public IndexDocumentException(String message, Path indexPath, Map<Path, Integer> duplicateFileMap, Set<Path> missingPaths) {
45+
super(message, indexPath);
46+
this.duplicatePathMap = duplicateFileMap;
47+
this.missingPaths = missingPaths;
4448
}
4549

4650
@Override
4751
public String toString() {
48-
return getMessage() + ": " + (fileMap == null ? "" : fileMap);
52+
StringBuilder stringBuilder = new StringBuilder();
53+
stringBuilder.append(getMessage());
54+
stringBuilder.append(":");
55+
if (!duplicatePathMap.isEmpty()) {
56+
stringBuilder.append(" duplicate paths = ");
57+
stringBuilder.append(duplicatePathMap);
58+
}
59+
if (!missingPaths.isEmpty()) {
60+
stringBuilder.append(" missing paths = ");
61+
stringBuilder.append(missingPaths);
62+
}
63+
return stringBuilder.toString();
4964
}
5065
}

opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexVersionException.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,8 @@ public class IndexVersionException extends IndexCheckException {
3434
private final int luceneIndexVersion;
3535
private final int indexVersion;
3636

37-
public IndexVersionException(String s, Path path, int luceneIndexVersion, int indexVersion) {
38-
super(s, path);
37+
public IndexVersionException(String message, Path path, int luceneIndexVersion, int indexVersion) {
38+
super(message, path);
3939
this.indexVersion = indexVersion;
4040
this.luceneIndexVersion = luceneIndexVersion;
4141
}

opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexCheckTest.java

Lines changed: 51 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
*/
1919

2020
/*
21-
* Copyright (c) 2018, 2023, Oracle and/or its affiliates. All rights reserved.
21+
* Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
2222
* Portions Copyright (c) 2019, Chris Fraire <cfraire@me.com>.
2323
*/
2424
package org.opengrok.indexer.index;
@@ -107,6 +107,12 @@ private void testIndex(boolean projectsEnabled, List<String> subFiles, IndexChec
107107
null, null);
108108
Indexer.getInstance().doIndexerExecution(null, null);
109109

110+
// The configuration needs to be populated with projects discovered in prepareIndexer()
111+
// for the project based index check to work as it uses configuration rather than RuntimeEnvironment.
112+
if (projectsEnabled) {
113+
configuration.setProjects(env.getProjects());
114+
}
115+
110116
try (IndexCheck indexCheck = new IndexCheck(configuration, subFiles)) {
111117
assertDoesNotThrow(() -> indexCheck.check(mode));
112118
}
@@ -249,6 +255,10 @@ void testMultipleTestsWithSameInstance() throws Exception {
249255
null, null);
250256
Indexer.getInstance().doIndexerExecution(null, null);
251257

258+
// The configuration needs to be populated with projects discovered in prepareIndexer()
259+
// for the project based index check to work as it uses configuration rather than RuntimeEnvironment.
260+
configuration.setProjects(env.getProjects());
261+
252262
try (IndexCheck indexCheck = new IndexCheck(configuration)) {
253263
for (int i = 0; i < 3; i++) {
254264
assertDoesNotThrow(() -> indexCheck.check(IndexCheck.IndexCheckMode.VERSION));
@@ -259,8 +269,46 @@ void testMultipleTestsWithSameInstance() throws Exception {
259269
@Test
260270
void testNullConfiguration() throws Exception {
261271
assertThrows(NullPointerException.class, () -> {
262-
new IndexCheck(null);
263-
}
272+
new IndexCheck(null);
273+
}
264274
);
265275
}
276+
277+
@ParameterizedTest
278+
@ValueSource(booleans = {true, false})
279+
void testMissingSourceDocumentCheck(boolean projectsEnabled) throws Exception {
280+
env.setProjectsEnabled(projectsEnabled);
281+
configuration.setProjectsEnabled(projectsEnabled);
282+
Indexer.getInstance().prepareIndexer(env, true, projectsEnabled,
283+
null, null);
284+
Indexer.getInstance().doIndexerExecution(null, null);
285+
286+
final String sourceRoot = env.getSourceRootPath();
287+
Path originPath = Path.of(sourceRoot, "git", "main.c");
288+
assertTrue(originPath.toFile().isFile());
289+
Path tempPath = Path.of(sourceRoot, "git", "main.c.tmp");
290+
Files.move(originPath, tempPath);
291+
292+
// The configuration needs to be populated with projects discovered in prepareIndexer()
293+
// for the project based index check to work as it uses configuration rather than RuntimeEnvironment.
294+
if (projectsEnabled) {
295+
configuration.setProjects(env.getProjects());
296+
}
297+
298+
try (IndexCheck indexCheck = new IndexCheck(configuration)) {
299+
IndexCheckException exception = assertThrows(IndexCheckException.class,
300+
() -> indexCheck.check(IndexCheck.IndexCheckMode.DOCUMENTS));
301+
assertEquals(1, exception.getFailedPaths().size());
302+
Path expectedPath;
303+
if (projectsEnabled) {
304+
expectedPath = Path.of(sourceRoot, "git");
305+
} else {
306+
expectedPath = Path.of(sourceRoot);
307+
}
308+
assertTrue(exception.getFailedPaths().contains(expectedPath));
309+
}
310+
311+
// cleanup
312+
Files.move(tempPath, originPath);
313+
}
266314
}

0 commit comments

Comments
 (0)