Skip to content

Commit 53dd75c

Browse files
koppor and calixtus authored
Fix indexing (#13498)
* Add support for short output for BibEntry * Add trace logging for entry addition * Fix maxlength handling * Refine comment * Use LatexToUnicodeAdapter * Fix exception on empty field when indexing * Use @NonNull annotation * Add PR number * Apply suggestion from @calixtus Co-authored-by: Carl Christian Snethlage <50491877+calixtus@users.noreply.github.com> * Fix checkstyle * Remove CHANGELOG entry --------- Co-authored-by: Carl Christian Snethlage <50491877+calixtus@users.noreply.github.com>
1 parent 78229eb commit 53dd75c

File tree

6 files changed

+45
-34
lines changed

6 files changed

+45
-34
lines changed

jablib/src/main/java/org/jabref/logic/search/indexing/BibFieldsIndexer.java

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
import java.util.regex.Pattern;
1212

1313
import org.jabref.logic.l10n.Localization;
14-
import org.jabref.logic.layout.format.LatexToUnicodeFormatter;
1514
import org.jabref.logic.util.BackgroundTask;
1615
import org.jabref.logic.util.HeadlessExecutorService;
1716
import org.jabref.model.database.BibDatabaseContext;
@@ -23,6 +22,7 @@
2322
import org.jabref.model.entry.field.FieldProperty;
2423
import org.jabref.model.entry.field.StandardField;
2524
import org.jabref.model.search.PostgreConstants;
25+
import org.jabref.model.strings.LatexToUnicodeAdapter;
2626

2727
import io.github.thibaultmeyer.cuid.CUID;
2828
import org.slf4j.Logger;
@@ -37,7 +37,7 @@
3737

3838
public class BibFieldsIndexer {
3939
private static final Logger LOGGER = LoggerFactory.getLogger(BibFieldsIndexer.class);
40-
private static final LatexToUnicodeFormatter LATEX_TO_UNICODE_FORMATTER = new LatexToUnicodeFormatter();
40+
4141
private static final Pattern GROUPS_SEPARATOR_REGEX = Pattern.compile("\s*,\s*");
4242
private static final Set<Field> DATE_FIELDS = Set.of(StandardField.DATE, StandardField.YEAR, StandardField.MONTH, StandardField.DAY);
4343

@@ -209,20 +209,21 @@ private void addToIndex(BibEntry bibEntry) {
209209
try (PreparedStatement preparedStatement = connection.prepareStatement(insertFieldQuery);
210210
PreparedStatement preparedStatementSplitValues = connection.prepareStatement(insertIntoSplitTable)) {
211211
String entryId = bibEntry.getId();
212+
LOGGER.atTrace().setMessage("Adding entry {}").addArgument(() -> bibEntry.getKeyAuthorTitleYear()).log();
212213
for (Map.Entry<Field, String> fieldPair : bibEntry.getFieldMap().entrySet()) {
213214
Field field = fieldPair.getKey();
214215
String value = fieldPair.getValue();
215216

216-
// If a field exists, there also exists a resolved field latex free.
217-
// We add a `.orElse("")` only because there could be some flaw in the future in the code - and we want to have search working even if the flaws are present.
218-
// To uncover these flaws, we add the "assert" statement.
219-
// One potential future flaw is that the bibEntry is modified concurrently and the field being deleted.
220217
// Skip indexing of date-related fields separately to ensure proper handling later in the process.
221218
if (!DATE_FIELDS.contains(field)) {
222-
Optional<String> resolvedFieldLatexFree = bibEntry.getResolvedFieldOrAliasLatexFree(field, this.databaseContext.getDatabase());
223-
assert resolvedFieldLatexFree.isPresent();
224-
addBatch(preparedStatement, entryId, field, value, resolvedFieldLatexFree.orElse(""));
219+
// If a field exists, there also exists a resolved field latex free.
220+
// Only exception: If the content of the field is empty, then the resolved field is also empty. Example: `series = {{}}`.
221+
String resolvedFieldLatexFree = bibEntry.getResolvedFieldOrAliasLatexFree(field, this.databaseContext.getDatabase()).orElse("");
222+
223+
// The `.orElse("")` also guards against a potential future flaw: the bibEntry being modified concurrently and the field being deleted in the meantime.
224+
addBatch(preparedStatement, entryId, field, value, resolvedFieldLatexFree);
225225
}
226+
226227
// region Handling of known multi-value fields
227228
// split and convert to Unicode
228229
if (field.getProperties().contains(FieldProperty.PERSON_NAMES)) {
@@ -457,7 +458,7 @@ private static void addAuthors(String value, PreparedStatement preparedStatement
457458
}
458459

459460
private static void addBatch(PreparedStatement preparedStatement, String entryId, Field field, String value) {
460-
addBatch(preparedStatement, entryId, field, value, LATEX_TO_UNICODE_FORMATTER.format(value));
461+
addBatch(preparedStatement, entryId, field, value, LatexToUnicodeAdapter.format(value));
461462
}
462463

463464
/**

jablib/src/main/java/org/jabref/model/database/BibDatabase.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141

4242
import com.google.common.eventbus.EventBus;
4343
import com.google.common.eventbus.Subscribe;
44+
import org.jspecify.annotations.NonNull;
4445
import org.jspecify.annotations.Nullable;
4546
import org.slf4j.Logger;
4647
import org.slf4j.LoggerFactory;
@@ -419,8 +420,7 @@ public synchronized boolean hasStringByName(String label) {
419420
* Resolves any references to strings contained in this field content,
420421
* if possible.
421422
*/
422-
public String resolveForStrings(String content) {
423-
Objects.requireNonNull(content, "Content for resolveForStrings must not be null.");
423+
public String resolveForStrings(@NonNull String content) {
424424
return resolveContent(content, new HashSet<>(), new HashSet<>());
425425
}
426426

jablib/src/main/java/org/jabref/model/entry/BibEntry.java

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -717,7 +717,7 @@ public Object clone() {
717717
/// as is. In case the JabRef "hack" for distinguishing "field = value" and "field = {value}" (in .bib files) is
718718
/// used, it is output as "field = {#value#}", which may cause headaches in debugging.
719719
///
720-
/// Alternative for some more readable output: [#getAuthorTitleYear(int)]
720+
/// Alternative for some more readable output: [#getAuthorTitleYear(int)] or [#getKeyAuthorTitleYear(int)].
721721
///
722722
/// @return A user-readable string NOT A VALID BibTeX string
723723
@Override
@@ -729,6 +729,10 @@ public String getAuthorTitleYear() {
729729
return getAuthorTitleYear(0);
730730
}
731731

732+
public String getKeyAuthorTitleYear() {
733+
return getKeyAuthorTitleYear(0);
734+
}
735+
732736
/**
733737
* Creates a short textual description of the entry in the format: <code>Author1, Author2: Title (Year)</code>
734738
*
@@ -754,12 +758,14 @@ public String getAuthorTitleYear(int maxCharacters) {
754758
.append("\" (")
755759
.append(yearField)
756760
.append(')');
757-
String text = textBuilder.toString();
758761

759-
if ((maxCharacters <= 0) || (text.length() <= maxCharacters)) {
760-
return text;
761-
}
762-
return text.substring(0, maxCharacters + 1) + "...";
762+
return StringUtil.limitStringLength(textBuilder.toString(), maxCharacters);
763+
}
764+
765+
public String getKeyAuthorTitleYear(int maxCharacters) {
766+
String citationKey = getCitationKey().orElse("N/A");
767+
String result = citationKey + " - " + getAuthorTitleYear(0);
768+
return StringUtil.limitStringLength(result, maxCharacters);
763769
}
764770

765771
/**

jablib/src/main/java/org/jabref/model/strings/LatexToUnicodeAdapter.java

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
package org.jabref.model.strings;
22

33
import java.text.Normalizer;
4-
import java.util.Objects;
54
import java.util.Optional;
65
import java.util.regex.Pattern;
76

87
import com.github.tomtung.latex2unicode.LaTeX2Unicode;
98
import fastparse.Parsed;
9+
import org.jspecify.annotations.NonNull;
1010

1111
/**
1212
* Adapter class for the latex2unicode lib. This is an alternative to our LatexToUnicode class
@@ -25,8 +25,7 @@ public class LatexToUnicodeAdapter {
2525
* @param inField a String containing LaTeX
2626
* @return a String with LaTeX resolved into Unicode, or the original String if the LaTeX could not be parsed
2727
*/
28-
public static String format(String inField) {
29-
Objects.requireNonNull(inField);
28+
public static String format(@NonNull String inField) {
3029
return parse(inField).orElse(Normalizer.normalize(inField, Normalizer.Form.NFC));
3130
}
3231

@@ -36,8 +35,7 @@ public static String format(String inField) {
3635
* @param inField a String containing LaTeX
3736
* @return an {@code Optional<String>} with LaTeX resolved into Unicode or {@code empty} on failure.
3837
*/
39-
public static Optional<String> parse(String inField) {
40-
Objects.requireNonNull(inField);
38+
public static Optional<String> parse(@NonNull String inField) {
4139
String toFormat = UNDERSCORE_MATCHER.matcher(inField).replaceAll(REPLACEMENT_CHAR);
4240
Parsed<String> parsingResult = LaTeX2Unicode.parse(toFormat);
4341
if (parsingResult instanceof Parsed.Success) {

jablib/src/main/java/org/jabref/model/strings/StringUtil.java

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -569,16 +569,21 @@ public static List<String> tokenizeToList(String buf, String delimstr) {
569569
return list;
570570
}
571571

572+
/// Limits the length of a string to a maximum length.
573+
///
574+
/// Note the implementation is different from [StringUtils.abbreviate](https://commons.apache.org/proper/commons-lang/javadocs/api-2.6/org/apache/commons/lang/StringUtils.html#abbreviate%28java.lang.String,%20int%29), because it accepts a maximum length smaller than 4.
575+
///
576+
/// @param maxLength the maximum length of the string - <= 0 means no limit
572577
public static String limitStringLength(String s, int maxLength) {
573578
if (s == null) {
574579
return "";
575580
}
576581

577-
if (s.length() <= maxLength) {
582+
if (maxLength <= 0 || s.length() <= maxLength) {
578583
return s;
579584
}
580585

581-
return s.substring(0, maxLength - 3) + "...";
586+
return s.substring(0, Math.max(0, maxLength - 3)) + "...";
582587
}
583588

584589
/**

jablib/src/test/java/org/jabref/model/strings/StringUtilTest.java

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import org.junit.jupiter.api.Test;
1616
import org.junit.jupiter.params.ParameterizedTest;
1717
import org.junit.jupiter.params.provider.Arguments;
18+
import org.junit.jupiter.params.provider.CsvSource;
1819
import org.junit.jupiter.params.provider.MethodSource;
1920

2021
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
@@ -294,22 +295,22 @@ void intValueOfWithNullExceptionfIfStringEmpty() {
294295
assertEquals(Optional.empty(), StringUtil.intValueOfOptional(""));
295296
}
296297

297-
@Test
298-
void limitStringLengthShort() {
299-
assertEquals("Test", StringUtil.limitStringLength("Test", 20));
298+
@ParameterizedTest
299+
@CsvSource({
300+
"'Test', 'Test', 20",
301+
"'...', 'Test', 3",
302+
"'TestTes...', 'TestTestTestTestTest', 10",
303+
"'', , 10"
304+
})
305+
void limitStringLength(String expected, String input, int maxLength) {
306+
assertEquals(expected, StringUtil.limitStringLength(input, maxLength));
300307
}
301308

302309
@Test
303310
void limitStringLengthLimiting() {
304-
assertEquals("TestTes...", StringUtil.limitStringLength("TestTestTestTestTest", 10));
305311
assertEquals(10, StringUtil.limitStringLength("TestTestTestTestTest", 10).length());
306312
}
307313

308-
@Test
309-
void limitStringLengthNullInput() {
310-
assertEquals("", StringUtil.limitStringLength(null, 10));
311-
}
312-
313314
@Test
314315
void replaceSpecialCharacters() {
315316
assertEquals("Hallo Arger", StringUtil.replaceSpecialCharacters("Hallo Arger"));

0 commit comments

Comments
 (0)