Skip to content

Commit c790b54

Browse files
[SOLR-17726] Fix CloudMLTQParser to support copyField in qf (#3328)
using copy field source for more like this + tests --------- Co-authored-by: Alessandro Benedetti <a.benedetti@sease.io> (cherry picked from commit d249593)
1 parent 996a245 commit c790b54

File tree

4 files changed

+138
-3
lines changed

4 files changed

+138
-3
lines changed

solr/CHANGES.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,8 @@ Bug Fixes
130130

131131
* SOLR-17790: Allow the -j or --jettyconfig option to start with a dash (-). (Houston Putman)
132132

133+
* SOLR-17726: MoreLikeThis to support copy-fields (Ilaria Petreti via Alessandro Benedetti)
134+
133135
Dependency Upgrades
134136
---------------------
135137
* SOLR-17471: Upgrade Lucene to 9.12.1. (Pierre Salagnac, Christine Poerschke)

solr/core/src/java/org/apache/solr/search/mlt/CloudMLTQParser.java

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,8 @@ protected Query likeDoc(MoreLikeThis moreLikeThis, SolrDocument doc) throws IOEx
6767
Map<String, Collection<Object>> filteredDocument = new HashMap<>();
6868

6969
for (String field : moreLikeThis.getFieldNames()) {
70-
Collection<Object> fieldValues = doc.getFieldValues(field);
70+
Collection<Object> fieldValues = getFieldValuesIncludingCopyField(doc, field);
71+
7172
if (fieldValues != null) {
7273
Collection<Object> values = new ArrayList<>();
7374
for (Object val : fieldValues) {
@@ -110,4 +111,21 @@ private SolrDocument getDocument(String id) {
110111

111112
return (SolrDocument) response.get("doc");
112113
}
114+
115+
private Collection<Object> getFieldValuesIncludingCopyField(SolrDocument doc, String field) {
116+
Collection<Object> fieldValues = doc.getFieldValues(field);
117+
if (fieldValues != null) return fieldValues;
118+
// Fields created using copyField are not included in documents returned by RealTime Get.
119+
// So if a copyField destination is used in the MLT query (qf), we need to get the values
120+
// from its source field instead. If there are multiple source fields, their values must be
121+
// combined.
122+
Collection<Object> combinedValues = new ArrayList<>();
123+
for (String sourceField : req.getSchema().getCopySources(field)) {
124+
Collection<Object> sourceValues = doc.getFieldValues(sourceField);
125+
if (sourceValues != null) {
126+
combinedValues.addAll(sourceValues);
127+
}
128+
}
129+
return combinedValues.isEmpty() ? null : combinedValues;
130+
}
113131
}

solr/core/src/test-files/solr/configsets/cloud-dynamic/conf/schema.xml

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -224,11 +224,18 @@
224224
<field name="uniq3" type="string" indexed="true" stored="true"/>
225225
<field name="nouniq" type="string" indexed="true" stored="true" multiValued="true"/>
226226

227+
<field name="payload" type="sortable_binary" indexed="false"
228+
stored="true" multiValued="false"/>
227229

230+
<!-- to test copyField in MLT qf -->
228231
<field name="copyfield_source" type="string" indexed="true" stored="true" multiValued="true"/>
232+
<field name="copyfield_dest" type="nametext" indexed="true" stored="true" multiValued="true"/>
233+
<copyField source="copyfield_source" dest="copyfield_dest" />
229234

230-
<field name="payload" type="sortable_binary" indexed="false"
231-
stored="true" multiValued="false"/>
235+
<field name="copyfield_source_2" type="string" indexed="true" stored="true" multiValued="true"/>
236+
<field name="copyfield_dest_multiple_sources" type="nametext" indexed="true" stored="true" multiValued="true"/>
237+
<copyField source="copyfield_source" dest="copyfield_dest_multiple_sources" />
238+
<copyField source="copyfield_source_2" dest="copyfield_dest_multiple_sources" />
232239

233240
<!-- for versioning -->
234241
<field name="_version_" type="long" indexed="true" stored="true"/>

solr/core/src/test/org/apache/solr/search/mlt/CloudMLTQParserTest.java

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import org.apache.solr.common.SolrDocument;
2828
import org.apache.solr.common.SolrDocumentList;
2929
import org.apache.solr.common.SolrException;
30+
import org.apache.solr.common.util.NamedList;
3031
import org.junit.After;
3132
import org.junit.Before;
3233
import org.junit.Test;
@@ -50,6 +51,8 @@ static void indexDocs()
5051
String id = "id";
5152
String FIELD1 = "lowerfilt_u";
5253
String FIELD2 = "lowerfilt1_u";
54+
String FIELD3 = "copyfield_source";
55+
String FIELD4 = "copyfield_source_2";
5356

5457
new UpdateRequest()
5558
.add(sdoc(id, "1", FIELD1, "toyota"))
@@ -119,6 +122,9 @@ static void indexDocs()
119122
"The slim red fox jumped over the lazy brown dogs.",
120123
FIELD2,
121124
"yellow white black"))
125+
.add(sdoc(id, "33", FIELD3, "hard rock", FIELD4, "instrumental version"))
126+
.add(sdoc(id, "34", FIELD3, "hard rock", FIELD4, "instrumental version"))
127+
.add(sdoc(id, "35", FIELD3, "pop rock"))
122128
.commit(client, COLLECTION);
123129
}
124130

@@ -340,4 +346,106 @@ public void testInvalidSourceDocument() {
340346
.getSolrClient()
341347
.query(COLLECTION, new SolrQuery("{!mlt qf=lowerfilt_u}999999")));
342348
}
349+
350+
@Test
351+
public void testUsesACopyFieldInQf_shouldReturnExpectResults() throws Exception {
352+
// Verifies that when a copyField destination is used in the qf parameter, the field values are
353+
// correctly retrieved from the source field(s) and the MLT query returns the expected results.
354+
QueryResponse queryResponse =
355+
cluster
356+
.getSolrClient()
357+
.query(COLLECTION, new SolrQuery("{!mlt qf=copyfield_dest mindf=0 mintf=1}33"));
358+
SolrDocumentList solrDocuments = queryResponse.getResults();
359+
int[] expectedIds = new int[] {34, 35};
360+
int[] actualIds = new int[solrDocuments.size()];
361+
int i = 0;
362+
for (SolrDocument solrDocument : solrDocuments) {
363+
actualIds[i++] = Integer.parseInt(String.valueOf(solrDocument.getFieldValue("id")));
364+
}
365+
366+
Arrays.sort(actualIds);
367+
Arrays.sort(expectedIds);
368+
assertArrayEquals(expectedIds, actualIds);
369+
}
370+
371+
@Test
372+
public void testUsesACopyFieldInQf_shouldGenerateNonEmptyQuery() throws Exception {
373+
// Verifies that the MLT query correctly uses the content of the source field(s) when a
374+
// copyField destination is specified in the qf parameter.
375+
QueryResponse queryResponse =
376+
cluster
377+
.getSolrClient()
378+
.query(
379+
COLLECTION,
380+
new SolrQuery("{!mlt qf=copyfield_dest mindf=0 mintf=1}33").setShowDebugInfo(true));
381+
382+
NamedList<?> debugInfo = (NamedList<?>) queryResponse.getResponse().get("debug");
383+
// Extract the parsed query string
384+
String parsedQuery = (String) debugInfo.get("parsedquery_toString");
385+
// Assert it matches the expected query string
386+
assertEquals("+(copyfield_dest:rock copyfield_dest:hard) -id:33", parsedQuery);
387+
// Assert it is not the incorrect fallback
388+
assertNotEquals("+() -id:33", parsedQuery);
389+
}
390+
391+
@Test
392+
public void testCopyFieldSourceMissing_shouldReturnNoResults() throws Exception {
393+
// Ensures that no results are returned when the copyField source field is missing in the source
394+
// document.
395+
QueryResponse queryResponse =
396+
cluster
397+
.getSolrClient()
398+
.query(COLLECTION, new SolrQuery("{!mlt qf=copyfield_dest mindf=0 mintf=1}30"));
399+
SolrDocumentList solrDocuments = queryResponse.getResults();
400+
assertEquals("Expected no results if source field is missing", 0, solrDocuments.size());
401+
}
402+
403+
@Test
404+
public void testCopyFieldDestinMultipleSources_shouldReturnExpectResults() throws Exception {
405+
// Validates that when multiple source fields map to a single copyField destination, their
406+
// values are correctly combined and expected results are returned.
407+
QueryResponse queryResponse =
408+
cluster
409+
.getSolrClient()
410+
.query(
411+
COLLECTION,
412+
new SolrQuery("{!mlt qf=copyfield_dest_multiple_sources mindf=0 mintf=1}33"));
413+
SolrDocumentList solrDocuments = queryResponse.getResults();
414+
int[] expectedIds = new int[] {34, 35};
415+
int[] actualIds = new int[solrDocuments.size()];
416+
int i = 0;
417+
for (SolrDocument solrDocument : solrDocuments) {
418+
actualIds[i++] = Integer.parseInt(String.valueOf(solrDocument.getFieldValue("id")));
419+
}
420+
421+
Arrays.sort(actualIds);
422+
Arrays.sort(expectedIds);
423+
assertArrayEquals(expectedIds, actualIds);
424+
}
425+
426+
@Test
427+
public void
428+
testCopyFieldDestinationMultipleSources_shouldGenerateQueryUsingMultipleSourcesValues()
429+
throws Exception {
430+
// Validates that when multiple source fields map to a single copyField destination, their
431+
// values are
432+
// correctly combined and the resulting MLT query is properly constructed.
433+
QueryResponse queryResponse =
434+
cluster
435+
.getSolrClient()
436+
.query(
437+
COLLECTION,
438+
new SolrQuery("{!mlt qf=copyfield_dest_multiple_sources mindf=0 mintf=1}33")
439+
.setShowDebugInfo(true));
440+
441+
NamedList<?> debugInfo = (NamedList<?>) queryResponse.getResponse().get("debug");
442+
// Extract the parsed query string
443+
String parsedQuery = (String) debugInfo.get("parsedquery_toString");
444+
// Assert it matches the expected query string
445+
assertEquals(
446+
"+(copyfield_dest_multiple_sources:rock copyfield_dest_multiple_sources:version copyfield_dest_multiple_sources:hard copyfield_dest_multiple_sources:instrumental) -id:33",
447+
parsedQuery);
448+
// Assert it is not the incorrect fallback
449+
assertNotEquals("+() -id:33", parsedQuery);
450+
}
343451
}

0 commit comments

Comments
 (0)