From c87e489741d37b2246c1b7f2fe090eab30455517 Mon Sep 17 00:00:00 2001 From: luffy-orf Date: Sat, 24 May 2025 09:48:37 +0530 Subject: [PATCH] fix: ensure exact:true matches entire property value, not tokens --- packages/orama/src/methods/search-fulltext.ts | 38 +++++++++++++++++-- packages/orama/src/types.ts | 19 +++++++++- 2 files changed, 53 insertions(+), 4 deletions(-) diff --git a/packages/orama/src/methods/search-fulltext.ts b/packages/orama/src/methods/search-fulltext.ts index b413d730..b71ffc3e 100644 --- a/packages/orama/src/methods/search-fulltext.ts +++ b/packages/orama/src/methods/search-fulltext.ts @@ -1,7 +1,7 @@ import { getFacets } from '../components/facets.js' import { getGroups } from '../components/groups.js' import { runAfterSearch, runBeforeSearch } from '../components/hooks.js' -import { getInternalDocumentId } from '../components/internal-document-id-store.js' +import { getInternalDocumentId, InternalDocumentID } from '../components/internal-document-id-store.js' import { Language } from '../components/tokenizer/languages.js' import { createError } from '../errors.js' import type { @@ -14,7 +14,7 @@ import type { TokenScore, TypedDocument } from '../types.js' -import { getNanosecondsTime, removeVectorsFromHits, sortTokenScorePredicate } from '../utils.js' +import { getNanosecondsTime, removeVectorsFromHits, sortTokenScorePredicate, getNested } from '../utils.js' import { count } from './docs.js' import { fetchDocuments, fetchDocumentsWithDistinct } from './search.js' @@ -66,7 +66,39 @@ export function innerFullTextSearch( // in this case, we need to return all the documents that contains at least one of the given properties const threshold = params.threshold !== undefined && params.threshold !== null ? 
params.threshold : 1 - if (term || properties) { + /** + * Property-value exactness: + * If `params.exact` is true and a search term is provided, iterate all documents and check if any of the specified properties + * (or all string properties if none specified) match the search term exactly (case-insensitive). + * This is different from token-based exactness, which is handled by `exactToken`. + * + * Example: + * - If a document property value is "First Note.md" and the search term is "first" with `exact: true`, + * it will NOT match (property-value is not exactly "first"). + * - If a document property value is "first" and the search term is "first" with `exact: true`, + * it WILL match. + * + * For token-based exactness (matching individual words/tokens), use `exactToken` instead. + */ + if (params.exact && term) { + const docs = orama.documentsStore.getAll(orama.data.docs) as Record> + const normalizeTerm= term.toLowerCase() + + uniqueDocsIDs = Object.entries(docs) + .filter(([, doc]) => { + return propertiesToSearch.some((prop) => { + const value = getNested(doc, prop) + if (typeof value === 'string') { + return value.toLowerCase() === normalizeTerm + } + if (Array.isArray(value)) { + return value.some((v) => typeof v === 'string' && v.toLowerCase() === normalizeTerm) + } + return false + }) + }) + .map(([id,]) => [+id, 0] as TokenScore) + } else if (term || properties) { const docsCount = count(orama) uniqueDocsIDs = orama.index.search( index, diff --git a/packages/orama/src/types.ts b/packages/orama/src/types.ts index 38d768b4..63f56aa6 100644 --- a/packages/orama/src/types.ts +++ b/packages/orama/src/types.ts @@ -314,10 +314,27 @@ export interface SearchParamsFullText /** - * Whether to match the term exactly. + * Whether to match the term exactly against the entire property value (case-insensitive). 
+ * + * If true, only documents in which at least one searched property (the specified properties, or all string properties if none are specified) + * equals the search term as a whole string (not tokenized) are returned; for array values, any string element may match. + * + * Example: + * - If a property value is "First Note.md" and the search term is "first", this will NOT match. + * - If a property value is "first" and the search term is "first", this WILL match. + * + * For token-level exactness (matching individual words/tokens), use `exactToken` instead. */ exact?: boolean + /** + * Whether each token should be matched exactly (token-level exactness). + * + * If true, only documents where individual tokens (words) in the property match the search term exactly will be returned. + * This is different from `exact`, which matches the entire property value. + */ + exactToken?: boolean + /** * The maximum [levenshtein distance](https://en.wikipedia.org/wiki/Levenshtein_distance) * between the term and the searchable property.