Skip to content

fix: search with exact: true #941

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions packages/orama/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,9 @@
}
},
"types": "./dist/commonjs/index.d.ts",
"files": ["dist"],
"files": [
"dist"
],
"repository": {
"type": "git",
"url": "https://github.com/oramasearch/orama"
Expand Down Expand Up @@ -169,7 +171,10 @@
"./components": "./src/components.ts",
"./trees": "./src/trees.ts"
},
"esmDialects": ["deno", "browser"]
"esmDialects": [
"deno",
"browser"
]
},
"module": "./dist/esm/index.js"
}
4 changes: 2 additions & 2 deletions packages/orama/src/components/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -459,8 +459,8 @@ export function search(
tokenizer: Tokenizer,
language: string | undefined,
propertiesToSearch: string[],
exact: boolean,
tolerance: number,
exactToken: boolean,
boost: Record<string, number>,
relevance: Required<BM25Params>,
docsCount: number,
Expand Down Expand Up @@ -500,7 +500,7 @@ export function search(
const tokenLength = tokens.length
for (let i = 0; i < tokenLength; i++) {
const token = tokens[i]
const searchResult = tree.node.find({ term: token, exact, tolerance })
const searchResult = tree.node.find({ term: token, exact: exactToken, tolerance })

// See if this token was found (for threshold=0 filtering)
const termsFound = Object.keys(searchResult)
Expand Down
28 changes: 24 additions & 4 deletions packages/orama/src/methods/search-fulltext.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import { getFacets } from '../components/facets.js'
import { getGroups } from '../components/groups.js'
import { runAfterSearch, runBeforeSearch } from '../components/hooks.js'
import { getInternalDocumentId } from '../components/internal-document-id-store.js'
import { getInternalDocumentId, InternalDocumentID } from '../components/internal-document-id-store.js'
import { Language } from '../components/tokenizer/languages.js'
import { createError } from '../errors.js'
import { getNested } from '../utils.js'
import type {
AnyOrama,
BM25Params,
Expand All @@ -22,7 +23,7 @@ export function innerFullTextSearch<T extends AnyOrama>(
orama: T,
params: Pick<
SearchParamsFullText<T>,
'term' | 'properties' | 'where' | 'exact' | 'tolerance' | 'boost' | 'relevance' | 'threshold'
'term' | 'properties' | 'where' | 'exact' | 'tolerance' | 'boost' | 'relevance' | 'threshold' | 'exactToken'
>,
language: Language | undefined
) {
Expand Down Expand Up @@ -66,16 +67,35 @@ export function innerFullTextSearch<T extends AnyOrama>(
// in this case, we need to return all the documents that contain at least one of the given properties
const threshold = params.threshold !== undefined && params.threshold !== null ? params.threshold : 1

if (term || properties) {
if (params.exact && term) {
const docs = orama.documentsStore.getAll(orama.data.docs) as Record<InternalDocumentID, TypedDocument<T>>
const normalizeTerm= term.toLowerCase()

uniqueDocsIDs = Object.entries(docs)
.filter(([, doc]) => {
return propertiesToSearch.some((prop) => {
const value = getNested(doc, prop)
if (typeof value === 'string') {
return value.toLowerCase() === normalizeTerm
}
if (Array.isArray(value)) {
return value.some((v) => typeof v === 'string' && v.toLowerCase() === normalizeTerm)
}
return false
})
})
.map(([id,]) => [+id, 0] as TokenScore)
}
else if (term || properties) {
const docsCount = count(orama)
uniqueDocsIDs = orama.index.search(
index,
term || '',
orama.tokenizer,
language,
propertiesToSearch,
params.exact || false,
params.tolerance || 0,
params.exactToken || false,
params.boost || {},
applyDefault(params.relevance),
docsCount,
Expand Down
3 changes: 0 additions & 3 deletions packages/orama/src/package.json

This file was deleted.

7 changes: 6 additions & 1 deletion packages/orama/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,11 @@ export interface SearchParamsFullText<T extends AnyOrama, ResultDocument = Typed
*/
exact?: boolean

/**
* Whether each token should be matched exactly.
*/
exactToken?: boolean

/**
* The maximum [levenshtein distance](https://en.wikipedia.org/wiki/Levenshtein_distance)
* between the term and the searchable property.
Expand Down Expand Up @@ -982,8 +987,8 @@ export interface IIndex<I extends AnyIndexStore> {
tokenizer: Tokenizer,
language: string | undefined,
propertiesToSearch: string[],
exact: boolean,
tolerance: number,
exactToken: boolean,
boost: Partial<Record<OnlyStrings<FlattenSchemaProperty<T>[]>, number>>,
relevance: Required<BM25Params>,
docsCount: number,
Expand Down
21 changes: 10 additions & 11 deletions packages/orama/tests/dataset.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -108,9 +108,9 @@ t.test("orama.dataset", async (t) => {
Object.keys((db.data.docs as DocumentsStore).docs).length,
(dataset as EventJson).result.events.length,
);
t.equal(s1.count, 1117);
t.equal(s2.count, 7314);
t.equal(s3.count, 7314);
t.equal(s1.count, 1081);
t.equal(s2.count, 0);
t.equal(s3.count, 1842);

t.end();
});
Expand Down Expand Up @@ -143,7 +143,7 @@ t.test("orama.dataset", async (t) => {
const s1 = removeVariadicData(
await search(db, {
term: "war",
exact: true,
exactToken: true,
// eslint-disable-next-line
// @ts-ignore
properties: ["description"],
Expand All @@ -155,7 +155,7 @@ t.test("orama.dataset", async (t) => {
const s2 = removeVariadicData(
await search(db, {
term: "war",
exact: true,
exactToken: true,
properties: ["description"],
limit: 10,
offset: 10,
Expand All @@ -165,7 +165,7 @@ t.test("orama.dataset", async (t) => {
const s3 = removeVariadicData(
await search(db, {
term: "war",
exact: true,
exactToken: true,
properties: ["description"],
limit: 10,
offset: 20,
Expand All @@ -174,15 +174,15 @@ t.test("orama.dataset", async (t) => {

const s4 = await search(db, {
term: "war",
exact: true,
exactToken: true,
properties: ["description"],
limit: 2240,
offset: 0,
});

const s5 = await search(db, {
term: "war",
exact: true,
exactToken: true,
properties: ["description"],
limit: 10,
offset: 2239,
Expand Down Expand Up @@ -223,7 +223,7 @@ t.test("orama.dataset", async (t) => {
t.test("should correctly delete documents", async (t) => {
const documentsToDelete = await search(db, {
term: "war",
exact: true,
exactToken: true,
properties: ["description"],
limit: 10,
offset: 0,
Expand All @@ -235,13 +235,12 @@ t.test("orama.dataset", async (t) => {

const newSearch = await search(db, {
term: "war",
exact: true,
properties: ["description"],
limit: 10,
offset: 0,
});

t.equal(newSearch.count, 2347);
t.equal(newSearch.count, 2743);

t.end();
});
Expand Down
12 changes: 6 additions & 6 deletions packages/orama/tests/search.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -84,11 +84,11 @@ t.test('search method', async (t) => {
await insert(db, { quote: 'I like cats. They are the best.', author: 'Jane Doe' })

// Exact search
const result1 = await search(db, { term: 'fox', exact: true })
const result1 = await search(db, { term: 'John Doe', properties: ["author"], exact: true })
const result2 = await search(db, { term: 'dog', exact: true })

t.equal(result1.count, 2)
t.equal(result2.count, 3)
t.equal(result2.count, 0)

// Prefix search
const result3 = await search(db, { term: 'fox', exact: false })
Expand Down Expand Up @@ -193,15 +193,15 @@ t.test('search method', async (t) => {

const partialSearch = await search(db, {
term: 'alr',
exact: true
exactToken: true
})

t.equal(partialSearch.count, 0)
t.strictSame(partialSearch.hits, [])

const exactSearch = await search(db, {
term: 'already',
exact: true
exactToken: true
})

t.equal(exactSearch.count, 1)
Expand Down Expand Up @@ -719,11 +719,11 @@ t.test('search method', async (t) => {

const result1 = await search(db, { term: 'foxes', exact: true })
const result2 = await search(db, { term: 'cats', exact: true })
const result3 = await search(db, { term: 'brown', exact: true })
const result3 = await search(db, { term: 'John Doe', exact: true })

t.equal(result1.count, 0)
t.equal(result2.count, 0)
t.equal(result3.count, 1)
t.equal(result3.count, 2)
t.end()
})

Expand Down
9 changes: 7 additions & 2 deletions packages/tokenizers/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,9 @@
"dependencies": {
"@orama/orama": "workspace:*"
},
"files": ["dist"],
"files": [
"dist"
],
"repository": {
"type": "git",
"url": "https://github.com/oramasearch/orama"
Expand Down Expand Up @@ -65,7 +67,10 @@
"node": ">= 20.0.0"
},
"tshy": {
"dialects": ["esm", "commonjs"],
"dialects": [
"esm",
"commonjs"
],
"exports": {
"./japanese": "./src/japanese.ts",
"./mandarin": "./src/mandarin.ts",
Expand Down
Loading