Skip to content

Commit 4ded31a

Browse files
committed
Better normalization cache
The key seems to be too specific, especially because it includes the prop, which makes it redundant to cache tokens that are found in different props. The goal of that cache seems to be to trade memory for time, but right now it seems to be storing identical computations under different keys, which is inefficient. The only thing the prop is needed for is the `stemmerSkipProperties`.
1 parent 19df111 commit 4ded31a

File tree

2 files changed

+6
-8
lines changed

2 files changed

+6
-8
lines changed

packages/orama/src/components/tokenizer/index.ts

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -16,15 +16,13 @@ export interface DefaultTokenizer extends Tokenizer {
1616
}
1717

1818
export function normalizeToken(this: DefaultTokenizer, prop: string, token: string): string {
19-
const key = `${this.language}:${prop}:${token}`
20-
21-
if (this.normalizationCache.has(key)) {
22-
return this.normalizationCache.get(key)!
19+
if (this.normalizationCache.has(token)) {
20+
return this.normalizationCache.get(token)!
2321
}
2422

2523
// Remove stopwords if enabled
26-
if (this.stopWords?.includes(token)) {
27-
this.normalizationCache.set(key, '')
24+
if (this.stopWords?.has(token)) {
25+
this.normalizationCache.set(token, '')
2826
return ''
2927
}
3028

@@ -34,7 +32,7 @@ export function normalizeToken(this: DefaultTokenizer, prop: string, token: stri
3432
}
3533

3634
token = replaceDiacritics(token)
37-
this.normalizationCache.set(key, token)
35+
this.normalizationCache.set(token, token)
3836
return token
3937
}
4038

packages/orama/tests/search.test.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -725,7 +725,7 @@ t.test('search method', (t) => {
725725
t.test('with custom tokenizer', async (t) => {
726726
t.plan(4)
727727

728-
const normalizationCache = new Map([['english:foo:dogs', 'Dogs']])
728+
const normalizationCache = new Map([['dogs', 'Dogs']])
729729

730730
const db = await create({
731731
schema: {

0 commit comments

Comments
 (0)