Skip to content

Commit 77d48f7

Browse files
committed
refactor - optimize snippets search
1 parent 558e191 commit 77d48f7

11 files changed

+5189
-2021
lines changed

backend/jest.config.js

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
module.exports = {
2+
testEnvironment: 'node',
3+
testMatch: ['**/*.spec.js'],
4+
};

backend/package-lock.json

Lines changed: 4787 additions & 1825 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

backend/package.json

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,22 +5,23 @@
55
"scripts": {
66
"start": "node ./bin/www",
77
"debug": "nodemon --inspect ./bin/www --watch src --watch docs/openapi/openapi.yaml",
8-
"integration-tests": "NODE_ENV=test nyc mocha '**/*.spec.js' --exit"
8+
"integration-tests": "NODE_ENV=test nyc mocha '**/*.spec.js' --exit",
9+
"test": "jest --config ./jest.config.js"
910
},
1011
"dependencies": {
1112
"aws-sdk": "^2.1048.0",
12-
"body-parser": "^1.19.1",
13+
"body-parser": "^1.20.1",
1314
"cheerio": "^1.0.0-rc.10",
1415
"cookie-parser": "^1.4.6",
1516
"debug": "^4.3.3",
1617
"escape-string-regexp": "^1.0.5",
17-
"express": "^4.17.2",
18+
"express": "^4.18.2",
1819
"express-async-errors": "^3.1.1",
1920
"fs-extra": "^7.0.1",
2021
"helmet": "^3.23.3",
2122
"http-status-codes": "^1.4.0",
2223
"keycloak-connect": "12.0.4",
23-
"mongoose": "^5.13.14",
24+
"mongoose": "^5.13.15",
2425
"morgan": "^1.10.0",
2526
"multer": "^1.4.4",
2627
"multer-s3": "^2.10.0",
@@ -38,6 +39,7 @@
3839
"chai": "^4.3.4",
3940
"chai-as-promised": "^7.1.1",
4041
"eslint": "^8.5.0",
42+
"jest": "^29.4.2",
4143
"jsonwebtoken": "^8.5.1",
4244
"mocha": "^9.1.3",
4345
"nodemon": "^2.0.20",

backend/src/common/search.service.js

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
const Snippet = require('../model/snippet');
2+
3+
const searchUtils = require('./search.utils');
4+
5+
/**
 * Entry point of the snippet search: splits the raw query into terms and tags
 * and delegates to the matching query strategy.
 *
 * Note: the strategy is chosen from the raw `terms` returned by
 * `splitSearchQuery` (before special filters are separated from them), while
 * only the full-text part of those terms is forwarded to the strategies.
 *
 * @param type          forwarded unchanged to the strategy functions
 * @param isPublic      forwarded unchanged to the strategy functions
 * @param userId        forwarded unchanged to the strategy functions
 * @param query         raw search query, parsed by `searchUtils.splitSearchQuery`
 * @param page          1-based page number
 * @param limit         page size
 * @param searchInclude forwarded to the term based strategies
 * @returns the snippets produced by the selected strategy
 */
let findItems = async function (type, isPublic, userId, query, page, limit, searchInclude) {
  const {terms: searchedTerms, tags: searchedTags} = searchUtils.splitSearchQuery(query);
  const {specialSearchFilters, fulltextSearchTerms} = searchUtils.extractFulltextAndSpecialSearchTerms(searchedTerms);

  // no terms at all -> tags only search
  if ( !(searchedTerms.length > 0) ) {
    return getItemsForSearchedTags(type, isPublic, userId, searchedTags, page, limit, specialSearchFilters);
  }

  // terms and tags
  if ( searchedTags.length > 0 ) {
    return getItemsForTagsAndTerms(type, isPublic, userId, searchedTags, fulltextSearchTerms, page, limit, specialSearchFilters, searchInclude);
  }

  // terms only
  return getItemsForSearchedTerms(type, isPublic, userId, fulltextSearchTerms, page, limit, specialSearchFilters, searchInclude);
}
24+
25+
/**
 * Finds snippets that carry ALL of the searched tags and additionally match
 * the full-text search terms, ordered by text score.
 *
 * @param type                 not used by this function
 * @param isPublic             when truthy, restricts the search to `public: true`
 * @param userId               when truthy, restricts the search to this user's snippets
 * @param searchedTags         tags that must all be present on a snippet
 * @param fulltextSearchTerms  terms used to build the `$text` part of the filter
 * @param page                 1-based page number
 * @param limit                page size
 * @param specialSearchFilters special filters applied via `addSpecialSearchFiltersToMongoFilter`
 * @param searchInclude        'any' joins the terms as-is; any other value uses `generateFullSearchText`
 * @returns the matching snippets as lean objects
 */
let getItemsForTagsAndTerms = async function (type, isPublic, userId, searchedTags, fulltextSearchTerms, page, limit, specialSearchFilters, searchInclude) {
  const baseFilter = {
    tags: {
      $all: searchedTags
    }
  };
  if ( userId ) {
    baseFilter['userId'] = userId;
  }
  if ( isPublic ) {
    baseFilter['public'] = true;
  }

  // searchInclude has to be forwarded, otherwise the "any" mode is silently
  // ignored as soon as the query also contains tags
  const filter = searchUtils.includeFulltextSearchTermsInFilter(fulltextSearchTerms, baseFilter, searchInclude);

  addSpecialSearchFiltersToMongoFilter(specialSearchFilters, filter);

  const snippets = await Snippet.find(
    filter,
    {
      score: {$meta: "textScore"}
    }
  )
    .sort({score: {$meta: "textScore"}})
    .skip((page - 1) * limit)
    .limit(limit)
    .lean()
    .exec();

  return snippets;
}
57+
58+
59+
/**
 * Finds snippets by full-text search terms only (no tags), ordered by text score.
 *
 * The search is limited to the given user's snippets when `userId` is truthy,
 * otherwise to public snippets.
 *
 * @param type                 not used by this function
 * @param isPublic             not used by this function
 * @param userId               owner to restrict the search to; falsy means public snippets
 * @param fulltextSearchTerms  terms used to build the `$text` part of the filter
 * @param page                 1-based page number
 * @param limit                page size
 * @param specialSearchFilters special filters applied via `addSpecialSearchFiltersToMongoFilter`
 * @param searchInclude        'any' joins the terms as-is; any other value uses `generateFullSearchText`
 * @returns the matching snippets as lean objects
 */
let getItemsForSearchedTerms = async function (type, isPublic, userId, fulltextSearchTerms, page, limit, specialSearchFilters, searchInclude) {
  const ownershipFilter = userId ? {userId: userId} : {public: true};

  // same $text construction as the tags-and-terms search, via the shared helper
  const filter = searchUtils.includeFulltextSearchTermsInFilter(fulltextSearchTerms, ownershipFilter, searchInclude);

  addSpecialSearchFiltersToMongoFilter(specialSearchFilters, filter);

  const textScore = {score: {$meta: "textScore"}};
  const query = Snippet.find(filter, textScore)
    .sort({score: {$meta: "textScore"}})
    .skip((page - 1) * limit)
    .limit(limit)
    .lean();

  return await query.exec();
}
92+
93+
94+
/**
 * Finds snippets that carry ALL of the searched tags, newest first.
 *
 * The search is limited to the given user's snippets when `userId` is truthy,
 * otherwise to public snippets.
 *
 * @param type                 not used by this function
 * @param isPublic             not used by this function
 * @param userId               owner to restrict the search to; falsy means public snippets
 * @param searchedTags         tags that must all be present on a snippet
 * @param page                 1-based page number
 * @param limit                page size
 * @param specialSearchFilters special filters applied via `addSpecialSearchFiltersToMongoFilter`
 * @returns the matching snippets as lean objects
 */
let getItemsForSearchedTags = async function (type, isPublic, userId, searchedTags, page, limit, specialSearchFilters) {
  const ownershipFilter = userId ? {userId: userId} : {public: true};
  const filter = {
    tags: {$all: searchedTags},
    ...ownershipFilter
  };

  addSpecialSearchFiltersToMongoFilter(specialSearchFilters, filter);

  const offset = (page - 1) * limit;
  const query = Snippet.find(filter)
    .sort({createdAt: -1})
    .skip(offset)
    .limit(limit)
    .lean();

  return await query.exec();
}
118+
119+
/**
 * Applies the special search filters (extracted from terms such as
 * `user:<id>`, `private:only` or `site:<host>`) to a Mongo filter.
 * The given filter object is mutated in place.
 *
 * - `userId` overrides any `userId` already present in the filter; when it is
 *   set, `privateOnly` is not evaluated
 * - `privateOnly` sets `public: false`
 * - `site` becomes a case-insensitive "contains" match on `sourceUrl`
 *
 * @param specialSearchFilters object holding the optional `userId`, `privateOnly` and `site` entries
 * @param filter               Mongo filter to extend
 */
let addSpecialSearchFiltersToMongoFilter = function (specialSearchFilters, filter) {
  if ( specialSearchFilters.userId ) {
    filter.userId = specialSearchFilters.userId;
  } else if ( specialSearchFilters.privateOnly ) {
    filter.public = false;
  }

  if ( specialSearchFilters.site ) {
    // The site is typed by the user, so regex metacharacters are escaped: the value
    // is matched literally ("." no longer matches any character) and input
    // like "site:(" can no longer throw a SyntaxError or be used as a costly pattern
    const escapedSite = String(specialSearchFilters.site).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    filter.sourceUrl = new RegExp(escapedSite, 'i'); //TODO when performance becomes an issue extract domains from URLs and make a direct comparison with the domain
  }
};
130+
131+
132+
module.exports = {
133+
findSnippets: findItems
134+
}

backend/src/common/search.utils.js

Lines changed: 26 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -58,63 +58,10 @@ let splitSearchQuery = function (query) {
5858
return result;
5959
}
6060

61-
let bookmarkContainsSearchedTerm = function (bookmark, searchedTerm) {
62-
let result = false;
63-
// const escapedSearchPattern = '\\b' + this.escapeRegExp(searchedTerm.toLowerCase()) + '\\b'; word boundary was not enough, especially for special characters which can happen in coding
64-
// https://stackoverflow.com/questions/23458872/javascript-regex-word-boundary-b-issue
65-
const separatingChars = '\\s\\.,;#\\-\\/_\\[\\]\\(\\)\\*\\+';
66-
const escapedSearchPattern = `(^|[${separatingChars}])(${escapeRegExp(searchedTerm.toLowerCase())})(?=$|[${separatingChars}])`;
67-
const pattern = new RegExp(escapedSearchPattern);
68-
if ( (bookmark.name && pattern.test(bookmark.name.toLowerCase()))
69-
|| (bookmark.location && pattern.test(bookmark.location.toLowerCase()))
70-
|| (bookmark.description && pattern.test(bookmark.description.toLowerCase()))
71-
|| (bookmark.sourceCodeURL && pattern.test(bookmark.sourceCodeURL.toLowerCase()))
72-
) {
73-
result = true;
74-
}
75-
76-
if ( result ) {
77-
return true;
78-
} else {
79-
// if not found already look through the tags also
80-
bookmark.tags.forEach(tag => {
81-
if ( pattern.test(tag.toLowerCase()) ) {
82-
result = true;
83-
}
84-
});
85-
}
8661

87-
return result;
88-
}
89-
90-
function escapeRegExp(str) {
91-
const specials = [
92-
// order matters for these
93-
'-'
94-
, '['
95-
, ']'
96-
// order doesn't matter for any of these
97-
, '/'
98-
, '{'
99-
, '}'
100-
, '('
101-
, ')'
102-
, '*'
103-
, '+'
104-
, '?'
105-
, '.'
106-
, '\\'
107-
, '^'
108-
, '$'
109-
, '|'
110-
],
111-
regex = RegExp('[' + specials.join('\\') + ']', 'g');
112-
return str.replace(regex, '\\$&'); // $& means the whole matched string
113-
}
114-
115-
let extractSpecialSearchTerms = function (searchedTerms) {
62+
let extractFulltextAndSpecialSearchTerms = function (searchedTerms) {
11663
let specialSearchFilters = {}
117-
let nonSpecialSearchTerms = [];
64+
let fulltextSearchTerms = [];
11865
for ( let i = 0; i < searchedTerms.length; i++ ) {
11966
const searchTerm = searchedTerms[i];
12067

@@ -141,39 +88,50 @@ let extractSpecialSearchTerms = function (searchedTerms) {
14188
continue;
14289
}
14390

144-
nonSpecialSearchTerms.push(searchTerm);
91+
fulltextSearchTerms.push(searchTerm);
14592
}
14693

14794
return {
14895
specialSearchFilters: specialSearchFilters,
149-
nonSpecialSearchTerms: nonSpecialSearchTerms
96+
fulltextSearchTerms: fulltextSearchTerms
15097
}
15198
}
15299

153100
/*
154-
The default search in Mongo uses the OR operatar, here
101+
The default search in Mongo uses the OR operator, here
155102
we turn it into AND by wrapping each search term in double quotes ""
156103
*/
157-
/**
 * Builds the `$search` string for a Mongo text query in which every term is required.
 *
 * @param fulltextSearchTerms array of search terms (strings)
 * @returns the terms joined by a single space and trimmed; terms starting with
 *          "-" are kept as they are, all other terms are wrapped in double quotes
 */
let generateFullSearchText = function (fulltextSearchTerms) {
  return fulltextSearchTerms
    .map(searchTerm => {
      if ( searchTerm.startsWith('-') ) { // "-" means it must not contain this searchTerm
        return searchTerm;
      }
      return `"${searchTerm}"`; //wrap it in quotes to make it a default AND in search
    })
    .join(' ')
    .trim();
};
172116

117+
/**
 * Returns a shallow copy of the given filter, extended with a `$text` search
 * when there is at least one full-text search term. The given filter itself
 * is not modified.
 *
 * @param fulltextSearchTerms array of search terms (strings)
 * @param filter              Mongo filter to copy and extend
 * @param searchInclude       'any' joins the terms with a space as they are;
 *                            any other value uses `generateFullSearchText`
 * @returns the new filter object
 */
let includeFulltextSearchTermsInFilter = function (fulltextSearchTerms, filter, searchInclude) {
  const hasFulltextTerms = fulltextSearchTerms.length > 0;
  if ( !hasFulltextTerms ) {
    return {...filter};
  }

  const searchExpression = searchInclude === 'any'
    ? fulltextSearchTerms.join(' ')
    : generateFullSearchText(fulltextSearchTerms);

  return {...filter, $text: {$search: searchExpression}};
}
131+
173132
module.exports = {
174133
splitSearchQuery: splitSearchQuery,
175-
bookmarkContainsSearchedTerm: bookmarkContainsSearchedTerm,
176-
extractSpecialSearchTerms: extractSpecialSearchTerms,
177-
escapeRegExp: escapeRegExp,
178-
generateFullSearchText: generateFullSearchText
134+
extractFulltextAndSpecialSearchTerms: extractFulltextAndSpecialSearchTerms,
135+
generateFullSearchText: generateFullSearchText,
136+
includeFulltextSearchTermsInFilter: includeFulltextSearchTermsInFilter
179137
}
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
const searchUtils = require('./search.utils');
2+
3+
// Unit tests (jest) for the helpers exported by ./search.utils

describe('splitSearchQuery', () => {
  it('should split search query into terms and tags', () => {
    const query = 'term1 [tag1] term2 [tag2]';
    const expectedResult = {
      terms: ['term1', 'term2'],
      tags: ['tag1', 'tag2']
    };
    expect(searchUtils.splitSearchQuery(query)).toEqual(expectedResult);
  });
});

describe('extractFulltextAndSpecialSearchTerms', () => {
  it('should extract special search terms and filters from searched terms', () => {
    const searchedTerms = [ 'lang:en', 'site:github.com', 'private:only', 'term1', 'user:12345678-abcd-1234-abcd-123456789abc' ];
    const expectedResult = {
      "fulltextSearchTerms": [
        "term1"
      ],
      "specialSearchFilters": {
        "lang": "en",
        "privateOnly": true,
        "site": "github.com",
        "userId": "12345678-abcd-1234-abcd-123456789abc"
      }
    }
    expect(searchUtils.extractFulltextAndSpecialSearchTerms(searchedTerms)).toEqual(expectedResult);
  });
});

describe('includeFulltextSearchTermsInFilter', () => {
  test('returns filter with $text when fulltextSearchTerms is not empty', () => {
    const fulltextSearchTerms = ['test'];
    const filter = {};
    const searchInclude = 'any';
    const expected = {
      ...filter,
      $text: {$search: fulltextSearchTerms.join(' ')}
    };
    expect(searchUtils.includeFulltextSearchTermsInFilter(fulltextSearchTerms, filter, searchInclude)).toEqual(expected);
  });

  test('returns filter without $text when fulltextSearchTerms is empty', () => {
    const fulltextSearchTerms = [];
    const filter = {};
    const searchInclude = 'any';
    // the function always returns a copy of the filter (never undefined);
    // with no terms the copy must simply not contain a $text entry
    const result = searchUtils.includeFulltextSearchTermsInFilter(fulltextSearchTerms, filter, searchInclude);
    expect(result).toEqual(filter);
    expect(result).not.toHaveProperty('$text');
  });

  test('wraps terms in quotes when searchInclude is not "any"', () => {
    const fulltextSearchTerms = ['apple', '-banana'];
    const filter = {};
    const expected = {
      $text: {$search: '"apple" -banana'}
    };
    expect(searchUtils.includeFulltextSearchTermsInFilter(fulltextSearchTerms, filter, 'all')).toEqual(expected);
  });
});

describe('generateFullSearchText', () => {
  it('should generate the correct full search text for given fulltext search terms', () => {
    const fulltextSearchTerms = ['apple', '-banana', 'cherry'];
    const expectedResult = '"apple" -banana "cherry"';

    expect(searchUtils.generateFullSearchText(fulltextSearchTerms)).toBe(expectedResult);
  });
});
60+

0 commit comments

Comments
 (0)