Skip to content

Commit 390a15d

Browse files
authored
Merge branch 'master' into custom_domain
2 parents 25bafc0 + 344c23e commit 390a15d

File tree

6 files changed

+179
-151
lines changed

6 files changed

+179
-151
lines changed

api/resolvers/search.js

Lines changed: 171 additions & 144 deletions
Original file line number | Diff line number | Diff line change
@@ -174,7 +174,6 @@ export default {
174174
search: async (parent, { q, cursor, sort, what, when, from: whenFrom, to: whenTo }, { me, models, search }) => {
175175
const decodedCursor = decodeCursor(cursor)
176176
let sitems = null
177-
let termQueries = []
178177

179178
// short circuit: return empty result if either:
180179
// 1. no query provided, or
@@ -186,142 +185,163 @@ export default {
186185
}
187186
}
188187

189-
const whatArr = []
188+
// build query in parts:
189+
// filters: determine the universe of potential search candidates
190+
// termQueries: queries related to the actual search terms
191+
// functions: rank modifiers to boost by recency or popularity
192+
const filters = []
193+
const termQueries = []
194+
const functions = []
195+
196+
// filters for item types
190197
switch (what) {
191-
case 'posts':
192-
whatArr.push({ bool: { must_not: { exists: { field: 'parentId' } } } })
198+
case 'posts': // posts only
199+
filters.push({ bool: { must_not: { exists: { field: 'parentId' } } } })
193200
break
194-
case 'comments':
195-
whatArr.push({ bool: { must: { exists: { field: 'parentId' } } } })
201+
case 'comments': // comments only
202+
filters.push({ bool: { must: { exists: { field: 'parentId' } } } })
196203
break
197204
case 'bookmarks':
198205
if (me?.id) {
199-
whatArr.push({ match: { bookmarkedBy: me?.id } })
206+
filters.push({ match: { bookmarkedBy: me?.id } })
200207
}
201208
break
202209
default:
203210
break
204211
}
205212

206-
const { query: _query, quotes, nym, url, territory } = queryParts(q)
207-
let query = _query
213+
// filter for active posts
214+
filters.push(
215+
me
216+
? {
217+
bool: {
218+
should: [
219+
{ match: { status: 'ACTIVE' } },
220+
{ match: { status: 'NOSATS' } },
221+
{ match: { userId: me.id } }
222+
]
223+
}
224+
}
225+
: {
226+
bool: {
227+
should: [
228+
{ match: { status: 'ACTIVE' } },
229+
{ match: { status: 'NOSATS' } }
230+
]
231+
}
232+
}
233+
)
208234

209-
const isUrlSearch = url && query.length === 0 // exclusively searching for an url
235+
// filter for time range
236+
const whenRange = when === 'custom'
237+
? {
238+
gte: whenFrom,
239+
lte: new Date(Math.min(new Date(Number(whenTo)), decodedCursor.time))
240+
}
241+
: {
242+
lte: decodedCursor.time,
243+
gte: whenToFrom(when)
244+
}
245+
filters.push({ range: { createdAt: whenRange } })
246+
247+
// filter for non negative wvotes
248+
filters.push({ range: { wvotes: { gte: 0 } } })
249+
250+
// decompose the search terms
251+
const { query: _query, quotes, nym, url, territory } = queryParts(q)
252+
const query = _query
210253

254+
// if search contains a url term, modify the query text
211255
if (url) {
212-
const isFQDN = url.startsWith('url:www.')
213-
const domain = isFQDN ? url.slice(8) : url.slice(4)
214-
const fqdn = `www.${domain}`
215-
query = (isUrlSearch) ? `${domain} ${fqdn}` : `${query.trim()} ${domain}`
256+
const uri = url.slice(4)
257+
let uriObj
258+
try {
259+
uriObj = new URL(uri)
260+
} catch {
261+
try {
262+
uriObj = new URL(`https://${uri}`)
263+
} catch {}
264+
}
265+
266+
if (uriObj) {
267+
termQueries.push({
268+
wildcard: { url: `*${uriObj?.hostname ?? uri}${uriObj?.pathname ?? ''}*` }
269+
})
270+
termQueries.push({
271+
match: { text: `${uriObj?.hostname ?? uri}${uriObj?.pathname ?? ''}` }
272+
})
273+
}
216274
}
217275

276+
// if nym, items must contain nym
218277
if (nym) {
219-
whatArr.push({ wildcard: { 'user.name': `*${nym.slice(1).toLowerCase()}*` } })
278+
filters.push({ wildcard: { 'user.name': `*${nym.slice(1).toLowerCase()}*` } })
220279
}
221280

281+
// if territory, item must be from territory
222282
if (territory) {
223-
whatArr.push({ match: { 'sub.name': territory.slice(1) } })
283+
filters.push({ match: { 'sub.name': territory.slice(1) } })
224284
}
225285

226-
termQueries.push({
227-
// all terms are matched in fields
228-
multi_match: {
229-
query,
230-
type: 'best_fields',
231-
fields: ['title^100', 'text'],
232-
minimum_should_match: (isUrlSearch) ? 1 : '100%',
233-
boost: 1000
234-
}
235-
})
236-
286+
// if quoted phrases, items must contain entire phrase
237287
for (const quote of quotes) {
238-
whatArr.push({
288+
termQueries.push({
239289
multi_match: {
240290
query: quote,
241291
type: 'phrase',
242292
fields: ['title', 'text']
243293
}
244294
})
245-
}
246-
247-
// if we search for an exact string only, everything must match
248-
// so score purely on sort field
249-
let boostMode = query ? 'multiply' : 'replace'
250-
let sortField
251-
let sortMod = 'log1p'
252-
switch (sort) {
253-
case 'comments':
254-
sortField = 'ncomments'
255-
sortMod = 'square'
256-
break
257-
case 'sats':
258-
sortField = 'sats'
259-
break
260-
case 'recent':
261-
sortField = 'createdAt'
262-
sortMod = 'square'
263-
boostMode = 'replace'
264-
break
265-
default:
266-
sortField = 'wvotes'
267-
sortMod = 'none'
268-
break
269-
}
270-
271-
const functions = [
272-
{
273-
field_value_factor: {
274-
field: sortField,
275-
modifier: sortMod,
276-
factor: 1.2
277-
}
278-
}
279-
]
280295

281-
if (sort === 'recent' && !isUrlSearch) {
282-
// prioritize exact matches
283-
termQueries.push({
296+
// force the search to include the quoted phrase
297+
filters.push({
284298
multi_match: {
285-
query,
299+
query: quote,
286300
type: 'phrase',
287-
fields: ['title^100', 'text'],
288-
boost: 1000
289-
}
290-
})
291-
} else {
292-
// allow fuzzy matching with partial matches
293-
termQueries.push({
294-
multi_match: {
295-
query,
296-
type: 'most_fields',
297-
fields: ['title^100', 'text'],
298-
fuzziness: 'AUTO',
299-
prefix_length: 3,
300-
minimum_should_match: (isUrlSearch) ? 1 : '60%'
301-
}
302-
})
303-
functions.push({
304-
// small bias toward posts with comments
305-
field_value_factor: {
306-
field: 'ncomments',
307-
modifier: 'ln1p',
308-
factor: 1
309-
}
310-
},
311-
{
312-
// small bias toward recent posts
313-
field_value_factor: {
314-
field: 'createdAt',
315-
modifier: 'log1p',
316-
factor: 1
301+
fields: ['title', 'text']
317302
}
318303
})
319304
}
320305

306+
// query for search terms
321307
if (query.length) {
322-
// if we have a model id and we aren't sort by recent, use neural search
323-
if (process.env.OPENSEARCH_MODEL_ID && sort !== 'recent') {
324-
termQueries = {
308+
// keyword based subquery, to be used on its own or in conjunction with a neural
309+
// search
310+
const subquery = [
311+
{
312+
multi_match: {
313+
query,
314+
type: 'most_fields',
315+
fields: ['title^10', 'text'],
316+
fuzziness: 'AUTO',
317+
minimum_should_match: 1
318+
}
319+
},
320+
// all match matches higher
321+
{
322+
multi_match: {
323+
query,
324+
type: 'most_fields',
325+
fields: ['title^10', 'text'],
326+
minimum_should_match: '100%',
327+
boost: 100
328+
}
329+
},
330+
// phrase match matches higher
331+
{
332+
multi_match: {
333+
query,
334+
type: 'phrase',
335+
fields: ['title^10', 'text'],
336+
boost: 1000
337+
}
338+
}
339+
]
340+
341+
// use hybrid neural search if model id is available, otherwise use only
342+
// keyword search
343+
if (process.env.OPENSEARCH_MODEL_ID) {
344+
termQueries.push({
325345
hybrid: {
326346
queries: [
327347
{
@@ -350,26 +370,58 @@ export default {
350370
},
351371
{
352372
bool: {
353-
should: termQueries
373+
should: subquery,
374+
minimum_should_match: 1
354375
}
355376
}
356377
]
357378
}
358-
}
379+
})
380+
} else {
381+
termQueries.push(...subquery)
359382
}
360-
} else {
361-
termQueries = []
362383
}
363384

364-
const whenRange = when === 'custom'
365-
? {
366-
gte: whenFrom,
367-
lte: new Date(Math.min(new Date(Number(whenTo)), decodedCursor.time))
368-
}
369-
: {
370-
lte: decodedCursor.time,
371-
gte: whenToFrom(when)
372-
}
385+
// functions for boosting search rank by recency or popularity
386+
switch (sort) {
387+
case 'comments':
388+
functions.push({
389+
field_value_factor: {
390+
field: 'ncomments',
391+
modifier: 'log1p'
392+
}
393+
})
394+
break
395+
case 'sats':
396+
functions.push({
397+
field_value_factor: {
398+
field: 'sats',
399+
modifier: 'log1p'
400+
}
401+
})
402+
break
403+
case 'recent':
404+
functions.push({
405+
gauss: {
406+
createdAt: {
407+
origin: 'now',
408+
scale: '7d',
409+
decay: 0.5
410+
}
411+
}
412+
})
413+
break
414+
case 'zaprank':
415+
functions.push({
416+
field_value_factor: {
417+
field: 'wvotes',
418+
modifier: 'log1p'
419+
}
420+
})
421+
break
422+
default:
423+
break
424+
}
373425

374426
try {
375427
sitems = await search.search({
@@ -388,39 +440,14 @@ export default {
388440
function_score: {
389441
query: {
390442
bool: {
391-
must: termQueries,
392-
filter: [
393-
...whatArr,
394-
me
395-
? {
396-
bool: {
397-
should: [
398-
{ match: { status: 'ACTIVE' } },
399-
{ match: { status: 'NOSATS' } },
400-
{ match: { userId: me.id } }
401-
]
402-
}
403-
}
404-
: {
405-
bool: {
406-
should: [
407-
{ match: { status: 'ACTIVE' } },
408-
{ match: { status: 'NOSATS' } }
409-
]
410-
}
411-
},
412-
{
413-
range:
414-
{
415-
createdAt: whenRange
416-
}
417-
},
418-
{ range: { wvotes: { gte: 0 } } }
419-
]
443+
filter: filters,
444+
should: termQueries,
445+
minimum_should_match: 1
420446
}
421447
},
422448
functions,
423-
boost_mode: boostMode
449+
score_mode: 'multiply',
450+
boost_mode: 'multiply'
424451
}
425452
},
426453
highlight: {
@@ -458,7 +485,7 @@ export default {
458485
${SELECT}, rank
459486
FROM "Item"
460487
JOIN r ON "Item".id = r.id`,
461-
orderBy: 'ORDER BY rank ASC'
488+
orderBy: 'ORDER BY rank ASC, msats DESC'
462489
})).map((item, i) => {
463490
const e = sitems.body.hits.hits[i]
464491
item.searchTitle = (e.highlight?.title && e.highlight.title[0]) || item.title

0 commit comments

Comments
 (0)