@@ -174,7 +174,6 @@ export default {
174
174
search : async ( parent , { q, cursor, sort, what, when, from : whenFrom , to : whenTo } , { me, models, search } ) => {
175
175
const decodedCursor = decodeCursor ( cursor )
176
176
let sitems = null
177
- let termQueries = [ ]
178
177
179
178
// short circuit: return empty result if either:
180
179
// 1. no query provided, or
@@ -186,142 +185,163 @@ export default {
186
185
}
187
186
}
188
187
189
- const whatArr = [ ]
188
+ // build query in parts:
189
+ // filters: determine the universe of potential search candidates
190
+ // termQueries: queries related to the actual search terms
191
+ // functions: rank modifiers to boost by recency or popularity
192
+ const filters = [ ]
193
+ const termQueries = [ ]
194
+ const functions = [ ]
195
+
196
+ // filters for item types
190
197
switch ( what ) {
191
- case 'posts' :
192
- whatArr . push ( { bool : { must_not : { exists : { field : 'parentId' } } } } )
198
+ case 'posts' : // posts only
199
+ filters . push ( { bool : { must_not : { exists : { field : 'parentId' } } } } )
193
200
break
194
- case 'comments' :
195
- whatArr . push ( { bool : { must : { exists : { field : 'parentId' } } } } )
201
+ case 'comments' : // comments only
202
+ filters . push ( { bool : { must : { exists : { field : 'parentId' } } } } )
196
203
break
197
204
case 'bookmarks' :
198
205
if ( me ?. id ) {
199
- whatArr . push ( { match : { bookmarkedBy : me ?. id } } )
206
+ filters . push ( { match : { bookmarkedBy : me ?. id } } )
200
207
}
201
208
break
202
209
default :
203
210
break
204
211
}
205
212
206
- const { query : _query , quotes, nym, url, territory } = queryParts ( q )
207
- let query = _query
213
+ // filter for visible items: status ACTIVE or NOSATS, plus the viewer's own items when logged in
214
+ filters . push (
215
+ me
216
+ ? {
217
+ bool : {
218
+ should : [
219
+ { match : { status : 'ACTIVE' } } ,
220
+ { match : { status : 'NOSATS' } } ,
221
+ { match : { userId : me . id } }
222
+ ]
223
+ }
224
+ }
225
+ : {
226
+ bool : {
227
+ should : [
228
+ { match : { status : 'ACTIVE' } } ,
229
+ { match : { status : 'NOSATS' } }
230
+ ]
231
+ }
232
+ }
233
+ )
208
234
209
- const isUrlSearch = url && query . length === 0 // exclusively searching for an url
235
+ // filter for time range
236
+ const whenRange = when === 'custom'
237
+ ? {
238
+ gte : whenFrom ,
239
+ lte : new Date ( Math . min ( new Date ( Number ( whenTo ) ) , decodedCursor . time ) )
240
+ }
241
+ : {
242
+ lte : decodedCursor . time ,
243
+ gte : whenToFrom ( when )
244
+ }
245
+ filters . push ( { range : { createdAt : whenRange } } )
246
+
247
+ // filter for non negative wvotes
248
+ filters . push ( { range : { wvotes : { gte : 0 } } } )
249
+
250
+ // decompose the search terms
251
+ const { query : _query , quotes, nym, url, territory } = queryParts ( q )
252
+ const query = _query
210
253
254
+ // if search contains a url term, add term queries matching its hostname and path
211
255
if ( url ) {
212
- const isFQDN = url . startsWith ( 'url:www.' )
213
- const domain = isFQDN ? url . slice ( 8 ) : url . slice ( 4 )
214
- const fqdn = `www.${ domain } `
215
- query = ( isUrlSearch ) ? `${ domain } ${ fqdn } ` : `${ query . trim ( ) } ${ domain } `
256
+ const uri = url . slice ( 4 )
257
+ let uriObj
258
+ try {
259
+ uriObj = new URL ( uri )
260
+ } catch {
261
+ try {
262
+ uriObj = new URL ( `https://${ uri } ` )
263
+ } catch { }
264
+ }
265
+
266
+ if ( uriObj ) {
267
+ termQueries . push ( {
268
+ wildcard : { url : `*${ uriObj ?. hostname ?? uri } ${ uriObj ?. pathname ?? '' } *` }
269
+ } )
270
+ termQueries . push ( {
271
+ match : { text : `${ uriObj ?. hostname ?? uri } ${ uriObj ?. pathname ?? '' } ` }
272
+ } )
273
+ }
216
274
}
217
275
276
+ // if nym, the item's user name must contain the nym
218
277
if ( nym ) {
219
- whatArr . push ( { wildcard : { 'user.name' : `*${ nym . slice ( 1 ) . toLowerCase ( ) } *` } } )
278
+ filters . push ( { wildcard : { 'user.name' : `*${ nym . slice ( 1 ) . toLowerCase ( ) } *` } } )
220
279
}
221
280
281
+ // if territory, item must be from territory
222
282
if ( territory ) {
223
- whatArr . push ( { match : { 'sub.name' : territory . slice ( 1 ) } } )
283
+ filters . push ( { match : { 'sub.name' : territory . slice ( 1 ) } } )
224
284
}
225
285
226
- termQueries . push ( {
227
- // all terms are matched in fields
228
- multi_match : {
229
- query,
230
- type : 'best_fields' ,
231
- fields : [ 'title^100' , 'text' ] ,
232
- minimum_should_match : ( isUrlSearch ) ? 1 : '100%' ,
233
- boost : 1000
234
- }
235
- } )
236
-
286
+ // if quoted phrases, items must contain entire phrase
237
287
for ( const quote of quotes ) {
238
- whatArr . push ( {
288
+ termQueries . push ( {
239
289
multi_match : {
240
290
query : quote ,
241
291
type : 'phrase' ,
242
292
fields : [ 'title' , 'text' ]
243
293
}
244
294
} )
245
- }
246
-
247
- // if we search for an exact string only, everything must match
248
- // so score purely on sort field
249
- let boostMode = query ? 'multiply' : 'replace'
250
- let sortField
251
- let sortMod = 'log1p'
252
- switch ( sort ) {
253
- case 'comments' :
254
- sortField = 'ncomments'
255
- sortMod = 'square'
256
- break
257
- case 'sats' :
258
- sortField = 'sats'
259
- break
260
- case 'recent' :
261
- sortField = 'createdAt'
262
- sortMod = 'square'
263
- boostMode = 'replace'
264
- break
265
- default :
266
- sortField = 'wvotes'
267
- sortMod = 'none'
268
- break
269
- }
270
-
271
- const functions = [
272
- {
273
- field_value_factor : {
274
- field : sortField ,
275
- modifier : sortMod ,
276
- factor : 1.2
277
- }
278
- }
279
- ]
280
295
281
- if ( sort === 'recent' && ! isUrlSearch ) {
282
- // prioritize exact matches
283
- termQueries . push ( {
296
+ // force the search to include the quoted phrase
297
+ filters . push ( {
284
298
multi_match : {
285
- query,
299
+ query : quote ,
286
300
type : 'phrase' ,
287
- fields : [ 'title^100' , 'text' ] ,
288
- boost : 1000
289
- }
290
- } )
291
- } else {
292
- // allow fuzzy matching with partial matches
293
- termQueries . push ( {
294
- multi_match : {
295
- query,
296
- type : 'most_fields' ,
297
- fields : [ 'title^100' , 'text' ] ,
298
- fuzziness : 'AUTO' ,
299
- prefix_length : 3 ,
300
- minimum_should_match : ( isUrlSearch ) ? 1 : '60%'
301
- }
302
- } )
303
- functions . push ( {
304
- // small bias toward posts with comments
305
- field_value_factor : {
306
- field : 'ncomments' ,
307
- modifier : 'ln1p' ,
308
- factor : 1
309
- }
310
- } ,
311
- {
312
- // small bias toward recent posts
313
- field_value_factor : {
314
- field : 'createdAt' ,
315
- modifier : 'log1p' ,
316
- factor : 1
301
+ fields : [ 'title' , 'text' ]
317
302
}
318
303
} )
319
304
}
320
305
306
+ // query for search terms
321
307
if ( query . length ) {
322
- // if we have a model id and we aren't sort by recent, use neural search
323
- if ( process . env . OPENSEARCH_MODEL_ID && sort !== 'recent' ) {
324
- termQueries = {
308
+ // keyword based subquery, to be used on its own or in conjunction with a neural
309
+ // search
310
+ const subquery = [
311
+ {
312
+ multi_match : {
313
+ query,
314
+ type : 'most_fields' ,
315
+ fields : [ 'title^10' , 'text' ] ,
316
+ fuzziness : 'AUTO' ,
317
+ minimum_should_match : 1
318
+ }
319
+ } ,
320
+ // items matching all of the terms rank higher
321
+ {
322
+ multi_match : {
323
+ query,
324
+ type : 'most_fields' ,
325
+ fields : [ 'title^10' , 'text' ] ,
326
+ minimum_should_match : '100%' ,
327
+ boost : 100
328
+ }
329
+ } ,
330
+ // items matching the terms as an exact phrase rank highest
331
+ {
332
+ multi_match : {
333
+ query,
334
+ type : 'phrase' ,
335
+ fields : [ 'title^10' , 'text' ] ,
336
+ boost : 1000
337
+ }
338
+ }
339
+ ]
340
+
341
+ // use hybrid neural search if model id is available, otherwise use only
342
+ // keyword search
343
+ if ( process . env . OPENSEARCH_MODEL_ID ) {
344
+ termQueries . push ( {
325
345
hybrid : {
326
346
queries : [
327
347
{
@@ -350,26 +370,58 @@ export default {
350
370
} ,
351
371
{
352
372
bool : {
353
- should : termQueries
373
+ should : subquery ,
374
+ minimum_should_match : 1
354
375
}
355
376
}
356
377
]
357
378
}
358
- }
379
+ } )
380
+ } else {
381
+ termQueries . push ( ...subquery )
359
382
}
360
- } else {
361
- termQueries = [ ]
362
383
}
363
384
364
- const whenRange = when === 'custom'
365
- ? {
366
- gte : whenFrom ,
367
- lte : new Date ( Math . min ( new Date ( Number ( whenTo ) ) , decodedCursor . time ) )
368
- }
369
- : {
370
- lte : decodedCursor . time ,
371
- gte : whenToFrom ( when )
372
- }
385
+ // functions for boosting search rank by recency or popularity
386
+ switch ( sort ) {
387
+ case 'comments' :
388
+ functions . push ( {
389
+ field_value_factor : {
390
+ field : 'ncomments' ,
391
+ modifier : 'log1p'
392
+ }
393
+ } )
394
+ break
395
+ case 'sats' :
396
+ functions . push ( {
397
+ field_value_factor : {
398
+ field : 'sats' ,
399
+ modifier : 'log1p'
400
+ }
401
+ } )
402
+ break
403
+ case 'recent' :
404
+ functions . push ( {
405
+ gauss : {
406
+ createdAt : {
407
+ origin : 'now' ,
408
+ scale : '7d' ,
409
+ decay : 0.5
410
+ }
411
+ }
412
+ } )
413
+ break
414
+ case 'zaprank' :
415
+ functions . push ( {
416
+ field_value_factor : {
417
+ field : 'wvotes' ,
418
+ modifier : 'log1p'
419
+ }
420
+ } )
421
+ break
422
+ default :
423
+ break
424
+ }
373
425
374
426
try {
375
427
sitems = await search . search ( {
@@ -388,39 +440,14 @@ export default {
388
440
function_score : {
389
441
query : {
390
442
bool : {
391
- must : termQueries ,
392
- filter : [
393
- ...whatArr ,
394
- me
395
- ? {
396
- bool : {
397
- should : [
398
- { match : { status : 'ACTIVE' } } ,
399
- { match : { status : 'NOSATS' } } ,
400
- { match : { userId : me . id } }
401
- ]
402
- }
403
- }
404
- : {
405
- bool : {
406
- should : [
407
- { match : { status : 'ACTIVE' } } ,
408
- { match : { status : 'NOSATS' } }
409
- ]
410
- }
411
- } ,
412
- {
413
- range :
414
- {
415
- createdAt : whenRange
416
- }
417
- } ,
418
- { range : { wvotes : { gte : 0 } } }
419
- ]
443
+ filter : filters ,
444
+ should : termQueries ,
445
+ minimum_should_match : 1
420
446
}
421
447
} ,
422
448
functions,
423
- boost_mode : boostMode
449
+ score_mode : 'multiply' ,
450
+ boost_mode : 'multiply'
424
451
}
425
452
} ,
426
453
highlight : {
@@ -458,7 +485,7 @@ export default {
458
485
${ SELECT } , rank
459
486
FROM "Item"
460
487
JOIN r ON "Item".id = r.id` ,
461
- orderBy : 'ORDER BY rank ASC'
488
+ orderBy : 'ORDER BY rank ASC, msats DESC '
462
489
} ) ) . map ( ( item , i ) => {
463
490
const e = sitems . body . hits . hits [ i ]
464
491
item . searchTitle = ( e . highlight ?. title && e . highlight . title [ 0 ] ) || item . title
0 commit comments