@@ -31,13 +31,11 @@ import {
31
31
CrawlFileAdvancedConfig ,
32
32
CrawlDataAdvancedConfig ,
33
33
IntervalTime ,
34
- DetailTargetFingerprintCommon ,
35
- Platform ,
36
- Mobile
34
+ DetailTargetFingerprintCommon
37
35
} from './types/api'
38
36
import { LoaderXCrawlConfig } from './types'
39
37
import { AnyObject } from './types/common'
40
- import { randomFingerprint } from './default'
38
+ import { fingerprints } from './default'
41
39
42
40
/* Types */
43
41
@@ -104,6 +102,9 @@ interface PageSingleCrawlResult {
104
102
interface CrawlPageConfigOriginal {
105
103
detailTargets : CrawlPageDetailTargetConfig [ ]
106
104
intervalTime : IntervalTime | undefined
105
+
106
+ selectFingerprintIndexs : number [ ]
107
+
107
108
onCrawlItemComplete :
108
109
| ( ( crawlPageSingleRes : CrawlPageSingleRes ) => void )
109
110
| undefined
@@ -112,6 +113,9 @@ interface CrawlPageConfigOriginal {
112
113
interface CrawlDataConfigOriginal {
113
114
detailTargets : CrawlDataDetailTargetConfig [ ]
114
115
intervalTime : IntervalTime | undefined
116
+
117
+ selectFingerprintIndexs : number [ ]
118
+
115
119
onCrawlItemComplete :
116
120
| ( ( crawlDataSingleRes : CrawlDataSingleRes < any > ) => void )
117
121
| undefined
@@ -120,6 +124,9 @@ interface CrawlDataConfigOriginal {
120
124
interface CrawlFileConfigOriginal {
121
125
detailTargets : CrawlFileDetailTargetConfig [ ]
122
126
intervalTime : IntervalTime | undefined
127
+
128
+ selectFingerprintIndexs : number [ ]
129
+
123
130
onBeforeSaveItemFile :
124
131
| ( ( info : {
125
132
id : number
@@ -225,7 +232,7 @@ function loaderCommonFingerprintToDetailTarget(
225
232
| CrawlFileDetailTargetConfig ,
226
233
fingerprint : DetailTargetFingerprintCommon
227
234
) {
228
- const { userAgent , ua, platform, platformVersion, mobile, acceptLanguage } =
235
+ const { ua, platform, platformVersion, mobile, acceptLanguage, userAgent } =
229
236
fingerprint
230
237
231
238
let headers = detail . headers
@@ -234,16 +241,17 @@ function loaderCommonFingerprintToDetailTarget(
234
241
detail . headers = headers = { }
235
242
}
236
243
237
- // 1.user-agent
238
- if ( userAgent ) {
239
- headers [ 'user-agent' ] = userAgent
240
- }
241
-
242
- // 2.sec-ch-ua
244
+ // 1.sec-ch-ua
243
245
if ( ua ) {
244
246
headers [ 'sec-ch-ua' ] = ua
245
247
}
246
248
249
+ // 2.sec-ch-ua-mobile
250
+ if ( mobile ) {
251
+ headers [ 'sec-ch-ua-mobile' ] =
252
+ mobile === 'random' ? ( random ( 2 ) ? '?1' : '?0' ) : mobile
253
+ }
254
+
247
255
// 3.sec-ch-platform
248
256
if ( platform ) {
249
257
headers [ 'sec-ch-platform' ] = platform
@@ -254,38 +262,85 @@ function loaderCommonFingerprintToDetailTarget(
254
262
headers [ 'sec-ch-ua-platform-version' ] = platformVersion
255
263
}
256
264
257
- // 5.sec-ch-mobile
258
- if ( mobile ) {
259
- headers [ 'sec-ch-mobile' ] = mobile
260
- }
261
-
262
- // 6.accept-language
265
+ // 5.accept-language
263
266
if ( acceptLanguage ) {
264
267
headers [ 'accept-language' ] = acceptLanguage
265
268
}
269
+
270
+ // 6.user-agent
271
+ if ( userAgent ) {
272
+ let value = userAgent . value
273
+
274
+ userAgent . versions ?. forEach ( ( version ) => {
275
+ const {
276
+ name,
277
+ maxMajorVersion,
278
+ minMajorVersion,
279
+ maxMinorVersion,
280
+ minMinorVersion,
281
+ maxPatchVersion,
282
+ minPatchVersion
283
+ } = version
284
+
285
+ const nameSplit = value . split ( `${ name } /` )
286
+ const versionSplit : any [ ] = nameSplit [ 1 ] . split ( ' ' ) [ 0 ] . split ( '.' )
287
+ const originalVersion = versionSplit . join ( '.' )
288
+
289
+ if ( ! isUndefined ( maxMajorVersion ) ) {
290
+ versionSplit [ 0 ] =
291
+ maxMajorVersion === minMajorVersion
292
+ ? maxMajorVersion
293
+ : random ( maxMajorVersion , minMajorVersion )
294
+ }
295
+
296
+ if ( ! isUndefined ( maxMinorVersion ) ) {
297
+ versionSplit [ 1 ] =
298
+ maxMinorVersion === minMinorVersion
299
+ ? maxMinorVersion
300
+ : random ( maxMinorVersion , minMinorVersion )
301
+ }
302
+
303
+ if ( ! isUndefined ( maxPatchVersion ) ) {
304
+ versionSplit [ 2 ] =
305
+ maxPatchVersion === minPatchVersion
306
+ ? maxPatchVersion
307
+ : random ( maxPatchVersion , minPatchVersion )
308
+ }
309
+
310
+ const searchValue = `${ name } /${ originalVersion } `
311
+ const replaceValue = `${ name } /${ versionSplit . join ( '.' ) } `
312
+ value = value . replace ( searchValue , replaceValue )
313
+ } )
314
+
315
+ headers [ 'user-agent' ] = value
316
+ }
266
317
}
267
318
268
319
/**
 * Applies a page fingerprint's viewport bounds to a single page detail target.
 *
 * For each dimension whose `max*` bound is truthy, a value is chosen: the bound
 * itself when max and min are equal, otherwise the result of
 * `random(max, min)`. If the target already has a viewport, the chosen
 * dimensions are written onto that same object and its other properties are
 * left alone. If it has none, a new viewport is attached only when both a
 * width and a height were chosen.
 *
 * @param detail - The page detail target whose `viewport` may be updated.
 * @param fingerprint - Optional width/height bounds. `minHidth` is spelled as
 *   in the existing config shape; presumably it is the minimum height
 *   (NOTE(review): confirm against the type declaration).
 */
function loaderPageFingerprintToDetailTarget(
  detail: CrawlPageDetailTargetConfig,
  fingerprint: {
    maxWidth?: number
    minWidth?: number
    maxHeight?: number
    minHidth?: number
  }
) {
  const { maxWidth, minWidth, maxHeight, minHidth } = fingerprint

  // 1.width / height
  // A dimension stays undefined when its max bound is missing (or 0), which
  // means "leave whatever the target already has". Width is resolved before
  // height so any calls to `random` happen in a fixed order.
  let width: number | undefined
  if (maxWidth) {
    width = maxWidth === minWidth ? maxWidth : random(maxWidth, minWidth)
  }

  let height: number | undefined
  if (maxHeight) {
    height = maxHeight === minHidth ? maxHeight : random(maxHeight, minHidth)
  }

  const currentViewport = detail.viewport

  if (currentViewport) {
    // Mutate in place so other viewport properties on the target survive.
    if (width !== undefined) {
      currentViewport.width = width
    }
    if (height !== undefined) {
      currentViewport.height = height
    }
    return
  }

  // No viewport yet: only create one when it would be complete.
  if (width !== undefined && height !== undefined) {
    detail.viewport = { width, height }
  }
}
291
346
@@ -353,73 +408,30 @@ function loaderCommonConfigToCrawlConfig(
353
408
// detaileTarget
354
409
355
410
loaderCommonFingerprintToDetailTarget ( detail , fingerprint )
356
- } else if ( isUndefined ( fingerprint ) && advancedConfig . fingerprint ) {
411
+ } else if (
412
+ isUndefined ( fingerprint ) &&
413
+ isArray ( advancedConfig . fingerprints ) &&
414
+ advancedConfig . fingerprints . length
415
+ ) {
357
416
// advancedConfig
358
417
359
- const {
360
- userAgents,
361
- uas,
362
- platforms,
363
- platformVersions,
364
- mobiles,
365
- acceptLanguages
366
- } = advancedConfig . fingerprint
367
-
368
- // 1.user-agent
369
- const userAgent = userAgents
370
- ? userAgents [ random ( userAgents . length ) ]
371
- : undefined
372
-
373
- // 2.sec-ch-ua
374
- const ua = uas ? uas [ random ( uas . length ) ] : undefined
375
-
376
- // 3.sec-ch-platform
377
- const platform = platforms
378
- ? platforms [ random ( platforms . length ) ]
379
- : undefined
380
-
381
- // 4.sec-ch-platform-version
382
- const platformVersion = platformVersions
383
- ? platformVersions [ random ( platformVersions . length ) ]
384
- : undefined
385
-
386
- // 5.sec-ch-mobile
387
- const mobile = mobiles ? mobiles [ random ( mobiles . length ) ] : undefined
388
-
389
- // 6.accept-language
390
- const acceptLanguage = acceptLanguages
391
- ? acceptLanguages [ random ( acceptLanguages . length ) ]
392
- : undefined
393
-
394
- loaderCommonFingerprintToDetailTarget ( detail , {
395
- userAgent,
396
- ua,
397
- platform,
398
- platformVersion,
399
- mobile,
400
- acceptLanguage
401
- } )
402
- } else if ( xCrawlConfig . enableRandomFingerprint ) {
403
- // xCrawlConfig
418
+ const fingerprints = advancedConfig . fingerprints
419
+ const selectFingerprintIndex = random ( fingerprints . length )
420
+ const fingerprint = fingerprints [ selectFingerprintIndex ]
404
421
405
- const { platforms, mobiles } = randomFingerprint
422
+ // Record the fingerprint index selected for each target
423
+ crawlConfig . selectFingerprintIndexs . push ( selectFingerprintIndex )
406
424
407
- // 1.user-agent
408
- const userAgent = `Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.${ random (
409
- 10
410
- ) } .${ random ( 10000 ) } .${ random ( 1000 ) } Safari/537.36`
411
-
412
- // 2.sec-ch-platform
413
- const platform = platforms [ random ( platforms . length ) ] as Platform
414
-
415
- // 3.sec-ch-mobile
416
- const mobile = mobiles [ random ( mobiles . length ) ] as Mobile
425
+ loaderCommonFingerprintToDetailTarget ( detail , fingerprint )
426
+ } else if (
427
+ isUndefined ( fingerprint ) &&
428
+ ! isArray ( advancedConfig . fingerprints ) &&
429
+ xCrawlConfig . enableRandomFingerprint
430
+ ) {
431
+ // xCrawlConfig
432
+ const fingerprint = fingerprints [ random ( fingerprints . length ) ]
417
433
418
- loaderCommonFingerprintToDetailTarget ( detail , {
419
- userAgent,
420
- platform,
421
- mobile
422
- } )
434
+ loaderCommonFingerprintToDetailTarget ( detail , fingerprint )
423
435
}
424
436
} )
425
437
@@ -453,6 +465,9 @@ function createCrawlPageConfig(
453
465
const crawlPageConfig : CrawlPageConfigOriginal = {
454
466
detailTargets : [ ] ,
455
467
intervalTime : undefined ,
468
+
469
+ selectFingerprintIndexs : [ ] ,
470
+
456
471
onCrawlItemComplete : undefined
457
472
}
458
473
@@ -482,7 +497,7 @@ function createCrawlPageConfig(
482
497
loaderCommonConfigToCrawlConfig ( xCrawlConfig , advancedConfig , crawlPageConfig )
483
498
484
499
// Load the per-target configuration
485
- crawlPageConfig . detailTargets . forEach ( ( detail ) => {
500
+ crawlPageConfig . detailTargets . forEach ( ( detail , index ) => {
486
501
// detail > advanced > xCrawl
487
502
const { cookies, viewport, fingerprint } = detail
488
503
@@ -499,8 +514,16 @@ function createCrawlPageConfig(
499
514
// 3.fingerprint
500
515
if ( fingerprint ) {
501
516
loaderPageFingerprintToDetailTarget ( detail , fingerprint )
502
- } else if ( isUndefined ( fingerprint ) && advancedConfig . fingerprint ) {
503
- loaderPageFingerprintToDetailTarget ( detail , advancedConfig . fingerprint )
517
+ } else if (
518
+ isUndefined ( fingerprint ) &&
519
+ advancedConfig . fingerprints ?. length
520
+ ) {
521
+ // Look up the fingerprint index from the matching selection record
522
+ const selectFingerprintIndex =
523
+ crawlPageConfig . selectFingerprintIndexs [ index ]
524
+ const fingerprint = advancedConfig . fingerprints [ selectFingerprintIndex ]
525
+
526
+ loaderPageFingerprintToDetailTarget ( detail , fingerprint )
504
527
}
505
528
} )
506
529
@@ -514,6 +537,9 @@ function createCrawlDataConfig<T>(
514
537
const crawlDataConfig : CrawlDataConfigOriginal = {
515
538
detailTargets : [ ] ,
516
539
intervalTime : undefined ,
540
+
541
+ selectFingerprintIndexs : [ ] ,
542
+
517
543
onCrawlItemComplete : undefined
518
544
}
519
545
@@ -551,6 +577,9 @@ function createCrawlFileConfig(
551
577
const crawlFileConfig : CrawlFileConfigOriginal = {
552
578
detailTargets : [ ] ,
553
579
intervalTime : undefined ,
580
+
581
+ selectFingerprintIndexs : [ ] ,
582
+
554
583
onBeforeSaveItemFile : undefined ,
555
584
onCrawlItemComplete : undefined
556
585
}
0 commit comments