Skip to content

Commit 507ebad

Browse files
committed
Feat: Perfect the rotation agent and refactor the bulk crawl function
1 parent 667d758 commit 507ebad

File tree

12 files changed

+441
-1494
lines changed

12 files changed

+441
-1494
lines changed

src/api.ts

Lines changed: 154 additions & 106 deletions
Large diffs are not rendered by default.

src/batchCrawl.ts

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
import { isNumber, isUndefined, log, logNumber, random, sleep } from './utils'
2+
3+
import type { ExtraCommonConfig } from './api'
4+
import { CrawlDetail, Device } from './controller'
5+
6+
/**
 * Pauses before a crawl is dispatched, when an interval is configured.
 *
 * No pause happens when no interval is configured or when `id` is not
 * greater than 1; in that case only an "immediate send" message is logged.
 *
 * @param isHaventervalTime - Whether an interval time was configured.
 * @param isNumberIntervalTime - Whether `intervalTime` is a plain number.
 * @param intervalTime - Used directly as the delay when it is a number;
 *   otherwise its `max` and `min` properties are passed to `random`
 *   (presumably a millisecond range; confirm against `ExtraCommonConfig`).
 * @param id - Identifier of the device about to be crawled.
 */
async function useSleepByBatch(
  isHaventervalTime: boolean,
  isNumberIntervalTime: boolean,
  intervalTime: any,
  id: number
) {
  const mustWait = isHaventervalTime && id > 1

  // Guard clause: nothing to wait for, just report and leave.
  if (!mustWait) {
    log(`Id: ${logNumber(id)} - Crawl does not need to sleep, send immediately`)
    return
  }

  // Fixed delay for a numeric interval, otherwise one drawn from the range.
  let timeout: number
  if (isNumberIntervalTime) {
    timeout = intervalTime
  } else {
    timeout = random(intervalTime.max, intervalTime.min)
  }

  const idLabel = logNumber(id)
  const delayLabel = logNumber(timeout + 'ms')
  log(
    `Id: ${idLabel} - Crawl needs to sleep for ${delayLabel} milliseconds before sending`
  )

  await sleep(timeout)
}
28+
29+
/**
 * Starts a crawl for every device without waiting for earlier crawls to
 * finish, then waits for all of them.
 *
 * Before each crawl is started, `useSleepByBatch` is awaited so that the
 * configured `intervalTime` (if any) separates consecutive launches.
 *
 * @param devices - Devices to crawl, processed in array order.
 * @param extraConfig - Shared configuration; its `intervalTime` drives the
 *   pause between launches and the whole object is forwarded to the handler.
 * @param singleCrawlHandle - Performs the crawl for one device.
 * @returns A promise that resolves once every started crawl has resolved and
 *   rejects with the first crawl failure (standard `Promise.all` semantics).
 */
export async function asyncBatchCrawl<
  T extends CrawlDetail,
  E extends ExtraCommonConfig,
  R
>(
  devices: Device<T, R>[],
  extraConfig: E,
  singleCrawlHandle: (device: Device<T, R>, extraConfig: E) => Promise<R>
) {
  const { intervalTime } = extraConfig

  const isHaventervalTime = !isUndefined(intervalTime)
  const isNumberIntervalTime = isNumber(intervalTime)

  const crawlPendingQueue: Promise<R>[] = []
  for (const device of devices) {
    const { id } = device

    await useSleepByBatch(
      isHaventervalTime,
      isNumberIntervalTime,
      intervalTime,
      id
    )

    const pending = Promise.resolve(singleCrawlHandle(device, extraConfig))

    // The loop keeps awaiting sleeps while earlier crawls are in flight. If one
    // of them rejects before Promise.all below subscribes to it, the runtime
    // reports an unhandled rejection. Attaching a no-op handler right away
    // prevents that; the rejection is still surfaced by Promise.all.
    pending.catch(() => undefined)

    crawlPendingQueue.push(pending)
  }

  // Wait for all crawls to finish
  await Promise.all(crawlPendingQueue)
}
60+
61+
/**
 * Crawls the devices strictly one after another.
 *
 * For each device, `useSleepByBatch` is awaited first (applying the
 * configured `intervalTime`, if any), then the crawl itself is awaited
 * before moving on to the next device.
 *
 * @param devices - Devices to crawl, processed in array order.
 * @param extraConfig - Shared configuration; its `intervalTime` drives the
 *   pause between crawls and the whole object is forwarded to the handler.
 * @param singleCrawlHandle - Performs the crawl for one device.
 */
export async function syncBatchCrawl<
  T extends CrawlDetail,
  E extends ExtraCommonConfig,
  R
>(
  devices: Device<T, R>[],
  extraConfig: E,
  singleCrawlHandle: (device: Device<T, R>, extraConfig: E) => Promise<R>
) {
  const intervalTime = extraConfig.intervalTime

  // Interval flags are computed once, since the config does not change per device.
  const hasInterval = !isUndefined(intervalTime)
  const intervalIsNumber = isNumber(intervalTime)

  for (const currentDevice of devices) {
    await useSleepByBatch(
      hasInterval,
      intervalIsNumber,
      intervalTime,
      currentDevice.id
    )

    // Finish this crawl before the next device is touched.
    await singleCrawlHandle(currentDevice, extraConfig)
  }
}

src/batchCrawlHandle.ts

Lines changed: 0 additions & 149 deletions
This file was deleted.

0 commit comments

Comments
 (0)