Skip to content

Commit 83ad9d9

Browse files
committed
feat: control terminal information and information upgrades
1 parent dd2114b commit 83ad9d9

File tree

9 files changed

+366
-220
lines changed

9 files changed

+366
-220
lines changed

src/api.ts

Lines changed: 186 additions & 123 deletions
Large diffs are not rendered by default.

src/batchCrawl.ts

Lines changed: 42 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,39 +1,58 @@
1-
import { isNumber, isUndefined, log, logNumber, random, sleep } from './utils'
1+
import {
2+
isNumber,
3+
isUndefined,
4+
log,
5+
logNumber,
6+
random,
7+
sleep,
8+
whiteBold
9+
} from './utils'
210

3-
import type { ExtraCommonConfig } from './api'
11+
import type { InfoCommonConfig } from './api'
412
import { CrawlDetail, Device } from './controller'
513

6-
async function useSleepByBatch(
14+
async function useSleepByBatch<T extends InfoCommonConfig>(
715
isHaventervalTime: boolean,
816
isNumberIntervalTime: boolean,
917
intervalTime: any,
10-
id: number
18+
id: number,
19+
infoConfig: T
1120
) {
21+
const { serialNumber, logConfig } = infoConfig
22+
1223
if (isHaventervalTime && id > 1) {
1324
const timeout: number = isNumberIntervalTime
1425
? intervalTime
1526
: random(intervalTime.max, intervalTime.min)
1627

17-
log(
18-
`Target id: ${logNumber(id)} - Sleep time: ${logNumber(timeout + 'ms')}`
19-
)
28+
if (logConfig.process) {
29+
log(
30+
`${whiteBold(serialNumber)} | Target id: ${logNumber(
31+
id
32+
)} - Sleep time: ${logNumber(timeout + 'ms')}`
33+
)
34+
}
2035

2136
await sleep(timeout)
22-
} else {
23-
log(`Target id: ${logNumber(id)} - Sleep time: ${logNumber('0ms')}`)
37+
} else if (logConfig.process) {
38+
log(
39+
`${whiteBold(serialNumber)} | Target id: ${logNumber(
40+
id
41+
)} - Sleep time: ${logNumber('0ms')}`
42+
)
2443
}
2544
}
2645

2746
export async function asyncBatchCrawl<
2847
T extends CrawlDetail,
29-
E extends ExtraCommonConfig,
48+
I extends InfoCommonConfig,
3049
R
3150
>(
3251
devices: Device<T, R>[],
33-
extraConfig: E,
34-
singleCrawlHandle: (device: Device<T, R>, extraConfig: E) => Promise<void>
52+
infoConfig: I,
53+
singleCrawlHandle: (device: Device<T, R>, infoConfig: I) => Promise<void>
3554
) {
36-
const { intervalTime } = extraConfig
55+
const { intervalTime } = infoConfig
3756

3857
const isHaventervalTime = !isUndefined(intervalTime)
3958
const isNumberIntervalTime = isNumber(intervalTime)
@@ -46,10 +65,11 @@ export async function asyncBatchCrawl<
4665
isHaventervalTime,
4766
isNumberIntervalTime,
4867
intervalTime,
49-
id
68+
id,
69+
infoConfig
5070
)
5171

52-
crawlPendingQueue.push(singleCrawlHandle(device, extraConfig))
72+
crawlPendingQueue.push(singleCrawlHandle(device, infoConfig))
5373
}
5474

5575
// Wait for all crawls to finish
@@ -58,14 +78,14 @@ export async function asyncBatchCrawl<
5878

5979
export async function syncBatchCrawl<
6080
T extends CrawlDetail,
61-
E extends ExtraCommonConfig,
81+
I extends InfoCommonConfig,
6282
R
6383
>(
6484
devices: Device<T, R>[],
65-
extraConfig: E,
66-
singleCrawlHandle: (device: Device<T, R>, extraConfig: E) => Promise<void>
85+
infoConfig: I,
86+
singleCrawlHandle: (device: Device<T, R>, infoConfig: I) => Promise<void>
6787
) {
68-
const { intervalTime } = extraConfig
88+
const { intervalTime } = infoConfig
6989

7090
const isHaventervalTime = !isUndefined(intervalTime)
7191
const isNumberIntervalTime = isNumber(intervalTime)
@@ -77,9 +97,10 @@ export async function syncBatchCrawl<
7797
isHaventervalTime,
7898
isNumberIntervalTime,
7999
intervalTime,
80-
id
100+
id,
101+
infoConfig
81102
)
82103

83-
await singleCrawlHandle(device, extraConfig)
104+
await singleCrawlHandle(device, infoConfig)
84105
}
85106
}

src/controller.ts

Lines changed: 45 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ import { asyncBatchCrawl, syncBatchCrawl } from './batchCrawl'
22
import { priorityQueueMergeSort } from './sort'
33

44
import {
5-
ExtraCommonConfig,
5+
InfoCommonConfig,
66
LoaderCrawlDataDetail,
77
LoaderCrawlFileDetail,
88
LoaderCrawlHTMLDetail,
@@ -18,7 +18,8 @@ import {
1818
logStart,
1919
logStatistics,
2020
logSuccess,
21-
logWarn
21+
logWarn,
22+
whiteBold
2223
} from './utils'
2324
import { HTTPResponse } from 'puppeteer'
2425
import { Request } from './request'
@@ -81,15 +82,14 @@ export function isCrawlStatusInHttpStatus(device: Device<CrawlDetail, any>) {
8182

8283
export async function controller<
8384
T extends CrawlDetail,
84-
E extends ExtraCommonConfig,
85+
I extends InfoCommonConfig,
8586
R
8687
>(
87-
mode: 'async' | 'sync',
8888
detailTargets: T[],
89-
extraConfig: E,
90-
singleCrawlHandle: (device: Device<T, R>, extraConfig: E) => Promise<void>
89+
infoConfig: I,
90+
singleCrawlHandle: (device: Device<T, R>, infoConfig: I) => Promise<void>
9191
) {
92-
const { type } = extraConfig
92+
const { serialNumber, mode, logConfig } = infoConfig
9393

9494
// Whether to use priority crawling
9595
const isPriorityCrawl = !detailTargets.every(
@@ -136,19 +136,21 @@ export async function controller<
136136
}
137137
)
138138

139-
log(
140-
logStart(
141-
`Start crawling - type: ${type}, mode: ${mode}, total: ${devices.length}`
139+
if (logConfig.start) {
140+
log(
141+
`${whiteBold(serialNumber)} | ${logStart(
142+
`Start crawling - mode: ${mode}, total: ${devices.length}`
143+
)}`
142144
)
143-
)
145+
}
144146

145147
// Select the crawl mode
146148
const batchCrawl = mode === 'async' ? asyncBatchCrawl : syncBatchCrawl
147149

148150
let i = 0
149151
let crawlQueue: Device<T, R>[] = devices
150152
while (crawlQueue.length) {
151-
await batchCrawl(crawlQueue, extraConfig, singleCrawlHandle)
153+
await batchCrawl(crawlQueue, infoConfig, singleCrawlHandle)
152154

153155
crawlQueue = crawlQueue.filter((device) => {
154156
const {
@@ -204,42 +206,42 @@ export async function controller<
204206
return item.id
205207
})
206208

207-
log(
208-
logWarn(
209-
`Start retrying - count: ${++i}, targets id: [ ${retriedIds.join(
210-
', '
211-
)} ]`
209+
if (logConfig.process) {
210+
log(
211+
`${whiteBold(serialNumber)} | ${logWarn(
212+
`Start retrying - count: ${++i}, targets id: [ ${retriedIds.join(
213+
', '
214+
)} ]`
215+
)}`
212216
)
213-
)
217+
}
214218
}
215219
}
216220

217221
// Tally the results
218-
const succssIds: number[] = []
219-
const errorIds: number[] = []
220-
devices.forEach((device) => {
221-
if (device.isSuccess) {
222-
succssIds.push(device.id)
223-
} else {
224-
errorIds.push(device.id)
225-
}
226-
})
227-
228-
log(logStatistics(`Crawl ${type}s finish:`))
229-
log(
230-
logSuccess(
231-
` Success - total: ${succssIds.length}, targets id: [ ${succssIds.join(
232-
', '
233-
)} ]`
234-
)
235-
)
236-
log(
237-
logError(
238-
` Error - total: ${errorIds.length}, targets id: [ ${errorIds.join(
239-
', '
240-
)} ]`
241-
)
242-
)
222+
if (logConfig.result) {
223+
const succssIds: number[] = []
224+
const errorIds: number[] = []
225+
devices.forEach((device) => {
226+
if (device.isSuccess) {
227+
succssIds.push(device.id)
228+
} else {
229+
errorIds.push(device.id)
230+
}
231+
})
232+
233+
log(`${whiteBold(serialNumber)} | ${logStatistics(`Crawl finish:`)}
234+
${logSuccess(
235+
`Success - total: ${
236+
succssIds.length
237+
}, targets id: [ ${succssIds.join(', ')} ]`
238+
)}
239+
${logError(
240+
`Error - total: ${
241+
errorIds.length
242+
}, targets id: [ ${errorIds.join(', ')} ]`
243+
)}`)
244+
}
243245

244246
return devices.map((device) => device.result)
245247
}

src/index.ts

Lines changed: 59 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -3,50 +3,81 @@ import {
33
createCrawlFile,
44
createCrawlHTML,
55
createCrawlPage,
6-
startPolling
6+
createStartPolling
77
} from './api'
88

9-
import { LoaderXCrawlConfig, XCrawlConfig, XCrawlInstance } from './types'
10-
import { isUndefined } from './utils'
9+
import { XCrawlConfig, XCrawlInstance, XCrawlInstanceConfig } from './types'
10+
import { isBoolean, isObject } from './utils'
1111

12-
const loaderBaseConfigDefault: LoaderXCrawlConfig = {
13-
mode: 'async',
14-
enableRandomFingerprint: true,
15-
timeout: 10000,
16-
maxRetry: 0
17-
}
12+
let id = 0
13+
14+
function createInstanceConfig(
15+
config: XCrawlConfig | undefined
16+
): XCrawlInstanceConfig {
17+
const {
18+
mode,
19+
enableRandomFingerprint,
20+
baseUrl,
21+
intervalTime,
22+
log,
23+
crawlPage,
24+
timeout,
25+
proxy,
26+
maxRetry
27+
} = config ?? {}
1828

19-
function loaderBaseConfig(
20-
baseConfig: XCrawlConfig | undefined
21-
): LoaderXCrawlConfig {
22-
const loaderBaseConfig: any = baseConfig ? baseConfig : {}
29+
const xCrawlInstanceConfig: XCrawlInstanceConfig = {
30+
id: ++id,
2331

24-
Object.keys(loaderBaseConfigDefault).forEach((key) => {
25-
if (isUndefined(loaderBaseConfig[key])) {
26-
loaderBaseConfig[key] =
27-
loaderBaseConfigDefault[key as keyof LoaderXCrawlConfig]
32+
mode: mode ?? 'async',
33+
enableRandomFingerprint: enableRandomFingerprint ?? true,
34+
timeout: timeout ?? 10000,
35+
maxRetry: maxRetry ?? 0,
36+
logConfig: { start: true, process: true, result: true },
37+
38+
baseUrl,
39+
intervalTime,
40+
proxy,
41+
crawlPage
42+
}
43+
44+
// logConfig
45+
if (isObject(log)) {
46+
xCrawlInstanceConfig.logConfig = {
47+
...xCrawlInstanceConfig.logConfig,
48+
...log
2849
}
29-
})
50+
} else if (isBoolean(log) && !log) {
51+
const keys = Object.keys(xCrawlInstanceConfig.logConfig) as [
52+
'start',
53+
'process',
54+
'result'
55+
]
56+
57+
keys.forEach((key) => (xCrawlInstanceConfig.logConfig[key] = false))
58+
}
3059

31-
return loaderBaseConfig as LoaderXCrawlConfig
60+
return xCrawlInstanceConfig
3261
}
3362

34-
function createnInstance(baseConfig: LoaderXCrawlConfig): XCrawlInstance {
63+
function createnInstance(
64+
xCrawlInstanceConfig: XCrawlInstanceConfig
65+
): XCrawlInstance {
3566
const instance: XCrawlInstance = {
36-
crawlPage: createCrawlPage(baseConfig),
37-
crawlHTML: createCrawlHTML(baseConfig),
38-
crawlData: createCrawlData(baseConfig),
39-
crawlFile: createCrawlFile(baseConfig),
40-
startPolling
67+
crawlPage: createCrawlPage(xCrawlInstanceConfig),
68+
crawlHTML: createCrawlHTML(xCrawlInstanceConfig),
69+
crawlData: createCrawlData(xCrawlInstanceConfig),
70+
crawlFile: createCrawlFile(xCrawlInstanceConfig),
71+
startPolling: createStartPolling(xCrawlInstanceConfig)
4172
}
4273

4374
return instance
4475
}
4576

46-
export default function xCrawl(baseConfig?: XCrawlConfig): XCrawlInstance {
47-
const newBaseConfig = loaderBaseConfig(baseConfig)
77+
export default function xCrawl(config?: XCrawlConfig): XCrawlInstance {
78+
const xCrawlInstanceConfig = createInstanceConfig(config)
4879

49-
const instance = createnInstance(newBaseConfig)
80+
const instance = createnInstance(xCrawlInstanceConfig)
5081

5182
return instance
5283
}

src/types/common.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
export interface AnyObject extends Object {
1+
export interface AnyObject {
22
[key: string | number | symbol]: any
33
}
44

0 commit comments

Comments
 (0)