Skip to content

Commit 913b47e

Browse files
committed
Update: Print capture log
1 parent: c687a2c · commit: 913b47e

File tree

5 files changed

+98
-16
lines changed

5 files changed

+98
-16
lines changed

src/api.ts

Lines changed: 39 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,15 @@ import puppeteer, { Browser, HTTPResponse, Page, Protocol } from 'puppeteer'
55

66
import { ControllerConfig, controller } from './controller'
77
import { request } from './request'
8-
import { isArray, isObject, isUndefined, logSuccess, logWarn } from './utils'
8+
import {
9+
isArray,
10+
isObject,
11+
isUndefined,
12+
log,
13+
logError,
14+
logSuccess,
15+
logWarn
16+
} from './utils'
917

1018
import {
1119
DataRequestConfig,
@@ -30,6 +38,7 @@ import {
3038
CrawlDataConfigObject
3139
} from './types/api'
3240
import { LoaderXCrawlBaseConfig } from './types'
41+
import { quickSort } from './sort'
3342

3443
async function crawlRequestSingle(
3544
controllerConfig: ControllerConfig<DataRequestConfig & FileRequestConfig, any>
@@ -539,6 +548,35 @@ export function createCrawlFile(baseConfig: LoaderXCrawlBaseConfig) {
539548
// 等待保存文件完成
540549
await Promise.all(saveFileQueue)
541550

551+
// 打印保存错误
552+
quickSort(saveFileErrorArr).forEach((item) => log(logError(item.message)))
553+
554+
// 统计保存
555+
const succssIds: number[] = []
556+
const errorIds: number[] = []
557+
crawlResArr.forEach((item) => {
558+
if (item.data?.data.isSuccess) {
559+
succssIds.push(item.id)
560+
} else {
561+
errorIds.push(item.id)
562+
}
563+
})
564+
log('Save file final result:')
565+
log(
566+
logSuccess(
567+
` Success - total: ${succssIds.length}, ids: [ ${succssIds.join(
568+
' - '
569+
)} ]`
570+
)
571+
)
572+
log(
573+
logError(
574+
` Error - total: ${errorIds.length}, ids: [ ${errorIds.join(
575+
' - '
576+
)} ]`
577+
)
578+
)
579+
542580
const crawlRes = isArray(config.requestConfig)
543581
? crawlResArr
544582
: crawlResArr[0]

src/batchCrawlHandle.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,7 @@ async function useSleepByBatch(
2222

2323
await sleep(timeout)
2424
} else {
25-
log(`Crawl ${logNumber(id)} does not need to sleep, send immediately`)
25+
log(`Id: ${logNumber(id)} - Crawl does not need to sleep, send immediately`)
2626
}
2727
}
2828

src/controller.ts

Lines changed: 38 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
import { asyncBatchCrawl, syncBatchCrawl } from './batchCrawlHandle'
22
import { IntervalTime } from './types/api'
3+
import { log, logError, logNumber, logSuccess, logWarn } from './utils'
34

45
export interface ControllerConfig<T, V> {
56
id: number
@@ -12,6 +13,7 @@ export interface ControllerConfig<T, V> {
1213
}
1314

1415
export async function controller<T extends { maxRetry?: number }, V, C>(
16+
name: 'page' | 'data' | 'file',
1517
mode: 'async' | 'sync',
1618
requestConfigs: T[],
1719
intervalTime: IntervalTime | undefined,
@@ -34,6 +36,12 @@ export async function controller<T extends { maxRetry?: number }, V, C>(
3436
})
3537
)
3638

39+
log(
40+
`${logSuccess(`Start crawling`)} - name: ${logWarn(name)}, mode: ${logWarn(
41+
mode
42+
)}, total: ${logNumber(controllerConfigs.length)} `
43+
)
44+
3745
// 选择爬取模式
3846
const batchCrawl = mode === 'async' ? asyncBatchCrawl : syncBatchCrawl
3947

@@ -52,7 +60,37 @@ export async function controller<T extends { maxRetry?: number }, V, C>(
5260
!config.isSuccess &&
5361
config.retryCount < config.maxRetry
5462
)
63+
64+
if (crawlQueue.length) {
65+
const retriedIds = crawlQueue.map((item) => item.id)
66+
log(logWarn(`Ids to retry: [ ${retriedIds.join(' - ')} ]`))
67+
}
5568
}
5669

70+
// 统计结果
71+
const succssIds: number[] = []
72+
const errorIds: number[] = []
73+
controllerConfigs.forEach((item) => {
74+
if (item.isSuccess) {
75+
succssIds.push(item.id)
76+
} else {
77+
errorIds.push(item.id)
78+
}
79+
})
80+
81+
log('Crawl the final result:')
82+
log(
83+
logSuccess(
84+
` Success - total: ${succssIds.length}, ids: [ ${succssIds.join(
85+
' - '
86+
)} ]`
87+
)
88+
)
89+
log(
90+
logError(
91+
` Error - total: ${errorIds.length}, ids: [ ${errorIds.join(' - ')} ]`
92+
)
93+
)
94+
5795
return controllerConfigs
5896
}

test/start/index.js

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

test/start/index.ts

Lines changed: 19 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
import path from 'node:path'
2+
import { Browser } from 'puppeteer'
23
import xCrawl from 'x-crawl'
34

45
const testXCrawl = xCrawl({
@@ -7,19 +8,24 @@ const testXCrawl = xCrawl({
78
})
89

910
testXCrawl
10-
.crawlData({
11-
url: 'http://localhost:9001/api/area/阳江市',
12-
method: 'POST',
13-
data: {
14-
type: 'goodPrice',
15-
offset: 0,
16-
size: 20
17-
}
11+
.crawlPage({
12+
requestConfigs: [
13+
'https://www.google.com/search?q=1',
14+
'https://www.google.com/search?q=2',
15+
'https://www.google.com/search?q=2'
16+
],
17+
maxRetry: 2
1818
})
1919
.then((res) => {
20-
if (res.data?.statusCode === 200) {
21-
return true
22-
} else {
23-
return false
24-
}
20+
let browser: Browser | null = null
21+
22+
res.forEach((item) => {
23+
if (!browser) browser = item.data.browser
24+
25+
console.log(item.isSuccess, item.retryCount)
26+
27+
console.log(item.errorQueue.map((item) => item.message))
28+
})
29+
30+
browser!.close()
2531
})

0 commit comments

Comments
 (0)