Skip to content

Commit 46d9733

Browse files
committed
fetchHTML API: remove the content option
1 parent 0669eef commit 46d9733

File tree

5 files changed

+50
-26
lines changed

5 files changed

+50
-26
lines changed

src/api.ts

Lines changed: 13 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -29,11 +29,11 @@ import {
2929
IntervalTime,
3030
StartPollingConfig
3131
} from './types/api'
32-
import { XCrawlBaseConifg } from './types'
32+
import { LoaderXCrawlBaseConifg } from './types'
3333
import { RequestConfig, RequestResItem } from './types/request'
3434

3535
function mergeConfig<T extends FetchBaseConifgV1>(
36-
baseConfig: XCrawlBaseConifg,
36+
baseConfig: LoaderXCrawlBaseConifg,
3737
rawConfig: T
3838
): T {
3939
const newConfig = structuredClone(rawConfig)
@@ -70,7 +70,7 @@ function mergeConfig<T extends FetchBaseConifgV1>(
7070
}
7171

7272
async function useBatchRequestByMode(
73-
mode: 'async' | 'sync' | undefined,
73+
mode: 'async' | 'sync',
7474
requestConifg: RequestConfig | RequestConfig[],
7575
intervalTime: IntervalTime | undefined,
7676
callback: (requestRestem: RequestResItem) => void
@@ -79,14 +79,14 @@ async function useBatchRequestByMode(
7979
? requestConifg
8080
: [requestConifg]
8181

82-
if (mode !== 'sync') {
82+
if (mode === 'async') {
8383
await batchRequest(requestConfigQueue, intervalTime, callback)
8484
} else {
8585
await syncBatchRequest(requestConfigQueue, intervalTime, callback)
8686
}
8787
}
8888

89-
export function createFetchHTML(baseConfig: XCrawlBaseConifg) {
89+
export function createFetchHTML(baseConfig: LoaderXCrawlBaseConifg) {
9090
let browser: Browser | null = null
9191
let createBrowserState: Promise<void> | null = null
9292
let callTotal = 0
@@ -95,7 +95,7 @@ export function createFetchHTML(baseConfig: XCrawlBaseConifg) {
9595
config: FetchHTMLConfig,
9696
callback?: (res: FetchHTML) => void
9797
): Promise<FetchHTML> {
98-
// Record the number of calls, for closing the browser
98+
// Record the number of calls; purpose: closing the browser
9999
callTotal++
100100

101101
// Create the browser only once
@@ -129,22 +129,20 @@ export function createFetchHTML(baseConfig: XCrawlBaseConifg) {
129129
})
130130
}
131131

132-
const httpResponse = await page!.goto(requestConifg.url)
132+
const httpResponse = await page!.goto(requestConifg.url, {
133+
timeout: requestConifg.timeout
134+
})
133135

134136
const content = await page!.content()
135137

136138
// Close the browser
137139
if (--callTotal === 0) {
138-
await browser!.close()
140+
browser!.close()
139141
}
140142

141143
const res: FetchHTML = {
142144
httpResponse,
143-
data: {
144-
page,
145-
content,
146-
jsdom: new JSDOM(content)
147-
}
145+
data: { page, jsdom: new JSDOM(content) }
148146
}
149147

150148
if (callback) {
@@ -157,7 +155,7 @@ export function createFetchHTML(baseConfig: XCrawlBaseConifg) {
157155
return fetchHTML
158156
}
159157

160-
export function createFetchData(baseConfig: XCrawlBaseConifg) {
158+
export function createFetchData(baseConfig: LoaderXCrawlBaseConifg) {
161159
async function fetchData<T = any>(
162160
config: FetchDataConfig,
163161
callback?: (res: FetchResCommonV1<T>) => void
@@ -198,7 +196,7 @@ export function createFetchData(baseConfig: XCrawlBaseConifg) {
198196
return fetchData
199197
}
200198

201-
export function createFetchFile(baseConfig: XCrawlBaseConifg) {
199+
export function createFetchFile(baseConfig: LoaderXCrawlBaseConifg) {
202200
async function fetchFile(
203201
config: FetchFileConfig,
204202
callback?: (res: FetchResCommonV1<FileInfo>) => void

src/index.ts

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,25 @@ import {
55
startPolling
66
} from './api'
77

8-
import { XCrawlBaseConifg, XCrawlInstance } from './types'
8+
import {
9+
LoaderXCrawlBaseConifg,
10+
XCrawlBaseConifg,
11+
XCrawlInstance
12+
} from './types'
13+
14+
function loaderBaseConifg(
15+
baseConfig: XCrawlBaseConifg | undefined
16+
): LoaderXCrawlBaseConifg {
17+
const loaderBaseConfig = baseConfig ? baseConfig : {}
18+
19+
if (!loaderBaseConfig.mode) {
20+
loaderBaseConfig.mode = 'async'
21+
}
922

10-
function createnstance(baseConfig: XCrawlBaseConifg): XCrawlInstance {
23+
return loaderBaseConfig as LoaderXCrawlBaseConifg
24+
}
25+
26+
function createnInstance(baseConfig: LoaderXCrawlBaseConifg): XCrawlInstance {
1127
const instance: XCrawlInstance = {
1228
fetchHTML: createFetchHTML(baseConfig),
1329
fetchData: createFetchData(baseConfig),
@@ -18,10 +34,10 @@ function createnstance(baseConfig: XCrawlBaseConifg): XCrawlInstance {
1834
return instance
1935
}
2036

21-
export default function xCrawl(
22-
baseConfig: XCrawlBaseConifg = {}
23-
): XCrawlInstance {
24-
const instance = createnstance(baseConfig)
37+
export default function xCrawl(baseConfig?: XCrawlBaseConifg): XCrawlInstance {
38+
const newBaseConfig = loaderBaseConifg(baseConfig)
39+
40+
const instance = createnInstance(newBaseConfig)
2541

2642
return instance
2743
}

src/request.ts

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import {
1111
logError,
1212
logNumber,
1313
logSuccess,
14+
logWarn,
1415
random,
1516
sleep
1617
} from './utils'
@@ -166,7 +167,9 @@ export async function batchRequest(
166167
const isNumberntervalTime = isNumber(intervalTime)
167168

168169
log(
169-
`Begin execution, mode: async, total: ${logNumber(requestConifgs.length)} `
170+
`Begin execution, mode: ${logWarn('async')}, total: ${logNumber(
171+
requestConifgs.length
172+
)} `
170173
)
171174

172175
let index = 0
@@ -227,7 +230,9 @@ export async function syncBatchRequest(
227230
const isNumberntervalTime = isNumber(intervalTime)
228231

229232
log(
230-
`Begin execution, mode: sync, total: ${logNumber(requestConifgs.length)} `
233+
`Begin execution, mode: ${logWarn('sync')}, total: ${logNumber(
234+
requestConifgs.length
235+
)} `
231236
)
232237

233238
let id = 0

src/types/api.ts

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ import { HTTPResponse, Page } from 'puppeteer'
33
import { JSDOM } from 'jsdom'
44

55
import { RequestConfig } from './request'
6-
import { AnyObject } from './common'
76

87
export type IntervalTime = number | { max: number; min?: number }
98

@@ -14,7 +13,6 @@ export interface FetchBaseConifgV1 {
1413

1514
export interface FetchBaseConifgV2 {
1615
url: string
17-
header?: AnyObject
1816
timeout?: number
1917
proxy?: string
2018
}
@@ -56,7 +54,6 @@ export interface FetchHTML {
5654
httpResponse: HTTPResponse | null
5755
data: {
5856
page: Page
59-
content: string
6057
jsdom: JSDOM
6158
}
6259
}

src/types/index.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import {
99
StartPollingConfig,
1010
IntervalTime
1111
} from './api'
12+
import { MapTypeObject } from './common'
1213

1314
export interface XCrawlBaseConifg {
1415
baseUrl?: string
@@ -18,6 +19,13 @@ export interface XCrawlBaseConifg {
1819
proxy?: string
1920
}
2021

22+
interface LoaderXCrawlBaseConifgValue {
23+
mode: 'async' | 'sync'
24+
}
25+
26+
export type LoaderXCrawlBaseConifg = XCrawlBaseConifg &
27+
MapTypeObject<LoaderXCrawlBaseConifgValue>
28+
2129
export interface XCrawlInstance {
2230
fetchHTML: (
2331
config: FetchHTMLConfig,

0 commit comments

Comments
 (0)