Skip to content

Commit 2be3cda

Browse files
committed
Chores: defaults and type tweaks
1 parent 41f6aa5 commit 2be3cda

File tree

7 files changed

+99
-53
lines changed

7 files changed

+99
-53
lines changed

src/api.ts

Lines changed: 56 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -35,13 +35,19 @@ import {
3535
CrawlFileRequestConfig,
3636
CrawlFileRes,
3737
CrawlFileSingleRes,
38-
CrawlDataConfigObject
38+
CrawlDataConfigObject,
39+
LoaderPageRequestConfig,
40+
LoaderDataRequestConfig,
41+
LoaderFileRequestConfig
3942
} from './types/api'
4043
import { LoaderXCrawlBaseConfig } from './types'
4144
import { quickSort } from './sort'
4245

4346
async function crawlRequestSingle(
44-
controllerConfig: ControllerConfig<DataRequestConfig & FileRequestConfig, any>
47+
controllerConfig: ControllerConfig<
48+
LoaderDataRequestConfig | LoaderFileRequestConfig,
49+
any
50+
>
4551
) {
4652
const { requestConfig } = controllerConfig
4753

@@ -95,41 +101,47 @@ function transformRequestConfig(config: any) {
95101

96102
function loaderCommonConfig(
97103
baseConfig: LoaderXCrawlBaseConfig,
104+
requestObjecs: (PageRequestConfig | DataRequestConfig | FileRequestConfig)[],
98105
loaderConfig:
99106
| LoaderCrawlPageConfig
100107
| LoaderCrawlDataConfig
101108
| LoaderCrawlFileConfig
102109
) {
103110
// 1.requestConfigs
104-
loaderConfig.requestConfigs.forEach((requestConfig) => {
105-
const { url, timeout, proxy, maxRetry } = requestConfig
111+
loaderConfig.requestConfigs = requestObjecs.map((requestConfig) => {
112+
let { url, timeout, proxy, maxRetry } = requestConfig
106113

107114
// 1.1.baseUrl
108115
if (!isUndefined(baseConfig.baseUrl)) {
109-
requestConfig.url = baseConfig.baseUrl + url
116+
url = baseConfig.baseUrl + url
110117
}
111118

112119
// 1.2.timeout
113-
if (isUndefined(timeout) && !isUndefined(baseConfig.timeout)) {
114-
requestConfig.timeout = baseConfig.timeout
120+
// requestConfig > loaderConfig > baseConfig
121+
if (isUndefined(timeout)) {
122+
if (!isUndefined(loaderConfig.timeout)) {
123+
timeout = loaderConfig.timeout
124+
} else {
125+
timeout = baseConfig.timeout
126+
}
115127
}
116128

117129
// 1.3.porxy
118130
if (isUndefined(proxy) && !isUndefined(baseConfig.proxy)) {
119-
requestConfig.proxy = baseConfig.proxy
131+
proxy = baseConfig.proxy
120132
}
121133

122134
// 1.4.maxRetry
123135
// requestConfig > loaderConfig > baseConfig
124136
if (isUndefined(maxRetry)) {
125137
if (!isUndefined(loaderConfig.maxRetry)) {
126-
// 装载 API Config
127-
requestConfig.maxRetry = loaderConfig.maxRetry
128-
} else if (!isUndefined(baseConfig.maxRetry)) {
129-
// 装载 baseConfig
130-
requestConfig.maxRetry = baseConfig.maxRetry
138+
maxRetry = loaderConfig.maxRetry
139+
} else {
140+
maxRetry = baseConfig.maxRetry
131141
}
132142
}
143+
144+
return { url, timeout, proxy, maxRetry }
133145
})
134146

135147
// 2.intervalTime
@@ -147,31 +159,31 @@ function loaderPageConfig(
147159
): LoaderCrawlPageConfig {
148160
const loaderConfig: LoaderCrawlPageConfig = { requestConfigs: [] }
149161

162+
const requestObjecs: PageRequestConfig[] = []
150163
// requestConfig 统一转成 PageRequestConfig 类型
151164
if (isObject(rawConfig) && Object.hasOwn(rawConfig, 'requestConfigs')) {
152165
// CrawlPageConfigObject 处理
153-
const { requestConfigs, cookies, intervalTime, maxRetry } =
166+
const { requestConfigs, timeout, cookies, intervalTime, maxRetry } =
154167
rawConfig as CrawlPageConfigObject
155168

156169
// 给 loaderConfig 装载 API Config
157170
loaderConfig.cookies = cookies
158171
loaderConfig.intervalTime = intervalTime
159172
loaderConfig.maxRetry = maxRetry
173+
loaderConfig.timeout = timeout
160174

161-
const transformRes = transformRequestConfig(requestConfigs)
162-
163-
loaderConfig.requestConfigs.push(...transformRes)
175+
requestObjecs.push(...transformRequestConfig(requestConfigs))
164176
} else {
165177
// string | PageRequestConfig | (string | PageRequestConfig)[] 处理
166178
const transformRes = transformRequestConfig(
167179
rawConfig as string | PageRequestConfig | (string | PageRequestConfig)[]
168180
)
169181

170-
loaderConfig.requestConfigs.push(...transformRes)
182+
requestObjecs.push(...transformRes)
171183
}
172184

173-
// 装载公共配置
174-
loaderCommonConfig(baseConfig, loaderConfig)
185+
// 装载公共配置到 loaderConfig
186+
loaderCommonConfig(baseConfig, requestObjecs, loaderConfig)
175187

176188
// 装载单独的配置
177189
loaderConfig.requestConfigs.forEach((requestConfig) => {
@@ -193,30 +205,30 @@ function loaderDataConfig(
193205
): LoaderCrawlDataConfig {
194206
const loaderConfig: LoaderCrawlDataConfig = { requestConfigs: [] }
195207

196-
// requestConfig 统一转成 PageRequestConfig 类型
208+
// requestConfig 统一转成 DataRequestConfig 类型
209+
const requestObjecs: DataRequestConfig[] = []
197210
if (isObject(rawConfig) && Object.hasOwn(rawConfig, 'requestConfigs')) {
198211
// CrawlDataConfigObject 处理
199-
const { requestConfigs, intervalTime, maxRetry } =
212+
const { requestConfigs, timeout, intervalTime, maxRetry } =
200213
rawConfig as CrawlDataConfigObject
201214

202215
// 给 loaderConfig 装载 API Config
203216
loaderConfig.intervalTime = intervalTime
204217
loaderConfig.maxRetry = maxRetry
218+
loaderConfig.timeout = timeout
205219

206-
const transformRes = transformRequestConfig(requestConfigs)
207-
208-
loaderConfig.requestConfigs.push(...transformRes)
220+
requestObjecs.push(...transformRequestConfig(requestConfigs))
209221
} else {
210222
// string | DataRequestConfig | (string | DataRequestConfig)[] 处理
211223
const transformRes = transformRequestConfig(
212224
rawConfig as string | DataRequestConfig | (string | DataRequestConfig)[]
213225
)
214226

215-
loaderConfig.requestConfigs.push(...transformRes)
227+
requestObjecs.push(...transformRequestConfig(transformRes))
216228
}
217229

218-
// 装载公共配置
219-
loaderCommonConfig(baseConfig, loaderConfig)
230+
// 装载公共配置到 loaderConfig
231+
loaderCommonConfig(baseConfig, requestObjecs, loaderConfig)
220232

221233
return loaderConfig
222234
}
@@ -227,16 +239,19 @@ function loaderFileConfig(
227239
): LoaderCrawlFileConfig {
228240
const loaderConfig: LoaderCrawlFileConfig = {
229241
requestConfigs: [],
242+
timeout: rawConfig.timeout,
230243
intervalTime: rawConfig.intervalTime,
231244
maxRetry: rawConfig.maxRetry,
232245
fileConfig: rawConfig.fileConfig
233246
}
234247

235248
// requestConfig 统一转成 FileRequestConfig 类型
236-
loaderConfig.requestConfigs = transformRequestConfig(rawConfig.requestConfig)
249+
const requestObjecs: FileRequestConfig[] = transformRequestConfig(
250+
rawConfig.requestConfig
251+
)
237252

238-
// 装载公共配置
239-
loaderCommonConfig(baseConfig, loaderConfig)
253+
// 装载公共配置到 loaderConfig
254+
loaderCommonConfig(baseConfig, requestObjecs, loaderConfig)
240255

241256
return loaderConfig
242257
}
@@ -292,7 +307,7 @@ export function createCrawlPage(baseConfig: LoaderXCrawlBaseConfig) {
292307
id,
293308
isSuccess,
294309
maxRetry,
295-
retryCount,
310+
crawlCount,
296311
errorQueue,
297312
crawlSingleRes
298313
} = item
@@ -314,7 +329,8 @@ export function createCrawlPage(baseConfig: LoaderXCrawlBaseConfig) {
314329
id,
315330
isSuccess,
316331
maxRetry,
317-
retryCount,
332+
crawlCount,
333+
retryCount: crawlCount - 1,
318334
errorQueue,
319335
data
320336
}
@@ -339,7 +355,7 @@ export function createCrawlPage(baseConfig: LoaderXCrawlBaseConfig) {
339355
}
340356

341357
async function crawlPageSingle(
342-
controllerConfig: ControllerConfig<PageRequestConfig, any>,
358+
controllerConfig: ControllerConfig<LoaderPageRequestConfig, any>,
343359
cid: number
344360
) {
345361
const { id, requestConfig } = controllerConfig
@@ -419,7 +435,7 @@ export function createCrawlData(baseConfig: LoaderXCrawlBaseConfig) {
419435
id,
420436
isSuccess,
421437
maxRetry,
422-
retryCount,
438+
crawlCount,
423439
errorQueue,
424440
crawlSingleRes
425441
} = item
@@ -428,7 +444,8 @@ export function createCrawlData(baseConfig: LoaderXCrawlBaseConfig) {
428444
id,
429445
isSuccess,
430446
maxRetry,
431-
retryCount,
447+
crawlCount,
448+
retryCount: crawlCount - 1,
432449
errorQueue,
433450
data: null
434451
}
@@ -493,7 +510,7 @@ export function createCrawlFile(baseConfig: LoaderXCrawlBaseConfig) {
493510
id,
494511
isSuccess,
495512
maxRetry,
496-
retryCount,
513+
crawlCount,
497514
errorQueue,
498515
crawlSingleRes
499516
} = item
@@ -502,7 +519,8 @@ export function createCrawlFile(baseConfig: LoaderXCrawlBaseConfig) {
502519
id,
503520
isSuccess,
504521
maxRetry,
505-
retryCount,
522+
crawlCount,
523+
retryCount: crawlCount - 1,
506524
errorQueue,
507525
data: null
508526
}

src/batchCrawlHandle.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ export async function asyncBatchCrawl<T, V, C>(
4949
id
5050
)
5151

52-
controllerConfig.retryCount++
52+
controllerConfig.crawlCount++
5353

5454
const crawlSingle = crawlSingleFn(
5555
controllerConfig,
@@ -95,7 +95,7 @@ export async function syncBatchCrawl<T, V, C>(
9595
id
9696
)
9797

98-
controllerConfig.retryCount++
98+
controllerConfig.crawlCount++
9999

100100
try {
101101
controllerConfig.crawlSingleRes = await crawlSingleFn(

src/controller.ts

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,14 @@ import { log, logError, logNumber, logSuccess, logWarn } from './utils'
55
export interface ControllerConfig<T, V> {
66
id: number
77
isSuccess: boolean
8+
crawlCount: number
89
maxRetry: number
9-
retryCount: number
1010
errorQueue: Error[]
1111
requestConfig: T
1212
crawlSingleRes: V | null
1313
}
1414

15-
export async function controller<T extends { maxRetry?: number }, V, C>(
15+
export async function controller<T extends { maxRetry: number }, V, C>(
1616
name: 'page' | 'data' | 'file',
1717
mode: 'async' | 'sync',
1818
requestConfigs: T[],
@@ -28,8 +28,8 @@ export async function controller<T extends { maxRetry?: number }, V, C>(
2828
(requestConfig, index) => ({
2929
id: index + 1,
3030
isSuccess: false,
31-
maxRetry: requestConfig.maxRetry ?? 0,
32-
retryCount: -1,
31+
maxRetry: requestConfig.maxRetry,
32+
crawlCount: 0,
3333
errorQueue: [],
3434
requestConfig,
3535
crawlSingleRes: null
@@ -58,7 +58,7 @@ export async function controller<T extends { maxRetry?: number }, V, C>(
5858
(config) =>
5959
config.maxRetry &&
6060
!config.isSuccess &&
61-
config.retryCount < config.maxRetry
61+
config.crawlCount <= config.maxRetry
6262
)
6363

6464
if (crawlQueue.length) {

src/index.ts

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import {
1010
XCrawlBaseConfig,
1111
XCrawlInstance
1212
} from './types'
13+
import { isUndefined } from './utils'
1314

1415
function loaderBaseConfig(
1516
baseConfig: XCrawlBaseConfig | undefined
@@ -20,6 +21,14 @@ function loaderBaseConfig(
2021
loaderBaseConfig.mode = 'async'
2122
}
2223

24+
if (isUndefined(baseConfig?.timeout)) {
25+
loaderBaseConfig.timeout = 10000
26+
}
27+
28+
if (isUndefined(baseConfig?.maxRetry)) {
29+
loaderBaseConfig.maxRetry = 0
30+
}
31+
2332
return loaderBaseConfig as LoaderXCrawlBaseConfig
2433
}
2534

src/request.ts

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ import HttpsProxyAgent from 'https-proxy-agent'
1111
import { isUndefined } from './utils'
1212

1313
import { AnyObject, MapTypeEmptyObject } from './types/common'
14-
import { DataRequestConfig, FileRequestConfig } from './types/api'
14+
import { LoaderDataRequestConfig, LoaderFileRequestConfig } from './types/api'
1515

1616
/* Type */
1717
export interface Request {
@@ -36,7 +36,7 @@ function parseParams(urlSearch: string, params?: AnyObject): string {
3636
}
3737

3838
function parseHeaders(
39-
rawConfig: DataRequestConfig & FileRequestConfig,
39+
rawConfig: LoaderDataRequestConfig & LoaderFileRequestConfig,
4040
config: RequestOptions & MapTypeEmptyObject<URL>
4141
) {
4242
const rawHeaders = rawConfig.headers ?? {}
@@ -55,7 +55,7 @@ function parseHeaders(
5555
}
5656

5757
function handleRequestConfig(
58-
rawConfig: DataRequestConfig & FileRequestConfig
58+
rawConfig: LoaderDataRequestConfig & LoaderFileRequestConfig
5959
): RequestOptions & MapTypeEmptyObject<URL> {
6060
const { protocol, hostname, port, pathname, search } = new Url.URL(
6161
rawConfig.url
@@ -85,7 +85,9 @@ function handleRequestConfig(
8585
return config
8686
}
8787

88-
export function request(config: DataRequestConfig & FileRequestConfig) {
88+
export function request(
89+
config: LoaderDataRequestConfig & LoaderFileRequestConfig
90+
) {
8991
return new Promise<Request>((resolve, reject) => {
9092
const isDataUndefine = isUndefined(config.data)
9193
config.data = !isDataUndefine ? JSON.stringify(config.data) : config.data

0 commit comments

Comments
 (0)