@@ -35,13 +35,19 @@ import {
35
35
CrawlFileRequestConfig ,
36
36
CrawlFileRes ,
37
37
CrawlFileSingleRes ,
38
- CrawlDataConfigObject
38
+ CrawlDataConfigObject ,
39
+ LoaderPageRequestConfig ,
40
+ LoaderDataRequestConfig ,
41
+ LoaderFileRequestConfig
39
42
} from './types/api'
40
43
import { LoaderXCrawlBaseConfig } from './types'
41
44
import { quickSort } from './sort'
42
45
43
46
async function crawlRequestSingle (
44
- controllerConfig : ControllerConfig < DataRequestConfig & FileRequestConfig , any >
47
+ controllerConfig : ControllerConfig <
48
+ LoaderDataRequestConfig | LoaderFileRequestConfig ,
49
+ any
50
+ >
45
51
) {
46
52
const { requestConfig } = controllerConfig
47
53
@@ -95,41 +101,47 @@ function transformRequestConfig(config: any) {
95
101
96
102
function loaderCommonConfig (
97
103
baseConfig : LoaderXCrawlBaseConfig ,
104
+ requestObjecs : ( PageRequestConfig | DataRequestConfig | FileRequestConfig ) [ ] ,
98
105
loaderConfig :
99
106
| LoaderCrawlPageConfig
100
107
| LoaderCrawlDataConfig
101
108
| LoaderCrawlFileConfig
102
109
) {
103
110
// 1.requestConfigs
104
- loaderConfig . requestConfigs . forEach ( ( requestConfig ) => {
105
- const { url, timeout, proxy, maxRetry } = requestConfig
111
+ loaderConfig . requestConfigs = requestObjecs . map ( ( requestConfig ) => {
112
+ let { url, timeout, proxy, maxRetry } = requestConfig
106
113
107
114
// 1.1.baseUrl
108
115
if ( ! isUndefined ( baseConfig . baseUrl ) ) {
109
- requestConfig . url = baseConfig . baseUrl + url
116
+ url = baseConfig . baseUrl + url
110
117
}
111
118
112
119
// 1.2.timeout
113
- if ( isUndefined ( timeout ) && ! isUndefined ( baseConfig . timeout ) ) {
114
- requestConfig . timeout = baseConfig . timeout
120
+ // requestConfig > loaderConfig > baseConfig
121
+ if ( isUndefined ( timeout ) ) {
122
+ if ( ! isUndefined ( loaderConfig . timeout ) ) {
123
+ timeout = loaderConfig . timeout
124
+ } else {
125
+ timeout = baseConfig . timeout
126
+ }
115
127
}
116
128
117
129
// 1.3.proxy
118
130
if ( isUndefined ( proxy ) && ! isUndefined ( baseConfig . proxy ) ) {
119
- requestConfig . proxy = baseConfig . proxy
131
+ proxy = baseConfig . proxy
120
132
}
121
133
122
134
// 1.4.maxRetry
123
135
// requestConfig > loaderConfig > baseConfig
124
136
if ( isUndefined ( maxRetry ) ) {
125
137
if ( ! isUndefined ( loaderConfig . maxRetry ) ) {
126
- // 装载 API Config
127
- requestConfig . maxRetry = loaderConfig . maxRetry
128
- } else if ( ! isUndefined ( baseConfig . maxRetry ) ) {
129
- // 装载 baseConfig
130
- requestConfig . maxRetry = baseConfig . maxRetry
138
+ maxRetry = loaderConfig . maxRetry
139
+ } else {
140
+ maxRetry = baseConfig . maxRetry
131
141
}
132
142
}
143
+
144
+ return { url, timeout, proxy, maxRetry }
133
145
} )
134
146
135
147
// 2.intervalTime
@@ -147,31 +159,31 @@ function loaderPageConfig(
147
159
) : LoaderCrawlPageConfig {
148
160
const loaderConfig : LoaderCrawlPageConfig = { requestConfigs : [ ] }
149
161
162
+ const requestObjecs : PageRequestConfig [ ] = [ ]
150
163
// requestConfig 统一转成 PageRequestConfig 类型
151
164
if ( isObject ( rawConfig ) && Object . hasOwn ( rawConfig , 'requestConfigs' ) ) {
152
165
// CrawlPageConfigObject 处理
153
- const { requestConfigs, cookies, intervalTime, maxRetry } =
166
+ const { requestConfigs, timeout , cookies, intervalTime, maxRetry } =
154
167
rawConfig as CrawlPageConfigObject
155
168
156
169
// 给 loaderConfig 装载 API Config
157
170
loaderConfig . cookies = cookies
158
171
loaderConfig . intervalTime = intervalTime
159
172
loaderConfig . maxRetry = maxRetry
173
+ loaderConfig . timeout = timeout
160
174
161
- const transformRes = transformRequestConfig ( requestConfigs )
162
-
163
- loaderConfig . requestConfigs . push ( ...transformRes )
175
+ requestObjecs . push ( ...transformRequestConfig ( requestConfigs ) )
164
176
} else {
165
177
// string | PageRequestConfig | (string | PageRequestConfig)[] 处理
166
178
const transformRes = transformRequestConfig (
167
179
rawConfig as string | PageRequestConfig | ( string | PageRequestConfig ) [ ]
168
180
)
169
181
170
- loaderConfig . requestConfigs . push ( ...transformRes )
182
+ requestObjecs . push ( ...transformRes )
171
183
}
172
184
173
- // 装载公共配置
174
- loaderCommonConfig ( baseConfig , loaderConfig )
185
+ // 装载公共配置到 loaderConfig
186
+ loaderCommonConfig ( baseConfig , requestObjecs , loaderConfig )
175
187
176
188
// 装载单独的配置
177
189
loaderConfig . requestConfigs . forEach ( ( requestConfig ) => {
@@ -193,30 +205,30 @@ function loaderDataConfig(
193
205
) : LoaderCrawlDataConfig {
194
206
const loaderConfig : LoaderCrawlDataConfig = { requestConfigs : [ ] }
195
207
196
- // requestConfig 统一转成 PageRequestConfig 类型
208
+ // requestConfig 统一转成 DataRequestConfig 类型
209
+ const requestObjecs : DataRequestConfig [ ] = [ ]
197
210
if ( isObject ( rawConfig ) && Object . hasOwn ( rawConfig , 'requestConfigs' ) ) {
198
211
// CrawlDataConfigObject 处理
199
- const { requestConfigs, intervalTime, maxRetry } =
212
+ const { requestConfigs, timeout , intervalTime, maxRetry } =
200
213
rawConfig as CrawlDataConfigObject
201
214
202
215
// 给 loaderConfig 装载 API Config
203
216
loaderConfig . intervalTime = intervalTime
204
217
loaderConfig . maxRetry = maxRetry
218
+ loaderConfig . timeout = timeout
205
219
206
- const transformRes = transformRequestConfig ( requestConfigs )
207
-
208
- loaderConfig . requestConfigs . push ( ...transformRes )
220
+ requestObjecs . push ( ...transformRequestConfig ( requestConfigs ) )
209
221
} else {
210
222
// string | DataRequestConfig | (string | DataRequestConfig)[] 处理
211
223
const transformRes = transformRequestConfig (
212
224
rawConfig as string | DataRequestConfig | ( string | DataRequestConfig ) [ ]
213
225
)
214
226
215
- loaderConfig . requestConfigs . push ( ...transformRes )
227
+ requestObjecs . push ( ...transformRequestConfig ( transformRes ) )
216
228
}
217
229
218
- // 装载公共配置
219
- loaderCommonConfig ( baseConfig , loaderConfig )
230
+ // 装载公共配置到 loaderConfig
231
+ loaderCommonConfig ( baseConfig , requestObjecs , loaderConfig )
220
232
221
233
return loaderConfig
222
234
}
@@ -227,16 +239,19 @@ function loaderFileConfig(
227
239
) : LoaderCrawlFileConfig {
228
240
const loaderConfig : LoaderCrawlFileConfig = {
229
241
requestConfigs : [ ] ,
242
+ timeout : rawConfig . timeout ,
230
243
intervalTime : rawConfig . intervalTime ,
231
244
maxRetry : rawConfig . maxRetry ,
232
245
fileConfig : rawConfig . fileConfig
233
246
}
234
247
235
248
// requestConfig 统一转成 FileRequestConfig 类型
236
- loaderConfig . requestConfigs = transformRequestConfig ( rawConfig . requestConfig )
249
+ const requestObjecs : FileRequestConfig [ ] = transformRequestConfig (
250
+ rawConfig . requestConfig
251
+ )
237
252
238
- // 装载公共配置
239
- loaderCommonConfig ( baseConfig , loaderConfig )
253
+ // 装载公共配置到 loaderConfig
254
+ loaderCommonConfig ( baseConfig , requestObjecs , loaderConfig )
240
255
241
256
return loaderConfig
242
257
}
@@ -292,7 +307,7 @@ export function createCrawlPage(baseConfig: LoaderXCrawlBaseConfig) {
292
307
id,
293
308
isSuccess,
294
309
maxRetry,
295
- retryCount ,
310
+ crawlCount ,
296
311
errorQueue,
297
312
crawlSingleRes
298
313
} = item
@@ -314,7 +329,8 @@ export function createCrawlPage(baseConfig: LoaderXCrawlBaseConfig) {
314
329
id,
315
330
isSuccess,
316
331
maxRetry,
317
- retryCount,
332
+ crawlCount,
333
+ retryCount : crawlCount - 1 ,
318
334
errorQueue,
319
335
data
320
336
}
@@ -339,7 +355,7 @@ export function createCrawlPage(baseConfig: LoaderXCrawlBaseConfig) {
339
355
}
340
356
341
357
async function crawlPageSingle (
342
- controllerConfig : ControllerConfig < PageRequestConfig , any > ,
358
+ controllerConfig : ControllerConfig < LoaderPageRequestConfig , any > ,
343
359
cid : number
344
360
) {
345
361
const { id, requestConfig } = controllerConfig
@@ -419,7 +435,7 @@ export function createCrawlData(baseConfig: LoaderXCrawlBaseConfig) {
419
435
id,
420
436
isSuccess,
421
437
maxRetry,
422
- retryCount ,
438
+ crawlCount ,
423
439
errorQueue,
424
440
crawlSingleRes
425
441
} = item
@@ -428,7 +444,8 @@ export function createCrawlData(baseConfig: LoaderXCrawlBaseConfig) {
428
444
id,
429
445
isSuccess,
430
446
maxRetry,
431
- retryCount,
447
+ crawlCount,
448
+ retryCount : crawlCount - 1 ,
432
449
errorQueue,
433
450
data : null
434
451
}
@@ -493,7 +510,7 @@ export function createCrawlFile(baseConfig: LoaderXCrawlBaseConfig) {
493
510
id,
494
511
isSuccess,
495
512
maxRetry,
496
- retryCount ,
513
+ crawlCount ,
497
514
errorQueue,
498
515
crawlSingleRes
499
516
} = item
@@ -502,7 +519,8 @@ export function createCrawlFile(baseConfig: LoaderXCrawlBaseConfig) {
502
519
id,
503
520
isSuccess,
504
521
maxRetry,
505
- retryCount,
522
+ crawlCount,
523
+ retryCount : crawlCount - 1 ,
506
524
errorQueue,
507
525
data : null
508
526
}
0 commit comments