@@ -34,13 +34,13 @@ import {
34
34
LoaderCrawlFileConfig ,
35
35
CrawlDataSingleRes ,
36
36
CrawlDataRes ,
37
- CrawlFileRequestConfig ,
38
37
CrawlFileRes ,
39
38
CrawlFileSingleRes ,
40
39
CrawlDataConfigObject ,
41
40
LoaderPageRequestConfig ,
42
41
LoaderDataRequestConfig ,
43
- LoaderFileRequestConfig
42
+ LoaderFileRequestConfig ,
43
+ CrawlFileConfigObject
44
44
} from './types/api'
45
45
import { LoaderXCrawlBaseConfig } from './types'
46
46
@@ -92,7 +92,7 @@ function transformRequestConfig(
92
92
config : string | DataRequestConfig | ( string | DataRequestConfig ) [ ]
93
93
) : DataRequestConfig [ ]
94
94
function transformRequestConfig (
95
- config : string | FileRequestConfig | ( string | FileRequestConfig ) [ ]
95
+ config : ( string | FileRequestConfig ) [ ]
96
96
) : FileRequestConfig [ ]
97
97
function transformRequestConfig ( config : any ) {
98
98
return isArray ( config )
@@ -102,14 +102,14 @@ function transformRequestConfig(config: any) {
102
102
103
103
function loaderCommonConfig (
104
104
baseConfig : LoaderXCrawlBaseConfig ,
105
- requestObjecs : ( PageRequestConfig | DataRequestConfig | FileRequestConfig ) [ ] ,
105
+ requestObjects : ( PageRequestConfig | DataRequestConfig | FileRequestConfig ) [ ] ,
106
106
loaderConfig :
107
107
| LoaderCrawlPageConfig
108
108
| LoaderCrawlDataConfig
109
109
| LoaderCrawlFileConfig
110
110
) {
111
111
// 1.requestConfigs
112
- loaderConfig . requestConfigs = requestObjecs . map ( ( requestConfig ) => {
112
+ loaderConfig . requestConfigs = requestObjects . map ( ( requestConfig ) => {
113
113
let { url, timeout, proxy, maxRetry, priority } = requestConfig
114
114
115
115
// 1.1.baseUrl
@@ -170,7 +170,7 @@ function loaderPageConfig(
170
170
) : LoaderCrawlPageConfig {
171
171
const loaderConfig : LoaderCrawlPageConfig = { requestConfigs : [ ] }
172
172
173
- const requestObjecs : PageRequestConfig [ ] = [ ]
173
+ const requestObjects : PageRequestConfig [ ] = [ ]
174
174
// requestConfig 统一转成 PageRequestConfig 类型
175
175
if ( isObject ( rawConfig ) && Object . hasOwn ( rawConfig , 'requestConfigs' ) ) {
176
176
// CrawlPageConfigObject 处理
@@ -184,29 +184,31 @@ function loaderPageConfig(
184
184
loaderConfig . maxRetry = maxRetry
185
185
loaderConfig . timeout = timeout
186
186
187
- requestObjecs . push ( ...transformRequestConfig ( requestConfigs ) )
187
+ requestObjects . push ( ...transformRequestConfig ( requestConfigs ) )
188
188
} else {
189
189
// string | PageRequestConfig | (string | PageRequestConfig)[] 处理
190
190
const transformRes = transformRequestConfig (
191
191
rawConfig as string | PageRequestConfig | ( string | PageRequestConfig ) [ ]
192
192
)
193
193
194
- requestObjecs . push ( ...transformRes )
194
+ requestObjects . push ( ...transformRes )
195
195
}
196
196
197
197
// 装载公共配置到 loaderConfig
198
- loaderCommonConfig ( baseConfig , requestObjecs , loaderConfig )
198
+ loaderCommonConfig ( baseConfig , requestObjects , loaderConfig )
199
199
200
200
// 装载单独的配置
201
- loaderConfig . requestConfigs . forEach ( ( requestConfig ) => {
202
- const { cookies } = requestConfig
203
-
204
- // cookies
205
- if ( isUndefined ( cookies ) && ! isUndefined ( loaderConfig . cookies ) ) {
206
- // 装载 API Config
207
- requestConfig . cookies = loaderConfig . cookies
208
- }
209
- } )
201
+ if ( ! isUndefined ( loaderConfig . cookies ) ) {
202
+ loaderConfig . requestConfigs . forEach ( ( requestConfig ) => {
203
+ const { cookies } = requestConfig
204
+
205
+ // cookies
206
+ if ( isUndefined ( cookies ) && ! isUndefined ( loaderConfig . cookies ) ) {
207
+ // 装载 API Config
208
+ requestConfig . cookies = loaderConfig . cookies
209
+ }
210
+ } )
211
+ }
210
212
211
213
return loaderConfig
212
214
}
@@ -218,7 +220,7 @@ function loaderDataConfig(
218
220
const loaderConfig : LoaderCrawlDataConfig = { requestConfigs : [ ] }
219
221
220
222
// requestConfig 统一转成 DataRequestConfig 类型
221
- const requestObjecs : DataRequestConfig [ ] = [ ]
223
+ const requestObjects : DataRequestConfig [ ] = [ ]
222
224
if ( isObject ( rawConfig ) && Object . hasOwn ( rawConfig , 'requestConfigs' ) ) {
223
225
// CrawlDataConfigObject 处理
224
226
const { requestConfigs, proxy, timeout, intervalTime, maxRetry } =
@@ -230,59 +232,80 @@ function loaderDataConfig(
230
232
loaderConfig . maxRetry = maxRetry
231
233
loaderConfig . timeout = timeout
232
234
233
- requestObjecs . push ( ...transformRequestConfig ( requestConfigs ) )
235
+ requestObjects . push ( ...transformRequestConfig ( requestConfigs ) )
234
236
} else {
235
237
// string | DataRequestConfig | (string | DataRequestConfig)[] 处理
236
238
const transformRes = transformRequestConfig (
237
239
rawConfig as string | DataRequestConfig | ( string | DataRequestConfig ) [ ]
238
240
)
239
241
240
- requestObjecs . push ( ...transformRequestConfig ( transformRes ) )
242
+ requestObjects . push ( ...transformRequestConfig ( transformRes ) )
241
243
}
242
244
243
245
// 装载公共配置到 loaderConfig
244
- loaderCommonConfig ( baseConfig , requestObjecs , loaderConfig )
246
+ loaderCommonConfig ( baseConfig , requestObjects , loaderConfig )
245
247
246
248
return loaderConfig
247
249
}
248
250
249
251
// Builds the LoaderCrawlFileConfig used by crawlFile from the caller's raw config.
// (Comments describe the added `+` side of this diff; `-` lines are the removed version.)
//
// Parameters:
//   baseConfig - base crawler configuration, forwarded unchanged to loaderCommonConfig.
//   rawConfig  - either an object carrying a `requestConfigs` property
//                (treated as CrawlFileConfigObject), or a single FileRequestConfig,
//                or an array of them.
// Returns: the populated loaderConfig object.
function loaderFileConfig (
250
252
baseConfig : LoaderXCrawlBaseConfig ,
251
- rawConfig : CrawlFileConfig < CrawlFileRequestConfig >
253
+ rawConfig : CrawlFileConfig
252
254
) : LoaderCrawlFileConfig {
253
- const loaderConfig : LoaderCrawlFileConfig = {
254
- requestConfigs : [ ] ,
255
- proxy : rawConfig . proxy ,
256
- timeout : rawConfig . timeout ,
257
- intervalTime : rawConfig . intervalTime ,
258
- maxRetry : rawConfig . maxRetry ,
259
- fileConfig : rawConfig . fileConfig
260
- }
255
+ const loaderConfig : LoaderCrawlFileConfig = { requestConfigs : [ ] }
261
256
262
257
// Normalize every accepted input shape into a FileRequestConfig[]
263
- const requestObjecs : FileRequestConfig [ ] = transformRequestConfig (
264
- rawConfig . requestConfig
265
- )
258
+ const requestObjects : FileRequestConfig [ ] = [ ]
259
+ if ( isObject ( rawConfig ) && Object . hasOwn ( rawConfig , 'requestConfigs' ) ) {
260
+ // Handle the CrawlFileConfigObject form (detected by its own `requestConfigs` key)
261
+ const {
262
+ requestConfigs,
263
+ proxy,
264
+ timeout,
265
+ intervalTime,
266
+ maxRetry,
267
+ fileConfig
268
+ } = rawConfig as CrawlFileConfigObject
269
+
270
+ // Copy the API-level options onto loaderConfig
271
+ loaderConfig . proxy = proxy
272
+ loaderConfig . intervalTime = intervalTime
273
+ loaderConfig . maxRetry = maxRetry
274
+ loaderConfig . timeout = timeout
275
+ loaderConfig . fileConfig = fileConfig
276
+
277
+ requestObjects . push ( ...transformRequestConfig ( requestConfigs ) )
278
+ } else {
279
+ // Handle the FileRequestConfig | FileRequestConfig[] form:
+ // an array is spread as-is, a single config is wrapped in a one-element array
280
+ requestObjects . push (
281
+ ...( isArray ( rawConfig ) ? rawConfig : [ rawConfig as FileRequestConfig ] )
282
+ )
283
+ }
266
284
267
285
// Load the shared configuration into loaderConfig
// (loaderCommonConfig assigns loaderConfig.requestConfigs from requestObjects)
268
- loaderCommonConfig ( baseConfig , requestObjecs , loaderConfig )
286
+ loaderCommonConfig ( baseConfig , requestObjects , loaderConfig )
269
287
270
288
// Load the file-specific configuration:
// the per-request pass runs only when fileConfig supplies a storeDir or an extension
271
- loaderConfig . requestConfigs . forEach ( ( requestConfig ) => {
272
- if (
273
- isUndefined ( requestConfig . storeDir ) &&
274
- ! isUndefined ( rawConfig . fileConfig ?. storeDir )
275
- ) {
276
- requestConfig . storeDir = rawConfig . fileConfig ! . storeDir
277
- }
289
+ if (
290
+ ! isUndefined ( loaderConfig . fileConfig ?. storeDir ) ||
291
+ ! isUndefined ( loaderConfig . fileConfig ?. extension )
292
+ ) {
293
+ loaderConfig . requestConfigs . forEach ( ( requestConfig ) => {
+ // A request's own storeDir wins; the fileConfig value only fills an undefined one
294
+ if (
295
+ isUndefined ( requestConfig . storeDir ) &&
296
+ ! isUndefined ( loaderConfig . fileConfig ?. storeDir )
297
+ ) {
298
+ requestConfig . storeDir = loaderConfig . fileConfig ! . storeDir
299
+ }
278
300
279
- if (
280
- isUndefined ( requestConfig . extension ) &&
281
- ! isUndefined ( rawConfig . fileConfig ?. extension )
282
- ) {
283
- requestConfig . extension = rawConfig . fileConfig ! . extension
284
- }
285
- } )
+ // Same rule for extension: only an undefined per-request value is filled
301
+ if (
302
+ isUndefined ( requestConfig . extension ) &&
303
+ ! isUndefined ( loaderConfig . fileConfig ?. extension )
304
+ ) {
305
+ requestConfig . extension = loaderConfig . fileConfig ! . extension
306
+ }
307
+ } )
308
+ }
286
309
287
310
return loaderConfig
288
311
}
@@ -511,10 +534,10 @@ export function createCrawlData(baseConfig: LoaderXCrawlBaseConfig) {
511
534
}
512
535
513
536
export function createCrawlFile ( baseConfig : LoaderXCrawlBaseConfig ) {
514
- async function crawlFile < R extends CrawlFileRequestConfig > (
515
- config : CrawlFileConfig < R > ,
537
+ async function crawlFile < T extends CrawlFileConfig > (
538
+ config : T ,
516
539
callback ?: ( res : CrawlFileSingleRes ) => void
517
- ) : Promise < CrawlFileRes < R > > {
540
+ ) : Promise < CrawlFileRes < T > > {
518
541
const { requestConfigs, intervalTime, fileConfig } = loaderFileConfig (
519
542
baseConfig ,
520
543
config
@@ -658,11 +681,13 @@ export function createCrawlFile(baseConfig: LoaderXCrawlBaseConfig) {
658
681
)
659
682
)
660
683
661
- const crawlRes = isArray ( config . requestConfig )
662
- ? crawlResArr
663
- : crawlResArr [ 0 ]
684
+ const crawlRes =
685
+ isArray ( config ) ||
686
+ ( isObject ( config ) && Object . hasOwn ( config , 'requestConfigs' ) )
687
+ ? crawlResArr
688
+ : crawlResArr [ 0 ]
664
689
665
- return crawlRes as CrawlFileRes < R >
690
+ return crawlRes as CrawlFileRes < T >
666
691
}
667
692
668
693
return crawlFile
0 commit comments