Skip to content

Commit ef822cc

Browse files
committed
Update: crawlFile API configuration changes
1 parent d936cee commit ef822cc

File tree

4 files changed

+137
-114
lines changed

4 files changed

+137
-114
lines changed

src/api.ts

Lines changed: 81 additions & 56 deletions
Original file line number | Diff line number | Diff line change
@@ -34,13 +34,13 @@ import {
3434
LoaderCrawlFileConfig,
3535
CrawlDataSingleRes,
3636
CrawlDataRes,
37-
CrawlFileRequestConfig,
3837
CrawlFileRes,
3938
CrawlFileSingleRes,
4039
CrawlDataConfigObject,
4140
LoaderPageRequestConfig,
4241
LoaderDataRequestConfig,
43-
LoaderFileRequestConfig
42+
LoaderFileRequestConfig,
43+
CrawlFileConfigObject
4444
} from './types/api'
4545
import { LoaderXCrawlBaseConfig } from './types'
4646

@@ -92,7 +92,7 @@ function transformRequestConfig(
9292
config: string | DataRequestConfig | (string | DataRequestConfig)[]
9393
): DataRequestConfig[]
9494
function transformRequestConfig(
95-
config: string | FileRequestConfig | (string | FileRequestConfig)[]
95+
config: (string | FileRequestConfig)[]
9696
): FileRequestConfig[]
9797
function transformRequestConfig(config: any) {
9898
return isArray(config)
@@ -102,14 +102,14 @@ function transformRequestConfig(config: any) {
102102

103103
function loaderCommonConfig(
104104
baseConfig: LoaderXCrawlBaseConfig,
105-
requestObjecs: (PageRequestConfig | DataRequestConfig | FileRequestConfig)[],
105+
requestObjects: (PageRequestConfig | DataRequestConfig | FileRequestConfig)[],
106106
loaderConfig:
107107
| LoaderCrawlPageConfig
108108
| LoaderCrawlDataConfig
109109
| LoaderCrawlFileConfig
110110
) {
111111
// 1.requestConfigs
112-
loaderConfig.requestConfigs = requestObjecs.map((requestConfig) => {
112+
loaderConfig.requestConfigs = requestObjects.map((requestConfig) => {
113113
let { url, timeout, proxy, maxRetry, priority } = requestConfig
114114

115115
// 1.1.baseUrl
@@ -170,7 +170,7 @@ function loaderPageConfig(
170170
): LoaderCrawlPageConfig {
171171
const loaderConfig: LoaderCrawlPageConfig = { requestConfigs: [] }
172172

173-
const requestObjecs: PageRequestConfig[] = []
173+
const requestObjects: PageRequestConfig[] = []
174174
// requestConfig 统一转成 PageRequestConfig 类型
175175
if (isObject(rawConfig) && Object.hasOwn(rawConfig, 'requestConfigs')) {
176176
// CrawlPageConfigObject 处理
@@ -184,29 +184,31 @@ function loaderPageConfig(
184184
loaderConfig.maxRetry = maxRetry
185185
loaderConfig.timeout = timeout
186186

187-
requestObjecs.push(...transformRequestConfig(requestConfigs))
187+
requestObjects.push(...transformRequestConfig(requestConfigs))
188188
} else {
189189
// string | PageRequestConfig | (string | PageRequestConfig)[] 处理
190190
const transformRes = transformRequestConfig(
191191
rawConfig as string | PageRequestConfig | (string | PageRequestConfig)[]
192192
)
193193

194-
requestObjecs.push(...transformRes)
194+
requestObjects.push(...transformRes)
195195
}
196196

197197
// 装载公共配置到 loaderConfig
198-
loaderCommonConfig(baseConfig, requestObjecs, loaderConfig)
198+
loaderCommonConfig(baseConfig, requestObjects, loaderConfig)
199199

200200
// 装载单独的配置
201-
loaderConfig.requestConfigs.forEach((requestConfig) => {
202-
const { cookies } = requestConfig
203-
204-
// cookies
205-
if (isUndefined(cookies) && !isUndefined(loaderConfig.cookies)) {
206-
// 装载 API Config
207-
requestConfig.cookies = loaderConfig.cookies
208-
}
209-
})
201+
if (!isUndefined(loaderConfig.cookies)) {
202+
loaderConfig.requestConfigs.forEach((requestConfig) => {
203+
const { cookies } = requestConfig
204+
205+
// cookies
206+
if (isUndefined(cookies) && !isUndefined(loaderConfig.cookies)) {
207+
// 装载 API Config
208+
requestConfig.cookies = loaderConfig.cookies
209+
}
210+
})
211+
}
210212

211213
return loaderConfig
212214
}
@@ -218,7 +220,7 @@ function loaderDataConfig(
218220
const loaderConfig: LoaderCrawlDataConfig = { requestConfigs: [] }
219221

220222
// requestConfig 统一转成 DataRequestConfig 类型
221-
const requestObjecs: DataRequestConfig[] = []
223+
const requestObjects: DataRequestConfig[] = []
222224
if (isObject(rawConfig) && Object.hasOwn(rawConfig, 'requestConfigs')) {
223225
// CrawlDataConfigObject 处理
224226
const { requestConfigs, proxy, timeout, intervalTime, maxRetry } =
@@ -230,59 +232,80 @@ function loaderDataConfig(
230232
loaderConfig.maxRetry = maxRetry
231233
loaderConfig.timeout = timeout
232234

233-
requestObjecs.push(...transformRequestConfig(requestConfigs))
235+
requestObjects.push(...transformRequestConfig(requestConfigs))
234236
} else {
235237
// string | DataRequestConfig | (string | DataRequestConfig)[] 处理
236238
const transformRes = transformRequestConfig(
237239
rawConfig as string | DataRequestConfig | (string | DataRequestConfig)[]
238240
)
239241

240-
requestObjecs.push(...transformRequestConfig(transformRes))
242+
requestObjects.push(...transformRequestConfig(transformRes))
241243
}
242244

243245
// 装载公共配置到 loaderConfig
244-
loaderCommonConfig(baseConfig, requestObjecs, loaderConfig)
246+
loaderCommonConfig(baseConfig, requestObjects, loaderConfig)
245247

246248
return loaderConfig
247249
}
248250

249251
function loaderFileConfig(
250252
baseConfig: LoaderXCrawlBaseConfig,
251-
rawConfig: CrawlFileConfig<CrawlFileRequestConfig>
253+
rawConfig: CrawlFileConfig
252254
): LoaderCrawlFileConfig {
253-
const loaderConfig: LoaderCrawlFileConfig = {
254-
requestConfigs: [],
255-
proxy: rawConfig.proxy,
256-
timeout: rawConfig.timeout,
257-
intervalTime: rawConfig.intervalTime,
258-
maxRetry: rawConfig.maxRetry,
259-
fileConfig: rawConfig.fileConfig
260-
}
255+
const loaderConfig: LoaderCrawlFileConfig = { requestConfigs: [] }
261256

262257
// requestConfig 统一转成 FileRequestConfig 类型
263-
const requestObjecs: FileRequestConfig[] = transformRequestConfig(
264-
rawConfig.requestConfig
265-
)
258+
const requestObjects: FileRequestConfig[] = []
259+
if (isObject(rawConfig) && Object.hasOwn(rawConfig, 'requestConfigs')) {
260+
// CrawlFileConfigObject 处理
261+
const {
262+
requestConfigs,
263+
proxy,
264+
timeout,
265+
intervalTime,
266+
maxRetry,
267+
fileConfig
268+
} = rawConfig as CrawlFileConfigObject
269+
270+
// 给 loaderConfig 装载 API Config
271+
loaderConfig.proxy = proxy
272+
loaderConfig.intervalTime = intervalTime
273+
loaderConfig.maxRetry = maxRetry
274+
loaderConfig.timeout = timeout
275+
loaderConfig.fileConfig = fileConfig
276+
277+
requestObjects.push(...transformRequestConfig(requestConfigs))
278+
} else {
279+
// FileRequestConfig | FileRequestConfig[] 处理
280+
requestObjects.push(
281+
...(isArray(rawConfig) ? rawConfig : [rawConfig as FileRequestConfig])
282+
)
283+
}
266284

267285
// 装载公共配置到 loaderConfig
268-
loaderCommonConfig(baseConfig, requestObjecs, loaderConfig)
286+
loaderCommonConfig(baseConfig, requestObjects, loaderConfig)
269287

270288
// 装载单独的配置
271-
loaderConfig.requestConfigs.forEach((requestConfig) => {
272-
if (
273-
isUndefined(requestConfig.storeDir) &&
274-
!isUndefined(rawConfig.fileConfig?.storeDir)
275-
) {
276-
requestConfig.storeDir = rawConfig.fileConfig!.storeDir
277-
}
289+
if (
290+
!isUndefined(loaderConfig.fileConfig?.storeDir) ||
291+
!isUndefined(loaderConfig.fileConfig?.extension)
292+
) {
293+
loaderConfig.requestConfigs.forEach((requestConfig) => {
294+
if (
295+
isUndefined(requestConfig.storeDir) &&
296+
!isUndefined(loaderConfig.fileConfig?.storeDir)
297+
) {
298+
requestConfig.storeDir = loaderConfig.fileConfig!.storeDir
299+
}
278300

279-
if (
280-
isUndefined(requestConfig.extension) &&
281-
!isUndefined(rawConfig.fileConfig?.extension)
282-
) {
283-
requestConfig.extension = rawConfig.fileConfig!.extension
284-
}
285-
})
301+
if (
302+
isUndefined(requestConfig.extension) &&
303+
!isUndefined(loaderConfig.fileConfig?.extension)
304+
) {
305+
requestConfig.extension = loaderConfig.fileConfig!.extension
306+
}
307+
})
308+
}
286309

287310
return loaderConfig
288311
}
@@ -511,10 +534,10 @@ export function createCrawlData(baseConfig: LoaderXCrawlBaseConfig) {
511534
}
512535

513536
export function createCrawlFile(baseConfig: LoaderXCrawlBaseConfig) {
514-
async function crawlFile<R extends CrawlFileRequestConfig>(
515-
config: CrawlFileConfig<R>,
537+
async function crawlFile<T extends CrawlFileConfig>(
538+
config: T,
516539
callback?: (res: CrawlFileSingleRes) => void
517-
): Promise<CrawlFileRes<R>> {
540+
): Promise<CrawlFileRes<T>> {
518541
const { requestConfigs, intervalTime, fileConfig } = loaderFileConfig(
519542
baseConfig,
520543
config
@@ -658,11 +681,13 @@ export function createCrawlFile(baseConfig: LoaderXCrawlBaseConfig) {
658681
)
659682
)
660683

661-
const crawlRes = isArray(config.requestConfig)
662-
? crawlResArr
663-
: crawlResArr[0]
684+
const crawlRes =
685+
isArray(config) ||
686+
(isObject(config) && Object.hasOwn(config, 'requestConfigs'))
687+
? crawlResArr
688+
: crawlResArr[0]
664689

665-
return crawlRes as CrawlFileRes<R>
690+
return crawlRes as CrawlFileRes<T>
666691
}
667692

668693
return crawlFile

src/types/api.ts

Lines changed: 23 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -27,10 +27,7 @@ export interface LoaderCrawlDataConfig
2727
}
2828

2929
export interface LoaderCrawlFileConfig
30-
extends MapTypeObject<
31-
CrawlFileConfig<CrawlFileRequestConfig>,
32-
'requestConfig'
33-
> {
30+
extends MapTypeObject<CrawlFileConfigObject, 'requestConfigs'> {
3431
requestConfigs: LoaderFileRequestConfig[]
3532
}
3633

@@ -100,11 +97,6 @@ export interface FileRequestConfig {
10097
}
10198

10299
// CrawlConfig
103-
export type CrawlFileRequestConfig =
104-
| string
105-
| FileRequestConfig
106-
| (string | FileRequestConfig)[]
107-
108100
export interface CrawlPageConfigObject {
109101
requestConfigs: (string | PageRequestConfig)[]
110102
proxy?: string
@@ -122,20 +114,8 @@ export interface CrawlDataConfigObject {
122114
maxRetry?: number
123115
}
124116

125-
export type CrawlPageConfig =
126-
| string
127-
| PageRequestConfig
128-
| (string | PageRequestConfig)[]
129-
| CrawlPageConfigObject
130-
131-
export type CrawlDataConfig =
132-
| string
133-
| DataRequestConfig
134-
| (string | DataRequestConfig)[]
135-
| CrawlDataConfigObject
136-
137-
export interface CrawlFileConfig<R extends CrawlFileRequestConfig> {
138-
requestConfig: R
117+
export interface CrawlFileConfigObject {
118+
requestConfigs: (string | FileRequestConfig)[]
139119
proxy?: string
140120
timeout?: number
141121
intervalTime?: IntervalTime
@@ -152,6 +132,23 @@ export interface CrawlFileConfig<R extends CrawlFileRequestConfig> {
152132
}
153133
}
154134

135+
export type CrawlPageConfig =
136+
| string
137+
| PageRequestConfig
138+
| (string | PageRequestConfig)[]
139+
| CrawlPageConfigObject
140+
141+
export type CrawlDataConfig =
142+
| string
143+
| DataRequestConfig
144+
| (string | DataRequestConfig)[]
145+
| CrawlDataConfigObject
146+
147+
export type CrawlFileConfig =
148+
| FileRequestConfig
149+
| FileRequestConfig[]
150+
| CrawlFileConfigObject
151+
155152
export interface StartPollingConfig {
156153
d?: number
157154
h?: number
@@ -211,7 +208,8 @@ export type CrawlDataRes<D, R extends CrawlDataConfig> = R extends
211208
? CrawlDataSingleRes<D>[]
212209
: CrawlDataSingleRes<D>
213210

214-
export type CrawlFileRes<R extends CrawlFileRequestConfig> = R extends
215-
| (string | PageRequestConfig)[]
211+
export type CrawlFileRes<R extends CrawlFileConfig> = R extends
212+
| FileRequestConfig[]
213+
| CrawlFileConfigObject
216214
? CrawlFileSingleRes[]
217215
: CrawlFileSingleRes

src/types/index.ts

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,6 @@ import {
88
CrawlPageSingleRes,
99
CrawlDataRes,
1010
CrawlDataSingleRes,
11-
CrawlFileRequestConfig,
1211
CrawlFileSingleRes,
1312
CrawlFileRes
1413
} from './api'
@@ -39,10 +38,10 @@ export interface XCrawlInstance {
3938
callback?: ((res: CrawlDataSingleRes<D>) => void) | undefined
4039
) => Promise<CrawlDataRes<D, T>>
4140

42-
crawlFile: <R extends CrawlFileRequestConfig>(
43-
config: CrawlFileConfig<R>,
41+
crawlFile: <T extends CrawlFileConfig>(
42+
config: T,
4443
callback?: ((res: CrawlFileSingleRes) => void) | undefined
45-
) => Promise<CrawlFileRes<R>>
44+
) => Promise<CrawlFileRes<T>>
4645

4746
startPolling: (
4847
config: StartPollingConfig,

0 commit comments

Comments
 (0)