Skip to content

Commit 49b2cac

Browse files
committed
Add: Allow setting the proxy uniformly in the crawling API
1 parent 15639c4 commit 49b2cac

File tree

5 files changed

+21
-12
lines changed

5 files changed

+21
-12
lines changed

src/api.ts

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -128,8 +128,13 @@ function loaderCommonConfig(
128128
}
129129

130130
// 1.3.proxy
131-
if (isUndefined(proxy) && !isUndefined(baseConfig.proxy)) {
132-
proxy = baseConfig.proxy
131+
// requestConfig > loaderConfig > baseConfig
132+
if (isUndefined(proxy)) {
133+
if (!isUndefined(loaderConfig.proxy)) {
134+
proxy = loaderConfig.proxy
135+
} else if (!isUndefined(baseConfig.proxy)) {
136+
proxy = baseConfig.proxy
137+
}
133138
}
134139

135140
// 1.4.maxRetry
@@ -169,10 +174,11 @@ function loaderPageConfig(
169174
// requestConfig 统一转成 PageRequestConfig 类型
170175
if (isObject(rawConfig) && Object.hasOwn(rawConfig, 'requestConfigs')) {
171176
// CrawlPageConfigObject 处理
172-
const { requestConfigs, timeout, cookies, intervalTime, maxRetry } =
177+
const { requestConfigs, proxy, timeout, cookies, intervalTime, maxRetry } =
173178
rawConfig as CrawlPageConfigObject
174179

175180
// 给 loaderConfig 装载 API Config
181+
loaderConfig.proxy = proxy
176182
loaderConfig.cookies = cookies
177183
loaderConfig.intervalTime = intervalTime
178184
loaderConfig.maxRetry = maxRetry
@@ -215,10 +221,11 @@ function loaderDataConfig(
215221
const requestObjecs: DataRequestConfig[] = []
216222
if (isObject(rawConfig) && Object.hasOwn(rawConfig, 'requestConfigs')) {
217223
// CrawlDataConfigObject 处理
218-
const { requestConfigs, timeout, intervalTime, maxRetry } =
224+
const { requestConfigs, proxy, timeout, intervalTime, maxRetry } =
219225
rawConfig as CrawlDataConfigObject
220226

221227
// 给 loaderConfig 装载 API Config
228+
loaderConfig.proxy = proxy
222229
loaderConfig.intervalTime = intervalTime
223230
loaderConfig.maxRetry = maxRetry
224231
loaderConfig.timeout = timeout
@@ -245,6 +252,7 @@ function loaderFileConfig(
245252
): LoaderCrawlFileConfig {
246253
const loaderConfig: LoaderCrawlFileConfig = {
247254
requestConfigs: [],
255+
proxy: rawConfig.proxy,
248256
timeout: rawConfig.timeout,
249257
intervalTime: rawConfig.intervalTime,
250258
maxRetry: rawConfig.maxRetry,

src/types/api.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ export type CrawlFileRequestConfig =
107107

108108
export interface CrawlPageConfigObject {
109109
requestConfigs: (string | PageRequestConfig)[]
110+
proxy?: string
110111
timeout?: number
111112
cookies?: PageRequestConfigCookies
112113
intervalTime?: IntervalTime
@@ -115,6 +116,7 @@ export interface CrawlPageConfigObject {
115116

116117
export interface CrawlDataConfigObject {
117118
requestConfigs: (string | DataRequestConfig)[]
119+
proxy?: string
118120
timeout?: number
119121
intervalTime?: IntervalTime
120122
maxRetry?: number
@@ -134,6 +136,7 @@ export type CrawlDataConfig =
134136

135137
export interface CrawlFileConfig<R extends CrawlFileRequestConfig> {
136138
requestConfig: R
139+
proxy?: string
137140
timeout?: number
138141
intervalTime?: IntervalTime
139142
maxRetry?: number

test/environment/crawlData.test.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -84,12 +84,12 @@ async function loaderBaseConfig() {
8484
// 2.2.Loader API Config
8585
async function loaderAPIConfig() {
8686
const testXCrawl = xCrawl({
87-
baseUrl: 'http://localhost:9001/api',
88-
proxy: 'http://localhost:14892'
87+
baseUrl: 'http://localhost:9001/api'
8988
})
9089

9190
const res = await testXCrawl.crawlData({
9291
requestConfigs: ['/room/193581217', '/room/193581217'],
92+
proxy: 'http://localhost:14892',
9393
timeout: 10000,
9494
intervalTime: { max: 1000 },
9595
maxRetry: 0

test/environment/crawlFile.test.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -88,12 +88,12 @@ async function loaderBaseConfig() {
8888
async function loaderAPIConfig() {
8989
const testXCrawl = xCrawl({
9090
baseUrl:
91-
'https://raw.githubusercontent.com/coder-hxl/airbnb-upload/master/area',
92-
proxy: 'http://localhost:14892'
91+
'https://raw.githubusercontent.com/coder-hxl/airbnb-upload/master/area'
9392
})
9493

9594
const res = await testXCrawl.crawlFile({
9695
requestConfig: ['/4401.jpg', '/4403.jpg'],
96+
proxy: 'http://localhost:14892',
9797
timeout: 10000,
9898
fileConfig: { storeDir },
9999
intervalTime: { max: 1000 },

test/environment/crawlPage.test.ts

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -91,13 +91,11 @@ async function loaderBaseConfig() {
9191

9292
// 2.2.Loader API Config
9393
async function loaderAPIConfig() {
94-
const testXCrawl = xCrawl({
95-
baseUrl: 'https://github.com',
96-
proxy: 'http://localhost:14892'
97-
})
94+
const testXCrawl = xCrawl({ baseUrl: 'https://github.com' })
9895

9996
const res = await testXCrawl.crawlPage({
10097
requestConfigs: ['/coder-hxl', '/coder-hxl/x-crawl'],
98+
proxy: 'http://localhost:14892',
10199
timeout: 10000,
102100
intervalTime: { max: 1000 },
103101
maxRetry: 0

0 commit comments

Comments
 (0)