Skip to content

Commit e6ae399

Browse files
committed
minor adjustments
1 parent 0edc053 commit e6ae399

File tree

7 files changed

+38
-52
lines changed

7 files changed

+38
-52
lines changed

README.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,6 @@ Create a crawler instance via new XCrawl. The request queue is maintained by the
5050
5151
```ts
5252
class XCrawl {
53-
private readonly baseConfig
5453
constructor(baseConfig?: IXCrawlBaseConifg)
5554
fetchHTML(config: IFetchHTMLConfig): Promise<IFetchHTML>
5655
fetchData<T = any>(config: IFetchDataConfig): Promise<IFetchCommon<T>>

document/cn.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,6 @@ docsXCrawl.fetchHTML('/zh/get-started').then((res) => {
5050
5151
```ts
5252
class XCrawl {
53-
private readonly baseConfig
5453
constructor(baseConfig?: IXCrawlBaseConifg)
5554
fetchHTML(config: IFetchHTMLConfig): Promise<IFetchHTML>
5655
fetchData<T = any>(config: IFetchDataConfig): Promise<IFetchCommon<T>>

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"private": true,
33
"name": "x-crawl",
4-
"version": "0.1.5",
4+
"version": "0.1.6",
55
"author": "CoderHxl",
66
"description": "XCrawl is a Nodejs multifunctional crawler library. Crawl HTML, JSON, file resources, etc. through simple configuration.",
77
"license": "MIT",

publish/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "x-crawl",
3-
"version": "0.1.5",
3+
"version": "0.1.6",
44
"author": "CoderHxl",
55
"description": "XCrawl is a Nodejs multifunctional crawler library. Crawl HTML, JSON, file resources, etc. through simple configuration.",
66
"license": "MIT",

src/index.ts

Lines changed: 33 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -27,45 +27,39 @@ import {
2727
IIntervalTime
2828
} from './types'
2929

30-
function mergeConfig<T extends IFetchBaseConifg>(
31-
baseConfig: IXCrawlBaseConifg,
32-
config: T
33-
): IFetchBaseConifg & T {
34-
const {
35-
baseUrl,
36-
timeout: baseTimeout,
37-
intervalTime: baseIntervalTime
38-
} = baseConfig
39-
const { requestConifg, intervalTime } = config
40-
41-
const requestConifgArr = isArray(requestConifg)
42-
? requestConifg
43-
: [requestConifg]
44-
45-
for (const requestItem of requestConifgArr) {
46-
const { url, timeout } = requestItem
47-
48-
if (!isUndefined(baseUrl)) {
49-
requestItem.url = baseUrl + url
50-
}
30+
export default class XCrawl {
31+
private readonly baseConfig: IXCrawlBaseConifg
5132

52-
if (isUndefined(timeout) && !isUndefined(baseTimeout)) {
53-
requestItem.timeout = baseTimeout
54-
}
33+
constructor(baseConfig: IXCrawlBaseConifg = {}) {
34+
this.baseConfig = baseConfig
5535
}
5636

57-
if (isUndefined(intervalTime) && !isUndefined(baseIntervalTime)) {
58-
config.intervalTime = baseIntervalTime
59-
}
37+
private mergeConfig<T extends IFetchBaseConifg>(rawConfig: T): T {
38+
const baseConfig = this.baseConfig
39+
const newConfig: T = structuredClone(rawConfig)
6040

61-
return config
62-
}
41+
// 1.处理 requestConifg
42+
const requestConifgArr = isArray(newConfig.requestConifg)
43+
? newConfig.requestConifg
44+
: [newConfig.requestConifg]
45+
for (const requestItem of requestConifgArr) {
46+
const { url, timeout } = requestItem
6347

64-
export default class XCrawl {
65-
private readonly baseConfig: IXCrawlBaseConifg
48+
if (!isUndefined(baseConfig.baseUrl)) {
49+
requestItem.url = baseConfig.baseUrl + url
50+
}
6651

67-
constructor(baseConfig: IXCrawlBaseConifg = {}) {
68-
this.baseConfig = baseConfig
52+
if (isUndefined(timeout)) {
53+
requestItem.timeout = baseConfig.timeout
54+
}
55+
}
56+
57+
// 2.处理 intervalTime
58+
if (isUndefined(newConfig.intervalTime)) {
59+
newConfig.intervalTime = baseConfig.intervalTime
60+
}
61+
62+
return newConfig
6963
}
7064

7165
private async useBatchRequestByMode(
@@ -87,12 +81,8 @@ export default class XCrawl {
8781
}
8882

8983
async fetchHTML(config: IFetchHTMLConfig): Promise<IFetchHTML> {
90-
const rawRequestConifg: IFetchHTMLConfig = isString(config)
91-
? { url: config }
92-
: config
93-
94-
const { requestConifg } = mergeConfig(this.baseConfig, {
95-
requestConifg: rawRequestConifg
84+
const { requestConifg } = this.mergeConfig({
85+
requestConifg: isString(config) ? { url: config } : config
9686
})
9787

9888
const requestRes = await request(requestConifg)
@@ -110,7 +100,7 @@ export default class XCrawl {
110100
}
111101

112102
async fetchData<T = any>(config: IFetchDataConfig): Promise<IFetchCommon<T>> {
113-
const { requestConifg, intervalTime } = mergeConfig(this.baseConfig, config)
103+
const { requestConifg, intervalTime } = this.mergeConfig(config)
114104

115105
const requestRes = await this.useBatchRequestByMode(
116106
requestConifg,
@@ -134,10 +124,8 @@ export default class XCrawl {
134124
}
135125

136126
async fetchFile(config: IFetchFileConfig): Promise<IFetchCommon<IFileInfo>> {
137-
const { requestConifg, intervalTime, fileConfig } = mergeConfig(
138-
this.baseConfig,
139-
config
140-
)
127+
const { requestConifg, intervalTime, fileConfig } = this.mergeConfig(config)
128+
141129
const requestRes = await this.useBatchRequestByMode(
142130
requestConifg,
143131
intervalTime
@@ -170,7 +158,7 @@ export default class XCrawl {
170158

171159
const saveTotal = requestRes.length
172160
const success = container.length
173-
const error = requestRes.length - container.length
161+
const error = saveTotal - success
174162
log(
175163
`saveTotal: ${logNumber(saveTotal)}, success: ${logSuccess(
176164
success

src/utils.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@ export function sleep(timeout: number) {
77
export function random(max: number, min = 0) {
88
let res = Math.floor(Math.random() * max)
99

10-
if (res < min) {
11-
res = random(max, min)
10+
while (res < min) {
11+
res = Math.floor(Math.random() * max)
1212
}
1313

1414
return res

test/start/index.js

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)