Skip to content

Commit 42ad192

Browse files
committed
Docs: Default value
1 parent 2a787f9 commit 42ad192

File tree

6 files changed

+293
-17
lines changed

6 files changed

+293
-17
lines changed

README.md

Lines changed: 94 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,12 @@ x-crawl is a flexible Node.js multifunctional crawler library. Used to crawl pag
88
99
## Features
1010

11-
- **🔥 Async/Sync** - Just change the mode attribute value to switch async or sync crawling mode.
11+
- **🔥 Async/Sync** - Just change the mode attribute value to switch async or sync crawling mode.
1212
- **⚙️ Multiple functions** - It can crawl pages, crawl interfaces, crawl files and perform polling crawls, and supports crawling a single target or multiple targets.
1313
- **🖋️ Flexible writing style** - Simple target configuration, detailed target configuration, mixed target array configuration and advanced configuration, the same crawling API can adapt to multiple configurations.
1414
- **👀 Device Fingerprinting** - Zero configuration or custom configuration, to avoid being identified and tracked from different locations through fingerprinting.
1515
- **⏱️ Interval Crawling** - No interval, a fixed interval or a random interval can be used to generate or avoid high-concurrency crawling.
16-
- **🔄 Retry on failure** - Global settings, local settings and individual settings. It can avoid crawling failure caused by temporary problems.
16+
- **🔄 Retry on failure** - Global settings, local settings and individual settings; it can avoid crawling failures caused by temporary problems.
1717
- **🚀 Priority Queue** - According to the priority of a single crawling target, it can be crawled ahead of other targets.
1818
- **☁️ Crawl SPA** - Crawl SPA (Single Page Application) to generate pre-rendered content (aka "SSR" (Server Side Rendering)).
1919
- **⚒️ Controlling Pages** - Headless browsers can submit forms, keystrokes, event actions, generate screenshots of pages, etc.
@@ -1137,6 +1137,15 @@ export interface XCrawlConfig extends CrawlCommonConfig {
11371137
}
11381138
```
11391139
1140+
**Default Value**
1141+
1142+
- mode: 'async'
1143+
- enableRandomFingerprint: true
1144+
- baseUrl: undefined
1145+
- intervalTime: undefined
1146+
- crawlPage: undefined
1147+
- launchBrowser: undefined
1148+
11401149
#### Detail target config
11411150
11421151
##### CrawlPageDetailTargetConfig
@@ -1159,6 +1168,14 @@ export interface CrawlPageDetailTargetConfig extends CrawlCommonConfig {
11591168
}
11601169
```
11611170
1171+
**Default Value**
1172+
1173+
- headers: undefined
1174+
- method: undefined
1175+
- priority: undefined
1176+
- viewport: undefined
1177+
- fingerprint: undefined
1178+
11621179
##### CrawlDataDetailTargetConfig
11631180
11641181
```ts
@@ -1173,6 +1190,16 @@ export interface CrawlDataDetailTargetConfig extends CrawlCommonConfig {
11731190
}
11741191
```
11751192
1193+
**Default Value**
1194+
1195+
- method: 'GET'
1196+
1197+
- headers: undefined
1198+
- params: undefined
1199+
- data: undefined
1200+
- priority: undefined
1201+
- fingerprint: undefined
1202+
11761203
##### CrawlFileDetailTargetConfig
11771204
11781205
```ts
@@ -1187,6 +1214,15 @@ export interface CrawlFileDetailTargetConfig extends CrawlCommonConfig {
11871214
}
11881215
```
11891216
1217+
**Default Value**
1218+
1219+
- headers: undefined
1220+
- priority: undefined
1221+
- storeDir: \_\_dirname
1222+
- fileName: string
1223+
- extension: string
1224+
- fingerprint: undefined
1225+
11901226
#### Advanced config
11911227
11921228
##### CrawlPageAdvancedConfig
@@ -1210,6 +1246,15 @@ export interface CrawlPageAdvancedConfig extends CrawlCommonConfig {
12101246
}
12111247
```
12121248
1249+
**Default Value**
1250+
1251+
- intervalTime: undefined
1252+
- fingerprint: undefined
1253+
- headers: undefined
1254+
- cookies: undefined
1255+
- viewport: undefined
1256+
- onCrawlItemComplete: undefined
1257+
12131258
##### CrawlDataAdvancedConfig
12141259
12151260
```ts
@@ -1224,6 +1269,13 @@ export interface CrawlDataAdvancedConfig<T> extends CrawlCommonConfig {
12241269
}
12251270
```
12261271
1272+
**Default Value**
1273+
1274+
- intervalTime: undefined
1275+
- fingerprint: undefined
1276+
- headers: undefined
1277+
- onCrawlItemComplete: undefined
1278+
12271279
##### CrawlFileAdvancedConfig
12281280
12291281
```ts
@@ -1246,6 +1298,16 @@ export interface CrawlFileAdvancedConfig extends CrawlCommonConfig {
12461298
}
12471299
```
12481300
1301+
**Default Value**
1302+
1303+
- intervalTime: undefined
1304+
- fingerprint: undefined
1305+
- headers: undefined
1306+
- storeDir: \_\_dirname
1307+
- extension: string
1308+
- onCrawlItemComplete: undefined
1309+
- onBeforeSaveItemFile: undefined
1310+
12491311
#### StartPollingConfig
12501312
12511313
```ts
@@ -1256,6 +1318,12 @@ export interface StartPollingConfig {
12561318
}
12571319
```
12581320
1321+
**Default Value**
1322+
1323+
- d: undefined
1324+
- h: undefined
1325+
- m: undefined
1326+
12591327
#### Crawl other config
12601328
12611329
##### CrawlCommonConfig
@@ -1268,6 +1336,12 @@ export interface CrawlCommonConfig {
12681336
}
12691337
```
12701338
1339+
**Default Value**
1340+
1341+
- timeout: 10000
1342+
- proxy: undefined
1343+
- maxRetry: 0
1344+
12711345
##### DetailTargetFingerprintCommon
12721346
12731347
```ts
@@ -1281,6 +1355,15 @@ export interface DetailTargetFingerprintCommon {
12811355
}
12821356
```
12831357
1358+
**Default Value**
1359+
1360+
- userAgent: undefined
1361+
- ua: undefined
1362+
- platform: undefined
1363+
- platformVersion: undefined
1364+
- mobile: undefined
1365+
- acceptLanguage: undefined
1366+
12841367
##### AdvancedFingerprintCommon
12851368
12861369
```ts
@@ -1294,6 +1377,15 @@ export interface AdvancedFingerprintCommon {
12941377
}
12951378
```
12961379
1380+
**Default Value**
1381+
1382+
- userAgents: undefined
1383+
- uas: undefined
1384+
- platforms: undefined
1385+
- platformVersions: undefined
1386+
- mobiles: undefined
1387+
- acceptLanguages: undefined
1388+
12971389
##### Mobile
12981390
12991391
```ts

docs/cn.md

Lines changed: 94 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,12 @@ x-crawl 是一个灵活的 Node.js 多功能爬虫库。用于爬页面、爬接
88
99
## 特征
1010

11-
- **🔥 异步/同步** - 只需更改一下 mode 属性值即可切换异步或同步爬取模式。
11+
- **🔥 异步/同步** - 只需更改一下 mode 属性值即可切换异步或同步爬取模式。
1212
- **⚙️ 多种功能** - 可爬页面、爬接口、爬文件以及轮询爬,并且支持爬取单个或多个。
1313
- **🖋️ 写法灵活** - 简单目标配置、详细目标配置、混合目标数组配置以及进阶配置,同种爬取 API 适配多种配置。
1414
- **👀 设备指纹** - 零配置或自定义配置,即可避免通过指纹识别从不同位置识别并跟踪我们。
1515
- **⏱️ 间隔爬取** - 无间隔、固定间隔以及随机间隔,即可产生或避免高并发爬取。
16-
- **🔄 失败重试** - 全局设置、局部设置以及单独设置即可避免因一时问题而造成爬取失败。
16+
- **🔄 失败重试** - 全局设置、局部设置以及单独设置，即可避免因一时问题而造成爬取失败。
1717
- **🚀 优先队列** - 根据单个爬取目标的优先级可以优先于其他目标提前进行爬取。
1818
- **☁️ 爬取 SPA** - 爬取 SPA(单页应用程序)生成预渲染内容(即“SSR”(服务器端渲染))。
1919
- **⚒️ 控制页面** - 无头浏览器可以表单提交、键盘输入、事件操作、生成页面的屏幕截图等。
@@ -1128,6 +1128,15 @@ export interface XCrawlConfig extends CrawlCommonConfig {
11281128
}
11291129
```
11301130
1131+
**默认值**
1132+
1133+
- mode: 'async'
1134+
- enableRandomFingerprint: true
1135+
- baseUrl: undefined
1136+
- intervalTime: undefined
1137+
- crawlPage: undefined
1138+
- launchBrowser: undefined
1139+
11311140
#### Detail target config
11321141
11331142
##### CrawlPageDetailTargetConfig
@@ -1150,6 +1159,14 @@ export interface CrawlPageDetailTargetConfig extends CrawlCommonConfig {
11501159
}
11511160
```
11521161
1162+
**默认值**
1163+
1164+
- headers: undefined
1165+
- method: undefined
1166+
- priority: undefined
1167+
- viewport: undefined
1168+
- fingerprint: undefined
1169+
11531170
##### CrawlDataDetailTargetConfig
11541171
11551172
```ts
@@ -1164,6 +1181,16 @@ export interface CrawlDataDetailTargetConfig extends CrawlCommonConfig {
11641181
}
11651182
```
11661183
1184+
**默认值**
1185+
1186+
- method: 'GET'
1187+
1188+
- headers: undefined
1189+
- params: undefined
1190+
- data: undefined
1191+
- priority: undefined
1192+
- fingerprint: undefined
1193+
11671194
##### CrawlFileDetailTargetConfig
11681195
11691196
```ts
@@ -1178,6 +1205,15 @@ export interface CrawlFileDetailTargetConfig extends CrawlCommonConfig {
11781205
}
11791206
```
11801207
1208+
**默认值**
1209+
1210+
- headers: undefined
1211+
- priority: undefined
1212+
- storeDir: \_\_dirname
1213+
- fileName: string
1214+
- extension: string
1215+
- fingerprint: undefined
1216+
11811217
#### Advanced config
11821218
11831219
##### CrawlPageAdvancedConfig
@@ -1201,6 +1237,15 @@ export interface CrawlPageAdvancedConfig extends CrawlCommonConfig {
12011237
}
12021238
```
12031239
1240+
**默认值**
1241+
1242+
- intervalTime: undefined
1243+
- fingerprint: undefined
1244+
- headers: undefined
1245+
- cookies: undefined
1246+
- viewport: undefined
1247+
- onCrawlItemComplete: undefined
1248+
12041249
##### CrawlDataAdvancedConfig
12051250
12061251
```ts
@@ -1215,6 +1260,13 @@ export interface CrawlDataAdvancedConfig<T> extends CrawlCommonConfig {
12151260
}
12161261
```
12171262
1263+
**默认值**
1264+
1265+
- intervalTime: undefined
1266+
- fingerprint: undefined
1267+
- headers: undefined
1268+
- onCrawlItemComplete: undefined
1269+
12181270
##### CrawlFileAdvancedConfig
12191271
12201272
```ts
@@ -1237,6 +1289,16 @@ export interface CrawlFileAdvancedConfig extends CrawlCommonConfig {
12371289
}
12381290
```
12391291
1292+
**默认值**
1293+
1294+
- intervalTime: undefined
1295+
- fingerprint: undefined
1296+
- headers: undefined
1297+
- storeDir: \_\_dirname
1298+
- extension: string
1299+
- onCrawlItemComplete: undefined
1300+
- onBeforeSaveItemFile: undefined
1301+
12401302
#### StartPollingConfig
12411303
12421304
```ts
@@ -1247,6 +1309,12 @@ export interface StartPollingConfig {
12471309
}
12481310
```
12491311
1312+
**默认值**
1313+
1314+
- d: undefined
1315+
- h: undefined
1316+
- m: undefined
1317+
12501318
#### Crawl other config
12511319
12521320
##### CrawlCommonConfig
@@ -1259,6 +1327,12 @@ export interface CrawlCommonConfig {
12591327
}
12601328
```
12611329
1330+
**默认值**
1331+
1332+
- timeout: 10000
1333+
- proxy: undefined
1334+
- maxRetry: 0
1335+
12621336
##### DetailTargetFingerprintCommon
12631337
12641338
```ts
@@ -1272,6 +1346,15 @@ export interface DetailTargetFingerprintCommon {
12721346
}
12731347
```
12741348
1349+
**默认值**
1350+
1351+
- userAgent: undefined
1352+
- ua: undefined
1353+
- platform: undefined
1354+
- platformVersion: undefined
1355+
- mobile: undefined
1356+
- acceptLanguage: undefined
1357+
12751358
##### AdvancedFingerprintCommon
12761359
12771360
```ts
@@ -1285,6 +1368,15 @@ export interface AdvancedFingerprintCommon {
12851368
}
12861369
```
12871370
1371+
**默认值**
1372+
1373+
- userAgents: undefined
1374+
- uas: undefined
1375+
- platforms: undefined
1376+
- platformVersions: undefined
1377+
- mobiles: undefined
1378+
- acceptLanguages: undefined
1379+
12881380
##### Mobile
12891381
12901382
```ts

package.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@
1212
"crawl",
1313
"crawler",
1414
"spider",
15-
"flexible",
1615
"fingerprint",
16+
"flexible",
1717
"multifunction"
1818
],
1919
"main": "src/index.ts",
@@ -32,7 +32,7 @@
3232
"dependencies": {
3333
"chalk": "4.1.2",
3434
"https-proxy-agent": "^5.0.1",
35-
"puppeteer": "19.9.1",
35+
"puppeteer": "19.10.0",
3636
"x-crawl": "link:"
3737
},
3838
"devDependencies": {

0 commit comments

Comments
 (0)