@@ -135,63 +135,59 @@ npm install x-crawl
135
135
As an example, the following automatically fetches some photos of experiences and homes around the world every day:
136
136
137
137
``` js
138
- // 1.Import module ES/CJS
138
+ // 1. Import module ES/CJS
139
139
import xCrawl from ' x-crawl'
140
140
141
- // 2.Create a crawler instance
142
- const myXCrawl = xCrawl ({ maxRetry: 3 , intervalTime: { max: 3000 , min: 2000 } })
141
+ // 2. Create a crawler instance
142
+ const myXCrawl = xCrawl ({ maxRetry: 3 , intervalTime: { max: 2000 , min: 1000 } })
143
143
144
- // 3.Set the crawling task
144
+ // 3. Set the crawling task
145
145
/*
146
146
Call the startPolling API to start the polling function,
147
147
and the callback function will be called once every day
148
148
*/
149
149
myXCrawl .startPolling ({ d: 1 }, async (count , stopPolling ) => {
150
- // Call crawlPage API to crawl Page
151
- const res = await myXCrawl .crawlPage ({
150
+ // Call the crawlPage API to crawl the page
151
+ const pageResults = await myXCrawl .crawlPage ({
152
152
targets: [
153
- ' https://www.airbnb.cn/s/experiences' ,
153
+ ' https://www.airbnb.cn/s/*/ experiences' ,
154
154
' https://www.airbnb.cn/s/plus_homes'
155
155
],
156
156
viewport: { width: 1920 , height: 1080 }
157
157
})
158
158
159
- // Store the image URL to targets
160
- const targets = []
161
- const elSelectorMap = [' ._fig15y' , ' ._aov0j6' ]
162
- for (const item of res) {
159
+ // Obtain the image URLs by traversing the crawled page results
160
+ const imgUrls = []
161
+ for (const item of pageResults) {
163
162
const { id } = item
164
163
const { page } = item .data
164
+ const elSelector = id === 1 ? ' .i9cqrtb' : ' .c4mnd7m'
165
165
166
- // Wait for the page to load
167
- await new Promise (( r ) => setTimeout (r, 300 ) )
166
+ // Wait for the page element to appear
167
+ await page . waitForSelector (elSelector )
168
168
169
- // Gets the URL of the page image
170
- const urls = await page .$$eval (` ${ elSelectorMap[id - 1 ] } img` , (imgEls ) => {
171
- return imgEls .map ((item ) => item .src )
172
- } )
173
- targets .push (... urls)
169
+ // Get the URLs of the page images
170
+ const urls = await page .$$eval (` ${ elSelector } picture img` , (imgEls ) =>
171
+ imgEls .map ((item ) => item .src )
172
+ )
173
+ imgUrls .push (... urls . slice ( 0 , 8 ) )
174
174
175
- // Close page
175
+ // Close the page
176
176
page .close ()
177
177
}
178
178
179
- // Call the crawlFile API to crawl pictures
180
- myXCrawl .crawlFile ({ targets, storeDirs: ' ./upload' })
179
+ // Call the crawlFile API to crawl the pictures
180
+ await myXCrawl .crawlFile ({ targets: imgUrls , storeDirs: ' ./upload' })
181
181
})
182
182
```
183
183
184
184
Running result:
185
185
186
186
<div align =" center " >
187
- <img src =" https://raw.githubusercontent.com/coder-hxl/x-crawl/main/assets/en/crawler.png " />
188
- </div >
189
-
190
- <div align =" center " >
191
- <img src =" https://raw.githubusercontent.com/coder-hxl/x-crawl/main/assets/en/crawler-result.png " />
187
+ <img src =" https://raw.githubusercontent.com/coder-hxl/x-crawl/main/assets/example.gif " />
192
188
</div >
193
189
194
- ** Note:** Do not crawl at will , you can check the ** robots.txt** protocol before crawling. This is just to demonstrate how to use x-crawl.
190
+ **Note:** Please do not crawl indiscriminately; check the site's **robots.txt** protocol before crawling. The website's class names may change; this example is only meant to demonstrate how to use x-crawl.
195
191
196
192
## Core Concepts
197
193
0 commit comments