coder-hxl
diff --git a/README.md b/README.md
Lines changed: 4 additions & 6 deletions
diff --git a/assets/cn/crawler.png b/assets/cn/crawler.png
32.1 KB
diff --git a/assets/en/crawler-result.png b/assets/en/crawler-result.png
91.3 KB
diff --git a/assets/en/crawler.png b/assets/en/crawler.png
63.9 KB
diff --git a/docs/cn.md b/docs/cn.md
Lines changed: 5 additions & 7 deletions
diff --git a/package.json b/package.json
Lines changed: 0 additions & 2 deletions
@@ -106,14 +106,12 @@ const myXCrawl = xCrawl({
 */
 myXCrawl.startPolling({ d: 1 }, async (count, stopPolling) => {
   // Call crawlPage API to crawl Page
-  const { jsdom, page } = await myXCrawl.crawlPage('https://zh.airbnb.com/s/*/plus_homes')
-
-  // Get the cover image elements for Plus listings
-  const imgEls = jsdom.window.document.querySelector('.a1stauiv')?.querySelectorAll('picture img')
+  const { page } = await myXCrawl.crawlPage('https://zh.airbnb.com/s/*/plus_homes')
 
   // set request configuration
-  const requestConfig: string[] = []
-  imgEls?.forEach((item) => requestConfig.push(item.src))
+  const requestConfig = await page.$$eval('picture img', (img) => {
+    return img.map((item) => item.src)
+  })
 
   // Call the crawlFile API to crawl pictures
   myXCrawl.crawlFile({ requestConfig, fileConfig: { storeDir: './upload' } })
 
@@ -103,14 +103,12 @@ const myXCrawl = xCrawl({
 // 调用 startPolling API 开始轮询功能，每隔一天会调用回调函数
 myXCrawl.startPolling({ d: 1 }, async (count, stopPolling) => {
   // 调用 crawlPage API 爬取 Page
-  const { jsdom, page } = await myXCrawl.crawlPage('https://www.bilibili.com/guochuang/')
+  const { page } = await myXCrawl.crawlPage('https://www.bilibili.com/guochuang/')
 
-  // 获取轮播图片元素
-  const imgEls = jsdom.window.document.querySelectorAll('.chief-recom-item img')
-
-  // 设置请求配置
-  const requestConfig = []
-  imgEls.forEach((item) => requestConfig.push(`https:${item.src}`))
+  // 获取轮播图片元素的 URL ，设置请求配置
+  const requestConfig = await page.$$eval('.chief-recom-item img', (imgEls) =>
+    imgEls.map((item) => item.src)
+  )
 
   // 调用 crawlFile API 爬取图片
   myXCrawl.crawlFile({ requestConfig, fileConfig: { storeDir: './upload' } })
 
@@ -18,7 +18,6 @@
   "dependencies": {
     "chalk": "4.1.2",
     "https-proxy-agent": "^5.0.1",
-    "jsdom": "^21.1.0",
     "puppeteer": "^19.7.2",
     "x-crawl": "link:"
   },
@@ -29,7 +28,6 @@
     "@rollup/plugin-babel": "^6.0.3",
     "@rollup/plugin-run": "^3.0.1",
     "@rollup/plugin-terser": "^0.4.0",
-    "@types/jsdom": "^21.0.0",
     "@types/node": "^18.11.18",
     "@typescript-eslint/eslint-plugin": "^5.48.2",
     "@typescript-eslint/parser": "^5.48.2",