From 4bc22ad958ef345ca8114b903631fb8eb0723c3e Mon Sep 17 00:00:00 2001 From: Honza Javorek Date: Fri, 16 May 2025 12:16:03 +0200 Subject: [PATCH 1/3] fix: update intro, lesson titles, and descriptions to mention JS --- .../04_downloading_html.md | 6 +++--- .../05_parsing_html.md | 6 +++--- .../06_locating_elements.md | 6 +++--- .../07_extracting_data.md | 4 ++-- .../08_saving_data.md | 6 +++--- .../09_getting_links.md | 6 +++--- .../scraping_basics_javascript2/10_crawling.md | 6 +++--- .../11_scraping_variants.md | 4 ++-- .../scraping_basics_javascript2/12_framework.md | 4 ++-- .../scraping_basics_javascript2/13_platform.md | 4 ++-- .../scraping_basics_javascript2/index.md | 16 ++++++++-------- 11 files changed, 34 insertions(+), 34 deletions(-) diff --git a/sources/academy/webscraping/scraping_basics_javascript2/04_downloading_html.md b/sources/academy/webscraping/scraping_basics_javascript2/04_downloading_html.md index ec361214f..1c2f53651 100644 --- a/sources/academy/webscraping/scraping_basics_javascript2/04_downloading_html.md +++ b/sources/academy/webscraping/scraping_basics_javascript2/04_downloading_html.md @@ -1,14 +1,14 @@ --- -title: Downloading HTML with Python +title: Downloading HTML with Node.js sidebar_label: Downloading HTML -description: Lesson about building a Python application for watching prices. Using the HTTPX library to download HTML code of a product listing page. +description: Lesson about building a Node.js application for watching prices. Using the /TBD/ library to download HTML code of a product listing page. slug: /scraping-basics-javascript2/downloading-html unlisted: true --- import Exercises from './_exercises.mdx'; -**In this lesson we'll start building a Python application for watching prices. As a first step, we'll use the HTTPX library to download HTML code of a product listing page.** +**In this lesson we'll start building a Node.js application for watching prices. 
As a first step, we'll use the /TBD/ library to download HTML code of a product listing page.** --- diff --git a/sources/academy/webscraping/scraping_basics_javascript2/05_parsing_html.md b/sources/academy/webscraping/scraping_basics_javascript2/05_parsing_html.md index 81aaf6778..3a5de6368 100644 --- a/sources/academy/webscraping/scraping_basics_javascript2/05_parsing_html.md +++ b/sources/academy/webscraping/scraping_basics_javascript2/05_parsing_html.md @@ -1,14 +1,14 @@ --- -title: Parsing HTML with Python +title: Parsing HTML with Node.js sidebar_label: Parsing HTML -description: Lesson about building a Python application for watching prices. Using the Beautiful Soup library to parse HTML code of a product listing page. +description: Lesson about building a Node.js application for watching prices. Using the /TBD/ library to parse HTML code of a product listing page. slug: /scraping-basics-javascript2/parsing-html unlisted: true --- import Exercises from './_exercises.mdx'; -**In this lesson we'll look for products in the downloaded HTML. We'll use BeautifulSoup to turn the HTML into objects which we can work with in our Python program.** +**In this lesson we'll look for products in the downloaded HTML. We'll use /TBD/ to turn the HTML into objects which we can work with in our Node.js program.** --- diff --git a/sources/academy/webscraping/scraping_basics_javascript2/06_locating_elements.md b/sources/academy/webscraping/scraping_basics_javascript2/06_locating_elements.md index ef85a2612..8fd29410f 100644 --- a/sources/academy/webscraping/scraping_basics_javascript2/06_locating_elements.md +++ b/sources/academy/webscraping/scraping_basics_javascript2/06_locating_elements.md @@ -1,14 +1,14 @@ --- -title: Locating HTML elements with Python +title: Locating HTML elements with Node.js sidebar_label: Locating HTML elements -description: Lesson about building a Python application for watching prices. 
Using the Beautiful Soup library to locate products on the product listing page. +description: Lesson about building a Node.js application for watching prices. Using the /TBD/ library to locate products on the product listing page. slug: /scraping-basics-javascript2/locating-elements unlisted: true --- import Exercises from './_exercises.mdx'; -**In this lesson we'll locate product data in the downloaded HTML. We'll use BeautifulSoup to find those HTML elements which contain details about each product, such as title or price.** +**In this lesson we'll locate product data in the downloaded HTML. We'll use /TBD/ to find those HTML elements which contain details about each product, such as title or price.** --- diff --git a/sources/academy/webscraping/scraping_basics_javascript2/07_extracting_data.md b/sources/academy/webscraping/scraping_basics_javascript2/07_extracting_data.md index 81a375dc5..5a156a682 100644 --- a/sources/academy/webscraping/scraping_basics_javascript2/07_extracting_data.md +++ b/sources/academy/webscraping/scraping_basics_javascript2/07_extracting_data.md @@ -1,7 +1,7 @@ --- -title: Extracting data from HTML with Python +title: Extracting data from HTML with Node.js sidebar_label: Extracting data from HTML -description: Lesson about building a Python application for watching prices. Using string manipulation to extract and clean data scraped from the product listing page. +description: Lesson about building a Node.js application for watching prices. Using string manipulation to extract and clean data scraped from the product listing page. 
slug: /scraping-basics-javascript2/extracting-data unlisted: true --- diff --git a/sources/academy/webscraping/scraping_basics_javascript2/08_saving_data.md b/sources/academy/webscraping/scraping_basics_javascript2/08_saving_data.md index b2c027a8c..ba3871f05 100644 --- a/sources/academy/webscraping/scraping_basics_javascript2/08_saving_data.md +++ b/sources/academy/webscraping/scraping_basics_javascript2/08_saving_data.md @@ -1,12 +1,12 @@ --- -title: Saving data with Python +title: Saving data with Node.js sidebar_label: Saving data -description: Lesson about building a Python application for watching prices. Using standard library to save data scraped from product listing pages in popular formats such as CSV or JSON. +description: Lesson about building a Node.js application for watching prices. Using /TBD/ to save data scraped from product listing pages in popular formats such as CSV or JSON. slug: /scraping-basics-javascript2/saving-data unlisted: true --- -**In this lesson, we'll save the data we scraped in the popular formats, such as CSV or JSON. We'll use Python's standard library to export the files.** +**In this lesson, we'll save the data we scraped in the popular formats, such as CSV or JSON. We'll use /TBD/ to export the files.** --- diff --git a/sources/academy/webscraping/scraping_basics_javascript2/09_getting_links.md b/sources/academy/webscraping/scraping_basics_javascript2/09_getting_links.md index 9d2a41333..03637445f 100644 --- a/sources/academy/webscraping/scraping_basics_javascript2/09_getting_links.md +++ b/sources/academy/webscraping/scraping_basics_javascript2/09_getting_links.md @@ -1,14 +1,14 @@ --- -title: Getting links from HTML with Python +title: Getting links from HTML with Node.js sidebar_label: Getting links from HTML -description: Lesson about building a Python application for watching prices. Using the Beautiful Soup library to locate links to individual product pages. 
+description: Lesson about building a Node.js application for watching prices. Using the /TBD/ library to locate links to individual product pages. slug: /scraping-basics-javascript2/getting-links unlisted: true --- import Exercises from './_exercises.mdx'; -**In this lesson, we'll locate and extract links to individual product pages. We'll use BeautifulSoup to find the relevant bits of HTML.** +**In this lesson, we'll locate and extract links to individual product pages. We'll use /TBD/ to find the relevant bits of HTML.** --- diff --git a/sources/academy/webscraping/scraping_basics_javascript2/10_crawling.md b/sources/academy/webscraping/scraping_basics_javascript2/10_crawling.md index f46b0ec63..381fb61c4 100644 --- a/sources/academy/webscraping/scraping_basics_javascript2/10_crawling.md +++ b/sources/academy/webscraping/scraping_basics_javascript2/10_crawling.md @@ -1,14 +1,14 @@ --- -title: Crawling websites with Python +title: Crawling websites with Node.js sidebar_label: Crawling websites -description: Lesson about building a Python application for watching prices. Using the HTTPX library to follow links to individual product pages. +description: Lesson about building a Node.js application for watching prices. Using the /TBD/ library to follow links to individual product pages. slug: /scraping-basics-javascript2/crawling unlisted: true --- import Exercises from './_exercises.mdx'; -**In this lesson, we'll follow links to individual product pages. We'll use HTTPX to download them and BeautifulSoup to process them.** +**In this lesson, we'll follow links to individual product pages. 
We'll use /TBD/ to download them and /TBD/ to process them.** --- diff --git a/sources/academy/webscraping/scraping_basics_javascript2/11_scraping_variants.md b/sources/academy/webscraping/scraping_basics_javascript2/11_scraping_variants.md index 0c68ea5b7..d0cc9e955 100644 --- a/sources/academy/webscraping/scraping_basics_javascript2/11_scraping_variants.md +++ b/sources/academy/webscraping/scraping_basics_javascript2/11_scraping_variants.md @@ -1,7 +1,7 @@ --- -title: Scraping product variants with Python +title: Scraping product variants with Node.js sidebar_label: Scraping product variants -description: Lesson about building a Python application for watching prices. Using browser DevTools to figure out how to extract product variants and exporting them as separate items. +description: Lesson about building a Node.js application for watching prices. Using browser DevTools to figure out how to extract product variants and exporting them as separate items. slug: /scraping-basics-javascript2/scraping-variants unlisted: true --- diff --git a/sources/academy/webscraping/scraping_basics_javascript2/12_framework.md b/sources/academy/webscraping/scraping_basics_javascript2/12_framework.md index 3cf1f02c7..fe80fb5fc 100644 --- a/sources/academy/webscraping/scraping_basics_javascript2/12_framework.md +++ b/sources/academy/webscraping/scraping_basics_javascript2/12_framework.md @@ -1,7 +1,7 @@ --- -title: Using a scraping framework with Python +title: Using a scraping framework with Node.js sidebar_label: Using a framework -description: Lesson about building a Python application for watching prices. Using the Crawlee framework to simplify creating a scraper. +description: Lesson about building a Node.js application for watching prices. Using the Crawlee framework to simplify creating a scraper. 
slug: /scraping-basics-javascript2/framework unlisted: true --- diff --git a/sources/academy/webscraping/scraping_basics_javascript2/13_platform.md b/sources/academy/webscraping/scraping_basics_javascript2/13_platform.md index e1bb36f3f..475f36a17 100644 --- a/sources/academy/webscraping/scraping_basics_javascript2/13_platform.md +++ b/sources/academy/webscraping/scraping_basics_javascript2/13_platform.md @@ -1,7 +1,7 @@ --- -title: Using a scraping platform with Python +title: Using a scraping platform with Node.js sidebar_label: Using a platform -description: Lesson about building a Python application for watching prices. Using the Apify platform to deploy a scraper. +description: Lesson about building a Node.js application for watching prices. Using the Apify platform to deploy a scraper. slug: /scraping-basics-javascript2/platform unlisted: true --- diff --git a/sources/academy/webscraping/scraping_basics_javascript2/index.md b/sources/academy/webscraping/scraping_basics_javascript2/index.md index 03c7dde99..1158f717b 100644 --- a/sources/academy/webscraping/scraping_basics_javascript2/index.md +++ b/sources/academy/webscraping/scraping_basics_javascript2/index.md @@ -9,32 +9,32 @@ unlisted: true import DocCardList from '@theme/DocCardList'; -**Learn how to use Python to extract information from websites in this practical course, starting from the absolute basics.** +**Learn how to use JavaScript to extract information from websites in this practical course, starting from the absolute basics.** --- -In this course we'll use Python to create an application for watching prices. It'll be able to scrape all product pages of an e-commerce website and record prices. Data from several runs of such program would be useful for seeing trends in price changes, detecting discounts, etc. +In this course we'll use JavaScript to create an application for watching prices. It'll be able to scrape all product pages of an e-commerce website and record prices. 
Data from several runs of such a program would be useful for seeing trends in price changes, detecting discounts, etc. ![E-commerce listing on the left, JSON with data on the right](./images/scraping.webp) ## What we'll do - Inspect pages using browser DevTools. -- Download web pages using the HTTPX library. -- Extract data from web pages using the Beautiful Soup library. +- Download web pages using the /TBD/ library. +- Extract data from web pages using the /TBD/ library. - Save extracted data in various formats, e.g. CSV which MS Excel or Google Sheets can open. - Follow links programmatically (crawling). - Save time and effort with frameworks, such as Crawlee, and scraping platforms, such as Apify. ## Who this course is for -Anyone with basic knowledge of developing programs in Python who wants to start with web scraping can take this course. The course does not expect you to have any prior knowledge of web technologies or scraping. +Anyone with basic knowledge of developing programs in JavaScript who wants to start with web scraping can take this course. The course does not expect you to have any prior knowledge of web technologies or scraping. ## Requirements -- A macOS, Linux, or Windows machine with a web browser and Python installed. -- Familiarity with Python basics: variables, conditions, loops, functions, strings, lists, dictionaries, files, classes, and exceptions. -- Comfort with importing from the Python standard library, using virtual environments, and installing dependencies with `pip`. +- A macOS, Linux, or Windows machine with a web browser and Node.js installed. +- Familiarity with JavaScript basics: variables, conditions, loops, functions, strings, arrays, objects, files, classes, and exceptions. +- Comfort with building a Node.js package and installing dependencies with `npm`. - Familiarity with running commands in Terminal (macOS/Linux) or Command Prompt (Windows). 
## You may want to know From 3e0953548bc2c3fe18ce7f2257a0cb3905b46ca8 Mon Sep 17 00:00:00 2001 From: Honza Javorek Date: Fri, 16 May 2025 13:08:29 +0200 Subject: [PATCH 2/3] feat: decide about the technologies --- .../scraping_basics_javascript2/04_downloading_html.md | 4 ++-- .../scraping_basics_javascript2/05_parsing_html.md | 4 ++-- .../scraping_basics_javascript2/06_locating_elements.md | 4 ++-- .../scraping_basics_javascript2/08_saving_data.md | 4 ++-- .../scraping_basics_javascript2/09_getting_links.md | 4 ++-- .../webscraping/scraping_basics_javascript2/10_crawling.md | 4 ++-- .../webscraping/scraping_basics_javascript2/index.md | 6 +++--- 7 files changed, 15 insertions(+), 15 deletions(-) diff --git a/sources/academy/webscraping/scraping_basics_javascript2/04_downloading_html.md b/sources/academy/webscraping/scraping_basics_javascript2/04_downloading_html.md index 1c2f53651..44d582e5f 100644 --- a/sources/academy/webscraping/scraping_basics_javascript2/04_downloading_html.md +++ b/sources/academy/webscraping/scraping_basics_javascript2/04_downloading_html.md @@ -1,14 +1,14 @@ --- title: Downloading HTML with Node.js sidebar_label: Downloading HTML -description: Lesson about building a Node.js application for watching prices. Using the /TBD/ library to download HTML code of a product listing page. +description: Lesson about building a Node.js application for watching prices. Using the Fetch API to download HTML code of a product listing page. slug: /scraping-basics-javascript2/downloading-html unlisted: true --- import Exercises from './_exercises.mdx'; -**In this lesson we'll start building a Node.js application for watching prices. As a first step, we'll use the /TBD/ library to download HTML code of a product listing page.** +**In this lesson we'll start building a Node.js application for watching prices. 
As a first step, we'll use the Fetch API to download HTML code of a product listing page.** --- diff --git a/sources/academy/webscraping/scraping_basics_javascript2/05_parsing_html.md b/sources/academy/webscraping/scraping_basics_javascript2/05_parsing_html.md index 3a5de6368..6f96ed2c7 100644 --- a/sources/academy/webscraping/scraping_basics_javascript2/05_parsing_html.md +++ b/sources/academy/webscraping/scraping_basics_javascript2/05_parsing_html.md @@ -1,14 +1,14 @@ --- title: Parsing HTML with Node.js sidebar_label: Parsing HTML -description: Lesson about building a Node.js application for watching prices. Using the /TBD/ library to parse HTML code of a product listing page. +description: Lesson about building a Node.js application for watching prices. Using the Cheerio library to parse HTML code of a product listing page. slug: /scraping-basics-javascript2/parsing-html unlisted: true --- import Exercises from './_exercises.mdx'; -**In this lesson we'll look for products in the downloaded HTML. We'll use /TBD/ to turn the HTML into objects which we can work with in our Node.js program.** +**In this lesson we'll look for products in the downloaded HTML. We'll use Cheerio to turn the HTML into objects which we can work with in our Node.js program.** --- diff --git a/sources/academy/webscraping/scraping_basics_javascript2/06_locating_elements.md b/sources/academy/webscraping/scraping_basics_javascript2/06_locating_elements.md index 8fd29410f..2aa3100e7 100644 --- a/sources/academy/webscraping/scraping_basics_javascript2/06_locating_elements.md +++ b/sources/academy/webscraping/scraping_basics_javascript2/06_locating_elements.md @@ -1,14 +1,14 @@ --- title: Locating HTML elements with Node.js sidebar_label: Locating HTML elements -description: Lesson about building a Node.js application for watching prices. Using the /TBD/ library to locate products on the product listing page. +description: Lesson about building a Node.js application for watching prices. 
Using the Cheerio library to locate products on the product listing page. slug: /scraping-basics-javascript2/locating-elements unlisted: true --- import Exercises from './_exercises.mdx'; -**In this lesson we'll locate product data in the downloaded HTML. We'll use /TBD/ to find those HTML elements which contain details about each product, such as title or price.** +**In this lesson we'll locate product data in the downloaded HTML. We'll use Cheerio to find those HTML elements which contain details about each product, such as title or price.** --- diff --git a/sources/academy/webscraping/scraping_basics_javascript2/08_saving_data.md b/sources/academy/webscraping/scraping_basics_javascript2/08_saving_data.md index ba3871f05..e1ad7365a 100644 --- a/sources/academy/webscraping/scraping_basics_javascript2/08_saving_data.md +++ b/sources/academy/webscraping/scraping_basics_javascript2/08_saving_data.md @@ -1,12 +1,12 @@ --- title: Saving data with Node.js sidebar_label: Saving data -description: Lesson about building a Node.js application for watching prices. Using /TBD/ to save data scraped from product listing pages in popular formats such as CSV or JSON. +description: Lesson about building a Node.js application for watching prices. Using the json2csv library to save data scraped from product listing pages in both JSON and CSV. slug: /scraping-basics-javascript2/saving-data unlisted: true --- -**In this lesson, we'll save the data we scraped in the popular formats, such as CSV or JSON. We'll use /TBD/ to export the files.** +**In this lesson, we'll save the data we scraped in the popular formats, such as CSV or JSON. 
We'll use the json2csv library to export the files.** --- diff --git a/sources/academy/webscraping/scraping_basics_javascript2/09_getting_links.md b/sources/academy/webscraping/scraping_basics_javascript2/09_getting_links.md index 03637445f..5c80796a0 100644 --- a/sources/academy/webscraping/scraping_basics_javascript2/09_getting_links.md +++ b/sources/academy/webscraping/scraping_basics_javascript2/09_getting_links.md @@ -1,14 +1,14 @@ --- title: Getting links from HTML with Node.js sidebar_label: Getting links from HTML -description: Lesson about building a Node.js application for watching prices. Using the /TBD/ library to locate links to individual product pages. +description: Lesson about building a Node.js application for watching prices. Using the Cheerio library to locate links to individual product pages. slug: /scraping-basics-javascript2/getting-links unlisted: true --- import Exercises from './_exercises.mdx'; -**In this lesson, we'll locate and extract links to individual product pages. We'll use /TBD/ to find the relevant bits of HTML.** +**In this lesson, we'll locate and extract links to individual product pages. We'll use Cheerio to find the relevant bits of HTML.** --- diff --git a/sources/academy/webscraping/scraping_basics_javascript2/10_crawling.md b/sources/academy/webscraping/scraping_basics_javascript2/10_crawling.md index 381fb61c4..552cb6414 100644 --- a/sources/academy/webscraping/scraping_basics_javascript2/10_crawling.md +++ b/sources/academy/webscraping/scraping_basics_javascript2/10_crawling.md @@ -1,14 +1,14 @@ --- title: Crawling websites with Node.js sidebar_label: Crawling websites -description: Lesson about building a Node.js application for watching prices. Using the /TBD/ library to follow links to individual product pages. +description: Lesson about building a Node.js application for watching prices. Using the Fetch API to follow links to individual product pages. 
slug: /scraping-basics-javascript2/crawling unlisted: true --- import Exercises from './_exercises.mdx'; -**In this lesson, we'll follow links to individual product pages. We'll use /TBD/ to download them and /TBD/ to process them.** +**In this lesson, we'll follow links to individual product pages. We'll use the Fetch API to download them and Cheerio to process them.** --- diff --git a/sources/academy/webscraping/scraping_basics_javascript2/index.md b/sources/academy/webscraping/scraping_basics_javascript2/index.md index 1158f717b..c3d9893a1 100644 --- a/sources/academy/webscraping/scraping_basics_javascript2/index.md +++ b/sources/academy/webscraping/scraping_basics_javascript2/index.md @@ -20,9 +20,9 @@ In this course we'll use JavaScript to create an application for watching prices ## What we'll do - Inspect pages using browser DevTools. -- Download web pages using the /TBD/ library. -- Extract data from web pages using the /TBD/ library. -- Save extracted data in various formats, e.g. CSV which MS Excel or Google Sheets can open. +- Download web pages using the Fetch API. +- Extract data from web pages using the Cheerio library. +- Save extracted data in various formats (e.g. CSV which MS Excel or Google Sheets can open) using the json2csv library. - Follow links programmatically (crawling). - Save time and effort with frameworks, such as Crawlee, and scraping platforms, such as Apify. 
From e395e5cf82f593562fe6cb8096c0bc22129f35ab Mon Sep 17 00:00:00 2001 From: Honza Javorek Date: Fri, 16 May 2025 15:30:53 +0200 Subject: [PATCH 3/3] fix: update intro to be about JS --- .../academy/webscraping/scraping_basics_javascript2/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sources/academy/webscraping/scraping_basics_javascript2/index.md b/sources/academy/webscraping/scraping_basics_javascript2/index.md index c3d9893a1..c7dcb96b5 100644 --- a/sources/academy/webscraping/scraping_basics_javascript2/index.md +++ b/sources/academy/webscraping/scraping_basics_javascript2/index.md @@ -28,7 +28,7 @@ In this course we'll use JavaScript to create an application for watching prices ## Who this course is for -Anyone with basic knowledge of developing programs in JavaScript who wants to start with web scraping can take this course. The course does not expect you to have any prior knowledge of web technologies or scraping. +Anyone with basic knowledge of developing programs in JavaScript who wants to start with web scraping can take this course. The course does not expect you to have any prior knowledge of other web technologies or scraping. ## Requirements