diff --git a/sources/academy/webscraping/scraping_basics_python/04_downloading_html.md b/sources/academy/webscraping/scraping_basics_python/04_downloading_html.md index e3866cfcb..1a8ed5907 100644 --- a/sources/academy/webscraping/scraping_basics_python/04_downloading_html.md +++ b/sources/academy/webscraping/scraping_basics_python/04_downloading_html.md @@ -5,7 +5,9 @@ description: Lesson about building a Python application for watching prices. Usi slug: /scraping-basics-python/downloading-html --- +import CodeBlock from '@theme/CodeBlock'; import Exercises from '../scraping_basics/_exercises.mdx'; +import LegoExercise from '!!raw-loader!roa-loader!./exercises/scrape_lego.py'; **In this lesson we'll start building a Python application for watching prices. As a first step, we'll use the HTTPX library to download HTML code of a product listing page.** @@ -139,26 +141,17 @@ Letting our program visibly crash on error is enough for our purposes. Now, let' -### Scrape AliExpress +### Scrape LEGO -Download HTML of a product listing page, but this time from a real world e-commerce website. For example this page with AliExpress search results: +Download HTML of a product listing page, but this time from a real-world e-commerce website. For example, this page with LEGO search results: ```text -https://www.aliexpress.com/w/wholesale-darth-vader.html +https://www.lego.com/themes/star-wars ```
Solution - - ```py - import httpx - - url = "https://www.aliexpress.com/w/wholesale-darth-vader.html" - response = httpx.get(url) - response.raise_for_status() - print(response.text) - ``` - + {LegoExercise.code}
### Save downloaded HTML as a file diff --git a/sources/academy/webscraping/scraping_basics_python/exercises/scrape_lego.py b/sources/academy/webscraping/scraping_basics_python/exercises/scrape_lego.py new file mode 100644 index 000000000..57fabfc95 --- /dev/null +++ b/sources/academy/webscraping/scraping_basics_python/exercises/scrape_lego.py @@ -0,0 +1,6 @@ +import httpx + +url = "https://www.lego.com/themes/star-wars" +response = httpx.get(url) +response.raise_for_status() +print(response.text) diff --git a/sources/academy/webscraping/scraping_basics_python/exercises/test.bats b/sources/academy/webscraping/scraping_basics_python/exercises/test.bats new file mode 100644 index 000000000..0e6c2b538 --- /dev/null +++ b/sources/academy/webscraping/scraping_basics_python/exercises/test.bats @@ -0,0 +1,8 @@ +setup() { + DIR=sources/academy/webscraping/scraping_basics_python/exercises +} + +@test "outputs the HTML with Star Wars products" { + run uv run --with httpx python "$DIR/scrape_lego.py" + [[ "$output" == *"Millennium Falcon"* ]] +}