Skip to content

Commit 74fe63a

Browse files
committed
Fixed obtaining character name when character is a unit
1 parent 3736369 commit 74fe63a

File tree

3 files changed

+19
-7
lines changed

3 files changed

+19
-7
lines changed

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,6 @@ a simple scraper to download all card images from https://mikucolle.gamerch.com/
8 8

9 9
- Install requirements from requirements.txt: ```pip install -r requirements.txt```
10 10
- Run main.py: ```python main.py```
11+
- The images should be saved in the ```out``` folder, grouped by character:
11 12

13+
![scraper screenshot](docs/folders.png)

docs/folders.png

116 KB
Loading

main.py

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -56,19 +56,29 @@ def download_images(url_list):
56 56

57 57
if html is not None:
58 58
bs = BeautifulSoup(html, "html.parser")
59-
image = bs.find("img", {"class": "ui_wikidb_main_img"})
60-
character = bs.find("a", {"class": "ui_page_match"})
61-
name = bs.find("h2", {"id": "js_wikidb_main_name"})
62 59

60+
# image link
61+
image = bs.find("img", {"class": "ui_wikidb_main_img"})
63 62
image_url = image['src']
64-
character_name = character['title']
65-
filename = f"{name.text}.jpg"
66-
character_dir = os.path.join(OUTPUT_FOLDER, character_name)
67 63

64+
# character
65+
character = bs.select_one('.ui_wikidb_top_pc > p:nth-child(2) > span:nth-child(1)')
66+
next_elem = character.findNext()
67+
if next_elem.name == "a":
68+
character_name = next_elem['title'].strip()
69+
else:
70+
character_name = next_elem.previous_sibling.text.strip()
71+
72+
# card name
73+
card_name = bs.find("h2", {"id": "js_wikidb_main_name"})
74+
75+
# output path
76+
filename = f"{card_name.text.strip()}.jpg"
77+
character_dir = os.path.join(OUTPUT_FOLDER, character_name)
68 78
if not os.path.exists(character_dir):
69 79
os.makedirs(character_dir, exist_ok=True)
70-
71 80
output_file = os.path.join(character_dir, filename)
81+
72 82
urllib.request.urlretrieve(image_url, filename=output_file)
73 83

74 84

0 commit comments

Comments
 (0)