Skip to content

Commit 7149650

Browse files
mgerhardy authored and TiCoKH committed
DEVTOOLS: use the selenium module to work around the cloudflare bot protection
Don't assume a python binary in PATH - use the python from env instead.
Made the scripts executable.
1 parent 7b9fb96 commit 7149650

File tree

2 files changed

+60
-31
lines changed

2 files changed

+60
-31
lines changed

devtools/create_achievements/create_achievements.sh

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -17,14 +17,14 @@ usage() {
1717

1818
add_steam() {
1919
if ! test -f "gen/steam-$1.ini"; then
20-
python steam_achievements.py ${VERBOSE:+"-v"} --steamid "$1"
20+
./steam_achievements.py ${VERBOSE:+"-v"} --steamid "$1"
2121
echo -----------------------------------------
2222
fi
2323
}
2424

2525
add_steamlike_gog() {
2626
if ! test -f "gen/galaxy-$2.ini"; then
27-
python steam_achievements.py ${VERBOSE:+"-v"} --steamid "$1" --saveasgalaxyid "$2"
27+
./steam_achievements.py ${VERBOSE:+"-v"} --steamid "$1" --saveasgalaxyid "$2"
2828
echo -----------------------------------------
2929
fi
3030
}

devtools/create_achievements/steam_achievements.py

File mode changed: 100644 → 100755
Lines changed: 58 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,12 @@
1515
import codecs
1616
import argparse
1717
import requests
18-
from requests_html import HTMLSession
18+
import logging
19+
from selenium import webdriver
20+
from selenium.webdriver.chrome.service import Service
21+
import requests
22+
from selenium.webdriver.common.by import By
23+
from lxml import etree
1924

2025
# For Stats, only English strings exist on the Steam side.
2126
# - stats info is collected from SteamDB, we collect lists of <id, [comment], initial value>
@@ -24,7 +29,7 @@
2429
# This script is currently using various sources of information for Steam achievements:
2530
# - achievements info collection starts from SteamDB, we collect lists of <id, title, [comment], hidden flag>
2631
# - if there are hidden achievements, we look up their English "comment" values on 3rd party site (achievementstats.com)
27-
# - there is an additional call to SteamDB to collect list of achivements translations
32+
# - there is an additional call to SteamDB to collect list of achievements translations
2833
# - if there are no hidden achievements, we use Global Statistics at steamcommunity.com to collect non-English strings
2934
# - if there are hidden achievements, we use User Statistics at steamcommunity.com to collect non-English strings
3035

@@ -66,7 +71,14 @@
6671
parser.add_argument("--steamid", required=True, default="631570", type=int, help="Steam game id")
6772
parser.add_argument("--saveasgalaxyid", type=int, help="GOG Galaxy game id")
6873
parser.add_argument("-v", "--verbose", action="store_true")
74+
parser.add_argument("-d", "--debug", action="store_true")
6975
args = parser.parse_args()
76+
service = Service("/usr/bin/chromedriver")
77+
driver = webdriver.Chrome(service=service)
78+
stringify = etree.XPath("string()")
79+
80+
if args.debug:
81+
logging.basicConfig(level=logging.DEBUG)
7082

7183
def log(msg):
7284
global args
@@ -77,48 +89,63 @@ def err(msg):
7789
sys.stderr.write(msg + "\n")
7890
sys.exit(127)
7991

92+
def create_xpath_object(html_body):
    """Build an XPath evaluator for an HTML document.

    html_body is the page markup as text; it is parsed with lxml's HTML
    parser and the returned evaluator is bound to the parsed tree.
    Calling the evaluator with an XPath expression runs that expression
    against the tree.
    """
    return etree.XPathEvaluator(etree.HTML(html_body))
95+
96+
def get(url):
    """Load url in the module-level Selenium driver and return the page source.

    A real browser is used instead of a plain HTTP request to work around
    the Cloudflare bot protection on the queried sites.
    """
    browser = driver  # module-level webdriver.Chrome instance
    browser.get(url)
    return browser.page_source
99+
100+
def xpath(response, xpath):
    """Evaluate an XPath expression against a fetched HTML page.

    response is the page markup as returned by get(); xpath is the
    expression to evaluate. Returns the evaluation result, which for the
    node queries used in this script is a list of matching elements.
    """
    # NOTE: the `xpath` parameter shadows this function's own name inside
    # the body; that is harmless here because the function never recurses.
    return create_xpath_object(response)(xpath)
103+
104+
def html(response):
    """Return the repr() of a fetched page, for dumping it into the log."""
    return "{0!r}".format(response)
106+
80107
def parse_steamdb_info(url):
81108
log("query {0}".format(url))
82-
response = HTMLSession().get(url)
109+
response = get(url)
83110

84-
info_rows = response.html.xpath("//div[@id='info']/table/tbody/tr/td")
111+
info_rows = xpath(response, "//div[@id='info']/table/tbody/tr/td")
85112
info_columns = 2 # id, text,
86113
info_entries = int(len(info_rows) / info_columns)
87114
if info_entries == 0:
88-
log(repr(response.html.raw_html))
89-
err("found NO information data")
115+
log(html(response))
116+
err("found NO information data for {0}".format(url))
90117

91118
FORMAT_CHECKER_STRING = "Store Release Date"
92119
is_format_ok = False
93120

94121
langs = "English"
95122
for i in range(info_entries):
96123
idx = info_columns * i
97-
info_key = info_rows[idx + 0].text.strip()
98-
info_value = info_rows[idx + 1].text.strip()
99-
if info_key == FORMAT_CHECKER_STRING:
124+
info_key = stringify(info_rows[idx + 0]).strip()
125+
info_value = stringify(info_rows[idx + 1]).strip()
126+
if info_key == FORMAT_CHECKER_STRING or info_key == "Steam Release Date":
100127
is_format_ok = True
101128
if info_key == "Achievement Languages":
102129
langs = info_value
103130

104131
if not is_format_ok:
105-
log(repr(response.html.raw_html))
132+
log(html(response))
106133
err("found NO {0}\nEntries: {1}".format(FORMAT_CHECKER_STRING, [i.text for i in info_rows][::2]))
107134

108135
return langs.split(", ")
109136

110137
def parse_steamdb_stats(url):
111138
log("query {0}".format(url))
112-
response = HTMLSession().get(url)
139+
response = get(url)
113140

114-
achievements_rows = response.html.xpath("//tr[starts-with(@id, 'achievement-')]/td")
141+
achievements_rows = xpath(response, "//tr[starts-with(@id, 'achievement-')]/td")
115142
achievements_columns = 3 # name, text, img
116143
achievements_entries = int(len(achievements_rows) / achievements_columns)
117144
if achievements_entries == 0:
118-
log(repr(response.html.raw_html))
119-
err("found NO achievements")
145+
log(html(response))
146+
err("found NO achievements for {0}".format(url))
120147

121-
stats_rows = response.html.xpath("//tr[starts-with(@id, 'stat-')]/td")
148+
stats_rows = xpath(response, "//tr[starts-with(@id, 'stat-')]/td")
122149
stats_columns = 3 # name, text, default value
123150
stats_entries = int(len(stats_rows) / stats_columns)
124151

@@ -136,10 +163,10 @@ def parse_steamdb_stats(url):
136163
for i in range(achievements_entries):
137164
idx = achievements_columns * i
138165
name = achievements_rows[idx + 0].text.strip()
139-
texts = achievements_rows[idx + 1].text.strip().split("\n")
166+
texts = stringify(achievements_rows[idx + 1]).strip().replace("\n\n", "\n").split("\n")
140167

141168
if len(texts) != 2:
142-
log(repr(response.html.raw_html))
169+
log(html(response))
143170
err("Unexpected description format: {0}".format(texts))
144171

145172
title = texts[0]
@@ -153,13 +180,13 @@ def parse_steamdb_stats(url):
153180

154181
def parse_steamcommunity_stats(url):
155182
log("query {0}".format(url))
156-
response = HTMLSession().get(url)
183+
response = get(url)
157184

158-
achievements_rows = response.html.xpath("//div[@class='achieveRow']")
185+
achievements_rows = xpath(response, "//div[@class='achieveRow']")
159186
achievements_entries = len(achievements_rows)
160187
if achievements_entries == 0:
161-
log(repr(response.html.raw_html))
162-
err("found NO achievements")
188+
log(html(response))
189+
err("found NO achievements for {0}".format(url))
163190

164191
translation = {}
165192
for idx in range(achievements_entries):
@@ -168,13 +195,13 @@ def parse_steamcommunity_stats(url):
168195
descrs = achievements_rows[idx].xpath(".//div[@class='achieveTxt']/h5/text()")
169196

170197
if len(imgs) != 1:
171-
log(repr(response.html.raw_html))
198+
log(html(response))
172199
err("Unexpected xpath result: expected exactly one img tag per achievement")
173200
if len(titles) != 1:
174-
log(repr(response.html.raw_html))
201+
log(html(response))
175202
err("Unexpected xpath result: expected exactly one h3 tag per achievement")
176203
if len(descrs) > 1:
177-
log(repr(response.html.raw_html))
204+
log(html(response))
178205
err("Unexpected xpath result: expected zero or one h5 tag per achievement")
179206

180207
translation[imgs[0]] = (titles[0].strip(), descrs[0].strip() if descrs else None)
@@ -183,18 +210,18 @@ def parse_steamcommunity_stats(url):
183210

184211
def parse_achievementstats_stats(url):
185212
log("query {0}".format(url))
186-
response = HTMLSession().get(url)
213+
response = get(url)
187214

188-
tables = response.html.xpath("//table")
215+
tables = xpath(response, "//table")
189216
if len(tables) != 1:
190-
log(repr(response.html.raw_html))
217+
log(html(response))
191218
err("Unexpected xpath result: expected exactly one table tag on page")
192219

193-
achievements_rows = response.html.xpath("//tbody/tr/td")
220+
achievements_rows = xpath(response, "//tbody/tr/td")
194221
achievements_columns = 6 # icon, name, text, date, point, report
195222
achievements_entries = int(len(achievements_rows) / achievements_columns)
196223
if achievements_entries == 0:
197-
log(repr(response.html.raw_html))
224+
log(html(response))
198225
err("found NO achievements")
199226

200227
result = {}
@@ -303,7 +330,9 @@ def write_ini(fname, achievements, stats):
303330
else:
304331
FNAME = "steam-{0}.ini".format(args.steamid)
305332
write_ini(os.path.join("gen", FNAME), achievements, stats)
333+
driver.quit()
306334

307335
except requests.exceptions.RequestException as e:
308336
print(e)
337+
driver.quit()
309338
sys.exit(127)

0 commit comments

Comments
 (0)