15
15
import codecs
16
16
import argparse
17
17
import requests
18
- from requests_html import HTMLSession
18
+ import logging
19
+ from selenium import webdriver
20
+ from selenium .webdriver .chrome .service import Service
21
+ import requests
22
+ from selenium .webdriver .common .by import By
23
+ from lxml import etree
19
24
20
25
# For stats, only English strings exist on the Steam side.
21
26
# - stats info is collected from SteamDB, we collect lists of <id, [comment], initial value>
24
29
# This script is currently using various sources of information for Steam achievements:
25
30
# - achievements info collection starts from SteamDB, we collect lists of <id, title, [comment], hidden flag>
26
31
# - if there are hidden achievements, we look up their English "comment" values on 3rd party site (achievementstats.com)
27
- # - there is an additional call to SteamDB to collect list of achivements translations
32
+ # - there is an additional call to SteamDB to collect list of achievements translations
28
33
# - if there are no hidden achievements, we use Global Statistics at steamcommunity.com to collect non-English strings
29
34
# - if there are hidden achievements, we use User Statistics at steamcommunity.com to collect non-English strings
30
35
66
71
parser .add_argument ("--steamid" , required = True , default = "631570" , type = int , help = "Steam game id" )
67
72
parser .add_argument ("--saveasgalaxyid" , type = int , help = "GOG Galaxy game id" )
68
73
parser .add_argument ("-v" , "--verbose" , action = "store_true" )
74
+ parser .add_argument ("-d" , "--debug" , action = "store_true" )
69
75
args = parser .parse_args ()
76
+ service = Service ("/usr/bin/chromedriver" )
77
+ driver = webdriver .Chrome (service = service )
78
+ stringify = etree .XPath ("string()" )
79
+
80
+ if args .debug :
81
+ logging .basicConfig (level = logging .DEBUG )
70
82
71
83
def log (msg ):
72
84
global args
@@ -77,48 +89,63 @@ def err(msg):
77
89
sys .stderr .write (msg + "\n " )
78
90
sys .exit (127 )
79
91
92
def create_xpath_object(html_body):
    """Parse *html_body* as HTML and return an XPath evaluator for it.

    The returned object is callable with an XPath expression string and
    evaluates it against the root of the parsed document.
    """
    return etree.XPathEvaluator(etree.HTML(html_body))
95
+
96
def get(url):
    """Load *url* in the module-level Selenium driver and return the page source."""
    browser = driver
    browser.get(url)
    page_source = browser.page_source
    return page_source
99
+
100
def xpath(response, xpath):
    """Evaluate the *xpath* expression against the HTML text in *response*.

    *response* is parsed through ``create_xpath_object`` on every call, and
    the result of evaluating the expression is returned unchanged.
    """
    evaluate = create_xpath_object(response)
    matches = evaluate(xpath)
    return matches
103
+
104
def html(response):
    """Return the ``repr()`` form of *response*, as used when logging a page."""
    return "{0!r}".format(response)
106
+
80
107
def parse_steamdb_info(url):
    """Fetch a SteamDB info page and return the achievement languages.

    The page at *url* is loaded through ``get`` and the cells matched by
    ``//div[@id='info']/table/tbody/tr/td`` are read as consecutive
    <key, value> pairs.

    Returns the value of the "Achievement Languages" row split on ", ",
    or ``["English"]`` when no such row is present.

    Reports through ``err`` (after logging the page source) when no info
    cells are found, or when neither release-date row is present; the
    latter is used as a check that the page still has the expected layout.
    """
    log("query {0}".format(url))
    response = get(url)

    info_rows = xpath(response, "//div[@id='info']/table/tbody/tr/td")
    info_columns = 2  # id, text,
    info_entries = len(info_rows) // info_columns
    if info_entries == 0:
        log(html(response))
        err("found NO information data for {0}".format(url))

    FORMAT_CHECKER_STRING = "Store Release Date"
    # Either of these keys is accepted as proof of the expected page layout.
    format_checker_keys = (FORMAT_CHECKER_STRING, "Steam Release Date")
    is_format_ok = False

    langs = "English"
    for i in range(info_entries):
        idx = info_columns * i
        info_key = stringify(info_rows[idx + 0]).strip()
        info_value = stringify(info_rows[idx + 1]).strip()
        if info_key in format_checker_keys:
            is_format_ok = True
        if info_key == "Achievement Languages":
            langs = info_value

    if not is_format_ok:
        log(html(response))
        # Report the keys exactly as they were compared above. An lxml
        # element's ``.text`` holds only the text before its first child
        # (None when the cell starts with a tag), so it would not match
        # what ``stringify`` extracted.
        seen_keys = [stringify(cell).strip() for cell in info_rows[::2]]
        err("found NO {0}\n Entries: {1}".format(FORMAT_CHECKER_STRING, seen_keys))

    return langs.split(", ")
109
136
110
137
def parse_steamdb_stats (url ):
111
138
log ("query {0}" .format (url ))
112
- response = HTMLSession (). get (url )
139
+ response = get (url )
113
140
114
- achievements_rows = response . html . xpath ("//tr[starts-with(@id, 'achievement-')]/td" )
141
+ achievements_rows = xpath (response , "//tr[starts-with(@id, 'achievement-')]/td" )
115
142
achievements_columns = 3 # name, text, img
116
143
achievements_entries = int (len (achievements_rows ) / achievements_columns )
117
144
if achievements_entries == 0 :
118
- log (repr (response . html . raw_html ))
119
- err ("found NO achievements" )
145
+ log (html (response ))
146
+ err ("found NO achievements for {0}" . format ( url ) )
120
147
121
- stats_rows = response . html . xpath ("//tr[starts-with(@id, 'stat-')]/td" )
148
+ stats_rows = xpath (response , "//tr[starts-with(@id, 'stat-')]/td" )
122
149
stats_columns = 3 # name, text, default value
123
150
stats_entries = int (len (stats_rows ) / stats_columns )
124
151
@@ -136,10 +163,10 @@ def parse_steamdb_stats(url):
136
163
for i in range (achievements_entries ):
137
164
idx = achievements_columns * i
138
165
name = achievements_rows [idx + 0 ].text .strip ()
139
- texts = achievements_rows [idx + 1 ]. text . strip ().split ("\n " )
166
+ texts = stringify ( achievements_rows [idx + 1 ]). strip (). replace ( " \n \n " , " \n " ).split ("\n " )
140
167
141
168
if len (texts ) != 2 :
142
- log (repr (response . html . raw_html ))
169
+ log (html (response ))
143
170
err ("Unexpected description format: {0}" .format (texts ))
144
171
145
172
title = texts [0 ]
@@ -153,13 +180,13 @@ def parse_steamdb_stats(url):
153
180
154
181
def parse_steamcommunity_stats (url ):
155
182
log ("query {0}" .format (url ))
156
- response = HTMLSession (). get (url )
183
+ response = get (url )
157
184
158
- achievements_rows = response . html . xpath ("//div[@class='achieveRow']" )
185
+ achievements_rows = xpath (response , "//div[@class='achieveRow']" )
159
186
achievements_entries = len (achievements_rows )
160
187
if achievements_entries == 0 :
161
- log (repr (response . html . raw_html ))
162
- err ("found NO achievements" )
188
+ log (html (response ))
189
+ err ("found NO achievements for {0}" . format ( url ) )
163
190
164
191
translation = {}
165
192
for idx in range (achievements_entries ):
@@ -168,13 +195,13 @@ def parse_steamcommunity_stats(url):
168
195
descrs = achievements_rows [idx ].xpath (".//div[@class='achieveTxt']/h5/text()" )
169
196
170
197
if len (imgs ) != 1 :
171
- log (repr (response . html . raw_html ))
198
+ log (html (response ))
172
199
err ("Unexpected xpath result: expected exactly one img tag per achievement" )
173
200
if len (titles ) != 1 :
174
- log (repr (response . html . raw_html ))
201
+ log (html (response ))
175
202
err ("Unexpected xpath result: expected exactly one h3 tag per achievement" )
176
203
if len (descrs ) > 1 :
177
- log (repr (response . html . raw_html ))
204
+ log (html (response ))
178
205
err ("Unexpected xpath result: expected zero or one h5 tag per achievement" )
179
206
180
207
translation [imgs [0 ]] = (titles [0 ].strip (), descrs [0 ].strip () if descrs else None )
@@ -183,18 +210,18 @@ def parse_steamcommunity_stats(url):
183
210
184
211
def parse_achievementstats_stats (url ):
185
212
log ("query {0}" .format (url ))
186
- response = HTMLSession (). get (url )
213
+ response = get (url )
187
214
188
- tables = response . html . xpath ("//table" )
215
+ tables = xpath (response , "//table" )
189
216
if len (tables ) != 1 :
190
- log (repr (response . html . raw_html ))
217
+ log (html (response ))
191
218
err ("Unexpected xpath result: expected exactly one table tag on page" )
192
219
193
- achievements_rows = response . html . xpath ("//tbody/tr/td" )
220
+ achievements_rows = xpath (response , "//tbody/tr/td" )
194
221
achievements_columns = 6 # icon, name, text, date, point, report
195
222
achievements_entries = int (len (achievements_rows ) / achievements_columns )
196
223
if achievements_entries == 0 :
197
- log (repr (response . html . raw_html ))
224
+ log (html (response ))
198
225
err ("found NO achievements" )
199
226
200
227
result = {}
@@ -303,7 +330,9 @@ def write_ini(fname, achievements, stats):
303
330
else :
304
331
FNAME = "steam-{0}.ini" .format (args .steamid )
305
332
write_ini (os .path .join ("gen" , FNAME ), achievements , stats )
333
+ driver .quit ()
306
334
307
335
except requests .exceptions .RequestException as e :
308
336
print (e )
337
+ driver .quit ()
309
338
sys .exit (127 )
0 commit comments