Skip to content

Commit 298f35a

Browse files
committed
Fixes for igbh dataset detection (#85)
* Support adding dependent CM script commands in CM dockerfile
* Fix dataset-igbh register in cache
* [Automated Commit] Format Codebase
* Fix dataset-igbh register in cache
* Fix dataset-igbh register in cache
1 parent d1420d0 commit 298f35a

File tree

2 files changed

+53
-42
lines changed

2 files changed

+53
-42
lines changed

script/get-dataset-igbh/_cm.yaml

Lines changed: 40 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,8 @@ prehook_deps:
4141
- CM_OUTDIRNAME
4242
force_cache: true
4343
enable_if_env:
44-
CM_DATASET_IGBH_TYPE:
45-
- 'full'
44+
CM_DATASET_IGBH_FULL_DOWNLOAD:
45+
- 'yes'
4646
names:
4747
- dae
4848
- paper-node-feat
@@ -60,8 +60,8 @@ prehook_deps:
6060
- CM_OUTDIRNAME
6161
force_cache: true
6262
enable_if_env:
63-
CM_DATASET_IGBH_TYPE:
64-
- 'full'
63+
CM_DATASET_IGBH_FULL_DOWNLOAD:
64+
- 'yes'
6565
names:
6666
- dae
6767
- paper-node-label19
@@ -79,8 +79,8 @@ prehook_deps:
7979
force_env_keys:
8080
- CM_OUTDIRNAME
8181
enable_if_env:
82-
CM_DATASET_IGBH_TYPE:
83-
- 'full'
82+
CM_DATASET_IGBH_FULL_DOWNLOAD:
83+
- 'yes'
8484
names:
8585
- dae
8686
- paper-node-label2k
@@ -98,8 +98,8 @@ prehook_deps:
9898
force_env_keys:
9999
- CM_OUTDIRNAME
100100
enable_if_env:
101-
CM_DATASET_IGBH_TYPE:
102-
- 'full'
101+
CM_DATASET_IGBH_FULL_DOWNLOAD:
102+
- 'yes'
103103
names:
104104
- dae
105105
- paper-id-index-mapping
@@ -118,8 +118,8 @@ prehook_deps:
118118
force_env_keys:
119119
- CM_OUTDIRNAME
120120
enable_if_env:
121-
CM_DATASET_IGBH_TYPE:
122-
- 'full'
121+
CM_DATASET_IGBH_FULL_DOWNLOAD:
122+
- 'yes'
123123
names:
124124
- dae
125125
- edge-index
@@ -138,8 +138,8 @@ prehook_deps:
138138
force_env_keys:
139139
- CM_OUTDIRNAME
140140
enable_if_env:
141-
CM_DATASET_IGBH_TYPE:
142-
- 'full'
141+
CM_DATASET_IGBH_FULL_DOWNLOAD:
142+
- 'yes'
143143
names:
144144
- dae
145145
- author-id-index-mapping
@@ -157,8 +157,8 @@ prehook_deps:
157157
force_env_keys:
158158
- CM_OUTDIRNAME
159159
enable_if_env:
160-
CM_DATASET_IGBH_TYPE:
161-
- 'full'
160+
CM_DATASET_IGBH_FULL_DOWNLOAD:
161+
- 'yes'
162162
names:
163163
- dae
164164
- author-node-feat
@@ -177,8 +177,8 @@ prehook_deps:
177177
force_env_keys:
178178
- CM_OUTDIRNAME
179179
enable_if_env:
180-
CM_DATASET_IGBH_TYPE:
181-
- 'full'
180+
CM_DATASET_IGBH_FULL_DOWNLOAD:
181+
- 'yes'
182182
names:
183183
- dae
184184
- conference-id-index-mapping
@@ -196,8 +196,8 @@ prehook_deps:
196196
force_env_keys:
197197
- CM_OUTDIRNAME
198198
enable_if_env:
199-
CM_DATASET_IGBH_TYPE:
200-
- 'full'
199+
CM_DATASET_IGBH_FULL_DOWNLOAD:
200+
- 'yes'
201201
names:
202202
- dae
203203
- conference-node-feat
@@ -216,8 +216,8 @@ prehook_deps:
216216
force_env_keys:
217217
- CM_OUTDIRNAME
218218
enable_if_env:
219-
CM_DATASET_IGBH_TYPE:
220-
- 'full'
219+
CM_DATASET_IGBH_FULL_DOWNLOAD:
220+
- 'yes'
221221
names:
222222
- dae
223223
- institute-id-index-mapping
@@ -235,8 +235,8 @@ prehook_deps:
235235
force_env_keys:
236236
- CM_OUTDIRNAME
237237
enable_if_env:
238-
CM_DATASET_IGBH_TYPE:
239-
- 'full'
238+
CM_DATASET_IGBH_FULL_DOWNLOAD:
239+
- 'yes'
240240
names:
241241
- dae
242242
- institute-node-feat
@@ -255,8 +255,8 @@ prehook_deps:
255255
force_env_keys:
256256
- CM_OUTDIRNAME
257257
enable_if_env:
258-
CM_DATASET_IGBH_TYPE:
259-
- 'full'
258+
CM_DATASET_IGBH_FULL_DOWNLOAD:
259+
- 'yes'
260260
names:
261261
- dae
262262
- journal-id-index-mapping
@@ -274,8 +274,8 @@ prehook_deps:
274274
force_env_keys:
275275
- CM_OUTDIRNAME
276276
enable_if_env:
277-
CM_DATASET_IGBH_TYPE:
278-
- 'full'
277+
CM_DATASET_IGBH_FULL_DOWNLOAD:
278+
- 'yes'
279279
names:
280280
- dae
281281
- journal-node-feat
@@ -294,8 +294,8 @@ prehook_deps:
294294
force_env_keys:
295295
- CM_OUTDIRNAME
296296
enable_if_env:
297-
CM_DATASET_IGBH_TYPE:
298-
- 'full'
297+
CM_DATASET_IGBH_FULL_DOWNLOAD:
298+
- 'yes'
299299
names:
300300
- dae
301301
- fos-id-index-mapping
@@ -313,8 +313,8 @@ prehook_deps:
313313
force_env_keys:
314314
- CM_OUTDIRNAME
315315
enable_if_env:
316-
CM_DATASET_IGBH_TYPE:
317-
- 'full'
316+
CM_DATASET_IGBH_FULL_DOWNLOAD:
317+
- 'yes'
318318
names:
319319
- dae
320320
- fos-node-feat
@@ -333,8 +333,8 @@ prehook_deps:
333333
force_env_keys:
334334
- CM_OUTDIRNAME
335335
enable_if_env:
336-
CM_DATASET_IGBH_TYPE:
337-
- 'full'
336+
CM_DATASET_IGBH_FULL_DOWNLOAD:
337+
- 'yes'
338338
names:
339339
- dae
340340
- author-to-institute-edge-index
@@ -353,8 +353,8 @@ prehook_deps:
353353
force_env_keys:
354354
- CM_OUTDIRNAME
355355
enable_if_env:
356-
CM_DATASET_IGBH_TYPE:
357-
- 'full'
356+
CM_DATASET_IGBH_FULL_DOWNLOAD:
357+
- 'yes'
358358
names:
359359
- dae
360360
- paper-published-journal-edge-index
@@ -373,8 +373,8 @@ prehook_deps:
373373
force_env_keys:
374374
- CM_OUTDIRNAME
375375
enable_if_env:
376-
CM_DATASET_IGBH_TYPE:
377-
- 'full'
376+
CM_DATASET_IGBH_FULL_DOWNLOAD:
377+
- 'yes'
378378
names:
379379
- dae
380380
- paper-topic-fos-edge-index
@@ -393,8 +393,8 @@ prehook_deps:
393393
force_env_keys:
394394
- CM_OUTDIRNAME
395395
enable_if_env:
396-
CM_DATASET_IGBH_TYPE:
397-
- 'full'
396+
CM_DATASET_IGBH_FULL_DOWNLOAD:
397+
- 'yes'
398398
names:
399399
- dae
400400
- paper-venue-conference-edge-index
@@ -413,8 +413,8 @@ prehook_deps:
413413
force_env_keys:
414414
- CM_OUTDIRNAME
415415
enable_if_env:
416-
CM_DATASET_IGBH_TYPE:
417-
- 'full'
416+
CM_DATASET_IGBH_FULL_DOWNLOAD:
417+
- 'yes'
418418
names:
419419
- dae
420420
- paper-written-by-author-edge-index

script/get-dataset-igbh/customize.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,11 @@ def preprocess(i):
1919
graph_folder = os.path.join(
2020
env['CM_MLPERF_INFERENCE_SOURCE'], 'graph', 'R-GAT')
2121

22+
if env.get('CM_DATASET_IGBH_PATH',
23+
'') != '': # skip download, just register in cache
24+
env['CM_DATASET_IGBH_OUT_PATH'] = env['CM_DATASET_IGBH_PATH']
25+
return {'return': 0}
26+
2227
download_loc = env.get('CM_DATASET_IGBH_OUT_PATH', os.getcwd())
2328

2429
env['CM_DATASET_IGBH_DOWNLOAD_LOCATION'] = download_loc
@@ -31,14 +36,20 @@ def preprocess(i):
3136
run_cmd += x_sep + env['CM_PYTHON_BIN_WITH_PATH'] + \
3237
f" tools/download_igbh_test.py --target-path {download_loc} "
3338

39+
else:
40+
env['CM_DATASET_IGBH_FULL_DOWNLOAD'] = 'yes'
41+
3442
# split seeds
3543
run_cmd += x_sep + \
36-
f"{env['CM_PYTHON_BIN_WITH_PATH']} tools/split_seeds.py --path {download_loc} --dataset_size {env['CM_DATASET_IGBH_SIZE']} "
44+
f"""{
45+
env['CM_PYTHON_BIN_WITH_PATH']} tools/split_seeds.py --path {download_loc} --dataset_size {
46+
env['CM_DATASET_IGBH_SIZE']} """
3747

3848
# compress graph(for glt implementation)
3949
if env.get('CM_IGBH_GRAPH_COMPRESS', '') == "yes":
4050
run_cmd += x_sep + \
41-
f"{env['CM_PYTHON_BIN_WITH_PATH']} tools/compress_graph.py --path {download_loc} --dataset_size {env['CM_DATASET_IGBH_SIZE']} --layout {env['CM_IGBH_GRAPH_COMPRESS_LAYOUT']}"
51+
f"""{env['CM_PYTHON_BIN_WITH_PATH']} tools/compress_graph.py --path {download_loc} --dataset_size {env['CM_DATASET_IGBH_SIZE']} --layout {env['CM_IGBH_GRAPH_COMPRESS_LAYOUT']}
52+
"""
4253

4354
env['CM_RUN_CMD'] = run_cmd
4455

0 commit comments

Comments
 (0)