Skip to content

Commit 7207954

Browse files
committed
Add script to compress the datasets
1 parent eac7c9f commit 7207954

File tree

1 file changed

+34
-0
lines changed

1 file changed

+34
-0
lines changed

dist.py

Lines changed: 34 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,34 @@
1+
import os
2+
import tarfile
3+
4+
def compress_dataset(dir, name, files):
    """Pack ``files`` into the gzip-compressed tar archive ``<dir>/<name>.tgz``.

    Every file is stored at the top level of the archive under the base name
    of its resolved path (``os.path.realpath``), so a symbolic link is
    archived under the name of its target.

    Args:
        dir: Existing directory in which the archive is created.
        name: Archive file name without the ``.tgz`` extension.
        files: Iterable of paths to add to the archive.

    Raises:
        FileNotFoundError: If any entry of ``files`` does not exist. The
            check runs before the archive is opened, so no partially
            written archive is left behind.
    """
    # Resolve and validate every input up front: opening the archive first
    # would leave a truncated .tgz on disk when a later file is missing.
    members = []
    for file in files:
        path = os.path.realpath(file)
        if not os.path.exists(path):
            raise FileNotFoundError(
                'dataset file not found: {}'.format(file))
        members.append((path, os.path.basename(path)))

    tgz_file = os.path.join(dir, name + '.tgz')
    with tarfile.open(tgz_file, "w:gz") as tar:
        # A distinct loop variable keeps the ``name`` parameter intact.
        for path, arcname in members:
            tar.add(path, arcname=arcname)
11+
12+
def main():
    """Create the ``dist`` directory and write two archives per dataset.

    For each dataset, ``<dataset>-ID.tgz`` receives the id-based edge files
    plus the entity/relation id-to-text maps, and ``<dataset>.tgz`` receives
    the text-based edge files. Input paths are relative to the current
    working directory.
    """
    out_dir = 'dist'
    os.makedirs(out_dir, exist_ok=True)

    # Path suffixes appended to the dataset directory name.
    id_suffixes = (
        '/edges_as_id_all.tsv',
        '/edges_as_id_train.tsv',
        '/edges_as_id_test.tsv',
        '/edges_as_id_valid.tsv',
        '/map_entity_id_to_text.tsv',
        '/map_relation_id_to_text.tsv',
    )
    text_suffixes = (
        '/edges_as_text_all.tsv',
        '/edges_as_text_train.tsv',
        '/edges_as_text_test.tsv',
        '/edges_as_text_valid.tsv',
    )

    for dataset in ('FB15K', 'FB15K-237', 'WN18', 'WN18RR', 'YAGO3-10'):
        id_files = [dataset + suffix for suffix in id_suffixes]
        compress_dataset(out_dir, dataset + '-ID', id_files)

        text_files = [dataset + suffix for suffix in text_suffixes]
        compress_dataset(out_dir, dataset, text_files)
31+
32+
33+
# Run the archive build only when executed as a script, not on import.
if __name__ == '__main__':
    main()

0 commit comments

Comments
 (0)