|
| 1 | +import os |
| 2 | +import tarfile |
| 3 | + |
def compress_dataset(dir, name, files):
    """Pack *files* into the gzip-compressed tar archive ``<dir>/<name>.tgz``.

    Each input path is resolved with ``os.path.realpath`` (so symlinks are
    followed) and stored at the top level of the archive under the basename
    of the resolved path; the directory part of the input path is dropped.
    Inputs whose resolved paths share a basename therefore end up as
    same-named archive members.

    Args:
        dir: Directory in which the archive is created (must already exist).
        name: Archive file name without the ``.tgz`` extension.
        files: Iterable of paths to add to the archive.

    Raises:
        FileNotFoundError: If any input does not exist. This is checked
            before the archive is opened, so a failed call does not leave a
            truncated ``.tgz`` behind.
    """
    tgz_file = os.path.join(dir, name + '.tgz')

    # Resolve and validate every input up front: opening the archive in
    # "w:gz" mode creates/truncates it immediately, so discovering a missing
    # file only inside the write loop would leave a partial archive on disk.
    members = []
    for file in files:
        path = os.path.realpath(file)
        if not os.path.lexists(path):
            raise FileNotFoundError(
                'dataset file not found: {}'.format(path))
        # Use a distinct local instead of rebinding the `name` parameter.
        members.append((path, os.path.basename(path)))

    with tarfile.open(tgz_file, "w:gz") as tar:
        for path, arcname in members:
            tar.add(path, arcname=arcname)
| 11 | + |
def main():
    """Build the ``dist/`` archives for every bundled dataset.

    For each dataset two archives are written via ``compress_dataset``:
    ``<dataset>-ID.tgz`` with the ID-encoded edge lists plus the entity and
    relation ID-to-text maps, and ``<dataset>.tgz`` with the text-encoded
    edge lists. Input paths are relative (``<dataset>/<file>``), so they are
    resolved against the current working directory.
    """
    out_dir = 'dist'
    os.makedirs(out_dir, exist_ok=True)

    # File-name suffixes appended to the dataset directory name.
    id_suffixes = (
        '/edges_as_id_all.tsv',
        '/edges_as_id_train.tsv',
        '/edges_as_id_test.tsv',
        '/edges_as_id_valid.tsv',
        '/map_entity_id_to_text.tsv',
        '/map_relation_id_to_text.tsv',
    )
    text_suffixes = (
        '/edges_as_text_all.tsv',
        '/edges_as_text_train.tsv',
        '/edges_as_text_test.tsv',
        '/edges_as_text_valid.tsv',
    )

    for dataset in ('FB15K', 'FB15K-237', 'WN18', 'WN18RR', 'YAGO3-10'):
        id_files = [dataset + suffix for suffix in id_suffixes]
        compress_dataset(out_dir, dataset + '-ID', id_files)

        text_files = [dataset + suffix for suffix in text_suffixes]
        compress_dataset(out_dir, dataset, text_files)
| 31 | + |
| 32 | + |
# Build the archives only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
0 commit comments