-
Notifications
You must be signed in to change notification settings - Fork 1
Open
Description
I'm not sure whether this is caused by the earlier problem with Mondo terms that had a missing field, or whether it is a new problem.
code:
from obnb.data import DisGeNET
disease_labels = DisGeNET(root='data')
output:
---------------------------------------------------------------------------
BadGzipFile Traceback (most recent call last)
Cell In[9], line 1
----> 1 disease_labels = DisGeNET(root='data')
File ~/miniconda3/envs/study_bias/lib/python3.12/site-packages/obnb/data/annotated_ontology/disgenet.py:33, in DisGeNET.__init__(self, root, dsi_min, dsi_max, dpi_min, dpi_max, min_size, max_size, overlap, jaccard, data_sources, gene_id_converter, **kwargs)
30 self.jaccard = jaccard
31 self.overlap = overlap
---> 33 super().__init__(
34 root,
35 annotation_factory=DisGeNETAnnotation,
36 ontology_factory=MondoDiseaseOntology,
37 annotation_kwargs={
38 "data_sources": data_sources,
39 "dsi_min": dsi_min,
40 "dsi_max": dsi_max,
41 "dpi_min": dpi_min,
42 "dpi_max": dpi_max,
43 "gene_id_converter": gene_id_converter,
44 },
45 ontology_kwargs={"xref_prefix": "UMLS"},
46 **kwargs,
47 )
File ~/miniconda3/envs/study_bias/lib/python3.12/site-packages/obnb/data/annotated_ontology/base.py:30, in BaseAnnotatedOntologyData.__init__(self, root, annotation_factory, ontology_factory, annotation_kwargs, ontology_kwargs, **kwargs)
28 self.annotation_kwargs = annotation_kwargs
29 self.ontology_kwargs = ontology_kwargs
---> 30 super().__init__(root, **kwargs)
File ~/miniconda3/envs/study_bias/lib/python3.12/site-packages/obnb/data/base.py:96, in BaseData.__init__(self, root, version, redownload, reprocess, retransform, log_level, pre_transform, transform, cache_transform, download_cache, gene_id_converter, **kwargs)
94 with log_file_context(self.plogger, self.info_log_path):
95 self._download()
---> 96 self._process()
97 else:
98 self._download_archive()
File ~/miniconda3/envs/study_bias/lib/python3.12/site-packages/obnb/data/base.py:274, in BaseData._process(self)
272 # Process data
273 self.plogger.info(f"Start processing {self.classname}...")
--> 274 self.process()
276 # Pre-transform data
277 if self.pre_transform is not None:
File ~/miniconda3/envs/study_bias/lib/python3.12/site-packages/obnb/data/annotated_ontology/base.py:53, in BaseAnnotatedOntologyData.process(self)
45 # NOTE: Reprocess is not a valid option for annotation and ontology
46 # data objects as we do not save the processed data. Similarly,
47 # retransform is invalid as there is not transformation for them yet.
48 opts = {
49 "redownload": self.redownload,
50 "version": self.version,
51 "log_level": self.log_level,
52 }
---> 53 ann = self.annotation_factory(self.root, **self.annotation_kwargs, **opts)
54 ont = self.ontology_factory(self.root, **self.ontology_kwargs, **opts)
56 annot = ann.data
File ~/miniconda3/envs/study_bias/lib/python3.12/site-packages/obnb/data/annotation/disgenet.py:89, in DisGeNETAnnotation.__init__(self, root, data_sources, dsi_min, dsi_max, dpi_min, dpi_max, **kwargs)
87 self.dpi_min = dpi_min
88 self.dpi_max = dpi_max
---> 89 super().__init__(root, **kwargs)
File ~/miniconda3/envs/study_bias/lib/python3.12/site-packages/obnb/data/annotation/base.py:14, in BaseAnnotationData.__init__(self, root, **kwargs)
12 def __init__(self, root: str, **kwargs):
13 """Initialize BaseAnnotationData."""
---> 14 super().__init__(root, **kwargs)
File ~/miniconda3/envs/study_bias/lib/python3.12/site-packages/obnb/data/base.py:95, in BaseData.__init__(self, root, version, redownload, reprocess, retransform, log_level, pre_transform, transform, cache_transform, download_cache, gene_id_converter, **kwargs)
93 if version == "latest":
94 with log_file_context(self.plogger, self.info_log_path):
---> 95 self._download()
96 self._process()
97 else:
File ~/miniconda3/envs/study_bias/lib/python3.12/site-packages/obnb/data/base.py:260, in BaseData._download(self)
258 if self.redownload or not self.download_completed():
259 self.plogger.info(f"Start downloading {self.classname}...")
--> 260 self.download()
File ~/miniconda3/envs/study_bias/lib/python3.12/site-packages/obnb/data/annotation/base.py:30, in BaseAnnotationData.download(self)
23 """Download raw annotation table.
24
25 Note:
26 The raw file is assumed to be gzipped.
27
28 """
29 self.plogger.info(f"Download annotation from: {self.annotation_url}")
---> 30 download_unzip(
31 self.annotation_url,
32 self.raw_dir,
33 zip_type=self.annotation_file_zip_type,
34 rename=self.raw_files[0],
35 logger=self.plogger,
36 )
File ~/miniconda3/envs/study_bias/lib/python3.12/site-packages/obnb/util/download.py:115, in download_unzip(url, root, zip_type, rename, logger)
113 elif zip_type == "gzip":
114 with open(path := osp.join(root, filename), "wb") as f:
--> 115 f.write(gzip.decompress(content))
116 logger.info(f"File saved to {path!r}")
117 elif zip_type == "none":
File ~/miniconda3/envs/study_bias/lib/python3.12/gzip.py:627, in decompress(data)
625 while True:
626 fp = io.BytesIO(data)
--> 627 if _read_gzip_header(fp) is None:
628 return b"".join(decompressed_members)
629 # Use a zlib raw deflate compressor
File ~/miniconda3/envs/study_bias/lib/python3.12/gzip.py:456, in _read_gzip_header(fp)
453 return None
455 if magic != b'\037\213':
--> 456 raise BadGzipFile('Not a gzipped file (%r)' % magic)
458 (method, flag, last_mtime) = struct.unpack("<BBIxx", _read_exact(fp, 8))
459 if method != 8:
BadGzipFile: Not a gzipped file (b'<!')
Other potentially helpful info:
contents of data/DisGeNET/info/run.log:
[INFO][2025-02-03 11:53:54,869][base][_process] Start processing DisGeNET...
[INFO][2025-02-03 11:53:54,872][base][_download] Start downloading DisGeNETAnnotation...
[INFO][2025-02-03 11:53:54,872][base][download] Download annotation from: https://www.disgenet.org/static/disgenet_ap1/files/downloads/all_gene_disease_associations.tsv.gz
[INFO][2025-02-03 11:53:54,873][download][download_unzip] Downloading zip archive from https://www.disgenet.org/static/disgenet_ap1/files/downloads/all_gene_disease_associations.tsv.gz
[INFO][2025-02-03 11:53:56,125][download][download_unzip] Download completed, start unpacking...
contents of data/DisGeNETAnnotation/info:
[INFO][2025-02-03 11:53:54,872][base][_download] Start downloading DisGeNETAnnotation...
[INFO][2025-02-03 11:53:54,872][base][download] Download annotation from: https://www.disgenet.org/static/disgenet_ap1/files/downloads/all_gene_disease_associations.tsv.gz
[INFO][2025-02-03 11:53:54,873][download][download_unzip] Downloading zip archive from https://www.disgenet.org/static/disgenet_ap1/files/downloads/all_gene_disease_associations.tsv.gz
[INFO][2025-02-03 11:53:56,125][download][download_unzip] Download completed, start unpacking...
data/DisGeNETAnnotation/raw/all_gene_disease_associations.tsv file exists. data/DisGeNET/raw/ is empty.
Metadata
Metadata
Assignees
Labels
No labels