Skip to content

Mondo Ontology not downloading #503

@kmanpearl

Description

@kmanpearl

Not sure whether this is caused by the earlier problem of Mondo terms having a missing field, or whether it's a new problem.

code:

from obnb.data import DisGeNET
disease_labels = DisGeNET(root='data')

output:

---------------------------------------------------------------------------
BadGzipFile                               Traceback (most recent call last)
Cell In[9], line 1
----> 1 disease_labels = DisGeNET(root='data')

File ~/miniconda3/envs/study_bias/lib/python3.12/site-packages/obnb/data/annotated_ontology/disgenet.py:33, in DisGeNET.__init__(self, root, dsi_min, dsi_max, dpi_min, dpi_max, min_size, max_size, overlap, jaccard, data_sources, gene_id_converter, **kwargs)
     30 self.jaccard = jaccard
     31 self.overlap = overlap
---> 33 super().__init__(
     34     root,
     35     annotation_factory=DisGeNETAnnotation,
     36     ontology_factory=MondoDiseaseOntology,
     37     annotation_kwargs={
     38         "data_sources": data_sources,
     39         "dsi_min": dsi_min,
     40         "dsi_max": dsi_max,
     41         "dpi_min": dpi_min,
     42         "dpi_max": dpi_max,
     43         "gene_id_converter": gene_id_converter,
     44     },
     45     ontology_kwargs={"xref_prefix": "UMLS"},
     46     **kwargs,
     47 )

File ~/miniconda3/envs/study_bias/lib/python3.12/site-packages/obnb/data/annotated_ontology/base.py:30, in BaseAnnotatedOntologyData.__init__(self, root, annotation_factory, ontology_factory, annotation_kwargs, ontology_kwargs, **kwargs)
     28 self.annotation_kwargs = annotation_kwargs
     29 self.ontology_kwargs = ontology_kwargs
---> 30 super().__init__(root, **kwargs)

File ~/miniconda3/envs/study_bias/lib/python3.12/site-packages/obnb/data/base.py:96, in BaseData.__init__(self, root, version, redownload, reprocess, retransform, log_level, pre_transform, transform, cache_transform, download_cache, gene_id_converter, **kwargs)
     94     with log_file_context(self.plogger, self.info_log_path):
     95         self._download()
---> 96         self._process()
     97 else:
     98     self._download_archive()

File ~/miniconda3/envs/study_bias/lib/python3.12/site-packages/obnb/data/base.py:274, in BaseData._process(self)
    272 # Process data
    273 self.plogger.info(f"Start processing {self.classname}...")
--> 274 self.process()
    276 # Pre-transform data
    277 if self.pre_transform is not None:

File ~/miniconda3/envs/study_bias/lib/python3.12/site-packages/obnb/data/annotated_ontology/base.py:53, in BaseAnnotatedOntologyData.process(self)
     45 # NOTE: Reprocess is not a valid option for annotation and ontology
     46 # data objects as we do not save the processed data. Similarly,
     47 # retransform is invalid as there is not transformation for them yet.
     48 opts = {
     49     "redownload": self.redownload,
     50     "version": self.version,
     51     "log_level": self.log_level,
     52 }
---> 53 ann = self.annotation_factory(self.root, **self.annotation_kwargs, **opts)
     54 ont = self.ontology_factory(self.root, **self.ontology_kwargs, **opts)
     56 annot = ann.data

File ~/miniconda3/envs/study_bias/lib/python3.12/site-packages/obnb/data/annotation/disgenet.py:89, in DisGeNETAnnotation.__init__(self, root, data_sources, dsi_min, dsi_max, dpi_min, dpi_max, **kwargs)
     87 self.dpi_min = dpi_min
     88 self.dpi_max = dpi_max
---> 89 super().__init__(root, **kwargs)

File ~/miniconda3/envs/study_bias/lib/python3.12/site-packages/obnb/data/annotation/base.py:14, in BaseAnnotationData.__init__(self, root, **kwargs)
     12 def __init__(self, root: str, **kwargs):
     13     """Initialize BaseAnnotationData."""
---> 14     super().__init__(root, **kwargs)

File ~/miniconda3/envs/study_bias/lib/python3.12/site-packages/obnb/data/base.py:95, in BaseData.__init__(self, root, version, redownload, reprocess, retransform, log_level, pre_transform, transform, cache_transform, download_cache, gene_id_converter, **kwargs)
     93 if version == "latest":
     94     with log_file_context(self.plogger, self.info_log_path):
---> 95         self._download()
     96         self._process()
     97 else:

File ~/miniconda3/envs/study_bias/lib/python3.12/site-packages/obnb/data/base.py:260, in BaseData._download(self)
    258 if self.redownload or not self.download_completed():
    259     self.plogger.info(f"Start downloading {self.classname}...")
--> 260     self.download()

File ~/miniconda3/envs/study_bias/lib/python3.12/site-packages/obnb/data/annotation/base.py:30, in BaseAnnotationData.download(self)
     23 """Download raw annotation table.
     24 
     25 Note:
     26     The raw file is assumed to be gzipped.
     27 
     28 """
     29 self.plogger.info(f"Download annotation from: {self.annotation_url}")
---> 30 download_unzip(
     31     self.annotation_url,
     32     self.raw_dir,
     33     zip_type=self.annotation_file_zip_type,
     34     rename=self.raw_files[0],
     35     logger=self.plogger,
     36 )

File ~/miniconda3/envs/study_bias/lib/python3.12/site-packages/obnb/util/download.py:115, in download_unzip(url, root, zip_type, rename, logger)
    113 elif zip_type == "gzip":
    114     with open(path := osp.join(root, filename), "wb") as f:
--> 115         f.write(gzip.decompress(content))
    116     logger.info(f"File saved to {path!r}")
    117 elif zip_type == "none":

File ~/miniconda3/envs/study_bias/lib/python3.12/gzip.py:627, in decompress(data)
    625 while True:
    626     fp = io.BytesIO(data)
--> 627     if _read_gzip_header(fp) is None:
    628         return b"".join(decompressed_members)
    629     # Use a zlib raw deflate compressor

File ~/miniconda3/envs/study_bias/lib/python3.12/gzip.py:456, in _read_gzip_header(fp)
    453     return None
    455 if magic != b'\037\213':
--> 456     raise BadGzipFile('Not a gzipped file (%r)' % magic)
    458 (method, flag, last_mtime) = struct.unpack("<BBIxx", _read_exact(fp, 8))
    459 if method != 8:

BadGzipFile: Not a gzipped file (b'<!')

Other potentially helpful info:

contents of data/DisGeNET/info/run.log:

[INFO][2025-02-03 11:53:54,869][base][_process] Start processing DisGeNET...
[INFO][2025-02-03 11:53:54,872][base][_download] Start downloading DisGeNETAnnotation...
[INFO][2025-02-03 11:53:54,872][base][download] Download annotation from: https://www.disgenet.org/static/disgenet_ap1/files/downloads/all_gene_disease_associations.tsv.gz
[INFO][2025-02-03 11:53:54,873][download][download_unzip] Downloading zip archive from https://www.disgenet.org/static/disgenet_ap1/files/downloads/all_gene_disease_associations.tsv.gz
[INFO][2025-02-03 11:53:56,125][download][download_unzip] Download completed, start unpacking...

contents of data/DisGeNETAnnotation/info:

[INFO][2025-02-03 11:53:54,872][base][_download] Start downloading DisGeNETAnnotation...
[INFO][2025-02-03 11:53:54,872][base][download] Download annotation from: https://www.disgenet.org/static/disgenet_ap1/files/downloads/all_gene_disease_associations.tsv.gz
[INFO][2025-02-03 11:53:54,873][download][download_unzip] Downloading zip archive from https://www.disgenet.org/static/disgenet_ap1/files/downloads/all_gene_disease_associations.tsv.gz
[INFO][2025-02-03 11:53:56,125][download][download_unzip] Download completed, start unpacking...

The file data/DisGeNETAnnotation/raw/all_gene_disease_associations.tsv exists. data/DisGeNET/raw/ is empty.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions