diff --git a/DEPLOYING.md b/DEPLOYING.md index 7b0c7c2..f086f77 100644 --- a/DEPLOYING.md +++ b/DEPLOYING.md @@ -32,7 +32,6 @@ Below are the steps to perform a production deployment of BioAPI. BioAPI uses three genomic databases for its operation. These databases must be loaded in MongoDB. You can import all the databases in two ways: - ### Import using public DB backup (recommended) To import all databases in MongoDB: @@ -67,7 +66,6 @@ To import all databases in MongoDB: 4. Stop services with the command `docker compose -f docker-compose.dev.yml down` 5. Roll up the changes in the `docker-compose.dev.yml` file to remove the backup file from the `volumes` section. Restart all the services again. - ### Manually import the different databases Alternatively (but **not recommended** due to high computational demands) you can run a separate ETL process to download from source, process and import the databases into MongoDB. @@ -101,6 +99,7 @@ docker-compose up -d ``` By default, BioAPI runs on `localhost:8000`. 
+Test BioAPI with Swagger on `localhost:8000/apidocs` If you want to stop all services, you can execute: diff --git a/bio-api/bioapi.py b/bio-api/bioapi.py index abba7e8..dba63d1 100755 --- a/bio-api/bioapi.py +++ b/bio-api/bioapi.py @@ -3,15 +3,17 @@ import json import gzip import logging + +from flask import Flask, jsonify, make_response, abort, render_template, request +from flasgger import Swagger, swag_from from db import get_mongo_connection from concurrent.futures import ThreadPoolExecutor import configparser -import urllib.parse from typing import List, Dict, Optional, Any -from flask import Flask, jsonify, make_response, abort, render_template, request from utils import map_gene from gprofiler import GProfiler + # Gets production flag IS_DEBUG: bool = os.environ.get('DEBUG', 'true') == 'true' @@ -515,9 +517,6 @@ def bfs_on_terms(term_id, relations: Optional[List[str]] = None, general_depth=0 return list(graph.values()) -# PharmGKB - - def cancer_drugs_related_to_gene(gene: str) -> List: """ Gets all cancer related drugs associated with a gene . @@ -527,6 +526,7 @@ def cancer_drugs_related_to_gene(gene: str) -> List: collection_pharm = mydb["pharmgkb"] return list(collection_pharm.find({"genes": gene}, {"_id": 0})) + def get_data_from_oncokb(genes: List[str], query: str) -> Dict[str, Dict[str, Any]]: """ Gets all data from OncoKB database associated with a gene list. 
@@ -643,33 +643,41 @@ def associated_string_genes(gene_symbol: str, min_combined_score: int = 400) -> return res -# Documentation of included services -services = [ - {"name": "Genes symbols validator", "url": "[POST] /gene-symbols"}, - {"name": "Genes symbols finder", "url": "[GET] /gene-symbols-finder"}, - {"name": "Genes information", "url": "[POST] /information-of-genes"}, - {"name": "Gene Groups", "url": "[GET] /genes-of-its-group/"}, - {"name": "Genes of a metabolic pathway", "url": "[GET] /pathway-genes//"}, - {"name": "Metabolic pathways from different genes", "url": "[POST] /pathways-in-common"}, - {"name": "Gene expression", "url": "[POST] /expression-of-genes"}, - {"name": "Therapies and actionable genes in cancer", "url": "[POST] /information-of-oncokb"}, - {"name": "Gene Ontology terms related to a list of genes", "url": "[POST] /genes-to-terms"}, - {"name": "Gene Ontology terms related to another specific term", "url": "[POST] /related-terms"}, - {"name": "Cancer related drugs", "url": "[POST] /drugs-pharm-gkb"}, - {"name": "Predicted functional associations network", "url": "[POST] /string-relations"}, - {"name": "Drugs that regulate a gene", "url": "[GET] /drugs-regulating-gene/"} -] - - def create_app(): # Creates and configures the app flask_app = Flask(__name__, instance_relative_config=True) + swagger_config = { + "headers": [ + ], + "openapi": "3.0.0", + "specs": [ + { + "endpoint": "swagger", + "route": "/apispec.json", + "rule_filter": lambda rule: True, + "model_filter": lambda tag: True + } + ], + "title": "BioAPI", + "uiversion": 3, + "version": VERSION, + "termsOfService": False, + "swagger_ui": True, + "static_url_path": "/", + "specs_route": "/apidocs/", + "description": """ +## A powerful abstraction of genomics databases. +BioAPI is part of the Multiomix project. For more information, visit our [website](https://omicsdatascience.org/). 
+To contribute: [OmicsDatascience](https://github.com/omics-datascience/BioAPI)""" + } + + Swagger(flask_app, config=swagger_config) + # Endpoints @flask_app.route("/") def homepage(): - # return render_template('index.html', title=f"API v{VERSION}", services=services) - return render_template('homepage.html', version=VERSION, services=services) + return render_template('homepage.html', version=VERSION) @flask_app.route("/ping") def ping_ok(): @@ -678,12 +686,13 @@ def ping_ok(): return make_response(output, 200, headers) @flask_app.route("/gene-symbols", methods=['POST']) + @swag_from("swagger_specs/geneSymbols.yml") def gene_symbols(): """Receives a list of gene IDs in any standard and returns the standardized corresponding gene IDs. In case it is not found it returns an empty list for the specific not found gene.""" response = {} if request.method == 'POST': - body = request.get_json() # type: ignore + body = request.get_json() if "gene_ids" not in body: abort(400, "gene_ids is mandatory") @@ -700,16 +709,17 @@ def gene_symbols(): return make_response(response, 200, headers) @flask_app.route("/gene-symbols-finder/", methods=['GET']) + @swag_from("swagger_specs/geneSymbolFinder.yml") def gene_symbol_finder(): """Takes a string of any length and returns a list of genes that contain that search criteria.""" if "query" not in request.args: abort(400, "'query' parameter is mandatory") else: - query = request.args.get('query') # type: ignore + query = request.args.get('query') limit = 50 if "limit" in request.args: - limit_arg = request.args.get('limit') # type: ignore + limit_arg = request.args.get('limit') if limit_arg.isnumeric(): limit = int(limit_arg) else: @@ -722,6 +732,7 @@ def gene_symbol_finder(): abort(400, e) @flask_app.route("/information-of-genes", methods=['POST']) + @swag_from("swagger_specs/informationOfGenes.yml") def information_of_genes(): """Receives a list of gene IDs and returns information about them.""" body = request.get_json() # type: ignore 
@@ -739,7 +750,8 @@ def information_of_genes(): return make_response(response, 200, headers) @flask_app.route("/genes-of-its-group/", methods=['GET']) - def genes_in_the_same_group(gene_id): + @swag_from("swagger_specs/genesOfItsGroup.yml") + def genes_in_the_same_group(gene_id: str): response = {"gene_id": None, "groups": [], "locus_group": None, "locus_type": None} try: @@ -774,6 +786,7 @@ def genes_in_the_same_group(gene_id): return make_response(response, 200, headers) @flask_app.route("/pathway-genes//", methods=['GET']) + @swag_from("swagger_specs/genesOfMetabolicPathway.yml") def pathway_genes(pathway_source, pathway_id): if pathway_source.lower() not in PATHWAYS_SOURCES: abort(404, f'{pathway_source} is an invalid pathway source') @@ -782,6 +795,7 @@ def pathway_genes(pathway_source, pathway_id): return make_response(response, 200, headers) @flask_app.route("/pathways-in-common", methods=['POST']) + @swag_from("swagger_specs/pathwaysInCommon.yml") def pathways_in_common(): body = request.get_json() # type: ignore if "gene_ids" not in body: @@ -802,6 +816,7 @@ def pathways_in_common(): return make_response(response, 200, headers) @flask_app.route("/expression-of-genes", methods=['POST']) + @swag_from("swagger_specs/expressionOfGenes.yml") def expression_data_from_gtex(): body = request.get_json() # type: ignore @@ -839,6 +854,7 @@ def expression_data_from_gtex(): return jsonify(expression_data) @flask_app.route("/genes-to-terms", methods=['POST']) + @swag_from("swagger_specs/genesToTerms.yml") def genes_to_go_terms(): """Receives a list of genes and returns the related terms""" valid_filter_types = ["union", "intersection", "enrichment"] @@ -926,6 +942,7 @@ def genes_to_go_terms(): return jsonify(response) @flask_app.route("/related-terms", methods=['POST']) + @swag_from("swagger_specs/relatedTerms.yml") def related_terms(): """Receives a term and returns the related terms""" valid_ontology_types = ["biological_process", @@ -973,6 +990,7 @@ def 
related_terms(): return jsonify(response) @flask_app.route("/information-of-oncokb", methods=['POST']) + @swag_from("swagger_specs/informationOfOncokb.yml") def oncokb_data(): body = request.get_json() # type: ignore @@ -993,6 +1011,7 @@ def oncokb_data(): return jsonify(data) @flask_app.route("/drugs-pharm-gkb", methods=['POST']) + @swag_from("swagger_specs/cancerDrugsRelatedToGenes.yml") def cancer_drugs_related_to_genes(): """Receives genes and returns the related drugs""" response = {} @@ -1007,6 +1026,7 @@ def cancer_drugs_related_to_genes(): return jsonify(response) @flask_app.route("/string-relations", methods=['POST']) + @swag_from("swagger_specs/stringRelations.yml") def string_relations_to_gene(): body = request.get_json() optionals = {} @@ -1024,6 +1044,7 @@ def string_relations_to_gene(): return jsonify(res) @flask_app.route("/drugs-regulating-gene/", methods=['GET']) + @swag_from("swagger_specs/drugsRegulatingGene.yml") def drugs_regulating_gene(gene_id): return { "link": "https://go.drugbank.com/pharmaco/transcriptomics?q%5Bg%5B0%5D%5D%5Bm%5D=or&q%5Bg%5B0%5D%5D" diff --git a/bio-api/swagger_specs/cancerDrugsRelatedToGenes.yml b/bio-api/swagger_specs/cancerDrugsRelatedToGenes.yml new file mode 100644 index 0000000..3048334 --- /dev/null +++ b/bio-api/swagger_specs/cancerDrugsRelatedToGenes.yml @@ -0,0 +1,22 @@ +tags: + - Accionable Genes and Drugs +summary: "Cancer related drugs" +description: "Gets a list of drugs from the PharmGKB database related to a list of genes." +operationId: "Cancer related drugs" +requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + gene_ids: + type: array + description: "List of gene identifiers." + items: + type: string + example: ["JAK2", "EGFR"] +responses: + 200: + description: "List of all information related to drugs and genes." 
+ diff --git a/bio-api/swagger_specs/drugsRegulatingGene.yml b/bio-api/swagger_specs/drugsRegulatingGene.yml new file mode 100644 index 0000000..3b5dc71 --- /dev/null +++ b/bio-api/swagger_specs/drugsRegulatingGene.yml @@ -0,0 +1,16 @@ +tags: + - Accionable Genes and Drugs +summary: "Drugs that regulate a gene expression." +description: "Service that takes a gene symbol and returns a link to https://go.drugbank.com with all the drugs that upregulate and downregulate its expression. Useful for embedding." +operationId: "Drugs that regulate genetic expression." +parameters: + - in: path + name: gene_id + description: "Identifier of the gene." + required: true + schema: + type: string + example: "TP53" +responses: + 200: + description: "URL that points to the information on the DrugBank website." \ No newline at end of file diff --git a/bio-api/swagger_specs/expressionOfGenes.yml b/bio-api/swagger_specs/expressionOfGenes.yml new file mode 100644 index 0000000..4ad24d5 --- /dev/null +++ b/bio-api/swagger_specs/expressionOfGenes.yml @@ -0,0 +1,61 @@ +tags: + - Gene Expression +summary: "Gene expression" +description: "Gets gene expression in healthy tissue." +operationId: "Gene expression in healthy tissues" +requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + gene_ids: + type: array + description: "List of gene identifiers." 
+ items: + type: string + example: ["BRCA1", "BRCA2"] + tissue: + type: string + enum: + - Adipose Tissue + - Adrenal Gland + - Bladder + - Blood + - Blood Vessel + - Brain + - Breast + - Cervix Uteri + - Colon + - Esophagus + - Fallopian Tube + - Heart + - Kidney + - Liver + - Lung + - Muscle + - Nerve + - Ovary + - Pancreas + - Pituitary + - Prostate + - Salivary Gland + - Skin + - Small Intestine + - Spleen + - Stomach + - Testis + - Thyroid + - Uterus + - Vagina + example: "Skin" + type: + type: string + enum: + - json + - gzip + example: "gzip" +responses: + 200: + description: "Expression values of each gene according to the GTEx database." \ No newline at end of file diff --git a/bio-api/swagger_specs/geneSymbolFinder.yml b/bio-api/swagger_specs/geneSymbolFinder.yml new file mode 100644 index 0000000..973ab14 --- /dev/null +++ b/bio-api/swagger_specs/geneSymbolFinder.yml @@ -0,0 +1,23 @@ +tags: + - Gene Nomenclature +summary: "Gene symbol finder" +description: "Service that takes a string of any length and returns a list of genes that contain that search criteria." +operationId: "Gene symbol finder" +parameters: + - in: query + name: query + description: "Gene search string." + required: true + schema: + type: string + example: "TP" + - in: query + name: limit + description: "Limit the number of results returned (Default 50)." + required: false + schema: + type: integer + example: 10 +responses: + 200: + description: "List of genes containing that search criterion in bioinformatics databases." diff --git a/bio-api/swagger_specs/geneSymbols.yml b/bio-api/swagger_specs/geneSymbols.yml new file mode 100644 index 0000000..0efd424 --- /dev/null +++ b/bio-api/swagger_specs/geneSymbols.yml @@ -0,0 +1,21 @@ +tags: + - Gene Nomenclature +summary: "Gene symbol validator" +description: "Searches the identifier of a list of genes of different genomics databases and returns the approved symbols according to HGNC nomenclature." 
+operationId: "Gene symbol" +requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + gene_ids: + type: array + description: "List of gene identifiers." + items: + type: string + example: ["FANCS", "BRCC1"] +responses: + 200: + description: "HGNC approved gene symbols." diff --git a/bio-api/swagger_specs/genesOfItsGroup.yml b/bio-api/swagger_specs/genesOfItsGroup.yml new file mode 100644 index 0000000..1ae7c0b --- /dev/null +++ b/bio-api/swagger_specs/genesOfItsGroup.yml @@ -0,0 +1,16 @@ +tags: + - Genes Information +summary: "Gene Groups" +description: "Gets the identifier of a gene, validates it and then returns the group of genes to which it belongs according to HGNC, and all the other genes that belong to the same group." +operationId: "Gene Groups" +parameters: + - in: path + name: gene_id + description: "Identifier of the gene for any database." + required: true + schema: + type: string + example: "EGFR" +responses: + 200: + description: "Group of genes to which gene_id belongs according to HGNC." diff --git a/bio-api/swagger_specs/genesOfMetabolicPathway.yml b/bio-api/swagger_specs/genesOfMetabolicPathway.yml new file mode 100644 index 0000000..306fce3 --- /dev/null +++ b/bio-api/swagger_specs/genesOfMetabolicPathway.yml @@ -0,0 +1,35 @@ +tags: + - Metabolic Pathway +summary: "Genes of a metabolic pathway" +description: "Get the list of genes that are involved in a pathway for a given database." +operationId: "Genes of Metabolic Pathway" +parameters: + - in: path + name: pathway_source + description: "Database to query." + required: true + schema: + type: string + enum: + - kegg + - biocarta + - ehmn + - humancyc + - inoh + - netpath + - pid + - reactome + - smpdb + - signalink + - wikipathways + example: "kegg" + - in: path + name: pathway_id + description: "Pathway identifier in the source database." 
+ required: true + schema: + type: string + example: "hsa00740" +responses: + 200: + description: "List of genes involved in the metabolic pathway." \ No newline at end of file diff --git a/bio-api/swagger_specs/genesToTerms.yml b/bio-api/swagger_specs/genesToTerms.yml new file mode 100644 index 0000000..f77138f --- /dev/null +++ b/bio-api/swagger_specs/genesToTerms.yml @@ -0,0 +1,48 @@ +tags: + - Gene Ontology +summary: "Gene Ontology terms related to a list of genes" +description: "Gets the list of related terms for a list of genes." +operationId: "Gene ontology terms" +requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + gene_ids: + type: array + description: "List of gene identifiers." + items: + type: string + example: ["TMCO4"] + filter_type: + type: string + enum: + - intersection + - union + - enrichment + example: "intersection" + relation_type: + type: array + items: + type: string + enum: + - enables + - involved_in + - part_of + - located_in + example: ["enables","involved_in","part_of","located_in"] + ontology_type: + type: array + items: + type: string + enum: + - biological_process + - molecular_function + - cellular_component + example: ["biological_process", "molecular_function", "cellular_component"] +responses: + 200: + description: "The response you get is a list of GO terms that meet the query conditions." + diff --git a/bio-api/swagger_specs/informationOfGenes.yml b/bio-api/swagger_specs/informationOfGenes.yml new file mode 100644 index 0000000..acd7cc8 --- /dev/null +++ b/bio-api/swagger_specs/informationOfGenes.yml @@ -0,0 +1,21 @@ +tags: + - Genes Information +summary: "Genes information" +description: "From a list of valid genes, it obtains different information for the human reference genomes GRCh38 and GRCh37." 
+operationId: "Genes information" +requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + gene_ids: + type: array + description: "List of gene identifiers." + items: + type: string + example: ["MC1R", "ALK"] +responses: + 200: + description: "Gene information" diff --git a/bio-api/swagger_specs/informationOfOncokb.yml b/bio-api/swagger_specs/informationOfOncokb.yml new file mode 100644 index 0000000..83d3069 --- /dev/null +++ b/bio-api/swagger_specs/informationOfOncokb.yml @@ -0,0 +1,21 @@ +tags: + - Accionable Genes and Drugs +summary: "Therapies and Actionable Genes in Cancer" +description: "This service retrieves information on FDA-approved precision oncology therapies, actionable genes, and drugs obtained from the OncoKB database, at a therapeutic, diagnostic, and prognostic level." +operationId: "Therapies and Actionable Genes in Cancer" +requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + gene_ids: + type: array + items: + type: string + example: ["ATM", "EGFR"] +responses: + 200: + description: "OncoKB database information for each gene." + diff --git a/bio-api/swagger_specs/pathwaysInCommon.yml b/bio-api/swagger_specs/pathwaysInCommon.yml new file mode 100644 index 0000000..2c06450 --- /dev/null +++ b/bio-api/swagger_specs/pathwaysInCommon.yml @@ -0,0 +1,21 @@ +tags: + - Metabolic Pathway +summary: "Metabolic pathways from different genes" +description: "Gets the common pathways for a list of genes." +operationId: "Common pathways for genes" +requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + gene_ids: + type: array + description: "List of gene identifiers." + items: + type: string + example: ["HLA-B", "BRAF"] +responses: + 200: + description: "A list of pathways that include the genes sent as parameters." 
diff --git a/bio-api/swagger_specs/relatedTerms.yml b/bio-api/swagger_specs/relatedTerms.yml new file mode 100644 index 0000000..e2812cf --- /dev/null +++ b/bio-api/swagger_specs/relatedTerms.yml @@ -0,0 +1,44 @@ +tags: + - Gene Ontology +summary: "Gene Ontology terms related to another specific term" +description: "Gets the list of related terms to a term." +operationId: "Related gene ontology terms" +requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + term_id: + type: string + example: "0000079" + relations: + type: array + items: + type: string + enum: + - part_of + - regulates + - has_part + example: ["part_of","regulates","has_part"] + ontology_type: + type: array + items: + type: string + enum: + - biological_process + - molecular_function + - cellular_component + example: ["biological_process", "molecular_function", "cellular_component"] + general_depth: + type: int + example: 5 + to_root: + type: int + example: 0 + +responses: + 200: + description: "The response you get is a list of GO terms that meet the query conditions." + diff --git a/bio-api/swagger_specs/stringRelations.yml b/bio-api/swagger_specs/stringRelations.yml new file mode 100644 index 0000000..03d6bdc --- /dev/null +++ b/bio-api/swagger_specs/stringRelations.yml @@ -0,0 +1,25 @@ +tags: + - Functional interactions +summary: "Predicted functional associations network" +description: "For a given gene, this service gets from the String database a list of genes and their relationships to it." +operationId: "Cancer related drugs" +requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + gene_id: + type: string + description: "Gene identifier." + items: + type: string + example: "MX2" + min_combined_score: + type: int + example: 976 +responses: + 200: + description: "List of genes and their relationships according to the String database." 
+ diff --git a/bio-api/templates/homepage.html b/bio-api/templates/homepage.html index e1fb0ac..9bce328 100644 --- a/bio-api/templates/homepage.html +++ b/bio-api/templates/homepage.html @@ -1,54 +1,19 @@ - - + - - - BioAPI v{{ version }} - + + BioAPI {{version}} + -

BioAPI v{{ version }}

-

Services included:

-
    - {% for service in services %} -
  • - {{ service.name }} - {{ service.url }} -
  • - {% endfor %} -
- +

BioAPI {{ version }}

+

A powerful abstraction of genomics databases

+

Documentation

\ No newline at end of file diff --git a/config/bioapi_conf/requirements.txt b/config/bioapi_conf/requirements.txt index 3e33ad2..359693f 100755 --- a/config/bioapi_conf/requirements.txt +++ b/config/bioapi_conf/requirements.txt @@ -1,9 +1,15 @@ pymongo==4.6.1 -Flask==3.0.0 -flask-cors==4.0.0 +Flask==3.1.0 +flask-cors==5.0.0 gunicorn==21.2.0 ConfigParser==6.0.0 -pytest==7.1.2 +pytest==8.3.3 tqdm==4.66.1 gprofiler-official==1.0.0 -Werkzeug==3.0.1 \ No newline at end of file +Werkzeug==3.0.1 +flask_swagger_ui==4.11.1 +apispec==6.7.1 +apispec-webframeworks==1.2.0 +marshmallow==3.23.1 +setuptools==75.5.0 +flasgger==0.9.7.1 \ No newline at end of file