From 295a00f1c915f9dab30f6801a05401d0d3ce6eb2 Mon Sep 17 00:00:00 2001 From: Mark Wilkinson Date: Wed, 22 Jun 2022 10:35:19 +0200 Subject: [PATCH 1/9] updated to newer python. Still some critical vulnerabilities --- Dockerfile | 8 +++++--- docker-assets/assets/build/install.sh | 2 +- requirements.txt | 7 ++++--- setup.py | 2 +- 4 files changed, 11 insertions(+), 8 deletions(-) diff --git a/Dockerfile b/Dockerfile index e768cf9..5606fdd 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,5 @@ -FROM python:3.6.8 +#FROM python:3.6.8 +FROM python:3.9.13 MAINTAINER albert.merono@vu.nl # Default values for env variables @@ -22,13 +23,13 @@ ENV GRLC_INSTALL_DIR="${GRLC_HOME}/grlc" \ GRLC_RUNTIME_DIR="${GRLC_CACHE_DIR}/runtime" RUN apt-get update \ - && DEBIAN_FRONTEND=noninteractive apt-get install -y nginx git-core logrotate python-pip locales gettext-base sudo build-essential apt-utils \ + && DEBIAN_FRONTEND=noninteractive apt-get install -y nginx git-core logrotate python3-pip locales gettext-base sudo build-essential apt-utils \ && update-locale LANG=C.UTF-8 LC_MESSAGES=POSIX \ && locale-gen en_US.UTF-8 \ && DEBIAN_FRONTEND=noninteractive dpkg-reconfigure locales \ && rm -rf /var/lib/apt/lists/* -RUN curl -sL https://deb.nodesource.com/setup_10.x | sudo -E bash - +RUN curl -sL https://deb.nodesource.com/setup_14.x | sudo -E bash - RUN apt-get update && apt-get install -y nodejs COPY ./ ${GRLC_INSTALL_DIR} @@ -48,3 +49,4 @@ VOLUME ["${GRLC_DATA_DIR}", "${GRLC_LOG_DIR}"] WORKDIR ${GRLC_INSTALL_DIR} ENTRYPOINT ["/sbin/entrypoint.sh"] CMD ["app:start"] + diff --git a/docker-assets/assets/build/install.sh b/docker-assets/assets/build/install.sh index e0ef183..88d4dfa 100644 --- a/docker-assets/assets/build/install.sh +++ b/docker-assets/assets/build/install.sh @@ -18,8 +18,8 @@ passwd -d ${GRLC_USER} cd ${GRLC_INSTALL_DIR} chown ${GRLC_USER}:${GRLC_USER} ${GRLC_HOME} -R - pip install --upgrade pip +pip install 'setuptools<58' pip install . npm install git2prov diff --git a/requirements.txt b/requirements.txt index d12ef86..093277d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,9 +2,11 @@ docopt==0.6.2 docutils==0.17.1 Flask==1.0.2 Flask-Cors==3.0.6 -gevent==1.4.0 -greenlet==0.4.15 +itsdangerous==2.0.1 +gevent==21.12.0 +greenlet==1.1.0 html5lib==1.0.1 +setuptools<58 isodate==0.5.4 keepalive==0.5 MarkupSafe==0.23 @@ -16,7 +18,6 @@ rdflib-jsonld==0.4.0 requests==2.20.0 six==1.12.0 simplejson==3.16.0 -setuptools>=38.6.0 SPARQLTransformer==2.1.1 SPARQLWrapper==1.8.2 werkzeug>=0.16.0 diff --git a/setup.py b/setup.py index 21f524a..f9190b8 100644 --- a/setup.py +++ b/setup.py @@ -55,5 +55,5 @@ package_data = { 'grlc': grlc_data }, include_package_data=True, data_files=[('citation/grlc', ['CITATION.cff'])], - python_requires='>=3.7, <=3.8', + python_requires='>=3.9, <=3.10', ) From 3ffc02caad54c8425e0e79350339de5deda6eb95 Mon Sep 17 00:00:00 2001 From: Mark Wilkinson Date: Fri, 24 Jun 2022 09:41:43 +0200 Subject: [PATCH 2/9] upgrade in docker image --- Dockerfile | 5 +++++ docker-assets/assets/build/install.sh | 8 ++++---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/Dockerfile b/Dockerfile index 5606fdd..7421e48 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,6 +1,7 @@ #FROM python:3.6.8 FROM python:3.9.13 MAINTAINER albert.merono@vu.nl +RUN apt-get update && apt-get full-upgrade -y # Default values for env variables ARG GRLC_GITHUB_ACCESS_TOKEN= @@ -29,7 +30,11 @@ RUN apt-get update \ && DEBIAN_FRONTEND=noninteractive dpkg-reconfigure locales \ && rm -rf /var/lib/apt/lists/* +RUN apt-get update && apt-get dist-upgrade -y + + RUN curl -sL https://deb.nodesource.com/setup_14.x | sudo -E bash - +RUN chmod a+r /usr/share/keyrings/nodesource.gpg RUN apt-get update && apt-get install -y nodejs COPY ./ ${GRLC_INSTALL_DIR} diff --git a/docker-assets/assets/build/install.sh b/docker-assets/assets/build/install.sh index 88d4dfa..c1f8f32 100644 --- a/docker-assets/assets/build/install.sh +++ b/docker-assets/assets/build/install.sh @@ -31,7 +31,7 @@ sed -i \ /etc/nginx/nginx.conf # configure gitlab log rotation - cat > /etc/logrotate.d/grlc << EOF + cat > /etc/logrotate.d/grlc << EOF1 ${GRLC_LOG_DIR}/grlc/*.log { weekly missingok @@ -41,10 +41,10 @@ sed -i \ notifempty copytruncate } - EOF +EOF1 # configure gitlab vhost log rotation - cat > /etc/logrotate.d/grlc-nginx << EOF + cat > /etc/logrotate.d/grlc-nginx << EOF2 ${GRLC_LOG_DIR}/nginx/*.log { weekly missingok @@ -54,4 +54,4 @@ sed -i \ notifempty copytruncate } - EOF +EOF2 From 061e6928af958fbe4ab0c0592e22a11c8c7241ee Mon Sep 17 00:00:00 2001 From: Mark Wilkinson Date: Tue, 12 Jul 2022 13:27:07 +0200 Subject: [PATCH 3/9] dockerfile will now build --- docker-assets/assets/build/install.sh | 5 ++++- requirements.txt | 2 -- src/fileLoaders.py | 3 ++- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/docker-assets/assets/build/install.sh b/docker-assets/assets/build/install.sh index c1f8f32..d1b5199 100644 --- a/docker-assets/assets/build/install.sh +++ b/docker-assets/assets/build/install.sh @@ -20,7 +20,10 @@ cd ${GRLC_INSTALL_DIR} chown ${GRLC_USER}:${GRLC_USER} ${GRLC_HOME} -R pip install --upgrade pip pip install 'setuptools<58' -pip install . +pip install 'docutils' +python setup.py install_egg_info +pip install 'setuptools<56' +pip install -e . npm install git2prov diff --git a/requirements.txt b/requirements.txt index 093277d..e757044 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,12 +1,10 @@ docopt==0.6.2 -docutils==0.17.1 Flask==1.0.2 Flask-Cors==3.0.6 itsdangerous==2.0.1 gevent==21.12.0 greenlet==1.1.0 html5lib==1.0.1 -setuptools<58 isodate==0.5.4 keepalive==0.5 MarkupSafe==0.23 diff --git a/src/fileLoaders.py b/src/fileLoaders.py index efde936..69dff47 100644 --- a/src/fileLoaders.py +++ b/src/fileLoaders.py @@ -73,7 +73,8 @@ def __init__(self, user, repo, subdir=None, sha=None, prov=None): self.prov = prov gh = Github(static.ACCESS_TOKEN) try: - self.gh_repo = gh.get_repo(user + '/' + repo, lazy=False) + #self.gh_repo = gh.get_repo(user + '/' + repo, lazy=False) + raise Exception("GitHub Access is disabled for this grlc server") except BadCredentialsException: raise Exception('BadCredentials: have you set up github_access_token on config.ini ?') except Exception: From 15468352d3648017af2253aa362d94babae7b02e Mon Sep 17 00:00:00 2001 From: Mark Wilkinson Date: Tue, 12 Jul 2022 14:01:28 +0200 Subject: [PATCH 4/9] sitch off github and docker build --- docker-assets/assets/build/install.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docker-assets/assets/build/install.sh b/docker-assets/assets/build/install.sh index d1b5199..954b463 100644 --- a/docker-assets/assets/build/install.sh +++ b/docker-assets/assets/build/install.sh @@ -21,11 +21,11 @@ chown ${GRLC_USER}:${GRLC_USER} ${GRLC_HOME} -R pip install --upgrade pip pip install 'setuptools<58' pip install 'docutils' -python setup.py install_egg_info -pip install 'setuptools<56' -pip install -e . +pip install . + +#npm install git2prov +#npm audit fix -npm install git2prov #move nginx logs to ${GITLAB_LOG_DIR}/nginx sed -i \ From b8a85ff22f6209ca04ce8dd8fb6b81843ef5806b Mon Sep 17 00:00:00 2001 From: Mark Wilkinson Date: Fri, 15 Jul 2022 09:18:24 +0200 Subject: [PATCH 5/9] Update README.md --- README.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index aa7b2e7..9df2021 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,10 @@

-[![PyPI version](https://badge.fury.io/py/grlc.svg)](https://badge.fury.io/py/grlc) -[![DOI](https://zenodo.org/badge/46131212.svg)](https://zenodo.org/badge/latestdoi/46131212) -[![Build Status](https://travis-ci.org/CLARIAH/grlc.svg?branch=master)](https://travis-ci.org/CLARIAH/grlc) +# NOTE: This is a highly crippled version of the original grlc server +It is intended to be used in secure environments. Specifically, the GitHub and YAML file integration has been disabled. Only local queries will be available. + +## Original Documentation from https://raw.githubusercontent.com/CLARIAH/grlc/ is below grlc, the git repository linked data API constructor, automatically builds Web APIs using shared SPARQL queries. http://grlc.io/ From 5d80b216d411c00e0853f7f0a919bb8a7851e4dd Mon Sep 17 00:00:00 2001 From: Mark Wilkinson Date: Wed, 20 Jul 2022 11:10:36 +0200 Subject: [PATCH 6/9] removing functionality and updating dockerfile to new standards --- Dockerfile | 3 ++- requirements.txt | 5 +++-- src/fileLoaders.py | 52 ++++++++++++++++++++++++++-------------------- 3 files changed, 34 insertions(+), 26 deletions(-) diff --git a/Dockerfile b/Dockerfile index 7421e48..9b57d85 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,6 +1,7 @@ #FROM python:3.6.8 FROM python:3.9.13 -MAINTAINER albert.merono@vu.nl +LABEL org.opencontainers.image.authors="ORIGINAL: albert.merono@vu.nl; THIS VERSION: mark.wilkinson@upm.es" +LABEL org.opencontainers.image.documentation="https://github.com/markwilkinson/grlc/blob/master/README.md" RUN apt-get update && apt-get full-upgrade -y # Default values for env variables diff --git a/requirements.txt b/requirements.txt index e757044..b860284 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,7 @@ docopt==0.6.2 Flask==1.0.2 -Flask-Cors==3.0.6 +Flask-Cors==3.0.9 +urllib3==1.26.5 itsdangerous==2.0.1 gevent==21.12.0 greenlet==1.1.0 @@ -13,7 +14,7 @@ pyparsing==2.0.7 PyYAML==5.4 rdflib==5.0.0 rdflib-jsonld==0.4.0 -requests==2.20.0 +requests six==1.12.0 simplejson==3.16.0 SPARQLTransformer==2.1.1 diff --git a/src/fileLoaders.py b/src/fileLoaders.py index 69dff47..0602067 100644 --- a/src/fileLoaders.py +++ b/src/fileLoaders.py @@ -71,14 +71,15 @@ def __init__(self, user, repo, subdir=None, sha=None, prov=None): self.subdir = (subdir + "/") if subdir else "" self.sha = sha if sha else NotSet self.prov = prov - gh = Github(static.ACCESS_TOKEN) + #gh = Github(static.ACCESS_TOKEN) try: #self.gh_repo = gh.get_repo(user + '/' + repo, lazy=False) raise Exception("GitHub Access is disabled for this grlc server") except BadCredentialsException: raise Exception('BadCredentials: have you set up github_access_token on config.ini ?') except Exception: - raise Exception('Repo not found: ' + user + '/' + repo) + raise Exception('GitHub Access has been disabled for this server' ) +# raise Exception('Repo not found: ' + user + '/' + repo) def fetchFiles(self): """Returns a list of file items contained on the github repo.""" @@ -263,27 +264,32 @@ class URLLoader(BaseLoader): specification from a specification YAML file located on a remote server.""" def __init__(self, spec_url): - """Create a new URLLoader. - - Keyword arguments: - spec_url -- URL where the specification YAML file is located.""" - headers = {'Accept' : 'text/yaml'} - resp = requests.get(spec_url, headers=headers) - if resp.status_code == 200: - self.spec = yaml.load(resp.text) - self.spec['url'] = spec_url - self.spec['files'] = {} - for queryUrl in self.spec['queries']: - queryNameExt = path.basename(queryUrl) - queryName = path.splitext(queryNameExt)[0] # Remove extention - item = { - 'name': queryName, - 'download_url': queryUrl - } - self.spec['files'][queryNameExt] = item - del self.spec['queries'] - else: - raise Exception(resp.text) + try: + raise Exception("YAML file access is disabled for this grlc server") + except Exception: + raise Exception("YAML file access is disabled for this grlc server") + + # """Create a new URLLoader. + + # Keyword arguments: + # spec_url -- URL where the specification YAML file is located.""" + # headers = {'Accept' : 'text/yaml'} + # resp = requests.get(spec_url, headers=headers) + # if resp.status_code == 200: + # self.spec = yaml.load(resp.text) + # self.spec['url'] = spec_url + # self.spec['files'] = {} + # for queryUrl in self.spec['queries']: + # queryNameExt = path.basename(queryUrl) + # queryName = path.splitext(queryNameExt)[0] # Remove extention + # item = { + # 'name': queryName, + # 'download_url': queryUrl + # } + # self.spec['files'][queryNameExt] = item + # del self.spec['queries'] + # else: + # raise Exception(resp.text) def fetchFiles(self): """Returns a list of file items contained on specification.""" From 34ca2857c4ec7081b94f4d5c62573543b7f49f09 Mon Sep 17 00:00:00 2001 From: Mark Wilkinson Date: Wed, 31 Jul 2024 11:39:33 +0200 Subject: [PATCH 7/9] Update README.md --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index 9df2021..76e3f70 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,6 @@ +# Shallot +## Kinda like grlc, but not as powerful ;-) +

# NOTE: This is a highly crippled version of the original grlc server From c7d4aec369add4e62918511f17f073c1b7641a4d Mon Sep 17 00:00:00 2001 From: Mark Wilkinson Date: Wed, 31 Jul 2024 17:55:27 +0200 Subject: [PATCH 8/9] some samples to help interpret the bugs --- openapi.json | 229 +++++++++++++++++++++++++++++++++++++++++++++++++++ openapi.yaml | 106 ++++++++++++++++++++++++ swagger.json | 133 ++++++++++++++++++++++++++++++ 3 files changed, 468 insertions(+) create mode 100644 openapi.json create mode 100644 openapi.yaml create mode 100644 swagger.json diff --git a/openapi.json b/openapi.json new file mode 100644 index 0000000..7650e77 --- /dev/null +++ b/openapi.json @@ -0,0 +1,229 @@ +{ + "openapi": "3.0.1", + "info": { + "title": "Duchenne Parent Project Shallot", + "description": "The FAIR Data Point Shallot server for the Duchenne Parent Project", + "contact": { + "name": "Mark Wilkinson", + "url": "https://fairdata.systems" + }, + "version": "local" + }, + "servers": [ + { + "url": "//fairdata.services/api-local/" + } + ], + "paths": { + "/count": { + "get": { + "tags": [ + "Patient Count" + ], + "summary": "Returns the number of patients in the registry with the corresponding disease code", + "description": "\n\n```\n#+ summary: Returns the number of patients in the registry with the corresponding disease code\n#+ tags:\n#+ - Patient Count\n#+ defaults:\n#+ - type: http://www.orpha.net/ORDO/Orphanet_98896\n#+ endpoint_in_url: False\n\nPREFIX sio: \nselect (count(?p) as ?count) where { \n ?p sio:SIO_000228 ?role . # person has role role\n ?role sio:SIO_000356 ?process . # is realized in process\n ?process sio:SIO_000229 ?output . #has output output\n ?output sio:SIO_000628 ?attribute . # output refers to attribute\n\t?attribute a ?_type_iri . # attribute is a orphacode\n}\n\n```", + "parameters": [ + { + "name": "type", + "in": "query", + "description": "A value of type string (iri) that will substitute ?_type_iri in the original query", + "required": true, + "schema": { + "type": "string", + "format": "iri", + "default": "http://www.orpha.net/ORDO/Orphanet_98896" + } + } + ], + "responses": { + "200": { + "description": "Query response", + "content": { + "text/csv": { + "schema": { + "type": "array", + "items": { + "type": "object", + "properties": {} + } + } + }, + "application/json": { + "schema": { + "type": "array", + "items": { + "type": "object", + "properties": {} + } + } + }, + "text/html": { + "schema": { + "type": "array", + "items": { + "type": "object", + "properties": {} + } + } + } + } + }, + "default": { + "description": "Unexpected error", + "content": { + "text/csv": { + "schema": { + "$ref": "#/components/schemas/Message" + } + }, + "application/json": { + "schema": { + "$ref": "#/components/schemas/Message" + } + }, + "text/html": { + "schema": { + "$ref": "#/components/schemas/Message" + } + } + } + } + } + } + }, + "/kpi-ttd": { + "get": { + "tags": [ + "KPI diagnosis-delay" + ], + "summary": "Returns the Key Performance Indicator of the delay between symptom onset and diagnosis. This data is aggregated by disease, and by year of diagnosis, and is measured in days.", + "description": "\n\n```\n#+ summary: Returns the Key Performance Indicator of the delay between symptom onset and diagnosis. This data is aggregated by disease, and by year of diagnosis, and is measured in days.\n#+ tags:\n#+ - KPI diagnosis-delay\n#+ defaults:\n#+ \n#+ endpoint_in_url: False\n\n################################################################\n# list diagnosis and time from onset to diagnosis\n################################################################\n\nPREFIX sio: \nPREFIX rdfs: \nPREFIX rdf: \nPREFIX xsd: \nPREFIX ofn: \n\nSELECT DISTINCT ?ORDO ?yearOfDiagnosis (xsd:integer(ROUND(AVG(?timeOnsetToDiagnosis))) as ?avgoffset)\nWHERE {\n BIND(xsd:integer(ofn:asDays(?onsetdate - ?diagnosisdate)) AS ?timeOnsetToDiagnosis)\n# BIND(xsd:integer(ofn:asDays(?diagnosisdate - ?onsetdate)) AS ?timeOnsetToDiagnosis)\n BIND(SUBSTR(str(?diagnosisdate), 1,4) AS ?yearOfDiagnosis)\n {\n SELECT ?ORDO ?diagnosisdate WHERE {\n GRAPH ?g {\n ?person sio:SIO_000228 ?role1 . # person has role role\n ?role1 sio:SIO_000356 ?process1 . # is realized in process\n ?process1 a . # diagnostic process\n ?process1 sio:SIO_000229 ?output1 . #has output output \n ?output1 a . # diagnosis code\n ?output1 sio:SIO_000628 ?diagnosis1 . # output refers to attribute\n ?diagnosis1 a ?ORDO .\n FILTER(!(?ORDO = sio:SIO_000614)) . # not an \"attribute\" diagnosis\n\t \t}\n \t\t?g sio:SIO_000680 ?startdate .\n \t\t?startdate sio:SIO_000300 ?diagnosisdate .\n }\n }\n {\n SELECT ?onsetdate WHERE {\n ?person sio:SIO_000228 ?role2 . # person has role role\n ?role2 sio:SIO_000356 ?process2 . # is realized in process\n ?process2 sio:SIO_000229 ?output2 . #has output output\n ?output2 sio:SIO_000300 ?onsetdate .\n ?output2 sio:SIO_000628 ?attribute2 . # output refers to attribute\n ?attribute2 a . \n }\n }\n} group by ?ORDO ?yearOfDiagnosis order by ?yearOfDiagnosis ?ORDO\n\n\n```", + "responses": { + "200": { + "description": "Query response", + "content": { + "text/csv": { + "schema": { + "type": "array", + "items": { + "type": "object", + "properties": {} + } + } + }, + "application/json": { + "schema": { + "type": "array", + "items": { + "type": "object", + "properties": {} + } + } + }, + "text/html": { + "schema": { + "type": "array", + "items": { + "type": "object", + "properties": {} + } + } + } + } + }, + "default": { + "description": "Unexpected error", + "content": { + "text/csv": { + "schema": { + "$ref": "#/components/schemas/Message" + } + }, + "application/json": { + "schema": { + "$ref": "#/components/schemas/Message" + } + }, + "text/html": { + "schema": { + "$ref": "#/components/schemas/Message" + } + } + } + } + } + } + }, + "/phenotype-frequencies": { + "get": { + "tags": [ + "Phenotype frequency" + ], + "summary": "Returns the number of patients in the registry that have had a phenotype code at any time", + "description": "\n\n```\n#+ summary: Returns the number of patients in the registry that have had a phenotype code at any time\n#+ tags:\n#+ - Phenotype frequency\n#+ defaults:\n#+ \n#+ endpoint_in_url: False\n\nPREFIX sio: \nselect ?type (count(?type) as ?frequency) where {\n select distinct ?p ?type where {\n ?p sio:SIO_000228 ?role . # person has role role\n ?role sio:SIO_000356 ?process . # is realized in process\n ?process sio:SIO_000229 ?output . #has output output\n ?output sio:SIO_000628 ?attribute . # output refers to attribute\n ?attribute a ?type .\n FILTER(!(?type = sio:SIO_000614)) . # not an \"attribute\" type\n }\n} group by ?type\n\n```", + "responses": { + "200": { + "description": "Query response", + "content": { + "text/csv": { + "schema": { + "type": "array", + "items": { + "type": "object", + "properties": {} + } + } + }, + "application/json": { + "schema": { + "type": "array", + "items": { + "type": "object", + "properties": {} + } + } + }, + "text/html": { + "schema": { + "type": "array", + "items": { + "type": "object", + "properties": {} + } + } + } + } + }, + "default": { + "description": "Unexpected error", + "content": { + "text/csv": { + "schema": { + "$ref": "#/components/schemas/Message" + } + }, + "application/json": { + "schema": { + "$ref": "#/components/schemas/Message" + } + }, + "text/html": { + "schema": { + "$ref": "#/components/schemas/Message" + } + } + } + } + } + } + } + }, + "components": { + "schemas": { + "Message": { + "type": "string" + } + } + }, + "x-original-swagger-version": "2.0" +} \ No newline at end of file diff --git a/openapi.yaml b/openapi.yaml new file mode 100644 index 0000000..ed0debe --- /dev/null +++ b/openapi.yaml @@ -0,0 +1,106 @@ +openapi: "3.0.1" +info: + title: SPARQL OpenAPI + version: "10" + description: | + This is a heavily trimmed copy of the RDF4J REST API that includes only the SPARQL endpoint GET and POST definitions + + From this endpoint we serve the BGV Administrative Collection information (collecting group, date, etc.) + +externalDocs: + url: https://rdf4j.org/documentation/reference/rest-api/ + +servers: + - url: https://bgv.cbgp.upm.es + description: SPARQL server for the BANCO DE GERMOPLASMA VEGETAL-UPM + +tags: + - name: SPARQL + description: SPARQL Query execution + +components: + requestBodies: + RdfData: + description: RDF data payload + content: + application/rdf+xml: + schema: + type: object + xml: + name: RDF + namespace: http://www.w3.org/1999/02/22-rdf-syntax-ns# + text/plain: + schema: + type: string + text/turtle: + schema: + type: string + text/rdf+n3: + schema: + type: string + text/x-nquads: + schema: + type: string + application/ld+json: + schema: + type: object + format: json + application/rdf+json: + schema: + type: object + format: json + application/trix: + schema: + type: object + xml: + name: TriX + application/x-trig: + schema: + type: string + application/x-binary-rdf: + schema: + type: string + format: binary + responses: + 200SparqlResult: + description: SPARQL query result + content: + application/sparql-results+json: + examples: + SelectQueryResult: + $ref: "#/components/examples/SparqlJsonBindings" + examples: + SparqlJsonBindings: + value: + head: + vars: [ "s", "p", "o" ] + results: + bindings: + - s: + type: "uri" + value: "http://example.org/s1" + - p: + type: "uri" + value: "http://example.org/p1" + - o: + type: "literal" + value: "foo" +paths: + /repositories/administrative: + get: + tags: + - SPARQL + summary: Execute SPARQL query + description: | + Execute a SPARQL query on the repository. The result format is based on the type of result (boolean, variable bindings, or RDF data) and the negotiated acceptable content-type. Note that RDF4J supports executing SPARQL queries with either a GET or a POST request. POST is supported for queries that are too large to be encoded as a query parameter. + parameters: + - name: query + in: query + description: The query to evaluate + required: true + schema: + type: string + example: SELECT DISTINCT ?type WHERE {?s a ?type} + responses: + '200': + $ref: "#/components/responses/200SparqlResult" \ No newline at end of file diff --git a/swagger.json b/swagger.json new file mode 100644 index 0000000..0e742c0 --- /dev/null +++ b/swagger.json @@ -0,0 +1,133 @@ +{ + "basePath": "/api-local/", + "definitions": { + "Message": { + "type": "string" + } + }, + "host": "fairdata.services", + "info": { + "contact": { + "name": "Mark Wilkinson", + "url": "https://fairdata.systems" + }, + "description": "The FAIR Data Point Shallot server for the Duchenne Parent Project", + "title": "Duchenne Parent Project Shallot", + "version": "local" + }, + "next_commit": null, + "paths": { + "/count": { + "get": { + "description": "\n\n```\n#+ summary: Returns the number of patients in the registry with the corresponding disease code\n#+ tags:\n#+ - Patient Count\n#+ defaults:\n#+ - type: http://www.orpha.net/ORDO/Orphanet_98896\n#+ endpoint_in_url: False\n\nPREFIX sio: \nselect (count(?p) as ?count) where { \n ?p sio:SIO_000228 ?role . # person has role role\n ?role sio:SIO_000356 ?process . # is realized in process\n ?process sio:SIO_000229 ?output . #has output output\n ?output sio:SIO_000628 ?attribute . # output refers to attribute\n\t?attribute a ?_type_iri . # attribute is a orphacode\n}\n\n```", + "parameters": [ + { + "default": "http://www.orpha.net/ORDO/Orphanet_98896", + "description": "A value of type string (iri) that will substitute ?_type_iri in the original query", + "format": "iri", + "in": "query", + "name": "type", + "required": true, + "type": "string" + } + ], + "produces": [ + "text/csv", + "application/json", + "text/html" + ], + "responses": { + "200": { + "description": "Query response", + "schema": { + "items": { + "properties": null, + "type": "object" + }, + "type": "array" + } + }, + "default": { + "description": "Unexpected error", + "schema": { + "$ref": "#/definitions/Message" + } + } + }, + "summary": "Returns the number of patients in the registry with the corresponding disease code", + "tags": [ + "Patient Count" + ] + } + }, + "/kpi-ttd": { + "get": { + "description": "\n\n```\n#+ summary: Returns the Key Performance Indicator of the delay between symptom onset and diagnosis. This data is aggregated by disease, and by year of diagnosis, and is measured in days.\n#+ tags:\n#+ - KPI diagnosis-delay\n#+ defaults:\n#+ \n#+ endpoint_in_url: False\n\n################################################################\n# list diagnosis and time from onset to diagnosis\n################################################################\n\nPREFIX sio: \nPREFIX rdfs: \nPREFIX rdf: \nPREFIX xsd: \nPREFIX ofn: \n\nSELECT DISTINCT ?ORDO ?yearOfDiagnosis (xsd:integer(ROUND(AVG(?timeOnsetToDiagnosis))) as ?avgoffset)\nWHERE {\n BIND(xsd:integer(ofn:asDays(?onsetdate - ?diagnosisdate)) AS ?timeOnsetToDiagnosis)\n# BIND(xsd:integer(ofn:asDays(?diagnosisdate - ?onsetdate)) AS ?timeOnsetToDiagnosis)\n BIND(SUBSTR(str(?diagnosisdate), 1,4) AS ?yearOfDiagnosis)\n {\n SELECT ?ORDO ?diagnosisdate WHERE {\n GRAPH ?g {\n ?person sio:SIO_000228 ?role1 . # person has role role\n ?role1 sio:SIO_000356 ?process1 . # is realized in process\n ?process1 a . # diagnostic process\n ?process1 sio:SIO_000229 ?output1 . #has output output \n ?output1 a . # diagnosis code\n ?output1 sio:SIO_000628 ?diagnosis1 . # output refers to attribute\n ?diagnosis1 a ?ORDO .\n FILTER(!(?ORDO = sio:SIO_000614)) . # not an \"attribute\" diagnosis\n\t \t}\n \t\t?g sio:SIO_000680 ?startdate .\n \t\t?startdate sio:SIO_000300 ?diagnosisdate .\n }\n }\n {\n SELECT ?onsetdate WHERE {\n ?person sio:SIO_000228 ?role2 . # person has role role\n ?role2 sio:SIO_000356 ?process2 . # is realized in process\n ?process2 sio:SIO_000229 ?output2 . #has output output\n ?output2 sio:SIO_000300 ?onsetdate .\n ?output2 sio:SIO_000628 ?attribute2 . # output refers to attribute\n ?attribute2 a . \n }\n }\n} group by ?ORDO ?yearOfDiagnosis order by ?yearOfDiagnosis ?ORDO\n\n\n```", + "parameters": [], + "produces": [ + "text/csv", + "application/json", + "text/html" + ], + "responses": { + "200": { + "description": "Query response", + "schema": { + "items": { + "properties": null, + "type": "object" + }, + "type": "array" + } + }, + "default": { + "description": "Unexpected error", + "schema": { + "$ref": "#/definitions/Message" + } + } + }, + "summary": "Returns the Key Performance Indicator of the delay between symptom onset and diagnosis. This data is aggregated by disease, and by year of diagnosis, and is measured in days.", + "tags": [ + "KPI diagnosis-delay" + ] + } + }, + "/phenotype-frequencies": { + "get": { + "description": "\n\n```\n#+ summary: Returns the number of patients in the registry that have had a phenotype code at any time\n#+ tags:\n#+ - Phenotype frequency\n#+ defaults:\n#+ \n#+ endpoint_in_url: False\n\nPREFIX sio: \nselect ?type (count(?type) as ?frequency) where {\n select distinct ?p ?type where {\n ?p sio:SIO_000228 ?role . # person has role role\n ?role sio:SIO_000356 ?process . # is realized in process\n ?process sio:SIO_000229 ?output . #has output output\n ?output sio:SIO_000628 ?attribute . # output refers to attribute\n ?attribute a ?type .\n FILTER(!(?type = sio:SIO_000614)) . # not an \"attribute\" type\n }\n} group by ?type\n\n```", + "parameters": [], + "produces": [ + "text/csv", + "application/json", + "text/html" + ], + "responses": { + "200": { + "description": "Query response", + "schema": { + "items": { + "properties": null, + "type": "object" + }, + "type": "array" + } + }, + "default": { + "description": "Unexpected error", + "schema": { + "$ref": "#/definitions/Message" + } + } + }, + "summary": "Returns the number of patients in the registry that have had a phenotype code at any time", + "tags": [ + "Phenotype frequency" + ] + } + } + }, + "prev_commit": null, + "schemes": [], + "swagger": "2.0" +} \ No newline at end of file From a6c0005705cf7060e270284b86b65d355a30d032 Mon Sep 17 00:00:00 2001 From: Mark Wilkinson Date: Wed, 23 Apr 2025 10:37:17 +0200 Subject: [PATCH 9/9] removing non-shallot stuff --- .zenodo.json | 28 ------ CONTRIBUTING.md | 73 --------------- openapi.json | 229 ------------------------------------------------ paper.bib | 140 ----------------------------- paper.md | 42 --------- 5 files changed, 512 deletions(-) delete mode 100644 .zenodo.json delete mode 100644 CONTRIBUTING.md delete mode 100644 openapi.json delete mode 100644 paper.bib delete mode 100644 paper.md diff --git a/.zenodo.json b/.zenodo.json deleted file mode 100644 index 2bc432c..0000000 --- a/.zenodo.json +++ /dev/null @@ -1,28 +0,0 @@ -{ - "creators": [ - { - "affiliation": "King's College London", - "name": "Meroño-Peñuela, Albert", - "orcid": "0000-0003-4646-5842" - }, - { - "affiliation": "Netherlands eScience Center", - "name": "Martinez, Carlos", - "orcid": "0000-0001-5565-7577" - } - ], - "description": "grlc, the git repository linked data API constructor, automatically builds Web APIs using SPARQL queries stored in git repositories.", - "keywords": [ - "swagger-ui", - "sparql", - "linked-data", - "semantic-web", - "linked-data-api" - ], - "license": { - "id": "MIT" - }, - "publication_date": "2021-11-03", - "title": "grlc: the git repository linked data API constructor", - "version": "1.3.7" -} diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md deleted file mode 100644 index c8ee408..0000000 --- a/CONTRIBUTING.md +++ /dev/null @@ -1,73 +0,0 @@ -Thank you very much for your interest in contributing to grlc! It's people like you that truly make the Semantic Web more accessible to everyone :) - -## Communication channels - -If you would like to get in touch with the grlc developers, and with other users of grlc, you can reach us in two ways: - - Via Twitter, by using the grlc handle (**@grlcldapi**). Follow this account to hear about updates. - - Via the grlc [mailing list](https://groups.google.com/g/grlc-list/). Sign up to the mailing list to ask questions and make suggestions. - -## Filing bug reports - -The official channel to file bug reports is via our GitHub's [issue tracker](https://github.com/CLARIAH/grlc/issues). When doing so make sure that: -- Your issue title briefly describes the bug -- You include log output (try `docker logs grlc_grlc_1` if you daemonized your instance) -- Name the file/module if known/available -- You tag your issue as **bug** - -## Sending feature requests - -As with bug reports, for requesting features please use the [issue tracker](https://github.com/CLARIAH/grlc/issues) as well and this time: -- Describe briefly the feature in the title -- Describe the desired feature -- Describe your use case so we understand what you are using grlc for -- Name the file/module if known/available -- Tag the issue as **enhancement** - -## Sending pull requests - -If you would like to contribute to the code directly, please send in a [pull request (PR)](https://github.com/CLARIAH/grlc/pulls). Please make sure that: - - The title of your PR briefly describes the content - - Describe in detail what your PR contributes - - If your PR addresses a specific issue, indicate the issue number - - Assign @albertmeronyo or @c-martinez as reviewer of your PR. - -## Testing environment - -To get started with hacking grlc, follow these steps to create a local testing environment (you'll need [docker](https://www.docker.com/) and [docker-compose](https://docs.docker.com/compose/)): - -1. `docker pull clariah/grlc:latest` -2. `git clone https://github.com/CLARIAH/grlc` -3. `cd grlc` -4. Create a `docker-compose.yml` which matches your needs. For example: -``` -version: '2' -services: - grlc: - build: ./ - restart: unless-stopped - ports: - - "8001:80" - environment: - - DEBUG=true - - USERMAP_GID=1000 - - USERMAP_UID=1000 - - GRLC_GITHUB_ACCESS_TOKEN=xxx - - GRLC_SERVER_NAME=grlc.io -``` - -5. `docker-compose up` -6. Your local grlc instance should be available at http://localhost:8001 and should respond to code modifications you make on `` - -You're good to pick any issue at the [issue tracker](https://github.com/CLARIAH/grlc/issues) marked as **enhancement** and start implementing it :) - -## Governance model - -As creators of grlc, [@albertmeronyo](https://github.com/albertmeronyo) and [@c-martinez](http://github.com/c-martinez) are benevolent dictators for this project. This means that they have a final say of the direction of the project. This DOES NOT mean they are not willing to listen to suggestion (on the contrary, they *love* to hear new ideas)! - -## Contributing - -All grlc contributors will be listed in the [CONTRIBUTORS.md](CONTRIBUTORS.md) file. Also, [notes of new releases](https://github.com/CLARIAH/grlc/releases) will mention who contributed to that specific release. - -## Questions - -Please open an issue at the [issue tracker](https://github.com/CLARIAH/grlc/issues) and tag it as **question** diff --git a/openapi.json b/openapi.json deleted file mode 100644 index 7650e77..0000000 --- a/openapi.json +++ /dev/null @@ -1,229 +0,0 @@ -{ - "openapi": "3.0.1", - "info": { - "title": "Duchenne Parent Project Shallot", - "description": "The FAIR Data Point Shallot server for the Duchenne Parent Project", - "contact": { - "name": "Mark Wilkinson", - "url": "https://fairdata.systems" - }, - "version": "local" - }, - "servers": [ - { - "url": "//fairdata.services/api-local/" - } - ], - "paths": { - "/count": { - "get": { - "tags": [ - "Patient Count" - ], - "summary": "Returns the number of patients in the registry with the corresponding disease code", - "description": "\n\n```\n#+ summary: Returns the number of patients in the registry with the corresponding disease code\n#+ tags:\n#+ - Patient Count\n#+ defaults:\n#+ - type: http://www.orpha.net/ORDO/Orphanet_98896\n#+ endpoint_in_url: False\n\nPREFIX sio: \nselect (count(?p) as ?count) where { \n ?p sio:SIO_000228 ?role . # person has role role\n ?role sio:SIO_000356 ?process . # is realized in process\n ?process sio:SIO_000229 ?output . #has output output\n ?output sio:SIO_000628 ?attribute . # output refers to attribute\n\t?attribute a ?_type_iri . # attribute is a orphacode\n}\n\n```", - "parameters": [ - { - "name": "type", - "in": "query", - "description": "A value of type string (iri) that will substitute ?_type_iri in the original query", - "required": true, - "schema": { - "type": "string", - "format": "iri", - "default": "http://www.orpha.net/ORDO/Orphanet_98896" - } - } - ], - "responses": { - "200": { - "description": "Query response", - "content": { - "text/csv": { - "schema": { - "type": "array", - "items": { - "type": "object", - "properties": {} - } - } - }, - "application/json": { - "schema": { - "type": "array", - "items": { - "type": "object", - "properties": {} - } - } - }, - "text/html": { - "schema": { - "type": "array", - "items": { - "type": "object", - "properties": {} - } - } - } - } - }, - "default": { - "description": "Unexpected error", - "content": { - "text/csv": { - "schema": { - "$ref": "#/components/schemas/Message" - } - }, - "application/json": { - "schema": { - "$ref": "#/components/schemas/Message" - } - }, - "text/html": { - "schema": { - "$ref": "#/components/schemas/Message" - } - } - } - } - } - } - }, - "/kpi-ttd": { - "get": { - "tags": [ - "KPI diagnosis-delay" - ], - "summary": "Returns the Key Performance Indicator of the delay between symptom onset and diagnosis. This data is aggregated by disease, and by year of diagnosis, and is measured in days.", - "description": "\n\n```\n#+ summary: Returns the Key Performance Indicator of the delay between symptom onset and diagnosis. This data is aggregated by disease, and by year of diagnosis, and is measured in days.\n#+ tags:\n#+ - KPI diagnosis-delay\n#+ defaults:\n#+ \n#+ endpoint_in_url: False\n\n################################################################\n# list diagnosis and time from onset to diagnosis\n################################################################\n\nPREFIX sio: \nPREFIX rdfs: \nPREFIX rdf: \nPREFIX xsd: \nPREFIX ofn: \n\nSELECT DISTINCT ?ORDO ?yearOfDiagnosis (xsd:integer(ROUND(AVG(?timeOnsetToDiagnosis))) as ?avgoffset)\nWHERE {\n BIND(xsd:integer(ofn:asDays(?onsetdate - ?diagnosisdate)) AS ?timeOnsetToDiagnosis)\n# BIND(xsd:integer(ofn:asDays(?diagnosisdate - ?onsetdate)) AS ?timeOnsetToDiagnosis)\n BIND(SUBSTR(str(?diagnosisdate), 1,4) AS ?yearOfDiagnosis)\n {\n SELECT ?ORDO ?diagnosisdate WHERE {\n GRAPH ?g {\n ?person sio:SIO_000228 ?role1 . # person has role role\n ?role1 sio:SIO_000356 ?process1 . # is realized in process\n ?process1 a . # diagnostic process\n ?process1 sio:SIO_000229 ?output1 . #has output output \n ?output1 a . # diagnosis code\n ?output1 sio:SIO_000628 ?diagnosis1 . # output refers to attribute\n ?diagnosis1 a ?ORDO .\n FILTER(!(?ORDO = sio:SIO_000614)) . # not an \"attribute\" diagnosis\n\t \t}\n \t\t?g sio:SIO_000680 ?startdate .\n \t\t?startdate sio:SIO_000300 ?diagnosisdate .\n }\n }\n {\n SELECT ?onsetdate WHERE {\n ?person sio:SIO_000228 ?role2 . # person has role role\n ?role2 sio:SIO_000356 ?process2 . # is realized in process\n ?process2 sio:SIO_000229 ?output2 . #has output output\n ?output2 sio:SIO_000300 ?onsetdate .\n ?output2 sio:SIO_000628 ?attribute2 . # output refers to attribute\n ?attribute2 a . \n }\n }\n} group by ?ORDO ?yearOfDiagnosis order by ?yearOfDiagnosis ?ORDO\n\n\n```", - "responses": { - "200": { - "description": "Query response", - "content": { - "text/csv": { - "schema": { - "type": "array", - "items": { - "type": "object", - "properties": {} - } - } - }, - "application/json": { - "schema": { - "type": "array", - "items": { - "type": "object", - "properties": {} - } - } - }, - "text/html": { - "schema": { - "type": "array", - "items": { - "type": "object", - "properties": {} - } - } - } - } - }, - "default": { - "description": "Unexpected error", - "content": { - "text/csv": { - "schema": { - "$ref": "#/components/schemas/Message" - } - }, - "application/json": { - "schema": { - "$ref": "#/components/schemas/Message" - } - }, - "text/html": { - "schema": { - "$ref": "#/components/schemas/Message" - } - } - } - } - } - } - }, - "/phenotype-frequencies": { - "get": { - "tags": [ - "Phenotype frequency" - ], - "summary": "Returns the number of patients in the registry that have had a phenotype code at any time", - "description": "\n\n```\n#+ summary: Returns the number of patients in the registry that have had a phenotype code at any time\n#+ tags:\n#+ - Phenotype frequency\n#+ defaults:\n#+ \n#+ endpoint_in_url: False\n\nPREFIX sio: \nselect ?type (count(?type) as ?frequency) where {\n select distinct ?p ?type where {\n ?p sio:SIO_000228 ?role . # person has role role\n ?role sio:SIO_000356 ?process . # is realized in process\n ?process sio:SIO_000229 ?output . #has output output\n ?output sio:SIO_000628 ?attribute . # output refers to attribute\n ?attribute a ?type .\n FILTER(!(?type = sio:SIO_000614)) . # not an \"attribute\" type\n }\n} group by ?type\n\n```", - "responses": { - "200": { - "description": "Query response", - "content": { - "text/csv": { - "schema": { - "type": "array", - "items": { - "type": "object", - "properties": {} - } - } - }, - "application/json": { - "schema": { - "type": "array", - "items": { - "type": "object", - "properties": {} - } - } - }, - "text/html": { - "schema": { - "type": "array", - "items": { - "type": "object", - "properties": {} - } - } - } - } - }, - "default": { - "description": "Unexpected error", - "content": { - "text/csv": { - "schema": { - "$ref": "#/components/schemas/Message" - } - }, - "application/json": { - "schema": { - "$ref": "#/components/schemas/Message" - } - }, - "text/html": { - "schema": { - "$ref": "#/components/schemas/Message" - } - } - } - } - } - } - } - }, - "components": { - "schemas": { - "Message": { - "type": "string" - } - } - }, - "x-original-swagger-version": "2.0" -} \ No newline at end of file diff --git a/paper.bib b/paper.bib deleted file mode 100644 index a9aee32..0000000 --- a/paper.bib +++ /dev/null @@ -1,140 +0,0 @@ -@article{Espinoza2021, -title = {Crossing the chasm between ontology engineering and application development: A survey}, -journal = {Journal of Web Semantics}, -volume = {70}, -pages = {100655}, -year = {2021}, -issn = {1570-8268}, -doi = {10.1016/j.websem.2021.100655}, -url = {https://www.sciencedirect.com/science/article/pii/S1570826821000305}, -author = {Paola Espinoza-Arias and Daniel Garijo and Oscar Corcho}, -keywords = {Ontology, OWL, Ontology engineering, Web API, Application development, Knowledge graph}, -abstract = {The adoption of Knowledge Graphs (KGs) by public and private organizations to integrate and publish data has increased in recent years. Ontologies play a crucial role in providing the structure for KGs, but are usually disregarded when designing Application Programming Interfaces (APIs) to enable browsing KGs in a developer-friendly manner. In this paper we provide a systematic review of the state of the art on existing approaches to ease access to ontology-based KG data} -} - -@InProceedings{Merono_ISWC2019, -author="Lisena, Pasquale -and Mero{\~{n}}o-Pe{\~{n}}uela, Albert -and Kuhn, Tobias -and Troncy, Rapha{\"e}l", -editor="Ghidini, Chiara -and Hartig, Olaf -and Maleshkova, Maria -and Sv{\'a}tek, Vojt{\v{e}}ch -and Cruz, Isabel -and Hogan, Aidan -and Song, Jie -and Lefran{\c{c}}ois, Maxime -and Gandon, Fabien", -title="Easy Web API Development with SPARQL Transformer", -booktitle="The Semantic Web -- ISWC 2019", -year="2019", -publisher="Springer International Publishing", -address="Cham", -pages="454--470", -abstract="In a document-based world as the one of Web APIs, the triple-based output of SPARQL endpoints can be a barrier for developers who want to integrate Linked Data in their applications. A different JSON output can be obtained with SPARQL Transformer, which relies on a single JSON object for defining which data should be extracted from the endpoint and which shape should they assume. We propose a new approach that amounts to merge SPARQL bindings on the base of identifiers and the integration in the grlc API framework to create new bridges between the Web of Data and the Web of applications.", -isbn="978-3-030-30796-7", -doi="10.1007/978-3-030-30796-7_28" -} - -@phdthesis{Singh2019, - doi = {10.18174/505685}, - url = {https://doi.org/10.18174/505685}, - publisher = {Wageningen University and Research}, - author = {Gurnoor Singh}, - title = {Genomics data integration for knowledge discovery using genome annotations from molecular databases and scientific literature}, - institution = "Wageningen University", - year = "2019" -} - -@InProceedings{Merono_ISWC2016, -author="Mero{\~{n}}o-Pe{\~{n}}uela, Albert -and Hoekstra, Rinke", -editor="Sack, Harald -and Rizzo, Giuseppe -and Steinmetz, Nadine -and Mladeni{\'{c}}, Dunja -and Auer, S{\"o}ren -and Lange, Christoph", -title="grlc Makes GitHub Taste Like Linked Data APIs", -booktitle="The Semantic Web", -year="2016", -publisher="Springer International Publishing", -address="Cham", -pages="342--353", -abstract="Building Web APIs on top of SPARQL endpoints is becoming common practice. It enables universal access to the integration favorable data space of Linked Data. In the majority of use cases, users cannot be expected to learn SPARQL to query this data space. Web APIs are the most common way to enable programmatic access to data on the Web. However, the implementation of Web APIs around Linked Data is often a tedious and repetitive process. Recent work speeds up this Linked Data API construction by wrapping it around SPARQL queries, which carry out the API functionality under the hood. Inspired by this development, in this paper we present grlc, a lightweight server that takes SPARQL queries curated in GitHub repositories, and translates them to Linked Data APIs on the fly.", -isbn="978-3-319-47602-5", -doi="10.1007/978-3-319-47602-5_48" -} - -@InProceedings{Merono_ESWC2017, -author="Mero{\~{n}}o-Pe{\~{n}}uela, Albert -and Hoekstra, Rinke", -editor="Blomqvist, Eva -and Hose, Katja -and Paulheim, Heiko -and {\L}awrynowicz, Agnieszka -and Ciravegna, Fabio -and Hartig, Olaf", -title="SPARQL2Git: Transparent SPARQL and Linked Data API Curation via Git", -booktitle="The Semantic Web: ESWC 2017 Satellite Events", -year="2017", -publisher="Springer International Publishing", -address="Cham", -pages="143--148", -abstract="In this demo, we show how an effective and application agnostic way of curating SPARQL queries can be achieved by leveraging Git-based architectures. Often, SPARQL queries are hard-coded into Linked Data consuming applications. This tight coupling poses issues in code maintainability, since these queries are prone to change to adapt to new situations; and query reuse, since queries that might be useful in other applications remain inaccessible. In order to enable decoupling, version control, availability and accessibility of SPARQL queries, we propose SPARQL2Git, an interface for editing, curating and storing SPARQL queries that uses cloud based Git repositories (such as GitHub) as a backend. We describe the query management capabilities of SPARQL2Git, its convenience for SPARQL users that lack Git knowledge, and its combination with grlc to easily generate Linked Data APIs.", -isbn="978-3-319-70407-4", -doi="10.1007/978-3-319-70407-4_27" -} - -@InProceedings{Merono_ISWC2017, -author="Mero{\~{n}}o-Pe{\~{n}}uela, Albert -and Hoekstra, Rinke", -editor="d'Amato, Claudia -and Fernandez, Miriam -and Tamma, Valentina -and Lecue, Freddy -and Cudr{\'e}-Mauroux, Philippe -and Sequeda, Juan -and Lange, Christoph -and Heflin, Jeff", -title="Automatic Query-Centric API for Routine Access to Linked Data", -booktitle="The Semantic Web -- ISWC 2017", -year="2017", -publisher="Springer International Publishing", -address="Cham", -pages="334--349", -abstract="Despite the advatages of Linked Data as a data integration paradigm, accessing and consuming Linked Data is still a cumbersome task. Linked Data applications need to use technologies such as RDF and SPARQL that, despite their expressive power, belong to the data integration stack. As a result, applications and data cannot be cleanly separated: SPARQL queries, endpoint addresses, namespaces, and URIs end up as part of the application code. Many publishers address these problems by building RESTful APIs around their Linked Data. However, this solution has two pitfalls: these APIs are costly to maintain; and they blackbox functionality by hiding the queries they use. In this paper we describe grlc, a gateway between Linked Data applications and the LOD cloud that offers a RESTful, reusable and uniform means to routinely access any Linked Data. It generates an OpenAPI compatible API by using parametrized queries shared on the Web. The resulting APIs require no coding, rely on low-cost external query storage and versioning services, contain abundant provenance information, and integrate access to different publishing paradigms into a single API. We evaluate grlc qualitatively, by describing its reported value by current users; and quantitatively, by measuring the added overhead at generating API specifications and answering to calls.", -isbn="978-3-319-68204-4", -doi="10.1007/978-3-319-68204-4_30" -} - -@article{Verborgh2016, - doi = {10.1016/j.websem.2016.03.003}, - url = {https://doi.org/10.1016/j.websem.2016.03.003}, - year = {2016}, - month = mar, - publisher = {Elsevier {BV}}, - volume = {37-38}, - pages = {184--206}, - author = {Ruben Verborgh and Miel Vander Sande and Olaf Hartig and Joachim Van Herwegen and Laurens De Vocht and Ben De Meester and Gerald Haesendonck and Pieter Colpaert}, - title = {Triple Pattern Fragments: A low-cost knowledge graph interface for the Web}, - journal = {Journal of Web Semantics} -} - -@article{Daquino2021, - author = {Marilena Daquino and - Ivan Heibi and - Silvio Peroni and - David M. Shotton}, - title = {Creating Restful APIs over {SPARQL} endpoints with {RAMOSE}}, - journal = {CoRR}, - volume = {abs/2007.16079}, - year = {2020}, - url = {https://arxiv.org/abs/2007.16079}, - archivePrefix = {arXiv}, - eprint = {2007.16079}, - timestamp = {Mon, 03 Aug 2020 14:32:13 +0200}, - biburl = {https://dblp.org/rec/journals/corr/abs-2007-16079.bib}, - bibsource = {dblp computer science bibliography, https://dblp.org} -} diff --git a/paper.md b/paper.md deleted file mode 100644 index b18afb6..0000000 --- a/paper.md +++ /dev/null @@ -1,42 +0,0 @@ ---- -title: 'grlc: the git repository linked data API constructor.' -tags: - - Python - - linked data - - API builder -authors: - - name: Albert Meroño-Peñuela - orcid: 0000-0003-4646-5842 - affiliation: 1 - - name: Carlos Martinez-Ortiz - orcid: 0000-0001-5565-7577 - affiliation: 2 -affiliations: - - name: King's College London - index: 1 - - name: Netherlands eScience Center - index: 2 -date: XX April 2020 -bibliography: paper.bib ---- - -# Summary - -RDF is a knowledge representation format (and an enabling technology for Linked Open Data) which has gained popularity over the years and it continues to be adopted in different domains such as life sciences and humanities. RDF data is typically represented as sets of triples, consisting of subject, predicate and object, and is usually stored in a triple store. SPARQL is one of the most commonly used query languages used to retrieve linked data from a triple store. However writing SPARQL queries is not a trivial task and requires some degree of expertise. - -Domain experts usually face the challenge of having to learn SPARQL, when all they want is to be able to access the information contained in the triple store. Knowledge engineers with the necessary expertise can help domain experts write SPARQL queries, but they still need to modify part of the query every time they want to extract new data. - -`grlc` is a lightweight server that takes SPARQL queries (stored in a GitHub repository, local file storage or listed in a URL), and translates them to Linked Data Web APIs. This enables universal access to Linked Data. Users are not required to know SPARQL to query their data, but instead can access a web API. In this way, `grlc` enables researchers to easily access knowledge represented in RDF format. - -`grlc` uses the [BASIL convention](https://github.com/basilapi/basil/wiki/SPARQL-variable-name-convention-for-WEB-API-parameters-mapping) for SPARQL variable mapping and supports LD fragments [@Verborgh2016]. - -`grlc` has been used in a number of scientific publications [@Merono_ISWC2016,@Merono_ISWC2017,@Merono_ESWC2017,@Merono_ISWC2019] and PhD thesis [@Singh2019]. - -Other comparable approaches exist, which allow users to access Linked Open Data without requiring to know SPARQL; for example [OpenPHACTS](https://github.com/openphacts) and RAMOSE [@Daquino2021] are two of the most notable ones. For an extensive review of other related work, a recent survey on API approaches for knowledge graphs [@Espinoza2021]. - - -# Acknowledgements - -We acknowledge contributions from Pasquale Lisena, Richard Zijdeman and Rinke Hoekstra. - -# References