Skip to content

Add support for integrity header #1998

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 55 additions & 1 deletion pygeoapi/api/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
from datetime import datetime
from functools import partial
from gzip import compress
import hashlib
from http import HTTPStatus
import logging
import re
Expand Down Expand Up @@ -99,6 +100,10 @@
(F_NETCDF, 'application/x-netcdf'),
))

#: Digest integrity methods supported, written in the lower-case spelling
#: that `_get_digest` compares `Want-Content-Digest` header values against
DIGEST_METHODS = ['sha1', 'sha256', 'sha384', 'sha512',
'sha3-256', 'sha3-384', 'sha3-512']

#: Locale used for system responses (e.g. exceptions)
SYSTEM_LOCALE = l10n.Locale('en', 'US')

Expand Down Expand Up @@ -165,6 +170,32 @@ def apply_gzip(headers: dict, content: Union[str, bytes]) -> Union[str, bytes]:
return content


def apply_integrity(headers: dict, content: Union[str, bytes]) -> None:
    """
    Apply content integrity hash to the response headers.

    Removes the internal `Want-Content-Digest` entry from `headers` and,
    if it was present, stores the digest of `content` under
    `Content-Digest` formatted as `<method>=<hexdigest>`.

    :param headers: Response header dict (modified in place)
    :param content: Response payload; `str` content is encoded with the
                    first entry of `CHARSET` before hashing

    :raises ValueError: if the requested hash method is not supported
                        by `hashlib`

    :returns: `None`
    """

    try:
        hash_method = headers.pop('Want-Content-Digest')
    except KeyError:
        LOGGER.debug('No digest requested')
        return

    # hashlib only guarantees the underscore spelling (e.g. 'sha3_256');
    # whether a hyphenated name such as 'sha3-256' resolves depends on the
    # OpenSSL build behind hashlib, so prefer the guaranteed spelling and
    # fall back to the name exactly as given otherwise
    hashlib_name = hash_method.replace('-', '_')
    if hashlib_name not in hashlib.algorithms_guaranteed:
        hashlib_name = hash_method

    LOGGER.debug(f'Hashing with {hash_method}')

    # Only the constructor lookup is guarded: encoding errors raised below
    # are ValueError subclasses too and must not be reported as an
    # unsupported hash method
    try:
        hash_func = hashlib.new(hashlib_name)
    except ValueError as err:
        raise ValueError(f'Unsupported hash method: {hash_method}') from err

    charset = CHARSET[0]
    content_bytes = (content if isinstance(content, bytes)
                     else content.encode(charset))

    hash_func.update(content_bytes)
    # The header keeps the method name as requested, not the hashlib alias
    headers['Content-Digest'] = f'{hash_method}={hash_func.hexdigest()}'


class APIRequest:
"""
Transforms an incoming server-specific Request into an object
Expand Down Expand Up @@ -235,6 +266,9 @@ def __init__(self, request, supported_locales):
# Determine format
self._format = self._get_format(request.headers)

# Determine digest
self._digest = self._get_digest(request.headers)

# Get received headers
self._headers = self.get_request_headers(request.headers)

Expand Down Expand Up @@ -348,6 +382,19 @@ def _get_format(self, headers) -> Union[str, None]:

return format_ or None

def _get_digest(self, headers) -> Union[str, None]:
    """
    Get `Request` digest method from the `Want-Content-Digest` header.

    The header value is read as a comma-separated list and the first
    member naming a method in `DIGEST_METHODS` (case-insensitive) wins.
    A member may carry a `;param` suffix, which is ignored, or an
    `=preference` suffix; a member whose preference is 0 or is not an
    integer is skipped.

    :param headers: Dict of Request headers
    :returns: digest method or None if not found/specified
    """

    requested = headers.get('Want-Content-Digest',
                            headers.get('want-content-digest', '')).strip()

    for member in requested.split(','):
        # basic support for complex types (i.e. with "q=0.x")
        token = member.split(';')[0]

        # "<method>=<preference>" members: preference 0 means the method
        # is not acceptable, so it must never be selected
        name, has_preference, preference = token.partition('=')
        if has_preference:
            try:
                if int(preference.strip()) <= 0:
                    continue
            except ValueError:
                # malformed preference: ignore this member
                continue

        hash_method = name.strip().lower()
        if hash_method in DIGEST_METHODS:
            return hash_method

    return None

@property
def data(self) -> bytes:
"""Returns the additional data send with the Request (bytes)"""
Expand Down Expand Up @@ -464,6 +511,7 @@ def is_valid(self, additional_formats=None) -> bool:
def get_response_headers(self, force_lang: l10n.Locale = None,
force_type: str = None,
force_encoding: str = None,
force_digest: str = None,
**custom_headers) -> dict:
"""
Prepares and returns a dictionary with Response object headers.
Expand All @@ -488,6 +536,7 @@ def get_response_headers(self, force_lang: l10n.Locale = None,
:param force_lang: An optional Content-Language header override.
:param force_type: An optional Content-Type header override.
:param force_encoding: An optional Content-Encoding header override.
:param force_digest: An optional Want-Content-Digest header override.
:returns: A header dict
"""

Expand All @@ -507,6 +556,11 @@ def get_response_headers(self, force_lang: l10n.Locale = None,
elif F_GZIP in self._headers.get('Accept-Encoding', ''):
headers['Content-Encoding'] = F_GZIP

if force_digest:
headers['Want-Content-Digest'] = force_digest
elif self._digest:
headers['Want-Content-Digest'] = self._digest

return headers

def get_request_headers(self, headers) -> dict:
Expand All @@ -519,7 +573,7 @@ def get_request_headers(self, headers) -> dict:
:returns: A header dict
"""

headers_ = {item[0]: item[1] for item in headers.items()}
headers_ = {item[0].title(): item[1] for item in headers.items()}
return headers_


Expand Down
3 changes: 2 additions & 1 deletion pygeoapi/django_/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
from django.conf import settings
from django.http import HttpRequest, HttpResponse

from pygeoapi.api import API, APIRequest, apply_gzip
from pygeoapi.api import API, APIRequest, apply_gzip, apply_integrity
import pygeoapi.api as core_api
import pygeoapi.api.coverages as coverages_api
import pygeoapi.api.environmental_data_retrieval as edr_api
Expand Down Expand Up @@ -550,6 +550,7 @@ def execute_from_django(api_function, request: HttpRequest, *args,
else:

headers, status, content = api_function(api_, api_request, *args)
apply_integrity(headers, content)
content = apply_gzip(headers, content)

# Convert API payload to a django response
Expand Down
3 changes: 2 additions & 1 deletion pygeoapi/flask_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
from flask import (Flask, Blueprint, make_response, request,
send_from_directory, Response, Request)

from pygeoapi.api import API, APIRequest, apply_gzip
from pygeoapi.api import API, APIRequest, apply_gzip, apply_integrity
import pygeoapi.api as core_api
import pygeoapi.api.coverages as coverages_api
import pygeoapi.api.environmental_data_retrieval as edr_api
Expand Down Expand Up @@ -151,6 +151,7 @@ def execute_from_flask(api_function, request: Request, *args,
headers, status, content = actual_api.get_format_exception(api_request)
else:
headers, status, content = api_function(actual_api, api_request, *args)
apply_integrity(headers, content)
content = apply_gzip(headers, content)

response = make_response(content, status)
Expand Down
3 changes: 2 additions & 1 deletion pygeoapi/starlette_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
)
import uvicorn

from pygeoapi.api import API, APIRequest, apply_gzip
from pygeoapi.api import API, APIRequest, apply_gzip, apply_integrity
import pygeoapi.api as core_api
import pygeoapi.api.coverages as coverages_api
import pygeoapi.api.environmental_data_retrieval as edr_api
Expand Down Expand Up @@ -133,6 +133,7 @@ async def execute_from_starlette(api_function, request: Request, *args,
headers, status, content = await loop.run_in_executor(
None, call_api_threadsafe, loop, api_function,
actual_api, api_request, *args)
apply_integrity(headers, content)
# NOTE: that gzip currently doesn't work in starlette
# https://github.com/geopython/pygeoapi/issues/1591
content = apply_gzip(headers, content)
Expand Down
84 changes: 83 additions & 1 deletion tests/api/test_itemtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
from shapely.geometry import Point

from pygeoapi.api import (API, FORMAT_TYPES, F_GZIP, F_HTML, F_JSONLD,
apply_gzip)
apply_gzip, apply_integrity)
from pygeoapi.api.itemtypes import (
get_collection_queryables, get_collection_item,
get_collection_items, manage_collection_item)
Expand Down Expand Up @@ -415,6 +415,88 @@ def test_collection_items_gzip_csv(config, api_, openapi):
assert rsp_csv == rsp_csv_


def test_collection_no_digest(api_):
    """No `Content-Digest` is emitted without a supported digest request."""

    # first no header at all, then a header naming only an unknown method
    scenarios = ({}, {'HTTP_WANT_CONTENT_DIGEST': 'SHA100'})

    for request_headers in scenarios:
        request = mock_api_request(**request_headers)
        headers, _, body = get_collection_item(api_, request, 'obs', '371')
        apply_integrity(headers, body)

        assert headers['Content-Type'] == 'application/json'
        assert 'Content-Digest' not in headers


def test_collection_with_digest(api_):
    """Each supported digest method yields the expected `Content-Digest`."""

    # (Want-Content-Digest request value, expected Content-Digest value)
    scenarios = (
        ('SHA100,sha1',
         'sha1=0d4818c86215ba031044b27e28cb3170936e8c53'),
        ('sha256',
         'sha256=f24c899027516b64c13734caf12a5506c8137f8520ab1b08b936e8e14f43faa4'),  # noqa
        ('sha384',
         'sha384=2e875167e36a9d70a11bef48d290dd439741514f28e19680a4eb049f2aeaca96092280dce1458c6072650a678840ee83'),  # noqa
        ('SHA512',
         'sha512=a57169dd6a947237df9ab8640cf6bedd57e54cb854cc8843f4aac08c30d4e2c402af8b637b8823f6953b90d61f8fc37db95a68cce9ee0d7b9cc9186fcbf5978a'),  # noqa
        ('sha3-256',
         'sha3-256=52bd7167f2c74131287e313dc0e6959502626a44069e6b3ab9059aa00cf15c22'),  # noqa
        ('sha3-384',
         'sha3-384=335b5d9c02c174325b8d9f039ca1acd6783d1d457d1105a091b31baeca023c5896665d5fd7417fbc7ee946231e7ba990'),  # noqa
        ('SHA3-512',
         'sha3-512=79f736ddfbc8faca1623c6eb365e48e422aa30d1ebb51cc5aa0b046b1966d8256f2cc1399d3669069d965f56a5148522d05e7d63b78b7b76282034f8e77fb8c2'),  # noqa
    )

    for wanted, expected_digest in scenarios:
        request = mock_api_request(HTTP_WANT_CONTENT_DIGEST=wanted)
        headers, _, body = get_collection_item(api_, request, 'obs', '371')
        apply_integrity(headers, body)

        assert headers['Content-Type'] == 'application/json'
        assert headers['Content-Digest'] == expected_digest


def test_collection_with_digest_and_gzip(api_):
    """Digest and gzip handling can both be applied to one response."""

    request = mock_api_request(HTTP_WANT_CONTENT_DIGEST='SHA1,sha256',
                               HTTP_ACCEPT_ENCODING=F_GZIP)
    headers, _, body = get_collection_item(api_, request, 'obs', '371')

    # same order as the server wrappers: digest first, then compression
    apply_integrity(headers, body)
    apply_gzip(headers, body)

    expected_headers = (
        ('Content-Type', 'application/json; charset=utf-8'),
        ('Content-Digest', 'sha1=0d4818c86215ba031044b27e28cb3170936e8c53'),
        ('Content-Encoding', F_GZIP),
    )
    for header_name, expected_value in expected_headers:
        assert headers[header_name] == expected_value


def test_get_collection_items_crs(config, api_):

# Invalid CRS query parameter
Expand Down