From 51ea02ad715ec57c45359b62ccff5409664eca57 Mon Sep 17 00:00:00 2001
From: Daniel Moreno
Date: Mon, 7 Jul 2025 20:10:40 +0100
Subject: [PATCH 1/3] add base schema validation

---
Review notes:
- transform() CLI: `output_schema` is the schema name selected through
  click.Choice (a string), so it has no has_mode(); the plugin is now loaded
  with load_schema() before the check. The guard was also inverted (it
  rejected schemas that DO support validation) and now uses `not`.
- NOTE(review): transform_metadata() logs and swallows the
  'Validation failed' RuntimeError and returns None, so the CLI then reports
  'No supported input schema detected/found'; consider a distinct error.
- NOTE(review): has_mode() only inspects the concrete class' own __dict__,
  so a mode inherited from an intermediate schema class is reported as
  unsupported; confirm this is intended.
- NOTE(review): `str | None` is evaluated at definition time and needs
  Python >= 3.10; confirm the supported versions.

 pygeometa/core.py             | 27 +++++++++++++++++++++------
 pygeometa/schemas/__init__.py | 21 +++++----------------
 pygeometa/schemas/base.py     | 20 ++++++++++++++++++++
 3 files changed, 46 insertions(+), 22 deletions(-)

diff --git a/pygeometa/core.py b/pygeometa/core.py
index be4b0a81..0af377c3 100644
--- a/pygeometa/core.py
+++ b/pygeometa/core.py
@@ -357,13 +357,14 @@ def import_metadata(schema: str, metadata: str) -> dict:
 
 
 def transform_metadata(input_schema: str, output_schema: str,
-                       metadata: str) -> str:
+                       metadata: str, validate: bool = False) -> str | None:
     """
     Transform metadata
 
     :param input_schema: input schema / format
     :param output_schema: output schema / format
-    :metadata: metadata string
+    :param metadata: metadata string
+    :param validate: whether to validate output
 
     :returns: transformed metadata or `None`
     """
@@ -374,6 +375,8 @@ def transform_metadata(input_schema: str, output_schema: str,
         LOGGER.info(f'Processing into {output_schema}')
         schema_object_output = load_schema(output_schema)
         content = schema_object_output.write(content)
+        if validate and not schema_object_output.validate(content):
+            raise RuntimeError('Validation failed')
     except Exception as err:
         LOGGER.debug(err)
         return None
@@ -563,21 +566,29 @@ def import_(ctx, metadata_file, schema, output, verbosity):
               type=click.Path(exists=True, resolve_path=True,
                               dir_okay=True, file_okay=False),
               help='Locally defined metadata schema')
+@click.option('--validate', required=False, is_flag=True,)
 @cli_options.OPTION_VERBOSITY
-def generate(ctx, mcf, schema, schema_local, output, verbosity):
+def generate(ctx, mcf, schema, schema_local, output, validate, verbosity):
     """generate metadata"""
 
     if schema is None and schema_local is None:
         raise click.UsageError('Missing arguments')
     elif None not in [schema, schema_local]:
         raise click.UsageError('schema / schema_local are mutually exclusive')
+    if schema_local and validate:
+        raise click.UsageError('validation / schema_local are mutually exclusive')  # noqa
 
     mcf_dict = read_mcf(mcf)
 
     if schema is not None:
         LOGGER.info(f'Processing {mcf} into {schema}')
         schema_object = load_schema(schema)
+        if validate and not schema_object.has_mode('validate'):
+            raise click.ClickException('Selected schema does not support validation')  # noqa
         content = schema_object.write(mcf_dict)
+        if validate:
+            if not schema_object.validate(content):
+                raise click.ClickException('Validation failed')
     else:
         content = render_j2_template(mcf_dict, template_dir=schema_local)
 
@@ -614,7 +625,7 @@ def schemas(ctx, verbosity):
     click.echo('Supported schemas')
 
     for schema in get_supported_schemas(details=True):
-        s = f"{schema['id']} (read: {schema['read']}, write: {schema['write']}): {schema['description']}"  # noqa
+        s = f"{schema['id']} (read: {schema['read']}, write: {schema['write']}, validate: {schema['validate']}): {schema['description']}"  # noqa
         click.echo(s)
 
 
@@ -645,12 +656,16 @@ def validate(ctx, mcf, verbosity):
 @click.option('--output-schema', required=True,
               type=click.Choice(get_supported_schemas()),
               help='Metadata schema of input file')
+@click.option('--validate', required=False, is_flag=True,)
 def transform(ctx, metadata_file, input_schema, output_schema, output,
-              verbosity):
+              validate, verbosity):
     """transform metadata"""
 
+    if validate and not load_schema(output_schema).has_mode('validate'):
+        raise click.ClickException('Output schema does not support validation')
+
     content = transform_metadata(input_schema, output_schema,
-                                 metadata_file.read())
+                                 metadata_file.read(), validate)
 
     if content is None:
         raise click.ClickException('No supported input schema detected/found')
diff --git a/pygeometa/schemas/__init__.py b/pygeometa/schemas/__init__.py
index 79f2f0f1..51a9fa1d 100644
--- a/pygeometa/schemas/__init__.py
+++ b/pygeometa/schemas/__init__.py
@@ -75,19 +75,6 @@ def get_supported_schemas(details: bool = False,
     :returns: list of supported schemas
     """
 
-
-    def has_mode(plugin: BaseOutputSchema, mode: str) -> bool:
-        enabled = False
-
-        try:
-            _ = getattr(plugin, mode)('test')
-        except NotImplementedError:
-            pass
-        except Exception:
-            enabled = True
-
-        return enabled
-
     schema_matrix = []
 
     LOGGER.debug('Generating list of supported schemas')
@@ -102,14 +89,16 @@ def has_mode(plugin: BaseOutputSchema, mode: str) -> bool:
     for key in SCHEMAS.keys():
         schema = load_schema(key)
 
-        can_read = has_mode(schema, 'import_')
-        can_write = has_mode(schema, 'write')
+        can_read = schema.has_mode('import_')
+        can_write = schema.has_mode('write')
+        can_validate = schema.has_mode('validate')
 
         schema_matrix.append({
             'id': key,
             'description': schema.description,
             'read': can_read,
-            'write': can_write
+            'write': can_write,
+            'validate': can_validate
         })
 
     if include_autodetect:
diff --git a/pygeometa/schemas/base.py b/pygeometa/schemas/base.py
index 5dc865cc..f272b9cc 100644
--- a/pygeometa/schemas/base.py
+++ b/pygeometa/schemas/base.py
@@ -98,5 +98,25 @@ def import_(self, metadata: str) -> dict:
 
         raise NotImplementedError()
 
+    def validate(self, metadata: Union[dict, str]) -> bool:
+        """
+        Validate metadata against schema
+
+        :param metadata: metadata content
+
+        :returns: `bool` of validation result
+        """
+
+        raise NotImplementedError()
+
+    def has_mode(self, mode: str) -> bool:
+        """
+        Check if schema implementation supports a mode
+
+        :param mode: mode to check, e.g. 'import_', 'write', 'validate'
+        :returns: `bool` indicating whether mode is supported
+        """
+        return mode in self.__class__.__dict__
+
     def __repr__(self):
         return f'<{self.name.upper()}OutputSchema> {self.name}'
From ca8dd964aa52f2d694dd8126790f9074c0d521cb Mon Sep 17 00:00:00 2001
From: Daniel Moreno
Date: Mon, 7 Jul 2025 20:10:58 +0100
Subject: [PATCH 2/3] add OGC API records validation

---
Review notes:
- validate(): json.loads() signals malformed JSON with json.JSONDecodeError
  (a ValueError), not TypeError, so an invalid string previously escaped as
  an exception instead of returning False; ValueError is now caught as well.
- yaml_loader(): requests.get() now passes timeout=30 so an unresponsive
  schema host cannot hang validation indefinitely.
- NOTE(review): network/HTTP errors raised by requests and jsonschema
  exceptions other than ValidationError still propagate to the caller.

 pygeometa/schemas/ogcapi_records/__init__.py | 50 +++++++++++++++++++-
 1 file changed, 49 insertions(+), 1 deletion(-)

diff --git a/pygeometa/schemas/ogcapi_records/__init__.py b/pygeometa/schemas/ogcapi_records/__init__.py
index bee01ba6..9a350842 100644
--- a/pygeometa/schemas/ogcapi_records/__init__.py
+++ b/pygeometa/schemas/ogcapi_records/__init__.py
@@ -42,12 +42,18 @@
 # OTHER DEALINGS IN THE SOFTWARE.
 #
 # =================================================================
-
+import json
 from datetime import date, datetime
 import logging
 import os
 from typing import Union
 
+import requests
+import yaml
+from jsonschema import validate as jsonschema_validate
+from jsonschema import RefResolver
+from jsonschema.exceptions import ValidationError
+
 from pygeometa import __version__
 from pygeometa.core import get_charstring
 from pygeometa.helpers import json_dumps
@@ -470,3 +476,45 @@ def generate_date(self, date_value: str) -> str:
             raise RuntimeError(msg)
 
         return value
+
+    def validate(self, metadata: Union[dict, str]) -> bool:
+        """
+        Validate metadata against schema
+
+        :param metadata: OGC Records metadata content
+
+        :returns: `bool` of validation result
+        """
+
+        if isinstance(metadata, str):
+            try:
+                metadata = json.loads(metadata)
+            except (TypeError, ValueError):
+                return False
+
+        schema_uri = 'https://schemas.opengis.net/ogcapi/records/part1/1.0/openapi/schemas/recordGeoJSON.yaml'  # noqa
+
+        def yaml_loader(uri: str) -> dict:
+            r = requests.get(uri, timeout=30)
+            r.raise_for_status()
+            return yaml.safe_load(r.text)
+
+        schema_dict = yaml_loader(schema_uri)
+
+        resolver = RefResolver(
+            base_uri=schema_uri,
+            referrer=schema_dict,
+            handlers={'http': yaml_loader, 'https': yaml_loader}
+        )
+
+        try:
+            jsonschema_validate(
+                instance=metadata,
+                schema=schema_dict,
+                resolver=resolver
+            )
+        except ValidationError as err:
+            LOGGER.error(f'Validation error: {err.message}')
+            return False
+
+        return True
From ff2a86734812b253000163e674e7d1048811d9c6 Mon Sep 17 00:00:00 2001
From: Daniel Moreno
Date: Mon, 7 Jul 2025 20:37:03 +0100
Subject: [PATCH 3/3] OGC Records time can't be defined and null

---
 pygeometa/schemas/ogcapi_records/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pygeometa/schemas/ogcapi_records/__init__.py b/pygeometa/schemas/ogcapi_records/__init__.py
index 9a350842..fb8a556a 100644
--- a/pygeometa/schemas/ogcapi_records/__init__.py
+++ b/pygeometa/schemas/ogcapi_records/__init__.py
@@ -167,7 +167,7 @@ def write(self, mcf: dict, stringify: str = True) -> Union[dict, str]:
                 record['time']['resolution'] = mcf['identification']['extents']['temporal'][0]['resolution']  # noqa
 
         except (IndexError, KeyError):
-            record['time'] = None
+            pass
 
         LOGGER.debug('Checking for dates')
 