diff --git a/CHANGELOG.md b/CHANGELOG.md index f28bfaa..3eb55d1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,10 @@ +## 0.0.3 + +### Features + +* **OTEL middleware added** + + ## 0.0.2 ### Enhancements diff --git a/requirements/cli.in b/requirements/cli.in index 2ef4460..f4cbb7b 100644 --- a/requirements/cli.in +++ b/requirements/cli.in @@ -2,3 +2,5 @@ uvicorn fastapi click unstructured-ingest +opentelemetry-instrumentation-fastapi +opentelemetry-exporter-otlp-proto-grpc diff --git a/requirements/cli.txt b/requirements/cli.txt index c533ab8..5942101 100644 --- a/requirements/cli.txt +++ b/requirements/cli.txt @@ -1,213 +1,136 @@ # -# This file is autogenerated by pip-compile with Python 3.11 +# This file is autogenerated by pip-compile with Python 3.10 # by the following command: # -# pip-compile cli.in +# pip-compile requirements//cli.in # annotated-types==0.7.0 # via pydantic anyio==4.4.0 - # via - # httpx - # starlette - # watchfiles -backoff==2.2.1 - # via unstructured -beautifulsoup4==4.12.3 - # via unstructured -certifi==2024.7.4 - # via - # httpcore - # httpx - # requests - # unstructured-client -chardet==5.2.0 - # via unstructured -charset-normalizer==3.3.2 - # via - # requests - # unstructured-client + # via starlette +asgiref==3.8.1 + # via opentelemetry-instrumentation-asgi click==8.1.7 # via - # -r cli.in - # nltk - # typer + # -r requirements//cli.in + # unstructured-ingest # uvicorn dataclasses-json==0.6.7 - # via - # unstructured - # unstructured-client -deepdiff==7.0.1 - # via unstructured-client -dnspython==2.6.1 - # via email-validator -email-validator==2.2.0 - # via fastapi -emoji==2.12.1 - # via unstructured -fastapi==0.111.1 - # via -r cli.in -fastapi-cli==0.0.4 - # via fastapi -filetype==1.2.0 - # via unstructured + # via unstructured-ingest +deprecated==1.2.14 + # via + # opentelemetry-api + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-semantic-conventions +exceptiongroup==1.2.2 + # via anyio +fastapi==0.112.2 + # via 
-r requirements//cli.in +googleapis-common-protos==1.65.0 + # via opentelemetry-exporter-otlp-proto-grpc +grpcio==1.66.0 + # via opentelemetry-exporter-otlp-proto-grpc h11==0.14.0 - # via - # httpcore - # uvicorn -httpcore==1.0.5 - # via httpx -httptools==0.6.1 # via uvicorn -httpx==0.27.0 - # via - # fastapi - # unstructured-client -idna==3.7 - # via - # anyio - # email-validator - # httpx - # requests - # unstructured-client -jinja2==3.1.4 - # via fastapi -joblib==1.4.2 - # via nltk -jsonpath-python==1.0.6 - # via unstructured-client -langdetect==1.0.9 - # via unstructured -lxml==5.2.2 - # via unstructured -markdown-it-py==3.0.0 - # via rich -markupsafe==2.1.5 - # via jinja2 -marshmallow==3.21.3 - # via - # dataclasses-json - # unstructured-client -mdurl==0.1.2 - # via markdown-it-py +idna==3.8 + # via anyio +importlib-metadata==8.0.0 + # via opentelemetry-api +marshmallow==3.22.0 + # via dataclasses-json mypy-extensions==1.0.0 + # via typing-inspect +numpy==2.1.0 + # via pandas +opentelemetry-api==1.26.0 + # via + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-instrumentation + # opentelemetry-instrumentation-asgi + # opentelemetry-instrumentation-fastapi + # opentelemetry-sdk + # opentelemetry-semantic-conventions +opentelemetry-exporter-otlp-proto-common==1.26.0 + # via opentelemetry-exporter-otlp-proto-grpc +opentelemetry-exporter-otlp-proto-grpc==1.26.0 + # via -r requirements//cli.in +opentelemetry-instrumentation==0.47b0 + # via + # opentelemetry-instrumentation-asgi + # opentelemetry-instrumentation-fastapi +opentelemetry-instrumentation-asgi==0.47b0 + # via opentelemetry-instrumentation-fastapi +opentelemetry-instrumentation-fastapi==0.47b0 + # via -r requirements//cli.in +opentelemetry-proto==1.26.0 + # via + # opentelemetry-exporter-otlp-proto-common + # opentelemetry-exporter-otlp-proto-grpc +opentelemetry-sdk==1.26.0 + # via + # opentelemetry-exporter-otlp-proto-grpc + # unstructured-ingest +opentelemetry-semantic-conventions==0.47b0 # 
via - # typing-inspect - # unstructured-client -nest-asyncio==1.6.0 - # via unstructured-client -nltk==3.8.1 - # via unstructured -numpy==1.26.4 + # opentelemetry-instrumentation-asgi + # opentelemetry-instrumentation-fastapi + # opentelemetry-sdk +opentelemetry-util-http==0.47b0 # via - # pandas - # unstructured -ordered-set==4.1.0 - # via deepdiff + # opentelemetry-instrumentation-asgi + # opentelemetry-instrumentation-fastapi packaging==24.1 - # via - # marshmallow - # unstructured-client + # via marshmallow pandas==2.2.2 # via unstructured-ingest -psutil==6.0.0 - # via unstructured +protobuf==4.25.4 + # via + # googleapis-common-protos + # opentelemetry-proto pydantic==2.8.2 - # via fastapi + # via + # fastapi + # unstructured-ingest pydantic-core==2.20.1 # via pydantic -pygments==2.18.0 - # via rich -pypdf==4.3.1 - # via unstructured-client python-dateutil==2.9.0.post0 # via # pandas - # unstructured-client # unstructured-ingest -python-dotenv==1.0.1 - # via uvicorn -python-iso639==2024.4.27 - # via unstructured -python-magic==0.4.27 - # via unstructured -python-multipart==0.0.9 - # via fastapi pytz==2024.1 # via pandas -pyyaml==6.0.1 - # via uvicorn -rapidfuzz==3.9.5 - # via unstructured -regex==2024.7.24 - # via nltk -requests==2.32.3 - # via - # requests-toolbelt - # unstructured - # unstructured-client -requests-toolbelt==1.0.0 - # via unstructured-client -rich==13.7.1 - # via typer -shellingham==1.5.4 - # via typer six==1.16.0 - # via - # langdetect - # python-dateutil - # unstructured-client + # via python-dateutil sniffio==1.3.1 - # via - # anyio - # httpx -soupsieve==2.5 - # via beautifulsoup4 -starlette==0.37.2 + # via anyio +starlette==0.38.2 # via fastapi -tabulate==0.9.0 - # via unstructured -tqdm==4.66.4 - # via - # nltk - # unstructured -typer==0.12.3 - # via fastapi-cli +tqdm==4.66.5 + # via unstructured-ingest typing-extensions==4.12.2 # via - # emoji + # anyio + # asgiref # fastapi + # opentelemetry-sdk # pydantic # pydantic-core - # typer # 
typing-inspect - # unstructured - # unstructured-client + # uvicorn typing-inspect==0.9.0 - # via - # dataclasses-json - # unstructured-client + # via dataclasses-json tzdata==2024.1 # via pandas -unstructured==0.15.0 - # via unstructured-ingest -unstructured-client==0.25.0 - # via unstructured -unstructured-ingest==0.0.0 - # via -r cli.in -urllib3==2.2.2 - # via - # requests - # unstructured-client -uvicorn[standard]==0.30.3 - # via - # -r cli.in - # fastapi -uvloop==0.19.0 - # via uvicorn -watchfiles==0.22.0 - # via uvicorn -websockets==12.0 - # via uvicorn +unstructured-ingest==0.0.8 + # via -r requirements//cli.in +uvicorn==0.30.6 + # via -r requirements//cli.in wrapt==1.16.0 - # via unstructured + # via + # deprecated + # opentelemetry-instrumentation +zipp==3.20.1 + # via importlib-metadata + +# The following packages are considered to be unsafe in a requirements file: +# setuptools diff --git a/requirements/constraints.txt b/requirements/constraints.txt index e18a0ba..b83b7de 100644 --- a/requirements/constraints.txt +++ b/requirements/constraints.txt @@ -1 +1 @@ -unstructured-ingest==0.0.0 +unstructured-ingest==0.0.8 diff --git a/requirements/lint.txt b/requirements/lint.txt index 2fd7f27..b037ee3 100644 --- a/requirements/lint.txt +++ b/requirements/lint.txt @@ -6,11 +6,11 @@ # autoflake==2.3.1 # via -r requirements//lint.in -black==24.4.2 +black==24.8.0 # via -r requirements//lint.in click==8.1.7 # via black -flake8==7.1.0 +flake8==7.1.1 # via # -r requirements//lint.in # flake8-print @@ -18,7 +18,7 @@ flake8-print==5.0.0 # via -r requirements//lint.in mccabe==0.7.0 # via flake8 -mypy==1.10.1 +mypy==1.11.2 # via -r requirements//lint.in mypy-extensions==1.0.0 # via @@ -30,7 +30,7 @@ pathspec==0.12.1 # via black platformdirs==4.2.2 # via black -pycodestyle==2.12.0 +pycodestyle==2.12.1 # via # flake8 # flake8-print @@ -38,7 +38,7 @@ pyflakes==3.2.0 # via # autoflake # flake8 -ruff==0.5.0 +ruff==0.6.2 # via -r requirements//lint.in tomli==2.0.1 # via diff 
--git a/requirements/release.txt b/requirements/release.txt index 873ae45..dfeb2ba 100644 --- a/requirements/release.txt +++ b/requirements/release.txt @@ -1,8 +1,8 @@ # -# This file is autogenerated by pip-compile with Python 3.9 +# This file is autogenerated by pip-compile with Python 3.10 # by the following command: # -# pip-compile release.in +# pip-compile requirements//release.in # backports-tarfile==1.2.0 # via jaraco-context @@ -12,25 +12,25 @@ charset-normalizer==3.3.2 # via requests docutils==0.21.2 # via readme-renderer -idna==3.7 +idna==3.8 # via requests -importlib-metadata==8.2.0 +importlib-metadata==8.4.0 # via # keyring # twine jaraco-classes==3.4.0 # via keyring -jaraco-context==5.3.0 +jaraco-context==6.0.1 # via keyring -jaraco-functools==4.0.1 +jaraco-functools==4.0.2 # via keyring -keyring==25.2.1 +keyring==25.3.0 # via twine markdown-it-py==3.0.0 # via rich mdurl==0.1.2 # via markdown-it-py -more-itertools==10.3.0 +more-itertools==10.4.0 # via # jaraco-classes # jaraco-functools @@ -52,15 +52,15 @@ requests-toolbelt==1.0.0 # via twine rfc3986==2.0.0 # via twine -rich==13.7.1 +rich==13.8.0 # via twine twine==5.1.1 - # via -r release.in + # via -r requirements//release.in urllib3==2.2.2 # via # requests # twine -wheel==0.43.0 - # via -r release.in -zipp==3.19.2 +wheel==0.44.0 + # via -r requirements//release.in +zipp==3.20.1 # via importlib-metadata diff --git a/requirements/test.txt b/requirements/test.txt index b36d7cb..4c863eb 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -4,7 +4,7 @@ # # pip-compile requirements//test.in # -exceptiongroup==1.2.1 +exceptiongroup==1.2.2 # via pytest iniconfig==2.0.0 # via pytest @@ -12,7 +12,7 @@ packaging==24.1 # via pytest pluggy==1.5.0 # via pytest -pytest==8.2.2 +pytest==8.3.2 # via -r requirements//test.in tomli==2.0.1 # via pytest diff --git a/requirements/validate.txt b/requirements/validate.txt index 1105641..6e4f4a7 100644 --- a/requirements/validate.txt +++ 
b/requirements/validate.txt @@ -4,13 +4,13 @@ # # pip-compile requirements//validate.in # -certifi==2024.6.2 +certifi==2024.7.4 # via requests charset-normalizer==3.3.2 # via requests click==8.1.7 # via -r requirements//validate.in -idna==3.7 +idna==3.8 # via requests requests==2.32.3 # via -r requirements//validate.in diff --git a/test/test_schema.py b/test/test_schema.py index 95e10e7..0322560 100644 --- a/test/test_schema.py +++ b/test/test_schema.py @@ -554,6 +554,10 @@ def fn(a: FileData) -> list[FileData]: ], "default": None, }, + "filesize_bytes": { + "anyOf": [{"type": "integer"}, {"type": "null"}], + "default": None, + }, }, "required": [], }, @@ -643,6 +647,10 @@ def fn(a: FileData) -> list[FileData]: ], "default": None, }, + "filesize_bytes": { + "anyOf": [{"type": "integer"}, {"type": "null"}], + "default": None, + }, }, "required": [], }, diff --git a/unstructured_platform_plugins/__version__.py b/unstructured_platform_plugins/__version__.py index 5b34010..4dc74f8 100644 --- a/unstructured_platform_plugins/__version__.py +++ b/unstructured_platform_plugins/__version__.py @@ -1 +1 @@ -__version__ = "0.0.2" # pragma: no cover +__version__ = "0.0.3" # pragma: no cover diff --git a/unstructured_platform_plugins/etl_uvicorn/api_generator.py b/unstructured_platform_plugins/etl_uvicorn/api_generator.py index 7451b71..6b7d384 100644 --- a/unstructured_platform_plugins/etl_uvicorn/api_generator.py +++ b/unstructured_platform_plugins/etl_uvicorn/api_generator.py @@ -6,10 +6,12 @@ from typing import Any, Callable, Optional from fastapi import FastAPI, status +from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor from pydantic import BaseModel from starlette.responses import RedirectResponse from uvicorn.importer import import_from_string +from unstructured_platform_plugins.etl_uvicorn.otel import get_metric_provider, get_trace_provider from unstructured_platform_plugins.etl_uvicorn.utils import ( get_func, get_input_schema, @@ -170,4 +172,8 @@ 
async def get_id() -> str:
     except TypeError as e:
         raise TypeError(f"failed to validate function schema: {e}") from e
 
+    FastAPIInstrumentor.instrument_app(
+        fastapi_app, tracer_provider=get_trace_provider(), meter_provider=get_metric_provider()
+    )
+
     return fastapi_app
diff --git a/unstructured_platform_plugins/etl_uvicorn/otel.py b/unstructured_platform_plugins/etl_uvicorn/otel.py
new file mode 100644
index 0000000..f0ad35e
--- /dev/null
+++ b/unstructured_platform_plugins/etl_uvicorn/otel.py
@@ -0,0 +1,136 @@
+"""OpenTelemetry providers configured from the standard ``OTEL_*`` environment variables."""
+
+import os
+from typing import Literal, Optional, TypedDict
+
+from opentelemetry.environment_variables import OTEL_METRICS_EXPORTER, OTEL_TRACES_EXPORTER
+from opentelemetry.sdk.environment_variables import OTEL_SERVICE_NAME
+from opentelemetry.sdk.metrics import MeterProvider
+from opentelemetry.sdk.metrics.export import (
+    ConsoleMetricExporter,
+    MetricReader,
+    PeriodicExportingMetricReader,
+)
+from opentelemetry.sdk.resources import SERVICE_NAME, Resource
+from opentelemetry.sdk.trace import TracerProvider
+from opentelemetry.sdk.trace.export import (
+    BatchSpanProcessor,
+    ConsoleSpanExporter,
+    SimpleSpanProcessor,
+)
+
+TraceExporterType = Literal["otlp", "jaeger", "zipkin", "console"]
+MetricExporterType = Literal["otlp", "prometheus", "console", "none"]
+
+# Exporter name the OpenTelemetry spec reserves for "export nothing for this signal";
+# get_settings() drops it so it never reaches the exporter dispatch below.
+_DISABLED_EXPORTER = "none"
+
+
+class OtelSettings(TypedDict):
+    # Reported as the service.name resource attribute on both providers.
+    service_name: str
+    # Exporter names from OTEL_TRACES_EXPORTER, whitespace-stripped, without "none".
+    trace_exporters: list[TraceExporterType]
+    # Exporter names from OTEL_METRICS_EXPORTER, whitespace-stripped, without "none".
+    metric_exporters: list[MetricExporterType]
+
+
+def _parse_exporters(raw: Optional[str]) -> list[str]:
+    """Split a comma-separated exporter list, dropping blank entries and "none"."""
+    if not raw:
+        return []
+    names = (name.strip() for name in raw.split(","))
+    return [name for name in names if name and name != _DISABLED_EXPORTER]
+
+
+def get_settings() -> OtelSettings:
+    """Read the OpenTelemetry settings from the process environment.
+
+    Returns:
+        The service name ("unknown_service" when OTEL_SERVICE_NAME is unset) and the
+        exporter names requested for traces and metrics. An unset or empty exporter
+        variable, or the value "none", yields an empty list.
+    """
+    return OtelSettings(
+        service_name=os.environ.get(OTEL_SERVICE_NAME, "unknown_service"),
+        trace_exporters=_parse_exporters(os.environ.get(OTEL_TRACES_EXPORTER)),
+        metric_exporters=_parse_exporters(os.environ.get(OTEL_METRICS_EXPORTER)),
+    )
+
+
+def get_trace_provider() -> TracerProvider:
+    """Build a TracerProvider with one span processor per configured trace exporter.
+
+    Raises:
+        NotImplementedError: if a configured exporter name is not supported.
+    """
+    settings = get_settings()
+    provider = TracerProvider(resource=Resource({SERVICE_NAME: settings["service_name"]}))
+
+    for trace_exporter_type in settings["trace_exporters"]:
+        _add_trace_exporter(exporter_type=trace_exporter_type, provider=provider)
+
+    return provider
+
+
+def get_metric_provider() -> MeterProvider:
+    """Build a MeterProvider with one metric reader per configured metric exporter.
+
+    Raises:
+        NotImplementedError: if a configured exporter name is not supported.
+    """
+    settings = get_settings()
+    readers = [
+        _get_metrics_reader(exporter_type=metric_exporter_type)
+        for metric_exporter_type in settings["metric_exporters"]
+    ]
+    return MeterProvider(
+        resource=Resource({SERVICE_NAME: settings["service_name"]}), metric_readers=readers
+    )
+
+
+def _add_trace_exporter(exporter_type: TraceExporterType, provider: TracerProvider) -> None:
+    """Attach the span processor for `exporter_type` to `provider`."""
+    if exporter_type == "otlp":
+        _add_traces_otlp_exporter(provider)
+    elif exporter_type == "console":
+        _add_traces_console_exporter(provider)
+    else:
+        raise NotImplementedError(f"{exporter_type} implementation not supported yet")
+
+
+def _get_metrics_reader(exporter_type: MetricExporterType) -> MetricReader:
+    """Return the metric reader for `exporter_type`."""
+    if exporter_type == "otlp":
+        return _get_metric_otlp_reader()
+    if exporter_type == "console":
+        return _get_metric_console_reader()
+    raise NotImplementedError(f"{exporter_type} implementation not supported yet")
+
+
+def _add_traces_console_exporter(provider: TracerProvider) -> None:
+    """Print every finished span to stdout as soon as it ends."""
+    provider.add_span_processor(SimpleSpanProcessor(ConsoleSpanExporter()))
+
+
+def _add_traces_otlp_exporter(provider: TracerProvider) -> None:
+    """Export spans over OTLP/gRPC in background batches."""
+    # Imported lazily so the gRPC exporter is only loaded when OTLP is requested.
+    from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
+
+    # SimpleSpanProcessor would run the network export inline when each span ends,
+    # i.e. on the request path; BatchSpanProcessor hands it to a worker thread.
+    provider.add_span_processor(BatchSpanProcessor(OTLPSpanExporter()))
+
+
+def _get_metric_otlp_reader() -> MetricReader:
+    """Return a reader that periodically pushes metrics over OTLP/gRPC."""
+    from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter
+
+    return PeriodicExportingMetricReader(OTLPMetricExporter())
+
+
+def _get_metric_console_reader() -> MetricReader:
+    """Return a reader that periodically prints metrics to stdout."""
+    return PeriodicExportingMetricReader(ConsoleMetricExporter())