Skip to content
Merged

Dev #71

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 0 additions & 72 deletions .github/workflows/README.md

This file was deleted.

2 changes: 1 addition & 1 deletion .github/workflows/ci-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ jobs:

strategy:
matrix:
os: [ "ubuntu-latest", "macos-latest", "windows-latest" ]
os: [ "ubuntu-latest" ]
python-version: ["3.10", "3.11", "3.12", "3.13"]

defaults:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/daily-import-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ on:

jobs:
import-test:
runs-on: macos-latest
runs-on: ubuntu-latest

steps:
- name: Set up Python 3.13
Expand Down
6 changes: 5 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,13 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),

- **Refactor**: Code reorganization that doesn't change functionality but improves structure or maintainability

## [0.19.0](https://github.com/shcherbak-ai/contextgem/releases/tag/v0.19.0) - 2025-09-08
### Added
- Tool calling support in `DocumentLLM.chat(...)`.

## [0.18.0](https://github.com/shcherbak-ai/contextgem/releases/tag/v0.18.0) - 2025-09-01
### Added
- Chat: Added optional `chat_session` parameter (accepts a `ChatSession`) to preserve message history across turns in `DocumentLLM.chat()`. When this parameter is omitted, chat is single-turn, without message history.
- Chat: Added optional `chat_session` parameter (accepts a `ChatSession`) to preserve message history across turns in `DocumentLLM.chat(...)`. When this parameter is omitted, chat is single-turn, without message history.

## [0.17.1](https://github.com/shcherbak-ai/contextgem/releases/tag/v0.17.1) - 2025-08-26
### Changed
Expand Down
2 changes: 1 addition & 1 deletion NOTICE
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ This software includes the following third-party components:

Core Dependencies:
- aiolimiter: Rate limiting for asynchronous operations
- fastjsonschema: Fast JSON schema validator
- genai-prices: LLM pricing data and utilities (by Pydantic) to automatically estimate costs
- Jinja2: Templating engine
- litellm: LLM interface library (this software uses only MIT-licensed portions of LiteLLM and does not utilize any components from the enterprise/ directory)
Expand All @@ -44,7 +45,6 @@ Development Dependencies:
- interrogate: Python docstring coverage checker
- memory-profiler: Python memory usage monitoring tool
- nbformat: Notebook format utilities
- pip-tools: Dependency management
- pre-commit: Pre-commit hooks
- pympler: Python memory analysis for object-level memory measurement
- pyright: Static type checker for Python
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -486,6 +486,7 @@ This project is automatically scanned for security vulnerabilities using multipl
ContextGem relies on these excellent open-source packages:

- [aiolimiter](https://github.com/mjpieters/aiolimiter): Powerful rate limiting for async operations
- [fastjsonschema](https://github.com/horejsek/python-fastjsonschema): Ultra-fast JSON schema validation
- [genai-prices](https://github.com/pydantic/genai-prices): LLM pricing data and utilities (by Pydantic) to automatically estimate costs
- [Jinja2](https://github.com/pallets/jinja): Fast, expressive, extensible templating engine used for prompt rendering
- [litellm](https://github.com/BerriAI/litellm): Unified interface to multiple LLM providers with seamless provider switching
Expand Down
5 changes: 4 additions & 1 deletion contextgem/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
ContextGem - Effortless LLM extraction from documents
"""

__version__ = "0.18.0"
__version__ = "0.19.0"
__author__ = "Shcherbak AI AS"

from contextgem.public import (
Expand Down Expand Up @@ -49,6 +49,7 @@
StringExample,
create_image,
image_to_base64,
register_tool,
reload_logger_settings,
)

Expand Down Expand Up @@ -92,4 +93,6 @@
"JsonObjectClassStruct",
# Converters
"DocxConverter",
# Decorators
"register_tool",
)
2 changes: 2 additions & 0 deletions contextgem/internal/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@
DocxConverterError,
LLMAPIError,
LLMExtractionError,
LLMToolLoopLimitError,
)
from contextgem.internal.items import (
_BooleanItem,
Expand Down Expand Up @@ -236,5 +237,6 @@
# Exceptions
"LLMExtractionError",
"LLMAPIError",
"LLMToolLoopLimitError",
"DocxConverterError",
)
14 changes: 12 additions & 2 deletions contextgem/internal/base/abstract.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,22 +12,22 @@

from contextgem.internal.base.mixins import _PostInitCollectorMixin
from contextgem.internal.base.serialization import _InstanceSerializer
from contextgem.internal.typings.types import JSONDictField, LLMRoleAny


if TYPE_CHECKING:
from contextgem.internal.data_models import (
_LLMCostOutputContainer,
_LLMUsageOutputContainer,
)
from contextgem.internal.typings.aliases import LLMRoleAny


class _AbstractInstanceBase(_PostInitCollectorMixin, _InstanceSerializer, ABC):
"""
Abstract base for instance-like Pydantic models.
"""

custom_data: dict = Field(
custom_data: JSONDictField = Field(
default_factory=dict,
description="A serializable dictionary for storing additional custom data "
"related to the instance.",
Expand Down Expand Up @@ -110,3 +110,13 @@ def reset_usage_and_cost(self) -> None:
may support optional filters (e.g., by role) where applicable.
"""
pass

@abstractmethod
def _warn_tools_ignored_if_enabled(self) -> None:
"""
Abstract method, to be implemented by subclasses.

Should warn that tools are ignored during extraction
workflows if tools are configured. Tools are supported only in ``llm.chat(...)``.
"""
pass
2 changes: 1 addition & 1 deletion contextgem/internal/base/aspects.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
from contextgem.internal.base.paras_and_sents import _Paragraph, _Sentence
from contextgem.internal.decorators import _disable_direct_initialization
from contextgem.internal.items import _StringItem
from contextgem.internal.typings.aliases import (
from contextgem.internal.typings.types import (
LLMRoleAspect,
NonEmptyStr,
ReferenceDepth,
Expand Down
2 changes: 1 addition & 1 deletion contextgem/internal/base/attrs.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
from contextgem.internal.decorators import (
_post_init_method,
)
from contextgem.internal.typings.aliases import (
from contextgem.internal.typings.types import (
AssignedInstancesAttrName,
JustificationDepth,
)
Expand Down
12 changes: 6 additions & 6 deletions contextgem/internal/base/concepts.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,17 +61,17 @@
_LabelConceptItemValueModel,
)
from contextgem.internal.loggers import logger
from contextgem.internal.typings.aliases import (
ClassificationType,
LLMRoleAny,
NonEmptyStr,
ReferenceDepth,
)
from contextgem.internal.typings.typed_class_utils import (
_get_model_fields,
_is_typed_class,
_raise_dict_class_type_error,
)
from contextgem.internal.typings.types import (
ClassificationType,
LLMRoleAny,
NonEmptyStr,
ReferenceDepth,
)
from contextgem.internal.typings.types_normalization import _normalize_type_annotation
from contextgem.internal.typings.types_to_strings import (
JSON_PRIMITIVE_TYPES,
Expand Down
2 changes: 1 addition & 1 deletion contextgem/internal/base/documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
)
from contextgem.internal.loggers import logger
from contextgem.internal.registry import _publicize
from contextgem.internal.typings.aliases import NonEmptyStr, SaTModelId
from contextgem.internal.typings.types import NonEmptyStr, SaTModelId
from contextgem.internal.typings.validators import _validate_sequence_is_list
from contextgem.internal.utils import (
_check_paragraphs_match_in_text,
Expand Down
23 changes: 3 additions & 20 deletions contextgem/internal/base/examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,11 @@

from typing import Any

from pydantic import Field, field_validator
from pydantic import Field

from contextgem.internal.base.instances import _InstanceBase
from contextgem.internal.decorators import _disable_direct_initialization
from contextgem.internal.typings.aliases import NonEmptyStr
from contextgem.internal.utils import _is_json_serializable
from contextgem.internal.typings.types import JSONDictField, NonEmptyStr


class _Example(_InstanceBase):
Expand Down Expand Up @@ -60,24 +59,8 @@ class _JsonObjectExample(_Example):
Internal implementation of the JsonObjectExample class.
"""

content: dict[str, Any] = Field(
content: JSONDictField = Field(
...,
min_length=1,
description="A JSON-serializable dict that holds the content of the extracted item example.",
)

@field_validator("content")
@classmethod
def _validate_content_serializable(cls, value: dict[str, Any]) -> dict[str, Any]:
"""
Validates that the `content` field is serializable to JSON.

:param value: The value of the `content` field to validate.
:type value: dict[str, Any]
:return: The validated `content` value.
:rtype: dict[str, Any]
:raises ValueError: If the `content` value is not serializable.
"""
if not _is_json_serializable(value):
raise ValueError("`content` must be JSON serializable.")
return value
2 changes: 1 addition & 1 deletion contextgem/internal/base/images.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@

from contextgem.internal.base.instances import _InstanceBase
from contextgem.internal.decorators import _disable_direct_initialization
from contextgem.internal.typings.aliases import NonEmptyStr
from contextgem.internal.typings.types import NonEmptyStr


@_disable_direct_initialization
Expand Down
2 changes: 1 addition & 1 deletion contextgem/internal/base/items.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
from contextgem.internal.decorators import (
_disable_direct_initialization,
)
from contextgem.internal.typings.aliases import NonEmptyStr
from contextgem.internal.typings.types import NonEmptyStr


@_disable_direct_initialization
Expand Down
Loading