diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 2f7778a..bbb722b 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -17,6 +17,7 @@ jobs:
     timeout-minutes: 10
     name: lint
     runs-on: ${{ github.repository == 'stainless-sdks/codex-python' && 'depot-ubuntu-24.04' || 'ubuntu-latest' }}
+    if: github.event_name == 'push' || github.event.pull_request.head.repo.fork
     steps:
       - uses: actions/checkout@v4

@@ -35,7 +36,7 @@ jobs:
         run: ./scripts/lint

   upload:
-    if: github.repository == 'stainless-sdks/codex-python'
+    if: github.repository == 'stainless-sdks/codex-python' && (github.event_name == 'push' || github.event.pull_request.head.repo.fork)
     timeout-minutes: 10
     name: upload
     permissions:
diff --git a/.release-please-manifest.json b/.release-please-manifest.json
index aa84875..1c0bb88 100644
--- a/.release-please-manifest.json
+++ b/.release-please-manifest.json
@@ -1,3 +1,3 @@
 {
-  ".": "0.1.0-alpha.22"
+  ".": "0.1.0-alpha.23"
 }
\ No newline at end of file
diff --git a/.stats.yml b/.stats.yml
index 04c1386..c509062 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1,3 +1,3 @@
 configured_endpoints: 65
-openapi_spec_hash: 80696dc202de8bacc0e43506d7c210b0
+openapi_spec_hash: f63d4542b4bd1530ced013eb686cab99
 config_hash: 14b2643a0ec60cf326dfed00939644ff
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2b2988f..80023ff 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,26 @@
 # Changelog

+## 0.1.0-alpha.23 (2025-06-30)
+
+Full Changelog: [v0.1.0-alpha.22...v0.1.0-alpha.23](https://github.com/cleanlab/codex-python/compare/v0.1.0-alpha.22...v0.1.0-alpha.23)
+
+### Features
+
+* **api:** api update ([31096f4](https://github.com/cleanlab/codex-python/commit/31096f4820a7bfdd204b0a2d1d84ab1e36e32d0c))
+* **api:** api update ([be06884](https://github.com/cleanlab/codex-python/commit/be06884d321ca5009c9d82346c1b74c7429f82fa))
+* **api:** api update ([41b210d](https://github.com/cleanlab/codex-python/commit/41b210dc69c2b9c45eeab01a0afac6a4563d41f2))
+
+
+### Bug Fixes
+
+* **ci:** correct conditional ([45d3bc0](https://github.com/cleanlab/codex-python/commit/45d3bc05ab56d3e67d036ce84b2c9a1f2d8cfd69))
+* **ci:** release-doctor — report correct token name ([1a5e444](https://github.com/cleanlab/codex-python/commit/1a5e444226c829392181d98bc06f8cfb8bf13bd9))
+
+
+### Chores
+
+* **ci:** only run for pushes and fork pull requests ([6b590bd](https://github.com/cleanlab/codex-python/commit/6b590bd454e939b8453d95c239ee85be1a326909))
+
 ## 0.1.0-alpha.22 (2025-06-24)

 Full Changelog: [v0.1.0-alpha.21...v0.1.0-alpha.22](https://github.com/cleanlab/codex-python/compare/v0.1.0-alpha.21...v0.1.0-alpha.22)
diff --git a/bin/check-release-environment b/bin/check-release-environment
index a1446a7..b845b0f 100644
--- a/bin/check-release-environment
+++ b/bin/check-release-environment
@@ -3,7 +3,7 @@
 errors=()

 if [ -z "${PYPI_TOKEN}" ]; then
-  errors+=("The CODEX_PYPI_TOKEN secret has not been set. Please set it in either this repository's secrets or your organization secrets.")
+  errors+=("The PYPI_TOKEN secret has not been set. Please set it in either this repository's secrets or your organization secrets.")
 fi

 lenErrors=${#errors[@]}
diff --git a/pyproject.toml b/pyproject.toml
index b71f9f0..16e362c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "codex-sdk"
-version = "0.1.0-alpha.22"
+version = "0.1.0-alpha.23"
 description = "Internal SDK used within cleanlab-codex package. Refer to https://pypi.org/project/cleanlab-codex/ instead."
 dynamic = ["readme"]
 license = "MIT"
diff --git a/src/codex/_version.py b/src/codex/_version.py
index a88a1c3..18f2dcb 100644
--- a/src/codex/_version.py
+++ b/src/codex/_version.py
@@ -1,4 +1,4 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.

 __title__ = "codex"
-__version__ = "0.1.0-alpha.22"  # x-release-please-version
+__version__ = "0.1.0-alpha.23"  # x-release-please-version
diff --git a/src/codex/resources/projects/projects.py b/src/codex/resources/projects/projects.py
index 22b5caf..fc5a43a 100644
--- a/src/codex/resources/projects/projects.py
+++ b/src/codex/resources/projects/projects.py
@@ -212,10 +212,10 @@ def update(
         self,
         project_id: str,
         *,
-        config: project_update_params.Config,
-        name: str,
-        auto_clustering_enabled: bool | NotGiven = NOT_GIVEN,
+        auto_clustering_enabled: Optional[bool] | NotGiven = NOT_GIVEN,
+        config: Optional[project_update_params.Config] | NotGiven = NOT_GIVEN,
         description: Optional[str] | NotGiven = NOT_GIVEN,
+        name: Optional[str] | NotGiven = NOT_GIVEN,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
@@ -241,10 +241,10 @@ def update(
             f"/api/projects/{project_id}",
             body=maybe_transform(
                 {
-                    "config": config,
-                    "name": name,
                     "auto_clustering_enabled": auto_clustering_enabled,
+                    "config": config,
                     "description": description,
+                    "name": name,
                 },
                 project_update_params.ProjectUpdateParams,
             ),
@@ -820,10 +820,10 @@ async def update(
         self,
         project_id: str,
         *,
-        config: project_update_params.Config,
-        name: str,
-        auto_clustering_enabled: bool | NotGiven = NOT_GIVEN,
+        auto_clustering_enabled: Optional[bool] | NotGiven = NOT_GIVEN,
+        config: Optional[project_update_params.Config] | NotGiven = NOT_GIVEN,
         description: Optional[str] | NotGiven = NOT_GIVEN,
+        name: Optional[str] | NotGiven = NOT_GIVEN,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None, @@ -849,10 +849,10 @@ async def update( f"/api/projects/{project_id}", body=await async_maybe_transform( { - "config": config, - "name": name, "auto_clustering_enabled": auto_clustering_enabled, + "config": config, "description": description, + "name": name, }, project_update_params.ProjectUpdateParams, ), diff --git a/src/codex/resources/projects/query_logs.py b/src/codex/resources/projects/query_logs.py index 32ec739..e97243e 100644 --- a/src/codex/resources/projects/query_logs.py +++ b/src/codex/resources/projects/query_logs.py @@ -92,9 +92,12 @@ def list( created_at_end: Union[str, datetime, None] | NotGiven = NOT_GIVEN, created_at_start: Union[str, datetime, None] | NotGiven = NOT_GIVEN, custom_metadata: Optional[str] | NotGiven = NOT_GIVEN, + failed_evals: Optional[List[str]] | NotGiven = NOT_GIVEN, + guardrailed: Optional[bool] | NotGiven = NOT_GIVEN, limit: int | NotGiven = NOT_GIVEN, offset: int | NotGiven = NOT_GIVEN, order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + passed_evals: Optional[List[str]] | NotGiven = NOT_GIVEN, primary_eval_issue: Optional[ List[Literal["hallucination", "search_failure", "unhelpful", "difficult_query", "unsupported"]] ] @@ -118,6 +121,12 @@ def list( custom_metadata: Filter by custom metadata as JSON string: {"key1": "value1", "key2": "value2"} + failed_evals: Filter by evals that failed + + guardrailed: Filter by guardrailed status + + passed_evals: Filter by evals that passed + primary_eval_issue: Filter logs that have ANY of these primary evaluation issues (OR operation) was_cache_hit: Filter by cache hit status @@ -144,9 +153,12 @@ def list( "created_at_end": created_at_end, "created_at_start": created_at_start, "custom_metadata": custom_metadata, + "failed_evals": failed_evals, + "guardrailed": guardrailed, "limit": limit, "offset": offset, "order": order, + "passed_evals": passed_evals, "primary_eval_issue": primary_eval_issue, "sort": sort, "was_cache_hit": was_cache_hit, @@ -164,9 +176,13 @@ def list_by_group( created_at_end: Union[str, datetime, None] | NotGiven = NOT_GIVEN, created_at_start: Union[str, datetime, None] | NotGiven = NOT_GIVEN, custom_metadata: Optional[str] | NotGiven = NOT_GIVEN, + failed_evals: Optional[List[str]] | NotGiven = NOT_GIVEN, + guardrailed: Optional[bool] | NotGiven = NOT_GIVEN, limit: int | NotGiven = NOT_GIVEN, + needs_review: Optional[bool] | NotGiven = NOT_GIVEN, offset: int | NotGiven = NOT_GIVEN, order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + passed_evals: Optional[List[str]] | NotGiven = NOT_GIVEN, primary_eval_issue: Optional[ List[Literal["hallucination", "search_failure", "unhelpful", "difficult_query", "unsupported"]] ] @@ -191,6 +207,14 @@ def list_by_group( custom_metadata: Filter by custom metadata as JSON string: {"key1": "value1", "key2": "value2"} + failed_evals: Filter by evals that failed + + guardrailed: Filter by guardrailed status + + needs_review: Filter logs that need review + + passed_evals: Filter by evals that passed + primary_eval_issue: Filter logs that have ANY of these primary evaluation issues (OR operation) remediation_ids: List of groups to list child logs for @@ -219,9 +243,13 @@ def list_by_group( "created_at_end": created_at_end, "created_at_start": created_at_start, "custom_metadata": custom_metadata, + "failed_evals": failed_evals, + "guardrailed": guardrailed, "limit": limit, + "needs_review": needs_review, "offset": offset, "order": order, + "passed_evals": passed_evals, "primary_eval_issue": primary_eval_issue, 
"remediation_ids": remediation_ids, "sort": sort, @@ -240,9 +268,13 @@ def list_groups( created_at_end: Union[str, datetime, None] | NotGiven = NOT_GIVEN, created_at_start: Union[str, datetime, None] | NotGiven = NOT_GIVEN, custom_metadata: Optional[str] | NotGiven = NOT_GIVEN, + failed_evals: Optional[List[str]] | NotGiven = NOT_GIVEN, + guardrailed: Optional[bool] | NotGiven = NOT_GIVEN, limit: int | NotGiven = NOT_GIVEN, + needs_review: Optional[bool] | NotGiven = NOT_GIVEN, offset: int | NotGiven = NOT_GIVEN, order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + passed_evals: Optional[List[str]] | NotGiven = NOT_GIVEN, primary_eval_issue: Optional[ List[Literal["hallucination", "search_failure", "unhelpful", "difficult_query", "unsupported"]] ] @@ -267,6 +299,14 @@ def list_groups( custom_metadata: Filter by custom metadata as JSON string: {"key1": "value1", "key2": "value2"} + failed_evals: Filter by evals that failed + + guardrailed: Filter by guardrailed status + + needs_review: Filter log groups that need review + + passed_evals: Filter by evals that passed + primary_eval_issue: Filter logs that have ANY of these primary evaluation issues (OR operation) was_cache_hit: Filter by cache hit status @@ -293,9 +333,13 @@ def list_groups( "created_at_end": created_at_end, "created_at_start": created_at_start, "custom_metadata": custom_metadata, + "failed_evals": failed_evals, + "guardrailed": guardrailed, "limit": limit, + "needs_review": needs_review, "offset": offset, "order": order, + "passed_evals": passed_evals, "primary_eval_issue": primary_eval_issue, "sort": sort, "was_cache_hit": was_cache_hit, @@ -406,9 +450,12 @@ async def list( created_at_end: Union[str, datetime, None] | NotGiven = NOT_GIVEN, created_at_start: Union[str, datetime, None] | NotGiven = NOT_GIVEN, custom_metadata: Optional[str] | NotGiven = NOT_GIVEN, + failed_evals: Optional[List[str]] | NotGiven = NOT_GIVEN, + guardrailed: Optional[bool] | NotGiven = NOT_GIVEN, limit: int | NotGiven = NOT_GIVEN, offset: int | NotGiven = NOT_GIVEN, order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + passed_evals: Optional[List[str]] | NotGiven = NOT_GIVEN, primary_eval_issue: Optional[ List[Literal["hallucination", "search_failure", "unhelpful", "difficult_query", "unsupported"]] ] @@ -432,6 +479,12 @@ async def list( custom_metadata: Filter by custom metadata as JSON string: {"key1": "value1", "key2": "value2"} + failed_evals: Filter by evals that failed + + guardrailed: Filter by guardrailed status + + passed_evals: Filter by evals that passed + primary_eval_issue: Filter logs that have ANY of these primary evaluation issues (OR operation) was_cache_hit: Filter by cache hit status @@ -458,9 +511,12 @@ async def list( "created_at_end": created_at_end, "created_at_start": created_at_start, "custom_metadata": custom_metadata, + "failed_evals": failed_evals, + "guardrailed": guardrailed, "limit": limit, "offset": offset, "order": order, + "passed_evals": passed_evals, "primary_eval_issue": primary_eval_issue, "sort": sort, "was_cache_hit": was_cache_hit, @@ -478,9 +534,13 @@ async def list_by_group( created_at_end: Union[str, datetime, None] | NotGiven = NOT_GIVEN, created_at_start: Union[str, datetime, None] | NotGiven = NOT_GIVEN, custom_metadata: Optional[str] | NotGiven = NOT_GIVEN, + failed_evals: Optional[List[str]] | NotGiven = NOT_GIVEN, + guardrailed: Optional[bool] | NotGiven = NOT_GIVEN, limit: int | NotGiven = NOT_GIVEN, + needs_review: Optional[bool] | NotGiven = NOT_GIVEN, offset: int | NotGiven = NOT_GIVEN, 
order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + passed_evals: Optional[List[str]] | NotGiven = NOT_GIVEN, primary_eval_issue: Optional[ List[Literal["hallucination", "search_failure", "unhelpful", "difficult_query", "unsupported"]] ] @@ -505,6 +565,14 @@ async def list_by_group( custom_metadata: Filter by custom metadata as JSON string: {"key1": "value1", "key2": "value2"} + failed_evals: Filter by evals that failed + + guardrailed: Filter by guardrailed status + + needs_review: Filter logs that need review + + passed_evals: Filter by evals that passed + primary_eval_issue: Filter logs that have ANY of these primary evaluation issues (OR operation) remediation_ids: List of groups to list child logs for @@ -533,9 +601,13 @@ async def list_by_group( "created_at_end": created_at_end, "created_at_start": created_at_start, "custom_metadata": custom_metadata, + "failed_evals": failed_evals, + "guardrailed": guardrailed, "limit": limit, + "needs_review": needs_review, "offset": offset, "order": order, + "passed_evals": passed_evals, "primary_eval_issue": primary_eval_issue, "remediation_ids": remediation_ids, "sort": sort, @@ -554,9 +626,13 @@ async def list_groups( created_at_end: Union[str, datetime, None] | NotGiven = NOT_GIVEN, created_at_start: Union[str, datetime, None] | NotGiven = NOT_GIVEN, custom_metadata: Optional[str] | NotGiven = NOT_GIVEN, + failed_evals: Optional[List[str]] | NotGiven = NOT_GIVEN, + guardrailed: Optional[bool] | NotGiven = NOT_GIVEN, limit: int | NotGiven = NOT_GIVEN, + needs_review: Optional[bool] | NotGiven = NOT_GIVEN, offset: int | NotGiven = NOT_GIVEN, order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + passed_evals: Optional[List[str]] | NotGiven = NOT_GIVEN, primary_eval_issue: Optional[ List[Literal["hallucination", "search_failure", "unhelpful", "difficult_query", "unsupported"]] ] @@ -581,6 +657,14 @@ async def list_groups( custom_metadata: Filter by custom metadata as JSON string: {"key1": "value1", "key2": "value2"} + failed_evals: Filter by evals that failed + + guardrailed: Filter by guardrailed status + + needs_review: Filter log groups that need review + + passed_evals: Filter by evals that passed + primary_eval_issue: Filter logs that have ANY of these primary evaluation issues (OR operation) was_cache_hit: Filter by cache hit status @@ -607,9 +691,13 @@ async def list_groups( "created_at_end": created_at_end, "created_at_start": created_at_start, "custom_metadata": custom_metadata, + "failed_evals": failed_evals, + "guardrailed": guardrailed, "limit": limit, + "needs_review": needs_review, "offset": offset, "order": order, + "passed_evals": passed_evals, "primary_eval_issue": primary_eval_issue, "sort": sort, "was_cache_hit": was_cache_hit, diff --git a/src/codex/types/project_update_params.py b/src/codex/types/project_update_params.py index 73dad67..3e24441 100644 --- a/src/codex/types/project_update_params.py +++ b/src/codex/types/project_update_params.py @@ -21,14 +21,14 @@ class ProjectUpdateParams(TypedDict, total=False): - config: Required[Config] + auto_clustering_enabled: Optional[bool] - name: Required[str] - - auto_clustering_enabled: bool + config: Optional[Config] description: Optional[str] + name: Optional[str] + class ConfigEvalConfigCustomEvalsEvals(TypedDict, total=False): criteria: Required[str] diff --git a/src/codex/types/projects/query_log_list_by_group_params.py b/src/codex/types/projects/query_log_list_by_group_params.py index 66166a1..b44970a 100644 --- a/src/codex/types/projects/query_log_list_by_group_params.py 
+++ b/src/codex/types/projects/query_log_list_by_group_params.py @@ -21,12 +21,24 @@ class QueryLogListByGroupParams(TypedDict, total=False): custom_metadata: Optional[str] """Filter by custom metadata as JSON string: {"key1": "value1", "key2": "value2"}""" + failed_evals: Optional[List[str]] + """Filter by evals that failed""" + + guardrailed: Optional[bool] + """Filter by guardrailed status""" + limit: int + needs_review: Optional[bool] + """Filter logs that need review""" + offset: int order: Literal["asc", "desc"] + passed_evals: Optional[List[str]] + """Filter by evals that passed""" + primary_eval_issue: Optional[ List[Literal["hallucination", "search_failure", "unhelpful", "difficult_query", "unsupported"]] ] diff --git a/src/codex/types/projects/query_log_list_by_group_response.py b/src/codex/types/projects/query_log_list_by_group_response.py index d11d827..9d1e0e6 100644 --- a/src/codex/types/projects/query_log_list_by_group_response.py +++ b/src/codex/types/projects/query_log_list_by_group_response.py @@ -10,10 +10,31 @@ "QueryLogListByGroupResponse", "QueryLogsByGroup", "QueryLogsByGroupQueryLog", + "QueryLogsByGroupQueryLogFormattedEscalationEvalScores", + "QueryLogsByGroupQueryLogFormattedEvalScores", + "QueryLogsByGroupQueryLogFormattedGuardrailEvalScores", "QueryLogsByGroupQueryLogContext", ] +class QueryLogsByGroupQueryLogFormattedEscalationEvalScores(BaseModel): + score: float + + status: Literal["pass", "fail"] + + +class QueryLogsByGroupQueryLogFormattedEvalScores(BaseModel): + score: float + + status: Literal["pass", "fail"] + + +class QueryLogsByGroupQueryLogFormattedGuardrailEvalScores(BaseModel): + score: float + + status: Literal["pass", "fail"] + + class QueryLogsByGroupQueryLogContext(BaseModel): content: str """The actual content/text of the document.""" @@ -36,7 +57,9 @@ class QueryLogsByGroupQueryLog(BaseModel): created_at: datetime - formatted_eval_scores: Optional[Dict[str, Dict[str, Union[float, Literal["pass", "fail"]]]]] = None + formatted_escalation_eval_scores: Optional[Dict[str, QueryLogsByGroupQueryLogFormattedEscalationEvalScores]] = None + + formatted_eval_scores: Optional[Dict[str, QueryLogsByGroupQueryLogFormattedEvalScores]] = None """Format evaluation scores for frontend display with pass/fail status. Returns: Dictionary mapping eval keys to their formatted representation: { @@ -44,6 +67,8 @@ class QueryLogsByGroupQueryLog(BaseModel): eval_scores is None. 
""" + formatted_guardrail_eval_scores: Optional[Dict[str, QueryLogsByGroupQueryLogFormattedGuardrailEvalScores]] = None + is_bad_response: bool project_id: str @@ -67,6 +92,9 @@ class QueryLogsByGroupQueryLog(BaseModel): escalated: Optional[bool] = None """If true, the question was escalated to Codex for an SME to review""" + escalation_evals: Optional[List[str]] = None + """Evals that should trigger escalation to SME""" + eval_issue_labels: Optional[List[str]] = None """Labels derived from evaluation scores""" @@ -79,6 +107,9 @@ class QueryLogsByGroupQueryLog(BaseModel): evaluated_response: Optional[str] = None """The response being evaluated from the RAG system (before any remediation)""" + guardrail_evals: Optional[List[str]] = None + """Evals that should trigger guardrail""" + guardrailed: Optional[bool] = None """If true, the response was guardrailed""" diff --git a/src/codex/types/projects/query_log_list_groups_params.py b/src/codex/types/projects/query_log_list_groups_params.py index 558ac0b..94d549f 100644 --- a/src/codex/types/projects/query_log_list_groups_params.py +++ b/src/codex/types/projects/query_log_list_groups_params.py @@ -21,12 +21,24 @@ class QueryLogListGroupsParams(TypedDict, total=False): custom_metadata: Optional[str] """Filter by custom metadata as JSON string: {"key1": "value1", "key2": "value2"}""" + failed_evals: Optional[List[str]] + """Filter by evals that failed""" + + guardrailed: Optional[bool] + """Filter by guardrailed status""" + limit: int + needs_review: Optional[bool] + """Filter log groups that need review""" + offset: int order: Literal["asc", "desc"] + passed_evals: Optional[List[str]] + """Filter by evals that passed""" + primary_eval_issue: Optional[ List[Literal["hallucination", "search_failure", "unhelpful", "difficult_query", "unsupported"]] ] diff --git a/src/codex/types/projects/query_log_list_groups_response.py b/src/codex/types/projects/query_log_list_groups_response.py index fd87c30..7b77cc0 100644 --- a/src/codex/types/projects/query_log_list_groups_response.py +++ b/src/codex/types/projects/query_log_list_groups_response.py @@ -6,7 +6,32 @@ from ..._models import BaseModel -__all__ = ["QueryLogListGroupsResponse", "QueryLogGroup", "QueryLogGroupContext"] +__all__ = [ + "QueryLogListGroupsResponse", + "QueryLogGroup", + "QueryLogGroupFormattedEscalationEvalScores", + "QueryLogGroupFormattedEvalScores", + "QueryLogGroupFormattedGuardrailEvalScores", + "QueryLogGroupContext", +] + + +class QueryLogGroupFormattedEscalationEvalScores(BaseModel): + score: float + + status: Literal["pass", "fail"] + + +class QueryLogGroupFormattedEvalScores(BaseModel): + score: float + + status: Literal["pass", "fail"] + + +class QueryLogGroupFormattedGuardrailEvalScores(BaseModel): + score: float + + status: Literal["pass", "fail"] class QueryLogGroupContext(BaseModel): @@ -31,7 +56,9 @@ class QueryLogGroup(BaseModel): created_at: datetime - formatted_eval_scores: Optional[Dict[str, Dict[str, Union[float, Literal["pass", "fail"]]]]] = None + formatted_escalation_eval_scores: Optional[Dict[str, QueryLogGroupFormattedEscalationEvalScores]] = None + + formatted_eval_scores: Optional[Dict[str, QueryLogGroupFormattedEvalScores]] = None """Format evaluation scores for frontend display with pass/fail status. Returns: Dictionary mapping eval keys to their formatted representation: { @@ -39,6 +66,8 @@ class QueryLogGroup(BaseModel): eval_scores is None. 
""" + formatted_guardrail_eval_scores: Optional[Dict[str, QueryLogGroupFormattedGuardrailEvalScores]] = None + is_bad_response: bool needs_review: bool @@ -68,6 +97,9 @@ class QueryLogGroup(BaseModel): escalated: Optional[bool] = None """If true, the question was escalated to Codex for an SME to review""" + escalation_evals: Optional[List[str]] = None + """Evals that should trigger escalation to SME""" + eval_issue_labels: Optional[List[str]] = None """Labels derived from evaluation scores""" @@ -80,6 +112,9 @@ class QueryLogGroup(BaseModel): evaluated_response: Optional[str] = None """The response being evaluated from the RAG system (before any remediation)""" + guardrail_evals: Optional[List[str]] = None + """Evals that should trigger guardrail""" + guardrailed: Optional[bool] = None """If true, the response was guardrailed""" diff --git a/src/codex/types/projects/query_log_list_params.py b/src/codex/types/projects/query_log_list_params.py index 9cf3211..0f72b24 100644 --- a/src/codex/types/projects/query_log_list_params.py +++ b/src/codex/types/projects/query_log_list_params.py @@ -21,12 +21,21 @@ class QueryLogListParams(TypedDict, total=False): custom_metadata: Optional[str] """Filter by custom metadata as JSON string: {"key1": "value1", "key2": "value2"}""" + failed_evals: Optional[List[str]] + """Filter by evals that failed""" + + guardrailed: Optional[bool] + """Filter by guardrailed status""" + limit: int offset: int order: Literal["asc", "desc"] + passed_evals: Optional[List[str]] + """Filter by evals that passed""" + primary_eval_issue: Optional[ List[Literal["hallucination", "search_failure", "unhelpful", "difficult_query", "unsupported"]] ] diff --git a/src/codex/types/projects/query_log_list_response.py b/src/codex/types/projects/query_log_list_response.py index bfd37cd..fa04904 100644 --- a/src/codex/types/projects/query_log_list_response.py +++ b/src/codex/types/projects/query_log_list_response.py @@ -6,7 +6,32 @@ from ..._models import BaseModel -__all__ = ["QueryLogListResponse", "QueryLog", "QueryLogContext"] +__all__ = [ + "QueryLogListResponse", + "QueryLog", + "QueryLogFormattedEscalationEvalScores", + "QueryLogFormattedEvalScores", + "QueryLogFormattedGuardrailEvalScores", + "QueryLogContext", +] + + +class QueryLogFormattedEscalationEvalScores(BaseModel): + score: float + + status: Literal["pass", "fail"] + + +class QueryLogFormattedEvalScores(BaseModel): + score: float + + status: Literal["pass", "fail"] + + +class QueryLogFormattedGuardrailEvalScores(BaseModel): + score: float + + status: Literal["pass", "fail"] class QueryLogContext(BaseModel): @@ -31,7 +56,9 @@ class QueryLog(BaseModel): created_at: datetime - formatted_eval_scores: Optional[Dict[str, Dict[str, Union[float, Literal["pass", "fail"]]]]] = None + formatted_escalation_eval_scores: Optional[Dict[str, QueryLogFormattedEscalationEvalScores]] = None + + formatted_eval_scores: Optional[Dict[str, QueryLogFormattedEvalScores]] = None """Format evaluation scores for frontend display with pass/fail status. Returns: Dictionary mapping eval keys to their formatted representation: { @@ -39,6 +66,8 @@ class QueryLog(BaseModel): eval_scores is None. 
""" + formatted_guardrail_eval_scores: Optional[Dict[str, QueryLogFormattedGuardrailEvalScores]] = None + is_bad_response: bool project_id: str @@ -62,6 +91,9 @@ class QueryLog(BaseModel): escalated: Optional[bool] = None """If true, the question was escalated to Codex for an SME to review""" + escalation_evals: Optional[List[str]] = None + """Evals that should trigger escalation to SME""" + eval_issue_labels: Optional[List[str]] = None """Labels derived from evaluation scores""" @@ -74,6 +106,9 @@ class QueryLog(BaseModel): evaluated_response: Optional[str] = None """The response being evaluated from the RAG system (before any remediation)""" + guardrail_evals: Optional[List[str]] = None + """Evals that should trigger guardrail""" + guardrailed: Optional[bool] = None """If true, the response was guardrailed""" diff --git a/src/codex/types/projects/query_log_retrieve_response.py b/src/codex/types/projects/query_log_retrieve_response.py index 3b813ee..8bb6128 100644 --- a/src/codex/types/projects/query_log_retrieve_response.py +++ b/src/codex/types/projects/query_log_retrieve_response.py @@ -6,7 +6,31 @@ from ..._models import BaseModel -__all__ = ["QueryLogRetrieveResponse", "Context"] +__all__ = [ + "QueryLogRetrieveResponse", + "FormattedEscalationEvalScores", + "FormattedEvalScores", + "FormattedGuardrailEvalScores", + "Context", +] + + +class FormattedEscalationEvalScores(BaseModel): + score: float + + status: Literal["pass", "fail"] + + +class FormattedEvalScores(BaseModel): + score: float + + status: Literal["pass", "fail"] + + +class FormattedGuardrailEvalScores(BaseModel): + score: float + + status: Literal["pass", "fail"] class Context(BaseModel): @@ -31,7 +55,9 @@ class QueryLogRetrieveResponse(BaseModel): created_at: datetime - formatted_eval_scores: Optional[Dict[str, Dict[str, Union[float, Literal["pass", "fail"]]]]] = None + formatted_escalation_eval_scores: Optional[Dict[str, FormattedEscalationEvalScores]] = None + + formatted_eval_scores: Optional[Dict[str, FormattedEvalScores]] = None """Format evaluation scores for frontend display with pass/fail status. Returns: Dictionary mapping eval keys to their formatted representation: { @@ -39,6 +65,8 @@ class QueryLogRetrieveResponse(BaseModel): eval_scores is None. 
""" + formatted_guardrail_eval_scores: Optional[Dict[str, FormattedGuardrailEvalScores]] = None + is_bad_response: bool project_id: str @@ -62,6 +90,9 @@ class QueryLogRetrieveResponse(BaseModel): escalated: Optional[bool] = None """If true, the question was escalated to Codex for an SME to review""" + escalation_evals: Optional[List[str]] = None + """Evals that should trigger escalation to SME""" + eval_issue_labels: Optional[List[str]] = None """Labels derived from evaluation scores""" @@ -74,6 +105,9 @@ class QueryLogRetrieveResponse(BaseModel): evaluated_response: Optional[str] = None """The response being evaluated from the RAG system (before any remediation)""" + guardrail_evals: Optional[List[str]] = None + """Evals that should trigger guardrail""" + guardrailed: Optional[bool] = None """If true, the response was guardrailed""" diff --git a/src/codex/types/projects/remediation_list_resolved_logs_response.py b/src/codex/types/projects/remediation_list_resolved_logs_response.py index 4f9682b..b2315aa 100644 --- a/src/codex/types/projects/remediation_list_resolved_logs_response.py +++ b/src/codex/types/projects/remediation_list_resolved_logs_response.py @@ -6,7 +6,32 @@ from ..._models import BaseModel -__all__ = ["RemediationListResolvedLogsResponse", "QueryLog", "QueryLogContext"] +__all__ = [ + "RemediationListResolvedLogsResponse", + "QueryLog", + "QueryLogFormattedEscalationEvalScores", + "QueryLogFormattedEvalScores", + "QueryLogFormattedGuardrailEvalScores", + "QueryLogContext", +] + + +class QueryLogFormattedEscalationEvalScores(BaseModel): + score: float + + status: Literal["pass", "fail"] + + +class QueryLogFormattedEvalScores(BaseModel): + score: float + + status: Literal["pass", "fail"] + + +class QueryLogFormattedGuardrailEvalScores(BaseModel): + score: float + + status: Literal["pass", "fail"] class QueryLogContext(BaseModel): @@ -31,7 +56,9 @@ class QueryLog(BaseModel): created_at: datetime - formatted_eval_scores: Optional[Dict[str, Dict[str, Union[float, Literal["pass", "fail"]]]]] = None + formatted_escalation_eval_scores: Optional[Dict[str, QueryLogFormattedEscalationEvalScores]] = None + + formatted_eval_scores: Optional[Dict[str, QueryLogFormattedEvalScores]] = None """Format evaluation scores for frontend display with pass/fail status. Returns: Dictionary mapping eval keys to their formatted representation: { @@ -39,6 +66,8 @@ class QueryLog(BaseModel): eval_scores is None. 
""" + formatted_guardrail_eval_scores: Optional[Dict[str, QueryLogFormattedGuardrailEvalScores]] = None + is_bad_response: bool project_id: str @@ -62,6 +91,9 @@ class QueryLog(BaseModel): escalated: Optional[bool] = None """If true, the question was escalated to Codex for an SME to review""" + escalation_evals: Optional[List[str]] = None + """Evals that should trigger escalation to SME""" + eval_issue_labels: Optional[List[str]] = None """Labels derived from evaluation scores""" @@ -74,6 +106,9 @@ class QueryLog(BaseModel): evaluated_response: Optional[str] = None """The response being evaluated from the RAG system (before any remediation)""" + guardrail_evals: Optional[List[str]] = None + """Evals that should trigger guardrail""" + guardrailed: Optional[bool] = None """If true, the response was guardrailed""" diff --git a/tests/api_resources/projects/test_query_logs.py b/tests/api_resources/projects/test_query_logs.py index d75dcab..68d78ce 100644 --- a/tests/api_resources/projects/test_query_logs.py +++ b/tests/api_resources/projects/test_query_logs.py @@ -92,9 +92,12 @@ def test_method_list_with_all_params(self, client: Codex) -> None: created_at_end=parse_datetime("2019-12-27T18:11:19.117Z"), created_at_start=parse_datetime("2019-12-27T18:11:19.117Z"), custom_metadata="custom_metadata", + failed_evals=["string"], + guardrailed=True, limit=1, offset=0, order="asc", + passed_evals=["string"], primary_eval_issue=["hallucination"], sort="created_at", was_cache_hit=True, @@ -151,9 +154,13 @@ def test_method_list_by_group_with_all_params(self, client: Codex) -> None: created_at_end=parse_datetime("2019-12-27T18:11:19.117Z"), created_at_start=parse_datetime("2019-12-27T18:11:19.117Z"), custom_metadata="custom_metadata", + failed_evals=["string"], + guardrailed=True, limit=1, + needs_review=True, offset=0, order="asc", + passed_evals=["string"], primary_eval_issue=["hallucination"], remediation_ids=["182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e"], sort="created_at", @@ -211,9 +218,13 @@ def test_method_list_groups_with_all_params(self, client: Codex) -> None: created_at_end=parse_datetime("2019-12-27T18:11:19.117Z"), created_at_start=parse_datetime("2019-12-27T18:11:19.117Z"), custom_metadata="custom_metadata", + failed_evals=["string"], + guardrailed=True, limit=1, + needs_review=True, offset=0, order="asc", + passed_evals=["string"], primary_eval_issue=["hallucination"], sort="created_at", was_cache_hit=True, @@ -380,9 +391,12 @@ async def test_method_list_with_all_params(self, async_client: AsyncCodex) -> No created_at_end=parse_datetime("2019-12-27T18:11:19.117Z"), created_at_start=parse_datetime("2019-12-27T18:11:19.117Z"), custom_metadata="custom_metadata", + failed_evals=["string"], + guardrailed=True, limit=1, offset=0, order="asc", + passed_evals=["string"], primary_eval_issue=["hallucination"], sort="created_at", was_cache_hit=True, @@ -439,9 +453,13 @@ async def test_method_list_by_group_with_all_params(self, async_client: AsyncCod created_at_end=parse_datetime("2019-12-27T18:11:19.117Z"), created_at_start=parse_datetime("2019-12-27T18:11:19.117Z"), custom_metadata="custom_metadata", + failed_evals=["string"], + guardrailed=True, limit=1, + needs_review=True, offset=0, order="asc", + passed_evals=["string"], primary_eval_issue=["hallucination"], remediation_ids=["182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e"], sort="created_at", @@ -499,9 +517,13 @@ async def test_method_list_groups_with_all_params(self, async_client: AsyncCodex created_at_end=parse_datetime("2019-12-27T18:11:19.117Z"), 
created_at_start=parse_datetime("2019-12-27T18:11:19.117Z"), custom_metadata="custom_metadata", + failed_evals=["string"], + guardrailed=True, limit=1, + needs_review=True, offset=0, order="asc", + passed_evals=["string"], primary_eval_issue=["hallucination"], sort="created_at", was_cache_hit=True, diff --git a/tests/api_resources/test_projects.py b/tests/api_resources/test_projects.py index e7d7eb1..0764d9a 100644 --- a/tests/api_resources/test_projects.py +++ b/tests/api_resources/test_projects.py @@ -206,8 +206,6 @@ def test_path_params_retrieve(self, client: Codex) -> None: def test_method_update(self, client: Codex) -> None: project = client.projects.update( project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", - config={}, - name="name", ) assert_matches_type(ProjectReturnSchema, project, path=["response"]) @@ -216,6 +214,7 @@ def test_method_update(self, client: Codex) -> None: def test_method_update_with_all_params(self, client: Codex) -> None: project = client.projects.update( project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", + auto_clustering_enabled=True, config={ "clustering_use_llm_matching": True, "eval_config": { @@ -298,9 +297,8 @@ def test_method_update_with_all_params(self, client: Codex) -> None: "query_use_llm_matching": True, "upper_llm_match_distance_threshold": 0, }, - name="name", - auto_clustering_enabled=True, description="description", + name="name", ) assert_matches_type(ProjectReturnSchema, project, path=["response"]) @@ -309,8 +307,6 @@ def test_method_update_with_all_params(self, client: Codex) -> None: def test_raw_response_update(self, client: Codex) -> None: response = client.projects.with_raw_response.update( project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", - config={}, - name="name", ) assert response.is_closed is True @@ -323,8 +319,6 @@ def test_raw_response_update(self, client: Codex) -> None: def test_streaming_response_update(self, client: Codex) -> None: with client.projects.with_streaming_response.update( project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", - config={}, - name="name", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -340,8 +334,6 @@ def test_path_params_update(self, client: Codex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): client.projects.with_raw_response.update( project_id="", - config={}, - name="name", ) @pytest.mark.skip() @@ -919,8 +911,6 @@ async def test_path_params_retrieve(self, async_client: AsyncCodex) -> None: async def test_method_update(self, async_client: AsyncCodex) -> None: project = await async_client.projects.update( project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", - config={}, - name="name", ) assert_matches_type(ProjectReturnSchema, project, path=["response"]) @@ -929,6 +919,7 @@ async def test_method_update(self, async_client: AsyncCodex) -> None: async def test_method_update_with_all_params(self, async_client: AsyncCodex) -> None: project = await async_client.projects.update( project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", + auto_clustering_enabled=True, config={ "clustering_use_llm_matching": True, "eval_config": { @@ -1011,9 +1002,8 @@ async def test_method_update_with_all_params(self, async_client: AsyncCodex) -> "query_use_llm_matching": True, "upper_llm_match_distance_threshold": 0, }, - name="name", - auto_clustering_enabled=True, description="description", + name="name", ) assert_matches_type(ProjectReturnSchema, project, path=["response"]) @@ -1022,8 +1012,6 
@@ async def test_method_update_with_all_params(self, async_client: AsyncCodex) -> async def test_raw_response_update(self, async_client: AsyncCodex) -> None: response = await async_client.projects.with_raw_response.update( project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", - config={}, - name="name", ) assert response.is_closed is True @@ -1036,8 +1024,6 @@ async def test_raw_response_update(self, async_client: AsyncCodex) -> None: async def test_streaming_response_update(self, async_client: AsyncCodex) -> None: async with async_client.projects.with_streaming_response.update( project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", - config={}, - name="name", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -1053,8 +1039,6 @@ async def test_path_params_update(self, async_client: AsyncCodex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): await async_client.projects.with_raw_response.update( project_id="", - config={}, - name="name", ) @pytest.mark.skip()
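With this update, `projects.update` no longer requires `config` and `name`; every field is optional, so partial updates are possible. A minimal sketch of the new call shape, assuming a `Codex` client whose authentication is configured elsewhere (all field values below are placeholders):

```python
from codex import Codex

client = Codex()  # assumes credentials are picked up from the environment or client kwargs

# Only the fields being changed need to be supplied; omitted fields are left as-is.
project = client.projects.update(
    project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e",  # example UUID from the test suite
    name="renamed-project",  # placeholder value
    auto_clustering_enabled=True,
)
```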
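The query-log list endpoint gains eval-oriented filters: `failed_evals`, `passed_evals`, and `guardrailed`. A sketch of combining them, assuming the resource is exposed as `client.projects.query_logs` (inferred from the module layout) and using placeholder eval keys:

```python
# Logs where the "hallucination" eval failed, "context_sufficiency" passed,
# and a guardrail was applied to the response.
logs = client.projects.query_logs.list(
    project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e",
    failed_evals=["hallucination"],  # placeholder eval keys
    passed_evals=["context_sufficiency"],
    guardrailed=True,
    order="desc",
    limit=25,
)
```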
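`list_groups` and `list_by_group` accept the same filters plus the new `needs_review` flag. A sketch under the same naming assumptions:

```python
# Log groups that still need SME review and failed at least one of these evals.
groups = client.projects.query_logs.list_groups(
    project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e",
    needs_review=True,
    failed_evals=["hallucination"],  # placeholder eval key
)

# Child logs of specific remediation groups, restricted to guardrailed entries.
children = client.projects.query_logs.list_by_group(
    project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e",
    remediation_ids=["182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e"],
    guardrailed=True,
)
```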
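On the response side, `formatted_eval_scores` is now a mapping of typed models with `score` and `status` fields instead of `Dict[str, Union[float, Literal["pass", "fail"]]]`, and the new `formatted_escalation_eval_scores`, `formatted_guardrail_eval_scores`, `escalation_evals`, and `guardrail_evals` fields are available. A sketch of reading them from any query-log model returned by the endpoints above:

```python
def summarize(log) -> None:
    """Print eval results for a query-log model (e.g. QueryLogRetrieveResponse or a list item)."""
    for eval_key, result in (log.formatted_eval_scores or {}).items():
        # Each entry is a typed model with .score (float) and .status ("pass" | "fail").
        print(f"{eval_key}: {result.score:.2f} ({result.status})")

    # New fields describing which evals drive guardrails and SME escalation.
    print("guardrail evals:", log.guardrail_evals)
    print("escalation evals:", log.escalation_evals)
    print("guardrail scores:", log.formatted_guardrail_eval_scores)
    print("escalation scores:", log.formatted_escalation_eval_scores)
```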
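The same parameters are mirrored on the async client. A sketch, again assuming the `client.projects.query_logs` accessor and environment-based auth:

```python
import asyncio

from codex import AsyncCodex


async def main() -> None:
    async_client = AsyncCodex()  # assumes credentials are picked up from the environment
    logs = await async_client.projects.query_logs.list(
        project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e",
        passed_evals=["context_sufficiency"],  # placeholder eval key
        guardrailed=False,
    )
    print(logs)


asyncio.run(main())
```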