diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 2f7778a..bbb722b 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -17,6 +17,7 @@ jobs:
     timeout-minutes: 10
     name: lint
     runs-on: ${{ github.repository == 'stainless-sdks/codex-python' && 'depot-ubuntu-24.04' || 'ubuntu-latest' }}
+    if: github.event_name == 'push' || github.event.pull_request.head.repo.fork
     steps:
       - uses: actions/checkout@v4

@@ -35,7 +36,7 @@ jobs:
         run: ./scripts/lint

   upload:
-    if: github.repository == 'stainless-sdks/codex-python'
+    if: github.repository == 'stainless-sdks/codex-python' && (github.event_name == 'push' || github.event.pull_request.head.repo.fork)
     timeout-minutes: 10
     name: upload
     permissions:
diff --git a/.release-please-manifest.json b/.release-please-manifest.json
index aa84875..1c0bb88 100644
--- a/.release-please-manifest.json
+++ b/.release-please-manifest.json
@@ -1,3 +1,3 @@
 {
-  ".": "0.1.0-alpha.22"
+  ".": "0.1.0-alpha.23"
 }
\ No newline at end of file
diff --git a/.stats.yml b/.stats.yml
index 04c1386..c509062 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1,3 +1,3 @@
 configured_endpoints: 65
-openapi_spec_hash: 80696dc202de8bacc0e43506d7c210b0
+openapi_spec_hash: f63d4542b4bd1530ced013eb686cab99
 config_hash: 14b2643a0ec60cf326dfed00939644ff
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2b2988f..80023ff 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,26 @@
 # Changelog

+## 0.1.0-alpha.23 (2025-06-30)
+
+Full Changelog: [v0.1.0-alpha.22...v0.1.0-alpha.23](https://github.com/cleanlab/codex-python/compare/v0.1.0-alpha.22...v0.1.0-alpha.23)
+
+### Features
+
+* **api:** api update ([31096f4](https://github.com/cleanlab/codex-python/commit/31096f4820a7bfdd204b0a2d1d84ab1e36e32d0c))
+* **api:** api update ([be06884](https://github.com/cleanlab/codex-python/commit/be06884d321ca5009c9d82346c1b74c7429f82fa))
+* **api:** api update ([41b210d](https://github.com/cleanlab/codex-python/commit/41b210dc69c2b9c45eeab01a0afac6a4563d41f2))
+
+
+### Bug Fixes
+
+* **ci:** correct conditional ([45d3bc0](https://github.com/cleanlab/codex-python/commit/45d3bc05ab56d3e67d036ce84b2c9a1f2d8cfd69))
+* **ci:** release-doctor — report correct token name ([1a5e444](https://github.com/cleanlab/codex-python/commit/1a5e444226c829392181d98bc06f8cfb8bf13bd9))
+
+
+### Chores
+
+* **ci:** only run for pushes and fork pull requests ([6b590bd](https://github.com/cleanlab/codex-python/commit/6b590bd454e939b8453d95c239ee85be1a326909))
+
 ## 0.1.0-alpha.22 (2025-06-24)

 Full Changelog: [v0.1.0-alpha.21...v0.1.0-alpha.22](https://github.com/cleanlab/codex-python/compare/v0.1.0-alpha.21...v0.1.0-alpha.22)
diff --git a/bin/check-release-environment b/bin/check-release-environment
index a1446a7..b845b0f 100644
--- a/bin/check-release-environment
+++ b/bin/check-release-environment
@@ -3,7 +3,7 @@
 errors=()

 if [ -z "${PYPI_TOKEN}" ]; then
-  errors+=("The CODEX_PYPI_TOKEN secret has not been set. Please set it in either this repository's secrets or your organization secrets.")
+  errors+=("The PYPI_TOKEN secret has not been set. Please set it in either this repository's secrets or your organization secrets.")
 fi

 lenErrors=${#errors[@]}
diff --git a/pyproject.toml b/pyproject.toml
index b71f9f0..16e362c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "codex-sdk"
-version = "0.1.0-alpha.22"
+version = "0.1.0-alpha.23"
 description = "Internal SDK used within cleanlab-codex package. Refer to https://pypi.org/project/cleanlab-codex/ instead."
 dynamic = ["readme"]
 license = "MIT"
diff --git a/src/codex/_version.py b/src/codex/_version.py
index a88a1c3..18f2dcb 100644
--- a/src/codex/_version.py
+++ b/src/codex/_version.py
@@ -1,4 +1,4 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.

 __title__ = "codex"
-__version__ = "0.1.0-alpha.22"  # x-release-please-version
+__version__ = "0.1.0-alpha.23"  # x-release-please-version
diff --git a/src/codex/resources/projects/projects.py b/src/codex/resources/projects/projects.py
index 22b5caf..fc5a43a 100644
--- a/src/codex/resources/projects/projects.py
+++ b/src/codex/resources/projects/projects.py
@@ -212,10 +212,10 @@ def update(
         self,
         project_id: str,
         *,
-        config: project_update_params.Config,
-        name: str,
-        auto_clustering_enabled: bool | NotGiven = NOT_GIVEN,
+        auto_clustering_enabled: Optional[bool] | NotGiven = NOT_GIVEN,
+        config: Optional[project_update_params.Config] | NotGiven = NOT_GIVEN,
         description: Optional[str] | NotGiven = NOT_GIVEN,
+        name: Optional[str] | NotGiven = NOT_GIVEN,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
@@ -241,10 +241,10 @@ def update(
             f"/api/projects/{project_id}",
             body=maybe_transform(
                 {
-                    "config": config,
-                    "name": name,
                     "auto_clustering_enabled": auto_clustering_enabled,
+                    "config": config,
                     "description": description,
+                    "name": name,
                 },
                 project_update_params.ProjectUpdateParams,
             ),
@@ -820,10 +820,10 @@ async def update(
         self,
         project_id: str,
         *,
-        config: project_update_params.Config,
-        name: str,
-        auto_clustering_enabled: bool | NotGiven = NOT_GIVEN,
+        auto_clustering_enabled: Optional[bool] | NotGiven = NOT_GIVEN,
+        config: Optional[project_update_params.Config] | NotGiven = NOT_GIVEN,
         description: Optional[str] | NotGiven = NOT_GIVEN,
+        name: Optional[str] | NotGiven = NOT_GIVEN,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None, @@ -849,10 +849,10 @@ async def update( f"/api/projects/{project_id}", body=await async_maybe_transform( { - "config": config, - "name": name, "auto_clustering_enabled": auto_clustering_enabled, + "config": config, "description": description, + "name": name, }, project_update_params.ProjectUpdateParams, ), diff --git a/src/codex/resources/projects/query_logs.py b/src/codex/resources/projects/query_logs.py index 32ec739..e97243e 100644 --- a/src/codex/resources/projects/query_logs.py +++ b/src/codex/resources/projects/query_logs.py @@ -92,9 +92,12 @@ def list( created_at_end: Union[str, datetime, None] | NotGiven = NOT_GIVEN, created_at_start: Union[str, datetime, None] | NotGiven = NOT_GIVEN, custom_metadata: Optional[str] | NotGiven = NOT_GIVEN, + failed_evals: Optional[List[str]] | NotGiven = NOT_GIVEN, + guardrailed: Optional[bool] | NotGiven = NOT_GIVEN, limit: int | NotGiven = NOT_GIVEN, offset: int | NotGiven = NOT_GIVEN, order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + passed_evals: Optional[List[str]] | NotGiven = NOT_GIVEN, primary_eval_issue: Optional[ List[Literal["hallucination", "search_failure", "unhelpful", "difficult_query", "unsupported"]] ] @@ -118,6 +121,12 @@ def list( custom_metadata: Filter by custom metadata as JSON string: {"key1": "value1", "key2": "value2"} + failed_evals: Filter by evals that failed + + guardrailed: Filter by guardrailed status + + passed_evals: Filter by evals that passed + primary_eval_issue: Filter logs that have ANY of these primary evaluation issues (OR operation) was_cache_hit: Filter by cache hit status @@ -144,9 +153,12 @@ def list( "created_at_end": created_at_end, "created_at_start": created_at_start, "custom_metadata": custom_metadata, + "failed_evals": failed_evals, + "guardrailed": guardrailed, "limit": limit, "offset": offset, "order": order, + "passed_evals": passed_evals, "primary_eval_issue": primary_eval_issue, "sort": sort, "was_cache_hit": was_cache_hit, @@ -164,9 +176,13 @@ def list_by_group( created_at_end: Union[str, datetime, None] | NotGiven = NOT_GIVEN, created_at_start: Union[str, datetime, None] | NotGiven = NOT_GIVEN, custom_metadata: Optional[str] | NotGiven = NOT_GIVEN, + failed_evals: Optional[List[str]] | NotGiven = NOT_GIVEN, + guardrailed: Optional[bool] | NotGiven = NOT_GIVEN, limit: int | NotGiven = NOT_GIVEN, + needs_review: Optional[bool] | NotGiven = NOT_GIVEN, offset: int | NotGiven = NOT_GIVEN, order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + passed_evals: Optional[List[str]] | NotGiven = NOT_GIVEN, primary_eval_issue: Optional[ List[Literal["hallucination", "search_failure", "unhelpful", "difficult_query", "unsupported"]] ] @@ -191,6 +207,14 @@ def list_by_group( custom_metadata: Filter by custom metadata as JSON string: {"key1": "value1", "key2": "value2"} + failed_evals: Filter by evals that failed + + guardrailed: Filter by guardrailed status + + needs_review: Filter logs that need review + + passed_evals: Filter by evals that passed + primary_eval_issue: Filter logs that have ANY of these primary evaluation issues (OR operation) remediation_ids: List of groups to list child logs for @@ -219,9 +243,13 @@ def list_by_group( "created_at_end": created_at_end, "created_at_start": created_at_start, "custom_metadata": custom_metadata, + "failed_evals": failed_evals, + "guardrailed": guardrailed, "limit": limit, + "needs_review": needs_review, "offset": offset, "order": order, + "passed_evals": passed_evals, "primary_eval_issue": primary_eval_issue, 
"remediation_ids": remediation_ids, "sort": sort, @@ -240,9 +268,13 @@ def list_groups( created_at_end: Union[str, datetime, None] | NotGiven = NOT_GIVEN, created_at_start: Union[str, datetime, None] | NotGiven = NOT_GIVEN, custom_metadata: Optional[str] | NotGiven = NOT_GIVEN, + failed_evals: Optional[List[str]] | NotGiven = NOT_GIVEN, + guardrailed: Optional[bool] | NotGiven = NOT_GIVEN, limit: int | NotGiven = NOT_GIVEN, + needs_review: Optional[bool] | NotGiven = NOT_GIVEN, offset: int | NotGiven = NOT_GIVEN, order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + passed_evals: Optional[List[str]] | NotGiven = NOT_GIVEN, primary_eval_issue: Optional[ List[Literal["hallucination", "search_failure", "unhelpful", "difficult_query", "unsupported"]] ] @@ -267,6 +299,14 @@ def list_groups( custom_metadata: Filter by custom metadata as JSON string: {"key1": "value1", "key2": "value2"} + failed_evals: Filter by evals that failed + + guardrailed: Filter by guardrailed status + + needs_review: Filter log groups that need review + + passed_evals: Filter by evals that passed + primary_eval_issue: Filter logs that have ANY of these primary evaluation issues (OR operation) was_cache_hit: Filter by cache hit status @@ -293,9 +333,13 @@ def list_groups( "created_at_end": created_at_end, "created_at_start": created_at_start, "custom_metadata": custom_metadata, + "failed_evals": failed_evals, + "guardrailed": guardrailed, "limit": limit, + "needs_review": needs_review, "offset": offset, "order": order, + "passed_evals": passed_evals, "primary_eval_issue": primary_eval_issue, "sort": sort, "was_cache_hit": was_cache_hit, @@ -406,9 +450,12 @@ async def list( created_at_end: Union[str, datetime, None] | NotGiven = NOT_GIVEN, created_at_start: Union[str, datetime, None] | NotGiven = NOT_GIVEN, custom_metadata: Optional[str] | NotGiven = NOT_GIVEN, + failed_evals: Optional[List[str]] | NotGiven = NOT_GIVEN, + guardrailed: Optional[bool] | NotGiven = NOT_GIVEN, limit: int | NotGiven = NOT_GIVEN, offset: int | NotGiven = NOT_GIVEN, order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + passed_evals: Optional[List[str]] | NotGiven = NOT_GIVEN, primary_eval_issue: Optional[ List[Literal["hallucination", "search_failure", "unhelpful", "difficult_query", "unsupported"]] ] @@ -432,6 +479,12 @@ async def list( custom_metadata: Filter by custom metadata as JSON string: {"key1": "value1", "key2": "value2"} + failed_evals: Filter by evals that failed + + guardrailed: Filter by guardrailed status + + passed_evals: Filter by evals that passed + primary_eval_issue: Filter logs that have ANY of these primary evaluation issues (OR operation) was_cache_hit: Filter by cache hit status @@ -458,9 +511,12 @@ async def list( "created_at_end": created_at_end, "created_at_start": created_at_start, "custom_metadata": custom_metadata, + "failed_evals": failed_evals, + "guardrailed": guardrailed, "limit": limit, "offset": offset, "order": order, + "passed_evals": passed_evals, "primary_eval_issue": primary_eval_issue, "sort": sort, "was_cache_hit": was_cache_hit, @@ -478,9 +534,13 @@ async def list_by_group( created_at_end: Union[str, datetime, None] | NotGiven = NOT_GIVEN, created_at_start: Union[str, datetime, None] | NotGiven = NOT_GIVEN, custom_metadata: Optional[str] | NotGiven = NOT_GIVEN, + failed_evals: Optional[List[str]] | NotGiven = NOT_GIVEN, + guardrailed: Optional[bool] | NotGiven = NOT_GIVEN, limit: int | NotGiven = NOT_GIVEN, + needs_review: Optional[bool] | NotGiven = NOT_GIVEN, offset: int | NotGiven = NOT_GIVEN, 
order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + passed_evals: Optional[List[str]] | NotGiven = NOT_GIVEN, primary_eval_issue: Optional[ List[Literal["hallucination", "search_failure", "unhelpful", "difficult_query", "unsupported"]] ] @@ -505,6 +565,14 @@ async def list_by_group( custom_metadata: Filter by custom metadata as JSON string: {"key1": "value1", "key2": "value2"} + failed_evals: Filter by evals that failed + + guardrailed: Filter by guardrailed status + + needs_review: Filter logs that need review + + passed_evals: Filter by evals that passed + primary_eval_issue: Filter logs that have ANY of these primary evaluation issues (OR operation) remediation_ids: List of groups to list child logs for @@ -533,9 +601,13 @@ async def list_by_group( "created_at_end": created_at_end, "created_at_start": created_at_start, "custom_metadata": custom_metadata, + "failed_evals": failed_evals, + "guardrailed": guardrailed, "limit": limit, + "needs_review": needs_review, "offset": offset, "order": order, + "passed_evals": passed_evals, "primary_eval_issue": primary_eval_issue, "remediation_ids": remediation_ids, "sort": sort, @@ -554,9 +626,13 @@ async def list_groups( created_at_end: Union[str, datetime, None] | NotGiven = NOT_GIVEN, created_at_start: Union[str, datetime, None] | NotGiven = NOT_GIVEN, custom_metadata: Optional[str] | NotGiven = NOT_GIVEN, + failed_evals: Optional[List[str]] | NotGiven = NOT_GIVEN, + guardrailed: Optional[bool] | NotGiven = NOT_GIVEN, limit: int | NotGiven = NOT_GIVEN, + needs_review: Optional[bool] | NotGiven = NOT_GIVEN, offset: int | NotGiven = NOT_GIVEN, order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + passed_evals: Optional[List[str]] | NotGiven = NOT_GIVEN, primary_eval_issue: Optional[ List[Literal["hallucination", "search_failure", "unhelpful", "difficult_query", "unsupported"]] ] @@ -581,6 +657,14 @@ async def list_groups( custom_metadata: Filter by custom metadata as JSON string: {"key1": "value1", "key2": "value2"} + failed_evals: Filter by evals that failed + + guardrailed: Filter by guardrailed status + + needs_review: Filter log groups that need review + + passed_evals: Filter by evals that passed + primary_eval_issue: Filter logs that have ANY of these primary evaluation issues (OR operation) was_cache_hit: Filter by cache hit status @@ -607,9 +691,13 @@ async def list_groups( "created_at_end": created_at_end, "created_at_start": created_at_start, "custom_metadata": custom_metadata, + "failed_evals": failed_evals, + "guardrailed": guardrailed, "limit": limit, + "needs_review": needs_review, "offset": offset, "order": order, + "passed_evals": passed_evals, "primary_eval_issue": primary_eval_issue, "sort": sort, "was_cache_hit": was_cache_hit, diff --git a/src/codex/types/project_update_params.py b/src/codex/types/project_update_params.py index 73dad67..3e24441 100644 --- a/src/codex/types/project_update_params.py +++ b/src/codex/types/project_update_params.py @@ -21,14 +21,14 @@ class ProjectUpdateParams(TypedDict, total=False): - config: Required[Config] + auto_clustering_enabled: Optional[bool] - name: Required[str] - - auto_clustering_enabled: bool + config: Optional[Config] description: Optional[str] + name: Optional[str] + class ConfigEvalConfigCustomEvalsEvals(TypedDict, total=False): criteria: Required[str] diff --git a/src/codex/types/projects/query_log_list_by_group_params.py b/src/codex/types/projects/query_log_list_by_group_params.py index 66166a1..b44970a 100644 --- a/src/codex/types/projects/query_log_list_by_group_params.py 
+++ b/src/codex/types/projects/query_log_list_by_group_params.py @@ -21,12 +21,24 @@ class QueryLogListByGroupParams(TypedDict, total=False): custom_metadata: Optional[str] """Filter by custom metadata as JSON string: {"key1": "value1", "key2": "value2"}""" + failed_evals: Optional[List[str]] + """Filter by evals that failed""" + + guardrailed: Optional[bool] + """Filter by guardrailed status""" + limit: int + needs_review: Optional[bool] + """Filter logs that need review""" + offset: int order: Literal["asc", "desc"] + passed_evals: Optional[List[str]] + """Filter by evals that passed""" + primary_eval_issue: Optional[ List[Literal["hallucination", "search_failure", "unhelpful", "difficult_query", "unsupported"]] ] diff --git a/src/codex/types/projects/query_log_list_by_group_response.py b/src/codex/types/projects/query_log_list_by_group_response.py index d11d827..9d1e0e6 100644 --- a/src/codex/types/projects/query_log_list_by_group_response.py +++ b/src/codex/types/projects/query_log_list_by_group_response.py @@ -10,10 +10,31 @@ "QueryLogListByGroupResponse", "QueryLogsByGroup", "QueryLogsByGroupQueryLog", + "QueryLogsByGroupQueryLogFormattedEscalationEvalScores", + "QueryLogsByGroupQueryLogFormattedEvalScores", + "QueryLogsByGroupQueryLogFormattedGuardrailEvalScores", "QueryLogsByGroupQueryLogContext", ] +class QueryLogsByGroupQueryLogFormattedEscalationEvalScores(BaseModel): + score: float + + status: Literal["pass", "fail"] + + +class QueryLogsByGroupQueryLogFormattedEvalScores(BaseModel): + score: float + + status: Literal["pass", "fail"] + + +class QueryLogsByGroupQueryLogFormattedGuardrailEvalScores(BaseModel): + score: float + + status: Literal["pass", "fail"] + + class QueryLogsByGroupQueryLogContext(BaseModel): content: str """The actual content/text of the document.""" @@ -36,7 +57,9 @@ class QueryLogsByGroupQueryLog(BaseModel): created_at: datetime - formatted_eval_scores: Optional[Dict[str, Dict[str, Union[float, Literal["pass", "fail"]]]]] = None + formatted_escalation_eval_scores: Optional[Dict[str, QueryLogsByGroupQueryLogFormattedEscalationEvalScores]] = None + + formatted_eval_scores: Optional[Dict[str, QueryLogsByGroupQueryLogFormattedEvalScores]] = None """Format evaluation scores for frontend display with pass/fail status. Returns: Dictionary mapping eval keys to their formatted representation: { @@ -44,6 +67,8 @@ class QueryLogsByGroupQueryLog(BaseModel): eval_scores is None. 
""" + formatted_guardrail_eval_scores: Optional[Dict[str, QueryLogsByGroupQueryLogFormattedGuardrailEvalScores]] = None + is_bad_response: bool project_id: str @@ -67,6 +92,9 @@ class QueryLogsByGroupQueryLog(BaseModel): escalated: Optional[bool] = None """If true, the question was escalated to Codex for an SME to review""" + escalation_evals: Optional[List[str]] = None + """Evals that should trigger escalation to SME""" + eval_issue_labels: Optional[List[str]] = None """Labels derived from evaluation scores""" @@ -79,6 +107,9 @@ class QueryLogsByGroupQueryLog(BaseModel): evaluated_response: Optional[str] = None """The response being evaluated from the RAG system (before any remediation)""" + guardrail_evals: Optional[List[str]] = None + """Evals that should trigger guardrail""" + guardrailed: Optional[bool] = None """If true, the response was guardrailed""" diff --git a/src/codex/types/projects/query_log_list_groups_params.py b/src/codex/types/projects/query_log_list_groups_params.py index 558ac0b..94d549f 100644 --- a/src/codex/types/projects/query_log_list_groups_params.py +++ b/src/codex/types/projects/query_log_list_groups_params.py @@ -21,12 +21,24 @@ class QueryLogListGroupsParams(TypedDict, total=False): custom_metadata: Optional[str] """Filter by custom metadata as JSON string: {"key1": "value1", "key2": "value2"}""" + failed_evals: Optional[List[str]] + """Filter by evals that failed""" + + guardrailed: Optional[bool] + """Filter by guardrailed status""" + limit: int + needs_review: Optional[bool] + """Filter log groups that need review""" + offset: int order: Literal["asc", "desc"] + passed_evals: Optional[List[str]] + """Filter by evals that passed""" + primary_eval_issue: Optional[ List[Literal["hallucination", "search_failure", "unhelpful", "difficult_query", "unsupported"]] ] diff --git a/src/codex/types/projects/query_log_list_groups_response.py b/src/codex/types/projects/query_log_list_groups_response.py index fd87c30..7b77cc0 100644 --- a/src/codex/types/projects/query_log_list_groups_response.py +++ b/src/codex/types/projects/query_log_list_groups_response.py @@ -6,7 +6,32 @@ from ..._models import BaseModel -__all__ = ["QueryLogListGroupsResponse", "QueryLogGroup", "QueryLogGroupContext"] +__all__ = [ + "QueryLogListGroupsResponse", + "QueryLogGroup", + "QueryLogGroupFormattedEscalationEvalScores", + "QueryLogGroupFormattedEvalScores", + "QueryLogGroupFormattedGuardrailEvalScores", + "QueryLogGroupContext", +] + + +class QueryLogGroupFormattedEscalationEvalScores(BaseModel): + score: float + + status: Literal["pass", "fail"] + + +class QueryLogGroupFormattedEvalScores(BaseModel): + score: float + + status: Literal["pass", "fail"] + + +class QueryLogGroupFormattedGuardrailEvalScores(BaseModel): + score: float + + status: Literal["pass", "fail"] class QueryLogGroupContext(BaseModel): @@ -31,7 +56,9 @@ class QueryLogGroup(BaseModel): created_at: datetime - formatted_eval_scores: Optional[Dict[str, Dict[str, Union[float, Literal["pass", "fail"]]]]] = None + formatted_escalation_eval_scores: Optional[Dict[str, QueryLogGroupFormattedEscalationEvalScores]] = None + + formatted_eval_scores: Optional[Dict[str, QueryLogGroupFormattedEvalScores]] = None """Format evaluation scores for frontend display with pass/fail status. Returns: Dictionary mapping eval keys to their formatted representation: { @@ -39,6 +66,8 @@ class QueryLogGroup(BaseModel): eval_scores is None. 
""" + formatted_guardrail_eval_scores: Optional[Dict[str, QueryLogGroupFormattedGuardrailEvalScores]] = None + is_bad_response: bool needs_review: bool @@ -68,6 +97,9 @@ class QueryLogGroup(BaseModel): escalated: Optional[bool] = None """If true, the question was escalated to Codex for an SME to review""" + escalation_evals: Optional[List[str]] = None + """Evals that should trigger escalation to SME""" + eval_issue_labels: Optional[List[str]] = None """Labels derived from evaluation scores""" @@ -80,6 +112,9 @@ class QueryLogGroup(BaseModel): evaluated_response: Optional[str] = None """The response being evaluated from the RAG system (before any remediation)""" + guardrail_evals: Optional[List[str]] = None + """Evals that should trigger guardrail""" + guardrailed: Optional[bool] = None """If true, the response was guardrailed""" diff --git a/src/codex/types/projects/query_log_list_params.py b/src/codex/types/projects/query_log_list_params.py index 9cf3211..0f72b24 100644 --- a/src/codex/types/projects/query_log_list_params.py +++ b/src/codex/types/projects/query_log_list_params.py @@ -21,12 +21,21 @@ class QueryLogListParams(TypedDict, total=False): custom_metadata: Optional[str] """Filter by custom metadata as JSON string: {"key1": "value1", "key2": "value2"}""" + failed_evals: Optional[List[str]] + """Filter by evals that failed""" + + guardrailed: Optional[bool] + """Filter by guardrailed status""" + limit: int offset: int order: Literal["asc", "desc"] + passed_evals: Optional[List[str]] + """Filter by evals that passed""" + primary_eval_issue: Optional[ List[Literal["hallucination", "search_failure", "unhelpful", "difficult_query", "unsupported"]] ] diff --git a/src/codex/types/projects/query_log_list_response.py b/src/codex/types/projects/query_log_list_response.py index bfd37cd..fa04904 100644 --- a/src/codex/types/projects/query_log_list_response.py +++ b/src/codex/types/projects/query_log_list_response.py @@ -6,7 +6,32 @@ from ..._models import BaseModel -__all__ = ["QueryLogListResponse", "QueryLog", "QueryLogContext"] +__all__ = [ + "QueryLogListResponse", + "QueryLog", + "QueryLogFormattedEscalationEvalScores", + "QueryLogFormattedEvalScores", + "QueryLogFormattedGuardrailEvalScores", + "QueryLogContext", +] + + +class QueryLogFormattedEscalationEvalScores(BaseModel): + score: float + + status: Literal["pass", "fail"] + + +class QueryLogFormattedEvalScores(BaseModel): + score: float + + status: Literal["pass", "fail"] + + +class QueryLogFormattedGuardrailEvalScores(BaseModel): + score: float + + status: Literal["pass", "fail"] class QueryLogContext(BaseModel): @@ -31,7 +56,9 @@ class QueryLog(BaseModel): created_at: datetime - formatted_eval_scores: Optional[Dict[str, Dict[str, Union[float, Literal["pass", "fail"]]]]] = None + formatted_escalation_eval_scores: Optional[Dict[str, QueryLogFormattedEscalationEvalScores]] = None + + formatted_eval_scores: Optional[Dict[str, QueryLogFormattedEvalScores]] = None """Format evaluation scores for frontend display with pass/fail status. Returns: Dictionary mapping eval keys to their formatted representation: { @@ -39,6 +66,8 @@ class QueryLog(BaseModel): eval_scores is None. 
""" + formatted_guardrail_eval_scores: Optional[Dict[str, QueryLogFormattedGuardrailEvalScores]] = None + is_bad_response: bool project_id: str @@ -62,6 +91,9 @@ class QueryLog(BaseModel): escalated: Optional[bool] = None """If true, the question was escalated to Codex for an SME to review""" + escalation_evals: Optional[List[str]] = None + """Evals that should trigger escalation to SME""" + eval_issue_labels: Optional[List[str]] = None """Labels derived from evaluation scores""" @@ -74,6 +106,9 @@ class QueryLog(BaseModel): evaluated_response: Optional[str] = None """The response being evaluated from the RAG system (before any remediation)""" + guardrail_evals: Optional[List[str]] = None + """Evals that should trigger guardrail""" + guardrailed: Optional[bool] = None """If true, the response was guardrailed""" diff --git a/src/codex/types/projects/query_log_retrieve_response.py b/src/codex/types/projects/query_log_retrieve_response.py index 3b813ee..8bb6128 100644 --- a/src/codex/types/projects/query_log_retrieve_response.py +++ b/src/codex/types/projects/query_log_retrieve_response.py @@ -6,7 +6,31 @@ from ..._models import BaseModel -__all__ = ["QueryLogRetrieveResponse", "Context"] +__all__ = [ + "QueryLogRetrieveResponse", + "FormattedEscalationEvalScores", + "FormattedEvalScores", + "FormattedGuardrailEvalScores", + "Context", +] + + +class FormattedEscalationEvalScores(BaseModel): + score: float + + status: Literal["pass", "fail"] + + +class FormattedEvalScores(BaseModel): + score: float + + status: Literal["pass", "fail"] + + +class FormattedGuardrailEvalScores(BaseModel): + score: float + + status: Literal["pass", "fail"] class Context(BaseModel): @@ -31,7 +55,9 @@ class QueryLogRetrieveResponse(BaseModel): created_at: datetime - formatted_eval_scores: Optional[Dict[str, Dict[str, Union[float, Literal["pass", "fail"]]]]] = None + formatted_escalation_eval_scores: Optional[Dict[str, FormattedEscalationEvalScores]] = None + + formatted_eval_scores: Optional[Dict[str, FormattedEvalScores]] = None """Format evaluation scores for frontend display with pass/fail status. Returns: Dictionary mapping eval keys to their formatted representation: { @@ -39,6 +65,8 @@ class QueryLogRetrieveResponse(BaseModel): eval_scores is None. 
""" + formatted_guardrail_eval_scores: Optional[Dict[str, FormattedGuardrailEvalScores]] = None + is_bad_response: bool project_id: str @@ -62,6 +90,9 @@ class QueryLogRetrieveResponse(BaseModel): escalated: Optional[bool] = None """If true, the question was escalated to Codex for an SME to review""" + escalation_evals: Optional[List[str]] = None + """Evals that should trigger escalation to SME""" + eval_issue_labels: Optional[List[str]] = None """Labels derived from evaluation scores""" @@ -74,6 +105,9 @@ class QueryLogRetrieveResponse(BaseModel): evaluated_response: Optional[str] = None """The response being evaluated from the RAG system (before any remediation)""" + guardrail_evals: Optional[List[str]] = None + """Evals that should trigger guardrail""" + guardrailed: Optional[bool] = None """If true, the response was guardrailed""" diff --git a/src/codex/types/projects/remediation_list_resolved_logs_response.py b/src/codex/types/projects/remediation_list_resolved_logs_response.py index 4f9682b..b2315aa 100644 --- a/src/codex/types/projects/remediation_list_resolved_logs_response.py +++ b/src/codex/types/projects/remediation_list_resolved_logs_response.py @@ -6,7 +6,32 @@ from ..._models import BaseModel -__all__ = ["RemediationListResolvedLogsResponse", "QueryLog", "QueryLogContext"] +__all__ = [ + "RemediationListResolvedLogsResponse", + "QueryLog", + "QueryLogFormattedEscalationEvalScores", + "QueryLogFormattedEvalScores", + "QueryLogFormattedGuardrailEvalScores", + "QueryLogContext", +] + + +class QueryLogFormattedEscalationEvalScores(BaseModel): + score: float + + status: Literal["pass", "fail"] + + +class QueryLogFormattedEvalScores(BaseModel): + score: float + + status: Literal["pass", "fail"] + + +class QueryLogFormattedGuardrailEvalScores(BaseModel): + score: float + + status: Literal["pass", "fail"] class QueryLogContext(BaseModel): @@ -31,7 +56,9 @@ class QueryLog(BaseModel): created_at: datetime - formatted_eval_scores: Optional[Dict[str, Dict[str, Union[float, Literal["pass", "fail"]]]]] = None + formatted_escalation_eval_scores: Optional[Dict[str, QueryLogFormattedEscalationEvalScores]] = None + + formatted_eval_scores: Optional[Dict[str, QueryLogFormattedEvalScores]] = None """Format evaluation scores for frontend display with pass/fail status. Returns: Dictionary mapping eval keys to their formatted representation: { @@ -39,6 +66,8 @@ class QueryLog(BaseModel): eval_scores is None. 
""" + formatted_guardrail_eval_scores: Optional[Dict[str, QueryLogFormattedGuardrailEvalScores]] = None + is_bad_response: bool project_id: str @@ -62,6 +91,9 @@ class QueryLog(BaseModel): escalated: Optional[bool] = None """If true, the question was escalated to Codex for an SME to review""" + escalation_evals: Optional[List[str]] = None + """Evals that should trigger escalation to SME""" + eval_issue_labels: Optional[List[str]] = None """Labels derived from evaluation scores""" @@ -74,6 +106,9 @@ class QueryLog(BaseModel): evaluated_response: Optional[str] = None """The response being evaluated from the RAG system (before any remediation)""" + guardrail_evals: Optional[List[str]] = None + """Evals that should trigger guardrail""" + guardrailed: Optional[bool] = None """If true, the response was guardrailed""" diff --git a/tests/api_resources/projects/test_query_logs.py b/tests/api_resources/projects/test_query_logs.py index d75dcab..68d78ce 100644 --- a/tests/api_resources/projects/test_query_logs.py +++ b/tests/api_resources/projects/test_query_logs.py @@ -92,9 +92,12 @@ def test_method_list_with_all_params(self, client: Codex) -> None: created_at_end=parse_datetime("2019-12-27T18:11:19.117Z"), created_at_start=parse_datetime("2019-12-27T18:11:19.117Z"), custom_metadata="custom_metadata", + failed_evals=["string"], + guardrailed=True, limit=1, offset=0, order="asc", + passed_evals=["string"], primary_eval_issue=["hallucination"], sort="created_at", was_cache_hit=True, @@ -151,9 +154,13 @@ def test_method_list_by_group_with_all_params(self, client: Codex) -> None: created_at_end=parse_datetime("2019-12-27T18:11:19.117Z"), created_at_start=parse_datetime("2019-12-27T18:11:19.117Z"), custom_metadata="custom_metadata", + failed_evals=["string"], + guardrailed=True, limit=1, + needs_review=True, offset=0, order="asc", + passed_evals=["string"], primary_eval_issue=["hallucination"], remediation_ids=["182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e"], sort="created_at", @@ -211,9 +218,13 @@ def test_method_list_groups_with_all_params(self, client: Codex) -> None: created_at_end=parse_datetime("2019-12-27T18:11:19.117Z"), created_at_start=parse_datetime("2019-12-27T18:11:19.117Z"), custom_metadata="custom_metadata", + failed_evals=["string"], + guardrailed=True, limit=1, + needs_review=True, offset=0, order="asc", + passed_evals=["string"], primary_eval_issue=["hallucination"], sort="created_at", was_cache_hit=True, @@ -380,9 +391,12 @@ async def test_method_list_with_all_params(self, async_client: AsyncCodex) -> No created_at_end=parse_datetime("2019-12-27T18:11:19.117Z"), created_at_start=parse_datetime("2019-12-27T18:11:19.117Z"), custom_metadata="custom_metadata", + failed_evals=["string"], + guardrailed=True, limit=1, offset=0, order="asc", + passed_evals=["string"], primary_eval_issue=["hallucination"], sort="created_at", was_cache_hit=True, @@ -439,9 +453,13 @@ async def test_method_list_by_group_with_all_params(self, async_client: AsyncCod created_at_end=parse_datetime("2019-12-27T18:11:19.117Z"), created_at_start=parse_datetime("2019-12-27T18:11:19.117Z"), custom_metadata="custom_metadata", + failed_evals=["string"], + guardrailed=True, limit=1, + needs_review=True, offset=0, order="asc", + passed_evals=["string"], primary_eval_issue=["hallucination"], remediation_ids=["182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e"], sort="created_at", @@ -499,9 +517,13 @@ async def test_method_list_groups_with_all_params(self, async_client: AsyncCodex created_at_end=parse_datetime("2019-12-27T18:11:19.117Z"), 
created_at_start=parse_datetime("2019-12-27T18:11:19.117Z"), custom_metadata="custom_metadata", + failed_evals=["string"], + guardrailed=True, limit=1, + needs_review=True, offset=0, order="asc", + passed_evals=["string"], primary_eval_issue=["hallucination"], sort="created_at", was_cache_hit=True, diff --git a/tests/api_resources/test_projects.py b/tests/api_resources/test_projects.py index e7d7eb1..0764d9a 100644 --- a/tests/api_resources/test_projects.py +++ b/tests/api_resources/test_projects.py @@ -206,8 +206,6 @@ def test_path_params_retrieve(self, client: Codex) -> None: def test_method_update(self, client: Codex) -> None: project = client.projects.update( project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", - config={}, - name="name", ) assert_matches_type(ProjectReturnSchema, project, path=["response"]) @@ -216,6 +214,7 @@ def test_method_update(self, client: Codex) -> None: def test_method_update_with_all_params(self, client: Codex) -> None: project = client.projects.update( project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", + auto_clustering_enabled=True, config={ "clustering_use_llm_matching": True, "eval_config": { @@ -298,9 +297,8 @@ def test_method_update_with_all_params(self, client: Codex) -> None: "query_use_llm_matching": True, "upper_llm_match_distance_threshold": 0, }, - name="name", - auto_clustering_enabled=True, description="description", + name="name", ) assert_matches_type(ProjectReturnSchema, project, path=["response"]) @@ -309,8 +307,6 @@ def test_method_update_with_all_params(self, client: Codex) -> None: def test_raw_response_update(self, client: Codex) -> None: response = client.projects.with_raw_response.update( project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", - config={}, - name="name", ) assert response.is_closed is True @@ -323,8 +319,6 @@ def test_raw_response_update(self, client: Codex) -> None: def test_streaming_response_update(self, client: Codex) -> None: with client.projects.with_streaming_response.update( project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", - config={}, - name="name", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -340,8 +334,6 @@ def test_path_params_update(self, client: Codex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): client.projects.with_raw_response.update( project_id="", - config={}, - name="name", ) @pytest.mark.skip() @@ -919,8 +911,6 @@ async def test_path_params_retrieve(self, async_client: AsyncCodex) -> None: async def test_method_update(self, async_client: AsyncCodex) -> None: project = await async_client.projects.update( project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", - config={}, - name="name", ) assert_matches_type(ProjectReturnSchema, project, path=["response"]) @@ -929,6 +919,7 @@ async def test_method_update(self, async_client: AsyncCodex) -> None: async def test_method_update_with_all_params(self, async_client: AsyncCodex) -> None: project = await async_client.projects.update( project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", + auto_clustering_enabled=True, config={ "clustering_use_llm_matching": True, "eval_config": { @@ -1011,9 +1002,8 @@ async def test_method_update_with_all_params(self, async_client: AsyncCodex) -> "query_use_llm_matching": True, "upper_llm_match_distance_threshold": 0, }, - name="name", - auto_clustering_enabled=True, description="description", + name="name", ) assert_matches_type(ProjectReturnSchema, project, path=["response"]) @@ -1022,8 +1012,6 
@@ async def test_method_update_with_all_params(self, async_client: AsyncCodex) -> async def test_raw_response_update(self, async_client: AsyncCodex) -> None: response = await async_client.projects.with_raw_response.update( project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", - config={}, - name="name", ) assert response.is_closed is True @@ -1036,8 +1024,6 @@ async def test_raw_response_update(self, async_client: AsyncCodex) -> None: async def test_streaming_response_update(self, async_client: AsyncCodex) -> None: async with async_client.projects.with_streaming_response.update( project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", - config={}, - name="name", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -1053,8 +1039,6 @@ async def test_path_params_update(self, async_client: AsyncCodex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): await async_client.projects.with_raw_response.update( project_id="", - config={}, - name="name", ) @pytest.mark.skip()
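With this update, `projects.update` no longer requires `config` and `name`; every field is optional, so partial updates are possible. A minimal sketch of the new call shape, assuming a `Codex` client whose authentication is configured elsewhere (all field values below are placeholders):

```python
from codex import Codex

client = Codex()  # assumes credentials are picked up from the environment or client kwargs

# Only the fields being changed need to be supplied; omitted fields are left as-is.
project = client.projects.update(
    project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e",  # example UUID from the test suite
    name="renamed-project",  # placeholder value
    auto_clustering_enabled=True,
)
```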
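The query-log list endpoint gains eval-oriented filters: `failed_evals`, `passed_evals`, and `guardrailed`. A sketch of combining them, assuming the resource is exposed as `client.projects.query_logs` (inferred from the module layout) and using placeholder eval keys:

```python
# Logs where the "hallucination" eval failed, "context_sufficiency" passed,
# and a guardrail was applied to the response.
logs = client.projects.query_logs.list(
    project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e",
    failed_evals=["hallucination"],  # placeholder eval keys
    passed_evals=["context_sufficiency"],
    guardrailed=True,
    order="desc",
    limit=25,
)
```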
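`list_groups` and `list_by_group` accept the same filters plus the new `needs_review` flag. A sketch under the same naming assumptions:

```python
# Log groups that still need SME review and failed at least one of these evals.
groups = client.projects.query_logs.list_groups(
    project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e",
    needs_review=True,
    failed_evals=["hallucination"],  # placeholder eval key
)

# Child logs of specific remediation groups, restricted to guardrailed entries.
children = client.projects.query_logs.list_by_group(
    project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e",
    remediation_ids=["182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e"],
    guardrailed=True,
)
```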
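On the response side, `formatted_eval_scores` is now a mapping of typed models with `score` and `status` fields instead of `Dict[str, Union[float, Literal["pass", "fail"]]]`, and the new `formatted_escalation_eval_scores`, `formatted_guardrail_eval_scores`, `escalation_evals`, and `guardrail_evals` fields are available. A sketch of reading them from any query-log model returned by the endpoints above:

```python
def summarize(log) -> None:
    """Print eval results for a query-log model (e.g. QueryLogRetrieveResponse or a list item)."""
    for eval_key, result in (log.formatted_eval_scores or {}).items():
        # Each entry is a typed model with .score (float) and .status ("pass" | "fail").
        print(f"{eval_key}: {result.score:.2f} ({result.status})")

    # New fields describing which evals drive guardrails and SME escalation.
    print("guardrail evals:", log.guardrail_evals)
    print("escalation evals:", log.escalation_evals)
    print("guardrail scores:", log.formatted_guardrail_eval_scores)
    print("escalation scores:", log.formatted_escalation_eval_scores)
```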
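The same parameters are mirrored on the async client. A sketch, again assuming the `client.projects.query_logs` accessor and environment-based auth:

```python
import asyncio

from codex import AsyncCodex


async def main() -> None:
    async_client = AsyncCodex()  # assumes credentials are picked up from the environment
    logs = await async_client.projects.query_logs.list(
        project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e",
        passed_evals=["context_sufficiency"],  # placeholder eval key
        guardrailed=False,
    )
    print(logs)


asyncio.run(main())
```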