feat: enhance error handling and validation across utility modules #972

Merged · 1 commit merged on Apr 29, 2025

284 changes: 236 additions & 48 deletions scrapegraphai/utils/code_error_analysis.py
@@ -12,8 +12,9 @@
"""

import json
from typing import Any, Dict
from typing import Any, Dict, Optional

from pydantic import BaseModel, Field, validator
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser

@@ -25,7 +26,77 @@
)


def syntax_focused_analysis(state: dict, llm_model) -> str:
class AnalysisError(Exception):
"""Base exception for code analysis errors."""
pass


class InvalidStateError(AnalysisError):
"""Exception raised when state dictionary is missing required keys."""
pass


class CodeAnalysisState(BaseModel):
"""Base model for code analysis state validation."""
generated_code: str = Field(..., description="The generated code to analyze")
errors: Dict[str, Any] = Field(..., description="Dictionary containing error information")

@validator('errors')
def validate_errors(cls, v):
"""Ensure errors dictionary has expected structure."""
if not isinstance(v, dict):
raise ValueError("errors must be a dictionary")
return v


class ExecutionAnalysisState(CodeAnalysisState):
"""Model for execution analysis state validation."""
html_code: Optional[str] = Field(None, description="HTML code if available")
html_analysis: Optional[str] = Field(None, description="Analysis of HTML code")

@validator('errors')
def validate_execution_errors(cls, v):
"""Ensure errors dictionary contains execution key."""
super().validate_errors(v)
if 'execution' not in v:
raise ValueError("errors dictionary must contain 'execution' key")
return v


class ValidationAnalysisState(CodeAnalysisState):
"""Model for validation analysis state validation."""
json_schema: Dict[str, Any] = Field(..., description="JSON schema for validation")
execution_result: Any = Field(..., description="Result of code execution")

@validator('errors')
def validate_validation_errors(cls, v):
"""Ensure errors dictionary contains validation key."""
super().validate_errors(v)
if 'validation' not in v:
raise ValueError("errors dictionary must contain 'validation' key")
return v


def get_optimal_analysis_template(error_type: str) -> str:
"""
Returns the optimal prompt template based on the error type.

Args:
error_type (str): Type of error to analyze.

Returns:
str: The prompt template text.
"""
template_registry = {
"syntax": TEMPLATE_SYNTAX_ANALYSIS,
"execution": TEMPLATE_EXECUTION_ANALYSIS,
"validation": TEMPLATE_VALIDATION_ANALYSIS,
"semantic": TEMPLATE_SEMANTIC_ANALYSIS,
}
return template_registry.get(error_type, TEMPLATE_SYNTAX_ANALYSIS)


def syntax_focused_analysis(state: Dict[str, Any], llm_model) -> str:
"""
Analyzes the syntax errors in the generated code.

@@ -35,17 +106,48 @@ def syntax_focused_analysis(state: dict, llm_model) -> str:

Returns:
str: The result of the syntax error analysis.

Raises:
InvalidStateError: If state is missing required keys.

Example:
>>> state = {
...     'generated_code': 'print("Hello World")',
...     'errors': {'syntax': 'Missing parenthesis'}
... }
>>> analysis = syntax_focused_analysis(state, mock_llm)
"""
prompt = PromptTemplate(
template=TEMPLATE_SYNTAX_ANALYSIS, input_variables=["generated_code", "errors"]
)
chain = prompt | llm_model | StrOutputParser()
return chain.invoke(
{"generated_code": state["generated_code"], "errors": state["errors"]["syntax"]}
)
try:
# Validate state using Pydantic model
validated_state = CodeAnalysisState(
generated_code=state.get("generated_code", ""),
errors=state.get("errors", {})
)

# Check if syntax errors exist
if "syntax" not in validated_state.errors:
raise InvalidStateError("No syntax errors found in state dictionary")

# Create prompt template and chain
prompt = PromptTemplate(
template=get_optimal_analysis_template("syntax"),
input_variables=["generated_code", "errors"]
)
chain = prompt | llm_model | StrOutputParser()

# Execute chain with validated state
return chain.invoke({
"generated_code": validated_state.generated_code,
"errors": validated_state.errors["syntax"]
})

except KeyError as e:
raise InvalidStateError(f"Missing required key in state dictionary: {e}")
except Exception as e:
raise AnalysisError(f"Syntax analysis failed: {str(e)}")


def execution_focused_analysis(state: dict, llm_model) -> str:
def execution_focused_analysis(state: Dict[str, Any], llm_model) -> str:
"""
Analyzes the execution errors in the generated code and HTML code.

@@ -55,23 +157,50 @@ def execution_focused_analysis(state: dict, llm_model) -> str:

Returns:
str: The result of the execution error analysis.
"""
prompt = PromptTemplate(
template=TEMPLATE_EXECUTION_ANALYSIS,
input_variables=["generated_code", "errors", "html_code", "html_analysis"],
)
chain = prompt | llm_model | StrOutputParser()
return chain.invoke(
{
"generated_code": state["generated_code"],
"errors": state["errors"]["execution"],
"html_code": state["html_code"],
"html_analysis": state["html_analysis"],

Raises:
InvalidStateError: If state is missing required keys.

Example:
>>> state = {
...     'generated_code': 'print(x)',
...     'errors': {'execution': 'NameError: name "x" is not defined'},
...     'html_code': '<div>Test</div>',
...     'html_analysis': 'Valid HTML'
... }
)
>>> analysis = execution_focused_analysis(state, mock_llm)
"""
try:
# Validate state using Pydantic model
validated_state = ExecutionAnalysisState(
generated_code=state.get("generated_code", ""),
errors=state.get("errors", {}),
html_code=state.get("html_code", ""),
html_analysis=state.get("html_analysis", "")
)

# Create prompt template and chain
prompt = PromptTemplate(
template=get_optimal_analysis_template("execution"),
input_variables=["generated_code", "errors", "html_code", "html_analysis"],
)
chain = prompt | llm_model | StrOutputParser()

# Execute chain with validated state
return chain.invoke({
"generated_code": validated_state.generated_code,
"errors": validated_state.errors["execution"],
"html_code": validated_state.html_code,
"html_analysis": validated_state.html_analysis,
})

except KeyError as e:
raise InvalidStateError(f"Missing required key in state dictionary: {e}")
except Exception as e:
raise AnalysisError(f"Execution analysis failed: {str(e)}")


def validation_focused_analysis(state: dict, llm_model) -> str:
def validation_focused_analysis(state: Dict[str, Any], llm_model) -> str:
"""
Analyzes the validation errors in the generated code based on a JSON schema.

@@ -82,24 +211,51 @@ def validation_focused_analysis(state: dict, llm_model) -> str:

Returns:
str: The result of the validation error analysis.
"""
prompt = PromptTemplate(
template=TEMPLATE_VALIDATION_ANALYSIS,
input_variables=["generated_code", "errors", "json_schema", "execution_result"],
)
chain = prompt | llm_model | StrOutputParser()
return chain.invoke(
{
"generated_code": state["generated_code"],
"errors": state["errors"]["validation"],
"json_schema": state["json_schema"],
"execution_result": state["execution_result"],

Raises:
InvalidStateError: If state is missing required keys.

Example:
>>> state = {
...     'generated_code': 'return {"name": "John"}',
...     'errors': {'validation': 'Missing required field: age'},
...     'json_schema': {'required': ['name', 'age']},
...     'execution_result': {'name': 'John'}
... }
)
>>> analysis = validation_focused_analysis(state, mock_llm)
"""
try:
# Validate state using Pydantic model
validated_state = ValidationAnalysisState(
generated_code=state.get("generated_code", ""),
errors=state.get("errors", {}),
json_schema=state.get("json_schema", {}),
execution_result=state.get("execution_result", {})
)

# Create prompt template and chain
prompt = PromptTemplate(
template=get_optimal_analysis_template("validation"),
input_variables=["generated_code", "errors", "json_schema", "execution_result"],
)
chain = prompt | llm_model | StrOutputParser()

# Execute chain with validated state
return chain.invoke({
"generated_code": validated_state.generated_code,
"errors": validated_state.errors["validation"],
"json_schema": validated_state.json_schema,
"execution_result": validated_state.execution_result,
})

except KeyError as e:
raise InvalidStateError(f"Missing required key in state dictionary: {e}")
except Exception as e:
raise AnalysisError(f"Validation analysis failed: {str(e)}")


def semantic_focused_analysis(
state: dict, comparison_result: Dict[str, Any], llm_model
state: Dict[str, Any], comparison_result: Dict[str, Any], llm_model
) -> str:
"""
Analyzes the semantic differences in the generated code based on a comparison result.
@@ -112,16 +268,48 @@ def semantic_focused_analysis(

Returns:
str: The result of the semantic error analysis.

Raises:
InvalidStateError: If state or comparison_result is missing required keys.

Example:
>>> state = {
...     'generated_code': 'def add(a, b): return a + b'
... }
>>> comparison_result = {
...     'differences': ['Missing docstring', 'No type hints'],
...     'explanation': 'The code is missing documentation'
... }
>>> analysis = semantic_focused_analysis(state, comparison_result, mock_llm)
"""
prompt = PromptTemplate(
template=TEMPLATE_SEMANTIC_ANALYSIS,
input_variables=["generated_code", "differences", "explanation"],
)
chain = prompt | llm_model | StrOutputParser()
return chain.invoke(
{
"generated_code": state["generated_code"],
try:
# Validate state using Pydantic model
validated_state = CodeAnalysisState(
generated_code=state.get("generated_code", ""),
errors=state.get("errors", {})
)

# Validate comparison_result
if "differences" not in comparison_result:
raise InvalidStateError("comparison_result missing 'differences' key")
if "explanation" not in comparison_result:
raise InvalidStateError("comparison_result missing 'explanation' key")

# Create prompt template and chain
prompt = PromptTemplate(
template=get_optimal_analysis_template("semantic"),
input_variables=["generated_code", "differences", "explanation"],
)
chain = prompt | llm_model | StrOutputParser()

# Execute chain with validated inputs
return chain.invoke({
"generated_code": validated_state.generated_code,
"differences": json.dumps(comparison_result["differences"], indent=2),
"explanation": comparison_result["explanation"],
}
)
})

except KeyError as e:
raise InvalidStateError(f"Missing required key: {e}")
except Exception as e:
raise AnalysisError(f"Semantic analysis failed: {str(e)}")
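
To try the new error handling end to end, a minimal caller sketch follows. It is not part of this PR's diff: the state values are invented for illustration, and FakeListLLM from langchain_community stands in for whatever LangChain-compatible model the graph would normally inject, so the snippet runs without API keys.

from langchain_community.llms.fake import FakeListLLM

from scrapegraphai.utils.code_error_analysis import (
    AnalysisError,
    syntax_focused_analysis,
)

# Illustrative state; in the real pipeline this dict is assembled by the
# code-generation nodes before the analysis step runs.
state = {
    "generated_code": 'result = {"title": soup.find("h1").text',
    "errors": {"syntax": "SyntaxError: '{' was never closed"},
}

# A fake LLM stands in for the chat model the graph normally provides,
# so the sketch is runnable without credentials.
llm_model = FakeListLLM(responses=["The dict literal opened on line 1 is never closed."])

try:
    analysis = syntax_focused_analysis(state, llm_model)
    print(analysis)
except AnalysisError as exc:
    # InvalidStateError (missing or malformed state keys) and wrapped chain
    # failures both derive from AnalysisError, so one handler covers the
    # new exception hierarchy.
    print(f"Analysis failed: {exc}")

Because InvalidStateError subclasses AnalysisError, callers that only need to log and fall back can keep a single except AnalysisError handler instead of matching each specific failure.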