Skip to content

Table desc #25

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Mar 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion graphdoc/docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,4 +79,4 @@
def setup(app):
# Create static directory if it doesn't exist to avoid the warning
if not os.path.exists(os.path.join(os.path.dirname(__file__), "_static")):
os.makedirs(os.path.join(os.path.dirname(__file__), "_static"))
os.makedirs(os.path.join(os.path.dirname(__file__), "_static"))
2 changes: 2 additions & 0 deletions graphdoc/graphdoc/data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
check_directory_path,
check_file_path,
load_yaml_config,
load_yaml_config_redacted,
setup_logging,
)
from graphdoc.data.local import LocalDataHelper
Expand Down Expand Up @@ -45,4 +46,5 @@
"SchemaRating",
"SchemaType",
"schema_objects_to_dataset",
"load_yaml_config_redacted",
]
30 changes: 30 additions & 0 deletions graphdoc/graphdoc/data/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,36 @@ def load_yaml_config(file_path: Union[str, Path], use_env: bool = True) -> dict:
return yaml.load(file, Loader=SafeLoader)


def load_yaml_config_redacted(
    file_path: Union[str, Path], replace_value: str = "redacted"
) -> dict:
    """Load a YAML configuration file with environment variables redacted.

    Every node tagged ``!env`` is replaced by ``replace_value`` rather than
    being resolved.

    :param file_path: The path to the YAML file.
    :type file_path: Union[str, Path]
    :param replace_value: The value to replace the environment variables with.
    :type replace_value: str
    :return: The YAML configuration with ``!env`` values replaced by
        ``replace_value``.
    :rtype: dict
    :raises ValueError: If the path does not resolve to a valid file.

    """
    # Validate the path first so an invalid path fails before any loader
    # state is created.
    _file_path = Path(file_path).resolve()
    if not _file_path.is_file():
        raise ValueError(
            f"The provided path does not resolve to a valid file: {file_path}"
        )

    class _RedactedEnvLoader(SafeLoader):
        """Private SafeLoader subclass that carries the redacting ``!env`` tag.

        Registering the constructor on ``SafeLoader`` itself would change the
        ``!env`` handling for every other ``SafeLoader`` user in the process;
        a subclass keeps the override local to this call.
        """

    def _redacted_env_constructor(loader, node):
        # The node's content is deliberately ignored: only the placeholder
        # is returned.
        return replace_value

    _RedactedEnvLoader.add_constructor("!env", _redacted_env_constructor)

    with open(_file_path, "r") as file:
        # The loader derives from SafeLoader, so only plain YAML types plus
        # the redacted ``!env`` tag are constructed.
        return yaml.load(file, Loader=_RedactedEnvLoader)


def setup_logging(
log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
):
Expand Down
5 changes: 4 additions & 1 deletion graphdoc/graphdoc/modules/doc_generator_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,10 +222,13 @@ def _predict(self, database_schema: str) -> dspy.Prediction:
if self.fill_empty_descriptions:
updated_ast = self.par.fill_empty_descriptions(database_ast)
database_schema = print_ast(updated_ast)
else:
database_schema = print_ast(database_ast)

# try to generate the schema
try:
prediction = self.prompt.infer(database_schema=database_schema)
log.info("Generated schema: " + str(prediction.documented_schema))
except Exception as e:
log.warning("Error generating schema: " + str(e))
return dspy.Prediction(documented_schema=database_schema)
Expand Down Expand Up @@ -367,7 +370,7 @@ def document_full_schema(
updated_ast = self.par.fill_empty_descriptions(document_ast)
return_schema = print_ast(updated_ast)
else:
return_schema = database_schema
return_schema = print_ast(document_ast)
status = "ERROR"

if trace:
Expand Down
1 change: 1 addition & 0 deletions graphdoc/graphdoc/prompts/schema_doc_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ class DocGeneratorSignature(dspy.Signature):
- Descriptions should be factual, straightforward, and avoid any speculative language.
- Refrain from using the phrase "in the { table } table" within your descriptions.
- Ensure that the documentation adheres to standard schema formatting without modifying the underlying schema structure.
- Make sure that the entities themselves are documented.

### Formatting:
- Maintain consistency with the existing documentation style and structure.
Expand Down
15 changes: 7 additions & 8 deletions graphdoc/tests/test_confest.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
# Copyright 2025-, Semiotic AI, Inc.
# SPDX-License-Identifier: Apache-2.0

# system packages
import logging

# external packages
from dotenv import load_dotenv

# internal packages
from graphdoc import (
DocGeneratorPrompt,
Expand All @@ -17,13 +21,12 @@
OverwriteSchemaRating,
)

# system packages

# external packages

# logging
log = logging.getLogger(__name__)

# load the environment variables
load_dotenv("../.env")


class TestFixtures:
def test_parser(self, par: Parser):
Expand All @@ -44,10 +47,6 @@ def test_overwrite_ldh(self, overwrite_ldh: LocalDataHelper):
== OverwriteSchemaCategoryRatingMapping.get_rating
)

# def test_gd(self, gd: GraphDoc):
# assert gd is not None
# assert isinstance(gd, GraphDoc)

def test_dqp(self, dqp):
assert isinstance(dqp, DocQualityPrompt)
assert dqp.prompt_type == "predict"
Expand Down