Skip to content

Commit 79a7752

Browse files
Merge pull request #19 from semiotic-ai/main-run
main run
2 parents 4e69c7c + 9092842 commit 79a7752

File tree

8 files changed

+192
-44
lines changed

8 files changed

+192
-44
lines changed

graphdoc/graphdoc/__init__.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222
setup_logging,
2323
)
2424
from graphdoc.eval import DocGeneratorEvaluator
25-
from graphdoc.main import GraphDoc
2625
from graphdoc.modules import DocGeneratorModule
2726
from graphdoc.prompts import (
2827
BadDocGeneratorSignature,
@@ -45,7 +44,6 @@
4544
)
4645

4746
__all__ = [
48-
"GraphDoc",
4947
"DocGeneratorModule",
5048
"DocGeneratorEvaluator",
5149
"DocGeneratorTrainer",

graphdoc/graphdoc/main.py

Lines changed: 148 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,12 @@
11
# Copyright 2025-, Semiotic AI, Inc.
22
# SPDX-License-Identifier: Apache-2.0
33

4-
# system packages
4+
import argparse
55
import logging
66
import random
7+
8+
# system packages
9+
import sys
710
from pathlib import Path
811
from typing import List, Literal, Optional, Union
912

@@ -633,3 +636,147 @@ def doc_generator_eval_from_yaml(
633636
evaluator_prediction_field=evaluator_prediction_field,
634637
readable_value=readable_value,
635638
)
639+
640+
641+
#######################
642+
# Main Entry Point #
643+
#######################
644+
"""Run GraphDoc as a command-line application.
645+
646+
This module can be run directly to train models, generate documentation,
647+
or evaluate documentation quality.
648+
649+
Usage:
650+
python -m graphdoc.main --config CONFIG_FILE [--log-level LEVEL] COMMAND [ARGS]
651+
652+
Global Arguments:
653+
--config PATH Path to YAML configuration file with GraphDoc
654+
and language model settings
655+
--log-level LEVEL Set logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
656+
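Default: INFO (NOTE: the option is parsed, but the entry-point code below never reads it — confirm it is applied elsewhere)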
657+
658+
Commands:
659+
train Train a prompt using a dataset
660+
--trainer-config PATH Path to trainer YAML configuration
661+
662+
generate Generate documentation for schema files
663+
--module-config PATH Path to module YAML configuration
664+
--input PATH Path to input schema file
665+
--output PATH Path to output file for the documented schema
666+
667+
evaluate Evaluate documentation quality
668+
--eval-config PATH Path to evaluator YAML configuration
669+
670+
Examples:
671+
# Train a documentation quality model
672+
python -m graphdoc.main \
673+
--config config.yaml \
674+
train \
675+
--trainer-config trainer_config.yaml
676+
677+
# Generate documentation for schemas
678+
python -m graphdoc.main \
679+
--config config.yaml \
680+
generate \
681+
--module-config module_config.yaml \
682+
--input schema.graphql \
683+
--output documented_schema.graphql
684+
685+
# Evaluate documentation quality
686+
python -m graphdoc.main \
687+
--config config.yaml \
688+
evaluate \
689+
--eval-config eval_config.yaml
690+
691+
Configuration:
692+
See example YAML files in the documentation for format details.
693+
""" # noqa: B950
694+
if __name__ == "__main__":
695+
696+
parser = argparse.ArgumentParser(description="GraphDoc - Documentation Generator")
697+
parser.add_argument("--config", type=str, help="Path to YAML configuration file")
698+
parser.add_argument(
699+
"--log-level",
700+
type=str,
701+
default="INFO",
702+
choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
703+
help="Logging level",
704+
)
705+
subparsers = parser.add_subparsers(dest="command", help="Command to execute")
706+
707+
###################
708+
# train #
709+
###################
710+
train_parser = subparsers.add_parser("train", help="Train a prompt")
711+
train_parser.add_argument(
712+
"--trainer-config",
713+
type=str,
714+
required=True,
715+
help="Path to trainer YAML configuration",
716+
)
717+
718+
###################
719+
# generate #
720+
###################
721+
generate_parser = subparsers.add_parser("generate", help="Generate documentation")
722+
generate_parser.add_argument(
723+
"--module-config",
724+
type=str,
725+
required=True,
726+
help="Path to module YAML configuration",
727+
)
728+
generate_parser.add_argument(
729+
"--input", type=str, required=True, help="Path to input schema file"
730+
)
731+
generate_parser.add_argument(
732+
"--output", type=str, required=True, help="Path to output schema file"
733+
)
734+
735+
###################
736+
# evaluate #
737+
###################
738+
eval_parser = subparsers.add_parser(
739+
"evaluate", help="Evaluate documentation quality"
740+
)
741+
eval_parser.add_argument(
742+
"--eval-config",
743+
type=str,
744+
required=True,
745+
help="Path to evaluator YAML configuration",
746+
)
747+
748+
args = parser.parse_args()
749+
if not args.config:
750+
parser.print_help()
751+
sys.exit(1)
752+
753+
graphdoc = GraphDoc.from_yaml(args.config)
754+
755+
if args.command == "train":
756+
trainer = graphdoc.single_trainer_from_yaml(args.trainer_config)
757+
trained_prompt = trainer.train()
758+
print(
759+
f"Training complete. Saved to MLflow with name: {trainer.mlflow_model_name}"
760+
)
761+
762+
elif args.command == "generate":
763+
module = graphdoc.doc_generator_module_from_yaml(args.module_config)
764+
765+
with open(args.input, "r") as f:
766+
schema = f.read()
767+
768+
documented_schema = module.document_full_schema(schema)
769+
770+
with open(args.output, "w") as f:
771+
f.write(documented_schema.documented_schema)
772+
print(f"Generation complete. Documentation saved to {args.output}")
773+
774+
elif args.command == "evaluate":
775+
evaluator = graphdoc.doc_generator_eval_from_yaml(args.eval_config)
776+
results = evaluator.evaluate()
777+
print(
778+
"Evaluation complete. Results saved to MLflow experiment: "
779+
f"{evaluator.mlflow_experiment_name}"
780+
)
781+
else:
782+
parser.print_help()

graphdoc/graphdoc/modules/doc_generator_module.py

Lines changed: 39 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,45 @@ def __init__(
6464
)
6565
self.prompt.prompt_metric.prompt_metric = "rating"
6666

67+
#######################
68+
# MLFLOW TRACING #
69+
#######################
70+
# TODO: we will break this out into a separate class later
71+
# when we have need for it elsewhere
72+
def _start_trace(
73+
self,
74+
client: mlflow.MlflowClient,
75+
expirement_name: str,
76+
trace_name: str,
77+
inputs: dict,
78+
attributes: dict,
79+
):
80+
# set the experiment name so that everything is logged to the same experiment
81+
mlflow.set_experiment(expirement_name)
82+
83+
# start the trace
84+
trace = client.start_trace(
85+
name=trace_name,
86+
inputs=inputs,
87+
attributes=attributes,
88+
# experiment_id=expirement_name,
89+
)
90+
91+
return trace
92+
93+
def _end_trace(
94+
self,
95+
client: mlflow.MlflowClient,
96+
trace: Any, # TODO: trace: mlflow.Span,
97+
# E AttributeError: module 'mlflow' has no attribute 'Span'
98+
outputs: dict,
99+
status: Literal["OK", "ERROR"],
100+
):
101+
client.end_trace(request_id=trace.request_id, outputs=outputs, status=status)
102+
103+
#######################
104+
# MODULE FUNCTIONS #
105+
#######################
67106
def _retry_by_rating(self, database_schema: str) -> str:
68107
"""Retry the generation if the quality check fails. Rating threshold is
69108
determined at initialization.
@@ -211,42 +250,6 @@ def forward(self, database_schema: str) -> dspy.Prediction:
211250
else:
212251
return self._predict(database_schema=database_schema)
213252

214-
#######################
215-
# MLFLOW TRACING #
216-
#######################
217-
# TODO: we will break this out into a separate class later
218-
# when we have need for it elsewhere
219-
def _start_trace(
220-
self,
221-
client: mlflow.MlflowClient,
222-
expirement_name: str,
223-
trace_name: str,
224-
inputs: dict,
225-
attributes: dict,
226-
):
227-
# set the experiment name so that everything is logged to the same experiment
228-
mlflow.set_experiment(expirement_name)
229-
230-
# start the trace
231-
trace = client.start_trace(
232-
name=trace_name,
233-
inputs=inputs,
234-
attributes=attributes,
235-
# experiment_id=expirement_name,
236-
)
237-
238-
return trace
239-
240-
def _end_trace(
241-
self,
242-
client: mlflow.MlflowClient,
243-
trace: Any, # TODO: trace: mlflow.Span,
244-
# E AttributeError: module 'mlflow' has no attribute 'Span'
245-
outputs: dict,
246-
status: Literal["OK", "ERROR"],
247-
):
248-
client.end_trace(request_id=trace.request_id, outputs=outputs, status=status)
249-
250253
def document_full_schema(
251254
self,
252255
database_schema: str,

graphdoc/runners/eval/eval_doc_generator_module.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from dotenv import load_dotenv
1212

1313
# internal packages
14-
from graphdoc import GraphDoc
14+
from graphdoc.main import GraphDoc
1515

1616
# logging
1717
log = logging.getLogger(__name__)

graphdoc/runners/train/single_prompt_trainer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
import mlflow
1515
from dotenv import load_dotenv
1616

17-
from graphdoc import GraphDoc, load_yaml_config
17+
from graphdoc.main import GraphDoc, load_yaml_config
1818

1919
# logging
2020
log = logging.getLogger(__name__)

graphdoc/tests/conftest.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,10 @@
1717
from graphdoc import (
1818
DocGeneratorPrompt,
1919
DocQualityPrompt,
20-
GraphDoc,
2120
LocalDataHelper,
2221
Parser,
2322
)
23+
from graphdoc.main import GraphDoc
2424

2525
# logging
2626
log = logging.getLogger(__name__)

graphdoc/tests/test_confest.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,10 @@
77
from graphdoc import (
88
DocGeneratorPrompt,
99
DocQualityPrompt,
10-
GraphDoc,
1110
LocalDataHelper,
1211
Parser,
1312
)
13+
from graphdoc.main import GraphDoc
1414

1515
from .conftest import (
1616
OverwriteSchemaCategory,

graphdoc/tests/test_graphdoc.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,10 @@
1616
DocGeneratorTrainer,
1717
DocQualityPrompt,
1818
DocQualityTrainer,
19-
GraphDoc,
2019
SinglePromptTrainer,
2120
load_yaml_config,
2221
)
22+
from graphdoc.main import GraphDoc
2323

2424
# logging
2525
log = logging.getLogger(__name__)

0 commit comments

Comments
 (0)