Skip to content

Commit 75a449f

Browse files
Merge pull request #21 from semiotic-ai/random
Random
2 parents 2625ec1 + 8b64ac5 commit 75a449f

15 files changed

+21 -17 lines changed

graphdoc/assets/configs/single_prompt_doc_generator_module.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@ data:
2525
trainset_size: 10 # The size of the trainset
2626
evalset_ratio: 0.1 # The proportionate size of the evalset
2727
data_helper_type: generation # Type of data helper to use (quality, generation)
28-
28+
seed: 42 # The seed for the random number generator
2929
prompt:
3030
prompt: base_doc_gen # Which prompt signature to use
3131
class: DocGeneratorPrompt # Must be a child of SinglePrompt (we will use an enum to map this)

graphdoc/assets/configs/single_prompt_doc_generator_module_eval.yaml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,7 @@ data:
2525
trainset_size: 10 # The size of the trainset
2626
evalset_ratio: 0.1 # The proportionate size of the evalset
2727
data_helper_type: generation # Type of data helper to use (quality, generation)
28+
seed: 42 # The seed for the random number generator
2829

2930
prompt:
3031
prompt: base_doc_gen # Which prompt signature to use

graphdoc/assets/configs/single_prompt_doc_generator_trainer.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ data:
2525
trainset_size: 10 # The size of the trainset
2626
evalset_ratio: 0.1 # The proportionate size of the evalset
2727
data_helper_type: generation # Type of data helper to use (quality, generation)
28-
28+
seed: 42 # The seed for the random number generator
2929
prompt:
3030
prompt: base_doc_gen # Which prompt signature to use
3131
class: DocGeneratorPrompt # Must be a child of SinglePrompt (we will use an enum to map this)

graphdoc/assets/configs/single_prompt_doc_quality_trainer.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ data:
2525
trainset_size: 10 # The size of the trainset
2626
evalset_ratio: 0.1 # The proportionate size of the evalset
2727
data_helper_type: quality # Type of data helper to use (quality, generation)
28-
28+
seed: 42 # The seed for the random number generator
2929
prompt:
3030
prompt: doc_quality # Which prompt signature to use
3131
class: DocQualityPrompt # Must be a child of SinglePrompt (we will use an enum to map this)

graphdoc/graphdoc/config.py

Lines changed: 8 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -29,10 +29,6 @@
2929
# logging
3030
log = logging.getLogger(__name__)
3131

32-
# global variables
33-
random.seed(42)
34-
35-
3632
#######################
3733
# Resource Setup #
3834
#######################
@@ -160,7 +156,9 @@ def trainset_from_yaml(yaml_path: Union[str, Path]) -> List[dspy.Example]:
160156

161157

162158
def split_trainset(
163-
trainset: List[dspy.Example], evalset_ratio: float
159+
trainset: List[dspy.Example],
160+
evalset_ratio: float,
161+
seed: int = 42,
164162
) -> tuple[List[dspy.Example], List[dspy.Example]]:
165163
"""Split a trainset into a trainset and evalset.
166164
@@ -170,6 +168,7 @@ def split_trainset(
170168
tuple[List[dspy.Example], List[dspy.Example]]
171169
172170
"""
171+
random.seed(seed)
173172
split_idx = int(len(trainset) * (1 - evalset_ratio))
174173
random.shuffle(trainset)
175174
evalset = trainset[split_idx:]
@@ -201,6 +200,7 @@ def trainset_and_evalset_from_yaml(
201200
evalset_ratio: 0.1 # The proportionate size of the evalset
202201
data_helper_type: quality # Type of data helper to use
203202
# (quality, generation)
203+
seed: 42 # The seed for the random number generator
204204
205205
:param yaml_path: Path to the YAML file.
206206
:type yaml_path: Union[str, Path]
@@ -210,7 +210,9 @@ def trainset_and_evalset_from_yaml(
210210
"""
211211
config = load_yaml_config(yaml_path)
212212
trainset = trainset_from_dict(config["data"])
213-
return split_trainset(trainset, config["data"]["evalset_ratio"])
213+
return split_trainset(
214+
trainset, config["data"]["evalset_ratio"], config["data"]["seed"]
215+
)
214216

215217

216218
#######################

graphdoc/graphdoc/main.py

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,6 @@
33

44
import argparse
55
import logging
6-
import random
76

87
# system packages
98
import sys
@@ -20,9 +19,6 @@
2019
# logging
2120
log = logging.getLogger(__name__)
2221

23-
# global variables
24-
random.seed(42)
25-
2622
#######################
2723
# Main Entry Point #
2824
#######################

graphdoc/tests/assets/configs/single_prompt_doc_generator_module.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@ data:
2525
trainset_size: 10 # The size of the trainset
2626
evalset_ratio: 0.1 # The proportionate size of the evalset
2727
data_helper_type: generation # Type of data helper to use (quality, generation)
28-
28+
seed: 42 # The seed for the random number generator
2929
prompt:
3030
prompt: base_doc_gen # Which prompt signature to use
3131
class: DocGeneratorPrompt # Must be a child of SinglePrompt (we will use an enum to map this)

graphdoc/tests/assets/configs/single_prompt_doc_generator_module_eval.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@ data:
2525
trainset_size: 10 # The size of the trainset
2626
evalset_ratio: 0.1 # The proportionate size of the evalset
2727
data_helper_type: generation # Type of data helper to use (quality, generation)
28-
28+
seed: 42 # The seed for the random number generator
2929
prompt:
3030
prompt: base_doc_gen # Which prompt signature to use
3131
class: DocGeneratorPrompt # Must be a child of SinglePrompt (we will use an enum to map this)

graphdoc/tests/assets/configs/single_prompt_doc_generator_trainer.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@ data:
2525
trainset_size: 1000 # The size of the trainset
2626
evalset_ratio: 0.1 # The proportionate size of the evalset
2727
data_helper_type: generation # Type of data helper to use (quality, generation)
28-
28+
seed: 42 # The seed for the random number generator
2929
prompt:
3030
prompt: base_doc_gen # Which prompt signature to use
3131
class: DocGeneratorPrompt # Must be a child of SinglePrompt (we will use an enum to map this)

graphdoc/tests/assets/configs/single_prompt_doc_quality_trainer.yaml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,7 @@ data:
2525
trainset_size: 1000 # The size of the trainset
2626
evalset_ratio: 0.1 # The proportionate size of the evalset
2727
data_helper_type: quality # Type of data helper to use (quality, generation)
28+
seed: 42 # The seed for the random number generator
2829

2930
prompt:
3031
prompt: doc_quality # Which prompt signature to use

0 commit comments

Comments (0)