
Commit 838534b

Merge pull request #165 from georgian-io/bumps-20240507
[Version Bumps] May 7, 2024
2 parents c8b3d0b + def3450 commit 838534b


11 files changed: +913 -930 lines


llmtune/config.yml

Lines changed: 10 additions & 8 deletions

@@ -38,14 +38,16 @@ lora:
   r: 32
   lora_alpha: 64
   lora_dropout: 0.1
-  target_modules:
-    - q_proj
-    - v_proj
-    - k_proj
-    - o_proj
-    - up_proj
-    - down_proj
-    - gate_proj
+  target_modules: "all-linear"
+  # to target specific modules
+  # target_modules:
+  #   - q_proj
+  #   - v_proj
+  #   - k_proj
+  #   - o_proj
+  #   - up_proj
+  #   - down_proj
+  #   - gate_proj

 # Training -------------------
 training:
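For reference, a minimal sketch (not part of this diff) of how the updated default reaches peft: the finetune tests below assert that llmtune unpacks the lora section via LoraConfig(**config.lora.model_dump()), so the YAML value flows straight into peft's LoraConfig. This assumes a peft release that understands the "all-linear" shortcut, which adapts every linear layer instead of the previous hand-picked projection list.

# Illustrative sketch, not part of the diff.
from peft import LoraConfig

peft_lora_config = LoraConfig(
    r=32,
    lora_alpha=64,
    lora_dropout=0.1,
    target_modules="all-linear",  # or a list such as ["q_proj", "v_proj"] to target specific modules
)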

llmtune/pydantic_models/config_model.py

Lines changed: 3 additions & 1 deletion

@@ -125,7 +125,9 @@ class LoraConfig(BaseModel):
     lora_alpha: Optional[int] = Field(16, description="The alpha parameter for Lora scaling")
     bias: Optional[str] = Field("none", description="Bias type for Lora. Can be 'none', 'all' or 'lora_only'")
     lora_dropout: Optional[float] = Field(0.1, description="The dropout probability for Lora layers")
-    target_modules: Optional[List[str]] = Field(None, description="The names of the modules to apply Lora to")
+    target_modules: Optional[Union[List[str], Literal["all-linear"]]] = Field(
+        "all-linear", description="The names of the modules to apply Lora to"
+    )
     fan_in_fan_out: Optional[bool] = Field(
         False,
         description="Flag to indicate if the layer to replace stores weight like (fan_in, fan_out)",

poetry.lock

Lines changed: 863 additions & 845 deletions
Some generated files are not rendered by default.

pyproject.toml

Lines changed: 2 additions & 2 deletions

@@ -32,7 +32,7 @@ folders = [

 [tool.poetry.dependencies]
 python = ">=3.9, <=3.12"
-transformers = "~4.37.2"
+transformers = "~4.40.2"
 datasets = "^2.17.0"
 peft = "^0.8.2"
 pandas = "^2.2.0"
@@ -44,7 +44,7 @@ einops = "^0.7.0"
 bitsandbytes = "^0.42.0"
 nltk = "^3.8.1"
 accelerate = "^0.27.0"
-trl = "~0.7.10"
+trl = "~0.8.6"
 rouge-score = "^0.1.2"
 absl-py = "^2.1.0"
 py7zr = "^0.20.8"
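After re-locking and installing, a quick sanity check (illustrative, not part of the diff) confirms the environment picked up the bumped pins; Poetry's tilde constraints resolve to 4.40.x for transformers and 0.8.x for trl.

# Illustrative post-install check, not part of the diff.
import transformers
import trl

print(transformers.__version__)  # expected 4.40.x under "~4.40.2"
print(trl.__version__)           # expected 0.8.x under "~0.8.6"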

tests/data/test_ingestor.py

Lines changed: 6 additions & 12 deletions

@@ -1,5 +1,7 @@
+from unittest.mock import MagicMock, mock_open
+
 import pytest
-from unittest.mock import patch, MagicMock, mock_open
+from datasets import Dataset

 from llmtune.data.ingestor import (
     CsvIngestor,
@@ -9,8 +11,6 @@
     get_ingestor,
 )

-from datasets import Dataset
-

 def test_get_ingestor():
     assert isinstance(get_ingestor("json")(""), JsonIngestor)
@@ -31,9 +31,7 @@ def test_json_ingestor_to_dataset(mocker):


 def test_jsonl_ingestor_to_dataset(mocker):
-    mock_generator = mocker.patch(
-        "llmtune.data.ingestor.JsonlIngestor._jsonl_generator"
-    )
+    mock_generator = mocker.patch("llmtune.data.ingestor.JsonlIngestor._jsonl_generator")
     mock_dataset = mocker.patch("llmtune.data.ingestor.Dataset")
     JsonlIngestor("").to_dataset()

@@ -52,9 +50,7 @@ def test_huggingface_to_dataset(mocker):
     # Setup
     path = "some_path"
     ingestor = HuggingfaceIngestor(path)
-    mock_concatenate_datasets = mocker.patch(
-        "llmtune.data.ingestor.concatenate_datasets"
-    )
+    mock_concatenate_datasets = mocker.patch("llmtune.data.ingestor.concatenate_datasets")
     mock_load_dataset = mocker.patch("llmtune.data.ingestor.load_dataset")
     mock_dataset = mocker.patch("llmtune.data.ingestor.Dataset")

@@ -108,9 +104,7 @@ def test_jsonl_ingestor_generator(file_content, expected_output, mocker):
     mocker.patch("builtins.open", mock_open(read_data=file_content))
     mocker.patch(
         "ijson.items",
-        side_effect=lambda f, prefix, multiple_values: (
-            iter(expected_output) if multiple_values else iter([])
-        ),
+        side_effect=lambda f, prefix, multiple_values: (iter(expected_output) if multiple_values else iter([])),
     )
     ingestor = JsonlIngestor("dummy_path.jsonl")

tests/finetune/test_finetune_lora.py

Lines changed: 6 additions & 18 deletions

@@ -18,15 +18,11 @@ def test_lora_finetune_initialization(mocker):
     )

     # Initialize LoRAFinetune with the sample configuration
-    lora_finetune = LoRAFinetune(
-        config=get_sample_config(), directory_helper=MagicMock()
-    )
+    lora_finetune = LoRAFinetune(config=get_sample_config(), directory_helper=MagicMock())
     # Assertions to ensure that LoRAFinetune is initialized as expected
     mock_lora_config.assert_called_once_with(**get_sample_config().lora.model_dump())

-    assert (
-        lora_finetune.config == get_sample_config()
-    ), "Configuration should match the input configuration"
+    assert lora_finetune.config == get_sample_config(), "Configuration should match the input configuration"


 def test_model_and_tokenizer_loading(mocker):
@@ -37,9 +33,7 @@ def test_model_and_tokenizer_loading(mocker):
         "llmtune.finetune.lora.AutoModelForCausalLM.from_pretrained",
         return_value=MagicMock(),
     )
-    mock_tokenizer = mocker.patch(
-        "llmtune.finetune.lora.AutoTokenizer.from_pretrained", return_value=MagicMock()
-    )
+    mock_tokenizer = mocker.patch("llmtune.finetune.lora.AutoTokenizer.from_pretrained", return_value=MagicMock())
     mock_inject_lora = mocker.patch(
         "llmtune.finetune.lora.LoRAFinetune._inject_lora",
         return_value=None,  # _inject_lora doesn't return a value
@@ -89,18 +83,14 @@ def test_model_finetune(mocker):
         "llmtune.finetune.lora.AutoModelForCausalLM.from_pretrained",
         return_value=MagicMock(),
     )
-    mocker.patch(
-        "llmtune.finetune.lora.AutoTokenizer.from_pretrained", return_value=MagicMock()
-    )
+    mocker.patch("llmtune.finetune.lora.AutoTokenizer.from_pretrained", return_value=MagicMock())
     mocker.patch(
         "llmtune.finetune.lora.LoRAFinetune._inject_lora",
         return_value=None,  # _inject_lora doesn't return a value
     )

     mock_trainer = mocker.MagicMock()
-    mock_sft_trainer = mocker.patch(
-        "llmtune.finetune.lora.SFTTrainer", return_value=mock_trainer
-    )
+    mock_sft_trainer = mocker.patch("llmtune.finetune.lora.SFTTrainer", return_value=mock_trainer)

     directory_helper = MagicMock()

@@ -144,9 +134,7 @@ def test_save_model(mocker):
     mocker.patch("llmtune.finetune.lora.AutoModelForCausalLM.from_pretrained")

     mock_tok = mocker.MagicMock()
-    mocker.patch(
-        "llmtune.finetune.lora.AutoTokenizer.from_pretrained", return_value=mock_tok
-    )
+    mocker.patch("llmtune.finetune.lora.AutoTokenizer.from_pretrained", return_value=mock_tok)
     mocker.patch(
         "llmtune.finetune.lora.LoRAFinetune._inject_lora",
         return_value=None,

tests/inference/test_inference_lora.py

Lines changed: 10 additions & 26 deletions

@@ -1,37 +1,31 @@
-import pytest
 from unittest.mock import MagicMock
+
 from datasets import Dataset
+from transformers import BitsAndBytesConfig

 from llmtune.inference.lora import LoRAInference
-from llmtune.utils.save_utils import DirectoryHelper
 from test_utils.test_config import get_sample_config  # Adjust import path as needed

-from transformers import BitsAndBytesConfig
-

 def test_lora_inference_initialization(mocker):
     # Mock dependencies
     mock_model = mocker.patch(
         "llmtune.inference.lora.AutoPeftModelForCausalLM.from_pretrained",
         return_value=MagicMock(),
     )
-    mock_tokenizer = mocker.patch(
-        "llmtune.inference.lora.AutoTokenizer.from_pretrained", return_value=MagicMock()
-    )
+    mock_tokenizer = mocker.patch("llmtune.inference.lora.AutoTokenizer.from_pretrained", return_value=MagicMock())

     # Mock configuration and directory helper
     config = get_sample_config()
-    dir_helper = MagicMock(
-        save_paths=MagicMock(results="results_dir", weights="weights_dir")
-    )
+    dir_helper = MagicMock(save_paths=MagicMock(results="results_dir", weights="weights_dir"))
     test_dataset = Dataset.from_dict(
         {
             "formatted_prompt": ["prompt1", "prompt2"],
             "label_column_name": ["label1", "label2"],
         }
     )

-    inference = LoRAInference(
+    _ = LoRAInference(
         test_dataset=test_dataset,
         label_column_name="label_column_name",
         config=config,
@@ -45,34 +39,24 @@ def test_lora_inference_initialization(mocker):
         device_map=config.model.device_map,
         attn_implementation=config.model.attn_implementation,
     )
-    mock_tokenizer.assert_called_once_with(
-        "weights_dir", device_map=config.model.device_map
-    )
+    mock_tokenizer.assert_called_once_with("weights_dir", device_map=config.model.device_map)


 def test_infer_all(mocker):
     mocker.patch(
         "llmtune.inference.lora.AutoPeftModelForCausalLM.from_pretrained",
         return_value=MagicMock(),
     )
-    mocker.patch(
-        "llmtune.inference.lora.AutoTokenizer.from_pretrained", return_value=MagicMock()
-    )
+    mocker.patch("llmtune.inference.lora.AutoTokenizer.from_pretrained", return_value=MagicMock())
     mocker.patch("os.makedirs")
     mock_open = mocker.patch("builtins.open", mocker.mock_open())
     mock_csv_writer = mocker.patch("csv.writer")

-    mock_infer_one = mocker.patch.object(
-        LoRAInference, "infer_one", return_value="predicted"
-    )
+    mock_infer_one = mocker.patch.object(LoRAInference, "infer_one", return_value="predicted")

     config = get_sample_config()
-    dir_helper = MagicMock(
-        save_paths=MagicMock(results="results_dir", weights="weights_dir")
-    )
-    test_dataset = Dataset.from_dict(
-        {"formatted_prompt": ["prompt1"], "label_column_name": ["label1"]}
-    )
+    dir_helper = MagicMock(save_paths=MagicMock(results="results_dir", weights="weights_dir"))
+    test_dataset = Dataset.from_dict({"formatted_prompt": ["prompt1"], "label_column_name": ["label1"]})

     inference = LoRAInference(
         test_dataset=test_dataset,

tests/qa/test_generics.py

Lines changed: 2 additions & 2 deletions

@@ -1,7 +1,7 @@
 import pytest
-from unittest.mock import Mock
 from pandas import DataFrame
-from llmtune.qa.generics import LLMQaTest, QaTestRegistry, LLMTestSuite
+
+from llmtune.qa.generics import LLMQaTest, LLMTestSuite


 @pytest.fixture

tests/qa/test_qa_tests.py

Lines changed: 6 additions & 6 deletions

@@ -1,14 +1,14 @@
 import pytest
+
 from llmtune.qa.qa_tests import (
-    QaTestRegistry,
-    LengthTest,
-    JaccardSimilarityTest,
+    AdjectivePercent,
     DotProductSimilarityTest,
+    JaccardSimilarityTest,
+    LengthTest,
+    NounPercent,
     RougeScoreTest,
-    WordOverlapTest,
     VerbPercent,
-    AdjectivePercent,
-    NounPercent,
+    WordOverlapTest,
 )

tests/test_ablation_utils.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
import pytest
22
from pydantic import BaseModel
3+
34
from llmtune.utils.ablation_utils import (
4-
get_types_from_dict,
5+
generate_permutations,
56
get_annotation,
67
get_model_field_type,
7-
validate_and_get_ablations,
8+
get_types_from_dict,
89
patch_with_permutation,
9-
generate_permutations,
1010
)
1111

1212

tests/test_cli.py

Lines changed: 2 additions & 7 deletions

@@ -1,14 +1,9 @@
-import pytest
-from unittest.mock import MagicMock, patch
-from pathlib import Path
+from unittest.mock import patch

-from pydantic import ValidationError
 from typer.testing import CliRunner

-from llmtune.cli.toolkit import app, cli, run_one_experiment
-from llmtune.pydantic_models.config_model import Config
+from llmtune.cli.toolkit import app, cli

-from test_utils.test_config import get_sample_config

 runner = CliRunner()
