
Commit e9f9e2f

Merge branch 'llmware-ai:main' into patch-2
2 parents 5e0cf39 + b8c1f09 commit e9f9e2f

13 files changed (+3193, -143 lines)

README.md (+11)

@@ -4,6 +4,11 @@
[![discord](https://img.shields.io/badge/Chat%20on-Discord-blue?logo=discord&logoColor=white)](https://discord.gg/MhZn5Nc39h)
[![Documentation](https://github.com/llmware-ai/llmware/actions/workflows/pages.yml/badge.svg)](https://github.com/llmware-ai/llmware/actions/workflows/pages.yml)

![DevFest GIF](https://i.giphy.com/media/v1.Y2lkPTc5MGI3NjExc3dodTV4czFsd2lrYWV5N3BhaXV5MXpucDhrcWZ2ODF4amM2aXo3diZlcD12MV9pbnRlcm5hbF9naWZfYnlfaWQmY3Q9Zw/Bkax2GRzAt0PDHcmSq/giphy.gif)

**Selected winners will win $25 in GitHub sponsorship prizes!**

## 🧰🛠️🔩Building Enterprise RAG Pipelines with Small, Specialized Models

`llmware` provides a unified framework for building LLM-based applications (e.g., RAG, Agents), using small, specialized models that can be deployed privately, integrated with enterprise knowledge sources safely and securely, and cost-effectively tuned and adapted for any business process.
@@ -819,6 +824,12 @@ Questions and discussions are welcome in our [github discussions](https://github

See also [additional deployment/install release notes in wheel_archives](https://github.com/llmware-ai/llmware/tree/main/wheel_archives)

**Sunday, October 6 - v0.3.7**
- Added new model class - OVGenerativeModel - to support the use of models packaged in OpenVino format
- Added new model class - ONNXGenerativeModel - to support the use of models packaged in ONNX format
- Getting started with [OpenVino example](https://github.com/llmware-ai/llmware/blob/main/examples/Models/using_openvino_models.py)
- Getting started with [ONNX example](https://github.com/llmware-ai/llmware/blob/main/examples/Models/using_onnx_models.py)
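
A minimal sketch of loading one of the new model classes (model name taken from the bundled examples below; assumes `pip install onnxruntime_genai` for ONNX, or `pip install openvino openvino_genai` for OpenVino):

```python
from llmware.models import ModelCatalog

# ONNX-packaged model served through the new ONNXGenerativeModel class
model = ModelCatalog().load_model("bling-tiny-llama-onnx", temperature=0.0, sample=False)
response = model.inference("What was the revenue in the quarter?",
                           add_context="Revenue was $52.9 billion in the quarter.")
print(response)
```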

**Tuesday, October 1 - v0.3.6**
- Added new prompt chat templates
- Improved and updated model configurations

examples/Models/adding_openvino_or_onnx_model.py (+144)

@@ -0,0 +1,144 @@

""" This example shows how to add a custom or private OpenVino or ONNX model to the llmware model catalog.

    Over the next few releases, we will be expanding the default ModelCatalog considerably, but for the time
    being, please feel free to follow the steps below to build your own custom catalog.

    We show below templates for the model card dictionaries - most of which are fairly easy to build for a given
    model.

    We highlight the main step - a simple one-liner to register the model - and then provide more details on
    three potential troubleshooting items:

        1 - using a model from a custom/private path - and 'inserting' directly into the model_repo lookup
        2 - identifying the prompt wrapper template
        3 - customizing a new prompt wrapper

"""

from llmware.models import ModelCatalog
from llmware.prompts import Prompt
from llmware.configs import LLMWareConfig

# Create model card and register in the ModelCatalog

""" Sample OpenVino Model Card template

model_card_dict = {"model_name": "phi-3-ov", "model_family": "OVGenerativeModel",
                   "model_category": "generative_local", "display_name": "phi-3-ov",
                   "model_location": "llmware_repo",
                   "context_window": 4096, "instruction_following": False, "prompt_wrapper": "phi_3",
                   "temperature": 0.0, "sample_default": False, "trailing_space": "",
                   "tokenizer_local": "tokenizer_phi3.json",
                   "hf_repo": "llmware/phi-3-ov",
                   "custom_model_files": [], "custom_model_repo": "",
                   "fetch": {"snapshot": True, "module": "llmware.models", "method": "pull_snapshot_from_hf"},
                   "validation_files": ["openvino_model.xml"],
                   "link": "https://huggingface.co/llmware/phi-3-ov"}
"""

""" Sample ONNX Model Card template

model_card_dict = {"model_name": "phi-3-onnx", "model_family": "ONNXGenerativeModel",
                   "model_category": "generative_local", "display_name": "phi-3-onnx",
                   "model_location": "llmware_repo",
                   "context_window": 4096, "instruction_following": False, "prompt_wrapper": "phi_3",
                   "temperature": 0.0, "sample_default": False, "trailing_space": "",
                   "tokenizer_local": "tokenizer_phi3.json",
                   "hf_repo": "llmware/phi-3-onnx",
                   "custom_model_files": [], "custom_model_repo": "",
                   "fetch": {"snapshot": True, "module": "llmware.models", "method": "pull_snapshot_from_hf"},
                   "validation_files": ["model.onnx", "model.onnx.data"],
                   "link": "https://huggingface.co/llmware/phi-3-onnx"}
"""

# create the model card dictionary manually using the templates above as guides, e.g.,
model_card_dict = {"model_name": "my_model", "insert other params from above...": []}

# this is the key step - registering the model card - add as a first line in any script/example
ModelCatalog().register_new_model_card(model_card_dict)

# once the model is registered in the catalog, it can then be accessed anytime by name, e.g.,
model = ModelCatalog().load_model("my_model")
response = model.inference("What is ...")

# or if using in conjunction with building a RAG prompt
prompter = Prompt().load_model("my_model")


""" Issue # 1 - Models in local/custom path

    If you have the model in a local/custom path, then the easiest thing to do is to copy/move manually to
    /llmware_data/model_repo/{{my_model_name}}/ and place the model components in this path.

"""

# lookup model repo path
model_path = LLMWareConfig().get_model_repo_path()
print("local model path: ", model_path)

# You can manually put the model components in a folder called "model_name" at the model repo path, and
# 'lookups' will all work.
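
# A rough, hedged sketch of that manual copy step - the source path and the "my_model" folder name below are
# hypothetical; the folder name just needs to match the "model_name" used in the registered model card:
#
#   import os, shutil
#
#   local_model_files = "/path/to/my/local/model"            # wherever the model components live today
#   target_path = os.path.join(model_path, "my_model")       # folder named after the model card "model_name"
#
#   if not os.path.exists(target_path):
#       shutil.copytree(local_model_files, target_path)      # copies all model components into the model repo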
""" Issue # 2 - How do I figure out the prompt template?
82+
83+
Below is a list of the prompt wrapper lookups that covers most of the common models:
84+
85+
# standard used in most llmware models - bling, dragon and slim
86+
"human_bot": {"main_start": "<human>: ", "main_stop": "\n", "start_llm_response": "<bot>:"},
87+
88+
# commonly used by llama2 and mistral
89+
"<INST>": {"main_start": "<INST>", "main_stop": "</INST>", "start_llm_response": ""},
90+
91+
"hf_chat": {"system_start": "<|im_start|>system\n", "system_stop": "<|im_end|>\n",
92+
"main_start": "<|im_start|>user", "main_stop": "<|im_end|>\n",
93+
"start_llm_response": "<|im_start|>assistant"},
94+
95+
"open_chat": {"main_start": "GPT4 User: ", "main_stop": "<|endofturn|>",
96+
"start_llm_response": "GPT4 Assistant:"},
97+
98+
"alpaca": {"main_start": "### Instruction: ", "main_stop": "\n",
99+
"start_llm_response": "### Response: "},
100+
101+
"chat_ml": {"system_start": "<|im_start|>system", "system_stop": "<|im_end|>\n",
102+
"main_start": "<|im_start|>user", "main_stop": "<|im_end|>\n",
103+
"start_llm_response": "<|im_start|>assistant"},
104+
105+
"phi_3": {"system_start": "<|system|>\n", "system_stop": "<|end|>\n",
106+
"main_start": "<|user|>\n", "main_stop": "<|end|>\n", "start_llm_response": "<|assistant|>"},
107+
108+
"llama_3_chat": {"system_start": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n",
109+
"system_stop": "<|eot_id|>",
110+
"main_start": "<|start_header_id|>user>|end_header_id|>\n",
111+
"main_stop": "<|eot_id|>",
112+
"start_llm_response": "<|start_header_id|>assistant<|end_header_id|>\n"},
113+
114+
"tiny_llama_chat": {"system_start": "<|system|>", "system_stop": "</s>",
115+
"main_start": "<|user|>", "main_stop": "</s>",
116+
"start_llm_response": "<|assistant|>"},
117+
118+
"stablelm_zephyr_chat": {"system_start": "", "system_stop": "",
119+
"main_start": "<|user|>", "main_stop": "<|endoftext|>\n",
120+
"start_llm_response": "<|assistant|>"},
121+
122+
"google_gemma_chat": {"system_start": "", "system_stop": "",
123+
"main_start": "<bos><start_of_turn>user\n",
124+
"main_stop": "<end_of_turn>\n",
125+
"start_llm_response": "<start_of_turn>model"},
126+
127+
"vicuna_chat": {"system_start": "", "system_stop": "",
128+
"main_start": "USER: ", "main_stop": "",
129+
"start_llm_response": " ASSISTANT:"}
130+
131+
"""
132+
133+
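
# For intuition, a hedged illustration (the exact assembly internals may differ slightly): with the "phi_3"
# wrapper above, a query such as "What is the termination fee?" would be wrapped roughly as:
#
#   "<|user|>\nWhat is the termination fee?<|end|>\n<|assistant|>"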

# if none of these templates work, then you can also register a new prompt template
ModelCatalog().register_new_finetune_wrapper("my_new_template",
                                             main_start="<user starts here>",
                                             main_stop="<user ends here>",
                                             llm_start="<model starts here>",
                                             system_start="<you are useful assistant...>",
                                             system_stop="<end system stuff>")

# once registered, this new prompt wrapper can also be invoked directly by "my_new_template", and it will be
# picked up in the lookup at the time of instantiating the model
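
# For illustration only (hedged - "my_model" and the other parameters are placeholders from above): the new
# wrapper is referenced from a model card the same way the built-in wrappers are, e.g.,
#
#   model_card_dict = {"model_name": "my_model",
#                      "prompt_wrapper": "my_new_template",
#                      "...": "other params as in the templates above"}
#
#   ModelCatalog().register_new_model_card(model_card_dict)
#   model = ModelCatalog().load_model("my_model")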

examples/Models/using_onnx_models.py (+84)

@@ -0,0 +1,84 @@

""" Starting with llmware 0.3.7, we have integrated support for ONNX Runtime Generative models.

    To get started:

    `pip install onnxruntime_genai`

    Please note that onnxruntime_genai is supported on a wide range of Windows, Linux and x86 platforms, but
    does not build for Mac Metal - so it will not work on Macs.

"""

from llmware.models import ModelCatalog

from importlib import util
if not util.find_spec("onnxruntime_genai"):
    print("\nto run this example, you need to install onnxruntime_genai first, e.g., pip3 install onnxruntime_genai")

# we will be adding more ONNX models to the default catalog, but we currently support:
#   -- bling-tiny-llama-onnx
#   -- bling-phi-3-onnx
#   -- phi-3-onnx

# please see the example 'adding_openvino_or_onnx_model.py' to add your own ONNX and OpenVino models


def getting_started():

    """ Simple 'hello world' example. """

    model = ModelCatalog().load_model("bling-tiny-llama-onnx", temperature=0.0, sample=False,
                                      max_output=100)

    query = "What was Microsoft's revenue in the 3rd quarter?"

    context = ("Microsoft Cloud Strength Drives Third Quarter Results \nREDMOND, Wash. — April 25, 2023 — "
               "Microsoft Corp. today announced the following results for the quarter ended March 31, 2023,"
               " as compared to the corresponding period of last fiscal year:\n· Revenue was $52.9 billion"
               " and increased 7% (up 10% in constant currency)\n· Operating income was $22.4 billion "
               "and increased 10% (up 15% in constant currency)\n· Net income was $18.3 billion and "
               "increased 9% (up 14% in constant currency)\n· Diluted earnings per share was $2.45 "
               "and increased 10% (up 14% in constant currency).\n")

    response = model.inference(query, add_context=context)

    print(f"\ngetting_started example - query - {query}")
    print("getting_started example - response: ", response)

    return response


def streaming_example():

    prompt = "What are the benefits of small specialized LLMs?"

    print(f"\nstreaming_example - prompt: {prompt}")

    # model.stream returns a generator - consume it token-by-token as follows
    model = ModelCatalog().load_model("phi-3-onnx", max_output=500)
    text_out = ""
    token_count = 0

    for streamed_token in model.stream(prompt):

        text_out += streamed_token

        # only start printing once a non-whitespace token has arrived
        if text_out.strip():
            print(streamed_token, end="")

        token_count += 1

    print("total text: ", text_out)
    print("total tokens: ", token_count)

    return text_out


if __name__ == "__main__":

    getting_started()
    streaming_example()
84+
examples/Models/using_openvino_models.py (+107)

@@ -0,0 +1,107 @@

""" Starting with llmware 0.3.7, we have integrated support for OpenVino Generative models.

    To get started:

    `pip install openvino`
    `pip install openvino_genai`

    OpenVino is supported on a wide range of platforms (including Windows, Linux, Mac OS), and is highly
    optimized for Intel x86 architectures - both CPU and GPU.

    The intent is for OpenVino models to be "drop in" replacements for Pytorch or GGUF models by simply
    replacing the model with the OpenVino equivalent - usually indicated by an 'ov' at the end of the model name.

"""
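
# A hedged illustration of the "drop in" idea above - the base model name is assumed for the sake of the
# example; the only change needed is pointing load_model at the '-ov' variant:
#
#   model = ModelCatalog().load_model("bling-tiny-llama")      # Pytorch/GGUF version (assumed catalog name)
#   model = ModelCatalog().load_model("bling-tiny-llama-ov")   # OpenVino "drop in" equivalent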

from llmware.models import ModelCatalog

from importlib import util
if not util.find_spec("openvino"):
    print("\nto run this example, you need to install openvino first, e.g., pip3 install openvino")

if not util.find_spec("openvino_genai"):
    print("\nto run this example, you need to install openvino_genai first, e.g., pip3 install openvino_genai")


# we will be adding more OpenVino models to the default catalog, but we currently support:
#   -- bling-tiny-llama-ov
#   -- bling-phi-3-ov
#   -- phi-3-ov
#   -- qwen2.5-1.5b-ov
#   -- qwen2.5-3b-ov
#   -- qwen2.5-0.5b-ov
#   -- dragon-llama2-ov
#   -- dragon-mistral-ov
#   -- dragon-yi-9b-ov
#   -- slim-extract-tiny-ov
#   -- slim-extract-phi-3-ov
#   -- slim-sentiment-ov

# to add your own OpenVino models, please see the example 'adding_openvino_or_onnx_model.py'


def getting_started():

    model = ModelCatalog().load_model("bling-tiny-llama-ov", temperature=0.0, sample=False,
                                      max_output=100)

    query = "What was Microsoft's revenue in the 3rd quarter?"

    context = ("Microsoft Cloud Strength Drives Third Quarter Results \nREDMOND, Wash. — April 25, 2023 — "
               "Microsoft Corp. today announced the following results for the quarter ended March 31, 2023,"
               " as compared to the corresponding period of last fiscal year:\n· Revenue was $52.9 billion"
               " and increased 7% (up 10% in constant currency)\n· Operating income was $22.4 billion "
               "and increased 10% (up 15% in constant currency)\n· Net income was $18.3 billion and "
               "increased 9% (up 14% in constant currency)\n· Diluted earnings per share was $2.45 "
               "and increased 10% (up 14% in constant currency).\n")

    response = model.inference(query, add_context=context)

    print(f"\ngetting_started example - query - {query}")
    print("getting_started example - response: ", response)

    return response


def sentiment_analysis():

    model = ModelCatalog().load_model("slim-sentiment-ov", temperature=0.0, sample=False)

    text = ("The poor earnings results along with the worrisome guidance on the future have dampened "
            "expectations and put a lot of pressure on the share price.")

    # slim function-calling model - returns a structured sentiment classification
    response = model.function_call(text)

    print(f"\nsentiment_analysis - {response}")

    return response


def extract_info():

    model = ModelCatalog().load_model("slim-extract-tiny-ov", temperature=0.0, sample=False)

    text = ("Adobe shares tumbled as much as 11% in extended trading Thursday after the design software maker "
            "issued strong fiscal first-quarter results but came up slightly short on quarterly revenue guidance. "
            "Here’s how the company did, compared with estimates from analysts polled by LSEG, formerly known as Refinitiv: "
            "Earnings per share: $4.48 adjusted vs. $4.38 expected Revenue: $5.18 billion vs. $5.14 billion expected "
            "Adobe’s revenue grew 11% year over year in the quarter, which ended March 1, according to a statement. "
            "Net income decreased to $620 million, or $1.36 per share, from $1.25 billion, or $2.71 per share, "
            "in the same quarter a year ago. During the quarter, Adobe abandoned its $20 billion acquisition of "
            "design software startup Figma after U.K. regulators found competitive concerns. The company paid "
            "Figma a $1 billion termination fee.")

    # extract a specific key ("termination fee") from the passage using the slim extract function-calling model
    response = model.function_call(text, function="extract", params=["termination fee"])

    print(f"\nextract_info - {response}")

    return response


if __name__ == "__main__":

    getting_started()
    sentiment_analysis()
    extract_info()
