Commit 54138d4

Merge pull request #1020 from llmware-ai/update-onnx-ov-100424
adding new model classes
2 parents f13283f + 4514cfd commit 54138d4

File tree

4 files changed (+2219 additions, -136 deletions)


llmware/configs.py

Lines changed: 64 additions & 8 deletions
@@ -1,4 +1,4 @@
-# Copyright 2023-2024 llmware
+# Copyright 2023 llmware
 
 # Licensed under the Apache License, Version 2.0 (the "License"); you
 # may not use this file except in compliance with the License.  You
@@ -502,6 +502,66 @@ def add_vector_db(cls, db_name, vector_db_class, module="llmware.embeddings"):
 logging.basicConfig(format=LLMWareConfig().get_logging_format(), level=LLMWareConfig().get_logging_level())
 
 
+class OVConfig:
+
+    """ Configuration object for OpenVINO - these parameters are consumed by the
+    OVGenerativeModel class in module llmware.models. In most cases, the parameters
+    do not require attention, but they are provided as additional options for
+    performance tuning, with GPU deployment in particular. """
+
+    _conf = {"device": "GPU",
+             "use_ov_tokenizer": False,
+             "generation_version": "ov_genai_pip",
+             "use_gpu_if_available": True,
+             "cache": True,
+             "cache_with_model": True,
+             "cache_custom_path": "",
+             "apply_performance_hints": True,
+             "verbose_mode": False,
+             "get_token_counts": True
+             }
+
+    _supported_hints = ["MODEL_PRIORITY", "GPU_HOST_TASK_PRIORITY",
+                        "GPU_QUEUE_THROTTLE", "GPU_QUEUE_PRIORITY"]
+
+    # this is a subset of useful GPU performance hints - will expand options over time
+
+    _gpu_hints = {
+        "MODEL_PRIORITY": "HIGH",
+        "GPU_HOST_TASK_PRIORITY": "HIGH",
+        "GPU_QUEUE_THROTTLE": "HIGH",
+        "GPU_QUEUE_PRIORITY": "HIGH"
+    }
+
+    @classmethod
+    def get_config(cls, param):
+        return cls._conf[param]
+
+    @classmethod
+    def set_config(cls, param, value):
+        cls._conf[param] = value
+
+    @classmethod
+    def get_gpu_hints(cls):
+        return cls._gpu_hints
+
+    @classmethod
+    def set_gpu_hint(cls, param, value):
+
+        # will add safety checks for type - most in form of "HIGH" | "MEDIUM" | "LOW"
+        # for more information, please see the OpenVINO documentation
+        if param in cls._supported_hints:
+            cls._gpu_hints[param] = value
+
+    @classmethod
+    def optimize_for_gpu(cls):
+        return cls._conf["use_gpu_if_available"]
+
+    @classmethod
+    def generation_version(cls):
+        return cls._conf["generation_version"]
+
+
 class MilvusConfig:
 
     """Configuration object for Milvus"""
@@ -598,6 +658,7 @@ def get_uri_string(cls):
 
         # canonical simple format of postgres uri string
         input_collection_db_path = f"postgresql://postgres@{host}:{port}/{db_name}"
+        # print("update: postgres get_uri_string - ", input_collection_db_path)
 
         return input_collection_db_path
 
@@ -658,7 +719,6 @@ def get_config(cls, name):
     def set_config(cls, name, value):
         cls._conf[name] = value
 
-
 class LanceDBConfig:
 
     _conf = {'uri': '/tmp/lancedb/'}
@@ -702,14 +762,13 @@ def get_uri_string (cls):
         db_file = os.path.join(cls._conf["sqlite_db_folder_path"], cls._conf["db_name"])
        return db_file
 
+    # new method for SQLTables DB
     @classmethod
     def get_uri_string_experimental_db(cls):
         """For SQLite the URI string is the local file with full absolute path"""
-
-        # used in SQLTables DB in llmware.agents module
-
         db_file = os.path.join(cls._conf["sqlite_db_folder_path"], cls._conf["db_experimental"])
         return db_file
+    # end method
 
     @classmethod
     def get_db_configs(cls):
@@ -782,8 +841,6 @@ def set_config(cls, name, value):
 
 class LLMWareTableSchema:
 
-    """ Table Schema used for Parsing, Library Cards and other llmware modules. """
-
     # notes:
     # 1. bigserial type for Postgres
     # 2. "text" and "table" replaced with "text_block" and "table_block" in SQL DB for safety / reserved
@@ -985,7 +1042,6 @@ class ChromaDBConfig:
     # update - v0.2.12 -> by default, persistent path set to make chroma persistent.
     # If this is None, then an in-memory only chroma instance will be created.
     #
-
     'persistent_path': LLMWareConfig().get_library_path(),
 
     #
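For reference, a minimal sketch of how the new OVConfig object could be exercised. The OVConfig calls come directly from the class added above; the ModelCatalog load at the end follows the standard llmware pattern and is an assumption here, as is the example prompt:

    from llmware.configs import OVConfig
    from llmware.models import ModelCatalog

    # route OpenVINO inference to CPU rather than the default "GPU" device
    OVConfig().set_config("device", "CPU")

    # tune one of the supported GPU hints - values are "HIGH" | "MEDIUM" | "LOW"
    OVConfig().set_gpu_hint("GPU_QUEUE_THROTTLE", "MEDIUM")

    print("device: ", OVConfig().get_config("device"))
    print("gpu hints: ", OVConfig().get_gpu_hints())
    print("generation version: ", OVConfig().generation_version())

    # assumed: standard llmware catalog load of one of the new OV models below
    model = ModelCatalog().load_model("bling-tiny-llama-ov")
    response = model.inference("What is the capital of France?")

Note that set_gpu_hint silently ignores unsupported keys, since it only writes keys found in _supported_hints.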

llmware/gguf_configs.py

Lines changed: 5 additions & 0 deletions
@@ -892,6 +892,11 @@ class GGUFConfigs:
              "force_gpu": False,
              "use_macos_accelerate": True,
 
+             # option to capture and provide the 'first token' of generation
+             # used for GGUF - and implemented for the HFGenerative (PyTorch) and
+             # ONNXGenerative classes as well
+             "get_first_token_speed": False,
+
              # prebuilt shared libraries included in llmware
              "windows": "libllama_win.dll",
              "windows_cuda": "libllama_win_cuda.dll",

llmware/model_configs.py

Lines changed: 72 additions & 2 deletions
@@ -15,7 +15,7 @@
 
 """Global Default Configs for Models, Finetune Wrappers and Prompt Instructions Catalog.
 
-These configs generally do not need to be accessed directly, but can be viewed, accessed and modified through
+These configs generally do not need to be accessed directly, but should be viewed, accessed and modified through
 ModelCatalog and PromptCatalog classes.
 
 For customization, there is also the option in ModelCatalog to load a custom model catalog from json file, which
@@ -24,7 +24,77 @@
 
 global_model_repo_catalog_list = [
 
-    # embedding models
+    {"model_name": "bling-tiny-llama-onnx", "model_family": "ONNXGenerativeModel",
+     "model_category": "generative_local", "display_name": "llmware/bling-tiny-llama-onnx",
+     "model_location": "llmware_repo", "context_window": 2048, "instruction_following": False,
+     "prompt_wrapper": "human_bot", "temperature": 0.0, "sample_default": False, "trailing_space": "",
+     "hf_repo": "llmware/bling-tiny-llama-onnx", "custom_model_files": [], "custom_model_repo": "",
+     "snapshot": True, "tokenizer_local": "tokenizer_tl.json",
+     "fetch": {"snapshot": True, "module": "llmware.models", "method": "pull_snapshot_from_hf"},
+     "validation_files": ["model.onnx", "model.onnx.data"],
+     "link": "https://huggingface.co/llmware/bling-tiny-llama-onnx"},
+
+    {"model_name": "bling-tiny-llama-ov", "model_family": "OVGenerativeModel",
+     "model_category": "generative_local", "display_name": "bling-tiny-llama-ov",
+     "model_location": "llmware_repo",
+     "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot",
+     "temperature": 0.0, "sample_default": False, "trailing_space": "",
+     "tokenizer_local": "tokenizer_tl.json",
+     "hf_repo": "llmware/bling-tiny-llama-ov",
+     "custom_model_files": [], "custom_model_repo": "",
+     "fetch": {"snapshot": True, "module": "llmware.models", "method": "pull_snapshot_from_hf"},
+     "validation_files": ["openvino_model.xml"],
+     "link": "https://huggingface.co/llmware/bling-tiny-llama-ov"},
+
+    {"model_name": "bling-phi-3-ov", "model_family": "OVGenerativeModel",
+     "model_category": "generative_local", "display_name": "bling-phi-3-ov",
+     "model_location": "llmware_repo",
+     "context_window": 4096, "instruction_following": False, "prompt_wrapper": "human_bot",
+     "temperature": 0.0, "sample_default": False, "trailing_space": "",
+     "tokenizer_local": "tokenizer_phi3.json",
+     "hf_repo": "llmware/bling-phi-3-ov",
+     "custom_model_files": [], "custom_model_repo": "",
+     "fetch": {"snapshot": True, "module": "llmware.models", "method": "pull_snapshot_from_hf"},
+     "validation_files": ["openvino_model.xml"],
+     "link": "https://huggingface.co/llmware/bling-phi-3-ov"},
+
+    {"model_name": "bling-phi-3-onnx", "model_family": "ONNXGenerativeModel",
+     "model_category": "generative_local", "display_name": "bling-phi-3-onnx",
+     "model_location": "llmware_repo",
+     "context_window": 4096, "instruction_following": False, "prompt_wrapper": "human_bot",
+     "temperature": 0.0, "sample_default": False, "trailing_space": "",
+     "tokenizer_local": "tokenizer_phi3.json",
+     "hf_repo": "llmware/bling-phi-3-onnx",
+     "custom_model_files": [], "custom_model_repo": "",
+     "fetch": {"snapshot": True, "module": "llmware.models", "method": "pull_snapshot_from_hf"},
+     "validation_files": ["model.onnx", "model.onnx.data"],
+     "link": "https://huggingface.co/llmware/bling-phi-3-onnx"},
+
+    {"model_name": "phi-3-onnx", "model_family": "ONNXGenerativeModel",
+     "model_category": "generative_local", "display_name": "phi-3-onnx",
+     "model_location": "llmware_repo",
+     "context_window": 4096, "instruction_following": False, "prompt_wrapper": "human_bot",
+     "temperature": 0.0, "sample_default": False, "trailing_space": "",
+     "tokenizer_local": "tokenizer_phi3.json",
+     "hf_repo": "llmware/phi-3-onnx",
+     "custom_model_files": [], "custom_model_repo": "",
+     "fetch": {"snapshot": True, "module": "llmware.models", "method": "pull_snapshot_from_hf"},
+     "validation_files": ["model.onnx", "model.onnx.data"],
+     "link": "https://huggingface.co/llmware/phi-3-onnx"},
+
+    {"model_name": "phi-3-ov", "model_family": "OVGenerativeModel",
+     "model_category": "generative_local", "display_name": "phi-3-ov",
+     "model_location": "llmware_repo",
+     "context_window": 4096, "instruction_following": False, "prompt_wrapper": "human_bot",
+     "temperature": 0.0, "sample_default": False, "trailing_space": "",
+     "tokenizer_local": "tokenizer_phi3.json",
+     "hf_repo": "llmware/phi-3-ov",
+     "custom_model_files": [], "custom_model_repo": "",
+     "fetch": {"snapshot": True, "module": "llmware.models", "method": "pull_snapshot_from_hf"},
+     "validation_files": ["openvino_model.xml"],
+     "link": "https://huggingface.co/llmware/phi-3-ov"},
+
+    # embedding models
 
     {"model_name": "all-MiniLM-L6-v2", "display_name": "mini-lm-sbert", "model_family": "HFEmbeddingModel",
      "model_category": "embedding", "model_location": "hf_repo", "embedding_dims": 384, "context_window": 512,
