""" This example shows how to add a custom or private OpenVino or ONNX model to the llmware model catalog.

    Over the next few releases, we will be expanding the default ModelCatalog considerably, but for the time
    being, please feel free to follow the steps below to build your own custom catalog.

    We show below templates for the model card dictionaries - most of which are fairly easy to build for a
    given model.

    We highlight the main step - a simple one-liner to register the model - and then provide more details on
    three potential troubleshooting items:

        1 - using a model from a custom/private path - and 'inserting' directly into the model_repo lookup
        2 - identifying the prompt wrapper template
        3 - customizing a new prompt wrapper

"""

from llmware.models import ModelCatalog
from llmware.prompts import Prompt
from llmware.configs import LLMWareConfig

# Create model card and register in the ModelCatalog

""" Sample OpenVino Model Card template

    model_card_dict = {"model_name": "phi-3-ov", "model_family": "OVGenerativeModel",
                       "model_category": "generative_local", "display_name": "phi-3-ov",
                       "model_location": "llmware_repo",
                       "context_window": 4096, "instruction_following": False, "prompt_wrapper": "phi_3",
                       "temperature": 0.0, "sample_default": False, "trailing_space": "",
                       "tokenizer_local": "tokenizer_phi3.json",
                       "hf_repo": "llmware/phi-3-ov",
                       "custom_model_files": [], "custom_model_repo": "",
                       "fetch": {"snapshot": True, "module": "llmware.models", "method": "pull_snapshot_from_hf"},
                       "validation_files": ["openvino_model.xml"],
                       "link": "https://huggingface.co/llmware/phi-3-ov"}
"""

""" Sample ONNX Model Card template

    model_card_dict = {"model_name": "phi-3-onnx", "model_family": "ONNXGenerativeModel",
                       "model_category": "generative_local", "display_name": "phi-3-onnx",
                       "model_location": "llmware_repo",
                       "context_window": 4096, "instruction_following": False, "prompt_wrapper": "phi_3",
                       "temperature": 0.0, "sample_default": False, "trailing_space": "",
                       "tokenizer_local": "tokenizer_phi3.json",
                       "hf_repo": "llmware/phi-3-onnx",
                       "custom_model_files": [], "custom_model_repo": "",
                       "fetch": {"snapshot": True, "module": "llmware.models", "method": "pull_snapshot_from_hf"},
                       "validation_files": ["model.onnx", "model.onnx.data"],
                       "link": "https://huggingface.co/llmware/phi-3-onnx"}
"""

# create the model card dictionary manually using the templates above as guides, e.g.,
model_card_dict = {"model_name": "my_model", "insert other params from above...": []}
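
# as a fuller illustration, a card for a hypothetical private OpenVino model might look like the sketch
# below, which simply fills in the OpenVino template above - the repo, tokenizer file and link are
# placeholders, so substitute the values for your own model
model_card_dict = {"model_name": "my_model", "model_family": "OVGenerativeModel",
                   "model_category": "generative_local", "display_name": "my_model",
                   "model_location": "llmware_repo",
                   "context_window": 4096, "instruction_following": False, "prompt_wrapper": "phi_3",
                   "temperature": 0.0, "sample_default": False, "trailing_space": "",
                   "tokenizer_local": "tokenizer_phi3.json",
                   "hf_repo": "my_org/my_model",
                   "custom_model_files": [], "custom_model_repo": "",
                   "fetch": {"snapshot": True, "module": "llmware.models", "method": "pull_snapshot_from_hf"},
                   "validation_files": ["openvino_model.xml"],
                   "link": "https://huggingface.co/my_org/my_model"}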

# this is the key step - registering the model card - add as a first line in any script/example
ModelCatalog().register_new_model_card(model_card_dict)
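
# optional sanity check - this assumes the catalog exposes list_all_models(), as used in other llmware
# examples, returning the list of registered model card dictionaries
registered_names = [card["model_name"] for card in ModelCatalog().list_all_models()]
print("my_model registered: ", "my_model" in registered_names)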

# once the model is registered in the catalog, it can then be accessed anytime by name, e.g.,
model = ModelCatalog().load_model("my_model")
response = model.inference("What is ...")

# or if using in conjunction with building a RAG prompt
prompter = Prompt().load_model("my_model")
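
# a short sketch of the typical RAG pattern with the Prompt class - the folder path, file name and query
# below are placeholders, so point them at your own source documents
prompter.add_source_document("/path/to/documents", "my_document.pdf", query="termination provisions")
responses = prompter.prompt_with_source("What are the termination provisions?")
prompter.clear_source_materials()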

""" Issue # 1 - Models in a local/custom path

    If you have the model in a local/custom path, then the easiest thing to do is to copy/move it manually to
    /llmware_data/model_repo/{my_model_name}/ and place the model components in this path.
"""

# lookup model repo path
model_path = LLMWareConfig().get_model_repo_path()
print("local model path: ", model_path)

# You can manually put the model components in a folder called "model_name" at the model repo path, and
# 'lookups' will all work.
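
# a minimal sketch of that manual copy, assuming your model components sit in a local folder - the source
# path and target folder name below are placeholders
import os
import shutil

local_model_components = "/path/to/my/local/model"
target_path = os.path.join(LLMWareConfig().get_model_repo_path(), "my_model")

if not os.path.exists(target_path):
    shutil.copytree(local_model_components, target_path)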

""" Issue # 2 - How do I figure out the prompt template?

    Below is a list of the prompt wrapper lookups that covers most of the common models:

    # standard used in most llmware models - bling, dragon and slim
    "human_bot": {"main_start": "<human>: ", "main_stop": "\n", "start_llm_response": "<bot>:"},

    # commonly used by llama2 and mistral
    "<INST>": {"main_start": "<INST>", "main_stop": "</INST>", "start_llm_response": ""},

    "hf_chat": {"system_start": "<|im_start|>system\n", "system_stop": "<|im_end|>\n",
                "main_start": "<|im_start|>user", "main_stop": "<|im_end|>\n",
                "start_llm_response": "<|im_start|>assistant"},

    "open_chat": {"main_start": "GPT4 User: ", "main_stop": "<|endofturn|>",
                  "start_llm_response": "GPT4 Assistant:"},

    "alpaca": {"main_start": "### Instruction: ", "main_stop": "\n",
               "start_llm_response": "### Response: "},

    "chat_ml": {"system_start": "<|im_start|>system", "system_stop": "<|im_end|>\n",
                "main_start": "<|im_start|>user", "main_stop": "<|im_end|>\n",
                "start_llm_response": "<|im_start|>assistant"},

    "phi_3": {"system_start": "<|system|>\n", "system_stop": "<|end|>\n",
              "main_start": "<|user|>\n", "main_stop": "<|end|>\n",
              "start_llm_response": "<|assistant|>"},

    "llama_3_chat": {"system_start": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n",
                     "system_stop": "<|eot_id|>",
                     "main_start": "<|start_header_id|>user<|end_header_id|>\n",
                     "main_stop": "<|eot_id|>",
                     "start_llm_response": "<|start_header_id|>assistant<|end_header_id|>\n"},

    "tiny_llama_chat": {"system_start": "<|system|>", "system_stop": "</s>",
                        "main_start": "<|user|>", "main_stop": "</s>",
                        "start_llm_response": "<|assistant|>"},

    "stablelm_zephyr_chat": {"system_start": "", "system_stop": "",
                             "main_start": "<|user|>", "main_stop": "<|endoftext|>\n",
                             "start_llm_response": "<|assistant|>"},

    "google_gemma_chat": {"system_start": "", "system_stop": "",
                          "main_start": "<bos><start_of_turn>user\n",
                          "main_stop": "<end_of_turn>\n",
                          "start_llm_response": "<start_of_turn>model"},

    "vicuna_chat": {"system_start": "", "system_stop": "",
                    "main_start": "USER: ", "main_stop": "",
                    "start_llm_response": " ASSISTANT:"}

"""

# if none of these templates work, then you can also register a new prompt template
ModelCatalog().register_new_finetune_wrapper("my_new_template",
                                             main_start="<user starts here>",
                                             main_stop="<user ends here>",
                                             llm_start="<model starts here>",
                                             system_start="<you are useful assistant...",
                                             system_stop="<end system stuff>"
                                             )

# once registered, this new prompt wrapper can also be invoked directly by "my_new_template", and it will be
# picked up in the lookup at the time of instantiating the model
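
# for example, a model card can then point its "prompt_wrapper" at the new template before registering -
# a brief sketch reusing the placeholder card from above
custom_wrapper_card = dict(model_card_dict)
custom_wrapper_card.update({"model_name": "my_model_custom_wrapper",
                            "display_name": "my_model_custom_wrapper",
                            "prompt_wrapper": "my_new_template"})
ModelCatalog().register_new_model_card(custom_wrapper_card)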