@@ -1,59 +1,56 @@
 from __future__ import annotations
 
+import contextlib
+import ctypes
+import fnmatch
+import json
+import multiprocessing
 import os
 import sys
-import uuid
 import time
-import json
-import ctypes
 import typing
-import fnmatch
+import uuid
 import warnings
-import contextlib
-import multiprocessing
+from collections import deque
+from pathlib import Path
 
 from typing import (
     Any,
+    Callable,
+    Deque,
+    Dict,
+    Generator,
+    Iterator,
     List,
     Literal,
     Optional,
-    Union,
-    Generator,
     Sequence,
-    Iterator,
-    Deque,
-    Callable,
-    Dict,
-)
-from collections import deque
-from pathlib import Path
-
-
-from .llama_types import *
-from .llama_grammar import LlamaGrammar
-from .llama_cache import (
-    BaseLlamaCache,  # type: ignore
+    Union,
 )
-from .llama_tokenizer import BaseLlamaTokenizer, LlamaTokenizer
-import llama_cpp.llama_cpp as llama_cpp
-import llama_cpp.llama_chat_format as llama_chat_format
-
-from llama_cpp.llama_speculative import LlamaDraftModel
 
 import numpy as np
 import numpy.typing as npt
 
+from llama_cpp import llama_chat_format, llama_cpp
+from llama_cpp.llama_speculative import LlamaDraftModel
+
 from ._internals import (
-    _LlamaModel,  # type: ignore
-    _LlamaContext,  # type: ignore
     _LlamaBatch,  # type: ignore
-    _LlamaTokenDataArray,  # type: ignore
-    _LlamaSamplingParams,  # type: ignore
+    _LlamaContext,  # type: ignore
+    _LlamaModel,  # type: ignore
     _LlamaSamplingContext,  # type: ignore
+    _LlamaSamplingParams,  # type: ignore
+    _LlamaTokenDataArray,  # type: ignore
     _normalize_embedding,  # type: ignore
 )
 from ._logger import set_verbose
 from ._utils import suppress_stdout_stderr
+from .llama_cache import (
+    BaseLlamaCache,  # type: ignore
+)
+from .llama_grammar import LlamaGrammar
+from .llama_tokenizer import BaseLlamaTokenizer, LlamaTokenizer
+from .llama_types import *
 
 
 class Llama:
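The hunk above regroups the imports in isort-style order: stdlib first, then third-party (`numpy`), then first-party `llama_cpp` and relative imports, alphabetized within each group. The commit doesn't say which tool produced it (both `isort llama_cpp/llama.py` and Ruff's import-sorting `I` rules emit this ordering). The rewrite of `import llama_cpp.llama_cpp as llama_cpp` into `from llama_cpp import llama_chat_format, llama_cpp` is behavior-preserving; a minimal sketch, using a stdlib package so it runs without llama-cpp-python installed:

```python
# Both spellings bind the same submodule object, so the import
# rewrite in this hunk changes style only, not behavior.
from os import path as via_from

import os.path as via_import

assert via_from is via_import  # identical module object either way
```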
@@ -1036,7 +1033,7 @@ def _create_completion(
         assert self._ctx is not None
         assert suffix is None or suffix.__class__ is str
 
-        completion_id: str = f"cmpl-{str(uuid.uuid4())}"
+        completion_id: str = f"cmpl-{uuid.uuid4()!s}"
         created: int = int(time.time())
         bos_token_id: int = self.token_bos()
         cls_token_id: int = self._model.token_cls()
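`!s` is the f-string conversion flag that applies `str()` to the value before formatting, so the new spelling drops a redundant call while producing an identical string. A quick check:

```python
# The old and new completion_id spellings are byte-identical:
# f"{x!s}" is defined as formatting str(x).
import uuid

u = uuid.uuid4()
assert f"cmpl-{str(u)}" == f"cmpl-{u!s}"
```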
@@ -2127,7 +2124,7 @@ def from_pretrained(
         local_dir_use_symlinks: Union[bool, Literal["auto"]] = "auto",
         cache_dir: Optional[Union[str, os.PathLike[str]]] = None,
         **kwargs: Any,
-    ) -> "Llama":
+    ) -> Llama:
         """Create a Llama model from a pretrained model name or path.
         This method requires the huggingface-hub package.
         You can install it with `pip install huggingface-hub`.
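Dropping the quotes around the return type is safe because the module's first line is `from __future__ import annotations` (PEP 563): annotations are stored as strings and not evaluated at definition time, so a method can name its own class directly. A self-contained sketch (`Widget` is an illustrative stand-in, not from this codebase):

```python
# With postponed evaluation of annotations, a classmethod can
# annotate its return type with the enclosing class, unquoted,
# even though the class is still being defined at that point.
from __future__ import annotations


class Widget:
    @classmethod
    def make(cls) -> Widget:  # no NameError; annotation stays the string "Widget"
        return cls()


print(Widget.make())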
@@ -2142,7 +2139,7 @@ def from_pretrained(
         Returns:
             A Llama model."""
         try:
-            from huggingface_hub import hf_hub_download, HfFileSystem
+            from huggingface_hub import HfFileSystem, hf_hub_download
             from huggingface_hub.utils import validate_repo_id
         except ImportError:
             raise ImportError(
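Only the order of the two imported names changes here; `huggingface_hub` stays a lazy import inside the `try` block so it remains an optional dependency, with the `ImportError` path pointing users at `pip install huggingface-hub`. For context, a usage sketch of `from_pretrained` (the repo id and filename pattern are placeholders, not from this commit; the glob matching relies on the `fnmatch` import added at the top of the file):

```python
# Usage sketch: download a GGUF file from the Hugging Face Hub and
# load it. Requires huggingface-hub to be installed; network access
# is needed on first call, after which the file is cached.
from llama_cpp import Llama

llm = Llama.from_pretrained(
    repo_id="Qwen/Qwen2-0.5B-Instruct-GGUF",  # placeholder repo
    filename="*q8_0.gguf",  # pattern matched against files in the repo
)
```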