|
1 | 1 | from __future__ import annotations
|
2 | 2 |
|
3 | 3 | import os
|
| 4 | +import contextlib |
4 | 5 | import json
|
5 | 6 | import typing
|
6 |
| -import contextlib |
7 |
| - |
8 |
| -from threading import Lock |
9 | 7 | from functools import partial
|
10 |
| -from typing import Iterator, List, Optional, Union, Dict |
| 8 | +from threading import Lock |
11 | 9 |
|
12 |
| -import llama_cpp |
| 10 | +from typing import Dict, Iterator, List, Optional, Union |
13 | 11 |
|
14 | 12 | import anyio
|
15 | 13 | from anyio.streams.memory import MemoryObjectSendStream
|
16 |
| -from starlette.concurrency import run_in_threadpool, iterate_in_threadpool |
17 |
| -from fastapi import Depends, FastAPI, APIRouter, Request, HTTPException, status, Body |
| 14 | +from fastapi import APIRouter, Body, Depends, FastAPI, HTTPException, Request, status |
18 | 15 | from fastapi.middleware import Middleware
|
19 | 16 | from fastapi.middleware.cors import CORSMiddleware
|
20 | 17 | from fastapi.security import HTTPBearer
|
21 | 18 | from sse_starlette.sse import EventSourceResponse
|
22 |
| -from starlette_context.plugins import RequestIdPlugin # type: ignore |
| 19 | +from starlette.concurrency import iterate_in_threadpool, run_in_threadpool |
23 | 20 | from starlette_context.middleware import RawContextMiddleware
|
24 |
| - |
| 21 | +from starlette_context.plugins import RequestIdPlugin # type: ignore |
| 22 | + |
| 23 | +import llama_cpp |
| 24 | +from llama_cpp.server.errors import RouteErrorHandler |
25 | 25 | from llama_cpp.server.model import (
|
26 | 26 | LlamaProxy,
|
27 | 27 | )
|
28 | 28 | from llama_cpp.server.settings import (
|
29 | 29 | ConfigFileSettings,
|
30 |
| - Settings, |
31 | 30 | ModelSettings,
|
32 | 31 | ServerSettings,
|
| 32 | + Settings, |
33 | 33 | )
|
34 | 34 | from llama_cpp.server.types import (
|
| 35 | + CreateChatCompletionRequest, |
35 | 36 | CreateCompletionRequest,
|
36 | 37 | CreateEmbeddingRequest,
|
37 |
| - CreateChatCompletionRequest, |
| 38 | + DetokenizeInputRequest, |
| 39 | + DetokenizeInputResponse, |
38 | 40 | ModelList,
|
| 41 | + TokenizeInputCountResponse, |
39 | 42 | TokenizeInputRequest,
|
40 | 43 | TokenizeInputResponse,
|
41 |
| - TokenizeInputCountResponse, |
42 |
| - DetokenizeInputRequest, |
43 |
| - DetokenizeInputResponse, |
44 | 44 | )
|
45 |
| -from llama_cpp.server.errors import RouteErrorHandler |
46 |
| - |
47 | 45 |
|
48 | 46 | router = APIRouter(route_class=RouteErrorHandler)
|
49 | 47 |
|
@@ -150,7 +148,7 @@ def create_app(
|
150 | 148 | set_llama_proxy(model_settings=model_settings)
|
151 | 149 |
|
152 | 150 | if server_settings.disable_ping_events:
|
153 |
| - set_ping_message_factory(lambda: bytes()) |
| 151 | + set_ping_message_factory(lambda: b"") |
154 | 152 |
|
155 | 153 | return app
|
156 | 154 |
|
@@ -248,7 +246,7 @@ async def authenticate(
|
248 | 246 | "schema": {
|
249 | 247 | "type": "string",
|
250 | 248 | "title": "Server Side Streaming response, when stream=True. "
|
251 |
| - + "See SSE format: https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format", # noqa: E501 |
| 249 | + + "See SSE format: https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format", |
252 | 250 | "example": """data: {... see CreateCompletionResponse ...} \\n\\n data: ... \\n\\n ... data: [DONE]""",
|
253 | 251 | }
|
254 | 252 | },
|
@@ -386,7 +384,7 @@ async def create_embedding(
|
386 | 384 | "schema": {
|
387 | 385 | "type": "string",
|
388 | 386 | "title": "Server Side Streaming response, when stream=True"
|
389 |
| - + "See SSE format: https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format", # noqa: E501 |
| 387 | + + "See SSE format: https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format", |
390 | 388 | "example": """data: {... see CreateChatCompletionResponse ...} \\n\\n data: ... \\n\\n ... data: [DONE]""",
|
391 | 389 | }
|
392 | 390 | },
|
|
0 commit comments