from fastapi import Body, Request
from WebUI.configs import GetProviderByName
from fastapi.responses import StreamingResponse
from WebUI.Server.chat.utils import History
from langchain.docstore.document import Document
from langchain.chains import LLMChain
import asyncio
import json
import os
from WebUI.Server.utils import wrap_done, get_ChatOpenAI
from fastapi.concurrency import run_in_threadpool
from WebUI.Server.utils import get_prompt_template
from langchain.callbacks import AsyncIteratorCallbackHandler
from langchain.utilities.bing_search import BingSearchAPIWrapper
from langchain.utilities.duckduckgo_search import DuckDuckGoSearchAPIWrapper
from langchain.text_splitter import RecursiveCharacterTextSplitter
from WebUI.configs.webuiconfig import InnerJsonConfigWebUIParse
from langchain.prompts.chat import ChatPromptTemplate
from typing import AsyncIterable, Dict, List, Optional


def bing_search(text, search_url, api_key, result_len, **kwargs):
    search = BingSearchAPIWrapper(bing_subscription_key=api_key,
                                  bing_search_url=search_url)
    return search.results(text, result_len)


def duckduckgo_search(text, search_url, api_key, result_len, **kwargs):
    search = DuckDuckGoSearchAPIWrapper()
    return search.results(text, result_len)


def metaphor_search(
        text: str,
        search_url: str,
        api_key: str,
        result_len: int,
        chunk_size: int = 500,
        chunk_overlap: int = 50,
) -> List[Dict]:
    from exa_py import Exa
    from markdownify import markdownify
    from strsimpy.normalized_levenshtein import NormalizedLevenshtein

    highlights_options = {
        "num_sentences": 7,       # how long our highlights should be
        "highlights_per_url": 1,  # just get the best highlight for each URL
    }

    info_for_llm = []
    exa = Exa(api_key=api_key)
    search_response = exa.search_and_contents(text, highlights=highlights_options, num_results=result_len, use_autoprompt=True)
    info = [sr for sr in search_response.results]
    for x in info:
        x.highlights[0] = markdownify(x.highlights[0])
    info_for_llm = info

    docs = [{"snippet": x.highlights[0],
             "link": x.url,
             "title": x.title}
            for x in info_for_llm]
    return docs


SEARCH_ENGINES = {
    "bing": bing_search,
    "duckduckgo": duckduckgo_search,
    "metaphor": metaphor_search,
}
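
# Each search function above takes (text, search_url, api_key, result_len, ...), so an
# engine can be selected by name at runtime, e.g. SEARCH_ENGINES["duckduckgo"], which is
# what lookup_search_engine below does.
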
def search_result2docs(search_results):
    docs = []
    for result in search_results:
        doc = Document(page_content=result["snippet"] if "snippet" in result.keys() else "",
                       metadata={"source": result["link"] if "link" in result.keys() else "",
                                 "filename": result["title"] if "title" in result.keys() else ""})
        docs.append(doc)
    return docs


async def lookup_search_engine(
        query: str,
        search_engine_name: str,
        top_k: int = 3,
):
    search_engine = SEARCH_ENGINES[search_engine_name]

    configinst = InnerJsonConfigWebUIParse()
    webui_config = configinst.dump()
    config = webui_config.get("SearchEngine").get(search_engine_name)
    api_key = config.get("api_key", "")
    search_url = config.get("search_url", "")
    # Fall back to environment variables when the config holds no usable key.
    if search_engine_name == "bing":
        if api_key == "" or api_key == "YOUR_API_KEY":
            api_key = os.environ.get("BING_SUBSCRIPTION_KEY", "")
        if search_url == "":
            search_url = os.environ.get("BING_SEARCH_URL", "")
    elif search_engine_name == "metaphor":
        if api_key == "" or api_key == "YOUR_API_KEY":
            api_key = os.environ.get("METAPHOR_API_KEY", "")
    results = await run_in_threadpool(search_engine, query, search_url=search_url, api_key=api_key, result_len=top_k)
    docs = search_result2docs(results)
    return docs
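
# Illustrative usage sketch (not called anywhere in this module): lookup_search_engine can
# be exercised on its own from a script or test. The query and engine name below are
# placeholders, and bing/metaphor additionally need an API key in the WebUI config or the
# corresponding environment variable.
#
#     docs = asyncio.run(lookup_search_engine("latest FastAPI release", "duckduckgo", top_k=3))
#     for doc in docs:
#         print(doc.metadata["source"], doc.page_content[:80])

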
async def search_engine_chat(
    query: str = Body(..., description="User input: ", examples=["chat"]),
    search_engine_name: str = Body(..., description="search engine name", examples=["duckduckgo"]),
    history: List[dict] = Body([],
                               description="History chat",
                               examples=[[
                                   {"role": "user", "content": "Who are you?"},
                                   {"role": "assistant", "content": "I am AI."}]]
                               ),
    stream: bool = Body(False, description="stream output"),
    model_name: str = Body("", description="model name"),
    temperature: float = Body(0.7, description="LLM Temperature", ge=0.0, le=1.0),
    max_tokens: Optional[int] = Body(None, description="max tokens."),
    prompt_name: str = Body("default", description=""),
) -> StreamingResponse:
    configinst = InnerJsonConfigWebUIParse()
    webui_config = configinst.dump()
    searchengine = webui_config.get("SearchEngine")
    top_k = searchengine.get("top_k", 3)

    history = [History.from_data(h) for h in history]

    async def search_engine_chat_iterator(query: str,
                                          search_engine_name: str,
                                          top_k: int,
                                          history: Optional[List[History]],
                                          stream: bool,
                                          model_name: str = "",
                                          temperature: float = 0.7,
                                          max_tokens: Optional[int] = None,
                                          prompt_name: str = prompt_name,
                                          ) -> AsyncIterable[str]:
        nonlocal webui_config
        callback = AsyncIteratorCallbackHandler()
        if isinstance(max_tokens, int) and max_tokens <= 0:
            max_tokens = None
        provider = GetProviderByName(webui_config, model_name)

        model = get_ChatOpenAI(
            provider=provider,
            model_name=model_name,
            temperature=temperature,
            max_tokens=max_tokens,
            callbacks=[callback],
        )

        docs = await lookup_search_engine(query, search_engine_name, top_k)
        context = "\n".join([doc.page_content for doc in docs])

        prompt_template = get_prompt_template("search_engine_chat", prompt_name)
        input_msg = History(role="user", content=prompt_template).to_msg_template(False)
        chat_prompt = ChatPromptTemplate.from_messages(
            [i.to_msg_template() for i in history] + [input_msg])

        chain = LLMChain(prompt=chat_prompt, llm=model)

        # Run the chain in the background; generated tokens arrive through the callback's
        # async iterator and are consumed below.
        task = asyncio.create_task(wrap_done(
            chain.acall({"context": context, "question": query}),
            callback.done),
        )

        source_documents = [
            f"""from [{inum + 1}] [{doc.metadata["source"]}]({doc.metadata["source"]}) \n\n{doc.page_content}\n\n"""
            for inum, doc in enumerate(docs)
        ]

        if len(source_documents) == 0:
            source_documents.append(f"""<span style='color:red'>No relevant information was found. This response is generated based on the LLM Model '{model_name}' itself!</span>""")

        if stream:
            async for token in callback.aiter():
                # Use server-sent-events to stream the response
                yield json.dumps({"answer": token}, ensure_ascii=False)
            yield json.dumps({"docs": source_documents}, ensure_ascii=False)
        else:
            answer = ""
            async for token in callback.aiter():
                answer += token
            yield json.dumps({"answer": answer,
                              "docs": source_documents},
                             ensure_ascii=False)
        await task

    return StreamingResponse(search_engine_chat_iterator(query=query,
                                                         search_engine_name=search_engine_name,
                                                         top_k=top_k,
                                                         history=history,
                                                         stream=stream,
                                                         model_name=model_name,
                                                         temperature=temperature,
                                                         max_tokens=max_tokens,
                                                         prompt_name=prompt_name),
                             )
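
# Rough client-side sketch (assumption: the exact route path depends on how this handler is
# registered on the FastAPI app, e.g. something like "/chat/search_engine_chat"; the host and
# model name are placeholders). With stream=True the body is a sequence of JSON fragments, one
# {"answer": token} object per token followed by a final {"docs": [...]} object; with
# stream=False a single {"answer": ..., "docs": [...]} object is yielded.
#
#     import requests
#     resp = requests.post("http://localhost:8000/chat/search_engine_chat",
#                          json={"query": "What is LangChain?",
#                                "search_engine_name": "duckduckgo",
#                                "stream": False,
#                                "model_name": "gpt-3.5-turbo"},
#                          stream=True)
#     for chunk in resp.iter_content(chunk_size=None, decode_unicode=True):
#         print(chunk)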