diff --git a/aios/config/config.yaml.example b/aios/config/config.yaml.example
index aec72647..612f77c5 100644
--- a/aios/config/config.yaml.example
+++ b/aios/config/config.yaml.example
@@ -2,53 +2,60 @@
 # API Keys Configuration
 api_keys:
-  openai: "" # OpenAI API key
-  gemini: "" # Google Gemini API key
-  groq: "" # Groq API key
+  openai: ""    # OpenAI API key
+  gemini: ""    # Google Gemini API key
+  groq: ""      # Groq API key
   anthropic: "" # Anthropic API key
   huggingface:
-    auth_token: "" # HuggingFace auth token
-    home: "" # Optional: HuggingFace models path
+    auth_token: "" # Your HuggingFace auth token for authorized models
+    cache_dir: ""  # Your cache directory for saving HuggingFace models

 # LLM Configuration
 llms:
   models:
-    # - name: "gpt-4o-mini"
-    #   backend: "openai"
-    #   max_new_tokens: 1024
-    #   temperature: 1.0
-
-    - name: "gemini-1.5-flash"
-      backend: "google"
-      max_new_tokens: 1024
-      temperature: 1.0
+    # OpenAI Models
     # - name: "gpt-4o-mini"
     #   backend: "openai"
-    #   max_new_tokens: 1024
-    #   temperature: 1.0
-    # - name: "qwen2.5:7b"
-    #   backend: "ollama"
-    #   max_new_tokens: 1024
-    #   temperature: 1.0
-    #   hostname: "http://localhost:11434" # Make sure to run ollama server
-    #
-    # - name: "meta-llama/Meta-Llama-3.1-8B-Instruct"
+
+    # Google Models
+    # - name: "gemini-1.5-flash"
+    #   backend: "google"
+
+
+    # Anthropic Models
+    # - name: "claude-3-opus"
+    #   backend: "anthropic"
+
+    # Ollama Models
+    - name: "qwen2.5:7b"
+      backend: "ollama"
+      hostname: "http://localhost:11434" # Make sure to run the ollama server
+
+    # HuggingFace Models
+    # - name: "meta-llama/Llama-3.1-8B-Instruct"
     #   backend: "huggingface"
-    #   max_new_tokens: 1024
-    #   temperature: 1.0
-    #   max_gpu_memory:
-
+    #   max_gpu_memory: {0: "48GB"} # GPU memory allocation
+    #   eval_device: "cuda:0"       # Device for model evaluation
+
+    # vLLM Models
+    # To use vllm as the backend, install vllm and run the vllm server: https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html
+    # An example command to run the vllm server is:
+    #   vllm serve meta-llama/Llama-3.2-3B-Instruct --port 8091
+    # - name: "meta-llama/Llama-3.1-8B-Instruct"
+    #   backend: "vllm"
+    #   hostname: "http://localhost:8091"
+
+
+
   log_mode: "console"
-  # use_context_manager: false
-  use_context_manager: false # set as true to enable context interrupt and switch
+  use_context_manager: false

 memory:
-  memory_limit: 524288 # 512KB
-  eviction_k: 3
-
+  log_mode: "console"
+
 storage:
   root_dir: "root"
   use_vector_db: true
@@ -62,4 +69,4 @@ agent_factory:

 server:
   host: "localhost"
-  port: 8000
+  port: 8000
\ No newline at end of file
diff --git a/aios/config/config_manager.py b/aios/config/config_manager.py
index f4fdbc28..0c3d2765 100644
--- a/aios/config/config_manager.py
+++ b/aios/config/config_manager.py
@@ -227,6 +227,20 @@ def get_agent_factory_config(self) -> dict:
         """
         return self.config.get("agent_factory", {})
 
+    def get_server_config(self) -> dict:
+        """
+        Retrieves the server configuration settings.
+
+        Returns:
+            dict: Dictionary containing server configurations
+
+        Example:
+            server_config = config_manager.get_server_config()
+            host = server_config.get("host")
+            port = server_config.get("port")
+        """
+        return self.config.get("server", {})
+
     # def get_kernel_config(self) -> dict:
     #     """Get kernel configuration"""
     #     return self.config.get("kernel", {})
diff --git a/aios/llm_core/adapter.py b/aios/llm_core/adapter.py
index ad7e4b2b..5303c6d5 100644
--- a/aios/llm_core/adapter.py
+++ b/aios/llm_core/adapter.py
@@ -1,5 +1,5 @@
 from aios.context.simple_context import SimpleContextManager
-from aios.llm_core.strategy import RouterStrategy, SequentialRouting, SmartRouting
+from aios.llm_core.routing import RouterStrategy, SequentialRouting, SmartRouting
 from aios.llm_core.local import HfLocalBackend
 from aios.utils.id_generator import generator_tool_call_id
 from cerebrum.llm.apis import LLMQuery, LLMResponse
@@ -85,7 +85,7 @@ def __init__(
         api_key: Optional[Union[str, List[str]]] = None,
         log_mode: str = "console",
         use_context_manager: bool = False,
-        strategy: Optional[RouterStrategy] = RouterStrategy.Sequential,
+        routing_strategy: Optional[RouterStrategy] = RouterStrategy.Sequential,
     ):
         """
         Initialize the LLMAdapter.
@@ -95,7 +95,7 @@ def __init__(
             api_key: API key(s) for the LLM services
             log_mode: Mode of logging the LLM processing status
             use_context_manager: Whether to use context management
-            strategy: Strategy for routing requests
+            routing_strategy: Strategy for routing requests
         """
         self.log_mode = log_mode
         self.use_context_manager = use_context_manager
@@ -106,8 +106,10 @@ def __init__(
         self._setup_api_keys()
         self._initialize_llms()
 
-        if strategy == RouterStrategy.Sequential:
+        if routing_strategy == RouterStrategy.Sequential:
             self.strategy = SequentialRouting(self.llm_configs)
+        elif routing_strategy == RouterStrategy.Smart:
+            self.strategy = SmartRouting(self.llm_configs)
 
     def _setup_api_keys(self) -> None:
         """
diff --git a/aios/llm_core/local.py b/aios/llm_core/local.py
index 49cfd9c9..0f2eefd2 100644
--- a/aios/llm_core/local.py
+++ b/aios/llm_core/local.py
@@ -105,7 +105,8 @@ def generate(
         response = self.model.generate(
             **inputs,
             temperature=temperature,
-            max_length=max_tokens,
+            # max_length=max_tokens,
+            max_new_tokens=max_tokens,
             top_k=10,
             num_beams=4,
             early_stopping=True,
diff --git a/aios/llm_core/strategy.py b/aios/llm_core/routing.py
similarity index 99%
rename from aios/llm_core/strategy.py
rename to aios/llm_core/routing.py
index 99a21e34..d6296cf8 100644
--- a/aios/llm_core/strategy.py
+++ b/aios/llm_core/routing.py
@@ -39,6 +39,7 @@ class RouterStrategy(Enum):
 
     Sequential = 0,
+    Smart = 1
 
 
 class SequentialRouting:
     """
diff --git a/runtime/launch.py b/runtime/launch.py
index cb6f0cae..a6ab59f8 100644
--- a/runtime/launch.py
+++ b/runtime/launch.py
@@ -32,6 +32,8 @@
 
 from fastapi.middleware.cors import CORSMiddleware
 
+import uvicorn
+
 # from cerebrum.llm.layer import LLMLayer as LLMConfig
 # from cerebrum.memory.layer import MemoryLayer as MemoryConfig
 # from cerebrum.storage.layer import StorageLayer as StorageConfig
@@ -631,4 +633,14 @@ async def update_config(request: Request):
         raise HTTPException(
             status_code=500, detail=f"Failed to update configuration: {str(e)}"
-        )
\ No newline at end of file
+        )
+
+# Add a main function to run the app directly
+if __name__ == "__main__":
+    # Get server config from config.yaml
+    server_config = config.get_server_config()
+    host = server_config.get("host", "localhost")
+    port = server_config.get("port", 8000)
+
+    # print(f"Starting AIOS server on {host}:{port}")
+    uvicorn.run("runtime.launch:app", host=host, port=port, reload=False)
\ No newline at end of file
diff --git a/runtime/launch_kernel.sh b/runtime/launch_kernel.sh
index 31a720c8..65cdeef0 100644
--- a/runtime/launch_kernel.sh
+++ b/runtime/launch_kernel.sh
@@ -1 +1 @@
-python -m uvicorn runtime.launch:app --port 8000
+python -m runtime.launch
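
Taken together, these changes move the kernel's listen address out of the hard-coded uvicorn command line and into the "server" section of config.yaml, read through the new ConfigManager.get_server_config() helper. A minimal usage sketch of how the kernel would be started after this patch, assuming the repository root is the working directory and a config.yaml has been created from config.yaml.example (the commands and the localhost:8000 fallback come from this diff; nothing else is implied):

    # Start the AIOS kernel via the updated launcher script; host and port are
    # taken from the "server" section of config.yaml, falling back to
    # localhost:8000 if the section is missing.
    bash runtime/launch_kernel.sh

    # Equivalent direct invocation of the new module entry point:
    python -m runtime.launch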