73 changes: 40 additions & 33 deletions aios/config/config.yaml.example
@@ -2,53 +2,60 @@

# API Keys Configuration
api_keys:
openai: "" # OpenAI API key
gemini: "" # Google Gemini API key
groq: "" # Groq API key
openai: "" # OpenAI API key
gemini: "" # Google Gemini API key
groq: "" # Groq API key
anthropic: "" # Anthropic API key
huggingface:
auth_token: "" # HuggingFace auth token
home: "" # Optional: HuggingFace models path
auth_token: "" # Your HuggingFace auth token for authorized models
cache_dir: "" # Your cache directory for saving huggingface models

# LLM Configuration
llms:
models:
# - name: "gpt-4o-mini"
# backend: "openai"
# max_new_tokens: 1024
# temperature: 1.0

- name: "gemini-1.5-flash"
backend: "google"
max_new_tokens: 1024
temperature: 1.0

# OpenAI Models
# - name: "gpt-4o-mini"
# backend: "openai"
# max_new_tokens: 1024
# temperature: 1.0

# - name: "qwen2.5:7b"
# backend: "ollama"
# max_new_tokens: 1024
# temperature: 1.0
# hostname: "http://localhost:11434" # Make sure to run ollama server

#
# - name: "meta-llama/Meta-Llama-3.1-8B-Instruct"
# Google Models
# - name: "gemini-1.5-flash"
# backend: "google"


# Anthropic Models
# - name: "claude-3-opus"
# backend: "anthropic"

# Ollama Models
- name: "qwen2.5:7b"
backend: "ollama"
hostname: "http://localhost:11434" # Make sure to run ollama server

# HuggingFace Models
# - name: "meta-llama/Llama-3.1-8B-Instruct"
# backend: "huggingface"
# max_new_tokens: 1024
# temperature: 1.0
# max_gpu_memory:

# max_gpu_memory: {0: "48GB"} # GPU memory allocation
# eval_device: "cuda:0" # Device for model evaluation

# vLLM Models
# To use vllm as backend, you need to install vllm and run the vllm server https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html
# An example command to run the vllm server is:
# vllm serve meta-llama/Llama-3.2-3B-Instruct --port 8091
# - name: "meta-llama/Llama-3.1-8B-Instruct"
# backend: "vllm"
# hostname: "http://localhost:8091"




log_mode: "console"
# use_context_manager: false
use_context_manager: false # set as true to enable context interrupt and switch
use_context_manager: false

memory:
memory_limit: 524288 # 512KB
eviction_k: 3

log_mode: "console"

storage:
root_dir: "root"
use_vector_db: true
@@ -62,4 +69,4 @@ agent_factory:

server:
host: "localhost"
port: 8000
port: 8000
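
To connect the new server block to the runtime changes further down, here is a minimal standalone sketch of reading it with plain PyYAML; this is not the project's ConfigManager, and the config path is an assumption:

import yaml  # PyYAML

# Assumed location of the copied example config; adjust as needed.
with open("aios/config/config.yaml") as f:
    config = yaml.safe_load(f) or {}

server = config.get("server", {}) or {}
host = server.get("host", "localhost")
port = server.get("port", 8000)
print(f"AIOS server would bind to {host}:{port}")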
14 changes: 14 additions & 0 deletions aios/config/config_manager.py
@@ -227,6 +227,20 @@ def get_agent_factory_config(self) -> dict:
"""
return self.config.get("agent_factory", {})

def get_server_config(self) -> dict:
"""
Retrieves the server configuration settings.

Returns:
dict: Dictionary containing server configurations

Example:
server_config = config_manager.get_server_config()
host = server_config.get("host")
port = server_config.get("port")
"""
return self.config.get("server", {})

# def get_kernel_config(self) -> dict:
# """Get kernel configuration"""
# return self.config.get("kernel", {})
10 changes: 6 additions & 4 deletions aios/llm_core/adapter.py
@@ -1,5 +1,5 @@
from aios.context.simple_context import SimpleContextManager
from aios.llm_core.strategy import RouterStrategy, SequentialRouting, SmartRouting
from aios.llm_core.routing import RouterStrategy, SequentialRouting, SmartRouting
from aios.llm_core.local import HfLocalBackend
from aios.utils.id_generator import generator_tool_call_id
from cerebrum.llm.apis import LLMQuery, LLMResponse
@@ -85,7 +85,7 @@ def __init__(
        api_key: Optional[Union[str, List[str]]] = None,
        log_mode: str = "console",
        use_context_manager: bool = False,
        strategy: Optional[RouterStrategy] = RouterStrategy.Sequential,
        routing_strategy: Optional[RouterStrategy] = RouterStrategy.Sequential,
    ):
        """
        Initialize the LLMAdapter.
@@ -95,7 +95,7 @@ def __init__(
            api_key: API key(s) for the LLM services
            log_mode: Mode of logging the LLM processing status
            use_context_manager: Whether to use context management
            strategy: Strategy for routing requests
            routing_strategy: Strategy for routing requests

        self.log_mode = log_mode
        self.use_context_manager = use_context_manager
@@ -106,8 +106,10 @@ def __init__(
        self._setup_api_keys()
        self._initialize_llms()

        if strategy == RouterStrategy.Sequential:
        if routing_strategy == RouterStrategy.Sequential:
            self.strategy = SequentialRouting(self.llm_configs)
        elif routing_strategy == RouterStrategy.Smart:
            self.strategy = SmartRouting(self.llm_configs)

    def _setup_api_keys(self) -> None:
        """
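
For orientation, a hedged sketch of how a caller could opt into the new Smart routing after this rename. Only the parameter names visible in the hunks above are taken from the PR; the llm_configs argument and the exact constructor layout are assumptions, so treat this as illustrative rather than the documented API:

from aios.llm_core.adapter import LLMAdapter
from aios.llm_core.routing import RouterStrategy

# Assumed shape: one dict per model, mirroring the llms.models entries in config.yaml.example.
llm_configs = [
    {"name": "qwen2.5:7b", "backend": "ollama", "hostname": "http://localhost:11434"},
    {"name": "gemini-1.5-flash", "backend": "google"},
]

adapter = LLMAdapter(
    llm_configs=llm_configs,                # assumed keyword; not shown in these hunks
    log_mode="console",
    use_context_manager=False,
    routing_strategy=RouterStrategy.Smart,  # default remains RouterStrategy.Sequential
)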
3 changes: 2 additions & 1 deletion aios/llm_core/local.py
@@ -105,7 +105,8 @@ def generate(
        response = self.model.generate(
            **inputs,
            temperature=temperature,
            max_length=max_tokens,
            # max_length=max_tokens,
            max_new_tokens=max_tokens,
            top_k=10,
            num_beams=4,
            early_stopping=True,
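
Background for the swap above (a general note on the transformers API, not code from this PR): max_length caps the total sequence length including the prompt, so a long prompt can leave little or no room for generation, whereas max_new_tokens bounds only the newly generated continuation. A minimal standalone sketch, using gpt2 purely as a small stand-in model:

from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

inputs = tokenizer("The AIOS kernel schedules", return_tensors="pt")
# Only the continuation is bounded; the prompt length no longer eats into the budget.
outputs = model.generate(**inputs, max_new_tokens=32, num_beams=4, early_stopping=True)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))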
1 change: 1 addition & 0 deletions aios/llm_core/strategy.py → aios/llm_core/routing.py
@@ -39,6 +39,7 @@

class RouterStrategy(Enum):
    Sequential = 0,
    Smart = 1

class SequentialRouting:
    """
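
A side observation, not part of this PR's diff: the pre-existing trailing comma in Sequential = 0, makes that member's value the tuple (0,) rather than the integer 0. The equality checks in LLMAdapter.__init__ are unaffected because enum members compare by identity, but the value matters if it is ever serialized. A small self-contained illustration:

from enum import Enum

class RouterStrategy(Enum):
    Sequential = 0,   # trailing comma: the value is the tuple (0,)
    Smart = 1         # the value is the int 1

assert RouterStrategy.Sequential.value == (0,)
assert RouterStrategy.Smart.value == 1
# Member-to-member comparisons remain identity-based and behave as expected.
assert RouterStrategy.Sequential == RouterStrategy["Sequential"]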
14 changes: 13 additions & 1 deletion runtime/launch.py
@@ -32,6 +32,8 @@

from fastapi.middleware.cors import CORSMiddleware

import uvicorn

# from cerebrum.llm.layer import LLMLayer as LLMConfig
# from cerebrum.memory.layer import MemoryLayer as MemoryConfig
# from cerebrum.storage.layer import StorageLayer as StorageConfig
@@ -631,4 +633,14 @@ async def update_config(request: Request):
        raise HTTPException(
            status_code=500,
            detail=f"Failed to update configuration: {str(e)}"
        )
        )

# Add a main function to run the app directly
if __name__ == "__main__":
    # Get server config from config.yaml
    server_config = config.get_server_config()
    host = server_config.get("host", "localhost")
    port = server_config.get("port", 8000)

    # print(f"Starting AIOS server on {host}:{port}")
    uvicorn.run("runtime.launch:app", host=host, port=port, reload=False)
2 changes: 1 addition & 1 deletion runtime/launch_kernel.sh
@@ -1 +1 @@
python -m uvicorn runtime.launch:app --port 8000
python -m runtime.launch
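
With this change the port is no longer pinned on the command line: python -m runtime.launch executes the new __main__ block added above, which reads host and port from the server section of config.yaml and falls back to localhost:8000 when they are absent.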