diff --git a/api/api.py b/api/api.py index 89f5a33..0a49d49 100644 --- a/api/api.py +++ b/api/api.py @@ -1,14 +1,17 @@ import os import logging +from urllib.parse import quote from fastapi import FastAPI, HTTPException, Query, Request, WebSocket from fastapi.middleware.cors import CORSMiddleware -from fastapi.responses import JSONResponse, Response +from fastapi.responses import JSONResponse, Response, RedirectResponse from typing import List, Optional, Dict, Any, Literal import json from datetime import datetime from pydantic import BaseModel, Field import google.generativeai as genai import asyncio +from api.data_pipeline import download_repo, DatabaseManager +from urllib.parse import unquote # Configure logging from api.logging_config import setup_logging @@ -23,14 +26,29 @@ description="API for streaming chat completions" ) -# Configure CORS -app.add_middleware( - CORSMiddleware, - allow_origins=["*"], # Allows all origins - allow_credentials=True, - allow_methods=["*"], # Allows all methods - allow_headers=["*"], # Allows all headers -) +# Create a separate router for API endpoints to ensure they take precedence +api_router = FastAPI() + +# Configure CORS for both app and api_router +for app_instance in [app, api_router]: + app_instance.add_middleware( + CORSMiddleware, + allow_origins=["*"], # Allows all origins + allow_credentials=True, + allow_methods=["*"], # Allows all methods + allow_headers=["*"], # Allows all headers + ) + +# Mount the API router at /api prefix +app.mount("/api", api_router) + +# Import and include debug routes +try: + from api.debug_azure import debug_router + app.include_router(debug_router) + logger.info("Debug routes registered successfully") +except Exception as e: + logger.error(f"Failed to register debug routes: {str(e)}") # Helper function to get adalflow root path def get_adalflow_default_root_path(): @@ -445,7 +463,7 @@ async def save_wiki_cache(data: WikiCacheRequest) -> bool: # --- Wiki Cache API Endpoints --- 
-@app.get("/api/wiki_cache", response_model=Optional[WikiCacheData]) +@api_router.get("/wiki_cache", response_model=Optional[WikiCacheData]) async def get_cached_wiki( owner: str = Query(..., description="Repository owner"), repo: str = Query(..., description="Repository name"), @@ -470,7 +488,7 @@ async def get_cached_wiki( logger.info(f"Wiki cache not found for {owner}/{repo} ({repo_type}), lang: {language}") return None -@app.post("/api/wiki_cache") +@api_router.post("/wiki_cache") async def store_wiki_cache(request_data: WikiCacheRequest): """ Stores generated wiki data (structure and pages) to the server-side cache. @@ -488,7 +506,7 @@ async def store_wiki_cache(request_data: WikiCacheRequest): else: raise HTTPException(status_code=500, detail="Failed to save wiki cache") -@app.delete("/api/wiki_cache") +@api_router.delete("/wiki_cache") async def delete_wiki_cache( owner: str = Query(..., description="Repository owner"), repo: str = Query(..., description="Repository name"), @@ -560,8 +578,331 @@ async def root(): "endpoints": endpoints } +# --- Simplified Route for Azure DevOps Repositories --- +@app.get("/{project}/{repository}") +async def simplified_repo_route( + project: str, + repository: str, + request: Request, + file_tree: str = Query(None), + readme: str = Query(None), + type: str = Query(None), + repo_url: str = Query(None) +): + """ + Simplified route for Azure DevOps repositories without the organization part. + This route receives the file tree and README content as query parameters. 
+ """ + # Skip API routes - they should be handled by their specific endpoints + if project == "api": + raise HTTPException(status_code=404, detail="Not found - Use specific API endpoints") + + logger.info(f"Simplified route triggered for: {project}/{repository}") + logger.info(f"Query parameters: {request.query_params}") + + # If we have the file tree and README in the query parameters, return them directly + if file_tree and readme and type == 'azure': + logger.info("Using file tree and README from query parameters") + return { + "file_tree": file_tree, + "readme": readme + } + + # Otherwise, return a 404 + raise HTTPException(status_code=404, detail="Resource not found") + +# --- Catch-all Route for Azure DevOps Repositories --- +@app.get("/{organization}/{project}/{repository}", include_in_schema=False) +async def catch_all_repo_route( + organization: str, + project: str, + repository: str, + request: Request +): + # Skip well-known paths and other special paths + if organization.startswith('.well-known') or organization.startswith('_next') or organization.startswith('api'): + raise HTTPException(status_code=404, detail="Resource not found") + """ + Catch-all route for repository URLs with the format /{organization}/{project}/{repository}. + This is primarily used for Azure DevOps repositories but could be used for other repository types as well. 
+ """ + logger.info(f"Catch-all route triggered for: {organization}/{project}/{repository}") + + # Get all query parameters + query_params = dict(request.query_params) + logger.info(f"Query parameters: {query_params}") + + # Check if repo_url is provided + if 'repo_url' in query_params: + try: + # Get the repo URL from query parameters + repo_url = query_params.get('repo_url') + repo_type = query_params.get('type', 'github') + + # Handle double-encoded URLs + if '%25' in repo_url: # Double-encoded + repo_url = unquote(unquote(repo_url)) + else: + repo_url = unquote(repo_url) + + logger.info(f"Decoded repo_url: {repo_url}") + + # Get access token based on repository type + access_token = None + if repo_type == 'github': + access_token = os.environ.get("GITHUB_TOKEN") + elif repo_type == 'gitlab': + access_token = os.environ.get("GITLAB_TOKEN") + elif repo_type == 'bitbucket': + access_token = os.environ.get("BITBUCKET_TOKEN") + elif repo_type == 'azure': + access_token = os.environ.get("AZURE_DEVOPS_TOKEN") + + if not access_token and repo_type != 'github': # GitHub can work without a token for public repos + logger.warning(f"{repo_type.upper()}_TOKEN not found in environment variables") + + # Import the necessary modules for repository processing + + + # Process the repository + try: + # Create a database manager instance + db_manager = DatabaseManager() + + # Prepare the database for the repository + # This will download the repository, process the files, and create embeddings + logger.info(f"Preparing database for {repo_url}") + db_manager.prepare_database(repo_url, repo_type, access_token) + + # Get the repository structure to return to the frontend + # This is similar to what's done in the GitHub/GitLab endpoints + try: + # Get the repository path + repo_path = os.path.join(get_adalflow_default_root_path(), "repos", repository) + logger.info(f"Getting repository structure from: {repo_path}") + + # Use the existing get_local_repo_structure function + # We can 
call it directly since we're in the same file + result = await get_local_repo_structure(repo_path) + + # Check if the result is a JSONResponse (error) + if isinstance(result, JSONResponse): + logger.error(f"Error getting repository structure: {result.body}") + # Return basic repository information + return { + "repository": { + "name": repository, + "owner": organization, + "project": project, + "url": repo_url + }, + "type": repo_type, + "status": "processing" + } + + # For Azure DevOps repos, redirect to a simpler URL format without the organization + # This makes the frontend handling much more straightforward + from fastapi.responses import RedirectResponse + + # Get provider and model from query parameters if available + provider = request.query_params.get('provider', 'azure') + model = request.query_params.get('model', 'gpt-4o') + language = request.query_params.get('language', 'en') + comprehensive = request.query_params.get('comprehensive', 'true') + + # Construct the redirect URL with query parameters to pass the file tree and README + redirect_url = f"/{project}/{repository}?file_tree={quote(result['file_tree'])}&readme={quote(result['readme'])}&type=azure&repo_url={quote(repo_url)}&provider={provider}&model={model}&language={language}&comprehensive={comprehensive}" + + # Return a redirect response + return RedirectResponse(url=redirect_url, status_code=302) + except Exception as structure_error: + logger.error(f"Error getting repository structure: {str(structure_error)}") + # Return basic repository information even if structure retrieval fails + return { + "repository": { + "name": repository, + "owner": organization, + "project": project, + "url": repo_url + }, + "type": repo_type, + "status": "processing" + } + except Exception as process_error: + logger.error(f"Error processing repository: {str(process_error)}") + raise HTTPException(status_code=500, detail=f"Error processing repository: {str(process_error)}") + except Exception as e: + 
logger.error(f"Error in catch-all route: {str(e)}") + raise HTTPException(status_code=500, detail=f"Error processing repository: {str(e)}") + + # If repo_url is not provided, return 404 + raise HTTPException(status_code=404, detail="Resource not found") + +# --- Azure DevOps Repository Structure Endpoint --- +@app.get("/api/azuredevops/structure") +async def get_azuredevops_structure( + repo_url: str = Query(..., description="URL of the Azure DevOps repository") +): + """ + Get repository structure (file tree and README) for an Azure DevOps repository. + This endpoint is specifically designed for Azure DevOps repositories. + + Args: + repo_url: URL of the Azure DevOps repository + + Returns: + Repository structure with file tree and README content + """ + try: + logger.info(f"Getting Azure DevOps repository structure for {repo_url}") + + # Parse the repository URL to extract organization, project, and repository name + # Format: https://dev.azure.com/{organization}/{project}/_git/{repository} + url_parts = repo_url.split('/') + if len(url_parts) < 6: + raise HTTPException(status_code=400, detail=f"Invalid Azure DevOps repository URL: {repo_url}") + + organization = url_parts[3] + project = url_parts[4] + repository = url_parts[-1] + + logger.info(f"Parsed Azure DevOps URL - Organization: {organization}, Project: {project}, Repository: {repository}") + + # Import the necessary modules for repository processing + from api.data_pipeline import download_repo, DatabaseManager + + # Create a database manager instance + db_manager = DatabaseManager() + + # Get access token if available + access_token = os.getenv('AZURE_DEVOPS_TOKEN') + + # Prepare the database for the repository + # This will download the repository, process the files, and create embeddings + logger.info(f"Preparing database for {repo_url}") + db_manager.prepare_database(repo_url, 'azure', access_token) + + # Get the repository structure + try: + # Get the repository path + repo_path = 
os.path.join(get_adalflow_default_root_path(), "repos", repository) + logger.info(f"Getting repository structure from: {repo_path}") + + # Use the existing get_local_repo_structure function + result = await get_local_repo_structure(repo_path) + + # Check if the result is a JSONResponse (error) + if isinstance(result, JSONResponse): + logger.error(f"Error getting repository structure: {result.body}") + # Return error response + raise HTTPException(status_code=500, detail="Error getting repository structure") + + # Return just the file tree and README directly + return { + "file_tree": result["file_tree"], + "readme": result["readme"] + } + except Exception as structure_error: + logger.error(f"Error getting repository structure: {str(structure_error)}") + raise HTTPException(status_code=500, detail=f"Error getting repository structure: {str(structure_error)}") + except Exception as e: + logger.error(f"Error in get_azuredevops_structure: {str(e)}") + raise HTTPException(status_code=500, detail=f"Error getting Azure DevOps repository structure: {str(e)}") + +# --- Repository Structure Endpoint --- +@app.get("/api/repo/structure") +async def get_repo_structure( + type: str = Query(..., description="Repository type (e.g., github, gitlab, azure)"), + repo_url: str = Query(..., description="URL of the repository"), + owner: str = Query(..., description="Repository owner or organization"), + repo: str = Query(..., description="Repository name") +): + """ + Get repository structure (file tree and README) for a repository. + This is particularly useful for Azure DevOps repositories. 
+ + Args: + type: Repository type (e.g., github, gitlab, azure) + repo_url: URL of the repository + owner: Repository owner or organization + repo: Repository name + + Returns: + Repository structure with file tree and README content + """ + try: + logger.info(f"Getting repository structure for {repo_url}") + + # Import the necessary modules for repository processing + from api.data_pipeline import download_repo, DatabaseManager + + # Create a database manager instance + db_manager = DatabaseManager() + + # Get access token if available + access_token = None + if type == 'azure': + access_token = os.getenv('AZURE_DEVOPS_TOKEN') + elif type == 'github': + access_token = os.getenv('GITHUB_TOKEN') + elif type == 'gitlab': + access_token = os.getenv('GITLAB_TOKEN') + elif type == 'bitbucket': + access_token = os.getenv('BITBUCKET_TOKEN') + + # Prepare the database for the repository + # This will download the repository, process the files, and create embeddings + logger.info(f"Preparing database for {repo_url}") + db_manager.prepare_database(repo_url, type, access_token) + + # Get the repository structure + try: + # Get the repository path + repo_path = os.path.join(get_adalflow_default_root_path(), "repos", repo) + logger.info(f"Getting repository structure from: {repo_path}") + + # Use the existing get_local_repo_structure function + result = await get_local_repo_structure(repo_path) + + # Check if the result is a JSONResponse (error) + if isinstance(result, JSONResponse): + logger.error(f"Error getting repository structure: {result.body}") + # Return basic repository information + return { + "repository": { + "name": repo, + "owner": owner, + "url": repo_url + }, + "type": type, + "status": "processing" + } + + # Return repository information for the frontend in the same format as the GitHub API response + # The frontend expects just file_tree and readme directly + return { + "file_tree": result["file_tree"], + "readme": result["readme"], + "status": "ready" + } + 
except Exception as structure_error: + logger.error(f"Error getting repository structure: {str(structure_error)}") + # Return basic repository information even if structure retrieval fails + return { + "repository": { + "name": repo, + "owner": owner, + "url": repo_url + }, + "type": type, + "status": "processing" + } + except Exception as e: + logger.error(f"Error in get_repo_structure: {str(e)}") + raise HTTPException(status_code=500, detail=f"Error getting repository structure: {str(e)}") + # --- Processed Projects Endpoint --- (New Endpoint) -@app.get("/api/processed_projects", response_model=List[ProcessedProjectEntry]) +@api_router.get("/processed_projects", response_model=List[ProcessedProjectEntry]) async def get_processed_projects(): """ Lists all processed projects found in the wiki cache directory. diff --git a/api/azure_openai_client.py b/api/azure_openai_client.py new file mode 100644 index 0000000..b1906a8 --- /dev/null +++ b/api/azure_openai_client.py @@ -0,0 +1,543 @@ +"""Azure OpenAI ModelClient integration.""" + +import os +import base64 +from typing import ( + Dict, + Sequence, + Optional, + List, + Any, + TypeVar, + Callable, + Generator, + Union, + Literal, +) +import re +import logging +import backoff +from azure.core.credentials import AzureKeyCredential + +# Import OpenAI modules directly +from openai import AzureOpenAI, AsyncAzureOpenAI, Stream +from openai import ( + APITimeoutError, + InternalServerError, + RateLimitError, + UnprocessableEntityError, + BadRequestError, +) +from openai.types import ( + Completion, + CreateEmbeddingResponse, + Image, +) +from openai.types.chat import ChatCompletionChunk, ChatCompletion, ChatCompletionMessage +from openai.types.chat.chat_completion import Choice + +from adalflow.core.model_client import ModelClient +from adalflow.core.types import ( + ModelType, + EmbedderOutput, + TokenLogProb, + CompletionUsage, + GeneratorOutput, +) +from adalflow.components.model_client.utils import 
parse_embedding_response + +# Import OpenAI client functions for reuse +from api.openai_client import ( + get_first_message_content, + estimate_token_count, + parse_stream_response, + handle_streaming_response, + get_all_messages_content, + get_probabilities, +) + +log = logging.getLogger(__name__) +T = TypeVar("T") + + +class AzureOpenAIClient(ModelClient): + """A component wrapper for the Azure OpenAI API client. + + Supports both embedding and chat completion APIs, including multimodal capabilities. + + Users can: + 1. Simplify use of ``Embedder`` and ``Generator`` components by passing `AzureOpenAIClient()` as the `model_client`. + 2. Use this as a reference to create their own API client or extend this class by copying and modifying the code. + + Note: + We recommend avoiding `response_format` to enforce output data type or `tools` and `tool_choice` in `model_kwargs` when calling the API. + OpenAI's internal formatting and added prompts are unknown. Instead: + - Use :ref:`OutputParser` for response parsing and formatting. + + For multimodal inputs, provide images in `model_kwargs["images"]` as a path, URL, or list of them. + The model must support vision capabilities (e.g., `gpt-4o`, `gpt-4-vision`). + + Args: + api_key (Optional[str], optional): Azure OpenAI API key. Defaults to `None`. + api_version (str, optional): Azure OpenAI API version. Defaults to `"2024-02-01"`. + chat_completion_parser (Callable[[Completion], Any], optional): A function to parse the chat completion into a `str`. Defaults to `None`. + The default parser is `get_first_message_content`. + base_url (str): The API base URL to use when initializing the client. + env_api_key_name (str): The environment variable name for the API key. Defaults to `"AZURE_OPENAI_API_KEY"`. + env_base_url_name (str): The environment variable name for the base URL. Defaults to `"AZURE_OPENAI_API_BASE"`. 
+ + References: + - Azure OpenAI API Overview: https://learn.microsoft.com/en-us/azure/ai-services/openai/ + - Embeddings Guide: https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/understand-embeddings + - Chat Completion Models: https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models + """ + + def __init__( + self, + api_key: Optional[str] = None, + api_version: str = "2024-12-01-preview", # Updated to latest API version + chat_completion_parser: Callable[[Completion], Any] = None, + input_type: Literal["text", "messages"] = "text", + base_url: Optional[str] = None, + env_base_url_name: str = "AZURE_OPENAI_ENDPOINT", + env_api_key_name: str = "AZURE_OPENAI_API_KEY", + model_api_versions: Optional[Dict[str, str]] = None, + ): + """It is recommended to set the AZURE_OPENAI_API_KEY environment variable instead of passing it as an argument. + + Args: + api_key (Optional[str], optional): Azure OpenAI API key. Defaults to None. + api_version (str, optional): Azure OpenAI API version. Defaults to "2024-02-01". + base_url (str): The API base URL to use when initializing the client. + env_api_key_name (str): The environment variable name for the API key. Defaults to `"AZURE_OPENAI_API_KEY"`. + env_base_url_name (str): The environment variable name for the base URL. Defaults to `"AZURE_OPENAI_API_BASE"`. 
+ """ + super().__init__() + self._api_key = api_key + self._api_version = api_version + self._env_api_key_name = env_api_key_name + self._env_base_url_name = env_base_url_name + self.base_url = base_url or os.getenv(self._env_base_url_name) + + # Store model-specific API versions + self._model_api_versions = model_api_versions or {} + + self.sync_client = self.init_sync_client() + self.async_client = None # only initialize if the async call is called + self.chat_completion_parser = ( + chat_completion_parser or get_first_message_content + ) + self._input_type = input_type + self._api_kwargs = {} # add api kwargs when the Azure OpenAI Client is called + + def init_sync_client(self): + """Initialize the synchronous Azure OpenAI client.""" + api_key = self._api_key or os.getenv(self._env_api_key_name) + if not api_key: + raise ValueError( + f"API key must be provided either as an argument or as an environment variable {self._env_api_key_name}" + ) + if not self.base_url: + raise ValueError( + f"Base URL must be provided either as an argument or as an environment variable {self._env_base_url_name}" + ) + + # Use the Azure OpenAI client format compatible with the installed version + return AzureOpenAI( + api_key=api_key, + api_version=self._api_version, + azure_endpoint=self.base_url + ) + + def init_async_client(self): + """Initialize the asynchronous Azure OpenAI client.""" + api_key = self._api_key or os.getenv(self._env_api_key_name) + if not api_key: + raise ValueError( + f"API key must be provided either as an argument or as an environment variable {self._env_api_key_name}" + ) + if not self.base_url: + raise ValueError( + f"Base URL must be provided either as an argument or as an environment variable {self._env_base_url_name}" + ) + + # Use the Azure OpenAI client format compatible with the installed version + return AsyncAzureOpenAI( + api_key=api_key, + api_version=self._api_version, + azure_endpoint=self.base_url + ) + + def parse_chat_completion( + self, + 
completion: Union[ChatCompletion, Generator[ChatCompletionChunk, None, None]], + ): + """Parse the completion, and put it into the raw_response.""" + if isinstance(completion, Generator): + # Handle streaming response + text = "" + for chunk in completion: + content = parse_stream_response(chunk) + if content is not None: + text += content + return text + else: + # Handle non-streaming response + return self.chat_completion_parser(completion) + + def track_completion_usage( + self, + completion: Union[ChatCompletion, Generator[ChatCompletionChunk, None, None]], + ): + """Track the completion usage.""" + if isinstance(completion, Generator): + # For streaming responses, we can't get the usage directly + # We'll estimate it based on the response + text = "" + for chunk in completion: + content = parse_stream_response(chunk) + if content is not None: + text += content + return CompletionUsage( + prompt_tokens=0, # We don't know + completion_tokens=estimate_token_count(text), + total_tokens=0, # We don't know + ) + else: + # For non-streaming responses, we can get the usage directly + return CompletionUsage( + prompt_tokens=completion.usage.prompt_tokens, + completion_tokens=completion.usage.completion_tokens, + total_tokens=completion.usage.total_tokens, + ) + + def parse_embedding_response( + self, response: CreateEmbeddingResponse + ) -> EmbedderOutput: + """Parse the embedding response to a structure Adalflow components can understand. + + Should be called in ``Embedder``. + """ + return parse_embedding_response(response) + + def convert_inputs_to_api_kwargs( + self, + input: Optional[Any] = None, + model_kwargs: Dict = {}, + model_type: ModelType = ModelType.UNDEFINED, + ) -> Dict: + """ + Specify the API input type and output api_kwargs that will be used in _call and _acall methods. + Convert the Component's standard input, and system_input(chat model) and model_kwargs into API-specific format. 
+ For multimodal inputs, images can be provided in model_kwargs["images"] as a string path, URL, or list of them. + The model specified in model_kwargs["model"] must support multimodal capabilities when using images. + + Args: + input: The input text or messages to process + model_kwargs: Additional parameters including: + - images: Optional image source(s) as path, URL, or list of them + - detail: Image detail level ('auto', 'low', or 'high'), defaults to 'auto' + - model: The model to use (must support multimodal inputs if images are provided) + model_type: The type of model (EMBEDDER or LLM) + + Returns: + Dict: API-specific kwargs for the model call + """ + api_kwargs = model_kwargs.copy() + + # Handle different model types + if model_type == ModelType.EMBEDDER: + if isinstance(input, list): + api_kwargs["input"] = input + else: + api_kwargs["input"] = [input] + + # Azure OpenAI requires a deployment_id instead of model + if "model" in api_kwargs: + api_kwargs["deployment_id"] = api_kwargs.pop("model") + + return api_kwargs + + elif model_type == ModelType.LLM: + # Azure OpenAI requires a deployment_id instead of model + if "model" in api_kwargs: + api_kwargs["deployment_id"] = api_kwargs.pop("model") + + # Handle multimodal inputs (images) + images = api_kwargs.pop("images", None) + detail = api_kwargs.pop("detail", "auto") + + if self._input_type == "text" and input is not None: + # Convert text input to messages format + if images: + # For multimodal, we need to format with content list + content = [{"type": "text", "text": input}] + + # Process images + if isinstance(images, str): + images = [images] + + for img in images: + img_content = self._prepare_image_content(img, detail) + content.append(img_content) + + api_kwargs["messages"] = [{"role": "user", "content": content}] + else: + # For text-only, we can use simple format + api_kwargs["messages"] = [{"role": "user", "content": input}] + + elif self._input_type == "messages" and input is not None: + # 
Input is already in messages format + api_kwargs["messages"] = input + + return api_kwargs + + # For other model types, just pass through the kwargs + return api_kwargs + + def call(self, api_kwargs: Dict = {}, model_type: ModelType = ModelType.UNDEFINED): + """ + kwargs is the combined input and model_kwargs. Support streaming call. + """ + self._api_kwargs = api_kwargs.copy() + + # Check if we need to use a model-specific API version + deployment_id = api_kwargs.get("deployment_id") + if deployment_id and deployment_id in self._model_api_versions: + # Create a new client with the model-specific API version + api_key = self._api_key or os.getenv(self._env_api_key_name) + model_specific_client = AzureOpenAI( + api_key=api_key, + api_version=self._model_api_versions[deployment_id], + azure_endpoint=self.base_url, + ) + log.info(f"Using model-specific API version {self._model_api_versions[deployment_id]} for model {deployment_id}") + client = model_specific_client + else: + # Use the default client + client = self.sync_client + + # Handle different model types + if model_type == ModelType.EMBEDDER: + # Prepare the embeddings API call + embedding_api_kwargs = api_kwargs.copy() + + # Handle model parameter for Azure OpenAI + # The installed version doesn't support deployment_id, so we need to use model + deployment_id = embedding_api_kwargs.pop('deployment_id', None) + if deployment_id and not embedding_api_kwargs.get('model'): + # Use deployment_id as the model name + embedding_api_kwargs['model'] = deployment_id + log.info(f"Using deployment_id {deployment_id} as model for embeddings") + + # Ensure we have a model parameter + model = embedding_api_kwargs.get('model') + if not model: + model = "text-embedding-ada-002" # Default model + embedding_api_kwargs['model'] = model + log.info(f"Using default model {model} for embeddings") + + # Remove dimensions parameter if it exists - not supported by text-embedding-ada-002 + if 'dimensions' in embedding_api_kwargs: + 
dimensions = embedding_api_kwargs.pop('dimensions') + log.info(f"Removed dimensions parameter ({dimensions}) as it's not supported by {model}") + + # Ensure we're using the correct API version for text-embedding-3 models + if model and ('text-embedding-3' in model): + # Create a client with the correct API version for text-embedding-3 models + api_key = self._api_key or os.getenv(self._env_api_key_name) + embedding_client = AzureOpenAI( + api_key=api_key, + api_version="2024-02-01", # API version compatible with text-embedding-3 models + azure_endpoint=self.base_url + ) + log.info(f"Using API version 2024-02-01 for {model} embeddings") + # Call the embeddings API with the specialized client + response = embedding_client.embeddings.create(**embedding_api_kwargs) + else: + # Use the standard client for other embedding models + response = client.embeddings.create(**embedding_api_kwargs) + + return self.parse_embedding_response(response) + + elif model_type == ModelType.LLM: + # Handle streaming if requested + stream = api_kwargs.pop("stream", False) + + if stream: + # Handle streaming response + response = client.chat.completions.create( + **api_kwargs, stream=True + ) + return self.parse_chat_completion(response) + else: + # Handle non-streaming response + response = client.chat.completions.create(**api_kwargs) + return self.parse_chat_completion(response) + + # For other model types, raise an error + raise ValueError(f"Unsupported model type: {model_type}") + + async def acall( + self, api_kwargs: Dict = {}, model_type: ModelType = ModelType.UNDEFINED + ): + """ + kwargs is the combined input and model_kwargs + """ + if self.async_client is None: + self.async_client = self.init_async_client() + + self._api_kwargs = api_kwargs.copy() + + # Check if we need to use a model-specific API version + deployment_id = api_kwargs.get("deployment_id") + if deployment_id and deployment_id in self._model_api_versions: + # Create a new client with the model-specific API version + 
api_key = self._api_key or os.getenv(self._env_api_key_name) + model_specific_client = AsyncAzureOpenAI( + api_key=api_key, + api_version=self._model_api_versions[deployment_id], + azure_endpoint=self.base_url, + ) + log.info(f"Using model-specific API version {self._model_api_versions[deployment_id]} for model {deployment_id}") + client = model_specific_client + else: + # Use the default client + client = self.async_client + + # Handle different model types + if model_type == ModelType.EMBEDDER: + # Prepare the embeddings API call + embedding_api_kwargs = api_kwargs.copy() + + # Handle model parameter for Azure OpenAI + # The installed version doesn't support deployment_id, so we need to use model + deployment_id = embedding_api_kwargs.pop('deployment_id', None) + if deployment_id and not embedding_api_kwargs.get('model'): + # Use deployment_id as the model name + embedding_api_kwargs['model'] = deployment_id + log.info(f"Using deployment_id {deployment_id} as model for embeddings") + + # Ensure we have a model parameter + model = embedding_api_kwargs.get('model') + if not model: + model = "text-embedding-ada-002" # Default model + embedding_api_kwargs['model'] = model + log.info(f"Using default model {model} for embeddings") + + # Remove dimensions parameter if it exists - not supported by text-embedding-ada-002 + if 'dimensions' in embedding_api_kwargs: + dimensions = embedding_api_kwargs.pop('dimensions') + log.info(f"Removed dimensions parameter ({dimensions}) as it's not supported by {model}") + + # Ensure we're using the correct API version for text-embedding-3 models + if model and ('text-embedding-3' in model): + # Create a client with the correct API version for text-embedding-3 models + api_key = self._api_key or os.getenv(self._env_api_key_name) + embedding_client = AsyncAzureOpenAI( + api_key=api_key, + api_version="2024-02-01", # API version compatible with text-embedding-3 models + azure_endpoint=self.base_url + ) + log.info(f"Using API version 
2024-02-01 for {model} embeddings") + # Call the embeddings API with the specialized client + response = await embedding_client.embeddings.create(**embedding_api_kwargs) + else: + # Use the standard client for other embedding models + response = await client.embeddings.create(**embedding_api_kwargs) + + return self.parse_embedding_response(response) + + elif model_type == ModelType.LLM: + # Handle streaming if requested + stream = api_kwargs.pop("stream", False) + + if stream: + # Handle streaming response + response = await client.chat.completions.create( + **api_kwargs, stream=True + ) + # For streaming, return the raw response so it can be iterated over + # This allows the caller to handle the streaming directly + log.info("Returning raw streaming response") + return response + else: + # Handle non-streaming response + response = await client.chat.completions.create(**api_kwargs) + return self.parse_chat_completion(response) + + # For other model types, raise an error + raise ValueError(f"Unsupported model type: {model_type}") + + @classmethod + def from_dict(cls: type[T], data: Dict[str, Any]) -> T: + """Create a client from a dictionary.""" + return cls(**data) + + def to_dict(self) -> Dict[str, Any]: + """Convert the component to a dictionary.""" + return { + "api_key": self._api_key, + "api_version": self._api_version, + "base_url": self.base_url, + "env_base_url_name": self._env_base_url_name, + "env_api_key_name": self._env_api_key_name, + "input_type": self._input_type, + "model_api_versions": self._model_api_versions, + } + + def _encode_image(self, image_path: str) -> str: + """Encode image to base64 string. + + Args: + image_path: Path to image file. + + Returns: + Base64 encoded image string. + + Raises: + ValueError: If the file cannot be read or doesn't exist. 
"""Helpers for reading files from, and cloning, Azure DevOps Git repositories."""
import os
import logging
import json
import base64
import subprocess
from urllib.parse import urlparse, urlunparse, quote, unquote

# Configure logging
logger = logging.getLogger(__name__)

# Host every supported repository URL must point at.
_AZURE_DEVOPS_HOST = "dev.azure.com"


def _encode_segment(segment: str) -> str:
    """Percent-encode a single URL path segment exactly once.

    Decoding first means an already-encoded segment ("My%20Project") and a raw
    one ("My Project") both come out as "My%20Project" instead of the former
    being double-encoded to "My%2520Project".
    """
    return quote(unquote(segment), safe="")


def _redact(text, placeholder: str, *secrets) -> str:
    """Return *text* with every non-empty value in *secrets* replaced by *placeholder*."""
    text = text or ""
    for secret in secrets:
        if secret:
            text = text.replace(secret, placeholder)
    return text


def _split_repo_url(repo_url: str):
    """Split an Azure DevOps repository URL into encoded URL components.

    Args:
        repo_url: e.g. "https://dev.azure.com/organization/project/_git/repo".
            A "user@" prefix on the host is tolerated.

    Returns:
        A tuple (organization, project_path, repository); each path segment is
        percent-encoded and ready to be placed in a URL.

    Raises:
        ValueError: If the URL is not an http(s) dev.azure.com URL or lacks the
            organization, project, "_git" or repository segment.
    """
    parsed = urlparse(repo_url)
    if parsed.scheme not in ("http", "https") or parsed.hostname != _AZURE_DEVOPS_HOST:
        logger.error("Invalid Azure DevOps URL format: %s", repo_url)
        raise ValueError(f"Not a valid Azure DevOps repository URL: {repo_url}")

    segments = [segment for segment in parsed.path.split("/") if segment]
    if not segments:
        logger.error("Organization not found in URL path parts")
        raise ValueError("Organization not found in URL")

    try:
        git_index = segments.index("_git")
    except ValueError:
        logger.error("Could not find '_git' in the URL path")
        raise ValueError("Could not find '_git' in the URL path") from None

    # The repository is the segment right after "_git".
    if git_index + 1 >= len(segments):
        logger.error("Repository name not found in URL (no part after _git)")
        raise ValueError("Repository name not found in URL")

    # The project is everything between the organization and "_git".
    if git_index < 2:
        logger.error("Project name not found in URL (git_index <= 1)")
        raise ValueError("Project name not found in URL")

    organization = _encode_segment(segments[0])
    project_path = "/".join(_encode_segment(s) for s in segments[1:git_index])
    repository = _encode_segment(segments[git_index + 1])
    return organization, project_path, repository


def _build_items_api_url(repo_url: str, file_path: str) -> str:
    """Build the Git "items" REST endpoint URL for *file_path* in *repo_url*.

    Shape of the endpoint:
    https://dev.azure.com/{organization}/{project}/_apis/git/repositories/{repository}/items?path={path}&api-version=7.1
    """
    organization, project_path, repository = _split_repo_url(repo_url)
    return (
        f"https://{_AZURE_DEVOPS_HOST}/{organization}/{project_path}"
        f"/_apis/git/repositories/{repository}/items"
        f"?path={quote(file_path)}&api-version=7.1&includeContent=true"
    )


def _extract_content(content: str) -> str:
    """Interpret the body returned by the items endpoint.

    Returns:
        The file content: either the raw body, or ``value.content`` when the
        body is a JSON envelope carrying it.

    Raises:
        ValueError: If the body is a JSON object with a "message" key (treated
            as an API error) or if the body is empty.
    """
    if content.startswith("{"):
        try:
            payload = json.loads(content)
        except json.JSONDecodeError as e:
            # Not JSON after all; it may simply be a file that starts with '{'.
            logger.warning("Response starts with '{' but is not valid JSON: %s", e)
        else:
            # NOTE(review): a repository file that is itself a JSON object with
            # a top-level "message" key is indistinguishable from an error here.
            if "message" in payload:
                error_message = payload["message"]
                logger.error("Azure DevOps API error message: %s", error_message)
                raise ValueError(f"Azure DevOps API error: {error_message}")

            value = payload.get("value")
            if isinstance(value, dict) and "content" in value:
                logger.info("Successfully retrieved file content in JSON format")
                return value["content"]

    if not content.strip():
        logger.error("Received empty response from Azure DevOps API")
        raise ValueError("Received empty response from Azure DevOps API")

    logger.info("Successfully retrieved file content, size: %d bytes", len(content))
    return content


def get_azuredevops_file_content(repo_url: str, file_path: str, access_token: str = None) -> str:
    """
    Retrieves the content of a file from an Azure DevOps repository using the Azure DevOps REST API.

    The request is issued with the ``curl`` executable, which must be on PATH.

    Args:
        repo_url (str): The URL of the Azure DevOps repository
                       (e.g., "https://dev.azure.com/organization/project/_git/repo")
        file_path (str): The path to the file within the repository (e.g., "src/main.py")
        access_token (str, optional): Personal access token for Azure DevOps

    Returns:
        str: The content of the file as a string

    Raises:
        ValueError: If the file cannot be fetched or if the URL is not a valid Azure DevOps URL
    """
    logger.info("Fetching file content from Azure DevOps: %s, file: %s", repo_url, file_path)
    encoded_auth = None

    try:
        # `quote` is resolved from the module-level import. Importing it again
        # inside this function would make the name local and break every use
        # that precedes the import statement.
        api_url = _build_items_api_url(repo_url, file_path)
        logger.info("Constructed API URL: %s", api_url)

        # -s hides the progress meter, -S still reports transport errors.
        # Verbose mode is deliberately not used: it echoes request headers,
        # including the Authorization header, to stderr.
        curl_cmd = ["curl", "-sS"]
        if access_token:
            # Azure DevOps uses Basic Auth with PAT as the password and empty username
            encoded_auth = base64.b64encode(f":{access_token}".encode()).decode()
            curl_cmd.extend(["-H", f"Authorization: Basic {encoded_auth}"])
        else:
            logger.warning("No access token provided for Azure DevOps API request")
        curl_cmd.append(api_url)

        result = subprocess.run(
            curl_cmd,
            check=False,  # the exit code is inspected below
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
        )

        stderr = _redact(result.stderr, "[REDACTED]", access_token, encoded_auth).strip()
        if result.returncode != 0:
            detail = stderr or f"curl exited with code {result.returncode}"
            logger.error("curl failed with exit code %s: %s", result.returncode, detail)
            raise ValueError(f"Error fetching file content from Azure DevOps: {detail}")
        if stderr:
            logger.info("Curl stderr output: %s", stderr)

        return _extract_content(result.stdout)

    except ValueError:
        # Already a meaningful, caller-facing error; do not re-wrap it.
        raise
    except Exception as e:
        message = _redact(str(e), "[REDACTED]", access_token, encoded_auth)
        logger.error("Unexpected error in get_azuredevops_file_content: %s", message)
        raise ValueError(f"Unexpected error accessing Azure DevOps: {message}") from e


def _build_clone_url(repo_url: str, access_token: str = None) -> str:
    """Return the URL to hand to ``git clone``.

    Path segments containing spaces (raw or as %20) are re-encoded exactly once.
    When *access_token* is given it is embedded as the password of an empty
    username, replacing any "user@" prefix already present on the host; query
    and fragment are dropped in that case.
    """
    clone_url = repo_url

    if " " in repo_url or "%20" in repo_url:
        parsed = urlparse(repo_url)
        if " " in parsed.path or "%20" in parsed.path:
            encoded_path = "/".join(_encode_segment(s) for s in parsed.path.split("/"))
            clone_url = f"{parsed.scheme}://{parsed.netloc}{encoded_path}"
            logger.info("Reconstructed URL for git: %s", clone_url)

    if access_token:
        parsed = urlparse(clone_url)
        # Azure DevOps clone URLs often look like https://org@dev.azure.com/...;
        # keep only the host[:port] part so the result has a single '@'.
        host = parsed.netloc.rpartition("@")[2]
        credentials = f":{quote(access_token, safe='')}"
        clone_url = urlunparse((parsed.scheme, f"{credentials}@{host}", parsed.path, "", "", ""))
        logger.info("Using access token for authentication")

    return clone_url


def clone_azuredevops_repo(repo_url: str, local_path: str, access_token: str = None) -> str:
    """
    Clones an Azure DevOps repository to a local path.
    Handles repositories with spaces in project names.

    Args:
        repo_url (str): The URL of the Azure DevOps repository
        local_path (str): The local directory where the repository will be cloned
        access_token (str, optional): Personal access token for Azure DevOps

    Returns:
        str: The output message from the git command, or a notice that an
            existing non-empty directory at ``local_path`` is being reused

    Raises:
        ValueError: If git is unavailable or the clone fails. The access token
            is masked in the message.
    """
    encoded_token = quote(access_token, safe="") if access_token else None

    try:
        logger.info("Preparing to clone Azure DevOps repository to %s", local_path)

        # Reuse an existing checkout before probing for git: nothing needs to
        # be cloned in that case, so git is not required.
        if os.path.exists(local_path) and os.listdir(local_path):
            logger.warning(f"Repository already exists at {local_path}. Using existing repository.")
            return f"Using existing repository at {local_path}"

        # Check if Git is installed
        subprocess.run(
            ["git", "--version"],
            check=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )

        os.makedirs(local_path, exist_ok=True)
        clone_url = _build_clone_url(repo_url, access_token)

        # repo_url (not clone_url) is logged so the token never reaches the logs.
        logger.info("Cloning repository from %s to %s", repo_url, local_path)
        result = subprocess.run(
            # "--" stops option parsing so a URL starting with '-' cannot be
            # interpreted as a git option.
            ["git", "clone", "--", clone_url, local_path],
            check=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )

        logger.info("Repository cloned successfully")
        return result.stdout.decode("utf-8", errors="replace")

    except subprocess.CalledProcessError as e:
        # errors="replace" keeps a non-UTF-8 git message from raising inside this handler.
        raw = (e.stderr or b"").decode("utf-8", errors="replace")
        error_msg = _redact(raw, "***TOKEN***", access_token, encoded_token)
        raise ValueError(f"Error during cloning: {error_msg}") from e
    except Exception as e:
        message = _redact(str(e), "***TOKEN***", access_token, encoded_token)
        raise ValueError(f"An unexpected error occurred: {message}") from e
os.environ["OPENROUTER_API_KEY"] = OPENROUTER_API_KEY +if AZURE_OPENAI_API_KEY: + os.environ["AZURE_OPENAI_API_KEY"] = AZURE_OPENAI_API_KEY + +# Support both old and new Azure OpenAI endpoint environment variables +if AZURE_OPENAI_ENDPOINT: + os.environ["AZURE_OPENAI_ENDPOINT"] = AZURE_OPENAI_ENDPOINT + # Also set the old variable for backward compatibility + os.environ["AZURE_OPENAI_API_BASE"] = AZURE_OPENAI_ENDPOINT if AWS_ACCESS_KEY_ID: os.environ["AWS_ACCESS_KEY_ID"] = AWS_ACCESS_KEY_ID if AWS_SECRET_ACCESS_KEY: @@ -51,6 +62,7 @@ "OpenAIClient": OpenAIClient, "OpenRouterClient": OpenRouterClient, "OllamaClient": OllamaClient, + "AzureOpenAIClient": AzureOpenAIClient, "BedrockClient": BedrockClient } @@ -136,6 +148,17 @@ def load_generator_config(): # Load embedder configuration def load_embedder_config(): embedder_config = load_json_config("embedder.json") + + # Debug logging to see what's loaded + logger.info(f"Loaded embedder config: {embedder_config}") + if "retriever" in embedder_config: + logger.info(f"Retriever config found: {embedder_config['retriever']}") + if "top_k" in embedder_config["retriever"]: + logger.info(f"top_k value found: {embedder_config['retriever']['top_k']}") + else: + logger.warning("top_k not found in retriever config") + else: + logger.warning("retriever key not found in embedder config") # Process client classes for key in ["embedder", "embedder_ollama"]: @@ -266,6 +289,12 @@ def load_lang_config(): if key in embedder_config: configs[key] = embedder_config[key] +# Ensure retriever configuration has a top_k value +if "retriever" not in configs: + configs["retriever"] = {} +if "top_k" not in configs.get("retriever", {}): + configs["retriever"]["top_k"] = 20 # Default value + # Update repository configuration if repo_config: for key in ["file_filters", "repository"]: @@ -326,6 +355,14 @@ def get_model_config(provider="google", model=None): result["model_kwargs"] = {"model": model, **model_params["options"]} else: result["model_kwargs"] 
= {"model": model} + elif provider == "azure": + # Azure OpenAI uses deployment_id instead of model + # The model name is used as the deployment_id + result["model_kwargs"] = {"model": model, **model_params} + + # Pass model-specific API versions if available + if "model_api_versions" in provider_config: + result["model_client_kwargs"] = {"model_api_versions": provider_config["model_api_versions"]} else: # Standard structure for other providers result["model_kwargs"] = {"model": model, **model_params} diff --git a/api/config/azure_example.json b/api/config/azure_example.json new file mode 100644 index 0000000..e126aa9 --- /dev/null +++ b/api/config/azure_example.json @@ -0,0 +1,13 @@ +{ + "embedder_azure": { + "client_class": "AzureOpenAIClient", + "batch_size": 500, + "model_kwargs": { + "model": "text-embedding-ada-002", + "encoding_format": "float", + "azure_endpoint": "https://your-azure-endpoint.openai.azure.com", + "api_key": "your-azure-api-key", + "api_version": "2023-05-15" + } + } +} diff --git a/api/config/generator.json b/api/config/generator.json index 4306c7b..2e8ae2c 100644 --- a/api/config/generator.json +++ b/api/config/generator.json @@ -1,6 +1,32 @@ { "default_provider": "google", "providers": { + "azure": { + "default_model": "gpt-4o", + "client_class": "AzureOpenAIClient", + "supportsCustomModel": true, + "model_api_versions": { + "gpt-4.1": "2025-01-01-preview" + }, + "models": { + "gpt-4o": { + "temperature": 0.7, + "top_p": 0.8 + }, + "gpt-4": { + "temperature": 0.7, + "top_p": 0.8 + }, + "gpt-35-turbo": { + "temperature": 0.7, + "top_p": 0.8 + }, + "gpt-4.1": { + "temperature": 0.7, + "top_p": 0.8 + } + } + }, "google": { "default_model": "gemini-2.0-flash", "supportsCustomModel": true, @@ -145,4 +171,4 @@ } } } -} \ No newline at end of file +} diff --git a/api/data_pipeline.py b/api/data_pipeline.py index e863453..087252a 100644 --- a/api/data_pipeline.py +++ b/api/data_pipeline.py @@ -9,19 +9,68 @@ import base64 import re import glob 
def get_file_attributes(file_path: str) -> Dict[str, Any]:
    """
    Read the extended file attributes (xattr) of a local file.

    Failures are logged as warnings and never raised: if the attribute list
    cannot be read the result is empty, and an attribute whose value cannot be
    read is left out.

    Args:
        file_path (str): Path of the file to inspect

    Returns:
        Dict[str, Any]: Attribute name -> value. Values are decoded as UTF-8
            where possible and otherwise kept as returned by ``xattr.getxattr``.
    """
    collected: Dict[str, Any] = {}

    try:
        names = xattr.listxattr(file_path)
    except (OSError, IOError) as e:
        logger.warning(f"Error listing attributes for {file_path}: {e}")
        return collected

    for name in names:
        try:
            raw_value = xattr.getxattr(file_path, name)
        except (OSError, IOError) as e:
            logger.warning(f"Error reading attribute {name} from {file_path}: {e}")
            continue

        # Prefer a text value; fall back to the raw object when it is not
        # UTF-8 decodable (or has no decode method at all).
        try:
            value = raw_value.decode('utf-8')
        except (UnicodeDecodeError, AttributeError):
            value = raw_value

        collected[name] = value

    logger.debug(f"Extracted {len(collected)} attributes from {file_path}")
    return collected
+104,35 @@ def count_tokens(text: str, is_ollama_embedder: bool = None) -> int: # Rough approximation: 4 characters per token return len(text) // 4 -def download_repo(repo_url: str, local_path: str, type: str = "github", access_token: str = None) -> str: +def download_repo(repo_url: str, type: str = "github", access_token: str = None) -> str: """ - Downloads a Git repository (GitHub, GitLab, or Bitbucket) to a specified local path. + Downloads a repository to a local directory. Args: - repo_url (str): The URL of the Git repository to clone. - local_path (str): The local directory where the repository will be cloned. - access_token (str, optional): Access token for private repositories. + repo_url (str): URL of the repository to download + type (str): Type of repository (github, gitlab, bitbucket, azure) + access_token (str, optional): Personal access token for private repositories Returns: - str: The output message from the `git` command. + str: Path to the local directory containing the repository """ try: - # Check if Git is installed - logger.info(f"Preparing to clone repository to {local_path}") - subprocess.run( - ["git", "--version"], - check=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - ) - - # Check if repository already exists - if os.path.exists(local_path) and os.listdir(local_path): - # Directory exists and is not empty - logger.warning(f"Repository already exists at {local_path}. 
Using existing repository.") - return f"Using existing repository at {local_path}" - - # Ensure the local path exists + # Log the repository URL and type for debugging + logger.info(f"Downloading repository from {repo_url} of type {type}") + + # Create a unique directory name based on the repo URL + repo_hash = hashlib.md5(repo_url.encode()).hexdigest() + local_path = os.path.join(get_adalflow_default_root_path(), "repos", repo_hash) + logger.info(f"Generated local path: {local_path} (hash: {repo_hash})") + + # Check if the repository already exists locally + if os.path.exists(local_path): + logger.info(f"Repository already exists at {local_path}") + return local_path + + # Create the directory if it doesn't exist os.makedirs(local_path, exist_ok=True) + logger.info(f"Created directory for repository at {local_path}") # Prepare the clone URL with access token if provided clone_url = repo_url @@ -100,6 +148,22 @@ def download_repo(repo_url: str, local_path: str, type: str = "github", access_t elif type == "bitbucket": # Format: https://{token}@bitbucket.org/owner/repo.git clone_url = urlunparse((parsed.scheme, f"{access_token}@{parsed.netloc}", parsed.path, '', '', '')) + elif type == "azure": + # Format for Azure DevOps: https://organization@dev.azure.com/organization/project/_git/repo.git + # For Azure DevOps, we need to ensure the URL is in the correct format for git clone + # Extract the organization name from the path + path_parts = parsed.path.strip('/').split('/') + organization = path_parts[0] if path_parts else "" + + # The path should end with .git + path = parsed.path + + # Use the organization name as the username with the PAT as password + clone_url = urlunparse((parsed.scheme, f"{organization}:{access_token}@{parsed.netloc}", path, '', '', '')) + + # Log the URL format (without exposing the token) + sanitized_url = urlunparse((parsed.scheme, f"{organization}:***TOKEN***@{parsed.netloc}", path, '', '', '')) + logger.info(f"Azure DevOps clone URL format 
(sanitized): {sanitized_url}") logger.info("Using access token for authentication") # Clone the repository @@ -113,7 +177,8 @@ def download_repo(repo_url: str, local_path: str, type: str = "github", access_t ) logger.info("Repository cloned successfully") - return result.stdout.decode("utf-8") + # Return the local path instead of the command output + return local_path except subprocess.CalledProcessError as e: error_msg = e.stderr.decode('utf-8') @@ -297,17 +362,28 @@ def should_process_file(file_path: str, use_inclusion: bool, included_dirs: List if token_count > MAX_EMBEDDING_TOKENS * 10: logger.warning(f"Skipping large file {relative_path}: Token count ({token_count}) exceeds limit") continue + + # Extract file attributes + file_attrs = get_file_attributes(file_path) + + # Prepare metadata + metadata = { + "file_path": relative_path, + "type": ext[1:], + "is_code": True, + "is_implementation": is_implementation, + "title": relative_path, + "token_count": token_count, + } + + # Add file attributes to metadata + if file_attrs: + metadata["file_attributes"] = file_attrs + logger.info(f"Added {len(file_attrs)} file attributes to {relative_path}") doc = Document( text=content, - meta_data={ - "file_path": relative_path, - "type": ext[1:], - "is_code": True, - "is_implementation": is_implementation, - "title": relative_path, - "token_count": token_count, - }, + meta_data=metadata, ) documents.append(doc) except Exception as e: @@ -331,17 +407,28 @@ def should_process_file(file_path: str, use_inclusion: bool, included_dirs: List if token_count > MAX_EMBEDDING_TOKENS: logger.warning(f"Skipping large file {relative_path}: Token count ({token_count}) exceeds limit") continue + + # Extract file attributes + file_attrs = get_file_attributes(file_path) + + # Prepare metadata + metadata = { + "file_path": relative_path, + "type": ext[1:], + "is_code": False, + "is_implementation": False, + "title": relative_path, + "token_count": token_count, + } + + # Add file attributes to 
def get_file_content(repo_url: str, file_path: str, type: str = "github", access_token: str = None):
    """
    Fetch one file from a hosted Git repository (GitHub, GitLab, Bitbucket, or Azure DevOps).

    The call is routed to the provider-specific fetcher selected by ``type``.
    For Azure DevOps the repository URL is percent-decoded (twice when it
    contains "%25") and checked for "dev.azure.com" and "_git" before the
    request is made.

    Args:
        repo_url (str): The URL of the repository
        file_path (str): The path to the file within the repository
        type (str): The type of repository (github, gitlab, bitbucket, azure)
        access_token (str, optional): Access token for private repositories

    Returns:
        Whatever the selected provider-specific fetcher returns.

    Raises:
        ValueError: For an unsupported ``type``, an invalid Azure DevOps URL, or
            any failure in the provider-specific fetcher; the message is
            prefixed with "Error getting file content: ".
    """
    try:
        if type == "github":
            return get_github_file_content(repo_url, file_path, access_token)
        if type == "gitlab":
            return get_gitlab_file_content(repo_url, file_path, access_token)
        if type == "bitbucket":
            return get_bitbucket_file_content(repo_url, file_path, access_token)
        if type != "azure":
            raise ValueError(f"Unsupported repository type: {type}")

        # Only the Azure DevOps case remains below this point.
        from api.azuredevops_client import get_azuredevops_file_content
        azure_url = repo_url
        logger.info(f"Original Azure DevOps URL: {repo_url}")

        try:
            from urllib.parse import unquote

            # "%25" is an encoded "%", i.e. the URL was percent-encoded twice.
            double_encoded = "%25" in repo_url
            azure_url = unquote(repo_url)
            if double_encoded:
                logger.info(f"First decode step: {azure_url}")
                azure_url = unquote(azure_url)
                logger.info(f"Double-decoded Azure DevOps URL: {azure_url}")
            else:
                logger.info(f"Decoded Azure DevOps URL: {azure_url}")

            if "dev.azure.com" not in azure_url:
                logger.error(f"URL does not appear to be an Azure DevOps URL: {azure_url}")
                raise ValueError(f"Invalid Azure DevOps URL: {azure_url}")

            if "_git" not in azure_url:
                logger.error(f"Azure DevOps URL missing '_git' segment: {azure_url}")
                raise ValueError(f"Invalid Azure DevOps URL format, missing '_git' segment: {azure_url}")
        except Exception as url_error:
            logger.error(f"Failed to process Azure DevOps URL: {str(url_error)}")
            raise ValueError(f"Failed to process Azure DevOps URL: {str(url_error)}")

        logger.info(f"Processing Azure DevOps repository with URL: {azure_url}")
        logger.info(f"Fetching file: {file_path} from Azure DevOps repository")
        return get_azuredevops_file_content(azure_url, file_path, access_token)
    except Exception as error:
        raise ValueError(f"Error getting file content: {str(error)}")
os.listdir(save_repo_dir)): # Only download if the repository doesn't exist or is empty - download_repo(repo_url_or_path, save_repo_dir, repo_type, access_token) + try: + # Use the download_repo function to get a local path + repo_path = download_repo(repo_url_or_path, type, access_token) + + # If the repo_path is different from save_repo_dir, copy the contents + if repo_path != save_repo_dir and os.path.exists(repo_path): + import shutil + # Create save_repo_dir if it doesn't exist + os.makedirs(save_repo_dir, exist_ok=True) + + # Copy contents from repo_path to save_repo_dir + for item in os.listdir(repo_path): + src = os.path.join(repo_path, item) + dst = os.path.join(save_repo_dir, item) + if os.path.isdir(src): + shutil.copytree(src, dst, dirs_exist_ok=True) + else: + shutil.copy2(src, dst) + logger.info(f"Copied repository from {repo_path} to {save_repo_dir}") + except Exception as e: + logger.error(f"Error downloading repository: {str(e)}") + raise ValueError(f"Failed to download repository: {str(e)}") else: logger.info(f"Repository already exists at {save_repo_dir}. 
Using existing repository.") else: # local path diff --git a/api/rag.py b/api/rag.py index cdb71ed..ff027e8 100644 --- a/api/rag.py +++ b/api/rag.py @@ -420,13 +420,29 @@ def prepare_retriever(self, repo_url_or_path: str, type: str = "github", access_ try: # Use the appropriate embedder for retrieval retrieve_embedder = self.query_embedder if self.is_ollama_embedder else self.embedder + + # Debug logging for configs + logger.info(f"Available configs keys: {list(configs.keys())}") + if "retriever" in configs: + logger.info(f"Retriever config: {configs['retriever']}") + if "top_k" in configs["retriever"]: + logger.info(f"Found top_k in configs: {configs['retriever']['top_k']}") + else: + logger.warning("top_k not found in retriever config") + else: + logger.warning("retriever key not found in configs") + + # Get top_k from configs or use a default value + top_k = configs.get("retriever", {}).get("top_k", 20) + logger.info(f"Using top_k value: {top_k}") + self.retriever = FAISSRetriever( - **configs["retriever"], embedder=retrieve_embedder, documents=self.transformed_docs, document_map_func=lambda doc: doc.vector, + top_k=top_k # Pass top_k as a direct parameter ) - logger.info("FAISS retriever created successfully") + logger.info(f"FAISS retriever created successfully with top_k={top_k}") except Exception as e: logger.error(f"Error creating FAISS retriever: {str(e)}") # Try to provide more specific error information @@ -462,13 +478,34 @@ def call(self, query: str, language: str = "en") -> Tuple[List]: Tuple of (RAGAnswer, retrieved_documents) """ try: + # Add debug logging + logger.info(f"Calling retriever with query: {query[:50]}...") + logger.info(f"Retriever type: {type(self.retriever).__name__}") + + # Call the retriever retrieved_documents = self.retriever(query) - - # Fill in the documents - retrieved_documents[0].documents = [ - self.transformed_docs[doc_index] - for doc_index in retrieved_documents[0].doc_indices - ] + logger.info(f"Retrieved documents type: 
{type(retrieved_documents)}") + + if isinstance(retrieved_documents, list) and len(retrieved_documents) > 0: + logger.info(f"First result type: {type(retrieved_documents[0]).__name__}") + logger.info(f"Doc indices available: {hasattr(retrieved_documents[0], 'doc_indices')}") + + # Fill in the documents + if hasattr(retrieved_documents[0], 'doc_indices'): + retrieved_documents[0].documents = [ + self.transformed_docs[doc_index] + for doc_index in retrieved_documents[0].doc_indices + ] + else: + logger.error("Retrieved documents don't have doc_indices attribute") + # Try to handle this case gracefully + if hasattr(retrieved_documents[0], 'documents') and not retrieved_documents[0].documents: + # If documents is empty, try to populate it with the top documents + top_k = getattr(self.retriever, 'top_k', 20) # Default to 20 if not specified + retrieved_documents[0].documents = self.transformed_docs[:top_k] + logger.info(f"Populated documents with top {top_k} documents as fallback") + else: + logger.error(f"Unexpected retriever result format: {retrieved_documents}") return retrieved_documents diff --git a/api/websocket_wiki.py b/api/websocket_wiki.py index 4b9907c..89f99b8 100644 --- a/api/websocket_wiki.py +++ b/api/websocket_wiki.py @@ -3,7 +3,7 @@ from typing import List, Optional, Dict, Any from urllib.parse import unquote -import google.generativeai as genai +# Import model clients from adalflow.components.model_client.ollama_client import OllamaClient from adalflow.core.types import ModelType from fastapi import WebSocket, WebSocketDisconnect, HTTPException @@ -13,14 +13,40 @@ from api.data_pipeline import count_tokens, get_file_content from api.openai_client import OpenAIClient from api.openrouter_client import OpenRouterClient +from api.azure_openai_client import AzureOpenAIClient from api.rag import RAG +# Optional import for Google Generative AI +try: + import google.generativeai as genai + GOOGLE_AI_AVAILABLE = True +except ImportError: + GOOGLE_AI_AVAILABLE = 
False + # Configure logging +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' +) +# Unified logging setup from api.logging_config import setup_logging setup_logging() logger = logging.getLogger(__name__) +# Get API keys from environment variables +google_api_key = os.environ.get('GOOGLE_API_KEY') +azure_openai_api_key = os.environ.get('AZURE_OPENAI_API_KEY') +azure_openai_endpoint = os.environ.get('AZURE_OPENAI_ENDPOINT') or os.environ.get('AZURE_OPENAI_API_BASE') + +# Check if Azure OpenAI is configured +AZURE_OPENAI_AVAILABLE = bool(azure_openai_api_key and azure_openai_endpoint) + +# Configure Google Generative AI if available +if GOOGLE_AI_AVAILABLE and google_api_key: + genai.configure(api_key=google_api_key) +else: + logger.warning("GOOGLE_API_KEY not found in environment variables") # Models for the API class ChatMessage(BaseModel): @@ -38,7 +64,7 @@ class ChatCompletionRequest(BaseModel): type: Optional[str] = Field("github", description="Type of repository (e.g., 'github', 'gitlab', 'bitbucket')") # model parameters - provider: str = Field("google", description="Model provider (google, openai, openrouter, ollama)") + provider: str = Field("azure", description="Model provider (azure, openai, openrouter, ollama, google)") model: Optional[str] = Field(None, description="Model name for the specified provider") language: Optional[str] = Field("en", description="Language for content generation (e.g., 'en', 'ja', 'zh', 'es', 'kr', 'vi')") @@ -72,7 +98,13 @@ async def handle_websocket_chat(websocket: WebSocket): # Create a new RAG instance for this request try: - request_rag = RAG(provider=request.provider, model=request.model) + # Set a default provider if empty + provider = request.provider + if not provider or provider.strip() == "": + provider = "google" # Default to google if provider is empty + logger.info(f"Empty provider detected, defaulting to: {provider}") + + request_rag = RAG(provider=provider, 
model=request.model) # Extract custom file filter parameters if provided excluded_dirs = None @@ -192,9 +224,22 @@ async def handle_websocket_chat(websocket: WebSocket): # Try to perform RAG retrieval try: # This will use the actual RAG implementation + logger.info("About to call request_rag with query") retrieved_documents = request_rag(rag_query, language=request.language) - - if retrieved_documents and retrieved_documents[0].documents: + logger.info(f"RAG call successful, result type: {type(retrieved_documents)}") + + # Debug the retrieved documents structure + if isinstance(retrieved_documents, tuple): + logger.info(f"Retrieved documents is a tuple of length {len(retrieved_documents)}") + for i, item in enumerate(retrieved_documents): + logger.info(f"Item {i} type: {type(item).__name__}") + elif isinstance(retrieved_documents, list): + logger.info(f"Retrieved documents is a list of length {len(retrieved_documents)}") + for i, item in enumerate(retrieved_documents): + logger.info(f"Item {i} type: {type(item).__name__}") + + # Check if we have documents + if retrieved_documents and hasattr(retrieved_documents[0], 'documents'): # Format context for the prompt in a more structured way documents = retrieved_documents[0].documents logger.info(f"Retrieved {len(documents)} documents") @@ -347,7 +392,7 @@ async def handle_websocket_chat(websocket: WebSocket): You are an expert code analyst examining the {repo_type} repository: {repo_url} ({repo_name}). You provide direct, concise, and accurate information about code repositories. You NEVER start responses with markdown headers or code fences. -IMPORTANT:You MUST respond in {language_name} language. +IMPORTANT: You MUST respond in {language_name} language. 
@@ -368,11 +413,12 @@ async def handle_websocket_chat(websocket: WebSocket): ``` + +- Be precise and technical when discussing code - Format your response with proper markdown including headings, lists, and code blocks WITHIN your answer - For code analysis, organize your response with clear sections - Think step by step and structure your answer logically - Start with the most relevant information that directly addresses the user's query -- Be precise and technical when discussing code - Your response language should be in the same language as the user's query @@ -464,6 +510,68 @@ async def handle_websocket_chat(websocket: WebSocket): model_kwargs=model_kwargs, model_type=ModelType.LLM ) + elif request.provider == "azure": + logger.info(f"Using Azure OpenAI protocol with model: {request.model}") + + # Check if Azure OpenAI credentials are set + if not AZURE_OPENAI_AVAILABLE: + logger.warning("Azure OpenAI credentials not found in environment variables, but continuing with request") + # We'll handle this below by falling back to other providers + + # Initialize Azure OpenAI client + model = AzureOpenAIClient() + + # Format the prompt as messages for Azure OpenAI + # First create the system message with context + system_content = system_prompt + + # Create the user message with the query + user_content = query + + # Format messages for Azure OpenAI + messages = [ + {"role": "system", "content": system_content}, + ] + + # Add conversation history if available + if conversation_history: + messages.append({"role": "user", "content": f"Previous conversation: {conversation_history}"}) + + # Add context if available + if context_text.strip(): + messages.append({"role": "user", "content": f"Context: {context_text}"}) + + # Add file content if available + if request.filePath and file_content: + messages.append({"role": "user", "content": f"File content ({request.filePath}): {file_content}"}) + + # Add the actual query + messages.append({"role": "user", "content": 
user_content}) + + logger.info(f"Formatted {len(messages)} messages for Azure OpenAI") + + # Set up model kwargs + model_kwargs = { + "model": request.model or "gpt-4", # Default to GPT-4 if not specified + "stream": True, + "temperature": model_config.get("temperature", 0.7), + "top_p": model_config.get("top_p", 0.8) + } + + # For Azure OpenAI, we need to ensure the api_kwargs include both 'messages' and 'model' + # The convert_inputs_to_api_kwargs method may not be handling this correctly + api_kwargs = { + "messages": messages, + "model": request.model or "gpt-4", # Ensure model is included + "stream": True, + "temperature": model_config.get("temperature", 0.7), + "top_p": model_config.get("top_p", 0.8) + } + + # Log the API kwargs for debugging + logger.info(f"Azure OpenAI API kwargs: {api_kwargs.keys()}") + + # No need to use convert_inputs_to_api_kwargs as we're manually constructing the kwargs elif request.provider == "openai": logger.info(f"Using Openai protocol with model: {request.model}") @@ -487,19 +595,107 @@ async def handle_websocket_chat(websocket: WebSocket): model_type=ModelType.LLM ) else: - # Initialize Google Generative AI model - model = genai.GenerativeModel( - model_name=model_config["model"], - generation_config={ - "temperature": model_config["temperature"], - "top_p": model_config["top_p"], - "top_k": model_config["top_k"] + # Fall back to Google Generative AI if available + if GOOGLE_AI_AVAILABLE and google_api_key: + # Initialize Google Generative AI model + logger.info("Using Google Generative AI for model generation") + # Create safe generation config with defaults + generation_config = { + "temperature": 0.7, + "top_p": 0.8, + "top_k": 40 } - ) + + # Update with available parameters from model_config + if "temperature" in model_config: + generation_config["temperature"] = model_config["temperature"] + if "top_p" in model_config: + generation_config["top_p"] = model_config["top_p"] + + # Initialize the model with the safe 
configuration + model = genai.GenerativeModel( + model_name=model_config["model"], + generation_config=generation_config + ) + else: + # Fall back to OpenAI if neither Azure nor Google is available + logger.info("Falling back to OpenAI for model generation") + model = OpenAIClient() + model_kwargs = { + "model": request.model or "gpt-3.5-turbo", + "stream": True, + "temperature": model_config.get("temperature", 0.7), + "top_p": model_config.get("top_p", 0.8) + } + + api_kwargs = model.convert_inputs_to_api_kwargs( + input=prompt, + model_kwargs=model_kwargs, + model_type=ModelType.LLM + ) # Process the response based on the provider try: - if request.provider == "ollama": + if request.provider == "azure": + # Get the response and handle it properly using the previously created api_kwargs + logger.info("Making Azure OpenAI API call") + response = await model.acall(api_kwargs=api_kwargs, model_type=ModelType.LLM) + + # The response is now the raw AsyncStream object from the OpenAI library + logger.info("Processing Azure OpenAI streaming response") + + try: + # Iterate over the stream chunks + async for chunk in response: + # Log the chunk type + logger.debug(f"Received chunk type: {type(chunk).__name__}") + + # Debug the chunk structure + chunk_dict = {attr: getattr(chunk, attr) for attr in dir(chunk) if not attr.startswith('_') and not callable(getattr(chunk, attr))} + logger.debug(f"Chunk attributes: {list(chunk_dict.keys())}") + + # Skip chunks with no delta content + if not hasattr(chunk, 'choices') or not chunk.choices: + logger.debug("Skipping chunk with no choices") + continue + + # Log choices structure + logger.debug(f"Choices length: {len(chunk.choices)}") + + # Process each choice in the chunk + for i, choice in enumerate(chunk.choices): + choice_dict = {attr: getattr(choice, attr) for attr in dir(choice) if not attr.startswith('_') and not callable(getattr(choice, attr))} + logger.debug(f"Choice {i} attributes: {list(choice_dict.keys())}") + + # Extract 
content from delta if available + if hasattr(choice, 'delta'): + delta_dict = {attr: getattr(choice.delta, attr) for attr in dir(choice.delta) if not attr.startswith('_') and not callable(getattr(choice.delta, attr))} + logger.debug(f"Delta attributes: {list(delta_dict.keys())}") + + # Get content if available + if hasattr(choice.delta, 'content') and choice.delta.content is not None: + content = choice.delta.content + logger.debug(f"Sending content: {content[:20]}..." if len(content) > 20 else f"Sending content: {content}") + await websocket.send_text(content) + + logger.info("Azure OpenAI streaming response completed successfully") + except Exception as e: + logger.error(f"Error processing Azure OpenAI streaming response: {str(e)}") + + # Try to get the response directly if streaming failed + try: + # If response is a completed response rather than a stream + if hasattr(response, 'choices') and len(response.choices) > 0: + if hasattr(response.choices[0], 'message') and hasattr(response.choices[0].message, 'content'): + content = response.choices[0].message.content + if content: + await websocket.send_text(content) + except Exception as recovery_error: + logger.error(f"Failed to recover response content: {str(recovery_error)}") + + # Explicitly close the WebSocket connection after the response is complete + await websocket.close() + elif request.provider == "ollama": # Get the response and handle it properly using the previously created api_kwargs response = await model.acall(api_kwargs=api_kwargs, model_type=ModelType.LLM) # Handle streaming response from Ollama @@ -588,75 +784,150 @@ async def handle_websocket_chat(websocket: WebSocket): model_kwargs=model_kwargs, model_type=ModelType.LLM ) - + # Get the response using the simplified prompt fallback_response = await model.acall(api_kwargs=fallback_api_kwargs, model_type=ModelType.LLM) - - # Handle streaming fallback_response from Ollama + + # Handle streaming fallback_response async for chunk in 
fallback_response: text = getattr(chunk, 'response', None) or getattr(chunk, 'text', None) or str(chunk) if text and not text.startswith('model=') and not text.startswith('created_at='): text = text.replace('', '').replace('', '') await websocket.send_text(text) - elif request.provider == "openrouter": + elif request.provider == "azure" and AZURE_OPENAI_AVAILABLE: + # Initialize Azure OpenAI client for fallback + logger.info("Making fallback Azure OpenAI API call") + fallback_model = AzureOpenAIClient() + + # Format the simplified prompt as messages for Azure OpenAI + fallback_messages = [ + {"role": "system", "content": system_prompt}, + ] + + # Add conversation history if available + if conversation_history: + fallback_messages.append({"role": "user", "content": f"Previous conversation: {conversation_history}"}) + + # Add file content if available + if request.filePath and file_content: + fallback_messages.append({"role": "user", "content": f"File content ({request.filePath}): {file_content}"}) + + # Add the note about answering without retrieval augmentation + fallback_messages.append({"role": "user", "content": "Answering without retrieval augmentation due to input size constraints."}) + + # Add the actual query + fallback_messages.append({"role": "user", "content": query}) + + logger.info(f"Formatted {len(fallback_messages)} fallback messages for Azure OpenAI") + + # For Azure OpenAI, we need to ensure the api_kwargs include both 'messages' and 'model' + # The convert_inputs_to_api_kwargs method may not be handling this correctly + fallback_api_kwargs = { + "messages": fallback_messages, + "model": request.model or "gpt-4", # Ensure model is included + "stream": True, + "temperature": 0.7, + "top_p": 0.8 + } + + # Log the API kwargs for debugging + logger.info(f"Azure OpenAI fallback API kwargs: {fallback_api_kwargs.keys()}") + + # Get the response using the simplified prompt + fallback_response = await fallback_model.acall(api_kwargs=fallback_api_kwargs, 
model_type=ModelType.LLM) + + # The response is now the raw AsyncStream object from the OpenAI library + logger.info("Processing Azure OpenAI fallback streaming response") + try: - # Create new api_kwargs with the simplified prompt - fallback_api_kwargs = model.convert_inputs_to_api_kwargs( - input=simplified_prompt, - model_kwargs=model_kwargs, - model_type=ModelType.LLM - ) - - # Get the response using the simplified prompt - logger.info("Making fallback OpenRouter API call") - fallback_response = await model.acall(api_kwargs=fallback_api_kwargs, model_type=ModelType.LLM) - - # Handle streaming fallback_response from OpenRouter + # Iterate over the stream chunks async for chunk in fallback_response: - await websocket.send_text(chunk) - except Exception as e_fallback: - logger.error(f"Error with OpenRouter API fallback: {str(e_fallback)}") - error_msg = f"\nError with OpenRouter API fallback: {str(e_fallback)}\n\nPlease check that you have set the OPENROUTER_API_KEY environment variable with a valid API key." 
- await websocket.send_text(error_msg) - elif request.provider == "openai": + # Log the chunk type + logger.info(f"Received fallback chunk type: {type(chunk).__name__}") + + # Debug the chunk structure + chunk_dict = {attr: getattr(chunk, attr) for attr in dir(chunk) if not attr.startswith('_') and not callable(getattr(chunk, attr))} + logger.info(f"Fallback chunk attributes: {list(chunk_dict.keys())}") + + # Skip chunks with no delta content + if not hasattr(chunk, 'choices') or not chunk.choices: + logger.info("Skipping fallback chunk with no choices") + continue + + # Log choices structure + logger.info(f"Fallback choices length: {len(chunk.choices)}") + + # Process each choice in the chunk + for i, choice in enumerate(chunk.choices): + choice_dict = {attr: getattr(choice, attr) for attr in dir(choice) if not attr.startswith('_') and not callable(getattr(choice, attr))} + logger.info(f"Fallback choice {i} attributes: {list(choice_dict.keys())}") + + # Extract content from delta if available + if hasattr(choice, 'delta'): + delta_dict = {attr: getattr(choice.delta, attr) for attr in dir(choice.delta) if not attr.startswith('_') and not callable(getattr(choice.delta, attr))} + logger.info(f"Fallback delta attributes: {list(delta_dict.keys())}") + + # Get content if available + if hasattr(choice.delta, 'content') and choice.delta.content is not None: + content = choice.delta.content + logger.info(f"Sending fallback content: {content[:20]}..." 
if len(content) > 20 else f"Sending fallback content: {content}") + await websocket.send_text(content) + + logger.info("Azure OpenAI fallback streaming response completed successfully") + except Exception as e: + logger.error(f"Error processing Azure OpenAI fallback streaming response: {str(e)}") + + # Try to get the response directly if streaming failed + try: + # If response is a completed response rather than a stream + if hasattr(fallback_response, 'choices') and len(fallback_response.choices) > 0: + if hasattr(fallback_response.choices[0], 'message') and hasattr(fallback_response.choices[0].message, 'content'): + content = fallback_response.choices[0].message.content + if content: + await websocket.send_text(content) + except Exception as recovery_error: + logger.error(f"Failed to recover fallback response content: {str(recovery_error)}") + elif GOOGLE_AI_AVAILABLE and google_api_key: + # Initialize Google Generative AI model as fallback + logger.info("Making fallback Google Generative AI call") try: - # Create new api_kwargs with the simplified prompt - fallback_api_kwargs = model.convert_inputs_to_api_kwargs( - input=simplified_prompt, - model_kwargs=model_kwargs, - model_type=ModelType.LLM + # Get model config + model_config = get_model_config(request.provider, request.model) + + # Create safe generation config with defaults + generation_config = { + "temperature": 0.7, + "top_p": 0.8, + "top_k": 40 + } + + # Update with available parameters if they exist + if isinstance(model_config, dict): + if "temperature" in model_config: + generation_config["temperature"] = model_config["temperature"] + if "top_p" in model_config: + generation_config["top_p"] = model_config["top_p"] + + # Initialize the model with the safe configuration + fallback_model = genai.GenerativeModel( + model_name=model_config.get("model", "gemini-pro"), + generation_config=generation_config ) - - # Get the response using the simplified prompt - logger.info("Making fallback Openai API call") 
- fallback_response = await model.acall(api_kwargs=fallback_api_kwargs, model_type=ModelType.LLM) - - # Handle streaming fallback_response from Openai - async for chunk in fallback_response: - text = chunk if isinstance(chunk, str) else getattr(chunk, 'text', str(chunk)) - await websocket.send_text(text) - except Exception as e_fallback: - logger.error(f"Error with Openai API fallback: {str(e_fallback)}") - error_msg = f"\nError with Openai API fallback: {str(e_fallback)}\n\nPlease check that you have set the OPENAI_API_KEY environment variable with a valid API key." + + # Get streaming response using simplified prompt + fallback_response = fallback_model.generate_content(simplified_prompt, stream=True) + # Stream the fallback response + for chunk in fallback_response: + if hasattr(chunk, 'text'): + await websocket.send_text(chunk.text) + except Exception as e_google: + logger.error(f"Error with Google Generative AI fallback: {str(e_google)}") + error_msg = f"\nAll fallback options failed. Please try again with a shorter query or check your API configurations.\nLast error: {str(e_google)}" await websocket.send_text(error_msg) else: - # Initialize Google Generative AI model - model_config = get_model_config(request.provider, request.model) - fallback_model = genai.GenerativeModel( - model_name=model_config["model"], - generation_config={ - "temperature": model_config["model_kwargs"].get("temperature", 0.7), - "top_p": model_config["model_kwargs"].get("top_p", 0.8), - "top_k": model_config["model_kwargs"].get("top_k", 40) - } - ) - - # Get streaming response using simplified prompt - fallback_response = fallback_model.generate_content(simplified_prompt, stream=True) - # Stream the fallback response - for chunk in fallback_response: - if hasattr(chunk, 'text'): - await websocket.send_text(chunk.text) + # No fallback options available + error_msg = "\nNo fallback options available. Please check your API configurations and try again with a shorter query." 
+ await websocket.send_text(error_msg) except Exception as e2: logger.error(f"Error in fallback streaming response: {str(e2)}") await websocket.send_text(f"\nI apologize, but your request is too large for me to process. Please try a shorter query or break it into smaller parts.") diff --git a/next.config.ts b/next.config.ts index 539857b..6d2c0fe 100644 --- a/next.config.ts +++ b/next.config.ts @@ -51,6 +51,11 @@ const nextConfig: NextConfig = { source: '/local_repo/structure', destination: `${TARGET_SERVER_BASE_URL}/local_repo/structure`, }, + { + // Azure DevOps repository path pattern + source: '/:organization/:project/:repository', + destination: `${TARGET_SERVER_BASE_URL}/:organization/:project/:repository` + }, { source: '/api/auth/status', destination: `${TARGET_SERVER_BASE_URL}/auth/status`, diff --git a/src/app/[owner]/[repo]/page.tsx b/src/app/[owner]/[repo]/page.tsx index fdc2e32..7ceb9e3 100644 --- a/src/app/[owner]/[repo]/page.tsx +++ b/src/app/[owner]/[repo]/page.tsx @@ -576,7 +576,23 @@ Remember: // Determine the wiki structure from repository data const determineWikiStructure = useCallback(async (fileTree: string, readme: string, owner: string, repo: string) => { + console.log('determineWikiStructure called with:', { + fileTreeLength: fileTree?.length || 0, + readmeLength: readme?.length || 0, + owner, + repo, + repoType: repoInfo?.type || 'unknown' + }); + + if (!fileTree) { + console.error('No file tree data provided'); + setError('No file tree data available. Please try again.'); + setIsLoading(false); + return; + } + if (!owner || !repo) { + console.error('Invalid repository information. Owner and repo name are required.'); setError('Invalid repository information. 
Owner and repo name are required.'); setIsLoading(false); return; @@ -587,6 +603,15 @@ Remember: console.log('Wiki structure determination already in progress, skipping duplicate call'); return; } + + // For Azure DevOps repositories, ensure we're using the correct format + let repoUrlForRequest = ''; + if (repoInfo.type === 'azure') { + repoUrlForRequest = repoInfo.repoUrl || `https://dev.azure.com/${owner}/_git/${repo}`; + console.log('Using Azure DevOps URL for request:', repoUrlForRequest); + } else { + repoUrlForRequest = getRepoUrl(repoInfo); + } try { setStructureRequestInProgress(true); @@ -602,7 +627,7 @@ Remember: type: effectiveRepoInfo.type, messages: [{ role: 'user', -content: `Analyze this GitHub repository ${owner}/${repo} and create a wiki structure for it. +content: `Analyze this ${repoInfo.type === 'azure' ? 'Azure DevOps' : repoInfo.type === 'gitlab' ? 'GitLab' : repoInfo.type === 'bitbucket' ? 'Bitbucket' : 'GitHub'} repository ${owner}/${repo} and create a wiki structure for it. 1. 
The complete file tree of the project: @@ -729,10 +754,12 @@ IMPORTANT: let responseText = ''; try { + console.log('Starting WebSocket connection for wiki structure generation'); // Create WebSocket URL from the server base URL const serverBaseUrl = process.env.NEXT_PUBLIC_SERVER_BASE_URL || 'http://localhost:8001'; const wsBaseUrl = serverBaseUrl.replace(/^http/, 'ws'); const wsUrl = `${wsBaseUrl}/ws/chat`; + console.log('WebSocket URL:', wsUrl); // Create a new WebSocket connection const ws = new WebSocket(wsUrl); @@ -743,6 +770,7 @@ IMPORTANT: ws.onopen = () => { console.log('WebSocket connection established for wiki structure'); // Send the request as JSON + console.log('Sending request body via WebSocket:', JSON.stringify(requestBody).substring(0, 200) + '...'); ws.send(JSON.stringify(requestBody)); resolve(); }; @@ -754,6 +782,7 @@ IMPORTANT: // If the connection doesn't open within 5 seconds, fall back to HTTP const timeout = setTimeout(() => { + console.warn('WebSocket connection timeout after 5 seconds'); reject(new Error('WebSocket connection timeout')); }, 5000); @@ -762,6 +791,7 @@ IMPORTANT: clearTimeout(timeout); console.log('WebSocket connection established for wiki structure'); // Send the request as JSON + console.log('Sending request body via WebSocket:', JSON.stringify(requestBody).substring(0, 200) + '...'); ws.send(JSON.stringify(requestBody)); resolve(); }; @@ -1221,7 +1251,48 @@ IMPORTANT: throw err; } } - else if (effectiveRepoInfo.type === 'bitbucket') { + else if (effectiveRepoInfo.type === 'azure') { + // Azure DevOps repositories use a simplified approach + try { + // Check if we have the file tree and README in the URL query parameters + const fileTreeParam = searchParams.get('file_tree'); + const readmeParam = searchParams.get('readme'); + + if (fileTreeParam && readmeParam) { + // Use the file tree and README from the URL parameters + fileTreeData = decodeURIComponent(fileTreeParam); + readmeContent = decodeURIComponent(readmeParam); 
+ console.log('Using file tree and README from URL parameters'); + } else { + // For Azure DevOps, we need to make a request to the catch-all route + // The backend will handle cloning the repository and redirecting to a simplified URL + console.log('Fetching Azure DevOps repository structure from backend'); + + // Construct the URL with the repository information + const azureRepoUrl = repoInfo.repoUrl || ''; + + // Extract organization, project, and repository from the URL + // Format: https://dev.azure.com/{organization}/{project}/_git/{repository} + const urlParts = azureRepoUrl.split('/'); + const organization = urlParts[3]; + const project = urlParts[4]; + const repository = urlParts[urlParts.length - 1]; + + // Create the request URL using the catch-all route format + const requestUrl = `/${organization}/${project}/${repository}?type=azure&repo_url=${encodeURIComponent(azureRepoUrl)}`; + + console.log('Making request to catch-all route:', requestUrl); + + // For Azure DevOps, we'll navigate to the catch-all route + // The backend will redirect to a simplified URL with the file tree and README as query parameters + window.location.href = requestUrl; + return; // Stop execution here as we're redirecting + } + } catch (err) { + console.error('Error fetching Azure DevOps repository structure:', err); + throw err; + } + } else if (effectiveRepoInfo.type === 'bitbucket') { // Bitbucket API approach const repoPath = extractUrlPath(effectiveRepoInfo.repoUrl ?? '') ?? 
`${owner}/${repo}`; const encodedRepoPath = encodeURIComponent(repoPath); @@ -1301,7 +1372,18 @@ IMPORTANT: } // Now determine the wiki structure - await determineWikiStructure(fileTreeData, readmeContent, owner, repo); + console.log('Determining wiki structure with data:', { fileTreeData: fileTreeData.substring(0, 100) + '...', readmeContent: readmeContent.substring(0, 100) + '...', owner, repo }); + + // For Azure DevOps repositories, we need to handle the owner differently + // The owner should be just the organization, not the full path + if (repoInfo.type === 'azure') { + // Extract just the organization name for Azure DevOps + const azureOwner = owner.split('/')[0]; + console.log('Using modified owner for Azure DevOps:', azureOwner); + await determineWikiStructure(fileTreeData, readmeContent, azureOwner, repo); + } else { + await determineWikiStructure(fileTreeData, readmeContent, owner, repo); + } } catch (error) { console.error('Error fetching repository structure:', error); @@ -1857,6 +1939,10 @@ IMPORTANT: ) : effectiveRepoInfo.type === 'gitlab' ? ( + ) : repoInfo.type === 'azure' ? 
( + + + ) : ( )} diff --git a/src/app/page.tsx b/src/app/page.tsx index 35e74ce..0300af1 100644 --- a/src/app/page.tsx +++ b/src/app/page.tsx @@ -90,7 +90,7 @@ export default function Home() { const [excludedFiles, setExcludedFiles] = useState(''); const [includedDirs, setIncludedDirs] = useState(''); const [includedFiles, setIncludedFiles] = useState(''); - const [selectedPlatform, setSelectedPlatform] = useState<'github' | 'gitlab' | 'bitbucket'>('github'); + const [selectedPlatform, setSelectedPlatform] = useState<'github' | 'gitlab' | 'bitbucket' | 'azure'>('github'); const [accessToken, setAccessToken] = useState(''); const [error, setError] = useState(null); const [isSubmitting, setIsSubmitting] = useState(false); @@ -145,6 +145,7 @@ export default function Home() { // Handle Windows absolute paths (e.g., C:\path\to\folder) const windowsPathRegex = /^[a-zA-Z]:\\(?:[^\\/:*?"<>|\r\n]+\\)*[^\\/:*?"<>|\r\n]*$/; const customGitRegex = /^(?:https?:\/\/)?([^\/]+)\/(.+?)\/([^\/]+)(?:\.git)?\/?$/; + const azureDevOpsRegex = /^(?:https?:\/\/)?dev\.azure\.com\/([^\/]+)\/([^\/]+)\/_git\/([^\/]+)(?:\.git)?\/?$/; if (windowsPathRegex.test(input)) { type = 'local'; @@ -159,6 +160,70 @@ export default function Home() { repo = input.split('/').filter(Boolean).pop() || 'local-repo'; owner = 'local'; } + // Handle Azure DevOps URLs + else if (input.includes('dev.azure.com') || input.includes('azure.com')) { + type = 'azure'; + console.log('Detected Azure DevOps URL:', input); + + // First, ensure the URL is properly decoded (it might be double-encoded) + let decodedInput = input; + try { + // If the URL is already decoded, this won't change it + // If it's encoded (like with %20 for spaces), this will decode it + if (input.includes('%')) { + decodedInput = decodeURIComponent(input); + console.log('Decoded Azure DevOps URL:', decodedInput); + } + } catch (error) { + console.warn('Error decoding URL:', error); + } + + try { + // Parse the URL properly to handle spaces in project 
names + const url = new URL(decodedInput); + console.log('Parsed URL:', url.toString()); + console.log('URL pathname:', url.pathname); + + const pathParts = url.pathname.split('/'); + console.log('Path parts:', pathParts); + + // Find the organization (first part after domain) + const organization = pathParts[1]; // First part after the initial slash + console.log('Organization:', organization); + + // Check if _git is in the path + if (url.pathname.includes('_git')) { + // Find the repository (part after _git) + const gitIndex = pathParts.indexOf('_git'); + if (gitIndex !== -1 && gitIndex + 1 < pathParts.length) { + repo = pathParts[gitIndex + 1]; + console.log('Repository:', repo); + + // For the owner, we'll use the organization and the encoded project path + // This preserves spaces and special characters in project names + const projectPath = url.pathname.split('/_git/')[0].substring(organization.length + 2); + owner = `${organization}/${projectPath}`; + console.log('Owner:', owner); + } else { + console.error('Could not find repository name after _git'); + return null; + } + } else { + console.error('URL does not contain _git segment:', url.pathname); + // Try to extract organization and project from the URL anyway + if (pathParts.length >= 3) { + owner = `${pathParts[1]}/${pathParts[2]}`; + repo = pathParts[pathParts.length - 1]; + console.log('Fallback - Owner:', owner, 'Repo:', repo); + } else { + return null; + } + } + } catch (error) { + console.error('Could not parse Azure DevOps repository from URL', error); + return null; + } + } else if (customGitRegex.test(input)) { type = 'web'; fullPath = extractUrlPath(input)?.replace(/\.git$/, ''); @@ -267,7 +332,38 @@ export default function Home() { params.append('token', accessToken); } // Always include the type parameter - params.append('type', (type == 'local' ? 
type : selectedPlatform) || 'github'); + // For Azure DevOps URLs, ensure we use 'azure' as the type and handle URL encoding properly + if (type === 'azure') { + params.append('type', 'azure'); + + // For Azure DevOps URLs, we need to ensure the URL is properly encoded + // This is especially important for URLs with spaces in project names + if (repositoryInput.includes('dev.azure.com')) { + console.log('Processing Azure DevOps URL for API request:', repositoryInput); + + // Ensure the URL is properly encoded for the API + // First decode it to handle any double-encoding + let normalizedUrl = repositoryInput; + try { + if (normalizedUrl.includes('%')) { + normalizedUrl = decodeURIComponent(normalizedUrl); + console.log('Decoded Azure DevOps URL:', normalizedUrl); + } + } catch (e) { + console.warn('Error decoding URL:', e); + } + + // Then encode it properly + const encodedUrl = encodeURIComponent(normalizedUrl); + console.log('Encoded Azure DevOps URL for API:', encodedUrl); + params.append('repo_url', encodedUrl); + } else { + params.append('repo_url', encodeURIComponent(repositoryInput)); + } + } else { + params.append('type', (type == 'local' ? 
type : selectedPlatform) || 'github'); + params.append('repo_url', encodeURIComponent(repositoryInput)); + } // Add local path if it exists if (localPath) { params.append('local_path', encodeURIComponent(localPath)); diff --git a/src/components/ConfigurationModal.tsx b/src/components/ConfigurationModal.tsx index 7a1dae6..2c4476b 100644 --- a/src/components/ConfigurationModal.tsx +++ b/src/components/ConfigurationModal.tsx @@ -32,8 +32,8 @@ interface ConfigurationModalProps { setCustomModel: (value: string) => void; // Platform selection - selectedPlatform: 'github' | 'gitlab' | 'bitbucket'; - setSelectedPlatform: (value: 'github' | 'gitlab' | 'bitbucket') => void; + selectedPlatform: 'github' | 'gitlab' | 'bitbucket' | 'azure'; + setSelectedPlatform: (value: 'github' | 'gitlab' | 'bitbucket' | 'azure') => void; // Access token accessToken: string; diff --git a/src/components/TokenInput.tsx b/src/components/TokenInput.tsx index 14fadcd..7c61726 100644 --- a/src/components/TokenInput.tsx +++ b/src/components/TokenInput.tsx @@ -4,7 +4,7 @@ import React from 'react'; import { useLanguage } from '@/contexts/LanguageContext'; interface TokenInputProps { - selectedPlatform: 'github' | 'gitlab' | 'bitbucket'; + selectedPlatform: 'github' | 'gitlab' | 'bitbucket' | 'azure'; setSelectedPlatform: (value: 'github' | 'gitlab' | 'bitbucket') => void; accessToken: string; setAccessToken: (value: string) => void; diff --git a/src/messages/en.json b/src/messages/en.json index 290ad56..45d5bf8 100644 --- a/src/messages/en.json +++ b/src/messages/en.json @@ -20,7 +20,7 @@ "home": { "welcome": "Welcome to DeepWiki-Open", "welcomeTagline": "AI-powered documentation for your code repositories", - "description": "Generate comprehensive documentation from GitHub, GitLab, or Bitbucket repositories with just a few clicks.", + "description": "Generate comprehensive documentation from GitHub, GitLab, Azure DevOps, or Bitbucket repositories with just a few clicks.", "quickStart": "Quick 
Start", "enterRepoUrl": "Enter a repository URL in one of these formats:", "advancedVisualization": "Advanced Visualization with Mermaid Diagrams", diff --git a/src/messages/es.json b/src/messages/es.json index f4a2327..e962233 100644 --- a/src/messages/es.json +++ b/src/messages/es.json @@ -20,7 +20,7 @@ "home": { "welcome": "Bienvenido a DeepWiki", "welcomeTagline": "Documentación impulsada por IA para repositorios de código", - "description": "Genera documentación completa de repositorios GitHub, GitLab o Bitbucket con solo unos clics.", + "description": "Genera documentación completa de repositorios GitHub, GitLab, Azure DevOps o Bitbucket con solo unos clics.", "quickStart": "Inicio Rápido", "enterRepoUrl": "Ingresa una URL de repositorio en uno de estos formatos:", "advancedVisualization": "Visualización Avanzada con Diagramas Mermaid", @@ -31,7 +31,7 @@ "form": { "repository": "Repositorio", "configureWiki": "Configurar Wiki", - "repoPlaceholder": "propietario/repositorio o URL de GitHub/GitLab/Bitbucket", + "repoPlaceholder": "propietario/repositorio o URL de GitHub/GitLab/Azure DevOps/Bitbucket", "wikiLanguage": "Idioma del Wiki", "modelOptions": "Opciones de Modelo", "modelProvider": "Proveedor de Modelo", diff --git a/src/messages/ja.json b/src/messages/ja.json index 0158f5d..39a1065 100644 --- a/src/messages/ja.json +++ b/src/messages/ja.json @@ -20,7 +20,7 @@ "home": { "welcome": "DeepWikiへようこそ", "welcomeTagline": "コードリポジトリのためのAI駆動ドキュメンテーション", - "description": "GitHub、GitLab、またはBitbucketリポジトリから包括的なドキュメントを数クリックで生成します。", + "description": "GitHub、GitLab、Azure DevOps、またはBitbucketリポジトリから包括的なドキュメントを数クリックで生成します。", "quickStart": "クイックスタート", "enterRepoUrl": "以下のいずれかの形式でリポジトリURLを入力してください:", "advancedVisualization": "Mermaidダイアグラムによる高度な可視化", @@ -31,7 +31,7 @@ "form": { "repository": "リポジトリ", "configureWiki": "Wiki設定", - "repoPlaceholder": "所有者/リポジトリまたはGitHub/GitLab/BitbucketのURL", + "repoPlaceholder": "所有者/リポジトリまたはGitHub/GitLab/Azure DevOps/BitbucketのURL", 
"wikiLanguage": "Wiki言語", "modelOptions": "モデルオプション", "modelProvider": "モデルプロバイダー", diff --git a/src/messages/kr.json b/src/messages/kr.json index cfda416..60e7008 100644 --- a/src/messages/kr.json +++ b/src/messages/kr.json @@ -20,7 +20,7 @@ "home": { "welcome": "DeepWiki-Open에 오신 것을 환영합니다", "welcomeTagline": "코드 저장소를 위한 AI 기반 문서화", - "description": "GitHub, GitLab 또는 Bitbucket 저장소에서 클릭 한 번으로 종합 문서를 생성하세요.", + "description": "GitHub, GitLab, Azure DevOps, 또는 Bitbucket 저장소에서 클릭 한 번으로 종합 문서를 생성하세요.", "quickStart": "빠른 시작", "enterRepoUrl": "다음 형식 중 하나로 저장소 URL을 입력하세요:", "advancedVisualization": "Mermaid 다이어그램을 활용한 고급 시각화", @@ -31,7 +31,7 @@ "form": { "repository": "저장소", "configureWiki": "위키 구성", - "repoPlaceholder": "owner/repo 또는 GitHub/GitLab/Bitbucket URL", + "repoPlaceholder": "owner/repo 또는 GitHub/GitLab/Azure DevOps/Bitbucket URL", "wikiLanguage": "위키 언어", "modelOptions": "모델 옵션", "modelProvider": "모델 제공자", diff --git a/src/messages/vi.json b/src/messages/vi.json index 701b898..d427eef 100644 --- a/src/messages/vi.json +++ b/src/messages/vi.json @@ -20,7 +20,7 @@ "home": { "welcome": "Chào mừng đến với DeepWiki-Open", "welcomeTagline": "Tài liệu hỗ trợ bởi AI cho các repository của bạn", - "description": "Tạo tài liệu từ các repository GitHub, GitLab, hoặc Bitbucket chỉ với vài cú nhấp chuột.", + "description": "Tạo tài liệu từ các repository GitHub, GitLab, Azure DevOps, hoặc Bitbucket chỉ với vài cú nhấp chuột.", "quickStart": "Bắt đầu nhanh", "enterRepoUrl": "Nhập URL repository", "advancedVisualization": "Tùy chỉnh sơ đồ trực quan với Mermaid", @@ -31,7 +31,7 @@ "form": { "repository": "Repository", "configureWiki": "Cấu hình Wiki", - "repoPlaceholder": "owner/repo hoặc URL GitHub/GitLab/Bitbucket", + "repoPlaceholder": "owner/repo hoặc URL GitHub/GitLab/Azure DevOps/Bitbucket", "wikiLanguage": "Ngôn ngữ Wiki", "modelOptions": "Tùy chọn mô hình", "modelProvider": "Nhà cung cấp mô hình", diff --git a/src/messages/zh.json b/src/messages/zh.json index 
504a627..a3b6405 100644 --- a/src/messages/zh.json +++ b/src/messages/zh.json @@ -20,7 +20,7 @@ "home": { "welcome": "欢迎使用DeepWiki", "welcomeTagline": "为代码仓库提供AI驱动的文档", - "description": "只需一次点击,即可从GitHub、GitLab或Bitbucket仓库生成全面的文档。", + "description": "只需一次点击,即可从GitHub、GitLab、Azure DevOps或Bitbucket仓库生成全面的文档。", "quickStart": "快速开始", "enterRepoUrl": "请以下列格式之一输入仓库URL:", "advancedVisualization": "使用Mermaid图表进行高级可视化", @@ -31,7 +31,7 @@ "form": { "repository": "仓库", "configureWiki": "配置Wiki", - "repoPlaceholder": "所有者/仓库或GitHub/GitLab/Bitbucket URL", + "repoPlaceholder": "所有者/仓库或GitHub/GitLab/Azure DevOps/Bitbucket URL", "wikiLanguage": "Wiki语言", "modelOptions": "模型选项", "modelProvider": "模型提供商",