Skip to content

Commit 98ca630

Browse files
authored
Merge pull request #45 from vvincent1234/dev
Fix skill search
2 parents b2d55c9 + 7f4000f commit 98ca630

File tree

6 files changed

+44
-24
lines changed

6 files changed

+44
-24
lines changed

vibe_surf/backend/api/task.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ async def submit_task(
104104
logger.info("Using default empty MCP server configuration")
105105

106106
# DEBUG: Log the type and content of mcp_server_config
107-
logger.info(f"mcp_server_config type: {type(mcp_server_config)}, value: {mcp_server_config}")
107+
logger.debug(f"mcp_server_config type: {type(mcp_server_config)}, value: {mcp_server_config}")
108108

109109
# Create initial task record in database
110110
from ..database.queries import TaskQueries

vibe_surf/backend/database/queries.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -486,13 +486,13 @@ async def save_task(
486486
return existing_task
487487
else:
488488
# DEBUG: Log the type and content of mcp_server_config before saving
489-
logger.info(
489+
logger.debug(
490490
f"Creating task with mcp_server_config type: {type(mcp_server_config)}, value: {mcp_server_config}")
491491

492492
# Serialize mcp_server_config to JSON string if it's a dict
493493
if isinstance(mcp_server_config, dict):
494494
mcp_server_config_json = json.dumps(mcp_server_config)
495-
logger.info(f"Converted dict to JSON string: {mcp_server_config_json}")
495+
logger.debug(f"Converted dict to JSON string: {mcp_server_config_json}")
496496
else:
497497
mcp_server_config_json = mcp_server_config
498498

vibe_surf/backend/utils/llm_factory.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ def get_attr(obj, key, default=None):
5858
"deepseek": ["temperature"],
5959
"aws_bedrock": ["temperature"],
6060
"anthropic_bedrock": ["temperature"],
61-
"openai_compatible": ["temperature"]
61+
"openai_compatible": ["temperature", "max_tokens"]
6262
}
6363

6464
# Build common parameters based on provider support

vibe_surf/cli.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -325,7 +325,7 @@ def start_backend(port: int) -> None:
325325
console.print("[yellow]📝 Press Ctrl+C to stop the server[/yellow]\n")
326326

327327
# Run the server
328-
uvicorn.run(app, host="127.0.0.1", port=port, log_level="info")
328+
uvicorn.run(app, host="127.0.0.1", port=port, log_level="error")
329329

330330
except KeyboardInterrupt:
331331
console.print("\n[yellow]🛑 Server stopped by user[/yellow]")

vibe_surf/llm/openai_compatible.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ class ChatOpenAICompatible(ChatOpenAI):
7676
The class automatically detects the model type and applies appropriate fixes.
7777
"""
7878

79-
max_completion_tokens: int | None = 16000
79+
max_completion_tokens: int | None = 8192
8080

8181
def _is_gemini_model(self) -> bool:
8282
"""Check if the current model is a Gemini model."""

vibe_surf/tools/vibesurf_tools.py

Lines changed: 38 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -231,7 +231,9 @@ async def skill_search(
231231
# Fallback to simple queries if parsing fails
232232
try:
233233
from json_repair import repair_json
234-
search_queries = repair_json(response.completion.strip())
234+
search_queries_s = repair_json(response.completion.strip())
235+
search_queries = json.loads(search_queries_s)
236+
search_queries = search_queries[:query_num]
235237
except Exception as e:
236238
search_queries = [
237239
params.query,
@@ -244,7 +246,7 @@ async def skill_search(
244246
# Step 2: Create browser sessions for parallel searching
245247
register_sessions = []
246248

247-
for i, query in enumerate(search_queries):
249+
for i, query in enumerate(search_queries[:query_num]):
248250
agent_id = f"search_agent_{i + 1:03d}"
249251
register_sessions.append(
250252
browser_manager.register_agent(agent_id, target_id=None)
@@ -278,41 +280,59 @@ async def skill_search(
278280
# Step 5: Use LLM only for final ranking and selection (much smaller dataset now)
279281
if all_results and len(all_results) > 10:
280282
# Only use LLM if we have more than 10 results to rank
283+
# Create indexed results for LLM prompt
284+
indexed_results = []
285+
for i, result in enumerate(all_results):
286+
indexed_results.append({
287+
"index": i,
288+
"title": result.get('title', 'Unknown Title'),
289+
"url": result.get('url', 'No URL'),
290+
"summary": result.get('summary', 'No summary available')
291+
})
292+
281293
ranking_prompt = f"""
282294
Rank these search results for the query "{params.query}" by relevance and value.
283295
Select the TOP 10 most relevant and valuable results.
284296
285-
Search Results ({len(all_results)} total):
286-
{json.dumps(all_results, indent=2)}
297+
Search Results ({len(indexed_results)} total):
298+
{json.dumps(indexed_results, indent=2, ensure_ascii=False)}
287299
288-
Return the top 10 results as a JSON array with each result containing:
289-
- title: string
290-
- url: string
291-
- summary: string (brief description of why this result is valuable)
300+
Return ONLY the indices of the top 10 results as a JSON array of numbers.
301+
For example: [0, 5, 2, 8, 1, 9, 3, 7, 4, 6]
292302
293-
Format: [{{"title": "...", "url": "...", "summary": "..."}}, ...]
303+
Format: [index1, index2, index3, ...]
294304
"""
295305

296306
ranking_response = await llm.ainvoke([
297307
SystemMessage(
298-
content="You are an expert at ranking search results for relevance and value."),
308+
content="You are an expert at ranking search results for relevance and value. Return only the indices of the top results."),
299309
UserMessage(content=ranking_prompt)
300310
])
301311

302312
try:
303-
top_results = json.loads(ranking_response.completion.strip())
304-
if not isinstance(top_results, list):
313+
selected_indices = json.loads(ranking_response.completion.strip())
314+
if not isinstance(selected_indices, list):
305315
raise ValueError("Invalid ranking results format")
306-
top_results = top_results[:10] # Ensure max 10 results
316+
# Ensure indices are valid and limit to 10
317+
valid_indices = [i for i in selected_indices if isinstance(i, int) and 0 <= i < len(all_results)][:10]
318+
if valid_indices:
319+
top_results = [all_results[i] for i in valid_indices]
320+
else:
321+
top_results = all_results[:10]
307322
except (json.JSONDecodeError, ValueError):
308323
try:
309-
top_results = repair_json(ranking_response.completion.strip())
310-
if isinstance(top_results, list):
311-
top_results = top_results[:10]
324+
selected_indices_s = repair_json(ranking_response.completion.strip())
325+
selected_indices = json.loads(selected_indices_s)
326+
if isinstance(selected_indices, list):
327+
valid_indices = [i for i in selected_indices if isinstance(i, int) and 0 <= i < len(all_results)][:10]
328+
if valid_indices:
329+
top_results = [all_results[i] for i in valid_indices]
330+
else:
331+
top_results = all_results[:10]
312332
else:
313333
top_results = all_results[:10]
314334
except Exception:
315-
# Fallback to first 10 deduplicated results
335+
# Fallback to first 10 results
316336
top_results = all_results[:10]
317337
elif all_results:
318338
# If we have 10 or fewer results, skip LLM ranking
@@ -1075,7 +1095,7 @@ async def _perform_google_search(self, browser_session, query: str, llm: BaseCha
10751095
results = await self._extract_google_results_rule_based(browser_session)
10761096
if results and len(results) > 0:
10771097
# Rule-based extraction succeeded
1078-
logger.info(f"Rule-based extraction found {len(results)} results for query: {query}")
1098+
logger.debug(f"Rule-based extraction found {len(results)} results for query: {query}")
10791099
return results[:search_ret_len] # Return top 6 results
10801100

10811101
# Fallback to LLM extraction if rule-based fails

0 commit comments

Comments (0)