Skip to content

Commit 7f4000f

Browse files
committed
fix skill search
1 parent 50c644a commit 7f4000f

File tree

1 file changed

+34
-16
lines changed

1 file changed

+34
-16
lines changed

vibe_surf/tools/vibesurf_tools.py

Lines changed: 34 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -280,41 +280,59 @@ async def skill_search(
280280
# Step 5: Use LLM only for final ranking and selection (much smaller dataset now)
281281
if all_results and len(all_results) > 10:
282282
# Only use LLM if we have more than 10 results to rank
283+
# Create indexed results for LLM prompt
284+
indexed_results = []
285+
for i, result in enumerate(all_results):
286+
indexed_results.append({
287+
"index": i,
288+
"title": result.get('title', 'Unknown Title'),
289+
"url": result.get('url', 'No URL'),
290+
"summary": result.get('summary', 'No summary available')
291+
})
292+
283293
ranking_prompt = f"""
284294
Rank these search results for the query "{params.query}" by relevance and value.
285295
Select the TOP 10 most relevant and valuable results.
286296
287-
Search Results ({len(all_results)} total):
288-
{json.dumps(all_results, indent=2)}
297+
Search Results ({len(indexed_results)} total):
298+
{json.dumps(indexed_results, indent=2, ensure_ascii=False)}
289299
290-
Return the top 10 results as a JSON array with each result containing:
291-
- title: string
292-
- url: string
293-
- summary: string (brief description of why this result is valuable)
300+
Return ONLY the indices of the top 10 results as a JSON array of numbers.
301+
For example: [0, 5, 2, 8, 1, 9, 3, 7, 4, 6]
294302
295-
Format: [{{"title": "...", "url": "...", "summary": "..."}}, ...]
303+
Format: [index1, index2, index3, ...]
296304
"""
297305

298306
ranking_response = await llm.ainvoke([
299307
SystemMessage(
300-
content="You are an expert at ranking search results for relevance and value."),
308+
content="You are an expert at ranking search results for relevance and value. Return only the indices of the top results."),
301309
UserMessage(content=ranking_prompt)
302310
])
303311

304312
try:
305-
top_results = json.loads(ranking_response.completion.strip())
306-
if not isinstance(top_results, list):
313+
selected_indices = json.loads(ranking_response.completion.strip())
314+
if not isinstance(selected_indices, list):
307315
raise ValueError("Invalid ranking results format")
308-
top_results = top_results[:10] # Ensure max 10 results
316+
# Ensure indices are valid and limit to 10
317+
valid_indices = [i for i in selected_indices if isinstance(i, int) and 0 <= i < len(all_results)][:10]
318+
if valid_indices:
319+
top_results = [all_results[i] for i in valid_indices]
320+
else:
321+
top_results = all_results[:10]
309322
except (json.JSONDecodeError, ValueError):
310323
try:
311-
top_results = repair_json(ranking_response.completion.strip())
312-
if isinstance(top_results, list):
313-
top_results = top_results[:10]
324+
selected_indices_s = repair_json(ranking_response.completion.strip())
325+
selected_indices = json.loads(selected_indices_s)
326+
if isinstance(selected_indices, list):
327+
valid_indices = [i for i in selected_indices if isinstance(i, int) and 0 <= i < len(all_results)][:10]
328+
if valid_indices:
329+
top_results = [all_results[i] for i in valid_indices]
330+
else:
331+
top_results = all_results[:10]
314332
else:
315333
top_results = all_results[:10]
316334
except Exception:
317-
# Fallback to first 10 deduplicated results
335+
# Fallback to first 10 results
318336
top_results = all_results[:10]
319337
elif all_results:
320338
# If we have 10 or fewer results, skip LLM ranking
@@ -1077,7 +1095,7 @@ async def _perform_google_search(self, browser_session, query: str, llm: BaseCha
10771095
results = await self._extract_google_results_rule_based(browser_session)
10781096
if results and len(results) > 0:
10791097
# Rule-based extraction succeeded
1080-
logger.info(f"Rule-based extraction found {len(results)} results for query: {query}")
1098+
logger.debug(f"Rule-based extraction found {len(results)} results for query: {query}")
10811099
return results[:search_ret_len] # Return top 6 results
10821100

10831101
# Fallback to LLM extraction if rule-based fails

0 commit comments

Comments
 (0)