@@ -231,7 +231,9 @@ async def skill_search(
231231                    # Fallback to simple queries if parsing fails 
232232                    try :
233233                        from  json_repair  import  repair_json 
234-                         search_queries  =  repair_json (response .completion .strip ())
234+                         search_queries_s  =  repair_json (response .completion .strip ())
235+                         search_queries  =  json .loads (search_queries_s )
236+                         search_queries  =  search_queries [:query_num ]
235237                    except  Exception  as  e :
236238                        search_queries  =  [
237239                            params .query ,
@@ -244,7 +246,7 @@ async def skill_search(
244246                # Step 2: Create browser sessions for parallel searching 
245247                register_sessions  =  []
246248
247-                 for  i , query  in  enumerate (search_queries ):
249+                 for  i , query  in  enumerate (search_queries [: query_num ] ):
248250                    agent_id  =  f"search_agent_{ i  +  1 :03d}  
249251                    register_sessions .append (
250252                        browser_manager .register_agent (agent_id , target_id = None )
@@ -278,41 +280,59 @@ async def skill_search(
278280                # Step 5: Use LLM only for final ranking and selection (much smaller dataset now) 
279281                if  all_results  and  len (all_results ) >  10 :
280282                    # Only use LLM if we have more than 10 results to rank 
283+                     # Create indexed results for LLM prompt 
284+                     indexed_results  =  []
285+                     for  i , result  in  enumerate (all_results ):
286+                         indexed_results .append ({
287+                             "index" : i ,
288+                             "title" : result .get ('title' , 'Unknown Title' ),
289+                             "url" : result .get ('url' , 'No URL' ),
290+                             "summary" : result .get ('summary' , 'No summary available' )
291+                         })
292+                     
281293                    ranking_prompt  =  f""" 
282294Rank these search results for the query "{ params .query }  
283295Select the TOP 10 most relevant and valuable results. 
284296
285- Search Results ({ len (all_results )}  
286- { json .dumps (all_results , indent = 2 )} 
297+ Search Results ({ len (indexed_results )}  
298+ { json .dumps (indexed_results , indent = 2 ,  ensure_ascii = False )} 
287299
288- Return the top 10 results as a JSON array with each result containing: 
289- - title: string 
290- - url: string 
291- - summary: string (brief description of why this result is valuable) 
300+ Return ONLY the indices of the top 10 results as a JSON array of numbers. 
301+ For example: [0, 5, 2, 8, 1, 9, 3, 7, 4, 6] 
292302
293- Format: [{{"title": "...", "url": "...", "summary": "..."}} , ...] 
303+ Format: [index1, index2, index3 , ...] 
294304""" 
295305
296306                    ranking_response  =  await  llm .ainvoke ([
297307                        SystemMessage (
298-                             content = "You are an expert at ranking search results for relevance and value." ),
308+                             content = "You are an expert at ranking search results for relevance and value. Return only the indices of the top results. " ),
299309                        UserMessage (content = ranking_prompt )
300310                    ])
301311
302312                    try :
303-                         top_results  =  json .loads (ranking_response .completion .strip ())
304-                         if  not  isinstance (top_results , list ):
313+                         selected_indices  =  json .loads (ranking_response .completion .strip ())
314+                         if  not  isinstance (selected_indices , list ):
305315                            raise  ValueError ("Invalid ranking results format" )
306-                         top_results  =  top_results [:10 ]  # Ensure max 10 results 
316+                         # Ensure indices are valid and limit to 10 
317+                         valid_indices  =  [i  for  i  in  selected_indices  if  isinstance (i , int ) and  0  <=  i  <  len (all_results )][:10 ]
318+                         if  valid_indices :
319+                             top_results  =  [all_results [i ] for  i  in  valid_indices ]
320+                         else :
321+                             top_results  =  all_results [:10 ]
307322                    except  (json .JSONDecodeError , ValueError ):
308323                        try :
309-                             top_results  =  repair_json (ranking_response .completion .strip ())
310-                             if  isinstance (top_results , list ):
311-                                 top_results  =  top_results [:10 ]
324+                             selected_indices_s  =  repair_json (ranking_response .completion .strip ())
325+                             selected_indices  =  json .loads (selected_indices_s )
326+                             if  isinstance (selected_indices , list ):
327+                                 valid_indices  =  [i  for  i  in  selected_indices  if  isinstance (i , int ) and  0  <=  i  <  len (all_results )][:10 ]
328+                                 if  valid_indices :
329+                                     top_results  =  [all_results [i ] for  i  in  valid_indices ]
330+                                 else :
331+                                     top_results  =  all_results [:10 ]
312332                            else :
313333                                top_results  =  all_results [:10 ]
314334                        except  Exception :
315-                             # Fallback to first 10 deduplicated  results 
335+                             # Fallback to first 10 results 
316336                            top_results  =  all_results [:10 ]
317337                elif  all_results :
318338                    # If we have 10 or fewer results, skip LLM ranking 
@@ -1075,7 +1095,7 @@ async def _perform_google_search(self, browser_session, query: str, llm: BaseCha
10751095            results  =  await  self ._extract_google_results_rule_based (browser_session )
10761096            if  results  and  len (results ) >  0 :
10771097                # Rule-based extraction succeeded 
1078-                 logger .info (f"Rule-based extraction found { len (results )} { query }  )
1098+                 logger .debug (f"Rule-based extraction found { len (results )} { query }  )
10791099                return  results [:search_ret_len ]  # Return top 6 results 
10801100
10811101            # Fallback to LLM extraction if rule-based fails 
0 commit comments