@@ -280,41 +280,59 @@ async def skill_search(
280280                # Step 5: Use LLM only for final ranking and selection (much smaller dataset now) 
281281                if  all_results  and  len (all_results ) >  10 :
282282                    # Only use LLM if we have more than 10 results to rank 
283+                     # Create indexed results for LLM prompt 
284+                     indexed_results  =  []
285+                     for  i , result  in  enumerate (all_results ):
286+                         indexed_results .append ({
287+                             "index" : i ,
288+                             "title" : result .get ('title' , 'Unknown Title' ),
289+                             "url" : result .get ('url' , 'No URL' ),
290+                             "summary" : result .get ('summary' , 'No summary available' )
291+                         })
292+                     
283293                    ranking_prompt  =  f""" 
284294Rank these search results for the query "{ params .query }  
285295Select the TOP 10 most relevant and valuable results. 
286296
287- Search Results ({ len (all_results )}  
288- { json .dumps (all_results , indent = 2 )} 
297+ Search Results ({ len (indexed_results )}  
298+ { json .dumps (indexed_results , indent = 2 ,  ensure_ascii = False )} 
289299
290- Return the top 10 results as a JSON array with each result containing: 
291- - title: string 
292- - url: string 
293- - summary: string (brief description of why this result is valuable) 
300+ Return ONLY the indices of the top 10 results as a JSON array of numbers. 
301+ For example: [0, 5, 2, 8, 1, 9, 3, 7, 4, 6] 
294302
295- Format: [{{"title": "...", "url": "...", "summary": "..."}} , ...] 
303+ Format: [index1, index2, index3 , ...] 
296304""" 
297305
298306                    ranking_response  =  await  llm .ainvoke ([
299307                        SystemMessage (
300-                             content = "You are an expert at ranking search results for relevance and value." ),
308+                             content = "You are an expert at ranking search results for relevance and value. Return only the indices of the top results. " ),
301309                        UserMessage (content = ranking_prompt )
302310                    ])
303311
304312                    try :
305-                         top_results  =  json .loads (ranking_response .completion .strip ())
306-                         if  not  isinstance (top_results , list ):
313+                         selected_indices  =  json .loads (ranking_response .completion .strip ())
314+                         if  not  isinstance (selected_indices , list ):
307315                            raise  ValueError ("Invalid ranking results format" )
308-                         top_results  =  top_results [:10 ]  # Ensure max 10 results 
316+                         # Ensure indices are valid and limit to 10 
317+                         valid_indices  =  [i  for  i  in  selected_indices  if  isinstance (i , int ) and  0  <=  i  <  len (all_results )][:10 ]
318+                         if  valid_indices :
319+                             top_results  =  [all_results [i ] for  i  in  valid_indices ]
320+                         else :
321+                             top_results  =  all_results [:10 ]
309322                    except  (json .JSONDecodeError , ValueError ):
310323                        try :
311-                             top_results  =  repair_json (ranking_response .completion .strip ())
312-                             if  isinstance (top_results , list ):
313-                                 top_results  =  top_results [:10 ]
324+                             selected_indices_s  =  repair_json (ranking_response .completion .strip ())
325+                             selected_indices  =  json .loads (selected_indices_s )
326+                             if  isinstance (selected_indices , list ):
327+                                 valid_indices  =  [i  for  i  in  selected_indices  if  isinstance (i , int ) and  0  <=  i  <  len (all_results )][:10 ]
328+                                 if  valid_indices :
329+                                     top_results  =  [all_results [i ] for  i  in  valid_indices ]
330+                                 else :
331+                                     top_results  =  all_results [:10 ]
314332                            else :
315333                                top_results  =  all_results [:10 ]
316334                        except  Exception :
317-                             # Fallback to first 10 deduplicated  results 
335+                             # Fallback to first 10 results 
318336                            top_results  =  all_results [:10 ]
319337                elif  all_results :
320338                    # If we have 10 or fewer results, skip LLM ranking 
@@ -1077,7 +1095,7 @@ async def _perform_google_search(self, browser_session, query: str, llm: BaseCha
10771095            results  =  await  self ._extract_google_results_rule_based (browser_session )
10781096            if  results  and  len (results ) >  0 :
10791097                # Rule-based extraction succeeded 
1080-                 logger .info (f"Rule-based extraction found { len (results )} { query }  )
1098+                 logger .debug (f"Rule-based extraction found { len (results )} { query }  )
10811099                return  results [:search_ret_len ]  # Return top 6 results 
10821100
10831101            # Fallback to LLM extraction if rule-based fails 
0 commit comments