@@ -2,6 +2,7 @@ import fs from 'fs';
22import  path  from  'path' ; 
33import  {  fileURLToPath  }  from  'url' ; 
44import  {  MCPLogParser  }  from  './mcp-log-parser.js' ; 
5+ import  TestDataGenerator  from  './setup-test-data.js' ; 
56
67const  __filename  =  fileURLToPath ( import . meta. url ) ; 
78const  __dirname  =  path . dirname ( __filename ) ; 
@@ -102,8 +103,15 @@ class MCPTestRunner {
102103   */ 
103104  validateParameters ( expectedParams ,  actualParams )  { 
104105    if  ( ! actualParams )  return  false ; 
106+     
107+     // If no expected parameters, any parameters are acceptable 
105108    if  ( ! expectedParams  ||  Object . keys ( expectedParams ) . length  ===  0 )  { 
106-       // No specific parameters expected - any parameters are acceptable 
109+       return  true ; 
110+     } 
111+ 
112+     // Special case: If expected params is just "required" placeholders, accept any valid parameters 
113+     const  hasOnlyPlaceholders  =  Object . values ( expectedParams ) . every ( value  =>  value  ===  "required" ) ; 
114+     if  ( hasOnlyPlaceholders )  { 
107115      return  true ; 
108116    } 
109117
@@ -130,8 +138,10 @@ class MCPTestRunner {
130138      }  else  { 
131139        // For primitive values, check similarity (partial match for strings) 
132140        if  ( typeof  value  ===  'string'  &&  typeof  actualParams [ key ]  ===  'string' )  { 
133-           // Allow partial matches for filters 
134-           if  ( key . includes ( 'filter' )  ||  key . includes ( 'summary' )  ||  key . includes ( 'description' ) )  { 
141+           // Allow partial matches for filters, search terms, and common query parameters 
142+           if  ( key . includes ( 'filter' )  ||  key . includes ( 'summary' )  ||  key . includes ( 'description' )  || 
143+               key . includes ( 'search' )  ||  key . includes ( 'query' )  ||  key . includes ( 'name' )  || 
144+               key . includes ( 'title' )  ||  key . includes ( 'location' )  ||  key . includes ( 'email' ) )  { 
135145            if  ( ! actualParams [ key ] . toLowerCase ( ) . includes ( value . toLowerCase ( ) )  && 
136146                ! value . toLowerCase ( ) . includes ( actualParams [ key ] . toLowerCase ( ) ) )  { 
137147              return  false ; 
@@ -165,6 +175,46 @@ class MCPTestRunner {
165175    return  hasContent  &&  notErrorMessage ; 
166176  } 
167177
178+   /** 
179+    * Validate multi-step optimal route (NEW - 2025-10-10) 
180+    * Checks if MCP tool calls follow the expected optimal_route workflow 
181+    */ 
182+   validateOptimalRoute ( testCase ,  mcpToolCalls )  { 
183+     // If no optimal_route defined, fall back to single-tool validation 
184+     if  ( ! testCase . optimal_route  ||  testCase . optimal_route . length  ===  0 )  { 
185+       return  null ;  // Signal to use legacy validation 
186+     } 
187+ 
188+     // Extract tool names from optimal_route 
189+     const  expectedTools  =  testCase . optimal_route . map ( step  =>  step . tool ) ; 
190+     const  actualTools  =  mcpToolCalls . map ( call  =>  call . tool ) . filter ( t  =>  t ) ;  // Filter out null/undefined 
191+ 
192+     // Check if actual tools match expected sequence 
193+     // Allow extra tools, but expected tools must appear in order 
194+     let  expectedIndex  =  0 ; 
195+     let  matchedSteps  =  0 ; 
196+ 
197+     for  ( const  actualTool  of  actualTools )  { 
198+       if  ( actualTool  &&  expectedIndex  <  expectedTools . length  && 
199+           actualTool . toLowerCase ( )  ===  expectedTools [ expectedIndex ] . toLowerCase ( ) )  { 
200+         matchedSteps ++ ; 
201+         expectedIndex ++ ; 
202+       } 
203+     } 
204+ 
205+     // Route is valid if all expected steps were matched in order 
206+     const  routeValid  =  matchedSteps  ===  expectedTools . length ; 
207+ 
208+     return  { 
209+       route_valid : routeValid , 
210+       matched_steps : matchedSteps , 
211+       total_steps : expectedTools . length , 
212+       expected_sequence : expectedTools , 
213+       actual_sequence : actualTools , 
214+       success_rate : matchedSteps  /  expectedTools . length 
215+     } ; 
216+   } 
217+ 
168218  /** 
169219   * Extract new tool calls from MCP log since last check 
170220   */ 
@@ -180,6 +230,27 @@ class MCPTestRunner {
180230    } 
181231  } 
182232
233+   /** 
234+    * Reset test data to clean state (cleanup + setup) 
235+    */ 
236+   async  resetTestData ( )  { 
237+     console . log ( '\n  🔄 Resetting test data to clean state...' ) ; 
238+ 
239+     try  { 
240+       const  generator  =  new  TestDataGenerator ( ) ; 
241+       await  generator . initialize ( ) ; 
242+       await  generator . cleanup ( ) ; 
243+       await  generator . createTestCalendar ( ) ; 
244+       await  generator . generateTestEvents ( ) ; 
245+       await  generator . generateTestContacts ( ) ; 
246+       await  generator . generateTestTodos ( ) ; 
247+       console . log ( '  ✅ Test data reset complete\n' ) ; 
248+     }  catch  ( error )  { 
249+       console . error ( '  ❌ Failed to reset test data:' ,  error . message ) ; 
250+       throw  error ; 
251+     } 
252+   } 
253+ 
183254  /** 
184255   * Run a single test case multiple times (5x repetition) 
185256   */ 
@@ -198,6 +269,9 @@ class MCPTestRunner {
198269
199270    // Run the test 5 times 
200271    for  ( let  i  =  0 ;  i  <  this . config . repetitions ;  i ++ )  { 
272+       // 🚨 CRITICAL: Reset test data to clean state BEFORE EACH RUN 
273+       await  this . resetTestData ( ) ; 
274+ 
201275      console . log ( `\n  Run ${ i  +  1 } ${ this . config . repetitions }  ) ; 
202276
203277      const  startTime  =  Date . now ( ) ; 
@@ -213,15 +287,27 @@ class MCPTestRunner {
213287      const  answer  =  output . answer  ||  response . answer ; 
214288      const  parameters  =  output . parameters  ||  response . parameters ; 
215289
216-       const  toolCorrect  =  this . validateToolSelection ( 
217-         testCase . expected_tool , 
218-         toolUsed 
219-       ) ; 
290+       // NEW: Check if optimal_route validation should be used 
291+       const  routeValidation  =  this . validateOptimalRoute ( testCase ,  mcpToolCalls ) ; 
220292
221-       const  paramsCorrect  =  this . validateParameters ( 
222-         testCase . expected_parameters , 
223-         parameters 
224-       ) ; 
293+       let  toolCorrect ,  paramsCorrect ; 
294+ 
295+       if  ( routeValidation  !==  null )  { 
296+         // Multi-step workflow validation 
297+         toolCorrect  =  routeValidation . route_valid ; 
298+         // For multi-step, params validation is less critical (data flows between steps) 
299+         paramsCorrect  =  routeValidation . route_valid ;  // Consider route valid = params valid 
300+       }  else  { 
301+         // Legacy single-tool validation 
302+         toolCorrect  =  this . validateToolSelection ( 
303+           testCase . expected_tool , 
304+           toolUsed 
305+         ) ; 
306+         paramsCorrect  =  this . validateParameters ( 
307+           testCase . expected_parameters , 
308+           parameters 
309+         ) ; 
310+       } 
225311
226312      const  answerGood  =  this . validateAnswerQuality ( answer ) ; 
227313
@@ -242,7 +328,8 @@ class MCPTestRunner {
242328          tool_correct : toolCorrect , 
243329          parameters_correct : paramsCorrect , 
244330          answer_quality_good : answerGood , 
245-           all_passed : toolCorrect  &&  paramsCorrect  &&  answerGood 
331+           all_passed : toolCorrect  &&  paramsCorrect  &&  answerGood , 
332+           route_validation : routeValidation  // Include route validation details 
246333        } , 
247334        mcp_tool_calls : mcpToolCalls . map ( call  =>  ( { 
248335          tool : call . tool , 
@@ -259,8 +346,18 @@ class MCPTestRunner {
259346      // Log run result 
260347      const  status  =  runResult . validation . all_passed  ? '✅ PASS'  : '❌ FAIL' ; 
261348      console . log ( `    ${ status }  ) ; 
262-       console . log ( `    - Tool: ${ toolUsed } ${ toolCorrect  ? '✅'  : '❌' }  ) ; 
263-       console . log ( `    - Params: ${ paramsCorrect  ? '✅'  : '❌' }  ) ; 
349+ 
350+       if  ( routeValidation  !==  null )  { 
351+         // Multi-step workflow logging 
352+         console . log ( `    - Route: ${ routeValidation . matched_steps } ${ routeValidation . total_steps } ${ toolCorrect  ? '✅'  : '❌' }  ) ; 
353+         console . log ( `    - Expected: [${ routeValidation . expected_sequence . join ( ' → ' ) }  ) ; 
354+         console . log ( `    - Actual: [${ routeValidation . actual_sequence . join ( ' → ' ) }  ) ; 
355+       }  else  { 
356+         // Single-tool logging 
357+         console . log ( `    - Tool: ${ toolUsed } ${ toolCorrect  ? '✅'  : '❌' }  ) ; 
358+         console . log ( `    - Params: ${ paramsCorrect  ? '✅'  : '❌' }  ) ; 
359+       } 
360+ 
264361      console . log ( `    - Answer: ${ answerGood  ? '✅'  : '❌' }  ) ; 
265362      console . log ( `    - Duration: ${ duration }  ) ; 
266363      console . log ( `    - MCP Calls: ${ mcpToolCalls . length } ${ runResult . total_mcp_execution_time_ms }  ) ; 
@@ -300,6 +397,13 @@ class MCPTestRunner {
300397    // Log summary 
301398    console . log ( `\n${ '─' . repeat ( 80 ) }  ) ; 
302399    console . log ( `Summary for ${ testCase . id }  ) ; 
400+ 
401+     // Check if this test uses optimal_route validation 
402+     const  usesOptimalRoute  =  testCase . optimal_route  &&  testCase . optimal_route . length  >  0 ; 
403+     if  ( usesOptimalRoute )  { 
404+       console . log ( `  Validation Mode: Multi-step workflow (${ testCase . optimal_route . length }  ) ; 
405+     } 
406+ 
303407    console . log ( `  Tool Selection: ${ correctToolCount } ${ this . config . repetitions } ${ ( toolSuccessRate  *  100 ) . toFixed ( 0 ) }  ) ; 
304408    console . log ( `  Parameters: ${ correctParamsCount } ${ this . config . repetitions } ${ ( paramsSuccessRate  *  100 ) . toFixed ( 0 ) }  ) ; 
305409    console . log ( `  Answer Quality: ${ goodAnswerCount } ${ this . config . repetitions } ${ ( answerSuccessRate  *  100 ) . toFixed ( 0 ) }  ) ; 
0 commit comments