PhilflowIO
diff --git a/package-lock.json b/package-lock.json
Lines changed: 1237 additions & 223 deletions
diff --git a/src/formatters.js b/src/formatters.js
Lines changed: 14 additions & 1 deletion
diff --git a/src/tools.js b/src/tools.js
Lines changed: 231 additions & 40 deletions
diff --git a/src/validation.js b/src/validation.js
Lines changed: 1 addition & 1 deletion
diff --git a/tests/integration/mcp-log-parser.js b/tests/integration/mcp-log-parser.js
Lines changed: 55 additions & 6 deletions
diff --git a/tests/integration/mcp-test-runner.js b/tests/integration/mcp-test-runner.js
Lines changed: 118 additions & 14 deletions
diff --git a/tests/integration/setup-test-data.js b/tests/integration/setup-test-data.js
Lines changed: 12 additions & 0 deletions
@@ -350,7 +350,14 @@ export function formatContactList(contacts, addressBookName = 'Unknown Address B
     return {
       content: [{
         type: 'text',
-        text: 'No contacts found.'
+        text: `No contacts found in ${addressBookName}.
+
+💡 **Next steps**:
+- Try broader search: use addressbook_query with partial name
+- List all contacts: use list_contacts to see available names  
+- Create new contact: use create_contact if contact doesn't exist yet
+
+📝 **Available address books**: Use list_addressbooks to see all address books`
       }]
     };
   }
@@ -370,6 +377,12 @@ export function formatContactList(contacts, addressBookName = 'Unknown Address B
   })), null, 2);
   output += '\n```\n</details>';
 
+  // Add next action hints
+  output += `\n💡 **What you can do next**:
+- Update contact: use update_contact with URL and ETAG from above
+- Delete contact: use delete_contact with URL and ETAG from above
+- Get full details: Contact data already complete above`;
+
   return {
     content: [{
       type: 'text',
 
@@ -111,7 +111,7 @@ export const deleteContactSchema = z.object({
 });
 
 export const addressBookQuerySchema = z.object({
-  addressbook_url: z.string().url('Invalid addressbook URL'),
+  addressbook_url: z.string().url('Invalid addressbook URL').optional(),
   name_filter: z.string().optional(),
   email_filter: z.string().optional(),
   organization_filter: z.string().optional(),
 
@@ -57,28 +57,77 @@ export class MCPLogParser {
     const timestampMatch = requestLine.match(/\[(\d{2}:\d{2}:\d{2}(?:\.\d{3})?)\]/);
     const timestamp = timestampMatch ? timestampMatch[1] : null;
 
-    // Extract requestId
+    // Try to parse JSON format (single-line)
+    const jsonMatch = requestLine.match(/\{.*\}/);
+    if (jsonMatch) {
+      try {
+        const data = JSON.parse(jsonMatch[0]);
+        const requestId = data.requestId || null;
+        const sessionId = data.sessionId || null;
+        const tool = data.tool || null;
+        const args = data.args || {};
+
+        // Find success/failure
+        let success = null;
+        let executionTime = null;
+
+        // Look ahead for "Tool executed successfully" or error
+        for (let j = startIndex; j < Math.min(startIndex + 20, lines.length); j++) {
+          if (lines[j].includes('Tool executed successfully') && requestId) {
+            // Check if this is the right request by looking for tool name
+            const successJsonMatch = lines[j].match(/\{.*\}/);
+            if (successJsonMatch) {
+              try {
+                const successData = JSON.parse(successJsonMatch[0]);
+                if (successData.requestId === requestId) {
+                  success = true;
+
+                  // Calculate execution time
+                  const successTimestamp = lines[j].match(/\[(\d{2}:\d{2}:\d{2}(?:\.\d{3})?)\]/)?.[1];
+                  if (timestamp && successTimestamp) {
+                    executionTime = this.calculateTimeDiff(timestamp, successTimestamp);
+                  }
+                  break;
+                }
+              } catch {}
+            }
+          } else if (lines[j].includes('ERROR') && lines[j].includes(requestId)) {
+            success = false;
+            break;
+          }
+        }
+
+        return {
+          timestamp,
+          requestId,
+          sessionId,
+          tool,
+          args,
+          success,
+          executionTime
+        };
+      } catch (e) {
+        // Fall back to old parsing method
+      }
+    }
+
+    // Fallback: Multi-line format parsing
     const requestIdMatch = lines[startIndex + 1]?.match(/requestId.*: "([^"]+)"/);
     const requestId = requestIdMatch ? requestIdMatch[1] : null;
 
-    // Extract sessionId
     const sessionIdMatch = lines[startIndex + 2]?.match(/sessionId.*: "([^"]+)"/);
     const sessionId = sessionIdMatch ? sessionIdMatch[1] : null;
 
-    // Extract tool name
     const toolMatch = lines[startIndex + 3]?.match(/tool.*: "([^"]+)"/);
     const tool = toolMatch ? toolMatch[1] : null;
 
-    // Extract args (can span multiple lines)
     let args = {};
     const argsStartIndex = startIndex + 4;
 
     if (lines[argsStartIndex]?.includes('args')) {
-      // Check if args is empty object
       if (lines[argsStartIndex].includes('{}')) {
         args = {};
       } else {
-        // Parse multi-line args
         args = this.parseArgs(lines, argsStartIndex);
       }
     }
 
@@ -2,6 +2,7 @@ import fs from 'fs';
 import path from 'path';
 import { fileURLToPath } from 'url';
 import { MCPLogParser } from './mcp-log-parser.js';
+import TestDataGenerator from './setup-test-data.js';
 
 const __filename = fileURLToPath(import.meta.url);
 const __dirname = path.dirname(__filename);
@@ -102,8 +103,15 @@ class MCPTestRunner {
    */
   validateParameters(expectedParams, actualParams) {
     if (!actualParams) return false;
+    
+    // If no expected parameters, any parameters are acceptable
     if (!expectedParams || Object.keys(expectedParams).length === 0) {
-      // No specific parameters expected - any parameters are acceptable
+      return true;
+    }
+
+    // Special case: If expected params is just "required" placeholders, accept any valid parameters
+    const hasOnlyPlaceholders = Object.values(expectedParams).every(value => value === "required");
+    if (hasOnlyPlaceholders) {
       return true;
     }
 
@@ -130,8 +138,10 @@ class MCPTestRunner {
       } else {
         // For primitive values, check similarity (partial match for strings)
         if (typeof value === 'string' && typeof actualParams[key] === 'string') {
-          // Allow partial matches for filters
-          if (key.includes('filter') || key.includes('summary') || key.includes('description')) {
+          // Allow partial matches for filters, search terms, and common query parameters
+          if (key.includes('filter') || key.includes('summary') || key.includes('description') ||
+              key.includes('search') || key.includes('query') || key.includes('name') ||
+              key.includes('title') || key.includes('location') || key.includes('email')) {
             if (!actualParams[key].toLowerCase().includes(value.toLowerCase()) &&
                 !value.toLowerCase().includes(actualParams[key].toLowerCase())) {
               return false;
@@ -165,6 +175,46 @@ class MCPTestRunner {
     return hasContent && notErrorMessage;
   }
 
+  /**
+   * Validate multi-step optimal route (NEW - 2025-10-10)
+   * Checks whether the tools named in testCase.optimal_route appear, in order,
+   * among the logged MCP tool calls. Unrelated calls may be interleaved;
+   * tool names are compared case-insensitively.
+   *
+   * @param {object} testCase - Test case; only `optimal_route` is read here,
+   *   and each of its entries is read for its `tool` property.
+   * @param {Array<object>} mcpToolCalls - Logged calls; only each entry's
+   *   `tool` property is read.
+   * @returns {null|object} `null` when `optimal_route` is missing or empty
+   *   (the caller is expected to use single-tool validation instead);
+   *   otherwise an object with `route_valid`, `matched_steps`, `total_steps`,
+   *   `expected_sequence`, `actual_sequence` and `success_rate`
+   *   (matched_steps / total_steps).
+   *
+   * NOTE(review): a route step without a string `tool` would make the
+   * `.toLowerCase()` call on the expected name throw once matching reaches
+   * that step - confirm every optimal_route entry defines `tool`.
+   */
+  validateOptimalRoute(testCase, mcpToolCalls) {
+    // No optimal_route (missing or empty array): nothing to validate here
+    if (!testCase.optimal_route || testCase.optimal_route.length === 0) {
+      return null; // Signals the caller to use legacy single-tool validation
+    }
+
+    // Expected tool names in route order; actual tool names in call order
+    const expectedTools = testCase.optimal_route.map(step => step.tool);
+    const actualTools = mcpToolCalls.map(call => call.tool).filter(t => t); // Drop falsy tool names (null, undefined, '')
+
+    // Single pass over the actual calls: advance through the expected list
+    // only when the current call matches the next expected tool; other calls are ignored
+    let expectedIndex = 0;
+    let matchedSteps = 0;
+
+    for (const actualTool of actualTools) {
+      if (actualTool && expectedIndex < expectedTools.length &&
+          actualTool.toLowerCase() === expectedTools[expectedIndex].toLowerCase()) {
+        matchedSteps++;
+        expectedIndex++;
+      }
+    }
+
+    // Valid only when every expected step was matched, in order
+    const routeValid = matchedSteps === expectedTools.length;
+
+    return {
+      route_valid: routeValid,
+      matched_steps: matchedSteps,
+      total_steps: expectedTools.length,
+      expected_sequence: expectedTools,
+      actual_sequence: actualTools,
+      success_rate: matchedSteps / expectedTools.length
+    };
+  }
+
   /**
    * Extract new tool calls from MCP log since last check
    */
@@ -180,6 +230,27 @@ class MCPTestRunner {
     }
   }
 
+  /**
+   * Reset test data to clean state (cleanup + setup)
+   *
+   * Creates a fresh TestDataGenerator and awaits its steps strictly one after
+   * another: initialize, cleanup, createTestCalendar, generateTestEvents,
+   * generateTestContacts, generateTestTodos. Progress is written to the
+   * console before the first step and after the last one.
+   *
+   * NOTE(review): what each generator step does is defined in
+   * setup-test-data.js and is not visible from this method - presumably the
+   * cleanup step removes earlier test data; confirm there.
+   *
+   * @returns {Promise<void>} Resolves once all generator steps have finished.
+   * @throws Rethrows any error raised by a generator step, after logging its
+   *   message with console.error.
+   */
+  async resetTestData() {
+    console.log('\n  🔄 Resetting test data to clean state...');
+
+    try {
+      const generator = new TestDataGenerator();
+      await generator.initialize();
+      await generator.cleanup();
+      await generator.createTestCalendar();
+      await generator.generateTestEvents();
+      await generator.generateTestContacts();
+      await generator.generateTestTodos();
+      console.log('  ✅ Test data reset complete\n');
+    } catch (error) {
+      console.error('  ❌ Failed to reset test data:', error.message);
+      throw error;
+    }
+  }
+
   /**
    * Run a single test case multiple times (5x repetition)
    */
@@ -198,6 +269,9 @@ class MCPTestRunner {
 
     // Run the test 5 times
     for (let i = 0; i < this.config.repetitions; i++) {
+      // 🚨 CRITICAL: Reset test data to clean state BEFORE EACH RUN
+      await this.resetTestData();
+
       console.log(`\n  Run ${i + 1}/${this.config.repetitions}...`);
 
       const startTime = Date.now();
@@ -213,15 +287,27 @@ class MCPTestRunner {
       const answer = output.answer || response.answer;
       const parameters = output.parameters || response.parameters;
 
-      const toolCorrect = this.validateToolSelection(
-        testCase.expected_tool,
-        toolUsed
-      );
+      // NEW: Check if optimal_route validation should be used
+      const routeValidation = this.validateOptimalRoute(testCase, mcpToolCalls);
 
-      const paramsCorrect = this.validateParameters(
-        testCase.expected_parameters,
-        parameters
-      );
+      let toolCorrect, paramsCorrect;
+
+      if (routeValidation !== null) {
+        // Multi-step workflow validation
+        toolCorrect = routeValidation.route_valid;
+        // For multi-step, params validation is less critical (data flows between steps)
+        paramsCorrect = routeValidation.route_valid; // Consider route valid = params valid
+      } else {
+        // Legacy single-tool validation
+        toolCorrect = this.validateToolSelection(
+          testCase.expected_tool,
+          toolUsed
+        );
+        paramsCorrect = this.validateParameters(
+          testCase.expected_parameters,
+          parameters
+        );
+      }
 
       const answerGood = this.validateAnswerQuality(answer);
 
@@ -242,7 +328,8 @@ class MCPTestRunner {
           tool_correct: toolCorrect,
           parameters_correct: paramsCorrect,
           answer_quality_good: answerGood,
-          all_passed: toolCorrect && paramsCorrect && answerGood
+          all_passed: toolCorrect && paramsCorrect && answerGood,
+          route_validation: routeValidation // Include route validation details
         },
         mcp_tool_calls: mcpToolCalls.map(call => ({
           tool: call.tool,
@@ -259,8 +346,18 @@ class MCPTestRunner {
       // Log run result
       const status = runResult.validation.all_passed ? '✅ PASS' : '❌ FAIL';
       console.log(`    ${status}`);
-      console.log(`    - Tool: ${toolUsed} ${toolCorrect ? '✅' : '❌'}`);
-      console.log(`    - Params: ${paramsCorrect ? '✅' : '❌'}`);
+
+      if (routeValidation !== null) {
+        // Multi-step workflow logging
+        console.log(`    - Route: ${routeValidation.matched_steps}/${routeValidation.total_steps} steps ${toolCorrect ? '✅' : '❌'}`);
+        console.log(`    - Expected: [${routeValidation.expected_sequence.join(' → ')}]`);
+        console.log(`    - Actual: [${routeValidation.actual_sequence.join(' → ')}]`);
+      } else {
+        // Single-tool logging
+        console.log(`    - Tool: ${toolUsed} ${toolCorrect ? '✅' : '❌'}`);
+        console.log(`    - Params: ${paramsCorrect ? '✅' : '❌'}`);
+      }
+
       console.log(`    - Answer: ${answerGood ? '✅' : '❌'}`);
       console.log(`    - Duration: ${duration}ms`);
       console.log(`    - MCP Calls: ${mcpToolCalls.length} (${runResult.total_mcp_execution_time_ms}ms)`);
@@ -300,6 +397,13 @@ class MCPTestRunner {
     // Log summary
     console.log(`\n${'─'.repeat(80)}`);
     console.log(`Summary for ${testCase.id}:`);
+
+    // Check if this test uses optimal_route validation
+    const usesOptimalRoute = testCase.optimal_route && testCase.optimal_route.length > 0;
+    if (usesOptimalRoute) {
+      console.log(`  Validation Mode: Multi-step workflow (${testCase.optimal_route.length} steps)`);
+    }
+
     console.log(`  Tool Selection: ${correctToolCount}/${this.config.repetitions} (${(toolSuccessRate * 100).toFixed(0)}%)`);
     console.log(`  Parameters: ${correctParamsCount}/${this.config.repetitions} (${(paramsSuccessRate * 100).toFixed(0)}%)`);
     console.log(`  Answer Quality: ${goodAnswerCount}/${this.config.repetitions} (${(answerSuccessRate * 100).toFixed(0)}%)`);
 
@@ -480,6 +480,18 @@ class TestDataGenerator {
     vcard += 'VERSION:3.0\n';
     vcard += `FN:${contact.fn}\n`;
 
+    // Add structured name (N: field) for better search compatibility
+    // Format: "Family;Given;Additional;Prefix;Suffix"
+    const nameParts = contact.fn.split(' ');
+    if (nameParts.length >= 2) {
+      const familyName = nameParts[nameParts.length - 1]; // Last part = family name
+      const givenName = nameParts.slice(0, -1).join(' '); // Rest = given name
+      vcard += `N:${familyName};${givenName};;;\n`;
+    } else {
+      // Single name - treat as given name
+      vcard += `N:;${contact.fn};;;\n`;
+    }
+
     if (contact.email) vcard += `EMAIL:${contact.email}\n`;
     if (contact.tel) vcard += `TEL:${contact.tel}\n`;
     if (contact.org) vcard += `ORG:${contact.org}\n`;