Skip to content

Commit d7706e3

Browse files
PhilflowIO and claude committed
fix: prevent unnecessary list_calendars calls and calendar_url limitation
Fixes #22

## Problem
The LLM was unnecessarily calling list_calendars and then limiting the search to a single calendar by providing the calendar_url parameter, causing events in other calendars to be missed.

## Changes
- Enhanced calendar_query tool description with explicit warnings
- Added "DO NOT call list_calendars first" instruction
- Clarified that calendar_url should be OMITTED for searches
- Added ⚠️ emoji warnings for visibility

## Test Framework (Bonus)
Added comprehensive test framework for LLM tool selection analysis:
- Structured tool call logger (JSON lines format)
- Enhanced test runner with CLI args and live progress
- Workflow orchestrator for end-to-end testing
- Manual testing script for quick iterations
- Comprehensive documentation

## Verification
Tested with multiple query variations:
- "wann ist der termin mit elena" ✅ No calendar_url
- "wann habe ich den termin mit elena" ✅ No calendar_url
- "zeig mir alle termine in work-projects" ✅ Uses calendar_url (correct)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 4817080 commit d7706e3

File tree

9 files changed

+1765
-18
lines changed

9 files changed

+1765
-18
lines changed

package.json

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,9 @@
1616
"test": "NODE_ENV=test NODE_OPTIONS='--experimental-vm-modules' jest",
1717
"test:watch": "NODE_ENV=test NODE_OPTIONS='--experimental-vm-modules' jest --watch",
1818
"test:coverage": "NODE_ENV=test NODE_OPTIONS='--experimental-vm-modules' jest --coverage",
19-
"test:integration": "node tests/integration/mcp-test-runner.js",
19+
"test:integration": "node tests/integration/run-test-session.js",
20+
"test:integration:runner": "node tests/integration/mcp-test-runner.js",
21+
"test:integration:quick": "node tests/integration/run-test-session.js --max 5 --repetitions 1",
2022
"test:setup-data": "node tests/integration/setup-test-data.js"
2123
},
2224
"author": "dav-mcp contributors",

src/index.js

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import { tsdavManager } from './tsdav-client.js';
1010
import { tools } from './tools/index.js';
1111
import { createToolErrorResponse, createHTTPErrorResponse, AuthenticationError, MCP_ERROR_CODES } from './error-handler.js';
1212
import { logger, createSessionLogger, createRequestLogger } from './logger.js';
13+
import { initializeToolCallLogger, getToolCallLogger } from './tool-call-logger.js';
1314

1415
// Load environment variables
1516
dotenv.config();
@@ -207,13 +208,26 @@ function createMCPServer(sessionId) {
207208
throw error;
208209
}
209210

211+
// Structured logging for test analysis
212+
const toolCallLogger = getToolCallLogger();
213+
const startTime = Date.now();
214+
toolCallLogger.logToolCallStart(toolName, args, { sessionId, requestId });
215+
210216
try {
211217
requestLogger.debug({ tool: toolName }, 'Executing tool');
212218
const result = await tool.handler(args);
219+
const duration = Date.now() - startTime;
220+
213221
requestLogger.info({ tool: toolName }, 'Tool executed successfully');
222+
toolCallLogger.logToolCallSuccess(toolName, args, result, { sessionId, requestId, duration });
223+
214224
return result;
215225
} catch (error) {
226+
const duration = Date.now() - startTime;
227+
216228
requestLogger.error({ tool: toolName, error: error.message, stack: error.stack }, 'Tool execution error');
229+
toolCallLogger.logToolCallError(toolName, args, error, { sessionId, requestId, duration });
230+
217231
return createToolErrorResponse(error, process.env.NODE_ENV === 'development');
218232
}
219233
});
@@ -463,6 +477,14 @@ let cleanupInterval;
463477
async function start() {
464478
logger.info('Starting tsdav MCP Server...');
465479

480+
// Initialize tool call logger
481+
const toolCallLogger = initializeToolCallLogger();
482+
logger.info({
483+
enabled: toolCallLogger.enabled,
484+
outputMode: toolCallLogger.outputMode,
485+
logFile: toolCallLogger.logFile
486+
}, 'Tool call logger initialized');
487+
466488
// Initialize tsdav clients
467489
await initializeTsdav();
468490

src/server-stdio.js

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,19 @@ import { tsdavManager } from './tsdav-client.js';
66
import { tools } from './tools.js';
77
import { createToolErrorResponse, MCP_ERROR_CODES } from './error-handler.js';
88
import { logger } from './logger.js';
9+
import { initializeToolCallLogger, getToolCallLogger } from './tool-call-logger.js';
910
import dotenv from 'dotenv';
1011

1112
dotenv.config();
1213

14+
// Initialize tool call logger
15+
const toolCallLogger = initializeToolCallLogger();
16+
logger.info({
17+
enabled: toolCallLogger.enabled,
18+
outputMode: toolCallLogger.outputMode,
19+
logFile: toolCallLogger.logFile
20+
}, 'Tool call logger initialized');
21+
1322
// Initialize tsdav
1423
try {
1524
await tsdavManager.initialize({
@@ -63,13 +72,26 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
6372
throw error;
6473
}
6574

75+
// Structured logging for test analysis
76+
const toolCallLogger = getToolCallLogger();
77+
const startTime = Date.now();
78+
toolCallLogger.logToolCallStart(toolName, args, {});
79+
6680
try {
6781
logger.debug({ tool: toolName }, 'Executing tool');
6882
const result = await tool.handler(args);
83+
const duration = Date.now() - startTime;
84+
6985
logger.info({ tool: toolName }, 'Tool executed successfully');
86+
toolCallLogger.logToolCallSuccess(toolName, args, result, { duration });
87+
7088
return result;
7189
} catch (error) {
90+
const duration = Date.now() - startTime;
91+
7292
logger.error({ tool: toolName, error: error.message, stack: error.stack }, 'Tool execution error');
93+
toolCallLogger.logToolCallError(toolName, args, error, { duration });
94+
7395
return createToolErrorResponse(error, process.env.NODE_ENV === 'development');
7496
}
7597
});

src/tool-call-logger.js

Lines changed: 210 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,210 @@
1+
/**
2+
* Structured Tool Call Logger for Test Analysis
3+
*
4+
* Logs every tool call in structured JSON format for easy parsing by test runner.
5+
* Can output to STDOUT (JSON lines) or separate file.
6+
*/
7+
import fs from 'fs';
8+
import path from 'path';
9+
10+
/**
 * Writes one JSON object per tool-call event (start / success / error),
 * either to a JSON-lines file, to stdout (prefixed with `TOOL_CALL:`), or both.
 *
 * Logging is best-effort: a failure to create the log directory, serialize an
 * entry, or append to the file is reported on stderr and never thrown into
 * the tool-call path.
 *
 * NOTE(review): entries include the raw tool `args`; the default log file
 * lives under /tmp. Confirm that is acceptable for the data being logged.
 * NOTE(review): when the server speaks MCP over stdio, the 'stdout'/'both'
 * modes would presumably interleave these lines with protocol traffic —
 * confirm before enabling them there.
 */
class ToolCallLogger {
  /**
   * @param {object} [options]
   * @param {boolean} [options.enabled=true] - Anything other than `false` enables logging.
   * @param {'file'|'stdout'|'both'} [options.outputMode='file'] - Where entries are written.
   * @param {string} [options.logFile='/tmp/mcp-tool-calls.jsonl'] - JSON-lines output path.
   */
  constructor(options = {}) {
    this.enabled = options.enabled !== false; // Default: enabled
    this.outputMode = options.outputMode || 'file'; // 'file' | 'stdout' | 'both'
    this.logFile = options.logFile || '/tmp/mcp-tool-calls.jsonl'; // JSON lines format

    if (this._writesToFile()) {
      try {
        // `recursive: true` is a no-op when the directory already exists.
        fs.mkdirSync(path.dirname(this.logFile), { recursive: true });
      } catch (error) {
        // Same policy as clear()/_write(): report, but do not take the server down.
        console.error('Failed to create tool call log directory:', error.message);
      }
    }
  }

  /**
   * Log the start of a tool call.
   *
   * @param {string} toolName
   * @param {*} args - Arguments passed to the tool.
   * @param {{sessionId?: string, requestId?: string}} [metadata]
   */
  logToolCallStart(toolName, args, metadata = {}) {
    if (!this.enabled) return;

    this._write({
      type: 'tool_call_start',
      timestamp: new Date().toISOString(),
      tool: toolName,
      args: args,
      session_id: metadata.sessionId,
      request_id: metadata.requestId,
    });
  }

  /**
   * Log a successful tool call. Only a summary of `result` is stored.
   *
   * @param {string} toolName
   * @param {*} args - Arguments passed to the tool.
   * @param {*} result - Value returned by the tool handler.
   * @param {{sessionId?: string, requestId?: string, duration?: number}} [metadata]
   */
  logToolCallSuccess(toolName, args, result, metadata = {}) {
    if (!this.enabled) return;

    this._write({
      type: 'tool_call_success',
      timestamp: new Date().toISOString(),
      tool: toolName,
      args: args,
      result_summary: this._summarizeResult(result),
      duration_ms: metadata.duration,
      session_id: metadata.sessionId,
      request_id: metadata.requestId,
    });
  }

  /**
   * Log a failed tool call.
   *
   * @param {string} toolName
   * @param {*} args - Arguments passed to the tool.
   * @param {*} error - The thrown value; normally an Error, but any value is tolerated.
   * @param {{sessionId?: string, requestId?: string, duration?: number}} [metadata]
   */
  logToolCallError(toolName, args, error, metadata = {}) {
    if (!this.enabled) return;

    this._write({
      type: 'tool_call_error',
      timestamp: new Date().toISOString(),
      tool: toolName,
      args: args,
      error: {
        // A handler may throw null, undefined or a string; this method is
        // called from a catch block, so it must not throw on those values.
        message: error?.message ?? String(error),
        code: error?.code,
        name: error?.name,
      },
      duration_ms: metadata.duration,
      session_id: metadata.sessionId,
      request_id: metadata.requestId,
    });
  }

  /**
   * Truncate the log file (for a new test session). No-op in stdout-only mode.
   */
  clear() {
    if (!this._writesToFile()) return;

    try {
      fs.writeFileSync(this.logFile, '', 'utf8');
    } catch (error) {
      console.error('Failed to clear tool call log:', error.message);
    }
  }

  /**
   * Read back every logged entry from the log file.
   *
   * @returns {object[]} Parsed entries in file order; empty if the file does not exist.
   * @throws {Error} If outputMode is 'stdout', if the file cannot be read for
   *   a reason other than ENOENT, or if a line is not valid JSON.
   */
  getToolCalls() {
    if (this.outputMode === 'stdout') {
      throw new Error('Cannot retrieve tool calls when outputMode is stdout');
    }

    try {
      const content = fs.readFileSync(this.logFile, 'utf8');
      return content
        .split('\n')
        .filter((line) => line.trim())
        .map((line) => JSON.parse(line));
    } catch (error) {
      if (error.code === 'ENOENT') {
        return []; // File doesn't exist yet
      }
      throw error;
    }
  }

  /**
   * Whether the current output mode includes the log file.
   *
   * @returns {boolean}
   */
  _writesToFile() {
    return this.outputMode === 'file' || this.outputMode === 'both';
  }

  /**
   * Serialize an entry to a single JSON line.
   *
   * `args` is caller-supplied and may not be serializable (circular
   * references, BigInt). In that case the entry is kept with `args` replaced
   * by a placeholder string so the event itself is still recorded.
   *
   * @param {object} entry
   * @returns {string|null} The JSON text, or null if the entry cannot be serialized at all.
   */
  _serialize(entry) {
    try {
      return JSON.stringify(entry);
    } catch (error) {
      try {
        return JSON.stringify({ ...entry, args: `[unserializable: ${error.message}]` });
      } catch (fallbackError) {
        console.error('Failed to serialize tool call log entry:', fallbackError.message);
        return null;
      }
    }
  }

  /**
   * Write a log entry to the configured destination(s).
   *
   * @param {object} entry
   */
  _write(entry) {
    const jsonLine = this._serialize(entry);
    if (jsonLine === null) return;

    // Write to stdout (for BashOutput monitoring)
    if (this.outputMode === 'stdout' || this.outputMode === 'both') {
      console.log(`TOOL_CALL:${jsonLine}`);
    }

    // Write to file (for Read tool analysis)
    if (this._writesToFile()) {
      try {
        fs.appendFileSync(this.logFile, jsonLine + '\n', 'utf8');
      } catch (error) {
        console.error('Failed to write tool call log:', error.message);
      }
    }
  }

  /**
   * Reduce a tool result to a small description (avoids logging huge objects).
   *
   * @param {*} result
   * @returns {object|null} null for falsy results; otherwise a summary whose
   *   `type` is 'mcp_result', 'object', or the `typeof` of a primitive.
   */
  _summarizeResult(result) {
    if (!result) return null;

    // For MCP tool results with content array
    if (result.content && Array.isArray(result.content)) {
      const textItems = result.content.filter((c) => c?.type === 'text');
      return {
        type: 'mcp_result',
        content_count: result.content.length,
        content_types: result.content.map((c) => c?.type),
        has_text: textItems.length > 0,
        text_length: textItems.reduce((sum, c) => sum + (c.text?.length || 0), 0),
      };
    }

    // For simple results
    if (typeof result === 'object') {
      return {
        type: 'object',
        keys: Object.keys(result),
      };
    }

    return {
      type: typeof result,
      value: String(result).substring(0, 100), // First 100 chars
    };
  }
}
176+
177+
// Singleton instance
178+
let instance = null;
179+
180+
/**
 * Create the module-wide tool call logger and make it the current instance
 * (call once at server startup; calling again replaces the instance).
 *
 * Settings come from the environment and can be overridden per key by
 * `options`:
 *   - LOG_TOOL_CALLS:      'false' disables logging; anything else enables it
 *   - TOOL_CALL_LOG_MODE:  'file' | 'stdout' | 'both' (default 'file')
 *   - TOOL_CALL_LOG_FILE:  output path (default '/tmp/mcp-tool-calls.jsonl')
 *
 * @param {object} [options] - Overrides applied on top of the environment values.
 * @returns {ToolCallLogger} The newly created logger.
 */
export function initializeToolCallLogger(options = {}) {
  const { LOG_TOOL_CALLS, TOOL_CALL_LOG_MODE, TOOL_CALL_LOG_FILE } = process.env;

  const envSettings = {
    enabled: LOG_TOOL_CALLS !== 'false',
    outputMode: TOOL_CALL_LOG_MODE || 'file',
    logFile: TOOL_CALL_LOG_FILE || '/tmp/mcp-tool-calls.jsonl',
  };

  // Explicit options win over environment-derived settings.
  instance = new ToolCallLogger({ ...envSettings, ...options });
  return instance;
}
198+
199+
/**
 * Return the current tool call logger.
 *
 * If no logger has been initialized yet, one is created with the
 * ToolCallLogger constructor defaults (environment variables are not
 * consulted on this path) and kept for subsequent calls.
 *
 * @returns {ToolCallLogger}
 */
export function getToolCallLogger() {
  if (instance) {
    return instance;
  }

  instance = new ToolCallLogger();
  return instance;
}
209+
210+
export { ToolCallLogger };

src/tools/calendar/calendar-query.js

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,13 @@ import { buildTimeRangeOptions } from '../shared/helpers.js';
88
*/
99
export const calendarQuery = {
1010
name: 'calendar_query',
11-
description: 'PREFERRED: Search and filter calendar events efficiently by text (summary/title), date range, or location. Use this instead of list_events when user asks "find events with X" or "show me events containing Y" to avoid loading thousands of events. Much more token-efficient than list_events. IMPORTANT: When user asks about "today", "tomorrow", "this week" etc., you MUST calculate the correct date range in ISO 8601 format (e.g., 2025-10-08T00:00:00.000Z for tomorrow). If calendar_url is not provided, searches across ALL calendars automatically.',
11+
description: 'PREFERRED: Search and filter calendar events efficiently by text (summary/title), date range, or location. Use this instead of list_events when user asks "find events with X" or "show me events containing Y" to avoid loading thousands of events. Much more token-efficient than list_events. IMPORTANT: When user asks about "today", "tomorrow", "this week" etc., you MUST calculate the correct date range in ISO 8601 format (e.g., 2025-10-08T00:00:00.000Z for tomorrow). ⚠️ FOR SEARCHES: OMIT calendar_url to search across ALL calendars automatically. DO NOT call list_calendars first and then provide a calendar_url - this limits the search to one calendar and will miss events in other calendars. Only provide calendar_url if user explicitly mentions a specific calendar name.',
1212
inputSchema: {
1313
type: 'object',
1414
properties: {
1515
calendar_url: {
1616
type: 'string',
17-
description: 'Optional: The URL of a specific calendar to query. If omitted, searches across ALL available calendars.',
17+
description: '⚠️ USUALLY OMIT THIS for searches! If omitted, searches across ALL available calendars. Only provide if user explicitly specifies a calendar name (e.g., "in my work calendar"). DO NOT use list_calendars and then pick one - that defeats the purpose of cross-calendar search.',
1818
},
1919
time_range_start: {
2020
type: 'string',

0 commit comments

Comments
 (0)