Skip to content
Merged
Show file tree
Hide file tree
Changes from 72 commits
Commits
Show all changes
90 commits
Select commit Hold shift + click to select a range
a76ff24
changes
20001020ycx Oct 7, 2025
697f366
changes for session states
20001020ycx Oct 7, 2025
70dff32
linting
20001020ycx Oct 8, 2025
0d8b06a
finalize get_instructions
20001020ycx Oct 8, 2025
c5ac54e
manual pass
20001020ycx Oct 9, 2025
d326f76
fine functionality wise
20001020ycx Oct 9, 2025
37f2c41
queryresult ready for pr
20001020ycx Oct 9, 2025
47d0301
sessionState ready for pr
20001020ycx Oct 9, 2025
2201266
another pass
20001020ycx Oct 9, 2025
aa37f1a
delete ai doc
20001020ycx Oct 9, 2025
a3076b6
pass for minor files
20001020ycx Oct 9, 2025
d3d7830
basically okay with functionality
20001020ycx Oct 9, 2025
ec0f2bc
fix uv formating and testing
20001020ycx Oct 9, 2025
21e0386
complete test review
20001020ycx Oct 9, 2025
edf68e4
adding multi-threading test
20001020ycx Oct 9, 2025
3d2be57
passing all tests
20001020ycx Oct 10, 2025
e1c1b3b
test docstring
20001020ycx Oct 10, 2025
7844653
Merge branch 'main' of github.com:y-scope/clp into 10-4-mcp-server-im…
20001020ycx Oct 10, 2025
97e7f23
fix docstring
20001020ycx Oct 10, 2025
d3ccf3e
pass on docstring
20001020ycx Oct 10, 2025
15315ec
addressing code rabbit
20001020ycx Oct 10, 2025
8ce8a38
addressing 2nd round of rabbit review
20001020ycx Oct 10, 2025
6faeba9
docstring fixes
20001020ycx Oct 10, 2025
466e242
add paging metadata to documentation
20001020ycx Oct 10, 2025
cccd8ec
nits
20001020ycx Oct 10, 2025
b7c3b87
update concurrency model docstring
20001020ycx Oct 10, 2025
9c6b881
supress linter
20001020ycx Oct 11, 2025
184c94e
Update components/clp-mcp-server/clp_mcp_server/server/session_manage…
20001020ycx Oct 12, 2025
07aa3f8
Update components/clp-mcp-server/clp_mcp_server/server/session_manage…
20001020ycx Oct 12, 2025
1c2d810
Update components/clp-mcp-server/clp_mcp_server/server/session_manage…
20001020ycx Oct 12, 2025
02c706f
Update components/clp-mcp-server/clp_mcp_server/server/session_manage…
20001020ycx Oct 12, 2025
da0b2e3
var renaming
20001020ycx Oct 12, 2025
142573a
Merge branch '10-4-mcp-server-implementation' of github.com:20001020y…
20001020ycx Oct 12, 2025
6f426ca
Update components/clp-mcp-server/clp_mcp_server/server/session_manage…
20001020ycx Oct 12, 2025
c8816a5
Merge branch '10-4-mcp-server-implementation' of github.com:20001020y…
20001020ycx Oct 12, 2025
702837c
address easy to fix comments
20001020ycx Oct 12, 2025
e267ec4
resolving conflicts
20001020ycx Oct 12, 2025
ed7765b
fix strings
20001020ycx Oct 12, 2025
9e73062
missed leading space
20001020ycx Oct 12, 2025
01c6a05
wrong docstring
20001020ycx Oct 12, 2025
67d183e
fix errors by latest change
20001020ycx Oct 12, 2025
ce744d7
trivial comment addressing
20001020ycx Oct 14, 2025
4fc5cc3
adding fixure test for unit test
20001020ycx Oct 14, 2025
23797fd
fix unit test var
20001020ycx Oct 14, 2025
0644fb7
fix rabbits change
20001020ycx Oct 14, 2025
662b47d
fix rabbit
20001020ycx Oct 14, 2025
1a53ad3
adding lock to enforce thread-safety
20001020ycx Oct 14, 2025
862aa42
done with impl
20001020ycx Oct 14, 2025
b79f681
sync up with chaoyue about async single thread concurrency
20001020ycx Oct 15, 2025
134591c
Update components/clp-mcp-server/clp_mcp_server/server/session_manage…
20001020ycx Oct 16, 2025
50e5db3
Update components/clp-mcp-server/clp_mcp_server/server/session_manage…
20001020ycx Oct 16, 2025
1cad6b0
Update components/clp-mcp-server/clp_mcp_server/server/session_manage…
20001020ycx Oct 16, 2025
2683bcf
Update components/clp-mcp-server/clp_mcp_server/server/server.py
20001020ycx Oct 16, 2025
22de748
code review format update
20001020ycx Oct 16, 2025
c934ac7
implement async garbage collector
20001020ycx Oct 16, 2025
4a2bd41
Update components/clp-mcp-server/clp_mcp_server/server/server.py
20001020ycx Oct 16, 2025
ad11e90
Update components/clp-mcp-server/clp_mcp_server/server/session_manage…
20001020ycx Oct 16, 2025
a1979d6
Update components/clp-mcp-server/clp_mcp_server/server/session_manage…
20001020ycx Oct 16, 2025
2193824
Update components/clp-mcp-server/clp_mcp_server/server/session_manage…
20001020ycx Oct 16, 2025
d6a7944
fix docstring
20001020ycx Oct 16, 2025
00f9503
Merge branch '10-4-mcp-server-implementation' of github.com:20001020y…
20001020ycx Oct 16, 2025
9678909
Update components/clp-mcp-server/clp_mcp_server/server/session_manage…
20001020ycx Oct 16, 2025
5af4233
Update components/clp-mcp-server/clp_mcp_server/server/session_manage…
20001020ycx Oct 16, 2025
392af88
Update components/clp-mcp-server/clp_mcp_server/server/session_manage…
20001020ycx Oct 16, 2025
07f17a5
done with docstring comment
20001020ycx Oct 16, 2025
d4f6015
Merge branch '10-4-mcp-server-implementation' of github.com:20001020y…
20001020ycx Oct 16, 2025
540e78d
done with async garbage collector
20001020ycx Oct 16, 2025
c480b33
move checking to session
20001020ycx Oct 16, 2025
c411ae0
fix docstring
20001020ycx Oct 16, 2025
2601008
Minor fix.
LinZhihao-723 Oct 16, 2025
e38f988
Merge branch '10-4-mcp-server-implementation' of https://github.com/2…
LinZhihao-723 Oct 16, 2025
08c80b3
Add instruction on how to run unit tests.
LinZhihao-723 Oct 16, 2025
1de6a46
fix unit tests
20001020ycx Oct 16, 2025
86184c9
Merge branch '10-4-mcp-server-implementation' of github.com:20001020y…
20001020ycx Oct 16, 2025
a2a13b8
Minor fix.
LinZhihao-723 Oct 16, 2025
8a5c204
Minor renaming.
LinZhihao-723 Oct 16, 2025
9caab35
hoist
20001020ycx Oct 16, 2025
f7a6579
Merge branch '10-4-mcp-server-implementation' of github.com:20001020y…
20001020ycx Oct 16, 2025
a54b019
update docstring of session manager class
20001020ycx Oct 16, 2025
046b6f6
fix ttl seconds
20001020ycx Oct 16, 2025
ebad693
Update unit tests.
LinZhihao-723 Oct 16, 2025
fa8dcc5
Merge branch '10-4-mcp-server-implementation' of https://github.com/2…
LinZhihao-723 Oct 16, 2025
5a12084
System Prompt
20001020ycx Oct 16, 2025
acf99a9
Merge branch '10-4-mcp-server-implementation' of github.com:20001020y…
20001020ycx Oct 16, 2025
3b7daca
Refactor page schema.
LinZhihao-723 Oct 16, 2025
7e7b321
Merge branch '10-4-mcp-server-implementation' of https://github.com/2…
LinZhihao-723 Oct 16, 2025
2b3f13a
Fix formating issue.
LinZhihao-723 Oct 16, 2025
c206f7b
Fix type annotation.
LinZhihao-723 Oct 16, 2025
f882168
Merge branch 'main' into 10-4-mcp-server-implementation
20001020ycx Oct 16, 2025
8e67e3d
Merge branch 'main' into 10-4-mcp-server-implementation
LinZhihao-723 Oct 17, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions components/clp-mcp-server/README.md
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added an instruction for test case.
@coerabbitai can u create an issue as a feature request to add tasks and workflows to run unit tests for all uv components?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@coderabbitai can u create an issue for us?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Create a GitHub issue for adding tasks and workflows to run unit tests for all uv components

gh issue create \
  --repo y-scope/clp \
  --title "Feature: Add tasks and workflows to run unit tests for all uv components" \
  --body "## Feature Request

Add tasks and workflows to run unit tests for all uv components in the repository.

## Context

This feature request was raised during the review of PR #1401, which added unit testing capabilities to the clp-mcp-server component using \`uv test pytest\`. The need for standardized test execution across all uv-based components was identified.

## Requested By

@LinZhihao-723

## References

- PR: https://github.com/y-scope/clp/pull/1401
- Comment: https://github.com/y-scope/clp/pull/1401#discussion_r2437510679" \
  --assignee LinZhihao-723

Length of output: 42


@LinZhihao-723, I've created issue #1431 for this feature request. The issue covers adding tasks and workflows to run unit tests for all uv components in the repository, with references back to this PR and comment thread.

Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,14 @@ For more details on Claude Desktop MCP setup, see the

> **Warning:** 🚧 This section is still under construction.

## Testing

Use the following command to run all unit tests:

```shell
uv test pytest
```
Comment on lines +52 to +58
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

Verify the test command syntax and update for consistency with PR objectives.

The command uv test pytest differs from the testing procedure documented in the PR objectives (uv run --group dev pytest tests/test_session_manager.py -v). The README should clarify whether to run all tests or specific test modules, and align with the project's actual testing setup.

Please verify the correct command by running the following script:

Additionally, please confirm:

  1. Is uv test pytest the correct and intended command?
  2. Should the README provide both a simple "run all tests" command and a more detailed example with specific test modules and verbose output?
  3. Does the project require the --group dev flag to access dev dependencies for testing?

Once verified, the README command should be updated to match the project's testing setup.


🏁 Script executed:

#!/bin/bash
# Verify uv test command syntax and check available test runners

# Check if uv supports 'test' subcommand
uv test --help 2>&1 | head -20

# List available uv commands
uv --version
uv --help | grep -i test

Length of output: 122


Understood. I'll gather information through alternative methods since the sandbox lacks the uv tool.





🏁 Script executed:

#!/bin/bash
# Search codebase for configuration files and existing uv command usage

# Look for configuration files
echo "=== Configuration files ==="
fd -type f -e toml -e lock | head -20

# Search for existing uv command patterns in markdown, scripts, and config files
echo -e "\n=== Existing uv commands in codebase ==="
rg -n "uv\s+(run|test|pytest)" --type md --type py --type toml -C 2

# Check if pyproject.toml defines test commands
echo -e "\n=== Content of pyproject.toml (if exists) ==="
fd -name pyproject.toml -type f -exec cat {} \;

# Search for test-related configuration
echo -e "\n=== Test command patterns ==="
rg -n "pytest|test" --type toml -C 1 | head -30

Length of output: 3065


🏁 Script executed:

#!/bin/bash
# Check the PR objectives or description for test setup guidance
fd -name "*.md" -type f | head -10
cat components/clp-mcp-server/README.md | grep -A 10 -B 5 -i "test\|pytest"

Length of output: 695


🌐 Web query:

What are the available commands in the `uv` package manager, specifically for running tests? Is there a `uv test` subcommand or is the correct syntax `uv run pytest`?

💡 Result:

Short answer: There is no built‑in top‑level uv test subcommand. Use uv run pytest (or uv run <script/name>) — or define a script (tool.uv.scripts.test = "pytest") and run it via uv run test. [1][2][3]

Sources:

  • uv docs (run examples & run for testing). [1]
  • Hypermodern cookiecutter / uv guide (explicit examples: uv run pytest). [2]
  • Community guide showing uv run, dependency groups and uv run --dev pytest. [3]


---

🏁 Script executed:

```shell
#!/bin/bash
# Fetch the pyproject.toml for clp-mcp-server to check for script definitions
cat components/clp-mcp-server/pyproject.toml

Length of output: 2457


Corriger la syntaxe de la commande de test — uv test pytest n'existe pas.

La commande uv test pytest est syntaxiquement invalide. L'outil uv ne dispose pas de sous-commande test. Selon la documentation officielle d'uv et les patterns existants du projet (voir docs/src/dev-docs/testing/integration-tests.md), la syntaxe correcte est uv run pytest.

Puisque les outils pytest se trouvent dans le groupe de dépendances dev du projet, la commande devrait être :

uv run --group dev pytest

Cela s'aligne également avec l'approche documentée dans les objectifs de la RP : uv run --group dev pytest tests/test_session_manager.py -v.

🤖 Prompt for AI Agents
In components/clp-mcp-server/README.md around lines 52 to 58, the test command
shown is invalid ("uv test pytest"); update the instructions to use the correct
uv CLI invocation by replacing that line with the documented command using the
dev dependency group: use "uv run --group dev pytest" (or "uv run pytest" if not
specifying group), and ensure the README example matches the project's
documented pattern (e.g., "uv run --group dev pytest
tests/test_session_manager.py -v").


[claude-desktop]: https://claude.ai/download
[claude-desktop-mcp-doc]: https://modelcontextprotocol.io/docs/develop/connect-local-servers
[mcp]: https://modelcontextprotocol.io/docs/getting-started/intro
Expand Down
26 changes: 26 additions & 0 deletions components/clp-mcp-server/clp_mcp_server/server/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
"""Constants for CLP MCP server."""

EXPIRED_SESSION_SWEEP_INTERVAL_SECONDS = 600 # 10 minutes
NUM_ITEMS_PER_PAGE = 10
MAX_CACHED_RESULTS = 1000
SESSION_TTL_MINUTES = 60

SERVER_NAME = "clp-mcp-server"
SYSTEM_PROMPT = """
You are an AI assistant that helps users query a log database using KQL (Kibana Query Language).
When given a user query, you should generate a KQL query that accurately captures the user's
intent. The KQL query should be as specific as possible to minimize the number of log messages
returned.
You should also consider the following guidelines when generating KQL queries:
- Use specific field names and values to narrow down the search.
- Avoid using wildcards (*) unless absolutely necessary, as they can lead to large result
sets.
- Use logical operators (AND, OR, NOT) to combine multiple conditions.
- Consider the time range of the logs you are searching. If the user specifies a time range,
include it in the KQL query.
- If the user query is ambiguous or lacks detail, ask clarifying questions to better
understand their intent before generating the KQL query.
- Always ensure that the generated KQL query is syntactically correct and can be executed
without errors.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are u sure this would work? The raw string will treat a newline as a line break, so a prompt sentence will be split into multiple lines.

"""
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Refactored in the following way:

Suggested change
SYSTEM_PROMPT = """
You are an AI assistant that helps users query a log database using KQL (Kibana Query Language).
When given a user query, you should generate a KQL query that accurately captures the user's
intent. The KQL query should be as specific as possible to minimize the number of log messages
returned.
You should also consider the following guidelines when generating KQL queries:
- Use specific field names and values to narrow down the search.
- Avoid using wildcards (*) unless absolutely necessary, as they can lead to large result
sets.
- Use logical operators (AND, OR, NOT) to combine multiple conditions.
- Consider the time range of the logs you are searching. If the user specifies a time range,
include it in the KQL query.
- If the user query is ambiguous or lacks detail, ask clarifying questions to better
understand their intent before generating the KQL query.
- Always ensure that the generated KQL query is syntactically correct and can be executed
without errors.
"""
SYSTEM_PROMPT = (
"You are an AI assistant that helps users query a log database using KQL (Kibana Query Language)."
" You should generate a KQL query that accurately expresses the user's intent. The generated KQL"
" query should be as specific as possible to minimize the number of log messages returned.\n\n"
"You should consider the following guidelines to generate KQL queries efficiently:\n"
"- Use specific field names and values to narrow down the search.\n"
"- Avoid using wildcards (`*`) unless absolutely necessary, as they can lead to large result"
" sets.\n"
"- Use logical operators (`AND`, `OR`, `NOT`) to combine one or more key-value searches.\n"
"- Consider the time range of the logs you are searching. If the user specifies a time range,"
" include it in the KQL query.\n"
"- If the user query is ambiguous or lacks detail, ask clarifying questions to better understand"
" their intent before generating the KQL query.\n"
"- Always ensure that the generated KQL query is syntactically correct and can be executed without"
" errors."
)

A few questions about this prompt, may also need inputs from @kirkrodrigues and @junhaoliao.

  • Since our KQL is slightly different from the standard version, shall we mention the syntax first? For example:
    • Use double quotes for string values.
    • Escape special characters.
  • We didn't mention auto-generated keys. Would this make the agent only query user-generated keys, which may miss some important log-event-level metadata?
  • We didn't mention file-level metadata query.
  • When we say narrow down the timestamp, we didn't give rules to:
    • In what granularity should the timestamp be queried? We should maybe explain the timsetamp is Unix Epoch in milliseconds (or nanoseconds with Devin's latest change).
    • How to deal with the timezone? We don't have timezone information tracked in the archive now. Without some grace offsets applied, the agent could generate a query that doesn't fall into the actual expected timestamp range.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agree with everything you mentioned except:

  • Afaik, we aren't currently storing file-level metadata in clp-json (although clp-s supports storing it), so perhaps we should defer this.
  • We can tell the LLM that we only want queries in UTC time, right?

Some additional points:

  • I think it might also be useful to tell the LLM that we don't support fuzzy matches (otherwise it might expect that, for example, "system error" can match "system shutdown error").
  • How does the LLM know what key to use for the timestamp?
  • Do we really need the point about ensuring the KQL query is syntactically correct? Does it actually improve the accuracy of the generated query?

Copy link
Member

@LinZhihao-723 LinZhihao-723 Oct 16, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

iirc, we don't support parsing timestamps as UTC yet. When users give a timestamp range, LLM probably also doesn't know which timezone it should use to apply the offset.
I think a safe range should be: suppose lower_bound and upper_bound are given, we add a grace padding to search:

  • real_lower_bound = lower_bound - 12hours
  • real_upper_bound = upper_bound + 14hours

But this might make the range too wild. On the other hand, I'm not sure if AI can understand how to properly apply the offset if we give it a concrete timezone.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here's my opinion to each point you have mentioned:

  • agreed, I wasn't aware of this, is there any documentation defining what specific KQL we are supporting?
  • I don't have any background for auto-generated key? Are they just some other fields in the returned response? In this case, I think LLM may learn from the returned result right?
  • In addition to what Kirk said about file-level metadata, the current suite of API does not take in any file-level metadata (e.g file name), so information about the file is not in the interest of the current release.
  • I am having trouble understanding why timezone is important here. Say agent issues substring_seach ("error"), and in the receiving response, each log message is accompanied by a timestamp, say Toronto time or Unix epoch. LLM would learn what is the format here, and issue the next substring search with a timestamp that is around the point of interest. For example, if the timestamp is Unix epoch, and the error message that the LLM is interested in has a timestamp of Unix epoch 1234, then the next request LLM would issue is substring_search("some other related keyword", 1000, 1500), because it realizes a timestamp is Unix epoch rather a UTC timestamp.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  • https://docs.yscope.com/clp/main/user-docs/reference-json-search-syntax.html
  • Background. Auto-generated keys won't be returned in the response but they are query-able. Thinking about it, since we don't know what autogenerated keys the compressed logs will have, at this point, we probably can't say anything in the API. In the future, perhaps we can have better support.
  • Regarding timezones, @LinZhihao-723 can you provide an example log input and output that demonstrates the issue? I'm a little unclear as well.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As discussed offline, we will leave the system prompt improvements for next PRs. @coderabbitai can u create two issues:

  • The first one to keep track of updating this system prompt to cover the issues above.
  • The second one to ask us to update CLP doc to state that the current CLP is timezoneless. Users should use timezoneless timestamp in queries.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash

# Create first issue for system prompt improvements
gh issue create \
  --title "Improve CLP MCP Server system prompt to cover KQL-specific guidance" \
  --body "## Context
This issue tracks improvements to the \`SYSTEM_PROMPT\` constant in \`components/clp-mcp-server/clp_mcp_server/server/constants.py\` to provide more comprehensive guidance for LLM query generation.

## Background
Discussion in PR #1401: https://github.com/y-scope/clp/pull/1401#discussion_r2434136722

## Improvements to address:

1. **KQL Syntax specifics**: Mention CLP's specific KQL syntax
   - Use double quotes for string values
   - Escape special characters
   - Reference: https://docs.yscope.com/clp/main/user-docs/reference-json-search-syntax.html

2. **Auto-generated keys**: Explain handling of auto-generated keys that are queryable but not returned in responses

3. **File-level metadata**: Consider if/how to mention file-level metadata queries (pending clp-json support)

4. **Timestamp handling**:
   - Specify timestamp granularity (Unix epoch in milliseconds/nanoseconds)
   - Clarify which key to use for timestamps
   - Address timezone considerations (see related issue about timezoneless timestamps)

5. **Fuzzy match support**: Clarify that CLP doesn't support fuzzy matches (e.g., \"system error\" won't match \"system shutdown error\")

6. **Query validation**: Evaluate if the current point about syntactic correctness actually improves query accuracy

## Related
- PR #1401
- User: @20001020ycx" \
  --assignee LinZhihao-723

# Create second issue for CLP documentation update
gh issue create \
  --title "Document that CLP is timezoneless and requires timezoneless timestamps in queries" \
  --body "## Context
This issue tracks the need to update CLP documentation to clarify that CLP is timezoneless and users should use timezoneless timestamps in their queries.

## Background
Discussion in PR #1401: https://github.com/y-scope/clp/pull/1401#discussion_r2434136722

## Required changes:

Update CLP documentation to explicitly state:
- CLP is timezoneless (does not track timezone information in archives)
- Users should use timezoneless timestamps in their queries
- Clarify the timestamp format expected (Unix epoch in milliseconds/nanoseconds)
- Consider documenting best practices for handling timezone conversions client-side

## Rationale
Without clear documentation, users and LLMs may apply incorrect timezone offsets, resulting in queries that don't match the expected timestamp range. Making this explicit will help users understand how to properly query time-based data.

## Related
- PR #1401
- User: @20001020ycx" \
  --assignee LinZhihao-723

Length of output: 85


@LinZhihao-723, I've created the two issues as requested:

  1. Issue Improve CLP MCP Server system prompt to cover KQL-specific guidance #1432: Improve CLP MCP Server system prompt to cover KQL-specific guidance

    • Tracks improvements to the SYSTEM_PROMPT constant covering KQL syntax specifics, auto-generated keys, file-level metadata, timestamp handling, fuzzy match support, and query validation
  2. Issue Document that CLP is timezoneless and requires timezoneless timestamps in queries #1433: Document that CLP is timezoneless and requires timezoneless timestamps in queries

    • Tracks the documentation update to clarify that CLP is timezoneless and users should use timezoneless timestamps in their queries

Both issues have been assigned to you and include backlinks to this PR and the discussion for context.

67 changes: 37 additions & 30 deletions components/clp-mcp-server/clp_mcp_server/server/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,25 +2,10 @@

from typing import Any

from fastmcp import FastMCP
from fastmcp import Context, FastMCP


class ProtocolConstant:
"""Constants for the CLP MCP Server."""

SERVER_NAME = "clp-mcp-server"

# Tool names
TOOL_HELLO_WORLD = "hello_world"
TOOL_GET_SERVER_INFO = "get_server_info"

@classmethod
def get_capabilities(cls) -> list[str]:
"""
Gets the capabilities of the server.
:return: A list of tool names supported by the server.
"""
return [cls.TOOL_HELLO_WORLD, cls.TOOL_GET_SERVER_INFO]
from . import constants
from .session_manager import SessionManager


def create_mcp_server() -> FastMCP:
Expand All @@ -31,22 +16,44 @@ def create_mcp_server() -> FastMCP:
:raise: Propagates `FastMCP.__init__`'s exceptions.
:raise: Propagates `FastMCP.tool`'s exceptions.
"""
mcp = FastMCP(name=ProtocolConstant.SERVER_NAME)
mcp = FastMCP(name=constants.SERVER_NAME)

session_manager = SessionManager(session_ttl_minutes=constants.SESSION_TTL_MINUTES)

@mcp.tool()
def get_server_info() -> dict[str, Any]:
@mcp.tool
async def get_instructions(ctx: Context) -> str:
"""
Gets the MCP server's information.
Gets a pre-defined "system prompt" that guides the LLM behavior.
This function must be invoked before any other `FastMCP.tool`.

:return: The server's information with a list of capabilities.
:param ctx: The `FastMCP` context containing the metadata of the underlying MCP session.
:return: A string of "system prompt".
"""
return {
"name": ProtocolConstant.SERVER_NAME,
"capabilities": ProtocolConstant.get_capabilities(),
"status": "running",
}
await session_manager.start()
return session_manager.get_or_create_session(ctx.session_id).get_instructions()

@mcp.tool
async def get_nth_page(page_index: int, ctx: Context) -> dict[str, Any]:
"""
Retrieves the n-th page of a paginated response with the paging metadata from the previous
query.

:param page_index: Zero-based index, e.g., 0 for the first page.
:param ctx: The `FastMCP` context containing the metadata of the underlying MCP session.
:return: A dictionary containing the following key-value pairs on success:
- "items": A list of log entries in the requested page.
- "total_pages": Total number of pages available from the query as an integer.
- "total_items": Total number of log entries available from the query as an integer.
- "num_items_per_page": Number of log entries per page.
- "has_next": Whether a page exists after the returned one.
- "has_previous": Whether a page exists before the returned one.
:return: A dictionary with the following key-value pair on failures:
- "Error": An error message describing the failure.
"""
await session_manager.start()
return session_manager.get_nth_page(ctx.session_id, page_index)

@mcp.tool()
@mcp.tool
def hello_world(name: str = "clp-mcp-server user") -> dict[str, Any]:
"""
Provides a simple hello world greeting.
Expand All @@ -56,7 +63,7 @@ def hello_world(name: str = "clp-mcp-server user") -> dict[str, Any]:
"""
return {
"message": f"Hello World, {name.strip()}!",
"server": ProtocolConstant.SERVER_NAME,
"server": constants.SERVER_NAME,
"status": "running",
}

Expand Down
242 changes: 242 additions & 0 deletions components/clp-mcp-server/clp_mcp_server/server/session_manager.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,242 @@
"""Session management for CLP MCP Server."""

import asyncio
from dataclasses import dataclass, field
from datetime import datetime, timedelta, timezone
from typing import Any, ClassVar

from paginate import Page

from . import constants


class PaginatedQueryResult:
"""Paginates the cached log entries returned from a query's response."""

def __init__(self, result_log_entries: list[str], num_items_per_page: int) -> None:
"""
:param result_log_entries: List of cached log entries to paginate.
:param num_items_per_page:
:raise: ValueError if the number of cached results or num_items_per_page is invalid.
"""
if len(result_log_entries) > constants.MAX_CACHED_RESULTS:
err_msg = (
"PaginatedQueryResult exceeds maximum allowed cached results:"
f" {len(result_log_entries)} > {constants.MAX_CACHED_RESULTS}."
)
raise ValueError(err_msg)

if num_items_per_page <= 0:
err_msg = (
f"Invalid num_items_per_page: {num_items_per_page}, it must be a positive integer."
)
raise ValueError(err_msg)

self.num_items_per_page = num_items_per_page

self._num_pages = (len(result_log_entries) + num_items_per_page - 1) // num_items_per_page
self._result_log_entries = result_log_entries

def get_page(self, page_index: int) -> Page | None:
"""
:param page_index: The number of page to retrieve (zero-based index; 0 is the first page).
:return: A `Page` object for the specified page.
:return: None if `page_index` is out of bounds.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Docstring phrasing: use “page index” consistently.

Tighten wording and keep zero-based note consistent across methods.

-        :param page_index: The number of page to retrieve (zero-based index; 0 is the first page).
+        :param page_index: The page index to retrieve (zero-based; 0 is the first page).

Apply the same change in SessionState.get_page_data and SessionManager.get_nth_page.

Also applies to: 102-107, 224-226

🤖 Prompt for AI Agents
components/clp-mcp-server/clp_mcp_server/server/session_manager.py around lines
47-49 (and also update the similar docstrings at 102-107 and 224-226): the
docstrings use inconsistent phrasing for the page index and duplicate/verbose
return lines; change all occurrences to use the phrase "page index" (two words)
consistently, tighten the wording to one :param line like ":param page_index:
zero-based page index (0 is the first page)." and one :return line like
":return: A Page object for the specified page, or None if the page index is out
of bounds." Apply this same wording in SessionState.get_page_data and
SessionManager.get_nth_page docstrings at the indicated line ranges.

"""
# Convert zero-based to one-based
page_number = page_index + 1
if page_number <= 0 or self._num_pages < page_number:
return None

return Page(
self._result_log_entries,
page=page_number,
items_per_page=self.num_items_per_page,
)
Comment on lines 47 to 54
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🧹 Nitpick | 🔵 Trivial

Allow empty result sets to return page 0 as an empty page.

When there are zero cached results, _num_pages is 0, so requesting page_index=0 (which becomes page_number=1) fails the check self._num_pages < page_number and returns None. This causes callers to report "Page index is out of bounds" even though the query succeeded with zero results. Consider special-casing empty results to return an empty Page with correct metadata.

Apply this diff:

         # Convert zero-based to one-based
         page_number = page_index + 1
+        
+        # Special-case empty results: allow page 0 to return an empty Page
+        if self._num_pages == 0:
+            if page_index == 0:
+                return Page([], page=1, items_per_page=self.num_items_per_page)
+            return None
+        
         if page_number <= 0 or self._num_pages < page_number:
             return None

Based on past review comments.

Committable suggestion skipped: line range outside the PR's diff.



@dataclass
class SessionState:
"""
Represents the state of a user session.
`SessionState` respects `FastMCP` asynchronous concurrency model that is built on Python's
asyncio runtime:
Asyncio achieves concurrency on a single thread by allowing tasks to yield control at `await`
points. This means when a `mcp.tool` API calls executes an await expression, it gets suspended,
and the event loop executes the next `mcp.tool` API calls from received requests issued by
clients.
Therefore, `SessionState` does not need thread-safe operations because a session cannot be
accessed from two threads at the same time.
"""

session_id: str
_num_items_per_page: int
_session_ttl_minutes: int

is_instructions_retrieved: bool = False
last_accessed: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
_cached_query_result: PaginatedQueryResult | None = None

_GET_INSTRUCTIONS_NOT_RUN_ERROR: ClassVar[dict[str, str]] = {
"Error": "Please call get_instructions() first to understand how to use this MCP server."
}

def get_instructions(self) -> str:
"""
Gets a pre-defined "system prompt" that guides the LLM behavior.
:return: A string of "system prompt".
"""
self.is_instructions_retrieved = True
return constants.SYSTEM_PROMPT

def cache_query_result_and_get_first_page(
self,
results: list[str],
) -> dict[str, Any]:
"""
:param results: Log entries from the query to cache.
:return: Forwards `SessionState.get_page_data`'s return values.
:return: _GET_INSTRUCTIONS_NOT_RUN_ERROR if `get_instructions` has not been called in this
session.
"""
if self.is_instructions_retrieved is False:
return self._GET_INSTRUCTIONS_NOT_RUN_ERROR.copy()

self._cached_query_result = PaginatedQueryResult(
result_log_entries=results, num_items_per_page=self._num_items_per_page
)

return self.get_page_data(0)
Comment on lines 92 to 96
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Handle oversized caches instead of propagating ValueError.

Line 97 constructs PaginatedQueryResult without guarding ValueError. That exception bubbles up to the MCP tool and crashes the call whenever a query returns more than MAX_CACHED_RESULTS, contradicting the “cache up to 1 000 log entries” contract. Catch the ValueError and return a structured "Error" payload.

-        self._cached_query_result = PaginatedQueryResult(
-            result_log_entries=results, num_items_per_page=self._num_items_per_page
-        )
-
-        return self.get_page_data(0)
+        try:
+            self._cached_query_result = PaginatedQueryResult(
+                result_log_entries=results,
+                num_items_per_page=self._num_items_per_page,
+            )
+        except ValueError as exc:
+            return {"Error": str(exc)}
+
+        return self.get_page_data(0)
🤖 Prompt for AI Agents
In components/clp-mcp-server/clp_mcp_server/server/session_manager.py around
lines 97 to 101, constructing PaginatedQueryResult can raise ValueError for
oversized result sets; wrap that construction in a try/except ValueError, and in
the except branch avoid propagating the exception—optionally clear or leave
_cached_query_result unset and return a structured error payload (e.g. a dict
with type "Error" and a clear message about exceeding MAX_CACHED_RESULTS).
Ensure you still return the same shape the caller expects (not an exception) and
log the condition for observability.


def get_page_data(self, page_index: int) -> dict[str, Any]:
"""
Retrieves the n-th page of a paginated response with the paging metadata from the previous
query.
NOTE: This docstring must be synchronized with `get_nth_page`'s MCP tool call.
:param page_index: Zero-based index, e.g., 0 for the first page.
:return: A dictionary containing the following key-value pairs on success:
- "items": A list of log entries in the requested page.
- "total_pages": Total number of pages available from the query as an integer.
- "total_items": Total number of log entries available from the query as an integer.
- "num_items_per_page": Number of log entries per page.
- "has_next": Whether a page exists after the returned one.
- "has_previous": Whether a page exists before the returned one.
:return: A dictionary with the following key-value pair on failures:
- "Error": An error message describing the failure.
:return: _GET_INSTRUCTIONS_NOT_RUN_ERROR if `get_instructions` has not been called in this
session.
"""
if self.is_instructions_retrieved is False:
return self._GET_INSTRUCTIONS_NOT_RUN_ERROR.copy()

if self._cached_query_result is None:
return {"Error": "No previous paginated response in this session."}

page = self._cached_query_result.get_page(page_index)
if page is None:
return {"Error": "Page index is out of bounds."}

return {
"items": list(page),
"total_pages": page.page_count,
"total_items": page.item_count,
"num_items_per_page": page.items_per_page,
"has_next": page.next_page is not None,
"has_previous": page.previous_page is not None,
Comment on lines 133 to 143
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Return an empty first page when there are zero cached results.

When a search yields no hits, _num_pages is 0, so requesting index 0 returns None and we emit “Page index is out of bounds.” even though the query succeeded. Please special-case the zero-results path so page 0 returns an empty payload with the expected metadata.

-        page = self._cached_query_result.get_page(page_index)
+        cached_result = self._cached_query_result
+
+        if cached_result._num_pages == 0:
+            if page_index != 0:
+                return {"Error": "Page index is out of bounds."}
+            return {
+                "items": [],
+                "total_pages": 0,
+                "total_items": 0,
+                "num_items_per_page": cached_result.num_items_per_page,
+                "has_next": False,
+                "has_previous": False,
+            }
+
+        page = cached_result.get_page(page_index)
🤖 Prompt for AI Agents
In components/clp-mcp-server/clp_mcp_server/server/session_manager.py around
lines 125 to 135, the current code returns an "out of bounds" error when
requesting page 0 while there are zero cached results; instead detect the
zero-results case (e.g., _cached_query_result.item_count or
_cached_query_result.page_count == 0) and when page_index == 0 return an empty
page payload: items as an empty list, total_pages 0, total_items 0,
num_items_per_page 0 (or the configured page size if available), and
has_next/has_previous both False; otherwise keep the existing out-of-bounds
behavior for other invalid indexes.

}
Comment on lines 130 to 144
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Return an empty page when the cached result set is empty.

Lines 135-149 treat every zero-length cache as “Page index is out of bounds.” With _num_pages == 0, get_page(0) returns None, so even the first page of a successful empty query yields an error. Return a proper empty page payload for page_index == 0.

-        page = self._cached_query_result.get_page(page_index)
-        if page is None:
-            return {"Error": "Page index is out of bounds."}
-
-        return {
+        cached_result = self._cached_query_result
+
+        if cached_result._num_pages == 0:
+            if page_index != 0:
+                return {"Error": "Page index is out of bounds."}
+            return {
+                "items": [],
+                "total_pages": 0,
+                "total_items": 0,
+                "num_items_per_page": self._num_items_per_page,
+                "has_next": False,
+                "has_previous": False,
+            }
+
+        page = cached_result.get_page(page_index)
+        if page is None:
+            return {"Error": "Page index is out of bounds."}
+
+        return {
             "items": list(page),
             "total_pages": page.page_count,
             "total_items": page.item_count,
             "num_items_per_page": page.items_per_page,
             "has_next": page.next_page is not None,
             "has_previous": page.previous_page is not None,
         }
🤖 Prompt for AI Agents
In components/clp-mcp-server/clp_mcp_server/server/session_manager.py around
lines 135-149, the code treats a zero-length cached_query_result as
out-of-bounds because get_page(0) returns None; update the logic to special-case
an empty cached result when page_index == 0 and return a proper empty page
payload instead of an error. Specifically: if _cached_query_result is present
but _cached_query_result._num_pages == 0 and page_index == 0, return a dict with
"items": [], "total_pages": 0, "total_items": 0, "num_items_per_page": 0 (or the
cached default if available), "has_next": False, and "has_previous": False;
otherwise keep the existing out-of-bounds handling for other page_index values.


def is_expired(self) -> bool:
""":return: Whether the session has expired."""
time_diff = datetime.now(timezone.utc) - self.last_accessed
return time_diff > timedelta(minutes=self._session_ttl_minutes)

def update_access_time(self) -> None:
"""Updates the last accessed timestamp."""
self.last_accessed = datetime.now(timezone.utc)


class SessionManager:
"""
Session manager for concurrent user sessions.
`SessionManager` respects `FastMCP` asynchronous concurrency model that is built on Python's
asyncio runtime:
Asyncio achieves concurrency on a single thread by allowing tasks to yield control at `await`
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The explanation of the async concurrency within a single thread is duplicated in both SessionManger and SessionState. This does not seems to be a good practice. Maybe we can have a referral, I wonder what is the best practice? Should we move this to server.py where the mcp server is created?

points. This means when a `mcp.tool` API calls executes an await expression, it gets suspended,
and the event loop executes the next `mcp.tool` API calls from received requests issued by
clients.
The operations on `sessions` which maintains concurrent user sessions are performed by
asynchronous tasks. The above concurrency model guarantees that the operations of `sessions`
are atomic because it cannot be accessed from two threads at the same time.
"""

def __init__(self, session_ttl_minutes: int) -> None:
""":param session_ttl_minutes: Session time-to-live in minutes."""
self.sessions: dict[str, SessionState] = {}
self._session_ttl_minutes = session_ttl_minutes
self._cleanup_task: asyncio.Task | None = None

async def start(self) -> None:
"""Starts the asynchronous cleanup task."""
if self._cleanup_task is None:
self._cleanup_task = asyncio.create_task(self._cleanup_loop())

async def _cleanup_loop(self) -> None:
"""Cleans up all expired sessions periodically as an async task."""
while True:
await asyncio.sleep(constants.EXPIRED_SESSION_SWEEP_INTERVAL_SECONDS)
self.cleanup_expired_sessions()

def cleanup_expired_sessions(self) -> None:
"""Cleans up all expired sessions."""
expired_sessions = [sid for sid, session in self.sessions.items() if session.is_expired()]

for sid in expired_sessions:
del self.sessions[sid]

def cache_query_result_and_get_first_page(
self, session_id: str, query_results: list[str]
) -> dict[str, Any]:
"""
:param session_id:
:param query_results: Log entries from the query to cache.
:return: Forwards `SessionState.cache_query_result_and_get_first_page`'s return values.
"""
session = self.get_or_create_session(session_id)

return session.cache_query_result_and_get_first_page(results=query_results)

def get_nth_page(self, session_id: str, page_index: int) -> dict[str, Any]:
"""
Retrieves the n-th page of a paginated response with the paging metadata from the previous
query.
:param session_id:
:param page_index: Index of the page to retrieve (zero-based, e.g., 0 is the first page).
:return: Forwards `SessionState.get_page_data`'s return values.
"""
session = self.get_or_create_session(session_id)

return session.get_page_data(page_index)

def get_or_create_session(self, session_id: str) -> SessionState:
"""
Gets an existing session or creates a new one.
:param session_id:
:return: The `SessionState` object for the given `session_id`.
"""
if session_id in self.sessions and self.sessions[session_id].is_expired():
del self.sessions[session_id]

if session_id not in self.sessions:
self.sessions[session_id] = SessionState(
session_id, constants.NUM_ITEMS_PER_PAGE, self._session_ttl_minutes
)

session = self.sessions[session_id]

session.update_access_time()
return session
Loading
Loading