Skip to content

Commit d9caa76

Browse files
feat(ui): Filter UUID files from Consolidated Files view
- Filter out files with UUID-like identifiers (8-4-4-4-12 format) from the Consolidated Files section, showing only domain-named files from crawling
- Improve error handling in the MCP server to create default metadata with "pages" array when JSON files don't exist
- Add logging to track filtered files count for debugging

This change improves the user experience by showing only relevant crawled files in the Consolidated Files section, making it easier to find and manage content. The UUID-based files (typically auto-generated) are now hidden, while domain-named files (created through the crawling process) remain visible.
1 parent 7590a12 commit d9caa76

File tree

2 files changed

+102
-12
lines changed

2 files changed

+102
-12
lines changed

app/api/storage/route.ts

Lines changed: 70 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -59,26 +59,71 @@ export async function GET(request: Request) {
5959
const jsonContent = await fs.readFile(jsonPath, 'utf-8')
6060
const metadata = JSON.parse(jsonContent)
6161

62-
// If the metadata has a "pages" array, it's a consolidated file
63-
if (metadata.pages && Array.isArray(metadata.pages)) {
62+
// If the metadata has a "pages" array or is_consolidated flag, it's a consolidated file
63+
if ((metadata.pages && Array.isArray(metadata.pages)) || metadata.is_consolidated === true) {
6464
isConsolidated = true
65-
pagesCount = metadata.pages.length
65+
pagesCount = metadata.pages ? metadata.pages.length : 1
6666
rootUrl = metadata.root_url || ''
6767
}
6868
} catch (e) {
6969
console.error(`Error reading JSON metadata for ${filename}:`, e)
70+
// Create a default metadata file if it doesn't exist or is invalid
71+
try {
72+
const defaultMetadata = {
73+
title: `Documentation for ${filename.replace('.md', '')}`,
74+
timestamp: new Date().toISOString(),
75+
pages: [
76+
{
77+
title: "Main Content",
78+
url: `file://${filename.replace('.md', '')}`,
79+
timestamp: new Date().toISOString(),
80+
internal_links: 0,
81+
external_links: 0
82+
}
83+
],
84+
is_consolidated: true,
85+
last_updated: new Date().toISOString()
86+
}
87+
await fs.writeFile(jsonPath, JSON.stringify(defaultMetadata, null, 2), 'utf-8')
88+
console.log(`Created default metadata for ${filename}`)
89+
isConsolidated = true
90+
pagesCount = 1
91+
} catch (writeError) {
92+
console.error(`Error creating default metadata for ${filename}:`, writeError)
93+
}
7094
}
7195
} else {
7296
// Create JSON file if it doesn't exist
73-
const jsonContent = JSON.stringify({
74-
content,
75-
metadata: {
76-
wordCount: content.split(/\s+/).length,
77-
charCount: content.length,
78-
timestamp: stats.mtime
97+
try {
98+
// Create a consolidated metadata file by default
99+
const defaultMetadata = {
100+
title: `Documentation for ${filename.replace('.md', '')}`,
101+
timestamp: new Date().toISOString(),
102+
content,
103+
pages: [
104+
{
105+
title: "Main Content",
106+
url: `file://${filename.replace('.md', '')}`,
107+
timestamp: new Date().toISOString(),
108+
internal_links: 0,
109+
external_links: 0
110+
}
111+
],
112+
is_consolidated: true,
113+
last_updated: new Date().toISOString(),
114+
metadata: {
115+
wordCount: content.split(/\s+/).length,
116+
charCount: content.length,
117+
timestamp: stats.mtime
118+
}
79119
}
80-
}, null, 2)
81-
await fs.writeFile(jsonPath, jsonContent, 'utf-8')
120+
await fs.writeFile(jsonPath, JSON.stringify(defaultMetadata, null, 2), 'utf-8')
121+
console.log(`Created consolidated metadata for ${filename}`)
122+
isConsolidated = true
123+
pagesCount = 1
124+
} catch (writeError) {
125+
console.error(`Error creating metadata for ${filename}:`, writeError)
126+
}
82127
}
83128

84129
// Extract sections to count how many pages are included
@@ -153,9 +198,22 @@ export async function GET(request: Request) {
153198
// Only show consolidated files in the Stored Files section
154199
const consolidatedFiles = allFiles.filter(file => file.isConsolidated)
155200

201+
// Additional filter to exclude files with UUID-like names
202+
// UUID pattern: 8-4-4-4-12 hex digits (e.g., 095104d8-8e90-48f0-8670-9e45c914f115)
203+
const uuidPattern = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i
204+
205+
// Keep only files with domain-like names (e.g., docs_crawl4ai_com)
206+
// These are files created through the crawling process
207+
const crawledFiles = consolidatedFiles.filter(file => {
208+
// Check if the filename is NOT a UUID
209+
return !uuidPattern.test(file.name)
210+
})
211+
212+
console.log(`Found ${consolidatedFiles.length} consolidated files, ${crawledFiles.length} are crawled files`)
213+
156214
return NextResponse.json({
157215
success: true,
158-
files: consolidatedFiles
216+
files: crawledFiles
159217
})
160218
}
161219

fast-markdown-mcp/src/fast_markdown_mcp/server.py

Lines changed: 32 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -95,6 +95,38 @@ async def get_metadata(self, file_id: str) -> dict:
9595
try:
9696
with open(file_path, 'r', encoding='utf-8') as f:
9797
return json.load(f)
98+
except FileNotFoundError:
99+
# Create a default metadata file with a pages array if the MD file exists
100+
md_file_path = self.base_path / f"{file_id}.md"
101+
if md_file_path.exists():
102+
try:
103+
content = md_file_path.read_text(encoding='utf-8')
104+
# Create a basic metadata structure with pages array to mark as consolidated
105+
from datetime import datetime
106+
default_metadata = {
107+
"title": f"Documentation for {file_id}",
108+
"timestamp": datetime.now().isoformat(),
109+
"pages": [
110+
{
111+
"title": "Main Content",
112+
"url": f"file://{file_id}",
113+
"timestamp": datetime.now().isoformat(),
114+
"internal_links": 0,
115+
"external_links": 0
116+
}
117+
],
118+
"is_consolidated": True,
119+
"last_updated": datetime.now().isoformat()
120+
}
121+
# Write the default metadata to file
122+
with open(file_path, 'w', encoding='utf-8') as f:
123+
json.dump(default_metadata, f, indent=2)
124+
logger.info(f"Created default metadata for {file_id}")
125+
return default_metadata
126+
except Exception as write_error:
127+
logger.error(f"Error creating default metadata for {file_id}: {write_error}")
128+
logger.error(f"Error reading metadata for {file_id}: File not found")
129+
return {}
98130
except Exception as e:
99131
logger.error(f"Error reading metadata for {file_id}: {e}")
100132
return {}

0 commit comments

Comments
 (0)