Skip to content

Commit 0258d0a

Browse files
Fixes for Docker-based networking
1 parent d3eb888 commit 0258d0a

36 files changed

+8830
-162
lines changed

.roo/mcp.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
{
2+
"mcpServers": {}
3+
}

app/api/all-files/route.ts

Lines changed: 19 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,21 @@ import path from 'path'
44

55
const STORAGE_DIR = path.join(process.cwd(), 'storage/markdown')
66

7+
// Define the structure for file details returned by this API
// (GET /api/all-files). Disk files and the now-empty "in-memory" list
// share this one shape so the route can return a single combined array.
interface FileDetails {
  name: string;            // base file name
  jsonPath: string;        // path to the companion .json file (markdown path with .md -> .json)
  markdownPath: string;    // path to the .md file itself
  timestamp: Date;         // file timestamp — presumably last-modified; TODO confirm against producer
  size: number;            // size — presumably bytes; verify against the code that fills this in
  wordCount: number;       // word count of the markdown content
  charCount: number;       // character count of the markdown content
  isConsolidated: boolean; // true when the file aggregates multiple crawled pages
  pagesCount: number;      // number of crawled pages represented by this file
  rootUrl: string;         // root URL the crawl started from ('' when unknown)
  isInMemory: boolean;     // true for files held in memory rather than on disk (legacy; now always false)
}
21+
722
export async function GET(request: Request) {
823
try {
924
// Only get .md files
@@ -87,34 +102,10 @@ export async function GET(request: Request) {
87102
metadata?: any;
88103
}
89104

90-
// Get in-memory files from the backend
91-
let memoryFiles = []
92-
try {
93-
const memoryResponse = await fetch('http://backend:24125/api/memory-files')
94-
if (memoryResponse.ok) {
95-
const memoryData = await memoryResponse.json()
96-
if (memoryData.success && Array.isArray(memoryData.files)) {
97-
// Convert in-memory files to the same format as disk files
98-
memoryFiles = memoryData.files
99-
.filter((file: MemoryFile) => !file.isJson) // Only include markdown files
100-
.map((file: MemoryFile) => ({
101-
name: file.name,
102-
jsonPath: file.path.replace('.md', '.json'),
103-
markdownPath: file.path,
104-
timestamp: new Date(file.timestamp),
105-
size: file.size,
106-
wordCount: file.wordCount,
107-
charCount: file.charCount,
108-
isConsolidated: false,
109-
pagesCount: 1,
110-
rootUrl: '',
111-
isInMemory: true
112-
}))
113-
}
114-
}
115-
} catch (e) {
116-
console.error('Error fetching in-memory files:', e)
117-
}
105+
// Removed fetch for /api/memory-files as the endpoint no longer exists in the backend.
106+
// The concept of separate "memory files" fetched via API is deprecated.
107+
// The 'memoryFiles' array remains empty, and the route now only lists files from disk.
108+
const memoryFiles: FileDetails[] = [] // Explicitly type the empty array
118109

119110
// Combine disk and memory files - return ALL files, not just consolidated ones
120111
const allFiles = [...diskFileDetails, ...memoryFiles]
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
import { NextResponse } from 'next/server';
2+
3+
// Define the backend URL, using environment variable or a default
4+
const INTERNAL_BACKEND_URL = process.env.BACKEND_URL || 'http://backend:24125';
5+
6+
export async function GET(
7+
request: Request, // Keep request parameter even if unused for standard signature
8+
{ params }: { params: { job_id: string } }
9+
) {
10+
const { job_id } = params;
11+
12+
if (!job_id) {
13+
return NextResponse.json({ error: 'Job ID is required' }, { status: 400 });
14+
}
15+
16+
const targetUrl = `${INTERNAL_BACKEND_URL}/api/crawl-status/${job_id}`;
17+
console.log(`[API Route] GET /api/crawl-status/${job_id} - Proxying to: ${targetUrl}`);
18+
19+
try {
20+
const response = await fetch(targetUrl, {
21+
method: 'GET',
22+
headers: {
23+
'Content-Type': 'application/json',
24+
},
25+
cache: 'no-store', // Ensure fresh data is fetched every time
26+
});
27+
28+
if (!response.ok) {
29+
console.error(`[API Route] Backend request failed for job ${job_id}: ${response.status} ${response.statusText}`);
30+
// Attempt to read error body from backend if available
31+
let errorBody = { error: 'Failed to fetch status from backend' };
32+
try {
33+
errorBody = await response.json();
34+
} catch (parseError) {
35+
console.error(`[API Route] Could not parse error response body from backend for job ${job_id}`);
36+
}
37+
return NextResponse.json(errorBody, { status: response.status });
38+
}
39+
40+
const data = await response.json();
41+
// console.log(`[API Route] Successfully fetched status for job ${job_id}:`, data); // Optional: Log successful data
42+
return NextResponse.json(data);
43+
44+
} catch (error) {
45+
console.error(`[API Route] Network or other error fetching status for job ${job_id}:`, error);
46+
return NextResponse.json({ error: 'Internal Server Error' }, { status: 500 });
47+
}
48+
}

app/api/crawl/route.ts

Lines changed: 40 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,45 +1,55 @@
11
import { NextResponse } from 'next/server'
2-
import { DiscoveredPage } from '@/lib/types'
2+
import { DiscoveredPage, CrawlRequest } from '@/lib/types' // Added CrawlRequest
33

44
export async function POST(request: Request) {
55
try {
6-
const { pages } = await request.json()
6+
// Extract pages and job_id from the request
7+
const { pages, job_id }: CrawlRequest = await request.json()
78

8-
if (!Array.isArray(pages)) {
9+
// Validate input
10+
if (!Array.isArray(pages) || !job_id) {
911
return NextResponse.json(
10-
{ error: 'Pages array is required' },
12+
{ error: 'Pages array and job_id are required' },
1113
{ status: 400 }
1214
)
1315
}
1416

15-
// TODO: Replace with actual Crawl4AI Python backend call
16-
// For now, return mock markdown data for testing the UI
17-
const mockMarkdown = `# Documentation
18-
${pages.map((page: DiscoveredPage) => `
19-
## ${page.title || 'Untitled Page'}
20-
URL: ${page.url}
21-
22-
This is mock content for ${page.title || 'this page'}.
23-
It will be replaced with actual crawled content from the Crawl4AI backend.
24-
25-
---`).join('\n')}
26-
`
27-
28-
// Simulate network delay and processing time
29-
await new Promise(resolve => setTimeout(resolve, 2000))
30-
31-
return NextResponse.json({
32-
markdown: mockMarkdown,
33-
stats: {
34-
pagesCrawled: pages.length,
35-
totalWords: mockMarkdown.split(/\s+/).length,
36-
dataSize: `${Math.round(mockMarkdown.length / 1024)} KB`
37-
}
38-
})
17+
// Define backend URL
18+
const INTERNAL_BACKEND_URL = process.env.BACKEND_URL || 'http://backend:24125';
19+
const backendCrawlUrl = `${INTERNAL_BACKEND_URL}/api/crawl`;
20+
21+
console.log(`Proxying crawl request for job_id: ${job_id} to ${backendCrawlUrl} with ${pages.length} pages.`);
22+
23+
// Make the actual backend call
24+
const response = await fetch(backendCrawlUrl, {
25+
method: 'POST',
26+
headers: {
27+
'Content-Type': 'application/json',
28+
},
29+
body: JSON.stringify({ pages, job_id }),
30+
});
31+
32+
// Handle backend error response
33+
if (!response.ok) {
34+
const errorText = await response.text();
35+
console.error(`Backend crawl request failed for job_id: ${job_id}. Status: ${response.status}, Body: ${errorText}`);
36+
return NextResponse.json(
37+
{ error: `Backend request failed: ${response.statusText}`, details: errorText },
38+
{ status: response.status }
39+
);
40+
}
41+
42+
// Parse and forward the successful backend response
43+
const backendData = await response.json();
44+
console.log(`Backend crawl request successful for job_id: ${job_id}. Response:`, backendData);
45+
return NextResponse.json(backendData); // Forward backend response directly
46+
3947
} catch (error) {
40-
console.error('Error in crawl route:', error)
48+
console.error('Error in Next.js /api/crawl route:', error)
49+
// Ensure error is an instance of Error before accessing message
50+
const errorMessage = error instanceof Error ? error.message : 'An unknown error occurred';
4151
return NextResponse.json(
42-
{ error: 'Failed to crawl pages' },
52+
{ error: 'Failed to proxy crawl request', details: errorMessage },
4353
{ status: 500 }
4454
)
4555
}

app/api/discover/route.ts

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,11 @@ export async function POST(request: Request) {
1717
console.log('Making discover request for URL:', url, 'with depth:', validatedDepth)
1818

1919
// Make a direct request to the backend API instead of using the discoverSubdomains function
20-
const backendUrl = process.env.NEXT_PUBLIC_BACKEND_URL || process.env.BACKEND_URL || 'http://localhost:24125'
21-
console.log('Using backend URL:', backendUrl)
20+
const INTERNAL_BACKEND_URL = process.env.BACKEND_URL || 'http://backend:24125';
21+
console.log('Using internal backend URL:', INTERNAL_BACKEND_URL);
2222

23-
console.log('Sending request to backend API:', `${backendUrl}/api/discover`)
24-
const response = await fetch(`${backendUrl}/api/discover`, {
23+
console.log('Sending request to backend API:', `${INTERNAL_BACKEND_URL}/api/discover`);
24+
const response = await fetch(`${INTERNAL_BACKEND_URL}/api/discover`, {
2525
method: 'POST',
2626
headers: {
2727
'Content-Type': 'application/json',
@@ -50,10 +50,8 @@ export async function POST(request: Request) {
5050
}
5151

5252
// Even if we get an empty array, we should still return it with a 200 status
53-
return NextResponse.json({
54-
pages: data.pages || [],
55-
message: data.message || (data.pages?.length === 0 ? 'No pages discovered' : `Found ${data.pages?.length} pages`)
56-
})
53+
// Forward the exact response from the backend, including the job_id
54+
return NextResponse.json(data)
5755

5856
} catch (error) {
5957
console.error('Error in discover route:', error)

app/api/mcp/config/route.ts

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
import { NextRequest, NextResponse } from 'next/server';
2+
3+
// Define the internal backend URL with a fallback
4+
const INTERNAL_BACKEND_URL = process.env.BACKEND_URL || 'http://backend:24125';
5+
const CONFIG_ENDPOINT = `${INTERNAL_BACKEND_URL}/api/mcp/config`;
6+
7+
export async function GET(request: NextRequest) {
8+
console.log(`[API MCP Config] Received GET request`);
9+
10+
try {
11+
console.log(`[API MCP Config] Fetching config from: ${CONFIG_ENDPOINT}`);
12+
const response = await fetch(CONFIG_ENDPOINT, {
13+
method: 'GET',
14+
headers: {
15+
'Content-Type': 'application/json',
16+
// Forward any relevant headers if needed in the future
17+
},
18+
cache: 'no-store', // Ensure fresh data is fetched
19+
});
20+
21+
if (!response.ok) {
22+
const errorText = await response.text();
23+
console.error(`[API MCP Config] Backend request failed: ${response.status} ${response.statusText}`, errorText);
24+
return NextResponse.json(
25+
{ error: `Failed to fetch MCP config from backend: ${response.statusText}`, details: errorText },
26+
{ status: response.status }
27+
);
28+
}
29+
30+
const data = await response.json();
31+
console.log(`[API MCP Config] Successfully fetched config.`);
32+
return NextResponse.json(data);
33+
34+
} catch (error) {
35+
console.error('[API MCP Config] Error fetching MCP config:', error);
36+
const errorMessage = error instanceof Error ? error.message : 'An unknown error occurred';
37+
return NextResponse.json(
38+
{ error: 'Internal Server Error fetching MCP config', details: errorMessage },
39+
{ status: 500 }
40+
);
41+
}
42+
}

app/api/mcp/status/route.ts

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
import { NextRequest, NextResponse } from 'next/server';
2+
3+
// Define the internal backend URL with a fallback
4+
const INTERNAL_BACKEND_URL = process.env.BACKEND_URL || 'http://backend:24125';
5+
const STATUS_ENDPOINT = `${INTERNAL_BACKEND_URL}/api/mcp/status`;
6+
7+
export async function GET(request: NextRequest) {
8+
console.log(`[API MCP Status] Received GET request`);
9+
10+
try {
11+
console.log(`[API MCP Status] Fetching status from: ${STATUS_ENDPOINT}`);
12+
const response = await fetch(STATUS_ENDPOINT, {
13+
method: 'GET',
14+
headers: {
15+
'Content-Type': 'application/json',
16+
// Forward any relevant headers if needed in the future
17+
},
18+
cache: 'no-store', // Ensure fresh data is fetched
19+
});
20+
21+
if (!response.ok) {
22+
const errorText = await response.text();
23+
console.error(`[API MCP Status] Backend request failed: ${response.status} ${response.statusText}`, errorText);
24+
return NextResponse.json(
25+
{ error: `Failed to fetch MCP status from backend: ${response.statusText}`, details: errorText },
26+
{ status: response.status }
27+
);
28+
}
29+
30+
const data = await response.json();
31+
console.log(`[API MCP Status] Successfully fetched status.`);
32+
return NextResponse.json(data);
33+
34+
} catch (error) {
35+
console.error('[API MCP Status] Error fetching MCP status:', error);
36+
const errorMessage = error instanceof Error ? error.message : 'An unknown error occurred';
37+
return NextResponse.json(
38+
{ error: 'Internal Server Error fetching MCP status', details: errorMessage },
39+
{ status: 500 }
40+
);
41+
}
42+
}

app/page.tsx

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,13 @@ import {
1818
DialogTitle,
1919
DialogTrigger,
2020
} from "@/components/ui/dialog"; // Import Dialog components
21-
import { Info } from 'lucide-react'; // Import an icon for the trigger
21+
import { Info, Settings } from 'lucide-react'; // Import icons
2222
import { discoverSubdomains, crawlPages, validateUrl, formatBytes } from '@/lib/crawl-service'
2323
import { saveMarkdown, loadMarkdown } from '@/lib/storage'
2424
import { useToast } from "@/components/ui/use-toast"
2525
import { DiscoveredPage, CrawlJobStatus, OverallStatus, UrlStatus } from '@/lib/types' // Import status types & UrlStatus
26-
import ConsolidatedFiles from '@/components/ConsolidatedFiles' // Import ConsolidatedFiles
26+
import ConsolidatedFiles from '@/components/ConsolidatedFiles'; // Import ConsolidatedFiles
27+
import { MCPSettingsPopover } from '@/components/MCPSettingsPopover'; // Import MCP Settings Popover (Named Export)
2728

2829
export default function Home() {
2930
const [url, setUrl] = useState('')
@@ -161,7 +162,7 @@ const handleCrawlSelectedClick = async () => {
161162
// setDiscoveredPages(...)
162163

163164
// Call updated service function, passing job ID
164-
const crawlResponse = await crawlPages({ pages: pagesToCrawl, jobId: currentJobId })
165+
const crawlResponse = await crawlPages({ pages: pagesToCrawl, job_id: currentJobId }) // Changed jobId to job_id
165166
console.log('Crawl initiation response:', crawlResponse)
166167

167168
if (!crawlResponse.success || crawlResponse.error) {
@@ -265,7 +266,6 @@ const handleCrawlSelectedClick = async () => {
265266
let intervalId: NodeJS.Timeout | null = null;
266267
let isFetching = false; // Prevent overlapping fetches
267268
const POLLING_INTERVAL = 3000; // Poll every 3 seconds
268-
const BACKEND_URL = process.env.NEXT_PUBLIC_BACKEND_URL || 'http://localhost:24125';
269269

270270

271271
const fetchStatus = async () => {
@@ -275,7 +275,7 @@ const handleCrawlSelectedClick = async () => {
275275

276276
try {
277277
console.log(`(Page) Fetching status for job: ${currentJobId}`);
278-
const response = await fetch(`${BACKEND_URL}/api/crawl-status/${currentJobId}`);
278+
const response = await fetch(`/api/crawl-status/${currentJobId}`);
279279
const data: CrawlJobStatus = await response.json();
280280

281281
if (!response.ok) {
@@ -346,11 +346,18 @@ const handleCrawlSelectedClick = async () => {
346346
</header>
347347

348348
<div className="container mx-auto px-4 py-8 space-y-6">
349+
{/* Container for Job Stats and Settings Button */}
349350
<div className="bg-gray-800/50 backdrop-blur-lg rounded-2xl p-6 border border-gray-700 shadow-xl">
350-
{/* Replace ProcessingBlock with JobStatsSummary */}
351-
{/* <h2 className="text-2xl font-semibold mb-4 text-purple-400">Processing Status</h2> */}
352-
{/* The title is now inside JobStatsSummary */}
353-
<JobStatsSummary jobStatus={jobStatus} />
351+
<div className="flex justify-between items-start"> {/* Flex container - Align items to top */}
352+
{/* JobStatsSummary remains here */}
353+
<JobStatsSummary jobStatus={jobStatus} />
354+
{/* MCP Settings Popover Trigger */}
355+
<MCPSettingsPopover>
356+
<Button variant="outline" size="icon" aria-label="MCP Settings" className="bg-white text-black hover:bg-gray-100 hover:text-black">
357+
<Settings className="h-4 w-4" />
358+
</Button>
359+
</MCPSettingsPopover>
360+
</div>
354361
</div>
355362

356363
<div className="bg-gray-800/50 backdrop-blur-lg rounded-2xl p-6 border border-gray-700 shadow-xl">

backend/app/crawler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -222,7 +222,7 @@ async def discover_pages(
222222
await asyncio.sleep(poll_interval) # Use await for asyncio.sleep
223223
logger.debug(f"Polling link discovery task {task_id} (attempt {attempt+1}/{max_attempts})")
224224
status_response = requests.get(
225-
poll_url,
225+
f"{CRAWL4AI_URL}/task/{task_id}",
226226
headers=headers,
227227
timeout=10
228228
)

0 commit comments

Comments
 (0)