@@ -59,26 +59,71 @@ export async function GET(request: Request) {
5959 const jsonContent = await fs . readFile ( jsonPath , 'utf-8' )
6060 const metadata = JSON . parse ( jsonContent )
6161
62- // If the metadata has a "pages" array, it's a consolidated file
63- if ( metadata . pages && Array . isArray ( metadata . pages ) ) {
62+ // If the metadata has a "pages" array or is_consolidated flag , it's a consolidated file
63+ if ( ( metadata . pages && Array . isArray ( metadata . pages ) ) || metadata . is_consolidated === true ) {
6464 isConsolidated = true
65- pagesCount = metadata . pages . length
65+ pagesCount = metadata . pages ? metadata . pages . length : 1
6666 rootUrl = metadata . root_url || ''
6767 }
6868 } catch ( e ) {
6969 console . error ( `Error reading JSON metadata for ${ filename } :` , e )
70+ // Create a default metadata file if it doesn't exist or is invalid
71+ try {
72+ const defaultMetadata = {
73+ title : `Documentation for ${ filename . replace ( '.md' , '' ) } ` ,
74+ timestamp : new Date ( ) . toISOString ( ) ,
75+ pages : [
76+ {
77+ title : "Main Content" ,
78+ url : `file://${ filename . replace ( '.md' , '' ) } ` ,
79+ timestamp : new Date ( ) . toISOString ( ) ,
80+ internal_links : 0 ,
81+ external_links : 0
82+ }
83+ ] ,
84+ is_consolidated : true ,
85+ last_updated : new Date ( ) . toISOString ( )
86+ }
87+ await fs . writeFile ( jsonPath , JSON . stringify ( defaultMetadata , null , 2 ) , 'utf-8' )
88+ console . log ( `Created default metadata for ${ filename } ` )
89+ isConsolidated = true
90+ pagesCount = 1
91+ } catch ( writeError ) {
92+ console . error ( `Error creating default metadata for ${ filename } :` , writeError )
93+ }
7094 }
7195 } else {
7296 // Create JSON file if it doesn't exist
73- const jsonContent = JSON . stringify ( {
74- content,
75- metadata : {
76- wordCount : content . split ( / \s + / ) . length ,
77- charCount : content . length ,
78- timestamp : stats . mtime
97+ try {
98+ // Create a consolidated metadata file by default
99+ const defaultMetadata = {
100+ title : `Documentation for ${ filename . replace ( '.md' , '' ) } ` ,
101+ timestamp : new Date ( ) . toISOString ( ) ,
102+ content,
103+ pages : [
104+ {
105+ title : "Main Content" ,
106+ url : `file://${ filename . replace ( '.md' , '' ) } ` ,
107+ timestamp : new Date ( ) . toISOString ( ) ,
108+ internal_links : 0 ,
109+ external_links : 0
110+ }
111+ ] ,
112+ is_consolidated : true ,
113+ last_updated : new Date ( ) . toISOString ( ) ,
114+ metadata : {
115+ wordCount : content . split ( / \s + / ) . length ,
116+ charCount : content . length ,
117+ timestamp : stats . mtime
118+ }
79119 }
80- } , null , 2 )
81- await fs . writeFile ( jsonPath , jsonContent , 'utf-8' )
120+ await fs . writeFile ( jsonPath , JSON . stringify ( defaultMetadata , null , 2 ) , 'utf-8' )
121+ console . log ( `Created consolidated metadata for ${ filename } ` )
122+ isConsolidated = true
123+ pagesCount = 1
124+ } catch ( writeError ) {
125+ console . error ( `Error creating metadata for ${ filename } :` , writeError )
126+ }
82127 }
83128
84129 // Extract sections to count how many pages are included
@@ -153,9 +198,22 @@ export async function GET(request: Request) {
153198 // Only show consolidated files in the Stored Files section
154199 const consolidatedFiles = allFiles . filter ( file => file . isConsolidated )
155200
201+ // Additional filter to exclude files with UUID-like names
202+ // UUID pattern: 8-4-4-4-12 hex digits (e.g., 095104d8-8e90-48f0-8670-9e45c914f115)
203+ const uuidPattern = / ^ [ 0 - 9 a - f ] { 8 } - [ 0 - 9 a - f ] { 4 } - [ 0 - 9 a - f ] { 4 } - [ 0 - 9 a - f ] { 4 } - [ 0 - 9 a - f ] { 12 } $ / i
204+
205+ // Keep every consolidated file whose name is not a UUID (e.g., docs_crawl4ai_com is kept)
206+ // Non-UUID names are presumably the ones created through the crawling process
207+ const crawledFiles = consolidatedFiles . filter ( file => {
208+ // Check if the filename is NOT a UUID
209+ return ! uuidPattern . test ( file . name )
210+ } )
211+
212+ console . log ( `Found ${ consolidatedFiles . length } consolidated files, ${ crawledFiles . length } are crawled files` )
213+
156214 return NextResponse . json ( {
157215 success : true ,
158- files : consolidatedFiles
216+ files : crawledFiles
159217 } )
160218 }
161219
0 commit comments