diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index e662683c4..91ade8b2d 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -43,8 +43,9 @@ interface InterpreterOptions { binaryCallback: (output: any, mimeType: string) => (void | Promise); debug: boolean; debugChannel: Partial<{ - activeId: Function, - debugMessage: Function, + activeId: (id: number) => void, + debugMessage: (msg: string) => void, + setActionType: (type: string) => void, }> } @@ -377,12 +378,20 @@ export default class Interpreter extends EventEmitter { */ const wawActions: Record void> = { screenshot: async (params: PageScreenshotOptions) => { + if (this.options.debugChannel?.setActionType) { + this.options.debugChannel.setActionType('screenshot'); + } + const screenshotBuffer = await page.screenshot({ ...params, path: undefined, }); await this.options.binaryCallback(screenshotBuffer, 'image/png'); }, enqueueLinks: async (selector: string) => { + if (this.options.debugChannel?.setActionType) { + this.options.debugChannel.setActionType('enqueueLinks'); + } + const links: string[] = await page.locator(selector) .evaluateAll( // @ts-ignore @@ -409,6 +418,10 @@ export default class Interpreter extends EventEmitter { await page.close(); }, scrape: async (selector?: string) => { + if (this.options.debugChannel?.setActionType) { + this.options.debugChannel.setActionType('scrape'); + } + await this.ensureScriptsLoaded(page); const scrapeResults: Record[] = await page.evaluate((s) => window.scrape(s ?? null), selector); @@ -416,48 +429,40 @@ export default class Interpreter extends EventEmitter { }, scrapeSchema: async (schema: Record) => { + if (this.options.debugChannel?.setActionType) { + this.options.debugChannel.setActionType('scrapeSchema'); + } + await this.ensureScriptsLoaded(page); const scrapeResult = await page.evaluate((schemaObj) => window.scrapeSchema(schemaObj), schema); - const newResults = Array.isArray(scrapeResult) ? scrapeResult : [scrapeResult]; - newResults.forEach((result) => { - Object.entries(result).forEach(([key, value]) => { - const keyExists = this.cumulativeResults.some( - (item) => key in item && item[key] !== undefined - ); - - if (!keyExists) { - this.cumulativeResults.push({ [key]: value }); - } - }); + if (!this.cumulativeResults || !Array.isArray(this.cumulativeResults)) { + this.cumulativeResults = []; + } + + if (this.cumulativeResults.length === 0) { + this.cumulativeResults.push({}); + } + + const mergedResult = this.cumulativeResults[0]; + const resultToProcess = Array.isArray(scrapeResult) ? scrapeResult[0] : scrapeResult; + + Object.entries(resultToProcess).forEach(([key, value]) => { + if (value !== undefined) { + mergedResult[key] = value; + } }); - - const mergedResult: Record[] = [ - Object.fromEntries( - Object.entries( - this.cumulativeResults.reduce((acc, curr) => { - Object.entries(curr).forEach(([key, value]) => { - // If the key doesn't exist or the current value is not undefined, add/update it - if (value !== undefined) { - acc[key] = value; - } - }); - return acc; - }, {}) - ) - ) - ]; - - // Log cumulative results after each action - console.log("CUMULATIVE results:", this.cumulativeResults); - console.log("MERGED results:", mergedResult); - - await this.options.serializableCallback(mergedResult); - // await this.options.serializableCallback(scrapeResult); + + console.log("Updated merged result:", mergedResult); + await this.options.serializableCallback([mergedResult]); }, scrapeList: async (config: { listSelector: string, fields: any, limit?: number, pagination: any }) => { + if (this.options.debugChannel?.setActionType) { + this.options.debugChannel.setActionType('scrapeList'); + } + await this.ensureScriptsLoaded(page); if (!config.pagination) { const scrapeResults: Record[] = await page.evaluate((cfg) => window.scrapeList(cfg), config); @@ -469,6 +474,10 @@ export default class Interpreter extends EventEmitter { }, scrapeListAuto: async (config: { listSelector: string }) => { + if (this.options.debugChannel?.setActionType) { + this.options.debugChannel.setActionType('scrapeListAuto'); + } + await this.ensureScriptsLoaded(page); const scrapeResults: { selector: string, innerText: string }[] = await page.evaluate((listSelector) => { @@ -479,6 +488,10 @@ export default class Interpreter extends EventEmitter { }, scroll: async (pages?: number) => { + if (this.options.debugChannel?.setActionType) { + this.options.debugChannel.setActionType('scroll'); + } + await page.evaluate(async (pagesInternal) => { for (let i = 1; i <= (pagesInternal ?? 1); i += 1) { // @ts-ignore @@ -488,6 +501,10 @@ export default class Interpreter extends EventEmitter { }, script: async (code: string) => { + if (this.options.debugChannel?.setActionType) { + this.options.debugChannel.setActionType('script'); + } + const AsyncFunction: FunctionConstructor = Object.getPrototypeOf( async () => { }, ).constructor; @@ -496,6 +513,10 @@ export default class Interpreter extends EventEmitter { }, flag: async () => new Promise((res) => { + if (this.options.debugChannel?.setActionType) { + this.options.debugChannel.setActionType('flag'); + } + this.emit('flag', page, res); }), }; @@ -526,6 +547,10 @@ export default class Interpreter extends EventEmitter { const params = !step.args || Array.isArray(step.args) ? step.args : [step.args]; await wawActions[step.action as CustomFunctions](...(params ?? [])); } else { + if (this.options.debugChannel?.setActionType) { + this.options.debugChannel.setActionType(String(step.action)); + } + // Implements the dot notation for the "method name" in the workflow const levels = String(step.action).split('.'); const methodName = levels[levels.length - 1]; diff --git a/package.json b/package.json index 75350f1fa..1cfa1894f 100644 --- a/package.json +++ b/package.json @@ -50,7 +50,6 @@ "lodash": "^4.17.21", "loglevel": "^1.8.0", "loglevel-plugin-remote": "^0.6.8", - "maxun-core": "^0.0.15", "minio": "^8.0.1", "moment-timezone": "^0.5.45", "node-cron": "^3.0.3", diff --git a/public/locales/de.json b/public/locales/de.json index 2d48b16db..54c009f0c 100644 --- a/public/locales/de.json +++ b/public/locales/de.json @@ -535,20 +535,23 @@ "output_data": "Ausgabedaten", "log": "Protokoll" }, - "empty_output": "Die Ausgabe ist leer.", - "loading": "Ausführung läuft. Extrahierte Daten werden nach Abschluss des Durchlaufs hier angezeigt.", + "buttons": { + "stop": "Stoppen" + }, + "loading": "Daten werden geladen...", + "empty_output": "Keine Ausgabedaten verfügbar", "captured_data": { "title": "Erfasste Daten", - "download_json": "Als JSON herunterladen", - "download_csv": "Als CSV herunterladen" + "download_csv": "CSV herunterladen", + "view_full": "Vollständige Daten anzeigen", + "items": "Elemente", + "schema_title": "Erfasste Texte", + "list_title": "Erfasste Listen" }, "captured_screenshot": { - "title": "Erfasster Screenshot", - "download": "Screenshot herunterladen", - "render_failed": "Das Bild konnte nicht gerendert werden" - }, - "buttons": { - "stop": "Stoppen" + "title": "Erfasste Screenshots", + "download": "Herunterladen", + "render_failed": "Fehler beim Rendern des Screenshots" } }, "navbar": { diff --git a/public/locales/en.json b/public/locales/en.json index 6f7fe38d8..ee2e2e925 100644 --- a/public/locales/en.json +++ b/public/locales/en.json @@ -177,6 +177,11 @@ "pagination": "Select how the robot can capture the rest of the list", "limit": "Choose the number of items to extract", "complete": "Capture is complete" + }, + "actions": { + "text": "Capture Text", + "list": "Capture List", + "screenshot": "Capture Screenshot" } }, "right_panel": { @@ -543,20 +548,23 @@ "output_data": "Output Data", "log": "Log" }, - "empty_output": "The output is empty.", - "loading": "Run in progress. Extracted data will appear here once run completes.", + "buttons": { + "stop": "Stop" + }, + "loading": "Loading data...", + "empty_output": "No output data available", "captured_data": { "title": "Captured Data", - "download_json": "Download as JSON", - "download_csv": "Download as CSV" + "download_csv": "Download CSV", + "view_full": "View Full Data", + "items": "items", + "schema_title": "Captured Texts", + "list_title": "Captured Lists" }, "captured_screenshot": { - "title": "Captured Screenshot", - "download": "Download Screenshot", - "render_failed": "The image failed to render" - }, - "buttons": { - "stop": "Stop" + "title": "Captured Screenshots", + "download": "Download", + "render_failed": "Failed to render screenshot" } }, "navbar": { diff --git a/public/locales/es.json b/public/locales/es.json index edc762a46..d2d487c13 100644 --- a/public/locales/es.json +++ b/public/locales/es.json @@ -536,20 +536,23 @@ "output_data": "Datos de Salida", "log": "Registro" }, - "empty_output": "La salida está vacía.", - "loading": "Ejecución en curso. Los datos extraídos aparecerán aquí una vez que se complete la ejecución.", + "buttons": { + "stop": "Detener" + }, + "loading": "Cargando datos...", + "empty_output": "No hay datos de salida disponibles", "captured_data": { - "title": "Datos Capturados", - "download_json": "Descargar como JSON", - "download_csv": "Descargar como CSV" + "title": "Datos capturados", + "download_csv": "Descargar CSV", + "view_full": "Ver datos completos", + "items": "elementos", + "schema_title": "Textos capturados", + "list_title": "Listas capturadas" }, "captured_screenshot": { - "title": "Captura de Pantalla", - "download": "Descargar Captura", - "render_failed": "No se pudo renderizar la imagen" - }, - "buttons": { - "stop": "Detener" + "title": "Capturas de pantalla", + "download": "Descargar", + "render_failed": "Error al renderizar la captura de pantalla" } }, "navbar": { diff --git a/public/locales/ja.json b/public/locales/ja.json index 15d8c7e02..833494ee0 100644 --- a/public/locales/ja.json +++ b/public/locales/ja.json @@ -536,20 +536,23 @@ "output_data": "出力データ", "log": "ログ" }, - "empty_output": "出力は空です。", - "loading": "実行中です。実行が完了すると、抽出されたデータがここに表示されます。", + "buttons": { + "stop": "停止" + }, + "loading": "データを読み込み中...", + "empty_output": "出力データがありません", "captured_data": { - "title": "キャプチャされたデータ", - "download_json": "JSONとしてダウンロード", - "download_csv": "CSVとしてダウンロード" + "title": "キャプチャしたデータ", + "download_csv": "CSVをダウンロード", + "view_full": "完全なデータを表示", + "items": "アイテム", + "schema_title": "キャプチャしたテキスト", + "list_title": "キャプチャしたリスト" }, "captured_screenshot": { - "title": "キャプチャされたスクリーンショット", - "download": "スクリーンショットをダウンロード", - "render_failed": "画像のレンダリングに失敗しました" - }, - "buttons": { - "stop": "停止" + "title": "キャプチャしたスクリーンショット", + "download": "ダウンロード", + "render_failed": "スクリーンショットのレンダリングに失敗しました" } }, "navbar": { diff --git a/public/locales/zh.json b/public/locales/zh.json index faaf04961..27bb1e635 100644 --- a/public/locales/zh.json +++ b/public/locales/zh.json @@ -536,20 +536,23 @@ "output_data": "输出数据", "log": "日志" }, - "empty_output": "输出为空。", - "loading": "运行中。运行完成后,提取的数据将显示在此处。", + "buttons": { + "stop": "停止" + }, + "loading": "加载数据中...", + "empty_output": "没有可用的输出数据", "captured_data": { - "title": "捕获的数据", - "download_json": "下载为JSON", - "download_csv": "下载为CSV" + "title": "已捕获的数据", + "download_csv": "下载CSV", + "view_full": "查看完整数据", + "items": "项目", + "schema_title": "已捕获的文本", + "list_title": "已捕获的列表" }, "captured_screenshot": { - "title": "捕获的截图", - "download": "下载截图", - "render_failed": "图像渲染失败" - }, - "buttons": { - "stop": "停止" + "title": "已捕获的截图", + "download": "下载", + "render_failed": "渲染截图失败" } }, "navbar": { diff --git a/server/src/api/record.ts b/server/src/api/record.ts index b4014c3a8..2d6077a9c 100644 --- a/server/src/api/record.ts +++ b/server/src/api/record.ts @@ -586,6 +586,11 @@ async function executeRun(id: string, userId: string) { const binaryOutputService = new BinaryOutputService('maxun-run-screenshots'); const uploadedBinaryOutput = await binaryOutputService.uploadAndStoreBinaryOutput(run, interpretationInfo.binaryOutput); + const categorizedOutput = { + scrapeSchema: interpretationInfo.scrapeSchemaOutput || {}, + scrapeList: interpretationInfo.scrapeListOutput || {}, + }; + await destroyRemoteBrowser(plainRun.browserId, userId); const updatedRun = await run.update({ @@ -594,7 +599,10 @@ async function executeRun(id: string, userId: string) { finishedAt: new Date().toLocaleString(), browserId: plainRun.browserId, log: interpretationInfo.log.join('\n'), - serializableOutput: interpretationInfo.serializableOutput, + serializableOutput: { + scrapeSchema: Object.values(categorizedOutput.scrapeSchema), + scrapeList: Object.values(categorizedOutput.scrapeList), + }, binaryOutput: uploadedBinaryOutput, }); diff --git a/server/src/pgboss-worker.ts b/server/src/pgboss-worker.ts index 0639f4c72..54a706978 100644 --- a/server/src/pgboss-worker.ts +++ b/server/src/pgboss-worker.ts @@ -255,7 +255,6 @@ async function processRunExecution(job: Job) { return { success: true }; } - // Process the results const binaryOutputService = new BinaryOutputService('maxun-run-screenshots'); const uploadedBinaryOutput = await binaryOutputService.uploadAndStoreBinaryOutput(run, interpretationInfo.binaryOutput); @@ -264,36 +263,55 @@ async function processRunExecution(job: Job) { return { success: true }; } - // Update the run record with results + const categorizedOutput = { + scrapeSchema: interpretationInfo.scrapeSchemaOutput || {}, + scrapeList: interpretationInfo.scrapeListOutput || {} + }; + await run.update({ ...run, status: 'success', finishedAt: new Date().toLocaleString(), browserId: plainRun.browserId, log: interpretationInfo.log.join('\n'), - serializableOutput: interpretationInfo.serializableOutput, + serializableOutput: { + scrapeSchema: Object.values(categorizedOutput.scrapeSchema), + scrapeList: Object.values(categorizedOutput.scrapeList), + }, binaryOutput: uploadedBinaryOutput, }); // Track extraction metrics - let totalRowsExtracted = 0; + let totalSchemaItemsExtracted = 0; + let totalListItemsExtracted = 0; let extractedScreenshotsCount = 0; - let extractedItemsCount = 0; - - if (run.dataValues.binaryOutput && run.dataValues.binaryOutput["item-0"]) { - extractedScreenshotsCount = 1; + + if (categorizedOutput.scrapeSchema) { + Object.values(categorizedOutput.scrapeSchema).forEach((schemaResult: any) => { + if (Array.isArray(schemaResult)) { + totalSchemaItemsExtracted += schemaResult.length; + } else if (schemaResult && typeof schemaResult === 'object') { + totalSchemaItemsExtracted += 1; + } + }); } - - if (run.dataValues.serializableOutput && run.dataValues.serializableOutput["item-0"]) { - const itemsArray = run.dataValues.serializableOutput["item-0"]; - extractedItemsCount = itemsArray.length; - - totalRowsExtracted = itemsArray.reduce((total, item) => { - return total + Object.keys(item).length; - }, 0); + + if (categorizedOutput.scrapeList) { + Object.values(categorizedOutput.scrapeList).forEach((listResult: any) => { + if (Array.isArray(listResult)) { + totalListItemsExtracted += listResult.length; + } + }); } - - console.log(`Extracted Items Count: ${extractedItemsCount}`); + + if (uploadedBinaryOutput) { + extractedScreenshotsCount = Object.keys(uploadedBinaryOutput).length; + } + + const totalRowsExtracted = totalSchemaItemsExtracted + totalListItemsExtracted; + + console.log(`Extracted Schema Items Count: ${totalSchemaItemsExtracted}`); + console.log(`Extracted List Items Count: ${totalListItemsExtracted}`); console.log(`Extracted Screenshots Count: ${extractedScreenshotsCount}`); console.log(`Total Rows Extracted: ${totalRowsExtracted}`); @@ -306,7 +324,8 @@ async function processRunExecution(job: Job) { created_at: new Date().toISOString(), status: 'success', totalRowsExtracted, - extractedItemsCount, + schemaItemsExtracted: totalSchemaItemsExtracted, + listItemsExtracted: totalListItemsExtracted, extractedScreenshotsCount, } ); @@ -339,7 +358,7 @@ async function processRunExecution(job: Job) { robotName: recording.recording_meta.name, status: 'success', finishedAt: new Date().toLocaleString() - });; + }); // Check for and process queued runs before destroying the browser const queuedRunProcessed = await checkAndProcessQueuedRun(data.userId, plainRun.browserId); @@ -458,7 +477,10 @@ async function abortRun(runId: string, userId: string): Promise { } let currentLog = 'Run aborted by user'; - let serializableOutput: Record = {}; + let categorizedOutput = { + scrapeSchema: {}, + scrapeList: {}, + }; let binaryOutput: Record = {}; try { @@ -467,16 +489,15 @@ async function abortRun(runId: string, userId: string): Promise { currentLog = browser.interpreter.debugMessages.join('\n') || currentLog; } - if (browser.interpreter.serializableData) { - browser.interpreter.serializableData.forEach((item, index) => { - serializableOutput[`item-${index}`] = item; - }); + if (browser.interpreter.serializableDataByType) { + categorizedOutput = { + scrapeSchema: collectDataByType(browser.interpreter.serializableDataByType.scrapeSchema || []), + scrapeList: collectDataByType(browser.interpreter.serializableDataByType.scrapeList || []), + }; } if (browser.interpreter.binaryData) { - browser.interpreter.binaryData.forEach((item, index) => { - binaryOutput[`item-${index}`] = item; - }); + binaryOutput = collectBinaryData(browser.interpreter.binaryData); } } } catch (interpreterError) { @@ -488,7 +509,10 @@ async function abortRun(runId: string, userId: string): Promise { finishedAt: new Date().toLocaleString(), browserId: plainRun.browserId, log: currentLog, - serializableOutput, + serializableOutput: { + scrapeSchema: Object.values(categorizedOutput.scrapeSchema), + scrapeList: Object.values(categorizedOutput.scrapeList), + }, binaryOutput, }); @@ -529,6 +553,30 @@ async function abortRun(runId: string, userId: string): Promise { } } +/** + * Helper function to collect data from arrays into indexed objects + * @param dataArray Array of data to be transformed into an object with indexed keys + * @returns Object with indexed keys + */ +function collectDataByType(dataArray: any[]): Record { + return dataArray.reduce((result: Record, item, index) => { + result[`item-${index}`] = item; + return result; + }, {}); +} + +/** + * Helper function to collect binary data (like screenshots) + * @param binaryDataArray Array of binary data objects to be transformed + * @returns Object with indexed keys + */ +function collectBinaryData(binaryDataArray: { mimetype: string, data: string, type?: string }[]): Record { + return binaryDataArray.reduce((result: Record, item, index) => { + result[`item-${index}`] = item; + return result; + }, {}); +} + async function registerRunExecutionWorker() { try { const registeredUserQueues = new Map(); diff --git a/server/src/workflow-management/classes/Interpreter.ts b/server/src/workflow-management/classes/Interpreter.ts index c8aec13c4..0481687d8 100644 --- a/server/src/workflow-management/classes/Interpreter.ts +++ b/server/src/workflow-management/classes/Interpreter.ts @@ -87,9 +87,20 @@ export class WorkflowInterpreter { public debugMessages: string[] = []; /** - * An array of all the serializable data extracted from the run. + * Storage for different types of serializable data */ - public serializableData: string[] = []; + public serializableDataByType: { + scrapeSchema: any[], + scrapeList: any[], + } = { + scrapeSchema: [], + scrapeList: [], + }; + + /** + * Track the current action type being processed + */ + private currentActionType: string | null = null; /** * An array of all the binary data extracted from the run. @@ -167,9 +178,9 @@ export class WorkflowInterpreter { ) => { const params = settings.params ? settings.params : null; delete settings.params; - + const processedWorkflow = processWorkflow(workflow, true); - + const options = { ...settings, debugChannel: { @@ -181,25 +192,49 @@ export class WorkflowInterpreter { this.debugMessages.push(`[${new Date().toLocaleString()}] ` + msg); this.socket.emit('log', msg) }, + setActionType: (type: string) => { + this.currentActionType = type; + } }, serializableCallback: (data: any) => { - this.socket.emit('serializableCallback', data); + if (this.currentActionType === 'scrapeSchema') { + if (Array.isArray(data) && data.length > 0) { + this.socket.emit('serializableCallback', { + type: 'captureText', + data + }); + } else { + this.socket.emit('serializableCallback', { + type: 'captureText', + data : [data] + }); + } + } else if (this.currentActionType === 'scrapeList') { + this.socket.emit('serializableCallback', { + type: 'captureList', + data + }); + } }, binaryCallback: (data: string, mimetype: string) => { - this.socket.emit('binaryCallback', { data, mimetype }); + this.socket.emit('binaryCallback', { + data, + mimetype, + type: 'captureScreenshot' + }); } } - + const interpreter = new Interpreter(processedWorkflow, options); this.interpreter = interpreter; - + interpreter.on('flag', async (page, resume) => { if (this.activeId !== null && this.breakpoints[this.activeId]) { logger.log('debug', `breakpoint hit id: ${this.activeId}`); this.socket.emit('breakpointHit'); this.interpretationIsPaused = true; } - + if (this.interpretationIsPaused) { this.interpretationResume = resume; logger.log('debug', `Paused inside of flag: ${page.url()}`); @@ -209,13 +244,13 @@ export class WorkflowInterpreter { resume(); } }); - + this.socket.emit('log', '----- Starting the interpretation -----', false); - + const status = await interpreter.run(page, params); - + this.socket.emit('log', `----- The interpretation finished with status: ${status} -----`, false); - + logger.log('debug', `Interpretation finished`); this.interpreter = null; this.socket.emit('activePairId', -1); @@ -246,7 +281,11 @@ export class WorkflowInterpreter { this.interpreter = null; this.breakpoints = []; this.interpretationResume = null; - this.serializableData = []; + this.currentActionType = null; + this.serializableDataByType = { + scrapeSchema: [], + scrapeList: [], + }; this.binaryData = []; } @@ -267,6 +306,8 @@ export class WorkflowInterpreter { const processedWorkflow = processWorkflow(workflow); + let mergedScrapeSchema = {}; + const options = { ...settings, debugChannel: { @@ -278,9 +319,23 @@ export class WorkflowInterpreter { this.debugMessages.push(`[${new Date().toLocaleString()}] ` + msg); this.socket.emit('debugMessage', msg) }, + setActionType: (type: string) => { + this.currentActionType = type; + } }, serializableCallback: (data: any) => { - this.serializableData.push(data); + if (this.currentActionType === 'scrapeSchema') { + if (Array.isArray(data) && data.length > 0) { + mergedScrapeSchema = { ...mergedScrapeSchema, ...data[0] }; + this.serializableDataByType.scrapeSchema.push(data); + } else { + mergedScrapeSchema = { ...mergedScrapeSchema, ...data }; + this.serializableDataByType.scrapeSchema.push([data]); + } + } else if (this.currentActionType === 'scrapeList') { + this.serializableDataByType.scrapeList.push(data); + } + this.socket.emit('serializableCallback', data); }, binaryCallback: async (data: string, mimetype: string) => { @@ -311,16 +366,21 @@ export class WorkflowInterpreter { const status = await interpreter.run(page, params); - const lastArray = this.serializableData.length > 1 - ? [this.serializableData[this.serializableData.length - 1]] - : this.serializableData; - + // Structure the output to maintain separate data for each action type const result = { log: this.debugMessages, result: status, - serializableOutput: lastArray.reduce((reducedObject, item, index) => { + scrapeSchemaOutput: Object.keys(mergedScrapeSchema).length > 0 + ? { "schema-merged": [mergedScrapeSchema] } + : this.serializableDataByType.scrapeSchema.reduce((reducedObject, item, index) => { + return { + [`schema-${index}`]: item, + ...reducedObject, + } + }, {}), + scrapeListOutput: this.serializableDataByType.scrapeList.reduce((reducedObject, item, index) => { return { - [`item-${index}`]: item, + [`list-${index}`]: item, ...reducedObject, } }, {}), diff --git a/server/src/workflow-management/integrations/airtable.ts b/server/src/workflow-management/integrations/airtable.ts index 934747211..401bc11d4 100644 --- a/server/src/workflow-management/integrations/airtable.ts +++ b/server/src/workflow-management/integrations/airtable.ts @@ -11,6 +11,11 @@ interface AirtableUpdateTask { retries: number; } +interface SerializableOutput { + scrapeSchema?: any[]; + scrapeList?: any[]; +} + const MAX_RETRIES = 3; const BASE_API_DELAY = 2000; @@ -39,38 +44,108 @@ async function refreshAirtableToken(refreshToken: string) { } } +function mergeRelatedData(serializableOutput: SerializableOutput, binaryOutput: Record) { + const mergedRecords: Record[] = []; + + const maxLength = Math.max( + ...[ + ...(serializableOutput.scrapeSchema ?? []).map(arr => arr?.length ?? 0), + ...(serializableOutput.scrapeList ?? []).map(arr => arr?.length ?? 0), + 0 + ] + ); + + for (let i = 0; i < maxLength; i++) { + mergedRecords.push({}); + } + + if (serializableOutput.scrapeSchema) { + for (const schemaArray of serializableOutput.scrapeSchema) { + if (!Array.isArray(schemaArray)) continue; + + for (let i = 0; i < schemaArray.length; i++) { + if (i >= mergedRecords.length) break; + mergedRecords[i] = { ...mergedRecords[i], ...schemaArray[i] }; + } + } + } + + if (serializableOutput.scrapeList) { + for (const listArray of serializableOutput.scrapeList) { + if (!Array.isArray(listArray)) continue; + + for (let i = 0; i < listArray.length; i++) { + if (i >= mergedRecords.length) break; + mergedRecords[i] = { ...mergedRecords[i], ...listArray[i] }; + } + } + } + + if (binaryOutput && Object.keys(binaryOutput).length > 0) { + for (let i = 0; i < mergedRecords.length; i++) { + const screenshotKey = `item-${i}`; + if (binaryOutput[screenshotKey]) { + mergedRecords[i].Screenshot = binaryOutput[screenshotKey]; + mergedRecords[i].Key = screenshotKey; + } + } + + for (const [key, url] of Object.entries(binaryOutput)) { + if (mergedRecords.some(record => record.Key === key)) { + continue; + } + + mergedRecords.push({ + "Key": key, + "Screenshot": url + }); + } + } + + return mergedRecords; +} + export async function updateAirtable(robotId: string, runId: string) { try { + console.log(`Starting Airtable update for run: ${runId}, robot: ${robotId}`); + const run = await Run.findOne({ where: { runId } }); if (!run) throw new Error(`Run not found for runId: ${runId}`); const plainRun = run.toJSON(); if (plainRun.status !== 'success') { - console.log('Run status is not success'); + console.log('Run status is not success, skipping Airtable update'); return; } - let data: { [key: string]: any }[] = []; - if (plainRun.serializableOutput?.['item-0']) { - data = plainRun.serializableOutput['item-0'] as { [key: string]: any }[]; - } else if (plainRun.binaryOutput?.['item-0']) { - data = [{ "File URL": plainRun.binaryOutput['item-0'] }]; - } - const robot = await Robot.findOne({ where: { 'recording_meta.id': robotId } }); if (!robot) throw new Error(`Robot not found for robotId: ${robotId}`); const plainRobot = robot.toJSON(); - if (plainRobot.airtable_base_id && plainRobot.airtable_table_name && plainRobot.airtable_table_id) { - console.log(`Writing to Airtable base ${plainRobot.airtable_base_id}`); + + if (!plainRobot.airtable_base_id || !plainRobot.airtable_table_name || !plainRobot.airtable_table_id) { + console.log('Airtable integration not configured'); + return; + } + + console.log(`Airtable configuration found - Base: ${plainRobot.airtable_base_id}, Table: ${plainRobot.airtable_table_name}`); + + const serializableOutput = plainRun.serializableOutput as SerializableOutput; + const binaryOutput = plainRun.binaryOutput || {}; + + const mergedData = mergeRelatedData(serializableOutput, binaryOutput); + + if (mergedData.length > 0) { await writeDataToAirtable( robotId, plainRobot.airtable_base_id, plainRobot.airtable_table_name, plainRobot.airtable_table_id, - data + mergedData ); - console.log(`Data written to Airtable for ${robotId}`); + console.log(`All data written to Airtable for ${robotId}`); + } else { + console.log(`No data to write to Airtable for ${robotId}`); } } catch (error: any) { console.error(`Airtable update failed: ${error.message}`); @@ -125,42 +200,142 @@ export async function writeDataToAirtable( tableId: string, data: any[] ) { + if (!data || data.length === 0) { + console.log('No data to write. Skipping.'); + return; + } + try { return await withTokenRefresh(robotId, async (accessToken: string) => { const airtable = new Airtable({ apiKey: accessToken }); const base = airtable.base(baseId); + const processedData = data.map(item => { + const cleanedItem: Record = {}; + + for (const [key, value] of Object.entries(item)) { + if (value === null || value === undefined) { + cleanedItem[key] = ''; + } else if (typeof value === 'object' && !Array.isArray(value)) { + cleanedItem[key] = JSON.stringify(value); + } else { + cleanedItem[key] = value; + } + } + + return cleanedItem; + }); + const existingFields = await getExistingFields(base, tableName); - console.log(`Found ${existingFields.length} existing fields in Airtable`); + console.log(`Found ${existingFields.length} existing fields in Airtable: ${existingFields.join(', ')}`); - const dataFields = [...new Set(data.flatMap(row => Object.keys(row)))]; + const dataFields = [...new Set(processedData.flatMap(row => Object.keys(row)))]; console.log(`Found ${dataFields.length} fields in data: ${dataFields.join(', ')}`); const missingFields = dataFields.filter(field => !existingFields.includes(field)); - console.log(`Found ${missingFields.length} missing fields: ${missingFields.join(', ')}`); + const hasNewColumns = missingFields.length > 0; + console.log(`Found ${missingFields.length} new fields: ${missingFields.join(', ')}`); for (const field of missingFields) { - const sampleRow = data.find(row => field in row); + const sampleRow = processedData.find(row => field in row); if (sampleRow) { const sampleValue = sampleRow[field]; try { await createAirtableField(baseId, tableName, field, sampleValue, accessToken, tableId); console.log(`Successfully created field: ${field}`); + + await new Promise(resolve => setTimeout(resolve, 200)); } catch (fieldError: any) { console.warn(`Warning: Could not create field "${field}": ${fieldError.message}`); } } } - - await deleteEmptyRecords(base, tableName); - const BATCH_SIZE = 10; - for (let i = 0; i < data.length; i += BATCH_SIZE) { - const batch = data.slice(i, i + BATCH_SIZE); - await retryableAirtableWrite(base, tableName, batch); + let existingRecords: Array<{ id: string, fields: Record }> = []; + + if (hasNewColumns) { + existingRecords = await fetchAllRecords(base, tableName); + console.log(`Found ${existingRecords.length} existing records in Airtable`); } - logger.log('info', `Successfully wrote ${data.length} records to Airtable`); + if (hasNewColumns && existingRecords.length > 0) { + const recordsToUpdate = []; + const recordsToCreate = []; + + const newColumnData = processedData.map(record => { + const newColumnsOnly: Record = {}; + missingFields.forEach(field => { + if (field in record) { + newColumnsOnly[field] = record[field]; + } + }); + return newColumnsOnly; + }); + + for (let i = 0; i < Math.min(existingRecords.length, newColumnData.length); i++) { + if (Object.keys(newColumnData[i]).length > 0) { + recordsToUpdate.push({ + id: existingRecords[i].id, + fields: newColumnData[i] + }); + } + } + + const existingColumnsBeingUpdated = dataFields.filter(field => + existingFields.includes(field) && !missingFields.includes(field) + ); + + if (existingColumnsBeingUpdated.length > 0) { + recordsToCreate.push(...processedData.map(record => ({ fields: record }))); + console.log(`Will append ${recordsToCreate.length} new records with all data`); + } else { + if (processedData.length > existingRecords.length) { + const additionalRecords = processedData.slice(existingRecords.length); + recordsToCreate.push(...additionalRecords.map(record => ({ fields: record }))); + console.log(`Will append ${recordsToCreate.length} additional records`); + } + } + + if (recordsToUpdate.length > 0) { + console.log(`Updating ${recordsToUpdate.length} existing records with new columns`); + const BATCH_SIZE = 10; + for (let i = 0; i < recordsToUpdate.length; i += BATCH_SIZE) { + const batch = recordsToUpdate.slice(i, i + BATCH_SIZE); + console.log(`Updating batch ${Math.floor(i/BATCH_SIZE) + 1} of ${Math.ceil(recordsToUpdate.length/BATCH_SIZE)}`); + + try { + await retryableAirtableUpdate(base, tableName, batch); + } catch (batchError: any) { + console.error(`Error updating batch: ${batchError.message}`); + throw batchError; + } + + await new Promise(resolve => setTimeout(resolve, 500)); + } + } + } else { + console.log(`Appending all ${processedData.length} records to Airtable`); + const recordsToCreate = processedData.map(record => ({ fields: record })); + + const BATCH_SIZE = 10; + for (let i = 0; i < recordsToCreate.length; i += BATCH_SIZE) { + const batch = recordsToCreate.slice(i, i + BATCH_SIZE); + console.log(`Creating batch ${Math.floor(i/BATCH_SIZE) + 1} of ${Math.ceil(recordsToCreate.length/BATCH_SIZE)}`); + + try { + await retryableAirtableCreate(base, tableName, batch); + } catch (batchError: any) { + console.error(`Error creating batch: ${batchError.message}`); + throw batchError; + } + + await new Promise(resolve => setTimeout(resolve, 500)); + } + } + + await deleteEmptyRecords(base, tableName); + + logger.log('info', `Successfully processed ${processedData.length} records in Airtable`); }); } catch (error: any) { logger.log('error', `Airtable write failed: ${error.message}`); @@ -168,6 +343,20 @@ export async function writeDataToAirtable( } } +async function fetchAllRecords(base: Airtable.Base, tableName: string): Promise }>> { + try { + console.log(`Fetching all records from ${tableName}...`); + const records = await base(tableName).select().all(); + return records.map(record => ({ + id: record.id, + fields: record.fields + })); + } catch (error: any) { + console.warn(`Warning: Could not fetch all records: ${error.message}`); + return []; + } +} + async function deleteEmptyRecords(base: Airtable.Base, tableName: string): Promise { console.log('Checking for empty records to clear...'); @@ -183,31 +372,53 @@ async function deleteEmptyRecords(base: Airtable.Base, tableName: string): Promi }); if (emptyRecords.length > 0) { + console.log(`Found ${emptyRecords.length} empty records to delete`); const BATCH_SIZE = 10; for (let i = 0; i < emptyRecords.length; i += BATCH_SIZE) { const batch = emptyRecords.slice(i, i + BATCH_SIZE); const recordIds = batch.map(record => record.id); await base(tableName).destroy(recordIds); + console.log(`Deleted batch ${Math.floor(i/BATCH_SIZE) + 1} of ${Math.ceil(emptyRecords.length/BATCH_SIZE)}`); } - } + console.log(`Successfully deleted ${emptyRecords.length} empty records`); + } else { + console.log('No empty records found to delete'); + } } catch (error: any) { console.warn(`Warning: Could not clear empty records: ${error.message}`); console.warn('Will continue without deleting empty records'); } } -async function retryableAirtableWrite( +async function retryableAirtableCreate( + base: Airtable.Base, + tableName: string, + batch: any[], + retries = MAX_RETRIES +): Promise { + try { + await base(tableName).create(batch); + } catch (error) { + if (retries > 0) { + await new Promise(resolve => setTimeout(resolve, BASE_API_DELAY)); + return retryableAirtableCreate(base, tableName, batch, retries - 1); + } + throw error; + } +} + +async function retryableAirtableUpdate( base: Airtable.Base, tableName: string, batch: any[], retries = MAX_RETRIES ): Promise { try { - await base(tableName).create(batch.map(row => ({ fields: row }))); + await base(tableName).update(batch); } catch (error) { if (retries > 0) { await new Promise(resolve => setTimeout(resolve, BASE_API_DELAY)); - return retryableAirtableWrite(base, tableName, batch, retries - 1); + return retryableAirtableUpdate(base, tableName, batch, retries - 1); } throw error; } @@ -217,18 +428,19 @@ async function retryableAirtableWrite( async function getExistingFields(base: Airtable.Base, tableName: string): Promise { try { const records = await base(tableName).select({ pageSize: 5 }).firstPage(); + const fieldNames = new Set(); + if (records.length > 0) { - const fieldNames = new Set(); records.forEach(record => { Object.keys(record.fields).forEach(field => fieldNames.add(field)); }); - - const headers = Array.from(fieldNames); - console.log(`Found ${headers.length} headers from records: ${headers.join(', ')}`); - return headers; } - return []; + + const headers = Array.from(fieldNames); + console.log(`Found ${headers.length} headers from records: ${headers.join(', ')}`); + return headers; } catch (error) { + console.warn(`Warning: Error fetching existing fields: ${error}`); return []; } } @@ -299,17 +511,27 @@ export const processAirtableUpdates = async () => { for (const runId in airtableUpdateTasks) { const task = airtableUpdateTasks[runId]; - if (task.status !== 'pending') continue; - - hasPendingTasks = true; - try { - await updateAirtable(task.robotId, task.runId); - delete airtableUpdateTasks[runId]; - } catch (error: any) { - task.retries += 1; - if (task.retries >= MAX_RETRIES) { - task.status = 'failed'; - logger.log('error', `Permanent failure for run ${runId}: ${error.message}`); + + if (task.status === 'pending') { + hasPendingTasks = true; + console.log(`Processing Airtable update for run: ${runId}`); + + try { + await updateAirtable(task.robotId, task.runId); + console.log(`Successfully updated Airtable for runId: ${runId}`); + airtableUpdateTasks[runId].status = 'completed'; + delete airtableUpdateTasks[runId]; + } catch (error: any) { + console.error(`Failed to update Airtable for run ${task.runId}:`, error); + + if (task.retries < MAX_RETRIES) { + airtableUpdateTasks[runId].retries += 1; + console.log(`Retrying task for runId: ${runId}, attempt: ${task.retries + 1}`); + } else { + airtableUpdateTasks[runId].status = 'failed'; + console.log(`Max retries reached for runId: ${runId}. Marking task as failed.`); + logger.log('error', `Permanent failure for run ${runId}: ${error.message}`); + } } } } @@ -319,6 +541,7 @@ export const processAirtableUpdates = async () => { break; } + console.log('Waiting for 5 seconds before checking again...'); await new Promise(resolve => setTimeout(resolve, 5000)); } }; \ No newline at end of file diff --git a/server/src/workflow-management/integrations/gsheet.ts b/server/src/workflow-management/integrations/gsheet.ts index b492a7df9..402ace5d7 100644 --- a/server/src/workflow-management/integrations/gsheet.ts +++ b/server/src/workflow-management/integrations/gsheet.ts @@ -10,6 +10,11 @@ interface GoogleSheetUpdateTask { retries: number; } +interface SerializableOutput { + scrapeSchema?: any[]; + scrapeList?: any[]; +} + const MAX_RETRIES = 5; export let googleSheetUpdateTasks: { [runId: string]: GoogleSheetUpdateTask } = {}; @@ -25,18 +30,6 @@ export async function updateGoogleSheet(robotId: string, runId: string) { const plainRun = run.toJSON(); if (plainRun.status === 'success') { - let data: { [key: string]: any }[] = []; - if (plainRun.serializableOutput && Object.keys(plainRun.serializableOutput).length > 0) { - data = plainRun.serializableOutput['item-0'] as { [key: string]: any }[]; - - } else if (plainRun.binaryOutput && plainRun.binaryOutput['item-0']) { - // Handle binaryOutput by setting the URL as a data entry - const binaryUrl = plainRun.binaryOutput['item-0'] as string; - - // Create a placeholder object with the binary URL - data = [{ "Screenshot URL": binaryUrl }]; - } - const robot = await Robot.findOne({ where: { 'recording_meta.id': robotId } }); if (!robot) { @@ -44,35 +37,159 @@ export async function updateGoogleSheet(robotId: string, runId: string) { } const plainRobot = robot.toJSON(); - const spreadsheetId = plainRobot.google_sheet_id; - if (plainRobot.google_sheet_email && spreadsheetId) { - console.log(`Preparing to write data to Google Sheet for robot: ${robotId}, spreadsheetId: ${spreadsheetId}`); - - await writeDataToSheet(robotId, spreadsheetId, data); - console.log(`Data written to Google Sheet successfully for Robot: ${robotId} and Run: ${runId}`); - } else { + + if (!plainRobot.google_sheet_email || !spreadsheetId) { console.log('Google Sheets integration not configured.'); + return; + } + + console.log(`Preparing to write data to Google Sheet for robot: ${robotId}, spreadsheetId: ${spreadsheetId}`); + + const serializableOutput = plainRun.serializableOutput as SerializableOutput; + + if (serializableOutput) { + if (serializableOutput.scrapeSchema && serializableOutput.scrapeSchema.length > 0) { + await processOutputType( + robotId, + spreadsheetId, + 'Text', + serializableOutput.scrapeSchema, + plainRobot + ); + } + + if (serializableOutput.scrapeList && serializableOutput.scrapeList.length > 0) { + await processOutputType( + robotId, + spreadsheetId, + 'List', + serializableOutput.scrapeList, + plainRobot + ); + } + } + + if (plainRun.binaryOutput && Object.keys(plainRun.binaryOutput).length > 0) { + const screenshots = Object.entries(plainRun.binaryOutput).map(([key, url]) => ({ + "Screenshot Key": key, + "Screenshot URL": url + })); + + await processOutputType( + robotId, + spreadsheetId, + 'Screenshot', + [screenshots], + plainRobot + ); } + + console.log(`Data written to Google Sheet successfully for Robot: ${robotId} and Run: ${runId}`); } else { console.log('Run status is not success or serializableOutput is missing.'); } } catch (error: any) { console.error(`Failed to write data to Google Sheet for Robot: ${robotId} and Run: ${runId}: ${error.message}`); + throw error; + } +} + +async function processOutputType( + robotId: string, + spreadsheetId: string, + outputType: string, + outputData: any[], + robotConfig: any +) { + for (let i = 0; i < outputData.length; i++) { + const data = outputData[i]; + + if (!data || data.length === 0) { + console.log(`No data to write for ${outputType}-${i}. Skipping.`); + continue; + } + + const sheetName = `${outputType}-${i}`; + + await ensureSheetExists(spreadsheetId, sheetName, robotConfig); + + await writeDataToSheet(robotId, spreadsheetId, data, sheetName, robotConfig); + console.log(`Data written to ${sheetName} sheet for ${outputType} data`); } -}; +} -export async function writeDataToSheet(robotId: string, spreadsheetId: string, data: any[]) { +async function ensureSheetExists(spreadsheetId: string, sheetName: string, robotConfig: any) { try { - const robot = await Robot.findOne({ where: { 'recording_meta.id': robotId } }); + const oauth2Client = getOAuth2Client(robotConfig); + const sheets = google.sheets({ version: 'v4', auth: oauth2Client }); + + const response = await sheets.spreadsheets.get({ + spreadsheetId, + fields: 'sheets.properties.title' + }); + + const existingSheets = response.data.sheets?.map(sheet => sheet.properties?.title) || []; + + if (!existingSheets.includes(sheetName)) { + await sheets.spreadsheets.batchUpdate({ + spreadsheetId, + requestBody: { + requests: [ + { + addSheet: { + properties: { + title: sheetName + } + } + } + ] + } + }); + console.log(`Created new sheet: ${sheetName}`); + } + } catch (error: any) { + logger.log('error', `Error ensuring sheet exists: ${error.message}`); + throw error; + } +} + +function getOAuth2Client(robotConfig: any) { + const oauth2Client = new google.auth.OAuth2( + process.env.GOOGLE_CLIENT_ID, + process.env.GOOGLE_CLIENT_SECRET, + process.env.GOOGLE_REDIRECT_URI + ); + + oauth2Client.setCredentials({ + access_token: robotConfig.google_access_token, + refresh_token: robotConfig.google_refresh_token, + }); + return oauth2Client; +} + +export async function writeDataToSheet( + robotId: string, + spreadsheetId: string, + data: any[], + sheetName: string = 'Sheet1', + robotConfig?: any +) { + try { + let robot = robotConfig; + if (!robot) { - throw new Error(`Robot not found for robotId: ${robotId}`); - } + robot = await Robot.findOne({ where: { 'recording_meta.id': robotId } }); - const plainRobot = robot.toJSON(); + if (!robot) { + throw new Error(`Robot not found for robotId: ${robotId}`); + } + + robot = robot.toJSON(); + } - if (!plainRobot.google_access_token || !plainRobot.google_refresh_token) { + if (!robot.google_access_token || !robot.google_refresh_token) { throw new Error('Google Sheets access not configured for user'); } @@ -83,16 +200,19 @@ export async function writeDataToSheet(robotId: string, spreadsheetId: string, d ); oauth2Client.setCredentials({ - access_token: plainRobot.google_access_token, - refresh_token: plainRobot.google_refresh_token, + access_token: robot.google_access_token, + refresh_token: robot.google_refresh_token, }); oauth2Client.on('tokens', async (tokens) => { - if (tokens.refresh_token) { - await robot.update({ google_refresh_token: tokens.refresh_token }); - } - if (tokens.access_token) { - await robot.update({ google_access_token: tokens.access_token }); + if (tokens.refresh_token || tokens.access_token) { + const robotModel = await Robot.findOne({ where: { 'recording_meta.id': robotId } }); + if (robotModel) { + const updateData: any = {}; + if (tokens.refresh_token) updateData.google_refresh_token = tokens.refresh_token; + if (tokens.access_token) updateData.google_access_token = tokens.access_token; + await robotModel.update(updateData); + } } }); @@ -100,7 +220,7 @@ export async function writeDataToSheet(robotId: string, spreadsheetId: string, d const checkResponse = await sheets.spreadsheets.values.get({ spreadsheetId, - range: 'Sheet1!1:1', + range: `${sheetName}!1:1`, }); if (!data || data.length === 0) { @@ -109,7 +229,6 @@ export async function writeDataToSheet(robotId: string, spreadsheetId: string, d } const expectedHeaders = Object.keys(data[0]); - const rows = data.map(item => Object.values(item)); const existingHeaders = @@ -129,28 +248,28 @@ export async function writeDataToSheet(robotId: string, spreadsheetId: string, d if (isSheetEmpty || !headersMatch) { resource = { values: [expectedHeaders, ...rows] }; - console.log('Including headers in the append operation.'); + console.log(`Including headers in the append operation for sheet ${sheetName}.`); } else { resource = { values: rows }; - console.log('Headers already exist and match, only appending data rows.'); + console.log(`Headers already exist and match in sheet ${sheetName}, only appending data rows.`); } - console.log('Attempting to write to spreadsheet:', spreadsheetId); + console.log(`Attempting to write to spreadsheet: ${spreadsheetId}, sheet: ${sheetName}`); const response = await sheets.spreadsheets.values.append({ spreadsheetId, - range: 'Sheet1!A1', + range: `${sheetName}!A1`, valueInputOption: 'USER_ENTERED', requestBody: resource, }); if (response.status === 200) { - console.log('Data successfully appended to Google Sheet.'); + console.log(`Data successfully appended to sheet: ${sheetName}`); } else { console.error('Google Sheets append failed:', response); } - logger.log(`info`, `Data written to Google Sheet: ${spreadsheetId}`); + logger.log(`info`, `Data written to Google Sheet: ${spreadsheetId}, sheet: ${sheetName}`); } catch (error: any) { logger.log(`error`, `Error writing data to Google Sheet: ${error.message}`); throw error; @@ -169,6 +288,7 @@ export const processGoogleSheetUpdates = async () => { try { await updateGoogleSheet(task.robotId, task.runId); console.log(`Successfully updated Google Sheet for runId: ${runId}`); + googleSheetUpdateTasks[runId].status = 'completed'; delete googleSheetUpdateTasks[runId]; } catch (error: any) { console.error(`Failed to update Google Sheets for run ${task.runId}:`, error); diff --git a/server/src/workflow-management/scheduler/index.ts b/server/src/workflow-management/scheduler/index.ts index 8267fbb81..1559e63c3 100644 --- a/server/src/workflow-management/scheduler/index.ts +++ b/server/src/workflow-management/scheduler/index.ts @@ -132,6 +132,11 @@ async function executeRun(id: string, userId: string) { const binaryOutputService = new BinaryOutputService('maxun-run-screenshots'); const uploadedBinaryOutput = await binaryOutputService.uploadAndStoreBinaryOutput(run, interpretationInfo.binaryOutput); + const categorizedOutput = { + scrapeSchema: interpretationInfo.scrapeSchemaOutput || {}, + scrapeList: interpretationInfo.scrapeListOutput || {}, + }; + await destroyRemoteBrowser(plainRun.browserId, userId); await run.update({ @@ -140,7 +145,10 @@ async function executeRun(id: string, userId: string) { finishedAt: new Date().toLocaleString(), browserId: plainRun.browserId, log: interpretationInfo.log.join('\n'), - serializableOutput: interpretationInfo.serializableOutput, + serializableOutput: { + scrapeSchema: Object.values(categorizedOutput.scrapeSchema), + scrapeList: Object.values(categorizedOutput.scrapeList), + }, binaryOutput: uploadedBinaryOutput, }); diff --git a/src/components/browser/BrowserContent.tsx b/src/components/browser/BrowserContent.tsx index a73ed6e72..14b9385ee 100644 --- a/src/components/browser/BrowserContent.tsx +++ b/src/components/browser/BrowserContent.tsx @@ -78,7 +78,7 @@ export const BrowserContent = () => { [socket] ); - const handleUrlChanged = (url: string) => { + const handleUrlChanged = useCallback((url: string) => { const parsedUrl = new URL(url); if (parsedUrl.hostname) { const host = parsedUrl.hostname @@ -100,7 +100,7 @@ export const BrowserContent = () => { ]); } } - }; + }, [tabs, tabIndex]); const tabHasBeenClosedHandler = useCallback( (index: number) => { @@ -132,7 +132,7 @@ export const BrowserContent = () => { .catch((error) => { console.log("Fetching current url failed"); }); - }, [handleUrlChanged]); + }, []); return (
diff --git a/src/components/recorder/RightSidePanel.tsx b/src/components/recorder/RightSidePanel.tsx index 9bbbc8b26..89a2c486d 100644 --- a/src/components/recorder/RightSidePanel.tsx +++ b/src/components/recorder/RightSidePanel.tsx @@ -34,9 +34,6 @@ const fetchWorkflow = (id: string, callback: (response: WorkflowFile) => void) = ).catch((error) => { console.log(error.message) }) }; -// TODO: -// 1. Add description for each browser step -// 2. Handle non custom action steps interface RightSidePanelProps { onFinishCapture: () => void; } @@ -46,8 +43,6 @@ export const RightSidePanel: React.FC = ({ onFinishCapture const [errors, setErrors] = useState<{ [id: string]: string }>({}); const [confirmedTextSteps, setConfirmedTextSteps] = useState<{ [id: string]: boolean }>({}); const [confirmedListTextFields, setConfirmedListTextFields] = useState<{ [listId: string]: { [fieldKey: string]: boolean } }>({}); - // const [showPaginationOptions, setShowPaginationOptions] = useState(false); - // const [showLimitOptions, setShowLimitOptions] = useState(false); const [showCaptureList, setShowCaptureList] = useState(true); const [showCaptureScreenshot, setShowCaptureScreenshot] = useState(true); const [showCaptureText, setShowCaptureText] = useState(true); @@ -58,15 +53,31 @@ export const RightSidePanel: React.FC = ({ onFinishCapture const { panelHeight } = useBrowserDimensionsStore(); const { lastAction, notify, currentWorkflowActionsState, setCurrentWorkflowActionsState, resetInterpretationLog } = useGlobalInfoStore(); - const { getText, startGetText, stopGetText, getScreenshot, startGetScreenshot, stopGetScreenshot, getList, startGetList, stopGetList, startPaginationMode, stopPaginationMode, paginationType, updatePaginationType, limitType, customLimit, updateLimitType, updateCustomLimit, stopLimitMode, startLimitMode, captureStage, setCaptureStage, showPaginationOptions, setShowPaginationOptions, showLimitOptions, setShowLimitOptions, workflow, setWorkflow } = useActionContext(); + const { + getText, startGetText, stopGetText, + getList, startGetList, stopGetList, + getScreenshot, startGetScreenshot, stopGetScreenshot, + startPaginationMode, stopPaginationMode, + paginationType, updatePaginationType, + limitType, customLimit, updateLimitType, updateCustomLimit, + stopLimitMode, startLimitMode, + captureStage, setCaptureStage, + showPaginationOptions, setShowPaginationOptions, + showLimitOptions, setShowLimitOptions, + workflow, setWorkflow, + activeAction, setActiveAction, + startAction, finishAction + } = useActionContext(); + const { browserSteps, updateBrowserTextStepLabel, deleteBrowserStep, addScreenshotStep, updateListTextFieldLabel, removeListTextField } = useBrowserSteps(); const { id, socket } = useSocketStore(); const { t } = useTranslation(); + const isAnyActionActive = activeAction !== 'none'; + const workflowHandler = useCallback((data: WorkflowFile) => { setWorkflow(data); - //setRecordingLength(data.workflow.length); - }, []) + }, [setWorkflow]); useEffect(() => { if (socket) { @@ -113,12 +124,10 @@ export const RightSidePanel: React.FC = ({ onFinishCapture hasScrapeSchemaAction, }); - const shouldHideActions = hasScrapeListAction || hasScrapeSchemaAction || hasScreenshotAction; - - setShowCaptureList(!shouldHideActions); - setShowCaptureScreenshot(!shouldHideActions); - setShowCaptureText(!(hasScrapeListAction || hasScreenshotAction)); - }, [workflow]); + setShowCaptureList(true); + setShowCaptureScreenshot(true); + setShowCaptureText(true); + }, [workflow, setCurrentWorkflowActionsState]); const handleMouseEnter = (id: number) => { setHoverStates(prev => ({ ...prev, [id]: true })); @@ -128,8 +137,6 @@ export const RightSidePanel: React.FC = ({ onFinishCapture setHoverStates(prev => ({ ...prev, [id]: false })); }; - const handlePairDelete = () => { } - const handleStartGetText = () => { setIsCaptureTextConfirmed(false); startGetText(); @@ -140,6 +147,10 @@ export const RightSidePanel: React.FC = ({ onFinishCapture startGetList(); } + const handleStartGetScreenshot = () => { + startGetScreenshot(); + }; + const handleTextLabelChange = (id: number, label: string, listId?: number, fieldKey?: string) => { if (listId !== undefined && fieldKey !== undefined) { // Prevent editing if the field is confirmed @@ -253,7 +264,6 @@ export const RightSidePanel: React.FC = ({ onFinishCapture return settings; }, [browserSteps, browserStepIdList]); - const stopCaptureAndEmitGetTextSettings = useCallback(() => { const hasUnconfirmedTextSteps = browserSteps.some(step => step.type === 'text' && !confirmedTextSteps[step.id]); if (hasUnconfirmedTextSteps) { @@ -268,8 +278,9 @@ export const RightSidePanel: React.FC = ({ onFinishCapture } setIsCaptureTextConfirmed(true); resetInterpretationLog(); + finishAction('text'); onFinishCapture(); - }, [stopGetText, getTextSettingsObject, socket, browserSteps, confirmedTextSteps, resetInterpretationLog]); + }, [stopGetText, getTextSettingsObject, socket, browserSteps, confirmedTextSteps, resetInterpretationLog, finishAction, notify, onFinishCapture, t]); const getListSettingsObject = useCallback(() => { let settings: { @@ -311,7 +322,7 @@ export const RightSidePanel: React.FC = ({ onFinishCapture setShowLimitOptions(false); updateLimitType(''); updateCustomLimit(''); - }, [updatePaginationType, updateLimitType, updateCustomLimit]); + }, [setShowPaginationOptions, updatePaginationType, setShowLimitOptions, updateLimitType, updateCustomLimit]); const handleStopGetList = useCallback(() => { stopGetList(); @@ -326,10 +337,17 @@ export const RightSidePanel: React.FC = ({ onFinishCapture notify('error', t('right_panel.errors.unable_create_settings')); } handleStopGetList(); + resetInterpretationLog(); + finishAction('list'); onFinishCapture(); - }, [stopGetList, getListSettingsObject, socket, notify, handleStopGetList]); + }, [getListSettingsObject, socket, notify, handleStopGetList, resetInterpretationLog, finishAction, onFinishCapture, t]); - const hasUnconfirmedListTextFields = browserSteps.some(step => step.type === 'list' && Object.values(step.fields).some(field => !confirmedListTextFields[step.id]?.[field.id])); + const hasUnconfirmedListTextFields = browserSteps.some(step => + step.type === 'list' && + Object.entries(step.fields).some(([fieldKey]) => + !confirmedListTextFields[step.id]?.[fieldKey] + ) + ); const handleConfirmListCapture = useCallback(() => { switch (captureStage) { @@ -378,7 +396,7 @@ export const RightSidePanel: React.FC = ({ onFinishCapture setCaptureStage('initial'); break; } - }, [captureStage, paginationType, limitType, customLimit, startPaginationMode, stopPaginationMode, startLimitMode, stopLimitMode, notify, stopCaptureAndEmitGetListSettings, getListSettingsObject]); + }, [captureStage, paginationType, limitType, customLimit, startPaginationMode, setShowPaginationOptions, setCaptureStage, getListSettingsObject, notify, stopPaginationMode, startLimitMode, setShowLimitOptions, stopLimitMode, setIsCaptureListConfirmed, stopCaptureAndEmitGetListSettings, t]); const handleBackCaptureList = useCallback(() => { switch (captureStage) { @@ -395,7 +413,7 @@ export const RightSidePanel: React.FC = ({ onFinishCapture setCaptureStage('initial'); break; } - }, [captureStage, stopLimitMode, startPaginationMode, stopPaginationMode]); + }, [captureStage, stopLimitMode, setShowLimitOptions, startPaginationMode, setShowPaginationOptions, setCaptureStage, stopPaginationMode]); const handlePaginationSettingSelect = (option: PaginationType) => { updatePaginationType(option); @@ -413,7 +431,7 @@ export const RightSidePanel: React.FC = ({ onFinishCapture setConfirmedTextSteps({}); setIsCaptureTextConfirmed(false); notify('error', t('right_panel.errors.capture_text_discarded')); - }, [browserSteps, stopGetText, deleteBrowserStep]); + }, [browserSteps, stopGetText, deleteBrowserStep, notify, t]); const discardGetList = useCallback(() => { stopGetList(); @@ -431,8 +449,7 @@ export const RightSidePanel: React.FC = ({ onFinishCapture setConfirmedListTextFields({}); setIsCaptureListConfirmed(false); notify('error', t('right_panel.errors.capture_list_discarded')); - }, [browserSteps, stopGetList, deleteBrowserStep, resetListState]); - + }, [browserSteps, stopGetList, deleteBrowserStep, resetListState, setShowPaginationOptions, setShowLimitOptions, setCaptureStage, notify, t]); const captureScreenshot = (fullPage: boolean) => { const screenshotSettings: ScreenshotSettings = { @@ -446,10 +463,12 @@ export const RightSidePanel: React.FC = ({ onFinishCapture socket?.emit('action', { action: 'screenshot', settings: screenshotSettings }); addScreenshotStep(fullPage); stopGetScreenshot(); + resetInterpretationLog(); + finishAction('screenshot'); + onFinishCapture(); }; const isConfirmCaptureDisabled = useMemo(() => { - // Check if we are in the initial stage and if there are no browser steps or no valid list selectors with fields if (captureStage !== 'initial') return false; const hasValidListSelector = browserSteps.some(step => @@ -458,7 +477,6 @@ export const RightSidePanel: React.FC = ({ onFinishCapture Object.keys(step.fields).length > 0 ); - // Disable the button if there are no valid list selectors or if there are unconfirmed list text fields return !hasValidListSelector || hasUnconfirmedListTextFields; }, [captureStage, browserSteps, hasUnconfirmedListTextFields]); @@ -467,15 +485,41 @@ export const RightSidePanel: React.FC = ({ onFinishCapture return ( - {/* - Last action: {` ${lastAction}`} - */} - {!getText && !getScreenshot && !getList && showCaptureList && } + {!isAnyActionActive && ( + <> + {showCaptureList && ( + + )} + + {showCaptureText && ( + + )} + + {showCaptureScreenshot && ( + + )} + + )} {getList && ( - <> + {(captureStage === 'pagination' || captureStage === 'limit') && ( - - )} - {showPaginationOptions && ( - - {t('right_panel.pagination.title')} - - - - - + + {showPaginationOptions && ( + + {t('right_panel.pagination.title')} + + + + + + + )} + + {showLimitOptions && ( + + +

{t('right_panel.limit.title')}

+
+ updateLimitType(e.target.value as LimitType)} + sx={{ + display: 'flex', + flexDirection: 'column', + width: '500px' + }} + > + } label="10" /> + } label="100" /> +
+ } label={t('right_panel.limit.custom')} /> + {limitType === 'custom' && ( + ) => { + const value = parseInt(e.target.value); + if (e.target.value === '' || value >= 1) { + updateCustomLimit(e.target.value); + } + }} + inputProps={{ + min: 1, + onKeyPress: (e: React.KeyboardEvent) => { + const value = (e.target as HTMLInputElement).value + e.key; + if (parseInt(value) < 1) { + e.preventDefault(); + } + } + }} + placeholder={t('right_panel.limit.enter_number')} + sx={{ + marginLeft: '10px', + '& input': { + padding: '10px', + }, + width: '150px', + background: isDarkMode ? "#1E2124" : 'white', + color: isDarkMode ? "white" : 'black', + }} + /> + )} +
+
+
+ )}
)} - {showLimitOptions && ( - - -

{t('right_panel.limit.title')}

-
- updateLimitType(e.target.value as LimitType)} - sx={{ - display: 'flex', - flexDirection: 'column', - width: '500px' - }} - > - } label="10" /> - } label="100" /> -
- } label={t('right_panel.limit.custom')} /> - {limitType === 'custom' && ( - ) => { - const value = parseInt(e.target.value); - // Only update if the value is greater than or equal to 1 or if the field is empty - if (e.target.value === '' || value >= 1) { - updateCustomLimit(e.target.value); - } - }} - inputProps={{ - min: 1, - onKeyPress: (e: React.KeyboardEvent) => { - const value = (e.target as HTMLInputElement).value + e.key; - if (parseInt(value) < 1) { - e.preventDefault(); - } - } - }} - placeholder={t('right_panel.limit.enter_number')} - sx={{ - marginLeft: '10px', - '& input': { - padding: '10px', - - }, - width: '150px', - background: isDarkMode ? "#1E2124" : 'white', - color: isDarkMode ? "white" : 'black', // Ensure the text field does not go outside the panel - }} - /> - )} -
-
-
- )} - {/* {!getText && !getScreenshot && !getList && showCaptureText && } */} - - {!getText && !getScreenshot && !getList && showCaptureText && } - {getText && - <> + + {getText && ( + - - } - {/* {!getText && !getScreenshot && !getList && showCaptureScreenshot && } */} - {!getText && !getScreenshot && !getList && showCaptureScreenshot && } + + )} + {getScreenshot && ( - - + + )}
+ {browserSteps.map(step => ( handleMouseEnter(step.id)} onMouseLeave={() => handleMouseLeave(step.id)} sx={{ padding: '10px', margin: '11px', borderRadius: '5px', position: 'relative', background: isDarkMode ? "#1E2124" : 'white', color: isDarkMode ? "white" : 'black' }}> @@ -716,7 +769,6 @@ export const RightSidePanel: React.FC = ({ onFinishCapture ) }} - /> {!confirmedTextSteps[step.id] ? ( diff --git a/src/components/run/InterpretationLog.tsx b/src/components/run/InterpretationLog.tsx index fa749efae..be9012500 100644 --- a/src/components/run/InterpretationLog.tsx +++ b/src/components/run/InterpretationLog.tsx @@ -1,7 +1,7 @@ import * as React from 'react'; import SwipeableDrawer from '@mui/material/SwipeableDrawer'; import Typography from '@mui/material/Typography'; -import { Button, Grid } from '@mui/material'; +import { Button, Grid, Tabs, Tab, Box } from '@mui/material'; import { useCallback, useEffect, useRef, useState } from "react"; import { useSocketStore } from "../../context/socket"; import { Buffer } from 'buffer'; @@ -29,9 +29,16 @@ export const InterpretationLog: React.FC = ({ isOpen, se const { t } = useTranslation(); const [log, setLog] = useState(''); const [customValue, setCustomValue] = useState(''); - const [tableData, setTableData] = useState([]); - const [binaryData, setBinaryData] = useState(null); + + const [captureListData, setCaptureListData] = useState([]); + const [captureTextData, setCaptureTextData] = useState([]); + const [screenshotData, setScreenshotData] = useState([]); + const [captureListPage, setCaptureListPage] = useState(0); + const [screenshotPage, setScreenshotPage] = useState(0); + + const [activeTab, setActiveTab] = useState(0); + const logEndRef = useRef(null); const { browserWidth, outputPreviewHeight, outputPreviewWidth } = useBrowserDimensionsStore(); @@ -62,34 +69,57 @@ export const InterpretationLog: React.FC = ({ isOpen, se setLog((prevState) => prevState + '\n' + `[${new Date().toLocaleString()}] ` + msg); } scrollLogToBottom(); - }, [log, scrollLogToBottom]); + }, []); - const handleSerializableCallback = useCallback((data: any) => { + const handleSerializableCallback = useCallback(({ type, data }: { type: string, data: any }) => { setLog((prevState) => prevState + '\n' + t('interpretation_log.data_sections.serializable_received') + '\n' + JSON.stringify(data, null, 2) + '\n' + t('interpretation_log.data_sections.separator')); - - if (Array.isArray(data)) { - setTableData(data); + + if (type === 'captureList') { + setCaptureListData(prev => [...prev, data]); + if (captureListData.length === 0) { + const availableTabs = getAvailableTabs(); + const tabIndex = availableTabs.findIndex(tab => tab.id === 'captureList'); + if (tabIndex !== -1) setActiveTab(tabIndex); + } + } else if (type === 'captureText') { + if (Array.isArray(data)) { + setCaptureTextData(data); + } else { + setCaptureTextData([data]); + } + if (captureTextData.length === 0) { + const availableTabs = getAvailableTabs(); + const tabIndex = availableTabs.findIndex(tab => tab.id === 'captureText'); + if (tabIndex !== -1) setActiveTab(tabIndex); + } } - + scrollLogToBottom(); - }, [log, scrollLogToBottom, t]); - - const handleBinaryCallback = useCallback(({ data, mimetype }: any) => { + }, [captureListData.length, captureTextData.length, t]); + + const handleBinaryCallback = useCallback(({ data, mimetype, type }: { data: any, mimetype: string, type: string }) => { const base64String = Buffer.from(data).toString('base64'); const imageSrc = `data:${mimetype};base64,${base64String}`; - + setLog((prevState) => prevState + '\n' + t('interpretation_log.data_sections.binary_received') + '\n' + t('interpretation_log.data_sections.mimetype') + mimetype + '\n' + t('interpretation_log.data_sections.image_below') + '\n' + t('interpretation_log.data_sections.separator')); - - setBinaryData(imageSrc); + + if (type === 'captureScreenshot') { + setScreenshotData(prev => [...prev, imageSrc]); + if (screenshotData.length === 0) { + const availableTabs = getAvailableTabs(); + const tabIndex = availableTabs.findIndex(tab => tab.id === 'captureScreenshot'); + if (tabIndex !== -1) setActiveTab(tabIndex); + } + } + scrollLogToBottom(); - }, [log, scrollLogToBottom, t]); - + }, [screenshotData.length, t]); const handleCustomValueChange = (event: React.ChangeEvent) => { setCustomValue(event.target.value); @@ -98,8 +128,12 @@ export const InterpretationLog: React.FC = ({ isOpen, se useEffect(() => { if (shouldResetInterpretationLog) { setLog(''); - setTableData([]); - setBinaryData(null); + setCaptureListData([]); + setCaptureTextData([]); + setScreenshotData([]); + setActiveTab(0); + setCaptureListPage(0); + setScreenshotPage(0); } }, [shouldResetInterpretationLog]); @@ -114,10 +148,33 @@ export const InterpretationLog: React.FC = ({ isOpen, se }; }, [socket, handleLog, handleSerializableCallback, handleBinaryCallback]); - // Extract columns dynamically from the first item of tableData - const columns = tableData.length > 0 ? Object.keys(tableData[0]) : []; + const getAvailableTabs = useCallback(() => { + const tabs = []; + + if (captureListData.length > 0) { + tabs.push({ id: 'captureList', label: 'Lists' }); + } + + if (captureTextData.length > 0) { + tabs.push({ id: 'captureText', label: 'Texts' }); + } + + if (screenshotData.length > 0) { + tabs.push({ id: 'captureScreenshot', label: 'Screenshots' }); + } + + return tabs; + }, [captureListData.length, captureTextData.length, screenshotData.length]); + + const availableTabs = getAvailableTabs(); + + useEffect(() => { + if (activeTab >= availableTabs.length && availableTabs.length > 0) { + setActiveTab(0); + } + }, [activeTab, availableTabs.length]); - const { hasScrapeListAction, hasScreenshotAction, hasScrapeSchemaAction } = currentWorkflowActionsState + const { hasScrapeListAction, hasScreenshotAction, hasScrapeSchemaAction } = currentWorkflowActionsState; useEffect(() => { if (hasScrapeListAction || hasScrapeSchemaAction || hasScreenshotAction) { @@ -127,6 +184,19 @@ export const InterpretationLog: React.FC = ({ isOpen, se const { darkMode } = useThemeMode(); + const getCaptureTextColumns = captureTextData.length > 0 ? Object.keys(captureTextData[0]) : []; + + const shouldShowTabs = availableTabs.length > 1; + + const getSingleContentType = () => { + if (availableTabs.length === 1) { + return availableTabs[0].id; + } + return null; + }; + + const singleContentType = getSingleContentType(); + return ( @@ -167,6 +237,7 @@ export const InterpretationLog: React.FC = ({ isOpen, se height: outputPreviewHeight, width: outputPreviewWidth, display: 'flex', + flexDirection: 'column', borderRadius: '10px 10px 0 0', }, }} @@ -175,67 +246,239 @@ export const InterpretationLog: React.FC = ({ isOpen, se {t('interpretation_log.titles.output_preview')} -
- { - binaryData ? ( -
- - {t('interpretation_log.titles.screenshot')} - - {t('interpretation_log.titles.screenshot')} -
- ) : tableData.length > 0 ? ( - <> - - + + {availableTabs.length > 0 ? ( + <> + {shouldShowTabs && ( + + {availableTabs.map((tab, index) => ( + setActiveTab(index)} + sx={{ + px: 4, + py: 2, + cursor: 'pointer', + borderBottom: activeTab === index ? '2px solid' : 'none', + borderColor: activeTab === index ? (darkMode ? '#ff00c3' : '#ff00c3') : 'transparent', + backgroundColor: activeTab === index ? (darkMode ? '#34404d' : '#e9ecef') : 'transparent', + color: darkMode ? 'white' : 'black', + fontWeight: activeTab === index ? 500 : 400, + textAlign: 'center', + position: 'relative', + '&:hover': { + backgroundColor: activeTab !== index ? (darkMode ? '#303b49' : '#e2e6ea') : undefined + } + }} + > + + {tab.label} + + + ))} + + )} + + + {(activeTab === availableTabs.findIndex(tab => tab.id === 'captureList') || singleContentType === 'captureList') && captureListData.length > 0 && ( + + + + {`Table ${captureListPage + 1} of ${captureListData.length}`} + + + + + + + +
+ + + {captureListData[captureListPage] && captureListData[captureListPage].length > 0 && + Object.keys(captureListData[captureListPage][0]).map((column) => ( + + {column} + + )) + } + + + + {captureListData[captureListPage] && + captureListData[captureListPage].map((row: any, idx: any) => ( + + {Object.keys(row).map((column) => ( + + {row[column]} + + ))} + + ))} + +
+
+ + )} + + {(activeTab === availableTabs.findIndex(tab => tab.id === 'captureScreenshot') || singleContentType === 'captureScreenshot') && screenshotData.length > 0 && ( + + {screenshotData.length > 1 && ( + + + {`Screenshot ${screenshotPage + 1} of ${screenshotData.length}`} + + + + + + + )} + {screenshotData.length > 0 && ( + + + {t('interpretation_log.titles.screenshot')} {screenshotPage + 1} + + {`${t('interpretation_log.titles.screenshot')} + + )} + + )} + + {(activeTab === availableTabs.findIndex(tab => tab.id === 'captureText') || singleContentType === 'captureText') && captureTextData.length > 0 && ( + + - {columns.map((column) => ( - {column} + {getCaptureTextColumns.map((column) => ( + + {column} + ))} - {tableData.slice(0, Math.min(5, tableData.length)).map((row, index) => ( - - {columns.map((column) => ( - {row[column]} + {captureTextData.map((row, idx) => ( + + {getCaptureTextColumns.map((column) => ( + + {row[column]} + ))} ))}
- - {t('interpretation_log.messages.additional_rows')} - - - ) : ( - - - {hasScrapeListAction || hasScrapeSchemaAction || hasScreenshotAction ? ( - <> - - {t('interpretation_log.messages.successful_training')} - - - - ) : ( - - {t('interpretation_log.messages.no_selection')} - - )} - - - )} -
-
+ )} + + + ) : ( + + + {hasScrapeListAction || hasScrapeSchemaAction || hasScreenshotAction ? ( + <> + + {t('interpretation_log.messages.successful_training')} + + + + ) : ( + + {t('interpretation_log.messages.no_selection')} + + )} + + + )} +
diff --git a/src/components/run/RunContent.tsx b/src/components/run/RunContent.tsx index 77bce5449..faa425d32 100644 --- a/src/components/run/RunContent.tsx +++ b/src/components/run/RunContent.tsx @@ -1,10 +1,23 @@ -import { Box, Tabs, Typography, Tab, Paper, Button, CircularProgress } from "@mui/material"; +import { + Box, + Tabs, + Typography, + Tab, + Paper, + Button, + CircularProgress, + Accordion, + AccordionSummary, + AccordionDetails, + ButtonGroup +} from "@mui/material"; import Highlight from "react-highlight"; import * as React from "react"; import { Data } from "./RunsTable"; import { TabPanel, TabContext } from "@mui/lab"; -import ArticleIcon from '@mui/icons-material/Article'; -import ImageIcon from '@mui/icons-material/Image'; +import ExpandMoreIcon from '@mui/icons-material/ExpandMore'; +import ArrowBackIcon from '@mui/icons-material/ArrowBack'; +import ArrowForwardIcon from '@mui/icons-material/ArrowForward'; import { useEffect, useState } from "react"; import Table from '@mui/material/Table'; import TableBody from '@mui/material/TableBody'; @@ -26,53 +39,343 @@ interface RunContentProps { export const RunContent = ({ row, currentLog, interpretationInProgress, logEndRef, abortRunHandler }: RunContentProps) => { const { t } = useTranslation(); const [tab, setTab] = React.useState('output'); - const [tableData, setTableData] = useState([]); - const [columns, setColumns] = useState([]); + + const [schemaData, setSchemaData] = useState([]); + const [schemaColumns, setSchemaColumns] = useState([]); + + const [listData, setListData] = useState([]); + const [listColumns, setListColumns] = useState([]); + const [currentListIndex, setCurrentListIndex] = useState(0); + + const [screenshotKeys, setScreenshotKeys] = useState([]); + const [currentScreenshotIndex, setCurrentScreenshotIndex] = useState(0); + + const [legacyData, setLegacyData] = useState([]); + const [legacyColumns, setLegacyColumns] = useState([]); + const [isLegacyData, setIsLegacyData] = useState(false); useEffect(() => { setTab(tab); }, [interpretationInProgress]); useEffect(() => { - if (row.serializableOutput && Object.keys(row.serializableOutput).length > 0) { - const firstKey = Object.keys(row.serializableOutput)[0]; - const data = row.serializableOutput[firstKey]; + if (!row.serializableOutput) return; + + if (!row.serializableOutput.scrapeSchema && + !row.serializableOutput.scrapeList && + Object.keys(row.serializableOutput).length > 0) { + + setIsLegacyData(true); + processLegacyData(row.serializableOutput); + return; + } + + setIsLegacyData(false); + + if (row.serializableOutput.scrapeSchema && Object.keys(row.serializableOutput.scrapeSchema).length > 0) { + processDataCategory(row.serializableOutput.scrapeSchema, setSchemaData, setSchemaColumns); + } + + if (row.serializableOutput.scrapeList) { + processScrapeList(row.serializableOutput.scrapeList); + } + }, [row.serializableOutput]); + + useEffect(() => { + if (row.binaryOutput && Object.keys(row.binaryOutput).length > 0) { + setScreenshotKeys(Object.keys(row.binaryOutput)); + setCurrentScreenshotIndex(0); + } + }, [row.binaryOutput]); + + const processLegacyData = (legacyOutput: Record) => { + let allData: any[] = []; + + Object.keys(legacyOutput).forEach(key => { + const data = legacyOutput[key]; if (Array.isArray(data)) { - // Filter out completely empty rows const filteredData = data.filter(row => Object.values(row).some(value => value !== undefined && value !== "") ); - setTableData(filteredData); - if (filteredData.length > 0) { - setColumns(Object.keys(filteredData[0])); - } + allData = [...allData, ...filteredData]; } + }); + + if (allData.length > 0) { + const allColumns = new Set(); + allData.forEach(item => { + Object.keys(item).forEach(key => allColumns.add(key)); + }); + + setLegacyData(allData); + setLegacyColumns(Array.from(allColumns)); + } + }; + + const processDataCategory = ( + categoryData: Record, + setData: React.Dispatch>, + setColumns: React.Dispatch> + ) => { + let allData: any[] = []; + + Object.keys(categoryData).forEach(key => { + const data = categoryData[key]; + if (Array.isArray(data)) { + const filteredData = data.filter(row => + Object.values(row).some(value => value !== undefined && value !== "") + ); + allData = [...allData, ...filteredData]; + } + }); + + if (allData.length > 0) { + const allColumns = new Set(); + allData.forEach(item => { + Object.keys(item).forEach(key => allColumns.add(key)); + }); + + setData(allData); + setColumns(Array.from(allColumns)); + } + }; + + const processScrapeList = (scrapeListData: any) => { + const tablesList: any[][] = []; + const columnsList: string[][] = []; + + if (Array.isArray(scrapeListData)) { + scrapeListData.forEach(tableData => { + if (Array.isArray(tableData) && tableData.length > 0) { + const filteredData = tableData.filter(row => + Object.values(row).some(value => value !== undefined && value !== "") + ); + + if (filteredData.length > 0) { + tablesList.push(filteredData); + + const tableColumns = new Set(); + filteredData.forEach(item => { + Object.keys(item).forEach(key => tableColumns.add(key)); + }); + + columnsList.push(Array.from(tableColumns)); + } + } + }); + } else if (typeof scrapeListData === 'object') { + Object.keys(scrapeListData).forEach(key => { + const tableData = scrapeListData[key]; + if (Array.isArray(tableData) && tableData.length > 0) { + const filteredData = tableData.filter(row => + Object.values(row).some(value => value !== undefined && value !== "") + ); + + if (filteredData.length > 0) { + tablesList.push(filteredData); + + const tableColumns = new Set(); + filteredData.forEach(item => { + Object.keys(item).forEach(key => tableColumns.add(key)); + }); + + columnsList.push(Array.from(tableColumns)); + } + } + }); } - }, [row.serializableOutput]); + setListData(tablesList); + setListColumns(columnsList); + setCurrentListIndex(0); + }; // Function to convert table data to CSV format const convertToCSV = (data: any[], columns: string[]): string => { const header = columns.join(','); const rows = data.map(row => - columns.map(col => JSON.stringify(row[col], null, 2)).join(',') + columns.map(col => JSON.stringify(row[col] || "", null, 2)).join(',') ); return [header, ...rows].join('\n'); }; - const downloadCSV = () => { - const csvContent = convertToCSV(tableData, columns); + // Function to download a specific dataset as CSV + const downloadCSV = (data: any[], columns: string[], filename: string) => { + const csvContent = convertToCSV(data, columns); const blob = new Blob([csvContent], { type: 'text/csv;charset=utf-8;' }); const url = URL.createObjectURL(blob); const link = document.createElement("a"); link.href = url; - link.setAttribute("download", "data.csv"); + link.setAttribute("download", filename); + document.body.appendChild(link); + link.click(); + document.body.removeChild(link); + }; + + const downloadJSON = (data: any[], filename: string) => { + const jsonContent = JSON.stringify(data, null, 2); + const blob = new Blob([jsonContent], { type: 'application/json;charset=utf-8;' }); + const url = URL.createObjectURL(blob); + + const link = document.createElement("a"); + link.href = url; + link.setAttribute("download", filename); document.body.appendChild(link); link.click(); document.body.removeChild(link); + + setTimeout(() => { + URL.revokeObjectURL(url); + }, 100); + }; + + const navigateListTable = (direction: 'next' | 'prev') => { + if (direction === 'next' && currentListIndex < listData.length - 1) { + setCurrentListIndex(currentListIndex + 1); + } else if (direction === 'prev' && currentListIndex > 0) { + setCurrentListIndex(currentListIndex - 1); + } + }; + + const navigateScreenshots = (direction: 'next' | 'prev') => { + if (direction === 'next' && currentScreenshotIndex < screenshotKeys.length - 1) { + setCurrentScreenshotIndex(currentScreenshotIndex + 1); + } else if (direction === 'prev' && currentScreenshotIndex > 0) { + setCurrentScreenshotIndex(currentScreenshotIndex - 1); + } + }; + + const renderDataTable = ( + data: any[], + columns: string[], + title: string, + csvFilename: string, + jsonFilename: string, + isPaginatedList: boolean = false + ) => { + if (!isPaginatedList && data.length === 0) return null; + if (isPaginatedList && (listData.length === 0 || currentListIndex >= listData.length)) return null; + + const currentData = isPaginatedList ? listData[currentListIndex] : data; + const currentColumns = isPaginatedList ? listColumns[currentListIndex] : columns; + + if (!currentData || currentData.length === 0) return null; + + return ( + + } + aria-controls={`${title.toLowerCase()}-content`} + id={`${title.toLowerCase()}-header`} + > + + + {title} + + + + + + + + + + + + {isPaginatedList && listData.length > 1 && ( + + + + + )} + + + + + + {(isPaginatedList ? currentColumns : columns).map((column) => ( + {column} + ))} + + + + {(isPaginatedList ? currentData : data).map((row, index) => ( + + {(isPaginatedList ? currentColumns : columns).map((column) => ( + + {row[column] === undefined || row[column] === "" ? "-" : row[column]} + + ))} + + ))} + +
+
+
+
+ ); }; + const hasData = schemaData.length > 0 || listData.length > 0 || legacyData.length > 0; + const hasScreenshots = row.binaryOutput && Object.keys(row.binaryOutput).length > 0; + return ( @@ -82,11 +385,9 @@ export const RunContent = ({ row, currentLog, interpretationInProgress, logEndRe onChange={(e, newTab) => setTab(newTab)} aria-label="run-content-tabs" sx={{ - // Remove the default blue indicator '& .MuiTabs-indicator': { - backgroundColor: '#FF00C3', // Change to pink + backgroundColor: '#FF00C3', }, - // Remove default transition effects '& .MuiTab-root': { '&.Mui-selected': { color: '#FF00C3', @@ -147,103 +448,149 @@ export const RunContent = ({ row, currentLog, interpretationInProgress, logEndRe {t('run_content.buttons.stop')} : null} - + {row.status === 'running' || row.status === 'queued' ? ( {t('run_content.loading')} - ) : (!row || !row.serializableOutput || !row.binaryOutput - || (Object.keys(row.serializableOutput).length === 0 && Object.keys(row.binaryOutput).length === 0) - ? {t('run_content.empty_output')} + ) : (!hasData && !hasScreenshots + ? {t('run_content.empty_output')} : null)} - {row.serializableOutput && - Object.keys(row.serializableOutput).length !== 0 && -
- - - {t('run_content.captured_data.title')} - - - - - {t('run_content.captured_data.download_json')} - - - - {t('run_content.captured_data.download_csv')} - - - {tableData.length > 0 ? ( - - - - - {columns.map((column) => ( - {column} - ))} - - - - {tableData.map((row, index) => ( - - {columns.map((column) => ( - - {row[column] === undefined || row[column] === "" ? "-" : row[column]} - - ))} - - ))} - -
-
- ) : ( - -
-                    {JSON.stringify(row.serializableOutput, null, 2)}
-                  
-
+ {hasData && ( + + {isLegacyData && ( + renderDataTable( + legacyData, + legacyColumns, + t('run_content.captured_data.title'), + 'data.csv', + 'data.json' + ) )} -
- } - {row.binaryOutput && Object.keys(row.binaryOutput).length !== 0 && -
- - - {t('run_content.captured_screenshot.title')} - - {Object.keys(row.binaryOutput).map((key) => { - try { - const imageUrl = row.binaryOutput[key]; - return ( - - - {t('run_content.captured_screenshot.download')} - - {key} + + {!isLegacyData && ( + <> + {renderDataTable( + schemaData, + schemaColumns, + t('run_content.captured_data.schema_title'), + 'schema_data.csv', + 'schema_data.json' + )} + + {listData.length > 0 && renderDataTable( + [], + [], + t('run_content.captured_data.list_title'), + 'list_data.csv', + 'list_data.json', + true + )} + + )} + + )} + + {hasScreenshots && ( + <> + + } + aria-controls="screenshot-content" + id="screenshot-header" + > + + + {t('run_content.captured_screenshot.title', 'Screenshots')} + + + + + + + + {screenshotKeys.length > 1 && ( + + + + + )} + + + + + {`Screenshot - ) - } catch (e) { - console.log(e) - return - {key}: {t('run_content.captured_screenshot.render_failed')} - - } - })} -
- } +
+ + + + )} diff --git a/src/context/browserActions.tsx b/src/context/browserActions.tsx index 42f9f95e4..1a8b8518d 100644 --- a/src/context/browserActions.tsx +++ b/src/context/browserActions.tsx @@ -6,6 +6,7 @@ import { emptyWorkflow } from '../shared/constants'; export type PaginationType = 'scrollDown' | 'scrollUp' | 'clickNext' | 'clickLoadMore' | 'none' | ''; export type LimitType = '10' | '100' | 'custom' | ''; export type CaptureStage = 'initial' | 'pagination' | 'limit' | 'complete' | ''; +export type ActionType = 'text' | 'list' | 'screenshot'; interface ActionContextProps { getText: boolean; @@ -19,18 +20,22 @@ interface ActionContextProps { customLimit: string; captureStage: CaptureStage; showPaginationOptions: boolean; - showLimitOptions: boolean; + showLimitOptions: boolean; + activeAction: 'none' | 'text' | 'list' | 'screenshot'; + setActiveAction: (action: 'none' | 'text' | 'list' | 'screenshot') => void; setWorkflow: (workflow: WorkflowFile) => void; setShowPaginationOptions: (show: boolean) => void; setShowLimitOptions: (show: boolean) => void; setCaptureStage: (stage: CaptureStage) => void; - startPaginationMode: () => void; + startAction: (action: 'text' | 'list' | 'screenshot') => void; + finishAction: (action: 'text' | 'list' | 'screenshot') => void; startGetText: () => void; stopGetText: () => void; startGetList: () => void; stopGetList: () => void; startGetScreenshot: () => void; stopGetScreenshot: () => void; + startPaginationMode: () => void; stopPaginationMode: () => void; updatePaginationType: (type: PaginationType) => void; startLimitMode: () => void; @@ -54,9 +59,45 @@ export const ActionProvider = ({ children }: { children: ReactNode }) => { const [captureStage, setCaptureStage] = useState('initial'); const [showPaginationOptions, setShowPaginationOptions] = useState(false); const [showLimitOptions, setShowLimitOptions] = useState(false); + const [activeAction, setActiveAction] = useState<'none' | 'text' | 'list' | 'screenshot'>('none'); const { socket } = useSocketStore(); + const startAction = (action: 'text' | 'list' | 'screenshot') => { + if (activeAction !== 'none') return; + + setActiveAction(action); + + if (action === 'text') { + setGetText(true); + } else if (action === 'list') { + setGetList(true); + socket?.emit('setGetList', { getList: true }); + setCaptureStage('initial'); + } else if (action === 'screenshot') { + setGetScreenshot(true); + } + }; + + const finishAction = (action: 'text' | 'list' | 'screenshot') => { + if (activeAction !== action) return; + + setActiveAction('none'); + + if (action === 'text') { + setGetText(false); + } else if (action === 'list') { + setGetList(false); + setPaginationType(''); + setLimitType(''); + setCustomLimit(''); + setCaptureStage('complete'); + socket?.emit('setGetList', { getList: false }); + } else if (action === 'screenshot') { + setGetScreenshot(false); + } + }; + const updatePaginationType = (type: PaginationType) => setPaginationType(type); const updateLimitType = (type: LimitType) => setLimitType(type); const updateCustomLimit = (limit: string) => setCustomLimit(limit); @@ -69,7 +110,7 @@ export const ActionProvider = ({ children }: { children: ReactNode }) => { }; const stopPaginationMode = () => { - setPaginationMode(false); + setPaginationMode(false), socket?.emit('setPaginationMode', { pagination: false }); }; @@ -80,15 +121,15 @@ export const ActionProvider = ({ children }: { children: ReactNode }) => { const stopLimitMode = () => setLimitMode(false); - const startGetText = () => setGetText(true); - const stopGetText = () => setGetText(false); - - const startGetList = () => { - setGetList(true); - socket?.emit('setGetList', { getList: true }); - setCaptureStage('initial'); - } - + const startGetText = () => startAction('text'); + + const stopGetText = () => { + setGetText(false); + setActiveAction('none'); + }; + + const startGetList = () => startAction('list'); + const stopGetList = () => { setGetList(false); socket?.emit('setGetList', { getList: false }); @@ -96,10 +137,15 @@ export const ActionProvider = ({ children }: { children: ReactNode }) => { setLimitType(''); setCustomLimit(''); setCaptureStage('complete'); + setActiveAction('none'); + }; + + const startGetScreenshot = () => startAction('screenshot'); + + const stopGetScreenshot = () => { + setGetScreenshot(false); + setActiveAction('none'); }; - - const startGetScreenshot = () => setGetScreenshot(true); - const stopGetScreenshot = () => setGetScreenshot(false); return ( { captureStage, showPaginationOptions, showLimitOptions, + activeAction, + setActiveAction, setWorkflow, setShowPaginationOptions, setShowLimitOptions, setCaptureStage, + startAction, + finishAction, startGetText, stopGetText, startGetList, @@ -127,9 +177,9 @@ export const ActionProvider = ({ children }: { children: ReactNode }) => { stopGetScreenshot, startPaginationMode, stopPaginationMode, + updatePaginationType, startLimitMode, stopLimitMode, - updatePaginationType, updateLimitType, updateCustomLimit }}> @@ -144,4 +194,4 @@ export const useActionContext = () => { throw new Error('useActionContext must be used within an ActionProvider'); } return context; -}; +}; \ No newline at end of file