Skip to content

feat: add sitemap loader to admin frontend #21

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 31 commits into from
Jun 16, 2025
Merged
Show file tree
Hide file tree
Changes from 28 commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
e7c28ac
feat: add Confluence integration with configurable parameters for doc…
a-klos May 23, 2025
e495cc8
chore: update submodules to latest main
github-actions[bot] May 23, 2025
39748d8
feat: enable MinIO feature in Helm chart configuration and remove def…
a-klos May 23, 2025
d3fccec
Merge branch 'chore/adminfrontend' of github.com:stackitcloud/rag-tem…
a-klos May 23, 2025
38b9d91
feat: add pytest configuration and update testing setup across multip…
a-klos Jun 3, 2025
026a2df
Update frontend/libs/i18n/admin/en.json
a-klos Jun 4, 2025
5be7dfb
feat: enhance Confluence upload form with additional input fields and…
a-klos Jun 4, 2025
962fcc2
Merge branch 'chore/adminfrontend' of github.com:stackitcloud/rag-tem…
a-klos Jun 4, 2025
4955d73
Merge branch 'main' into chore/adminfrontend
a-klos Jun 4, 2025
1067071
feat: add sitemap upload functionality with configuration options and…
a-klos Jun 4, 2025
5702fa7
feat: add header template support for sitemap upload with JSON valida…
a-klos Jun 4, 2025
409dfb3
chore: update subproject commit reference in rag-core-library
a-klos Jun 4, 2025
7bbeef0
chore: update .gitignore to include todo files, remove unused Conflue…
a-klos Jun 5, 2025
aa4ff4f
chore: update subproject commit reference in rag-core-library
a-klos Jun 5, 2025
6c920bd
chore: update subproject commit reference in rag-core-library
a-klos Jun 5, 2025
916d74c
chore: update subproject commit reference in rag-core-library
a-klos Jun 5, 2025
f1c9cfd
chore: update submodules to latest main
github-actions[bot] Jun 5, 2025
ac8bcff
feat: update dependencies and add new packages
a-klos Jun 11, 2025
3a2a2ec
merge
a-klos Jun 11, 2025
ded5813
chore: update submodules to latest main
github-actions[bot] Jun 11, 2025
7973085
Update subproject commits for rag-core-library and rag-infrastructure
a-klos Jun 13, 2025
6da4864
No code changes detected.
a-klos Jun 13, 2025
ccf2927
chore: update submodules to latest main
github-actions[bot] Jun 13, 2025
02abfd1
chore: update subproject commit for rag-core-library
a-klos Jun 13, 2025
bff149a
Merge branch 'chore/adminfrontend-sitemap-loader' of github.com:stack…
a-klos Jun 13, 2025
917e71e
Update frontend/libs/admin-app/feature-document/DocumentUploadContain…
a-klos Jun 13, 2025
0a9e50a
Update frontend/libs/admin-app/data-access/+state/documents.store.ts
a-klos Jun 13, 2025
18e82e8
Update DocumentUploadContainer.vue
a-klos Jun 13, 2025
f5ac9e4
chore: update submodule
a-klos Jun 16, 2025
7435630
Merge branch 'chore/adminfrontend-sitemap-loader' of github.com:stack…
a-klos Jun 16, 2025
beae515
Merge branch 'main' into chore/adminfrontend-sitemap-loader
a-klos Jun 16, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ auth
# Node Modules
node_modules/


**/.notebooks
**/todo*.md

# Byte-compiled / optimized / DLL files
__pycache__/
Expand Down
303 changes: 284 additions & 19 deletions admin-backend/poetry.lock

Large diffs are not rendered by default.

2,057 changes: 1,053 additions & 1,004 deletions document-extractor/poetry.lock

Large diffs are not rendered by default.

28 changes: 26 additions & 2 deletions frontend/libs/admin-app/data-access/+state/documents.store.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,14 @@ import { ref } from 'vue';
import { DocumentModel } from "../../models/document.model.ts";
import { ErrorType } from "../../models/error-type";
import { UploadedDocument, mapToUploadDocument } from "../../models/uploaded-document.model";
import { DocumentAPI, ConfluenceConfig } from "../document.api";
import { ConfluenceConfig, DocumentAPI, SitemapConfig } from "../document.api";

export const useDocumentsStore = defineStore('chat', () => {
const uploadedDocuments = ref<UploadedDocument[]>([]);
const allDocuments = ref<DocumentModel[]>();
const error = ref<ErrorType | null>(null);
const isLoadingConfluence = ref(false);
const isLoadingSitemap = ref(false);

function updateUploadedDocumentData(documentId: string, data: Partial<UploadedDocument>) {
const document = uploadedDocuments.value.find((d: UploadedDocument) => d.id === documentId);
Expand Down Expand Up @@ -75,6 +76,29 @@ export const useDocumentsStore = defineStore('chat', () => {
}
};

/**
 * Asks the backend to load a sitemap source and then refreshes the document list.
 *
 * While the request runs `isLoadingSitemap` is true; it is always reset when the
 * call finishes. Failures are not rethrown: they are mapped onto the store's
 * `error` state so the UI can show a matching message.
 *
 * @param config - Sitemap configuration entered in the frontend.
 */
const loadSitemap = async (config: SitemapConfig) => {
    isLoadingSitemap.value = true;
    error.value = null;
    try {
        // provide sitemap configuration from frontend
        await DocumentAPI.loadSitemap(config);
        await loadDocuments(); // Refresh the document list after uploading
    } catch (err) {
        // The caught value is untyped: only read `response.status` after checking
        // that it is a non-null object, so a thrown primitive/null cannot crash
        // the handler itself.
        const response =
            typeof err === 'object' && err !== null && 'response' in err
                ? (err as { response?: { status?: unknown } | null }).response
                : undefined;
        const status = response?.status;

        if (status === 501) {
            // 501: the backend reports that the sitemap loader is not available.
            error.value = "sitemap_not_configured";
            console.error("Sitemap loader is not configured.");
        } else if (status === 423) {
            // 423: the backend reports that the loader is currently locked/busy.
            error.value = "sitemap_locked";
            console.error("Sitemap loader returned a warning.");
        } else {
            error.value = "sitemap";
            console.error(err);
        }
    } finally {
        isLoadingSitemap.value = false;
    }
};

const uploadDocuments = async (files: File[]) => {
try {
const uploads = files.map(uploadDocument);
Expand Down Expand Up @@ -103,5 +127,5 @@ export const useDocumentsStore = defineStore('chat', () => {
uploadedDocuments.value = uploadedDocuments.value.filter(o => o.id !== documentId);
};

return {removeUploadedDocument, uploadDocuments, loadDocuments, deleteDocument, loadConfluence, allDocuments, uploadedDocuments, error};
return {removeUploadedDocument, uploadDocuments, loadDocuments, deleteDocument, loadConfluence, loadSitemap, allDocuments, uploadedDocuments, error, isLoadingSitemap};
});
48 changes: 48 additions & 0 deletions frontend/libs/admin-app/data-access/document.api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,14 @@ export interface ConfluenceConfig {
name: string;
}

/**
 * Sitemap configuration collected in the admin frontend and consumed by
 * `DocumentAPI.loadSitemap`.
 */
export interface SitemapConfig {
/** Sitemap location; sent to the backend as the `web_path` item. */
webPath: string;
/**
 * Multiline text with one entry per line. Blank lines are dropped and the
 * remaining lines are sent as a JSON-encoded array under `filter_urls`.
 * An empty/whitespace-only value omits the item.
 */
filterUrls: string;
/**
 * JSON text that is validated with `JSON.parse` and sent unchanged as
 * `header_template`. An empty/whitespace-only value omits the item.
 */
headerTemplate: string;
/** Sent as the `name` query parameter of the upload request. */
name: string;
}

export class DocumentAPI {
static async loadDocuments(): Promise<DocumentModel[]> {
try {
Expand Down Expand Up @@ -62,6 +70,46 @@ export class DocumentAPI {
}
}

/**
 * Posts a sitemap configuration to `/upload_source`.
 *
 * The request body is a list of `{ key, value }` items: `web_path` is always
 * included, `filter_urls` and `header_template` only when provided.
 * `source_type=sitemap` and the source name are sent as query parameters.
 *
 * Any failure, including an invalid header template, is passed to
 * `handleError`.
 *
 * @param config - Sitemap settings entered in the admin frontend.
 */
static async loadSitemap(config: SitemapConfig): Promise<void> {
    try {
        // convert config to list of key/value items for backend
        const payload = [{ key: 'web_path', value: config.webPath }];

        // filter_urls: one pattern per line; blank lines are ignored and the
        // item is omitted entirely when nothing remains.
        const filterUrls = (config.filterUrls ?? '')
            .split('\n')
            .map(url => url.trim())
            .filter(url => url.length > 0);
        if (filterUrls.length > 0) {
            payload.push({ key: 'filter_urls', value: JSON.stringify(filterUrls) });
        }

        // header_template: optional, but when present it must be a JSON object.
        // JSON.parse alone would also accept scalars, arrays and null, which
        // cannot describe a set of request headers.
        if (config.headerTemplate && config.headerTemplate.trim()) {
            let parsedTemplate: unknown;
            try {
                parsedTemplate = JSON.parse(config.headerTemplate);
            } catch {
                throw new Error('Header template must be valid JSON format');
            }
            if (
                typeof parsedTemplate !== 'object' ||
                parsedTemplate === null ||
                Array.isArray(parsedTemplate)
            ) {
                throw new Error('Header template must be a JSON object of header names and values');
            }
            payload.push({ key: 'header_template', value: config.headerTemplate });
        }

        // include required query parameters
        await axios.post<void>('/upload_source', payload, {
            params: { source_type: 'sitemap', name: config.name }
        });
    } catch (error) {
        this.handleError(error);
    }
}

static async deleteDocument(documentId: string): Promise<void> {
try {
await axios.delete<void>(`/delete_document/${documentId}`);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ const fileInputRef = ref<HTMLInputElement>();
const uploadedDocuments = computed((): UploadedDocument[] => store.uploadedDocuments);
const isInvalidFileType = ref(false);
const allowedFileTypes = ['application/pdf', 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', 'application/vnd.openxmlformats-officedocument.presentationml.presentation', 'text/xml'];
const uploadMethod = ref<'file' | 'confluence'>('file');
const uploadMethod = ref<'file' | 'confluence' | 'sitemap'>('file');


// confluence configuration refs
Expand All @@ -23,6 +23,12 @@ const confluenceToken = ref('');
const confluenceUrl = ref('');
const maxPages = ref<number>();

// sitemap configuration refs
const sitemapName = ref('');
const sitemapWebPath = ref('');
const sitemapFilterUrls = ref('');
const sitemapHeaderTemplate = ref('');

const error = computed(() => store.error);

const uploadDocuments = (files: File[]) => {
Expand Down Expand Up @@ -73,6 +79,16 @@ const handleConfluenceUpload = () => {
});
}

// Snapshot the current sitemap form fields and hand them to the store,
// which performs the backend request.
const handleSitemapUpload = () => {
    const sitemapConfig = {
        name: sitemapName.value,
        webPath: sitemapWebPath.value,
        filterUrls: sitemapFilterUrls.value,
        headerTemplate: sitemapHeaderTemplate.value,
    };
    store.loadSitemap(sitemapConfig);
}

// Resets the store's error state by assigning null to store.error.
const clearError = (): void => {
    store.error = null;
};
Expand All @@ -91,6 +107,12 @@ const getErrorMessage = (errorType: string) => {
return t('documents.confluenceNotConfigured');
case 'confluence_locked':
return t('documents.confluenceLocked');
case 'sitemap':
return t('documents.sitemapError');
case 'sitemap_not_configured':
return t('documents.sitemapNotConfigured');
case 'sitemap_locked':
return t('documents.sitemapLocked');
default:
return t('documents.unknownError');
}
Expand Down Expand Up @@ -124,6 +146,10 @@ const getErrorMessage = (errorType: string) => {
@click="uploadMethod = 'confluence'">
{{ t('documents.confluenceUpload') }}
</a>
<a class="tab" :class="{'tab-active': uploadMethod === 'sitemap'}"
@click="uploadMethod = 'sitemap'">
{{ t('documents.sitemapUpload') }}
</a>
</div>

<!-- File upload area -->
Expand All @@ -144,7 +170,7 @@ const getErrorMessage = (errorType: string) => {
</div>

<!-- Confluence load area -->
<div v-else
<div v-else-if="uploadMethod === 'confluence'"
class="flex flex-col m-auto justify-center items-center w-full h-112 bg-base-100 rounded-box border border-base-300">
<div class="flex flex-col justify-center items-center pt-5 pb-6">
<GlobeAltIcon class="w-10 h-10 mb-4 text-accent-content" />
Expand All @@ -169,6 +195,30 @@ const getErrorMessage = (errorType: string) => {
</div>
</div>

<!-- Sitemap load area -->
<div v-else-if="uploadMethod === 'sitemap'"
    class="flex flex-col m-auto justify-center items-center w-full h-112 bg-base-100 rounded-box border border-base-300">
    <div class="flex flex-col justify-center items-center pt-5 pb-6">
        <GlobeAltIcon class="w-10 h-10 mb-4 text-accent-content" />
        <p class="mb-1 font-bold">{{ t('documents.sitemapLoadTitle') }}</p>
        <!-- configuration inputs: every control carries the id referenced by its
             sr-only label's `for` attribute so assistive technology can pair them -->
        <div class="space-y-2 mb-4 w-full max-w-sm">
            <label for="sitemapName" class="sr-only">Sitemap Name</label>
            <input id="sitemapName" v-model="sitemapName" type="text" placeholder="Name" class="input input-bordered w-full" required/>
            <label for="sitemapWebPath" class="sr-only">Sitemap URL</label>
            <input id="sitemapWebPath" v-model="sitemapWebPath" type="url" placeholder="Sitemap URL (required)" class="input input-bordered w-full" required />
            <label for="sitemapFilterUrls" class="sr-only">Filter URLs</label>
            <textarea id="sitemapFilterUrls" v-model="sitemapFilterUrls" placeholder="Filter URLs (optional) - one regex pattern per line" class="textarea textarea-bordered w-full" rows="3"></textarea>
            <label for="sitemapHeaderTemplate" class="sr-only">Headers JSON</label>
            <textarea id="sitemapHeaderTemplate" v-model="sitemapHeaderTemplate" placeholder="Headers (optional) - JSON format: {&quot;Authorization&quot;: &quot;Bearer token&quot;}" class="textarea textarea-bordered w-full" rows="2"></textarea>
        </div>
        <p class="text-xs opacity-50 mb-4">{{ t('documents.sitemapLoadDescription') }}</p>
        <button class="btn btn-sm btn-accent" @click="handleSitemapUpload">
            {{ t('documents.loadSitemap') }}
        </button>
    </div>
</div>

<!-- Uploaded documents -->
<div class="mx-auto mt-4 w-full">
<div class="mb-4" v-for="uploadDocument in uploadedDocuments" :key="uploadDocument.id">
Expand Down
7 changes: 7 additions & 0 deletions frontend/libs/i18n/admin/en.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@
"confluenceLoadTitle": "Load all Confluence pages from a space",
"confluenceLoadDescription": "Click the button below to load pages from Confluence",
"loadConfluence": "Load Confluence",
"sitemapUpload": "Sitemap",
"sitemapLoadTitle": "Load content from a sitemap",
"sitemapLoadDescription": "Enter a sitemap URL to extract and load content from all linked pages",
"loadSitemap": "Load Sitemap",
"select": "Select",
"chat": "Start chat",
"uploadDocumentFailed": "Upload failed",
Expand All @@ -25,6 +29,9 @@
"confluenceError": "Failed to load from Confluence",
"confluenceNotConfigured": "Confluence is not configured",
"confluenceLocked": "Confluence Loader is busy. Please try again later",
"sitemapError": "Failed to load from sitemap",
"sitemapNotConfigured": "Sitemap is not configured",
"sitemapLocked": "Sitemap Loader is busy. Please try again later",
"unknownError": "An unknown error occurred"
}
}
Loading
Loading