77from typing import Dict
88
99import streamlit as st
10+ import yaml
1011from jvclient .lib .utils import call_api , get_reports_payload
1112from jvclient .lib .widgets import app_header , app_update_action
1213from streamlit_router import StreamlitRouter
@@ -21,6 +22,8 @@ def render(router: StreamlitRouter, agent_id: str, action_id: str, info: dict) -
2122 :param info: Additional information.
2223 """
2324 (model_key , module_root ) = app_header (agent_id , action_id , info )
25+ if "job_id_details" not in st .session_state :
26+ st .session_state .job_id_details = ""
2427
2528 # add documents section
2629 with st .expander ("Configure" , False ):
@@ -112,6 +115,11 @@ def render(router: StreamlitRouter, agent_id: str, action_id: str, info: dict) -
112115 value = True ,
113116 )
114117
118+ chunker_type = st .selectbox (
119+ "Chunker type" ,
120+ options = ["toc" , "hybrid" , "hierarchical" ],
121+ key = f"{ model_key } _chunker_type" ,
122+ )
115123 # Process inputs
116124 url_list = [url .strip () for url in doc_urls .split ("\n " ) if url .strip ()]
117125 metadata_list = []
@@ -149,6 +157,7 @@ def render(router: StreamlitRouter, agent_id: str, action_id: str, info: dict) -
149157 "to_page" : int (to_page ) if to_page is not None else 0 ,
150158 "lang" : str (lang ),
151159 "with_embeddings" : with_embeddings ,
160+ "chunker_type" : chunker_type ,
152161 }
153162
154163 # Add optional fields only if they exist
@@ -271,6 +280,105 @@ def get_status_badge(status: str) -> str:
271280 color = color_map .get (status , "gray" )
272281 return f"<span style='background-color: { color } ; color: white; padding: 2px 6px; border-radius: 4px;'>{ status } </span>"
273282
with st.expander("Export document", False):
    # Toggle controls whether vector embeddings are included in the export.
    with_embeddings = st.toggle(
        "Export with Embeddings", value=True, key=f"{model_key}_with_embeddings"
    )
    result = call_api(
        endpoint="action/walker/deepdoc_client_action/export_documents",
        json_data={
            "agent_id": agent_id,
            "reporting": True,
            "with_embeddings": with_embeddings,
        },
        timeout=120,  # exports can be large; allow a longer round-trip
    )

    if result and result.status_code == 200:
        payload = get_reports_payload(result)
        if payload:
            # Offer the exported documents as a pretty-printed JSON download.
            st.download_button(
                label="Download Documents",
                data=json.dumps(payload, indent=2, ensure_ascii=False),
                file_name="deepdoc_documents.json",
                mime="application/json",
            )
        else:
            # Bug fix: old message said "No job ID returned", which belongs to
            # the job-submission flow, not an export that produced no payload.
            st.error("No documents were returned by the export. Please try again.")
    else:
        # Bug fix: a failed or non-200 response previously produced no feedback.
        st.error("Failed to export documents. Please try again.")
with st.expander("Import document", False):
    # Documents can come either from a pasted YAML/JSON snippet or a file.
    knode_source = st.radio(
        "Choose data source:",
        ("Text input", "Upload file"),
        key=f"{model_key}_knode_source",
    )

    # When enabled, the server drops the existing collection before importing.
    purge_collection = st.toggle(
        "Purge Collection",
        value=False,
        key=f"{model_key}_purge_collection",
    )

    data_to_import = ""
    if knode_source == "Text input":
        data_to_import = st.text_area(
            "Document in YAML or JSON",
            value="",
            height=170,
            key=f"{model_key}_knode_data",
        )

    uploaded_file = None
    if knode_source == "Upload file":
        uploaded_file = st.file_uploader(
            "Upload file (YAML or JSON)",
            type=["yaml", "json"],
            key=f"{model_key}_document_upload",
        )

    with_embeddings = st.toggle(
        "Import with Embeddings",
        value=True,
        key=f"{model_key}_import_embeddings",
    )

    if st.button("Import", key=f"{model_key}_btn_import_document"):
        if uploaded_file:
            try:
                file_content = uploaded_file.read().decode("utf-8", errors="replace")
                # JSON uploads are parsed as JSON; everything else as YAML.
                # The payload is re-serialized to JSON before sending.
                if uploaded_file.type == "application/json":
                    parsed = json.loads(file_content)
                else:
                    parsed = yaml.safe_load(file_content)
                data_to_import = json.dumps(parsed, ensure_ascii=False)
            except (ValueError, yaml.YAMLError) as e:
                st.error(f"Error loading file: {e}")
                # Bug fix: don't let a failed parse fall through and import
                # whatever was previously sitting in data_to_import.
                data_to_import = ""

        if data_to_import:
            result = call_api(
                endpoint="action/walker/deepdoc_client_action/import_documents",
                json_data={
                    "agent_id": agent_id,
                    "data": data_to_import,
                    "with_embeddings": with_embeddings,
                    "purge": purge_collection,
                },
            )

            # Bug fix: any truthy response (even an HTTP error) used to be
            # reported as success; check the status code like the rest of
            # the file does.
            if result and result.status_code == 200:
                st.success("Agent documents imported successfully")
            else:
                st.error("Failed to import document. Ensure valid YAML/JSON format.")
        else:
            st.error("No data to import. Please provide valid text or upload a file.")
381+
274382 with st .expander ("Document List" , True ):
275383 # Initialize session state variables for pagination
276384 if "current_page" not in st .session_state :
@@ -493,13 +601,23 @@ def get_status_badge(status: str) -> str:
493601 if st .button ("No, Keep Job" ):
494602 st .session_state .confirm_state = {"active" : False }
495603 st .rerun ()
496- elif st .button ("Delete Job" , key = f"delete_job_{ job_id } " ):
497- st .session_state .confirm_state = {
498- "active" : True ,
499- "type" : "delete_job" ,
500- "job_id" : job_id ,
501- }
502- st .rerun ()
604+
605+ elif status == "COMPLETED" :
606+ col1 , col2 = st .columns (2 )
607+ with col1 :
608+ if st .button ("Delete Job" , key = f"delete_job_{ job_id } " ):
609+ st .session_state .confirm_state = {
610+ "active" : True ,
611+ "type" : "delete_job" ,
612+ "job_id" : job_id ,
613+ }
614+ st .rerun ()
615+ with col2 :
616+ if st .button ("View Job" , key = f"view_job_{ job_id } " ):
617+ st .session_state .current_page = 3
618+ st .session_state .job_id_details = job_id
619+ st .session_state .job_details = documents
620+ st .rerun ()
503621
504622 # Display each document in the job
505623 for document in documents :
@@ -618,9 +736,94 @@ def get_status_badge(status: str) -> str:
618736 time .sleep (5 )
619737 st .rerun ()
620738
621- else :
622- st .info (
623- "No documents found. Your uploaded documents will be shown here."
624- )
625- else :
626- st .info ("No documents found. Your uploaded documents will be shown here." )
if st.session_state.job_id_details:
    st.write("---")
    st.write("## Job Details")

    # Pagination defaults for the job-details document listing.
    if "page" not in st.session_state[model_key]:
        st.session_state[model_key]["page"] = 1
    if "per_page" not in st.session_state[model_key]:
        st.session_state[model_key]["per_page"] = 10

    # Items per page selection
    per_page_options = [10, 20, 30, 50, 100]
    new_per_page = st.selectbox(
        "Documents per page:",
        per_page_options,
        index=per_page_options.index(st.session_state[model_key]["per_page"]),
    )

    # Changing the page size restarts pagination from the first page.
    if new_per_page != st.session_state[model_key]["per_page"]:
        st.session_state[model_key]["per_page"] = new_per_page
        st.session_state[model_key]["page"] = 1
        st.rerun()

    pages_text = st.text_input(
        "Enter page numbers (comma or space separated):",
        value="",
        placeholder="e.g., 1,2,3",
    )

    # Normalize free-form input into a clean list of numeric page strings.
    # (Previously the same session key was mutated from str to list in place.)
    page_numbers = [
        p.strip()
        for p in pages_text.replace(",", " ").split()
        if p.strip().isdigit()
    ]
    st.session_state[model_key]["pages_input"] = page_numbers

    # Typesense filter: always scope to the selected job; optionally narrow
    # to the requested pages.
    filter_by = f'metadata.job_id:="{st.session_state.job_id_details}"'
    if page_numbers:
        filter_by += f' && metadata.page:=[{",".join(page_numbers)}]'
    st.session_state[model_key]["filter_by"] = filter_by

    params = {
        "page": st.session_state[model_key].get("page", 1),
        "per_page": st.session_state[model_key].get("per_page", 10),
        "filter_by": filter_by,
        "agent_id": agent_id,
    }

    response = call_api(
        endpoint="action/walker/typesense_vector_store_action/list_documents",
        json_data=params,
    )

    if response and response.status_code == 200:
        # Bug fix: get_reports_payload may return a falsy payload; guard
        # before calling .get on it.
        result = get_reports_payload(response) or {}
        documents = result.get("documents", [])

        for doc in documents:
            # Bug fix: tolerate documents with no metadata mapping at all.
            metadata = doc.get("metadata") or {}
            # Prefer the extracted title (stored as a list); fall back to the
            # first line of the document text. Truncate for the expander label.
            if metadata.get("title"):
                title = metadata["title"][0].strip()
            else:
                title = doc["text"].split("\n")[0].strip()
            title = title[:40]
            page = metadata.get("page", "N/A")

            with st.expander(f"{title} (Page {page})", expanded=False):
                st.write(doc["text"])
                st.write("---")

                col1, col2 = st.columns([5, 1])  # wide info column, narrow action column
                with col1:
                    st.markdown(f"**Page:** {page}")
                with col2:
                    # Delete button
                    if st.button("Delete", key=f"delete_{doc['id']}"):
                        args = {"id": doc["id"], "agent_id": agent_id}
                        result = call_api(
                            endpoint="action/walker/typesense_vector_store_action/delete_document",
                            json_data=args,
                        )

                        if result and result.status_code == 200:
                            get_reports_payload(result)
                            st.rerun()
0 commit comments