Skip to content

Commit 6ce1eb7

Browse files
committed
feat: add create and drop knowledge index functions for improved database management
1 parent fc4a7df commit 6ce1eb7

File tree

4 files changed

+51
-2
lines changed

4 files changed

+51
-2
lines changed

apps/common/event/listener_manage.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
from common.utils.page_utils import page_desc
2727
from knowledge.models import Paragraph, Status, Document, ProblemParagraphMapping, TaskType, State, SourceType, \
2828
SearchMode
29+
from knowledge.serializers.common import create_knowledge_index
2930
from maxkb.conf import (PROJECT_DIR)
3031

3132
lock = threading.Lock()
@@ -290,6 +291,8 @@ def is_the_task_interrupted():
290291
ListenerManagement.get_aggregation_document_status(
291292
document_id)),
292293
is_the_task_interrupted)
294+
# Ensure the knowledge index exists (it is created if missing)
295+
create_knowledge_index(document_id=document_id)
293296
except Exception as e:
294297
maxkb_logger.error(_('Vectorized document: {document_id} error {error} {traceback}').format(
295298
document_id=document_id, error=str(e), traceback=traceback.format_exc()))

apps/knowledge/serializers/common.py

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,12 @@
1818

1919
from common.config.embedding_config import ModelManage
2020
from common.db.search import native_search
21-
from common.db.sql_execute import update_execute
21+
from common.db.sql_execute import sql_execute, update_execute
2222
from common.exception.app_exception import AppApiException
2323
from common.utils.common import get_file_content
2424
from common.utils.fork import Fork
25+
from common.utils.logger import maxkb_logger
26+
from knowledge.models import Document
2527
from knowledge.models import Paragraph, Problem, ProblemParagraphMapping, Knowledge, File
2628
from maxkb.conf import PROJECT_DIR
2729
from models_provider.tools import get_model
@@ -220,3 +222,44 @@ def get_knowledge_operation_object(knowledge_id: str):
220222
"update_time": knowledge_model.update_time
221223
}
222224
return {}
225+
226+
227+
def create_knowledge_index(knowledge_id=None, document_id=None):
    """Create the partial HNSW index for one knowledge base if it is missing.

    The index is named ``embedding_hnsw_idx_<knowledge id>`` and is built on
    the ``embedding`` table, restricted to the rows of that knowledge base.
    The vector dimension is read from one existing embedding row; when the
    knowledge base has no embedding rows yet, nothing is created.

    :param knowledge_id: id of the knowledge base; takes precedence over
        ``document_id`` when both are given.
    :param document_id: id of a document whose ``knowledge_id`` is used when
        ``knowledge_id`` is not given.
    :raises AppApiException: when neither ``knowledge_id`` nor ``document_id``
        is provided.
    """
    if knowledge_id is None and document_id is None:
        raise AppApiException(500, _('Knowledge ID or Document ID must be provided'))

    if knowledge_id is not None:
        k_id = knowledge_id
    else:
        document = QuerySet(Document).filter(id=document_id).first()
        if document is None:
            # .first() returns None when no row matches (e.g. the document was
            # removed in the meantime); there is nothing to index in that case.
            maxkb_logger.info(f'Skip creating index, document not found: {document_id}')
            return
        k_id = document.knowledge_id

    index_name = f'embedding_hnsw_idx_{k_id}'

    # Values are passed as query parameters instead of being interpolated.
    # NOTE(review): assumes sql_execute forwards its second argument to the
    # DB cursor as query parameters - confirm against common.db.sql_execute.
    existing = sql_execute(
        "SELECT indexname, indexdef FROM pg_indexes WHERE tablename = 'embedding' AND indexname = %s",
        [index_name])
    if existing:
        return

    rows = sql_execute(
        "SELECT vector_dims(embedding) AS dims FROM embedding WHERE knowledge_id = %s LIMIT 1",
        [k_id])
    if not rows:
        # No embeddings yet, so the vector dimension is unknown.
        return
    dims = int(rows[0]['dims'])

    # Identifiers and the partial-index predicate of a DDL statement are
    # embedded in the statement text, so they are quoted explicitly here.
    quoted_index = '"' + index_name.replace('"', '""') + '"'
    quoted_id = "'" + str(k_id).replace("'", "''") + "'"
    # IF NOT EXISTS keeps a concurrent creator (between the check above and
    # this statement) from turning into an error.
    sql = (f"CREATE INDEX IF NOT EXISTS {quoted_index} ON embedding "
           f"USING hnsw ((embedding::vector({dims})) vector_l2_ops) "
           f"WHERE knowledge_id = {quoted_id}")
    update_execute(sql, [])
    maxkb_logger.info(f'Created index for knowledge ID: {k_id}')
248+
249+
250+
def drop_knowledge_index(knowledge_id=None, document_id=None):
    """Drop the HNSW index of one knowledge base if it exists.

    The index looked up and dropped is ``embedding_hnsw_idx_<knowledge id>``
    on the ``embedding`` table.

    :param knowledge_id: id of the knowledge base; takes precedence over
        ``document_id`` when both are given.
    :param document_id: id of a document whose ``knowledge_id`` is used when
        ``knowledge_id`` is not given.
    :raises AppApiException: when neither ``knowledge_id`` nor ``document_id``
        is provided.
    """
    if knowledge_id is None and document_id is None:
        raise AppApiException(500, _('Knowledge ID or Document ID must be provided'))

    if knowledge_id is not None:
        k_id = knowledge_id
    else:
        document = QuerySet(Document).filter(id=document_id).first()
        if document is None:
            # .first() returns None when no row matches; without a document
            # the owning knowledge base cannot be determined.
            maxkb_logger.info(f'Skip dropping index, document not found: {document_id}')
            return
        k_id = document.knowledge_id

    index_name = f'embedding_hnsw_idx_{k_id}'

    # The index name is passed as a query parameter instead of being
    # interpolated into the statement.
    # NOTE(review): assumes sql_execute forwards its second argument to the
    # DB cursor as query parameters - confirm against common.db.sql_execute.
    existing = sql_execute(
        "SELECT indexname, indexdef FROM pg_indexes WHERE tablename = 'embedding' AND indexname = %s",
        [index_name])
    if not existing:
        return

    # An identifier cannot be bound as a parameter, so it is quoted here.
    quoted_index = '"' + index_name.replace('"', '""') + '"'
    # IF EXISTS tolerates the index being dropped concurrently after the check.
    update_execute(f'DROP INDEX IF EXISTS {quoted_index}', [])
    maxkb_logger.info(f'Dropped index for knowledge ID: {k_id}')

apps/knowledge/serializers/knowledge.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
from common.utils.split_model import get_split_model
3131
from knowledge.models import Knowledge, KnowledgeScope, KnowledgeType, Document, Paragraph, Problem, \
3232
ProblemParagraphMapping, TaskType, State, SearchMode, KnowledgeFolder, File
33-
from knowledge.serializers.common import ProblemParagraphManage, get_embedding_model_id_by_knowledge_id, MetaSerializer, \
33+
from knowledge.serializers.common import ProblemParagraphManage, drop_knowledge_index, get_embedding_model_id_by_knowledge_id, MetaSerializer, \
3434
GenerateRelatedSerializer, get_embedding_model_by_knowledge_id, list_paragraph, write_image, zip_dir
3535
from knowledge.serializers.document import DocumentSerializers
3636
from knowledge.task.embedding import embedding_by_knowledge, delete_embedding_by_knowledge
@@ -418,6 +418,7 @@ def delete(self):
418418
QuerySet(Problem).filter(knowledge=knowledge).delete()
419419
QuerySet(WorkspaceUserResourcePermission).filter(target=knowledge.id).delete()
420420
QuerySet(ApplicationKnowledgeMapping).filter(knowledge_id=knowledge.id).delete()
421+
drop_knowledge_index(knowledge_id=knowledge.id)
421422
knowledge.delete()
422423
File.objects.filter(
423424
source_id=knowledge.id,

apps/knowledge/task/embedding.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
UpdateEmbeddingDocumentIdArgs
1414
from common.utils.logger import maxkb_logger
1515
from knowledge.models import Document, TaskType, State
16+
from knowledge.serializers.common import drop_knowledge_index
1617
from models_provider.tools import get_model
1718
from models_provider.models import Model
1819
from ops import celery_app
@@ -102,6 +103,7 @@ def embedding_by_knowledge(knowledge_id, model_id):
102103
maxkb_logger.info(_('Start--->Vectorized knowledge: {knowledge_id}').format(knowledge_id=knowledge_id))
103104
try:
104105
ListenerManagement.delete_embedding_by_knowledge(knowledge_id)
106+
drop_knowledge_index(knowledge_id=knowledge_id)
105107
document_list = QuerySet(Document).filter(knowledge_id=knowledge_id)
106108
maxkb_logger.info(_('Knowledge documentation: {document_names}').format(
107109
document_names=", ".join([d.name for d in document_list])))

0 commit comments

Comments
 (0)