Skip to content

Commit 2be32de

Browse files
authored
tf-idf embeddings for search (#64)
* Adds deletion of tf-idf embeddings * comment
1 parent e69d0c7 commit 2be32de

File tree

2 files changed

+67
-40
lines changed

2 files changed

+67
-40
lines changed

upgrade_logic/business_objects/gateway.py

Lines changed: 38 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -12,45 +12,8 @@
1212

1313

1414
def gateway_1_15_0() -> bool:
15-
# here, we update data for cognition using the gateway pattern
16-
# as the corresponding database updates (alembic) are managed using the refinery gateway it is
17-
# ensured that these updates are executed at the correct time
18-
__gateway_1_15_0_add_cognition_project_file_defaults()
19-
__gateway_1_15_0_add_cognition_conversation_file_defaults()
20-
__gateway_1_15_0_remove_cognition_step_type_relevance()
21-
return True
22-
23-
24-
def __gateway_1_15_0_add_cognition_project_file_defaults() -> bool:
25-
query = """
26-
UPDATE cognition.project
27-
SET max_file_size_mb = 3,
28-
allow_file_upload = FALSE
29-
WHERE max_file_size_mb IS NULL
30-
"""
31-
general.execute(query)
32-
general.commit()
33-
return True
34-
35-
36-
def __gateway_1_15_0_add_cognition_conversation_file_defaults() -> bool:
37-
query = """
38-
UPDATE cognition.conversation
39-
SET has_tmp_files = FALSE,
40-
archived = FALSE
41-
WHERE has_tmp_files IS NULL
42-
"""
43-
general.execute(query)
44-
general.commit()
45-
return True
46-
47-
48-
def __gateway_1_15_0_remove_cognition_step_type_relevance() -> bool:
49-
query = """
50-
DELETE FROM cognition.strategy_step WHERE step_type = 'RELEVANCE'
51-
"""
52-
general.execute(query)
53-
general.commit()
15+
# Note: An earlier version of this file listed the previous updates under v1.15.
16+
# That was incorrect; those updates had already been executed. This function is now for the actual 1.15 release.
5417
return True
5518

5619

@@ -60,6 +23,9 @@ def gateway_1_14_0() -> bool:
6023
# ensured that these updates are executed at the correct time
6124
gateway_1_14_0_add_cognition_project_state()
6225
gateway_1_14_0_add_cognition_strategy_complexity()
26+
__gateway_1_14_0_add_cognition_project_file_defaults()
27+
__gateway_1_14_0_add_cognition_conversation_file_defaults()
28+
__gateway_1_14_0_remove_cognition_step_type_relevance()
6329
return True
6430

6531

@@ -94,6 +60,39 @@ def gateway_1_14_0_add_cognition_strategy_complexity() -> bool:
9460
return True
9561

9662

63+
def __gateway_1_14_0_add_cognition_project_file_defaults() -> bool:
64+
query = """
65+
UPDATE cognition.project
66+
SET max_file_size_mb = 3,
67+
allow_file_upload = FALSE
68+
WHERE max_file_size_mb IS NULL
69+
"""
70+
general.execute(query)
71+
general.commit()
72+
return True
73+
74+
75+
def __gateway_1_14_0_add_cognition_conversation_file_defaults() -> bool:
76+
query = """
77+
UPDATE cognition.conversation
78+
SET has_tmp_files = FALSE,
79+
archived = FALSE
80+
WHERE has_tmp_files IS NULL
81+
"""
82+
general.execute(query)
83+
general.commit()
84+
return True
85+
86+
87+
def __gateway_1_14_0_remove_cognition_step_type_relevance() -> bool:
88+
query = """
89+
DELETE FROM cognition.strategy_step WHERE step_type = 'RELEVANCE'
90+
"""
91+
general.execute(query)
92+
general.commit()
93+
return True
94+
95+
9796
def gateway_1_10_1() -> bool:
9897
__gateway_1_10_1_add_additional_embedding_information()
9998
return True

upgrade_logic/business_objects/neural_search.py

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,40 @@
11
import os
22
import requests
33

4-
from submodules.model.business_objects import embedding
4+
from submodules.model.business_objects import embedding, general
55

66

77
NEURAL_SEARCH = os.getenv("NEURAL_SEARCH")
88

99

10+
def neural_search_1_15_0() -> bool:
11+
neural_search_1_15_0_delete_all_tf_idf_embeddings()
12+
return True
13+
14+
15+
def neural_search_1_15_0_delete_all_tf_idf_embeddings() -> bool:
16+
# previous tf-idf embeddings didn't do anything useful so we can just delete them
17+
# uses fixed values instead of enum keys to ensure later enum changes don't break this
18+
query = "SELECT id FROM embedding WHERE platform = 'python' AND model = 'tf-idf'"
19+
embedding_ids = general.execute_all(query)
20+
if len(embedding_ids) > 0:
21+
url_delete = f"{NEURAL_SEARCH}/delete_collection"
22+
for embedding_id in embedding_ids:
23+
try:
24+
params = {"embedding_id": embedding_id[0]}
25+
requests.put(url_delete, params=params)
26+
except Exception as e:
27+
print(
28+
f"Error deleting tf-idf embedding {embedding_id[0]} from qdrant: {e}"
29+
)
30+
31+
# Note that tensors are deleted by cascading
32+
query = "DELETE FROM embedding WHERE platform = 'python' AND model = 'tf-idf'"
33+
general.execute(query)
34+
general.commit()
35+
return True
36+
37+
1038
def neural_search_1_12_0() -> bool:
1139
__neural_search_1_12_0_update_qdrant()
1240
return True

0 commit comments

Comments
 (0)