|
11 | 11 | # See the License for the specific language governing permissions and |
12 | 12 | # limitations under the License. |
13 | 13 | # =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. =========== |
14 | | -import datetime |
15 | | -import os |
16 | 14 | import re |
17 | 15 | import uuid |
18 | 16 | from typing import ( |
|
31 | 29 | BaseVectorStorage, |
32 | 30 | MilvusStorage, |
33 | 31 | QdrantStorage, |
34 | | - VectorDBQuery, |
35 | 32 | ) |
36 | 33 | from camel.types import StorageType |
37 | 34 | from camel.utils import Constants |
@@ -126,62 +123,6 @@ def _collection_name_generator( |
126 | 123 |
|
127 | 124 | return collection_name |
128 | 125 |
|
129 | | - def _get_file_modified_date_from_file( |
130 | | - self, content_input_path: str |
131 | | - ) -> str: |
132 | | - r"""Retrieves the last modified date and time of a given file. This |
133 | | - function takes a file path as input and returns the last modified date |
134 | | - and time of that file. |
135 | | -
|
136 | | - Args: |
137 | | - content_input_path (str): The file path of the content whose |
138 | | - modified date is to be retrieved. |
139 | | -
|
140 | | - Returns: |
141 | | - str: The last modified time from file. |
142 | | - """ |
143 | | - mod_time = os.path.getmtime(content_input_path) |
144 | | - readable_mod_time = datetime.datetime.fromtimestamp( |
145 | | - mod_time |
146 | | - ).isoformat(timespec='seconds') |
147 | | - return readable_mod_time |
148 | | - |
149 | | - def _get_file_modified_date_from_storage( |
150 | | - self, vector_storage_instance: BaseVectorStorage |
151 | | - ) -> str: |
152 | | - r"""Retrieves the last modified date and time of a given file. This |
153 | | - function takes vector storage instance as input and returns the last |
154 | | - modified date from the metadata. |
155 | | -
|
156 | | - Args: |
157 | | - vector_storage_instance (BaseVectorStorage): The vector storage |
158 | | - where modified date is to be retrieved from metadata. |
159 | | -
|
160 | | - Returns: |
161 | | - str: The last modified date from vector storage. |
162 | | - """ |
163 | | - |
164 | | - # Insert any query to get modified date from vector db |
165 | | - # NOTE: Can be optimized when CAMEL vector storage support |
166 | | - # direct chunk payload extraction |
167 | | - query_vector_any = self.embedding_model.embed(obj="any_query") |
168 | | - query_any = VectorDBQuery(query_vector_any, top_k=1) |
169 | | - result_any = vector_storage_instance.query(query_any) |
170 | | - |
171 | | - # Extract the file's last modified date from the metadata |
172 | | - # in the query result |
173 | | - if result_any[0].record.payload is not None: |
174 | | - file_modified_date_from_meta = result_any[0].record.payload[ |
175 | | - "metadata" |
176 | | - ]['last_modified'] |
177 | | - else: |
178 | | - raise ValueError( |
179 | | - "The vector storage exits but the payload is None," |
180 | | - "please check the collection" |
181 | | - ) |
182 | | - |
183 | | - return file_modified_date_from_meta |
184 | | - |
185 | 126 | def run_vector_retriever( |
186 | 127 | self, |
187 | 128 | query: str, |
@@ -246,34 +187,7 @@ def run_vector_retriever( |
246 | 187 | collection_name |
247 | 188 | ) |
248 | 189 |
|
249 | | - # Check the modified time of the input file path, only works |
250 | | - # for local path since no standard way for remote url |
251 | | - file_is_modified = False # initialize with a default value |
252 | | - if ( |
253 | | - vector_storage_instance.status().vector_count != 0 |
254 | | - and isinstance(content, str) |
255 | | - and os.path.exists(content) |
256 | | - ): |
257 | | - # Get original modified date from file |
258 | | - modified_date_from_file = ( |
259 | | - self._get_file_modified_date_from_file(content) |
260 | | - ) |
261 | | - # Get modified date from vector storage |
262 | | - modified_date_from_storage = ( |
263 | | - self._get_file_modified_date_from_storage( |
264 | | - vector_storage_instance |
265 | | - ) |
266 | | - ) |
267 | | - # Determine if the file has been modified since the last |
268 | | - # check |
269 | | - file_is_modified = ( |
270 | | - modified_date_from_file != modified_date_from_storage |
271 | | - ) |
272 | | - |
273 | | - if ( |
274 | | - vector_storage_instance.status().vector_count == 0 |
275 | | - or file_is_modified |
276 | | - ): |
| 190 | + if vector_storage_instance.status().vector_count == 0: |
277 | 191 | # Clear the vector storage |
278 | 192 | vector_storage_instance.clear() |
279 | 193 | # Process and store the content to the vector storage |
|
0 commit comments