@@ -140,6 +140,29 @@ def __init__(
140140
141141 log_debug (f"Initialized LanceDb with table: '{ self .table_name } '" )
142142
def _prepare_vector(self, embedding: Optional[List[float]]) -> List[float]:
    """Return ``embedding`` as a list of floats sized for the vector column.

    Args:
        embedding: The document embedding, or ``None`` when the document has
            no embedding. Any iterable of numbers is accepted, including
            iterators that do not support ``len()``.

    Returns:
        A new list of floats; the input is never mutated. When
        ``self.dimensions`` is set the result has exactly that many entries:
        longer embeddings are truncated and shorter ones are right-padded
        with ``0.0``. A missing or empty embedding yields an all-zero vector
        of ``self.dimensions`` entries (1536 when ``self.dimensions`` is
        unset).
    """
    # Materialise once so the length can be measured without calling len()
    # on the caller's object, which may be a one-shot iterator.
    vector = [] if embedding is None else [float(x) for x in embedding]

    # An empty embedding carries no information, so treat it exactly like a
    # missing one; callers then never receive a zero-length vector.
    if not vector:
        return [0.0] * (self.dimensions or 1536)

    # Without a configured size there is nothing to enforce.
    if not self.dimensions:
        return vector

    original_length = len(vector)
    if original_length > self.dimensions:
        vector = vector[: self.dimensions]
        log_debug(f"Truncated vector from {original_length} to {self.dimensions} dimensions")
    elif original_length < self.dimensions:
        vector.extend([0.0] * (self.dimensions - original_length))
        log_debug(f"Padded vector from {original_length} to {self.dimensions} dimensions")

    return vector
165+
143166 async def _get_async_connection (self ) -> lancedb .AsyncConnection :
144167 """Get or create an async connection to LanceDB."""
145168 if self .async_connection is None :
@@ -195,9 +218,16 @@ async def async_create(self) -> None:
195218 raise
196219
197220 def _base_schema (self ) -> pa .Schema :
221+ # Use fixed-size list for vector field as required by LanceDB
222+ if self .dimensions :
223+ vector_field = pa .field (self ._vector_col , pa .list_ (pa .float32 (), self .dimensions ))
224+ else :
225+ # Fallback to dynamic list if dimensions not known (should be rare)
226+ vector_field = pa .field (self ._vector_col , pa .list_ (pa .float32 ()))
227+
198228 return pa .schema (
199229 [
200- pa . field ( self . _vector_col , pa . list_ ( pa . float32 ())) ,
230+ vector_field ,
201231 pa .field (self ._id , pa .string ()),
202232 pa .field ("payload" , pa .string ()),
203233 ]
@@ -286,9 +316,7 @@ def insert(self, content_hash: str, documents: List[Document], filters: Optional
286316 data .append (
287317 {
288318 "id" : doc_id ,
289- "vector" : [float (x ) for x in document .embedding ]
290- if document .embedding
291- else [0.0 ] * (self .dimensions or 1536 ),
319+ "vector" : self ._prepare_vector (document .embedding ),
292320 "payload" : json .dumps (payload ),
293321 }
294322 )
@@ -353,9 +381,7 @@ async def async_insert(
353381 data .append (
354382 {
355383 "id" : doc_id ,
356- "vector" : [float (x ) for x in document .embedding ]
357- if document .embedding
358- else [0.0 ] * (self .dimensions or 1536 ),
384+ "vector" : self ._prepare_vector (document .embedding ),
359385 "payload" : json .dumps (payload ),
360386 }
361387 )
@@ -670,26 +696,25 @@ async def async_get_count(self) -> int:
670696 return await self .async_table .count_rows ()
671697 return 0
672698
673- def _async_get_count_sync (self ) -> int :
674- """Helper method to run async_get_count in a new thread with its own event loop"""
675- import asyncio
676-
677- return asyncio .run (self .async_get_count ())
678-
679699 def get_count (self ) -> int :
680700 # If we have data in the async table but sync table isn't available, try to get count from async table
681701 if self .async_table is not None :
682702 try :
683703 import asyncio
684704
685- # Check if we're already in an async context
705+ # Check if we're already in an event loop
686706 try :
687- return self ._async_get_count_sync ()
707+ asyncio .get_running_loop ()
708+ # We're in an async context, can't use asyncio.run
709+ log_debug ("Already in async context, falling back to sync table for count" )
688710 except RuntimeError :
689711 # No event loop running, safe to use asyncio.run
690- return asyncio .run (self .async_get_count ())
691- except Exception :
692- pass
712+ try :
713+ return asyncio .run (self .async_get_count ())
714+ except Exception as e :
715+ log_debug (f"Failed to get async count: { e } " )
716+ except Exception as e :
717+ log_debug (f"Error in async count logic: { e } " )
693718
694719 if self .exists () and self .table :
695720 return self .table .count_rows ()
0 commit comments