@@ -81,26 +81,32 @@ def ollama_embed(
 def chunk_text(text: str, size: int = CHUNK_CHARS) -> List[str]:
     """Split text into chunks of specified size"""
     text = re.sub(r"\s+", " ", text)
-    return [text[i : i + size] for i in range(0, len(text), size)]
+    return [text[i : i + size] for i in range(0, len(text), size)]
 
 
 def scan_docs(root: Path) -> List[Tuple[str, str]]:
     """Scan directory for supported documents and read their content"""
-    # First, count only supported files (exclude database files)
-    console.print("🔍 [dim]Counting files...[/]")
-    all_files = [p for p in root.rglob("*") if p.is_file()]
-
-    # Filter out database files and only include supported extensions
-    files: List[Path] = []
-    for p in all_files:
-        # Skip database files
-        if p.name in ["faiss_index.bin", "doc_store.json"]:
-            continue
-        # Only include supported file types
-        if p.suffix.lower() in READERS:
-            files.append(p)
+    # Find supported files directly (much faster!)
+    with Progress(
+        SpinnerColumn(),
+        TextColumn("[progress.description]{task.description}"),
+        console=console,
+    ) as prog:
+        count_task = prog.add_task("🔍 Counting files...")
+
+        files: List[Path] = []
+
+        # Only glob for supported file extensions
+        for ext in READERS.keys():
+            pattern = f"**/*{ext}"
+            for p in root.rglob(pattern):
+                if p.is_file() and p.name not in ["faiss_index.bin", "doc_store.json"]:
+                    files.append(p)
+
+        prog.update(count_task, completed=True)
 
     total_files = len(files)
+    console.print(f"📊 [bold green]Found {total_files} supported documents[/]")
 
     docs: List[Tuple[str, str]] = []
     files_processed = 0
@@ -141,7 +147,7 @@ def scan_docs(root: Path) -> List[Tuple[str, str]]:
         prog.update(
             task,
             current_file="✅ Complete!",
-            description=f"📄 Found {len(docs)} documents",
+            description="📄 Document scanning complete",
         )
     return docs
 
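For reference, the scan_docs change above swaps a full directory walk for per-extension globs wrapped in a Rich spinner. The following is a minimal standalone sketch of that pattern, not the author's code: READERS here is a stand-in mapping (the real table maps extensions to reader callables), and only the skipped database filenames are taken from the diff.

from pathlib import Path
from typing import Dict, List

from rich.console import Console
from rich.progress import Progress, SpinnerColumn, TextColumn

console = Console()

# Stand-in for the real READERS table (extension -> reader callable).
READERS: Dict[str, object] = {".txt": None, ".md": None, ".pdf": None}
DB_FILES = {"faiss_index.bin", "doc_store.json"}


def find_supported_files(root: Path) -> List[Path]:
    """Glob only for supported extensions instead of walking every file."""
    files: List[Path] = []
    with Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        console=console,
    ) as prog:
        prog.add_task("🔍 Counting files...")
        for ext in READERS:
            for p in root.rglob(f"**/*{ext}"):
                if p.is_file() and p.name not in DB_FILES:
                    files.append(p)
    return files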
@@ -208,7 +214,7 @@ def build_or_load(
         task = prog.add_task("🔮 Generating embeddings", total=len(chunks))
         for i in range(0, len(chunks), batch_size):
             current_batch_size = min(batch_size, len(chunks) - i)
-            vecs = ollama_embed(chunks[i : i + current_batch_size], embed_model, url)
+            vecs = ollama_embed(chunks[i : i + current_batch_size], embed_model, url)
             index.add(vecs)  # type: ignore
             prog.advance(task, current_batch_size)
 
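The final hunk only re-wraps the embedding call, but for context the surrounding loop batches chunks, embeds each batch, and appends the resulting vectors to a FAISS index. A rough sketch of that flow follows, assuming the embedding call returns a float32 array of shape (batch, dim); embed_batch below is a placeholder for ollama_embed(batch, embed_model, url), and the dimension and batch size are illustrative.

import numpy as np
import faiss  # pip install faiss-cpu


def embed_batch(texts: list) -> np.ndarray:
    """Placeholder for ollama_embed(texts, embed_model, url)."""
    return np.random.rand(len(texts), 768).astype("float32")


chunks = [f"chunk {i}" for i in range(1000)]
batch_size = 32
index = faiss.IndexFlatL2(768)  # dimension must match the embedding size

for i in range(0, len(chunks), batch_size):
    batch = chunks[i : i + batch_size]
    vecs = embed_batch(batch)  # real code: ollama_embed(batch, embed_model, url)
    index.add(vecs)  # FAISS expects a contiguous float32 (n, d) array

print(index.ntotal)  # 1000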