Skip to content

Commit fe5d2eb

Browse files
committed
fixes
1 parent 6764aa7 commit fe5d2eb

File tree

3 files changed

+34
-61
lines changed

3 files changed

+34
-61
lines changed

main.py

Lines changed: 10 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -180,9 +180,18 @@ def main():
180180

181181
# Build or load index only if RAG directory is provided
182182
if args.rag_dir:
183+
# Validate RAG directory exists first
184+
rag_path = Path(args.rag_dir).expanduser()
185+
if not rag_path.exists():
186+
console.print(f"[bold red]Error: RAG directory does not exist: {args.rag_dir}[/]")
187+
sys.exit(1)
188+
if not rag_path.is_dir():
189+
console.print(f"[bold red]Error: RAG path is not a directory: {args.rag_dir}[/]")
190+
sys.exit(1)
191+
183192
try:
184193
index, store = build_or_load(
185-
Path(args.rag_dir).expanduser(),
194+
rag_path,
186195
args.embed_model,
187196
args.ollama_url,
188197
args.rebuild,

ollama-chat-party

Lines changed: 2 additions & 44 deletions
Original file line number | Diff line number | Diff line change
@@ -12,10 +12,9 @@
1212

1313
set -e
1414

15-
# Parse arguments to find --rag-dir and --listen
15+
# Parse arguments to find --rag-dir
1616
DOCUMENTS_PATH=""
1717
VOLUME_ARG=""
18-
PORT_ARG=""
1918
NEW_ARGS=()
2019

2120
while [[ $# -gt 0 ]]; do
@@ -58,40 +57,6 @@ while [[ $# -gt 0 ]]; do
5857
echo "✓ Using documents from: $DOCUMENTS_PATH"
5958
shift 2
6059
;;
61-
--listen=*)
62-
# Handle --listen=host:port format
63-
LISTEN_ADDR="${1#*=}"
64-
if [ -z "$LISTEN_ADDR" ]; then
65-
echo "Error: --listen requires host:port format"
66-
exit 1
67-
fi
68-
# Extract port for Docker mapping (container always uses 0.0.0.0:8000)
69-
EXTERNAL_PORT="${LISTEN_ADDR#*:}"
70-
if [ "$EXTERNAL_PORT" != "8000" ]; then
71-
PORT_ARG="-p $EXTERNAL_PORT:8000"
72-
echo "✓ Mapping external port $EXTERNAL_PORT to container port 8000"
73-
fi
74-
# Always tell container to listen on 0.0.0.0:8000 internally
75-
NEW_ARGS+=("--listen=0.0.0.0:8000")
76-
shift
77-
;;
78-
--listen)
79-
# Handle --listen host:port format
80-
if [ -z "$2" ] || [[ "$2" == --* ]]; then
81-
echo "Error: --listen requires host:port format"
82-
exit 1
83-
fi
84-
LISTEN_ADDR="$2"
85-
# Extract port for Docker mapping (container always uses 0.0.0.0:8000)
86-
EXTERNAL_PORT="${LISTEN_ADDR#*:}"
87-
if [ "$EXTERNAL_PORT" != "8000" ]; then
88-
PORT_ARG="-p $EXTERNAL_PORT:8000"
89-
echo "✓ Mapping external port $EXTERNAL_PORT to container port 8000"
90-
fi
91-
# Always tell container to listen on 0.0.0.0:8000 internally
92-
NEW_ARGS+=("--listen" "0.0.0.0:8000")
93-
shift 2
94-
;;
9560
*)
9661
NEW_ARGS+=("$1")
9762
shift
@@ -109,15 +74,8 @@ if [ -f ".env" ]; then
10974
echo "✓ Using .env file"
11075
fi
11176

112-
# Use host networking if no custom port, otherwise use port mapping
113-
if [ -z "$PORT_ARG" ]; then
114-
NETWORK_ARG="--network host"
115-
else
116-
NETWORK_ARG="$PORT_ARG"
117-
fi
118-
11977
docker run -it --rm \
120-
$NETWORK_ARG \
78+
--network host \
12179
$VOLUME_ARG \
12280
$ENV_FILE_ARG \
12381
psyb0t/ollama-chat-party "${NEW_ARGS[@]}"

rag.py

Lines changed: 22 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -81,26 +81,32 @@ def ollama_embed(
8181
def chunk_text(text: str, size: int = CHUNK_CHARS) -> List[str]:
8282
"""Split text into chunks of specified size"""
8383
text = re.sub(r"\s+", " ", text)
84-
return [text[i : i + size] for i in range(0, len(text), size)]
84+
return [text[i:i + size] for i in range(0, len(text), size)]
8585

8686

8787
def scan_docs(root: Path) -> List[Tuple[str, str]]:
8888
"""Scan directory for supported documents and read their content"""
89-
# First, count only supported files (exclude database files)
90-
console.print("🔍 [dim]Counting files...[/]")
91-
all_files = [p for p in root.rglob("*") if p.is_file()]
92-
93-
# Filter out database files and only include supported extensions
94-
files: List[Path] = []
95-
for p in all_files:
96-
# Skip database files
97-
if p.name in ["faiss_index.bin", "doc_store.json"]:
98-
continue
99-
# Only include supported file types
100-
if p.suffix.lower() in READERS:
101-
files.append(p)
89+
# Find supported files directly (much faster!)
90+
with Progress(
91+
SpinnerColumn(),
92+
TextColumn("[progress.description]{task.description}"),
93+
console=console,
94+
) as prog:
95+
count_task = prog.add_task("🔍 Counting files...")
96+
97+
files: List[Path] = []
98+
99+
# Only glob for supported file extensions
100+
for ext in READERS.keys():
101+
pattern = f"**/*{ext}"
102+
for p in root.rglob(pattern):
103+
if p.is_file() and p.name not in ["faiss_index.bin", "doc_store.json"]:
104+
files.append(p)
105+
106+
prog.update(count_task, completed=True)
102107

103108
total_files = len(files)
109+
console.print(f"📊 [bold green]Found {total_files} supported documents[/]")
104110

105111
docs: List[Tuple[str, str]] = []
106112
files_processed = 0
@@ -141,7 +147,7 @@ def scan_docs(root: Path) -> List[Tuple[str, str]]:
141147
prog.update(
142148
task,
143149
current_file="✅ Complete!",
144-
description=f"📄 Found {len(docs)} documents",
150+
description="📄 Document scanning complete",
145151
)
146152
return docs
147153

@@ -208,7 +214,7 @@ def build_or_load(
208214
task = prog.add_task("🔮 Generating embeddings", total=len(chunks))
209215
for i in range(0, len(chunks), batch_size):
210216
current_batch_size = min(batch_size, len(chunks) - i)
211-
vecs = ollama_embed(chunks[i : i + current_batch_size], embed_model, url)
217+
vecs = ollama_embed(chunks[i:i + current_batch_size], embed_model, url)
212218
index.add(vecs) # type: ignore
213219
prog.advance(task, current_batch_size)
214220

0 commit comments

Comments
 (0)