-
Couldn't load subscription status.
- Fork 461
Open
Description
A simple repro that creates a small Lance dataset with two fragments:
import pyarrow as pa
import lance
from lance.query import MatchQuery
import polars as pl
# Repro: a full-text query restricted to one fragment of a two-fragment dataset.

# Two single-column tables: table1 holds letters (including "c"),
# table2 holds digits only (no "c").
table1 = pa.Table.from_pydict({"text": list("bcdfghjkl")})
table2 = pa.Table.from_pydict({"text": list("123456")})

# Write table1 (replacing anything already at this path), then add table2
# with a separate insert; the assert confirms this yields two fragments.
lance_ds = lance.write_dataset(table1, "/tmp/tt.lance", mode="overwrite")
lance_ds.insert(table2)
assert len(lance_ds.get_fragments()) == 2

# Build an INVERTED scalar index on the "text" column so that
# full_text_query can be used against it.
lance_ds.create_scalar_index(
    index_type="INVERTED",
    column="text",
    ascii_folding=False,
    stem=False,
    max_token_length=30,
    base_tokenizer="simple",
    with_position=False,
    remove_stop_words=False,
    replace=True,
)

# Search for "c" in "text", but scan only the second fragment.
print(
    pl.from_arrow(
        lance_ds.scanner(
            full_text_query=MatchQuery("c", "text"),
            fragments=[lance_ds.get_fragments()[1]],
            scan_in_order=True,
            limit=2,
            with_row_id=True,
        ).to_table()
    )
)
# However, lance_ds.get_fragments()[1] does not contain "c"!
# (Presumably that fragment holds the rows of table2 -- confirm by
# inspecting the fragment directly.)
Metadata
Metadata
Assignees
Labels
No labels
