Skip to content

[DNM] Support TIFLASH_REPLICA option when creating table #69

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
149 changes: 148 additions & 1 deletion tests/sqlalchemy/test_sqlalchemy.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
from locale import normalize
import pytest
import numpy as np
import sqlalchemy
from sqlalchemy import URL, create_engine, Column, Integer, select
from sqlalchemy.orm import declarative_base, sessionmaker
from sqlalchemy.exc import OperationalError
from tidb_vector.sqlalchemy import VectorType, VectorAdaptor
from tidb_vector.sqlalchemy import VectorType, VectorAdaptor, VectorIndex
import tidb_vector
from ..config import TestConfig

Expand Down Expand Up @@ -385,3 +387,148 @@ def test_index_and_search(self):
)
assert len(items) == 2
assert items[0].distance == 0.0


class Item3Model(Base):
    """ORM model for the ``sqlalchemy_item3`` table.

    The table has an integer primary key and a 3-dimensional vector column,
    and is declared with the ``mysql_tiflash_replica`` table option set to
    ``"1"``.
    """

    __tablename__ = "sqlalchemy_item3"

    # Dialect-specific table options handed to the MySQL DDL compiler.
    __table_args__ = {"mysql_tiflash_replica": "1"}

    id = Column(Integer, primary_key=True)
    embedding = Column(VectorType(dim=3))


class TestSQLAlchemyVectorIndex:
    """Tests for the ``TIFLASH_REPLICA`` table option and ``VectorIndex`` DDL.

    The statement tests only compile DDL against ``engine.dialect``; the
    query test runs against the database bound to ``engine`` and relies on
    the ``Item3Model`` table created in ``setup_class``.
    """

    def setup_class(self):
        # Every test below works on Item3Model, so that is the table that
        # has to exist (start from a clean slate in case a previous run
        # left it behind).
        Item3Model.__table__.drop(bind=engine, checkfirst=True)
        Item3Model.__table__.create(bind=engine)

    def teardown_class(self):
        Item3Model.__table__.drop(bind=engine, checkfirst=True)

    def test_create_table_statement(self):
        # Imported before first use; a function-local import placed after
        # the first reference would raise UnboundLocalError.
        from sqlalchemy.sql.ddl import CreateTable

        def render(table):
            """Compile CREATE TABLE for *table* and strip newlines/tabs."""
            compiled = CreateTable(table).compile(dialect=engine.dialect)
            return compiled.string.replace("\n", "").replace("\t", "").strip()

        # Each table below lives in its own MetaData / declarative base:
        # registering the same table name twice on one MetaData raises
        # "Table ... is already defined", and throwaway metadata keeps the
        # shared `Base.metadata` free of test-only tables.

        # Define a table using `sqlalchemy.schema.Table`
        plain_tbl = sqlalchemy.schema.Table(
            "mytable",
            sqlalchemy.MetaData(),
            Column("id", Integer),
        )
        assert render(plain_tbl) == "CREATE TABLE mytable (id INTEGER)"

        # Define a table with tiflash replica using `sqlalchemy.schema.Table`
        replica_tbl = sqlalchemy.schema.Table(
            "mytable",
            sqlalchemy.MetaData(),
            Column("id", Integer),
            mysql_tiflash_replica="1",
        )
        assert (
            render(replica_tbl)
            == "CREATE TABLE mytable (id INTEGER)TIFLASH_REPLICA=1"
        )

        # Define a table inheriting from a declarative base
        PlainBase = declarative_base()

        class PlainTableModel(PlainBase):
            __tablename__ = "test_tbl"
            id = Column(Integer, primary_key=True)
            embedding = Column(VectorType(dim=3))

        assert (
            render(PlainTableModel.__table__)
            == "CREATE TABLE test_tbl (id INTEGER NOT NULL AUTO_INCREMENT, embedding VECTOR(3), PRIMARY KEY (id))"
        )

        # Define a table inheriting from a declarative base with tiflash
        # replica using `__table_args__`
        ReplicaBase = declarative_base()

        class ReplicaTableModel(ReplicaBase):
            __tablename__ = "test_tbl"
            id = Column(Integer, primary_key=True)
            embedding = Column(VectorType(dim=3))
            __table_args__ = {
                "mysql_tiflash_replica": "1",
            }

        assert (
            render(ReplicaTableModel.__table__)
            == "CREATE TABLE test_tbl (id INTEGER NOT NULL AUTO_INCREMENT, embedding VECTOR(3), PRIMARY KEY (id))TIFLASH_REPLICA=1"
        )

    def test_create_vector_index_statement(self):
        from sqlalchemy.sql.ddl import CreateIndex

        # The indexes are declared on Item3Model's table, so the rendered
        # DDL must name `sqlalchemy_item3`.
        l2_index = VectorIndex(
            "idx_embedding_l2",
            sqlalchemy.func.vec_l2_distance(Item3Model.__table__.c.embedding),
        )
        compiled = CreateIndex(l2_index).compile(dialect=engine.dialect)
        assert compiled.string == "CREATE VECTOR INDEX idx_embedding_l2 ON sqlalchemy_item3 ((vec_l2_distance(embedding)))"

        cos_index = VectorIndex(
            "idx_embedding_cos",
            sqlalchemy.func.vec_cosine_distance(Item3Model.__table__.c.embedding),
        )
        compiled = CreateIndex(cos_index).compile(dialect=engine.dialect)
        assert compiled.string == "CREATE VECTOR INDEX idx_embedding_cos ON sqlalchemy_item3 ((vec_cosine_distance(embedding)))"

        # non-vector index
        normal_index = sqlalchemy.schema.Index(
            "idx_unique", Item3Model.__table__.c.id, unique=True
        )
        compiled = CreateIndex(normal_index).compile(dialect=engine.dialect)
        assert compiled.string == "CREATE UNIQUE INDEX idx_unique ON sqlalchemy_item3 (id)"

    def test_query_with_index(self):
        # indexes
        l2_index = VectorIndex(
            "idx_embedding_l2",
            sqlalchemy.func.vec_l2_distance(Item3Model.__table__.c.embedding),
        )
        l2_index.create(engine)
        cos_index = VectorIndex(
            "idx_embedding_cos",
            sqlalchemy.func.vec_cosine_distance(Item3Model.__table__.c.embedding),
        )
        cos_index.create(engine)

        with Session() as session:
            session.add_all(
                [Item3Model(embedding=[1, 2, 3]), Item3Model(embedding=[1, 2, 3.2])]
            )
            session.commit()

            # l2 distance
            result_l2 = session.scalars(
                select(Item3Model).filter(
                    Item3Model.embedding.l2_distance([1, 2, 3.1]) < 0.2
                )
            ).all()
            assert len(result_l2) == 2

            distance_l2 = Item3Model.embedding.l2_distance([1, 2, 3])
            items_l2 = (
                session.query(Item3Model.id, distance_l2.label("distance"))
                .order_by(distance_l2)
                .limit(5)
                .all()
            )
            assert len(items_l2) == 2
            assert items_l2[0].distance == 0.0

            # cosine distance
            result_cos = session.scalars(
                select(Item3Model).filter(
                    Item3Model.embedding.cosine_distance([1, 2, 3.1]) < 0.2
                )
            ).all()
            assert len(result_cos) == 2

            distance_cos = Item3Model.embedding.cosine_distance([1, 2, 3])
            items_cos = (
                session.query(Item3Model.id, distance_cos.label("distance"))
                .order_by(distance_cos)
                .limit(5)
                .all()
            )
            assert len(items_cos) == 2
            assert items_cos[0].distance == 0.0

        # drop indexes
        l2_index.drop(engine)
        cos_index.drop(engine)
3 changes: 2 additions & 1 deletion tidb_vector/sqlalchemy/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from .vector_type import VectorType
from .adaptor import VectorAdaptor
from .index import VectorIndex

__all__ = ["VectorType", "VectorAdaptor"]
__all__ = ["VectorType", "VectorAdaptor", "VectorIndex"]
17 changes: 17 additions & 0 deletions tidb_vector/sqlalchemy/index.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from typing import Optional, Any

import sqlalchemy

from sqlalchemy.ext.compiler import compiles
from sqlalchemy.schema import Index

class VectorIndex(Index):
    """A non-unique ``Index`` whose MySQL dialect ``prefix`` option is ``VECTOR``.

    The constructor accepts the same name, expressions and dialect keyword
    arguments as ``sqlalchemy.schema.Index``; ``unique`` is always ``False``.
    """

    def __init__(
        self,
        name: Optional[str],
        *expressions,  # _DDLColumnArgument
        _table: Optional[Any] = None,
        **dialect_kw: Any,
    ):
        super().__init__(
            name,
            *expressions,
            unique=False,
            _table=_table,
            **dialect_kw,
        )
        # Set after the base initialiser so the prefix is always "VECTOR",
        # whatever was supplied through `dialect_kw`.
        mysql_options = self.dialect_options["mysql"]
        mysql_options["prefix"] = "VECTOR"
Loading