Skip to content

Commit 7d761ce

Browse files
committed
Sync Python SDK v0.4.1
1 parent bd53b3e commit 7d761ce

File tree

10 files changed

+271
-17
lines changed

10 files changed

+271
-17
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
2323
Python SDK - jamaibase
2424

2525
- Add `CodeGenConfig` for Python code execution #446
26+
- Remove pydub dependency for SDK `v0.4.1` #488
2627

2728
TS SDK - jamaibase
2829

clients/python/pyproject.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,6 @@ dependencies = [
101101
"Pillow>=10.0.1",
102102
"pydantic-settings>=2.0.3",
103103
"pydantic>=2.4.2",
104-
"pydub~=0.25.1",
105104
"srsly>=2.4.8",
106105
"toml>=0.10.2",
107106
"typing_extensions>=4.10.0",

clients/python/src/jamaibase/utils/io.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212
import srsly
1313
import toml
1414
from PIL import ExifTags, Image
15-
from pydub import AudioSegment
1615

1716
from jamaibase.utils.types import JSONInput, JSONOutput
1817

@@ -215,6 +214,8 @@ def generate_audio_thumbnail(file_content: bytes, duration_ms: int = 30000) -> b
215214
Returns:
216215
bytes: The thumbnail audio segment as bytes.
217216
"""
217+
from pydub import AudioSegment
218+
218219
# Use BytesIO to simulate a file object from the byte content
219220
audio = AudioSegment.from_file(BytesIO(file_content))
220221

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.4.0"
1+
__version__ = "0.4.1"

clients/python/tests/oss/gen_table/test_export_ops.py

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -966,6 +966,82 @@ def test_export_data(
966966
assert k not in row
967967

968968

969+
@flaky(max_runs=5, min_passes=1, rerun_filter=_rerun_on_fs_error_with_delay)
970+
@pytest.mark.parametrize("client_cls", CLIENT_CLS)
971+
@pytest.mark.parametrize("table_type", TABLE_TYPES)
972+
@pytest.mark.parametrize("delimiter", [","], ids=["comma_delimiter"])
973+
def test_export_reordered_columns_data(
974+
client_cls: Type[JamAI],
975+
table_type: p.TableType,
976+
delimiter: str,
977+
):
978+
jamai = client_cls()
979+
with _create_table(jamai, table_type) as table:
980+
assert isinstance(table, p.TableMetaResponse)
981+
_add_row(
982+
jamai,
983+
table_type,
984+
False,
985+
data={"good": True, "words": 5, "stars": 0.0, "inputs": TEXT, "summary": TEXT},
986+
)
987+
# Reorder columns
988+
new_cols_order = [
989+
"words",
990+
"inputs",
991+
"summary",
992+
"stars",
993+
"photo",
994+
"captioning",
995+
"good",
996+
]
997+
if table_type == p.TableType.knowledge:
998+
new_cols_order = [
999+
"Title",
1000+
"Title Embed",
1001+
"Text",
1002+
"Text Embed",
1003+
"File ID",
1004+
"Page",
1005+
] + new_cols_order
1006+
if table_type == p.TableType.chat:
1007+
new_cols_order = ["User", "AI"] + new_cols_order
1008+
1009+
jamai.table.reorder_columns(
1010+
table_type=table_type,
1011+
request=p.ColumnReorderRequest(
1012+
table_id=TABLE_ID_A,
1013+
column_names=new_cols_order,
1014+
),
1015+
)
1016+
reordered_columns = [
1017+
col_meta.id
1018+
for col_meta in jamai.table.get_table(table_type=table_type, table_id=TABLE_ID_A).cols
1019+
]
1020+
1021+
# Export data
1022+
csv_data = jamai.export_table_data(table_type, TABLE_ID_A, delimiter=delimiter)
1023+
csv_df = csv_to_df(csv_data.decode("utf-8"), sep=delimiter)
1024+
exported_columns = list(csv_df.columns)
1025+
1026+
assert exported_columns == reordered_columns, (
1027+
f"Exported Columns: {','.join(exported_columns)}, "
1028+
f"Reordered Columns: {','.join(reordered_columns)}"
1029+
)
1030+
1031+
# Export selected columns
1032+
csv_data = jamai.export_table_data(
1033+
table_type, TABLE_ID_A, delimiter=delimiter, columns=["good", "words", "summary"]
1034+
)
1035+
csv_df = csv_to_df(csv_data.decode("utf-8"), sep=delimiter)
1036+
selected_exported_columns = list(csv_df.columns)
1037+
selected_reordered_columns = ["ID", "Updated at", "words", "summary", "good"]
1038+
1039+
assert selected_exported_columns == selected_reordered_columns, (
1040+
f"Exported Columns: {','.join(selected_exported_columns)}, "
1041+
f"Reordered Columns: {','.join(selected_reordered_columns)}"
1042+
)
1043+
1044+
9691045
@flaky(max_runs=5, min_passes=1, rerun_filter=_rerun_on_fs_error_with_delay)
9701046
@pytest.mark.parametrize("client_cls", CLIENT_CLS)
9711047
@pytest.mark.parametrize("table_type", TABLE_TYPES)

docker/Dockerfile.frontend

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ ARG CHECK_ORIGIN=false
88
WORKDIR /app
99
COPY ./services/app .
1010
RUN mv .env.example .env
11-
RUN npm ci
11+
RUN npm ci --force
1212

1313
RUN JAMAI_URL=${JAMAI_URL} PUBLIC_JAMAI_URL=${PUBLIC_JAMAI_URL} PUBLIC_IS_SPA=${PUBLIC_IS_SPA} CHECK_ORIGIN=${CHECK_ORIGIN} npx vite build
1414
RUN mv temp build

services/api/src/owl/db/gen_table.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1036,7 +1036,12 @@ def list_rows(
10361036
limit = total - offset
10371037
if order_descending:
10381038
offset = max(0, total - limit - offset)
1039-
rows = table._dataset.to_table(offset=offset, limit=limit).to_pylist()
1039+
if columns is not None:
1040+
if "ID" not in columns:
1041+
columns.insert(0, "ID")
1042+
if "Updated at" not in columns:
1043+
columns.insert(1, "Updated at")
1044+
rows = table._dataset.to_table(columns=columns, offset=offset, limit=limit).to_pylist()
10401045
rows = sorted(rows, reverse=order_descending, key=lambda r: r["ID"])
10411046
rows = self._post_process_rows(
10421047
rows,
@@ -1170,7 +1175,7 @@ def get_conversation_thread(
11701175
def export_csv(
11711176
self,
11721177
table_id: TableName,
1173-
columns: list[ColName] | None = None,
1178+
columns: list[ColName],
11741179
file_path: str = "",
11751180
delimiter: CSVDelimiter | str = ",",
11761181
) -> pd.DataFrame:
Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
from typing import Self
2+
3+
import numpy as np
4+
5+
6+
class GenerativeTableCore:
7+
### --- Table CRUD --- ###
8+
9+
# Create
10+
@classmethod
11+
async def create_table(cls, table_id: str) -> Self:
12+
pass
13+
14+
@classmethod
15+
async def duplicate_table(cls, table_id: str) -> Self:
16+
pass
17+
18+
# Read
19+
@classmethod
20+
async def list_tables(cls, table_id: str) -> list[Self]:
21+
pass
22+
23+
@classmethod
24+
async def get_table(cls, table_id: str) -> Self:
25+
pass
26+
27+
async def count_rows(self):
28+
pass
29+
30+
# Update
31+
async def rename_table(self):
32+
pass
33+
34+
async def recreate_fts_index(self):
35+
# Optional
36+
pass
37+
38+
async def recreate_vector_index(self):
39+
# Optional
40+
pass
41+
42+
async def drop_fts_index(self):
43+
# Optional
44+
pass
45+
46+
async def drop_vector_index(self):
47+
# Optional
48+
pass
49+
50+
# Delete
51+
async def drop_table(self):
52+
pass
53+
54+
# Import Export
55+
async def export_table(self):
56+
pass
57+
58+
async def import_table(self):
59+
pass
60+
61+
async def export_data(self):
62+
pass
63+
64+
async def import_data(self):
65+
pass
66+
67+
### --- Column CRUD --- ###
68+
69+
# Create
70+
async def add_column(self):
71+
pass
72+
73+
# Read ops are implemented as table ops
74+
# Update
75+
async def update_gen_config(self):
76+
pass
77+
78+
async def rename_column(self):
79+
pass
80+
81+
async def reorder_columns(self):
82+
# Need to ensure that length of new order list matches the number of columns
83+
pass
84+
85+
# Delete
86+
async def drop_column(self):
87+
pass
88+
89+
### --- Row CRUD --- ###
90+
91+
# Create
92+
async def add_row(self):
93+
pass
94+
95+
async def add_rows(self):
96+
# Optional, if batch operation is supported
97+
pass
98+
99+
# Read
100+
async def list_rows(self):
101+
pass
102+
103+
async def get_row(self):
104+
pass
105+
106+
async def fts_search(self, query: str):
107+
pass
108+
109+
async def vector_search(self, query: list[float] | np.ndarray):
110+
pass
111+
112+
# Update
113+
async def update_row(self):
114+
pass
115+
116+
async def update_rows(self):
117+
# Optional, if batch operation is supported
118+
pass
119+
120+
# Delete
121+
async def delete_row(self):
122+
pass
123+
124+
async def delete_rows(self):
125+
# Optional, if batch operation is supported
126+
pass

services/api/src/owl/routers/gen_table.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1379,9 +1379,19 @@ def export_table_data(
13791379
filepath = join(tmp_dir.name, filename)
13801380
# Keep a reference to the directory and only delete upon completion
13811381
bg_tasks.add_task(tmp_dir.cleanup)
1382+
# Get column ordering
1383+
with table.create_session() as session:
1384+
meta = table.open_meta(session, table_id, remove_state_cols=True)
1385+
columns_order = [c.id for c in meta.cols_schema]
1386+
if columns is None:
1387+
columns_to_export = columns_order
1388+
else:
1389+
columns_to_export = [
1390+
col for col in columns_order if col in columns or col.lower() in ("id", "updated at")
1391+
]
13821392
table.export_csv(
13831393
table_id=table_id,
1384-
columns=columns,
1394+
columns=columns_to_export,
13851395
file_path=filepath,
13861396
delimiter=delimiter,
13871397
)

0 commit comments

Comments (0)