@@ -884,47 +884,31 @@ def transform_paramstyle(
884
884
def create_arrow_table_from_arrow_file(
    file_bytes: bytes, description
) -> "pyarrow.Table":
    """
    Build an Arrow table from the raw bytes of an Arrow file.

    The bytes are first decoded by ``convert_arrow_based_file_to_arrow_table``
    and the resulting table is then passed, together with ``description``,
    through ``convert_decimals_in_arrow_table``.

    Args:
        file_bytes: The bytes of the Arrow file.
        description: The column descriptions forwarded to the decimal
            conversion step.

    Returns:
        The table returned by ``convert_decimals_in_arrow_table``.
    """
    return convert_decimals_in_arrow_table(
        convert_arrow_based_file_to_arrow_table(file_bytes), description
    )
899
889
900
890
901
891
def convert_arrow_based_file_to_arrow_table(file_bytes: bytes) -> "pyarrow.Table":
    """
    Decode the bytes of an Arrow file into an Arrow table.

    The bytes are opened as an Arrow IPC stream and read in full.

    Args:
        file_bytes: The bytes of the Arrow file.

    Returns:
        The table produced by reading the whole stream.

    Raises:
        RuntimeError: If opening or reading the stream fails for any reason.
            The original exception is kept both as the second element of
            ``args`` and as ``__cause__``.
    """
    try:
        return pyarrow.ipc.open_stream(file_bytes).read_all()
    except Exception as e:
        # Chain explicitly so the traceback presents the underlying failure
        # as the direct cause instead of an incidental "during handling" one.
        raise RuntimeError(
            "Failure to convert arrow based file to arrow table", e
        ) from e
915
896
897
def convert_arrow_based_set_to_arrow_table(arrow_batches, lz4_compressed, schema_bytes):
    """
    Assemble a set of Arrow batches into a single Arrow table.

    A byte buffer is started with ``schema_bytes``; each batch's ``batch``
    payload is appended to it (after LZ4 frame decompression when
    ``lz4_compressed`` is truthy), and the combined buffer is read as one
    Arrow IPC stream.

    Args:
        arrow_batches: Iterable of objects exposing ``rowCount`` and ``batch``.
        lz4_compressed: Whether each ``batch`` payload must be LZ4-decompressed.
        schema_bytes: The schema bytes placed at the start of the stream.

    Returns:
        A ``(table, n_rows)`` tuple, where ``n_rows`` is the sum of the
        batches' ``rowCount`` values.
    """
    stream_buffer = bytearray()
    stream_buffer += schema_bytes
    total_rows = 0
    for batch in arrow_batches:
        total_rows += batch.rowCount
        if lz4_compressed:
            payload = lz4.frame.decompress(batch.batch)
        else:
            payload = batch.batch
        stream_buffer += payload
    return pyarrow.ipc.open_stream(stream_buffer).read_all(), total_rows
916
910
917
911
def convert_decimals_in_arrow_table (table , description ) -> "pyarrow.Table" :
918
- """
919
- Convert decimal columns in an Arrow table to the correct precision and scale.
920
-
921
- Args:
922
- table: The Arrow table
923
- description: The column descriptions
924
-
925
- Returns:
926
- pyarrow.Table: The Arrow table with correct decimal types
927
- """
928
912
new_columns = []
929
913
new_fields = []
930
914
@@ -951,35 +935,7 @@ def convert_decimals_in_arrow_table(table, description) -> "pyarrow.Table":
951
935
952
936
return pyarrow .Table .from_arrays (new_columns , schema = new_schema )
953
937
954
-
955
- def convert_arrow_based_set_to_arrow_table (arrow_batches , lz4_compressed , schema_bytes ):
956
- """
957
- Convert a set of Arrow batches to an Arrow table.
958
-
959
- Args:
960
- arrow_batches: The Arrow batches
961
- lz4_compressed: Whether the batches are LZ4 compressed
962
- schema_bytes: The schema bytes
963
-
964
- Returns:
965
- Tuple[pyarrow.Table, int]: The Arrow table and the number of rows
966
- """
967
- ba = bytearray ()
968
- ba += schema_bytes
969
- n_rows = 0
970
- for arrow_batch in arrow_batches :
971
- n_rows += arrow_batch .rowCount
972
- ba += (
973
- lz4 .frame .decompress (arrow_batch .batch )
974
- if lz4_compressed
975
- else arrow_batch .batch
976
- )
977
- arrow_table = pyarrow .ipc .open_stream (ba ).read_all ()
978
- return arrow_table , n_rows
979
-
980
-
981
938
def convert_to_assigned_datatypes_in_column_table (column_table , description ):
982
-
983
939
converted_column_table = []
984
940
for i , col in enumerate (column_table ):
985
941
if description [i ][1 ] == "decimal" :
0 commit comments