1
+ import json
1
2
import typing
2
3
3
4
if typing .TYPE_CHECKING :
4
5
from ..dataframe .base import DataFrame
5
6
6
7
7
8
class DataframeSerializer :
8
- def __init__ (self ) -> None :
9
- pass
9
+ MAX_COLUMN_TEXT_LENGTH = 200
10
10
11
- @staticmethod
12
- def serialize (df : "DataFrame" , dialect : str = "postgres" ) -> str :
11
+ @classmethod
12
+ def serialize (cls , df : "DataFrame" , dialect : str = "postgres" ) -> str :
13
13
"""
14
- Convert df to csv like format where csv is wrapped inside <dataframe></dataframe>
14
+ Convert df to a CSV-like format wrapped inside <table> tags, truncating long text values, and serializing only a subset of rows using df.head().
15
+
15
16
Args:
16
- df (pd.DataFrame): PandaAI dataframe or dataframe
17
+ df (pd.DataFrame): Pandas DataFrame
18
+ dialect (str): Database dialect (default is "postgres")
17
19
18
20
Returns:
19
- str: dataframe stringify
21
+ str: Serialized DataFrame string
20
22
"""
23
+
24
+ # Start building the table metadata
21
25
dataframe_info = f'<table dialect="{ dialect } " table_name="{ df .schema .name } "'
22
26
23
27
# Add description attribute if available
@@ -26,10 +30,27 @@ def serialize(df: "DataFrame", dialect: str = "postgres") -> str:
26
30
27
31
dataframe_info += f' dimensions="{ df .rows_count } x{ df .columns_count } ">'
28
32
29
- # Add dataframe details
30
- dataframe_info += f" \n { df .head (). to_csv ( index = False ) } "
33
+ # Truncate long values
34
+ df_truncated = cls . _truncate_dataframe ( df .head ())
31
35
32
- # Close the dataframe tag
36
+ # Convert to CSV format
37
+ dataframe_info += f"\n { df_truncated .to_csv (index = False )} "
38
+
39
+ # Close the table tag
33
40
dataframe_info += "</table>\n "
34
41
35
42
return dataframe_info
43
+
44
+ @classmethod
45
+ def _truncate_dataframe (cls , df : "DataFrame" ) -> "DataFrame" :
46
+ """Truncates string values exceeding MAX_COLUMN_TEXT_LENGTH, and converts JSON-like values to truncated strings."""
47
+
48
+ def truncate_value (value ):
49
+ if isinstance (value , (dict , list )): # Convert JSON-like objects to strings
50
+ value = json .dumps (value , ensure_ascii = False )
51
+
52
+ if isinstance (value , str ) and len (value ) > cls .MAX_COLUMN_TEXT_LENGTH :
53
+ return f"{ value [: cls .MAX_COLUMN_TEXT_LENGTH ]} …"
54
+ return value
55
+
56
+ return df .applymap (truncate_value )
0 commit comments