Skip to content

feature #49580: support new-style float_format string in to_csv #61650

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 30 additions & 1 deletion pandas/io/formats/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -455,7 +455,7 @@ def __init__(
self.na_rep = na_rep
self.formatters = self._initialize_formatters(formatters)
self.justify = self._initialize_justify(justify)
self.float_format = float_format
self.float_format = self._validate_float_format(float_format)
self.sparsify = self._initialize_sparsify(sparsify)
self.show_index_names = index_names
self.decimal = decimal
Expand Down Expand Up @@ -850,6 +850,35 @@ def _get_column_name_list(self) -> list[Hashable]:
names.append("" if columns.name is None else columns.name)
return names

def _validate_float_format(
    self, fmt: FloatFormatType | None
) -> FloatFormatType | None:
    """
    Validate and normalize the ``float_format`` argument.

    Parameters
    ----------
    fmt : str, callable or None
        * ``None`` and callables are returned unchanged.
        * An old-style (``%``) format string is returned unchanged.
        * A new-style (``str.format``) string is converted to a callable
          that formats a single value.

    Returns
    -------
    str, callable or None
        The value to store as ``float_format``.

    Raises
    ------
    ValueError
        If ``fmt`` is a string that is not a usable new-style format for a
        float, or if ``fmt`` is neither ``None``, a string nor a callable.
    """
    if fmt is None or callable(fmt):
        return fmt

    if not isinstance(fmt, str):
        raise ValueError("float_format must be a string or callable")

    if "%" in fmt:
        if "{" not in fmt:
            # No replacement field can be present, so this can only be an
            # old-style string; hand it back untouched as before.
            return fmt
        # Both "%" and "{" occur: probe which syntax the string really
        # uses. A "%" may be a literal or the percent presentation type of a
        # new-style field (e.g. "{:.2%}"), in which case %-formatting fails.
        try:
            fmt % 1.0
        except (TypeError, ValueError):
            pass
        else:
            return fmt

    try:
        # Probe with an arbitrary float so a bad template fails here rather
        # than while rows are being written.
        fmt.format(1.0)
    except (ValueError, KeyError, IndexError, AttributeError, TypeError) as err:
        # AttributeError / TypeError come from fields such as "{0.foo}" or
        # "{[0]}" that look up attributes or items on the float.
        raise ValueError(f"Invalid new-style format string {fmt!r}") from err
    return fmt.format

class DataFrameRenderer:
"""Class for creating dataframe output in multiple formats.
Expand Down
21 changes: 21 additions & 0 deletions pandas/tests/io/formats/test_csv_benchmarks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import pytest
import numpy as np
import pandas as pd

# Module-level pytest mark: requests the "benchmark" fixture for every test in
# this module (each test below also names it explicitly as a parameter).
pytestmark = pytest.mark.usefixtures("benchmark")

def test_benchmark_old_style_format(benchmark):
    """Benchmark ``to_csv`` on a 1000x1000 random frame with ``"%.6f"``."""
    frame = pd.DataFrame(np.random.rand(1000, 1000))

    def write_csv():
        return frame.to_csv(float_format="%.6f")

    benchmark(write_csv)

def test_benchmark_new_style_format(benchmark):
    """Benchmark ``to_csv`` on a 1000x1000 random frame with ``"{:.6f}"``."""
    frame = pd.DataFrame(np.random.rand(1000, 1000))

    def write_csv():
        return frame.to_csv(float_format="{:.6f}")

    benchmark(write_csv)

def test_benchmark_new_style_thousands(benchmark):
    """Benchmark ``to_csv`` on a 1000x1000 random frame with ``"{:,.2f}"``."""
    frame = pd.DataFrame(np.random.rand(1000, 1000))

    def write_csv():
        return frame.to_csv(float_format="{:,.2f}")

    benchmark(write_csv)

def test_benchmark_callable_format(benchmark):
    """Benchmark ``to_csv`` on a 1000x1000 random frame with a callable format."""
    frame = pd.DataFrame(np.random.rand(1000, 1000))

    def write_csv():
        return frame.to_csv(float_format=lambda x: f"{x:.6f}")

    benchmark(write_csv)
123 changes: 122 additions & 1 deletion pandas/tests/io/formats/test_to_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
compat,
)
import pandas._testing as tm

import warnings

class TestToCSV:
def test_to_csv_with_single_column(self):
Expand Down Expand Up @@ -741,3 +741,124 @@ def test_to_csv_iterative_compression_buffer(compression):
pd.read_csv(buffer, compression=compression, index_col=0), df
)
assert not buffer.closed


def test_new_style_float_format_basic():
    """``"{:.2f}"`` rounds each float to two decimals in the CSV output."""
    frame = pd.DataFrame({"A": [1234.56789, 9876.54321]})
    expected = ",A\n0,1234.57\n1,9876.54\n"
    assert frame.to_csv(float_format="{:.2f}") == expected

def test_new_style_float_format_thousands():
    """``"{:,.2f}"`` output contains commas, so the values come out quoted."""
    frame = pd.DataFrame({"A": [1234.56789, 9876.54321]})
    expected = ',A\n0,"1,234.57"\n1,"9,876.54"\n'
    assert frame.to_csv(float_format="{:,.2f}") == expected

def test_new_style_scientific_format():
    """``"{:.2e}"`` writes floats in scientific notation."""
    frame = pd.DataFrame({"A": [0.000123, 0.000456]})
    expected = ",A\n0,1.23e-04\n1,4.56e-04\n"
    assert frame.to_csv(float_format="{:.2e}") == expected

def test_new_style_with_nan():
    """NaN is written as ``na_rep`` while other values use the new-style format."""
    frame = pd.DataFrame({"A": [1.23, np.nan, 4.56]})
    expected = ",A\n0,1.23\n1,NA\n2,4.56\n"
    assert frame.to_csv(float_format="{:.2f}", na_rep="NA") == expected

def test_new_style_with_mixed_types():
    """A string column is written as-is next to a formatted float column."""
    frame = pd.DataFrame({"A": [1.23, 4.56], "B": ["x", "y"]})
    expected = ",A,B\n0,1.23,x\n1,4.56,y\n"
    assert frame.to_csv(float_format="{:.2f}") == expected

def test_new_style_with_mixed_types_in_column():
    """A column mixing floats and strings is written with its values as-is."""
    df = pd.DataFrame({"A": [1.23, "text", 4.56]})
    # Any warning raised while writing is irrelevant to this check; silence
    # it explicitly instead of recording into a list that is never inspected.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        result = df.to_csv(float_format="{:.2f}")

    expected = ",A\n0,1.23\n1,text\n2,4.56\n"
    assert result == expected

def test_invalid_new_style_format_missing_brace():
    """An unterminated replacement field is rejected with ``ValueError``."""
    import re

    df = pd.DataFrame({"A": [1.23]})
    # ``match`` is a regular expression; escape the message so that "{" and
    # "." are compared literally rather than as regex syntax.
    msg = re.escape("Invalid new-style format string '{:.2f")
    with pytest.raises(ValueError, match=msg):
        df.to_csv(float_format="{:.2f")

def test_invalid_new_style_format_specifier():
    """An unknown format specifier is rejected with ``ValueError``."""
    import re

    df = pd.DataFrame({"A": [1.23]})
    # ``match`` is a regular expression; escape the message so that "{", "."
    # and "}" are compared literally rather than as regex syntax.
    msg = re.escape("Invalid new-style format string '{:.2z}'")
    with pytest.raises(ValueError, match=msg):
        df.to_csv(float_format="{:.2z}")

def test_old_style_format_compatibility():
    """Old-style ``"%.2f"`` formats still round floats to two decimals."""
    frame = pd.DataFrame({"A": [1234.56789, 9876.54321]})
    expected = ",A\n0,1234.57\n1,9876.54\n"
    assert frame.to_csv(float_format="%.2f") == expected

def test_callable_float_format_compatibility():
    """A callable ``float_format`` is applied to each float value."""
    frame = pd.DataFrame({"A": [1234.56789, 9876.54321]})
    expected = ',A\n0,"1,234.57"\n1,"9,876.54"\n'
    assert frame.to_csv(float_format=lambda x: f"{x:,.2f}") == expected

def test_no_float_format():
    """``float_format=None`` writes floats without extra formatting."""
    frame = pd.DataFrame({"A": [1.23, 4.56]})
    expected = ",A\n0,1.23\n1,4.56\n"
    assert frame.to_csv(float_format=None) == expected

def test_large_numbers():
    """A very large float and an overflowing literal are formatted with ``"{:.2e}"``."""
    frame = pd.DataFrame({"A": [1e308, 2e308]})
    expected = ",A\n0,1.00e+308\n1,inf\n"
    assert frame.to_csv(float_format="{:.2e}") == expected

def test_zero_and_negative():
    """``"{:+.2f}"`` writes an explicit sign for zero and negative values."""
    frame = pd.DataFrame({"A": [0.0, -1.23456]})
    expected = ",A\n0,+0.00\n1,-1.23\n"
    assert frame.to_csv(float_format="{:+.2f}") == expected

def test_unicode_format():
    """Non-ASCII literal text in the format string is kept in the output."""
    frame = pd.DataFrame({"A": [1.23, 4.56]})
    expected = ",A\n0,1.23€\n1,4.56€\n"
    assert frame.to_csv(float_format="{:.2f}€", encoding="utf-8") == expected

def test_empty_dataframe():
    """An empty frame produces only the header line."""
    frame = pd.DataFrame({"A": []})
    expected = ",A\n"
    assert frame.to_csv(float_format="{:.2f}") == expected

def test_multi_column_float():
    """The new-style format is applied to every float column."""
    frame = pd.DataFrame({"A": [1.23, 4.56], "B": [7.89, 0.12]})
    expected = ",A,B\n0,1.23,7.89\n1,4.56,0.12\n"
    assert frame.to_csv(float_format="{:.2f}") == expected

def test_invalid_float_format_type():
    """A ``float_format`` that is neither string nor callable raises ``ValueError``."""
    frame = pd.DataFrame({"A": [1.23]})
    expected_message = "float_format must be a string or callable"
    with pytest.raises(ValueError, match=expected_message):
        frame.to_csv(float_format=123)

def test_new_style_with_inf():
    """Infinite values are written as ``inf`` / ``-inf``, not as ``na_rep``."""
    frame = pd.DataFrame({"A": [1.23, np.inf, -np.inf]})
    expected = ",A\n0,1.23\n1,inf\n2,-inf\n"
    assert frame.to_csv(float_format="{:.2f}", na_rep="NA") == expected

def test_new_style_with_precision_edge():
    """A precision beyond the value's digits pads the output with zeros."""
    frame = pd.DataFrame({"A": [1.23456789]})
    expected = ",A\n0,1.2345678900\n"
    assert frame.to_csv(float_format="{:.10f}") == expected

def test_new_style_with_template():
    """Literal text around the replacement field is kept in the output."""
    frame = pd.DataFrame({"A": [1234.56789]})
    expected = ',A\n0,"Value: 1,234.57"\n'
    assert frame.to_csv(float_format="Value: {:,.2f}") == expected
Loading