Skip to content

Commit 5c08955

Browse files
Add esoteric-underscore-grouping
Co-authored-by: shauss <stephane.hauss@gmail.com>
1 parent 2111959 commit 5c08955

File tree

4 files changed

+176
-50
lines changed

4 files changed

+176
-50
lines changed

pylint/checkers/format.py

Lines changed: 54 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
from __future__ import annotations
1515

16+
import re
1617
import tokenize
1718
from functools import reduce
1819
from re import Match
@@ -116,6 +117,11 @@
116117
"use-standard-scientific-notation",
117118
"Emitted when a number is written in non-standard scientific notation.",
118119
),
120+
"C0330": (
121+
"Non standard grouping of numeric literals using underscores should be %s",
122+
"esoteric-underscore-grouping",
123+
"Used when numeric literals use underscore separators not in groups of 3 digits.",
124+
),
119125
}
120126

121127

@@ -387,6 +393,32 @@ def to_standard_scientific_notation(number: float) -> str:
387393
base = base.rstrip("0").rstrip(".")
388394
return f"{base}e{int(exp)}"
389395

396+
@staticmethod
397+
def to_standard_underscore_grouping(number: str) -> str:
398+
if "e" in number.lower() or "E" in number.lower():
399+
return FormatChecker.to_standard_scientific_notation(float(number))
400+
401+
number = number.replace("_", "")
402+
# Split into whole and decimal parts (if present)
403+
if "." in number:
404+
whole, decimal = number.split(".")
405+
else:
406+
whole, decimal = number, ""
407+
408+
# Format whole part with proper grouping (right to left)
409+
if len(whole) > 3:
410+
grouped_whole = ""
411+
for i in range(len(whole), 0, -3):
412+
start = max(0, i - 3)
413+
group = whole[start:i]
414+
if grouped_whole:
415+
grouped_whole = group + "_" + grouped_whole
416+
else:
417+
grouped_whole = group
418+
whole = grouped_whole
419+
return f"{whole}.{decimal}" if decimal else whole
420+
421+
# pylint: disable-next=too-many-branches,too-many-statements
390422
def process_tokens(self, tokens: list[tokenize.TokenInfo]) -> None:
391423
"""Process tokens and search for:
392424
@@ -446,19 +478,36 @@ def process_tokens(self, tokens: list[tokenize.TokenInfo]) -> None:
446478
check_equal = False
447479
self.check_indent_level(line, indents[-1], line_num)
448480
if tok_type == tokenize.NUMBER:
449-
# Check for wrong scientific notation
450-
if (
451-
("e" in string or "E" in string)
452-
and "x" not in string # not a hexadecimal
481+
not_hex_oct_or_complex = (
482+
# You don't deserve a linter if you mix non-decimal notation with
483+
# an exponential or underscore,
484+
"x" not in string # not a hexadecimal
485+
and "o" not in string # not an octal
453486
and "j" not in string # not a complex
454-
):
487+
)
488+
# Wrong scientific notation
489+
if ("e" in string or "E" in string) and not_hex_oct_or_complex:
455490
value = float(string.lower().split("e")[0])
456491
if not (1 <= value < 10):
457492
self.add_message(
458493
"use-standard-scientific-notation",
459494
args=(self.to_standard_scientific_notation(value)),
460495
line=line_num,
461496
col_offset=start[1],
497+
confidence=HIGH,
498+
)
499+
# proper underscore grouping in numeric literals
500+
if "_" in string and not_hex_oct_or_complex:
501+
if not re.match(
502+
r"^\d{0,3}(_\d{3})*\.?\d*([eE]-?\d{0,3}(_\d{3})*)?$", string
503+
):
504+
suggested = self.to_standard_underscore_grouping(string)
505+
self.add_message(
506+
"esoteric-underscore-grouping",
507+
args=(suggested),
508+
line=line_num,
509+
col_offset=start[1],
510+
confidence=HIGH,
462511
)
463512
if string.endswith("l"):
464513
self.add_message("lowercase-l-suffix", line=line_num)

tests/checkers/unittest_format.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import tokenize
1010

1111
import astroid
12+
import pytest
1213

1314
from pylint import lint, reporters
1415
from pylint.checkers.base.basic_checker import BasicChecker
@@ -180,3 +181,16 @@ def test_disable_global_option_end_of_line() -> None:
180181
assert not myreporter.messages
181182
finally:
182183
os.remove(file_.name)
184+
185+
186+
@pytest.mark.parametrize(
    "value, expected",
    [
        # Already well grouped: returned unchanged.
        ("1_000_000", "1_000_000"),
        # Irregular groups in the integer part are regrouped by three.
        ("1000_000", "1_000_000"),
        ("10_5415_456_4654984.16354698489", "1_054_154_564_654_984.16354698489"),
        # Three digits or fewer: underscores are dropped, nothing to group.
        ("1_2_3", "123"),
        # No integer part: only the fractional underscores are removed.
        (".123_456", ".123456"),
        # Integer part stays grouped, fractional part loses its underscores.
        ("123_456.123_456", "123_456.123456"),
    ],
)
def test_to_standard_underscore_grouping(value: str, expected: str) -> None:
    """Check the suggested spelling for non-exponent numeric literals.

    Each case feeds the raw literal text to
    ``FormatChecker.to_standard_underscore_grouping`` and compares the
    result with the expected regrouped literal.
    """
    assert FormatChecker.to_standard_underscore_grouping(value) == expected

tests/functional/u/use/use_standard_scientific_notation.py

Lines changed: 53 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
wrong_big = 45.3e6 # [use-standard-scientific-notation]
44
uppercase_e_wrong = 45.3E6 # [use-standard-scientific-notation]
55
wrong_small = 0.00012e-26 # [use-standard-scientific-notation]
6+
uppercase_e_wrong_small = 0.00012E-26 # [use-standard-scientific-notation]
67
wrong_negative_and_big = -10e3 # [use-standard-scientific-notation]
78
actual_trolling = 11000e26 # [use-standard-scientific-notation]
89
scientific_double_digit = 12e8 # [use-standard-scientific-notation]
@@ -25,10 +26,12 @@
2526
correct_decimal_only = 3.14
2627
negative_correct = -5.67e-8
2728
correct_small_exponent = 1.5e1
28-
correct_tiny_exponent = 9.0e0
29-
correct_precise = 6.02214076e23
29+
actually_nine = 9e0
30+
actually_one = 1.0e0
31+
3032

3133
hex_constant = 0x1e4 # Hexadecimal, not scientific notation
34+
hex_constant_bad = 0x10e4
3235
binary_constant = 0b1010
3336
octal_constant = 0o1234
3437
inside_string = "Temperature: 10e3 degrees"
@@ -39,44 +42,60 @@
3942
in_variable_name = measurement_10e3 = 45
4043
inside_f_string = f"Value is {1.0} not 10e6"
4144

42-
# Potential false negatives
43-
barely_violation = 9.99e0 # Should this be 9.99?
44-
integer_sci = int(1e10) # Integer call with scientific notation
4545
complex_number = 1.5e3 + 2.5e3j # Complex number with scientific notation
46-
tuple_of_sci = (1.2e4, 3.4e5)
47-
list_of_sci = [5.6e6, 7.8e7]
48-
dict_with_sci = {"a": 9.1e8, "b": 1.2e9}
46+
# false negative for complex numbers:
47+
complex_number_wrong = 15e3 + 25e3j # [use-standard-scientific-notation]
4948

50-
# Mathematical operations
51-
addition = 1.0e3 + 2.0e3
52-
multiplication = 1.0e3 * 2.0
53-
division = 1.0e3 / 2.0
54-
power = 1.0e3 ** 2.0
5549

56-
# Function calls with scientific notation
57-
def function_with_sci(param=1.0e3, other_param=2.0e3):
50+
#+1: [use-standard-scientific-notation, use-standard-scientific-notation]
51+
def function_with_sci(param=10.0e3, other_param=20.0e3):
5852
return param, other_param
5953

60-
result = function_with_sci(2.0e3)
61-
positional_and_keyword = function_with_sci(1.0, other_param=3.0e4)
54+
#+1: [use-standard-scientific-notation, use-standard-scientific-notation]
55+
result = function_with_sci(20.0e3, 10.0e3)
56+
57+
valid_underscore_int = 1_000_000
58+
valid_underscore_float = 1_000_000.12345
59+
valid_underscore_float_exp = 123_000_000.12345e12_000_000 # [use-standard-scientific-notation]
60+
valid_underscore_float_exp_cap = 123_000_000.12345E123_000_000 # [use-standard-scientific-notation]
6261

63-
# Assignments with operations
64-
a = 1
65-
a += 1.0e3
66-
b = 2
67-
b *= 2.0e3
62+
invalid_underscore_octal = 0o123_456 # octal with underscores bypassed
63+
invalid_underscore_hexa = 0x12c_456 # hexa with underscores bypassed
6864

69-
# Scientific notation in different contexts
70-
inside_list_comp = [x * 2 for x in [1.0e3, 2.0e3]]
71-
inside_dict_comp = {str(x): x for x in [3.0e3, 4.0e3]}
72-
inside_generator = (x + 1 for x in [5.0e3, 6.0e3])
65+
invalid_underscore_float_no_int = .123_456 # [esoteric-underscore-grouping]
66+
invalid_underscore_float_no_frac = 123_456.123_456 # [esoteric-underscore-grouping]
67+
incorrect_sci_underscore = 1.234_567e6 # [esoteric-underscore-grouping]
68+
incorrect_sci_uppercase = 1.234_567E6 # [esoteric-underscore-grouping]
69+
incorrect_sci_underscore_exp = 1.2e1_0 # [esoteric-underscore-grouping]
70+
invalid_underscore_float = 1_234.567_89 # [esoteric-underscore-grouping]
71+
invalid_underscore_binary = 0b1010_1010 # [esoteric-underscore-grouping]
72+
#+1: [use-standard-scientific-notation, esoteric-underscore-grouping]
73+
wrong_big_underscore = 45.3_45e6
74+
#+1: [use-standard-scientific-notation, esoteric-underscore-grouping]
75+
wrong_small_underscore = 0.000_12e-26
76+
#+1: [use-standard-scientific-notation, esoteric-underscore-grouping]
77+
scientific_double_digit_underscore = 1_2e8
78+
#+1: [use-standard-scientific-notation, esoteric-underscore-grouping]
79+
scientific_triple_digit_underscore = 12_3e3
80+
#+1: [use-standard-scientific-notation, esoteric-underscore-grouping]
81+
invalid_underscore_sci = 1_234.567_89e10
82+
invalid_underscore_sci_exp = 1.2e1_0 # [esoteric-underscore-grouping]
83+
#+1: [use-standard-scientific-notation, esoteric-underscore-grouping]
84+
invalid_underscore_sci_combined = 1_2.3_4e5_6
85+
#+1: [use-standard-scientific-notation, esoteric-underscore-grouping]
86+
invalid_uppercase_sci = 1_234.567_89E10
87+
edge_underscore_1 = 1_0e6 # [use-standard-scientific-notation, esoteric-underscore-grouping]
88+
mixed_underscore_1 = 1_000_000.0e-3 # [use-standard-scientific-notation]
89+
#+1: [use-standard-scientific-notation, esoteric-underscore-grouping]
90+
mixed_underscore_2 = 0.000_001e3
91+
mixed_underscore_3 = 1_0.0e2 # [use-standard-scientific-notation, esoteric-underscore-grouping]
7392

74-
# Boundary cases for normalization
75-
boundary_small = 9.999e0 # Almost 10, but not quite
76-
boundary_large = 1.001e0 # Just above 1
77-
boundary_case = 1.0e0 # Equal to 1
93+
# Complex numbers with underscores
94+
complex_underscore = 1.5_6e3 + 2.5_6e3j # [esoteric-underscore-grouping]
95+
#+1: [use-standard-scientific-notation, esoteric-underscore-grouping]
96+
complex_underscore_wrong = 15_6e2 + 25_6e2j
7897

79-
# Constants from physics/science (correctly formatted)
80-
speed_of_light = 2.99792458e8 # m/s
81-
planck_constant = 6.62607015e-34 # J⋅s
82-
electron_charge = 1.602176634e-19 # C
98+
#+2: [esoteric-underscore-grouping, esoteric-underscore-grouping]
99+
#+1: [use-standard-scientific-notation, use-standard-scientific-notation]
100+
def function_with_underscore(param=10.0_0e3, other_param=20.0_0e3):
101+
return param, other_param
Lines changed: 55 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,55 @@
1-
use-standard-scientific-notation:3:12:None:None::Scientific notation should be '4.53e1' instead:UNDEFINED
2-
use-standard-scientific-notation:4:20:None:None::Scientific notation should be '4.53e1' instead:UNDEFINED
3-
use-standard-scientific-notation:5:14:None:None::Scientific notation should be '1.2e-4' instead:UNDEFINED
4-
use-standard-scientific-notation:6:26:None:None::Scientific notation should be '1e1' instead:UNDEFINED
5-
use-standard-scientific-notation:7:18:None:None::Scientific notation should be '1.1e4' instead:UNDEFINED
6-
use-standard-scientific-notation:8:26:None:None::Scientific notation should be '1.2e1' instead:UNDEFINED
7-
use-standard-scientific-notation:9:26:None:None::Scientific notation should be '1.23e2' instead:UNDEFINED
8-
use-standard-scientific-notation:10:28:None:None::Scientific notation should be '1e-4' instead:UNDEFINED
9-
use-standard-scientific-notation:11:26:None:None::Scientific notation should be '1e-4' instead:UNDEFINED
10-
use-standard-scientific-notation:12:20:None:None::Scientific notation should be '5e-1' instead:UNDEFINED
11-
use-standard-scientific-notation:13:12:None:None::Scientific notation should be '0e0' instead:UNDEFINED
1+
use-standard-scientific-notation:3:12:None:None::Scientific notation should be '4.53e1' instead:HIGH
2+
use-standard-scientific-notation:4:20:None:None::Scientific notation should be '4.53e1' instead:HIGH
3+
use-standard-scientific-notation:5:14:None:None::Scientific notation should be '1.2e-4' instead:HIGH
4+
use-standard-scientific-notation:6:26:None:None::Scientific notation should be '1.2e-4' instead:HIGH
5+
use-standard-scientific-notation:7:26:None:None::Scientific notation should be '1e1' instead:HIGH
6+
use-standard-scientific-notation:8:18:None:None::Scientific notation should be '1.1e4' instead:HIGH
7+
use-standard-scientific-notation:9:26:None:None::Scientific notation should be '1.2e1' instead:HIGH
8+
use-standard-scientific-notation:10:26:None:None::Scientific notation should be '1.23e2' instead:HIGH
9+
use-standard-scientific-notation:11:28:None:None::Scientific notation should be '1e-4' instead:HIGH
10+
use-standard-scientific-notation:12:26:None:None::Scientific notation should be '1e-4' instead:HIGH
11+
use-standard-scientific-notation:13:20:None:None::Scientific notation should be '5e-1' instead:HIGH
12+
use-standard-scientific-notation:14:12:None:None::Scientific notation should be '0e0' instead:HIGH
13+
use-standard-scientific-notation:47:23:None:None::Scientific notation should be '1.5e1' instead:HIGH
14+
use-standard-scientific-notation:51:28:None:None::Scientific notation should be '1e1' instead:HIGH
15+
use-standard-scientific-notation:51:48:None:None::Scientific notation should be '2e1' instead:HIGH
16+
use-standard-scientific-notation:55:35:None:None::Scientific notation should be '1e1' instead:HIGH
17+
use-standard-scientific-notation:55:27:None:None::Scientific notation should be '2e1' instead:HIGH
18+
use-standard-scientific-notation:59:29:None:None::Scientific notation should be '1.2300000012345e8' instead:HIGH
19+
use-standard-scientific-notation:60:33:None:None::Scientific notation should be '1.2300000012345e8' instead:HIGH
20+
esoteric-underscore-grouping:65:34:None:None::Non standard grouping of numeric literals using underscores should be .123456:HIGH
21+
esoteric-underscore-grouping:66:35:None:None::Non standard grouping of numeric literals using underscores should be 123_456.123456:HIGH
22+
esoteric-underscore-grouping:67:27:None:None::Non standard grouping of numeric literals using underscores should be 1.234567e6:HIGH
23+
esoteric-underscore-grouping:68:26:None:None::Non standard grouping of numeric literals using underscores should be 1.234567e6:HIGH
24+
esoteric-underscore-grouping:69:31:None:None::Non standard grouping of numeric literals using underscores should be 1.2e10:HIGH
25+
esoteric-underscore-grouping:70:27:None:None::Non standard grouping of numeric literals using underscores should be 1_234.56789:HIGH
26+
esoteric-underscore-grouping:71:28:None:None::Non standard grouping of numeric literals using underscores should be 0_b10_101_010:HIGH
27+
esoteric-underscore-grouping:73:23:None:None::Non standard grouping of numeric literals using underscores should be 4.5345e7:HIGH
28+
use-standard-scientific-notation:73:23:None:None::Scientific notation should be '4.5345e1' instead:HIGH
29+
esoteric-underscore-grouping:75:25:None:None::Non standard grouping of numeric literals using underscores should be 1.2e-30:HIGH
30+
use-standard-scientific-notation:75:25:None:None::Scientific notation should be '1.2e-4' instead:HIGH
31+
esoteric-underscore-grouping:77:37:None:None::Non standard grouping of numeric literals using underscores should be 1.2e9:HIGH
32+
use-standard-scientific-notation:77:37:None:None::Scientific notation should be '1.2e1' instead:HIGH
33+
esoteric-underscore-grouping:79:37:None:None::Non standard grouping of numeric literals using underscores should be 1.23e5:HIGH
34+
use-standard-scientific-notation:79:37:None:None::Scientific notation should be '1.23e2' instead:HIGH
35+
esoteric-underscore-grouping:81:25:None:None::Non standard grouping of numeric literals using underscores should be 1.23456789e13:HIGH
36+
use-standard-scientific-notation:81:25:None:None::Scientific notation should be '1.23456789e3' instead:HIGH
37+
esoteric-underscore-grouping:82:29:None:None::Non standard grouping of numeric literals using underscores should be 1.2e10:HIGH
38+
esoteric-underscore-grouping:84:34:None:None::Non standard grouping of numeric literals using underscores should be 1.234e57:HIGH
39+
use-standard-scientific-notation:84:34:None:None::Scientific notation should be '1.234e1' instead:HIGH
40+
esoteric-underscore-grouping:86:24:None:None::Non standard grouping of numeric literals using underscores should be 1.23456789e13:HIGH
41+
use-standard-scientific-notation:86:24:None:None::Scientific notation should be '1.23456789e3' instead:HIGH
42+
esoteric-underscore-grouping:87:20:None:None::Non standard grouping of numeric literals using underscores should be 1e7:HIGH
43+
use-standard-scientific-notation:87:20:None:None::Scientific notation should be '1e1' instead:HIGH
44+
use-standard-scientific-notation:88:21:None:None::Scientific notation should be '1e6' instead:HIGH
45+
esoteric-underscore-grouping:90:21:None:None::Non standard grouping of numeric literals using underscores should be 1e-3:HIGH
46+
use-standard-scientific-notation:90:21:None:None::Scientific notation should be '1e-6' instead:HIGH
47+
esoteric-underscore-grouping:91:21:None:None::Non standard grouping of numeric literals using underscores should be 1e3:HIGH
48+
use-standard-scientific-notation:91:21:None:None::Scientific notation should be '1e1' instead:HIGH
49+
esoteric-underscore-grouping:94:21:None:None::Non standard grouping of numeric literals using underscores should be 1.56e3:HIGH
50+
esoteric-underscore-grouping:96:27:None:None::Non standard grouping of numeric literals using underscores should be 1.56e4:HIGH
51+
use-standard-scientific-notation:96:27:None:None::Scientific notation should be '1.56e2' instead:HIGH
52+
esoteric-underscore-grouping:100:35:None:None::Non standard grouping of numeric literals using underscores should be 1e4:HIGH
53+
esoteric-underscore-grouping:100:57:None:None::Non standard grouping of numeric literals using underscores should be 2e4:HIGH
54+
use-standard-scientific-notation:100:35:None:None::Scientific notation should be '1e1' instead:HIGH
55+
use-standard-scientific-notation:100:57:None:None::Scientific notation should be '2e1' instead:HIGH

0 commit comments

Comments
 (0)