@@ -145,6 +145,35 @@ def _validate_data_input(
145
145
raise GMTInvalidInput (msg )
146
146
147
147
148
+ def _is_printable_ascii (argstr : str ) -> bool :
149
+ """
150
+ Check if a string only contains printable ASCII characters.
151
+
152
+ Here, printable ASCII characters are defined as the characters in the range of 32 to
153
+ 126 in the ASCII table. It's different from the ``string.printable`` constant that
154
+ it doesn't include the control characters that are considered whitespace (tab,
155
+ linefeed, return, formfeed, and vertical tab).
156
+
157
+ Parameters
158
+ ----------
159
+ argstr
160
+ The string to be checked.
161
+
162
+ Returns
163
+ -------
164
+ ``True`` if the string only contains printable ASCII characters. Otherwise, return
165
+ ``False``.
166
+
167
+ Examples
168
+ --------
169
+ >>> _is_printable_ascii("123ABC+-?!")
170
+ True
171
+ >>> _is_printable_ascii("12AB±β①②")
172
+ False
173
+ """
174
+ return all (32 <= ord (c ) <= 126 for c in argstr )
175
+
176
+
148
177
def _check_encoding (argstr : str ) -> Encoding :
149
178
"""
150
179
Check the charset encoding of a string.
@@ -177,8 +206,8 @@ def _check_encoding(argstr: str) -> Encoding:
177
206
>>> _check_encoding("123AB中文") # Characters not in any charset encoding
178
207
'ISOLatin1+'
179
208
"""
180
- # Return "ascii" if the string only contains ASCII characters.
181
- if all ( 32 <= ord ( c ) <= 126 for c in argstr ):
209
+ # Return "ascii" if the string only contains printable ASCII characters.
210
+ if _is_printable_ascii ( argstr ):
182
211
return "ascii"
183
212
# Loop through all supported encodings and check if all characters in the string
184
213
# are in the charset of the encoding. If all characters are in the charset, return
@@ -374,8 +403,8 @@ def non_ascii_to_octal(argstr: str, encoding: Encoding = "ISOLatin1+") -> str:
374
403
>>> non_ascii_to_octal("12ABāáâãäåβ①②", encoding="ISO-8859-4")
375
404
'12AB\\340\\341\\342\\343\\344\\345@~\\142@~@%34%\\254@%%@%34%\\255@%%'
376
405
""" # noqa: RUF002
377
- # Return the input string if it only contains ASCII characters.
378
- if encoding == "ascii" or all ( 32 <= ord ( c ) <= 126 for c in argstr ):
406
+ # Return the input string if it only contains printable ASCII characters.
407
+ if encoding == "ascii" or _is_printable_ascii ( argstr ):
379
408
return argstr
380
409
381
410
# Dictionary mapping non-ASCII characters to octal codes
@@ -389,7 +418,7 @@ def non_ascii_to_octal(argstr: str, encoding: Encoding = "ISOLatin1+") -> str:
389
418
# ISOLatin1+ or ISO-8859-x charset.
390
419
mapping .update ({c : f"\\ { i :03o} " for i , c in charset [encoding ].items ()})
391
420
392
- # Remove any printable characters
421
+ # Remove any printable characters.
393
422
mapping = {k : v for k , v in mapping .items () if k not in string .printable }
394
423
return argstr .translate (str .maketrans (mapping ))
395
424
0 commit comments