@@ -340,7 +340,7 @@ cdef class TextReader:
340
340
cdef:
341
341
parser_t * parser
342
342
object na_fvalues
343
- object true_values, false_values
343
+ list true_values, false_values
344
344
object handle
345
345
object orig_header
346
346
bint na_filter, keep_default_na, has_usecols, has_mi_columns
@@ -942,6 +942,7 @@ cdef class TextReader:
942
942
bint na_filter = 0
943
943
int64_t num_cols
944
944
dict results
945
+ bint is_default_dict_dtype
945
946
946
947
start = self .parser_start
947
948
@@ -957,26 +958,7 @@ cdef class TextReader:
957
958
self .parser.line_fields[i] + \
958
959
(num_cols >= self .parser.line_fields[i]) * num_cols
959
960
960
- usecols_not_callable_and_exists = not callable (self .usecols) and self .usecols
961
- names_larger_num_cols = (self .names and
962
- len (self .names) - self .leading_cols > num_cols)
963
-
964
- if self .table_width - self .leading_cols > num_cols:
965
- if (usecols_not_callable_and_exists
966
- and self .table_width - self .leading_cols < len (self .usecols)
967
- or names_larger_num_cols):
968
- raise ParserError(f" Too many columns specified: expected "
969
- f" {self.table_width - self.leading_cols} "
970
- f" and found {num_cols}" )
971
-
972
- if (usecols_not_callable_and_exists and
973
- all (isinstance (u, int ) for u in self .usecols)):
974
- missing_usecols = [col for col in self .usecols if col >= num_cols]
975
- if missing_usecols:
976
- raise ParserError(
977
- " Defining usecols with out-of-bounds indices is not allowed. "
978
- f" {missing_usecols} are out of bounds." ,
979
- )
961
+ self ._validate_usecols_and_names(num_cols)
980
962
981
963
results = {}
982
964
nused = 0
@@ -1004,22 +986,7 @@ cdef class TextReader:
1004
986
nused += 1
1005
987
1006
988
conv = self ._get_converter(i, name)
1007
-
1008
- col_dtype = None
1009
- if self .dtype is not None :
1010
- if isinstance (self .dtype, dict ):
1011
- if name in self .dtype:
1012
- col_dtype = self .dtype[name]
1013
- elif i in self .dtype:
1014
- col_dtype = self .dtype[i]
1015
- elif is_default_dict_dtype:
1016
- col_dtype = self .dtype[name]
1017
- else :
1018
- if self .dtype.names:
1019
- # structured array
1020
- col_dtype = np.dtype(self .dtype.descr[i][1 ])
1021
- else :
1022
- col_dtype = self .dtype
989
+ col_dtype = self ._get_col_dtype(i, is_default_dict_dtype, name)
1023
990
1024
991
if conv:
1025
992
if col_dtype is not None :
@@ -1267,6 +1234,47 @@ cdef class TextReader:
1267
1234
return _string_box_utf8(self .parser, i, start, end, na_filter,
1268
1235
na_hashset, self .encoding_errors)
1269
1236
1237
cdef void _validate_usecols_and_names(self, int num_cols) except *:
    """
    Check ``self.usecols`` and ``self.names`` against the parsed column count.

    Parameters
    ----------
    num_cols : int
        Number of columns to validate against, as computed by the caller.

    Raises
    ------
    ParserError
        If more columns were specified than ``num_cols`` provides, or if
        an all-integer ``usecols`` holds an index that is >= ``num_cols``.

    Notes
    -----
    The ``except *`` clause is required: a ``cdef void`` function declared
    without an exception specification cannot propagate Python exceptions
    under Cython 0.29 semantics (they are printed and then discarded),
    which would turn every check below into a silent no-op.
    """
    # Falsy when usecols is callable, None or empty; otherwise this is
    # usecols itself, so it can be measured and iterated below.
    usecols_not_callable_and_exists = not callable(self.usecols) and self.usecols
    names_larger_num_cols = (self.names and
                             len(self.names) - self.leading_cols > num_cols)

    if self.table_width - self.leading_cols > num_cols:
        # Parentheses only spell out the grouping the ``and``/``or``
        # precedence rules already give: (A and B) or C.
        if ((usecols_not_callable_and_exists
                and self.table_width - self.leading_cols < len(self.usecols))
                or names_larger_num_cols):
            raise ParserError(f" Too many columns specified: expected "
                              f" {self.table_width - self.leading_cols} "
                              f" and found {num_cols}")

    # Positional usecols must all refer to columns that actually exist.
    if (usecols_not_callable_and_exists and
            all(isinstance(u, int) for u in self.usecols)):
        missing_usecols = [col for col in self.usecols if col >= num_cols]
        if missing_usecols:
            raise ParserError(
                " Defining usecols with out-of-bounds indices is not allowed. "
                f" {missing_usecols} are out of bounds.",
            )
1258
+
1259
+ # -> DtypeObj
1260
cdef object _get_col_dtype(self, int64_t i, bint is_default_dict_dtype, name):
    """
    Look up the dtype requested for a single column.

    Parameters
    ----------
    i : int64_t
        Position of the column.
    is_default_dict_dtype : bint
        Flag supplied by the caller; when set, a dict ``self.dtype`` that
        contains neither ``name`` nor ``i`` is still indexed by ``name``.
    name : object
        Label of the column.

    Returns
    -------
    object
        The dtype selected for the column, or None when ``self.dtype`` is
        None or is a dict that yields no entry for this column.
    """
    requested = self.dtype

    if requested is None:
        return None

    if isinstance(requested, dict):
        # The label takes precedence over the position.
        if name in requested:
            return requested[name]
        if i in requested:
            return requested[i]
        if is_default_dict_dtype:
            # Membership tests came back empty; index by label anyway.
            return requested[name]
        return None

    if requested.names:
        # structured array
        return np.dtype(requested.descr[i][1])

    return requested
1277
+
1270
1278
def _get_converter (self , i: int , name ):
1271
1279
if self .converters is None :
1272
1280
return None
@@ -1347,8 +1355,8 @@ cdef _close(TextReader reader):
1347
1355
1348
1356
1349
1357
cdef:
1350
- object _true_values = [b" True" , b" TRUE" , b" true" ]
1351
- object _false_values = [b" False" , b" FALSE" , b" false" ]
1358
+ list _true_values = [b" True" , b" TRUE" , b" true" ]
1359
+ list _false_values = [b" False" , b" FALSE" , b" false" ]
1352
1360
1353
1361
1354
1362
def _ensure_encoded (list lst ):
0 commit comments