115115dtype : Type name or dict of column -> type, default None
116116 Data type for data or columns. E.g. {'a': np.float64, 'b': np.int32}
117117 Use `str` or `object` to preserve and not interpret dtype.
118- If converters are specified, they will be applied AFTER
119- dtype conversion.
118+ If converters are specified, they will be applied INSTEAD
119+ of dtype conversion.
120+
121+ .. versionadded:: 0.20.0 support for the Python parser.
122+
120123%s
121124converters : dict, default None
122125 Dict of functions for converting values in certain columns. Keys can either
@@ -1292,20 +1295,6 @@ def _agg_index(self, index, try_parse_dates=True):
12921295
12931296 return index
12941297
1295- def _apply_converter (self , values , conv_f , na_values , col_na_values ,
1296- col_na_fvalues ):
1297- """ apply converter function to values, respecting NAs """
1298- try :
1299- values = lib .map_infer (values , conv_f )
1300- except ValueError :
1301- mask = lib .ismember (values , na_values ).view (np .uint8 )
1302- values = lib .map_infer_mask (values , conv_f , mask )
1303-
1304- cvals , na_count = self ._infer_types (
1305- values , set (col_na_values ) | col_na_fvalues ,
1306- try_num_bool = False )
1307- return cvals , na_count
1308-
13091298 def _convert_to_ndarrays (self , dct , na_values , na_fvalues , verbose = False ,
13101299 converters = None , dtypes = None ):
13111300 result = {}
@@ -1323,45 +1312,58 @@ def _convert_to_ndarrays(self, dct, na_values, na_fvalues, verbose=False,
13231312 else :
13241313 col_na_values , col_na_fvalues = set (), set ()
13251314
1326- if conv_f is not None and cast_type is None :
1327- # if type is not specified, apply the conversion first, without
1328- # inference
1329- cvals , na_count = self ._apply_converter (
1330- values , conv_f , na_values ,
1331- col_na_values , col_na_fvalues )
1315+ if conv_f is not None :
1316+ # conv_f applied to data before inference
1317+ # dtype isn't used if a converter is specified
1318+ try :
1319+ values = lib .map_infer (values , conv_f )
1320+ except ValueError :
1321+ mask = lib .ismember (values , na_values ).view (np .uint8 )
1322+ values = lib .map_infer_mask (values , conv_f , mask )
1323+
1324+ cvals , na_count = self ._infer_types (
1325+ values , set (col_na_values ) | col_na_fvalues ,
1326+ try_num_bool = False )
13321327 else :
1333- try_num_bool = True
1334- if cast_type and is_object_dtype (cast_type ):
1335- # skip inference if specified dtype is object
1336- try_num_bool = False
1328+ # skip inference if specified dtype is object
1329+ try_num_bool = not (cast_type and is_object_dtype (cast_type ))
13371330
13381331 # general type inference and conversion
13391332 cvals , na_count = self ._infer_types (
13401333 values , set (col_na_values ) | col_na_fvalues ,
13411334 try_num_bool )
13421335
1336+ # type specified in dtype param
1337+ if cast_type and not is_dtype_equal (cvals , cast_type ):
1338+ cvals = self ._cast_types (cvals , cast_type , c )
1339+
13431340 if issubclass (cvals .dtype .type , np .integer ) and self .compact_ints :
13441341 cvals = lib .downcast_int64 (
13451342 cvals , _parser .na_values ,
13461343 self .use_unsigned )
13471344
1348- if cast_type and not is_dtype_equal (cvals , cast_type ):
1349- # type specificed in dtype param
1350-
1351- cvals = self ._cast_types (cvals , cast_type , c )
1352- # for consistency with c-parser, if a converter and dtype are
1353- # specified, apply the converter last
1354- if conv_f is not None :
1355- values , na_count = self ._apply_converter (
1356- values , conv_f , na_values ,
1357- col_na_values , col_na_fvalues )
1358-
13591345 result [c ] = cvals
13601346 if verbose and na_count :
13611347 print ('Filled %d NA values in column %s' % (na_count , str (c )))
13621348 return result
13631349
13641350 def _infer_types (self , values , na_values , try_num_bool = True ):
1351+ """
1352+ Infer types of values, possibly casting
1353+
1354+ Parameters
1355+ ----------
1356+ values : ndarray
1357+ na_values : set
1358+ try_num_bool : bool, default True
1359+ try to cast values to numeric (first preference) or boolean
1360+
1361+ Returns
1362+ -------
1363+ converted : ndarray
1364+ na_count : int
1365+ """
1366+
13651367 na_count = 0
13661368 if issubclass (values .dtype .type , (np .number , np .bool_ )):
13671369 mask = lib .ismember (values , na_values )
@@ -1393,7 +1395,22 @@ def _infer_types(self, values, na_values, try_num_bool=True):
13931395 return result , na_count
13941396
13951397 def _cast_types (self , values , cast_type , column ):
1396- """ cast column to type specified in dtypes= param """
1398+ """
1399+ Cast values to specified type
1400+
1401+ Parameters
1402+ ----------
1403+ values : ndarray
1404+ cast_type : string or np.dtype
1405+ dtype to cast values to
1406+ column : string
1407+ column name - used only for error reporting
1408+
1409+ Returns
1410+ -------
1411+ converted : ndarray
1412+ """
1413+
13971414 if is_categorical_dtype (cast_type ):
13981415 # XXX this is for consistency with
13991416 # c-parser which parses all categories
0 commit comments