115115dtype : Type name or dict of column -> type, default None
116116 Data type for data or columns. E.g. {'a': np.float64, 'b': np.int32}
117117 Use `str` or `object` to preserve and not interpret dtype.
118- If converters are specified, they will be applied AFTER
119- dtype conversion.
118+ If converters are specified, they will be applied INSTEAD
119+ of dtype conversion.
120+
121+ .. versionadded:: 0.20.0 support for the Python parser.
122+
120123%s
121124converters : dict, default None
122125 Dict of functions for converting values in certain columns. Keys can either
@@ -1295,15 +1298,6 @@ def _agg_index(self, index, try_parse_dates=True):
12951298 def _apply_converter (self , values , conv_f , na_values , col_na_values ,
12961299 col_na_fvalues ):
12971300 """ apply converter function to values, respecting NAs """
1298- try :
1299- values = lib .map_infer (values , conv_f )
1300- except ValueError :
1301- mask = lib .ismember (values , na_values ).view (np .uint8 )
1302- values = lib .map_infer_mask (values , conv_f , mask )
1303-
1304- cvals , na_count = self ._infer_types (
1305- values , set (col_na_values ) | col_na_fvalues ,
1306- try_num_bool = False )
13071301 return cvals , na_count
13081302
13091303 def _convert_to_ndarrays (self , dct , na_values , na_fvalues , verbose = False ,
@@ -1323,45 +1317,58 @@ def _convert_to_ndarrays(self, dct, na_values, na_fvalues, verbose=False,
13231317 else :
13241318 col_na_values , col_na_fvalues = set (), set ()
13251319
1326- if conv_f is not None and cast_type is None :
1327- # if type is not specified, apply the conversion first, without
1328- # inference
1329- cvals , na_count = self ._apply_converter (
1330- values , conv_f , na_values ,
1331- col_na_values , col_na_fvalues )
1320+ if conv_f is not None :
1321+ # conv_f applied to data before inference
1322+ # dtype isn't used if a converter is specified
1323+ try :
1324+ values = lib .map_infer (values , conv_f )
1325+ except ValueError :
1326+ mask = lib .ismember (values , na_values ).view (np .uint8 )
1327+ values = lib .map_infer_mask (values , conv_f , mask )
1328+
1329+ cvals , na_count = self ._infer_types (
1330+ values , set (col_na_values ) | col_na_fvalues ,
1331+ try_num_bool = False )
13321332 else :
1333- try_num_bool = True
1334- if cast_type and is_object_dtype (cast_type ):
1335- # skip inference if specified dtype is object
1336- try_num_bool = False
1333+ # skip inference if specified dtype is object
1334+ try_num_bool = not (cast_type and is_object_dtype (cast_type ))
13371335
13381336 # general type inference and conversion
13391337 cvals , na_count = self ._infer_types (
13401338 values , set (col_na_values ) | col_na_fvalues ,
13411339 try_num_bool )
13421340
1343- if issubclass (cvals .dtype .type , np .integer ) and self .compact_ints :
1344- cvals = lib .downcast_int64 (
1345- cvals , _parser .na_values ,
1346- self .use_unsigned )
1341+ if issubclass (cvals .dtype .type , np .integer ) and self .compact_ints :
1342+ cvals = lib .downcast_int64 (
1343+ cvals , _parser .na_values ,
1344+ self .use_unsigned )
13471345
1348- if cast_type and not is_dtype_equal (cvals , cast_type ):
13491346 # type specified in dtype param
1350-
1351- cvals = self ._cast_types (cvals , cast_type , c )
1352- # for consistency with c-parser, if a converter and dtype are
1353- # specified, apply the converter last
1354- if conv_f is not None :
1355- values , na_count = self ._apply_converter (
1356- values , conv_f , na_values ,
1357- col_na_values , col_na_fvalues )
1347+ if cast_type and not is_dtype_equal (cvals , cast_type ):
1348+ cvals = self ._cast_types (cvals , cast_type , c )
13581349
13591350 result [c ] = cvals
13601351 if verbose and na_count :
13611352 print ('Filled %d NA values in column %s' % (na_count , str (c )))
13621353 return result
13631354
13641355 def _infer_types (self , values , na_values , try_num_bool = True ):
1356+ """
1357+ Infer types of values, possibly casting
1358+
1359+ Parameters
1360+ ----------
1361+ values : ndarray
1362+ na_values : set
1363+ try_num_bool : bool, default True
1364+ try to cast values to numeric (first preference) or boolean
1365+
1366+ Returns
1367+ -------
1368+ converted : ndarray
1369+ na_count : int
1370+ """
1371+
13651372 na_count = 0
13661373 if issubclass (values .dtype .type , (np .number , np .bool_ )):
13671374 mask = lib .ismember (values , na_values )
@@ -1393,7 +1400,22 @@ def _infer_types(self, values, na_values, try_num_bool=True):
13931400 return result , na_count
13941401
13951402 def _cast_types (self , values , cast_type , column ):
1396- """ cast column to type specified in dtypes= param """
1403+ """
1404+ Cast values to specified type
1405+
1406+ Parameters
1407+ ----------
1408+ values : ndarray
1409+ cast_type : string or np.dtype
1410+ dtype to cast values to
1411+ column : string
1412+ column name - used only for error reporting
1413+
1414+ Returns
1415+ -------
1416+ converted : ndarray
1417+ """
1418+
13971419 if is_categorical_dtype (cast_type ):
13981420 # XXX this is for consistency with
13991421 # c-parser which parses all categories
0 commit comments