@@ -2280,6 +2280,23 @@ def cat(self, others=None, sep=None, na_rep=None, join=None):
22802280 'must all be of the same length as the '
22812281 'calling Series/Index.' )
22822282
2283+ # data has already been checked by _validate to be of correct dtype,
2284+ # but others could still have Series of dtypes (e.g. integers) which
2285+ # will necessarily fail in concatenation. To avoid deep and confusing
2286+ # traces, we raise here for anything that's not object or all-NA float.
def _legal_dtype(series):
    """Return whether ``series`` has a dtype that may be concatenated.

    The dtype inspected is ``series.dtype``, or the dtype of the categories
    when ``series`` is categorical. It is accepted when it is object, or
    when it is a float dtype of any width and every value is missing.

    Parameters
    ----------
    series : Series
        One of the list-likes to be concatenated.

    Returns
    -------
    bool
    """
    # unify dtype handling between categorical/non-categorical
    dtype = (series.cat.categories.dtype if is_categorical_dtype(series)
             else series.dtype)
    if dtype == 'O':
        return True
    # Comparing against 'float' only matches float64 and would reject
    # all-NA float16/float32 data; the dtype kind covers every float width.
    is_float = getattr(dtype, 'kind', None) == 'f'
    return bool(is_float and series.isna().all())
2293+ err_wrong_dtype = ('Can only concatenate list-likes containing only '
2294+ 'strings (or missing values).' )
2295+ if any (not _legal_dtype (x ) for x in others ):
2296+ raise TypeError (err_wrong_dtype + ' Received list-like of dtype: '
2297+ '{}' .format ([x .dtype for x in others
2298+ if not _legal_dtype (x )][0 ]))
2299+
22832300 if join is None and warn :
22842301 warnings .warn ("A future version of pandas will perform index "
22852302 "alignment when `others` is a Series/Index/"
@@ -2307,23 +2324,28 @@ def cat(self, others=None, sep=None, na_rep=None, join=None):
23072324 na_masks = np .array ([isna (x ) for x in all_cols ])
23082325 union_mask = np .logical_or .reduce (na_masks , axis = 0 )
23092326
2310- if na_rep is None and union_mask .any ():
2311- # no na_rep means NaNs for all rows where any column has a NaN
2312- # only necessary if there are actually any NaNs
2313- result = np .empty (len (data ), dtype = object )
2314- np .putmask (result , union_mask , np .nan )
2315-
2316- not_masked = ~ union_mask
2317- result [not_masked ] = cat_core ([x [not_masked ] for x in all_cols ],
2318- sep )
2319- elif na_rep is not None and union_mask .any ():
2320- # fill NaNs with na_rep in case there are actually any NaNs
2321- all_cols = [np .where (nm , na_rep , col )
2322- for nm , col in zip (na_masks , all_cols )]
2323- result = cat_core (all_cols , sep )
2324- else :
2325- # no NaNs - can just concatenate
2326- result = cat_core (all_cols , sep )
2327+ # if there are any non-string, non-null values hidden within an object
2328+ # dtype, cat_core will fail; catch error and return with better message
2329+ try :
2330+ if na_rep is None and union_mask .any ():
2331+ # no na_rep means NaNs for all rows where any column has a NaN
2332+ # only necessary if there are actually any NaNs
2333+ result = np .empty (len (data ), dtype = object )
2334+ np .putmask (result , union_mask , np .nan )
2335+
2336+ not_masked = ~ union_mask
2337+ result [not_masked ] = cat_core ([x [not_masked ]
2338+ for x in all_cols ], sep )
2339+ elif na_rep is not None and union_mask .any ():
2340+ # fill NaNs with na_rep in case there are actually any NaNs
2341+ all_cols = [np .where (nm , na_rep , col )
2342+ for nm , col in zip (na_masks , all_cols )]
2343+ result = cat_core (all_cols , sep )
2344+ else :
2345+ # no NaNs - can just concatenate
2346+ result = cat_core (all_cols , sep )
2347+ except TypeError :
2348+ raise TypeError (err_wrong_dtype )
23272349
23282350 if isinstance (self ._orig , Index ):
23292351 # add dtype for case that result is all-NA
0 commit comments