22from functools import wraps
33import re
44import textwrap
5- from typing import Dict
5+ from typing import Dict , List
66import warnings
77
88import numpy as np
3131_shared_docs = dict () # type: Dict[str, str]
3232
3333
34- def cat_core (list_of_columns , sep ):
34+ def cat_core (list_of_columns : List , sep : str ):
3535 """
3636 Auxiliary function for :meth:`str.cat`
3737
@@ -53,6 +53,41 @@ def cat_core(list_of_columns, sep):
5353 return np .sum (list_with_sep , axis = 0 )
5454
5555
def cat_safe(list_of_columns: List, sep: str):
    """
    Auxiliary function for :meth:`str.cat`.

    Same signature as cat_core, but handles TypeErrors in concatenation, which
    happen if the arrays in list_of_columns have the wrong dtypes or content.

    Parameters
    ----------
    list_of_columns : list of numpy arrays
        List of arrays to be concatenated with sep;
        these arrays may not contain NaNs!
    sep : string
        The separator string for concatenating the columns

    Returns
    -------
    np.ndarray
        The concatenation of list_of_columns with sep

    Raises
    ------
    TypeError
        If the concatenation fails with a TypeError. When a column whose
        inferred dtype is neither 'string' nor 'empty' is found, the error
        carries a descriptive message naming that inferred dtype; otherwise
        the original TypeError from cat_core is re-raised unchanged.
    """
    try:
        return cat_core(list_of_columns, sep)
    except TypeError:
        # if there are any non-string values (wrong dtype or hidden behind
        # object dtype), np.sum will fail; catch and return with better message
        for column in list_of_columns:
            dtype = lib.infer_dtype(column, skipna=True)
            if dtype not in ['string', 'empty']:
                # `from None` hides the low-level traceback of the failed
                # concatenation, which is of no use to the end user
                raise TypeError(
                    'Concatenation requires list-likes containing only '
                    'strings (or missing values). Offending values found in '
                    'column {}'.format(dtype)) from None
        # No column could be identified as the culprit: propagate the
        # original TypeError instead of falling through to a return of a
        # variable that was never assigned (UnboundLocalError).
        raise
89+
90+
def _na_map(f, arr, na_result=np.nan, dtype=object):
    """
    Forward ``f`` and ``arr`` to ``_map`` with ``na_mask=True``.

    ``na_result`` is handed over as ``na_value`` and ``dtype`` is passed
    through unchanged; whatever ``_map`` returns is returned as-is.
    """
    # NOTE: should really _check_ for NA
    mapped = _map(f, arr, na_mask=True, na_value=na_result, dtype=dtype)
    return mapped
@@ -2314,16 +2349,16 @@ def cat(self, others=None, sep=None, na_rep=None, join=None):
23142349 np .putmask (result , union_mask , np .nan )
23152350
23162351 not_masked = ~ union_mask
2317- result [not_masked ] = cat_core ([x [not_masked ] for x in all_cols ],
2352+ result [not_masked ] = cat_safe ([x [not_masked ] for x in all_cols ],
23182353 sep )
23192354 elif na_rep is not None and union_mask .any ():
23202355 # fill NaNs with na_rep in case there are actually any NaNs
23212356 all_cols = [np .where (nm , na_rep , col )
23222357 for nm , col in zip (na_masks , all_cols )]
2323- result = cat_core (all_cols , sep )
2358+ result = cat_safe (all_cols , sep )
23242359 else :
23252360 # no NaNs - can just concatenate
2326- result = cat_core (all_cols , sep )
2361+ result = cat_safe (all_cols , sep )
23272362
23282363 if isinstance (self ._orig , Index ):
23292364 # add dtype for case that result is all-NA
0 commit comments