33
44from pandas .compat import zip
55from pandas .core .dtypes .generic import ABCSeries , ABCIndex
6- from pandas .core .dtypes .missing import isna , notna
6+ from pandas .core .dtypes .missing import isna
77from pandas .core .dtypes .common import (
8+ ensure_object ,
89 is_bool_dtype ,
910 is_categorical_dtype ,
1011 is_object_dtype ,
3637_shared_docs = dict ()
3738
3839
39- def _get_array_list (arr , others ):
40- """
41- Auxiliary function for :func:`str_cat`
42-
43- Parameters
44- ----------
45- arr : ndarray
46- The left-most ndarray of the concatenation
47- others : list, ndarray, Series
48- The rest of the content to concatenate. If list of list-likes,
49- all elements must be passable to ``np.asarray``.
50-
51- Returns
52- -------
53- list
54- List of all necessary arrays
55- """
56- from pandas .core .series import Series
57-
58- if len (others ) and isinstance (com .values_from_object (others )[0 ],
59- (list , np .ndarray , Series )):
60- arrays = [arr ] + list (others )
61- else :
62- arrays = [arr , others ]
63-
64- return [np .asarray (x , dtype = object ) for x in arrays ]
65-
66-
67- def str_cat (arr , others = None , sep = None , na_rep = None ):
def cat_core(list_of_columns, sep):
    """
    Auxiliary function for :meth:`str.cat`

    Concatenate the given columns element-wise, inserting `sep` between
    each pair of neighbouring columns.

    Parameters
    ----------
    list_of_columns : list of numpy arrays
        List of object-dtype arrays to be concatenated with sep;
        these arrays must not contain NaNs (callers are expected to
        mask or fill missing values beforehand).
    sep : string
        The separator string for concatenating the columns

    Returns
    -------
    numpy.ndarray
        The element-wise concatenation of list_of_columns with sep
    """
    # Interleave the separator between the columns:
    # [col_0, sep, col_1, sep, ..., col_n]
    list_with_sep = [sep] * (2 * len(list_of_columns) - 1)
    list_with_sep[::2] = list_of_columns
    # Build the object array explicitly. The list mixes arrays with plain
    # strings, i.e. it is "ragged"; letting np.sum coerce it implicitly is
    # deprecated in NumPy and raises ValueError in recent versions.
    arr_with_sep = np.asarray(list_with_sep, dtype=object)
    # Summing along axis 0 applies `+` (string concatenation) element-wise.
    return np.sum(arr_with_sep, axis=0)
14760
14861
14962def _na_map (f , arr , na_result = np .nan , dtype = object ):
@@ -2283,6 +2196,8 @@ def cat(self, others=None, sep=None, na_rep=None, join=None):
22832196
22842197 if isinstance (others , compat .string_types ):
22852198 raise ValueError ("Did you mean to supply a `sep` keyword?" )
2199+ if sep is None :
2200+ sep = ''
22862201
22872202 if isinstance (self ._orig , Index ):
22882203 data = Series (self ._orig , index = self ._orig )
@@ -2291,9 +2206,13 @@ def cat(self, others=None, sep=None, na_rep=None, join=None):
22912206
22922207 # concatenate Series/Index with itself if no "others"
22932208 if others is None :
2294- result = str_cat (data , others = others , sep = sep , na_rep = na_rep )
2295- return self ._wrap_result (result ,
2296- use_codes = (not self ._is_categorical ))
2209+ data = ensure_object (data )
2210+ na_mask = isna (data )
2211+ if na_rep is None and na_mask .any ():
2212+ data = data [~ na_mask ]
2213+ elif na_rep is not None and na_mask .any ():
2214+ data = np .where (na_mask , na_rep , data )
2215+ return sep .join (data )
22972216
22982217 try :
22992218 # turn anything in "others" into lists of Series
@@ -2320,23 +2239,45 @@ def cat(self, others=None, sep=None, na_rep=None, join=None):
23202239 "'outer'|'inner'|'right'`. The future default will "
23212240 "be `join='left'`." , FutureWarning , stacklevel = 2 )
23222241
2242+ # if join is None, _get_series_list already force-aligned indexes
2243+ join = 'left' if join is None else join
2244+
23232245 # align if required
2324- if join is not None :
2246+ if any ( not data . index . equals ( x . index ) for x in others ) :
23252247 # Need to add keys for uniqueness in case of duplicate columns
23262248 others = concat (others , axis = 1 ,
23272249 join = (join if join == 'inner' else 'outer' ),
2328- keys = range (len (others )))
2250+ keys = range (len (others )), copy = False )
23292251 data , others = data .align (others , join = join )
23302252 others = [others [x ] for x in others ] # again list of Series
23312253
2332- # str_cat discards index
2333- res = str_cat (data , others = others , sep = sep , na_rep = na_rep )
2254+ all_cols = [ensure_object (x ) for x in [data ] + others ]
2255+ na_masks = np .array ([isna (x ) for x in all_cols ])
2256+ union_mask = np .logical_or .reduce (na_masks , axis = 0 )
2257+
2258+ if na_rep is None and union_mask .any ():
2259+ # no na_rep means NaNs for all rows where any column has a NaN
2260+ # only necessary if there are actually any NaNs
2261+ result = np .empty (len (data ), dtype = object )
2262+ np .putmask (result , union_mask , np .nan )
2263+
2264+ not_masked = ~ union_mask
2265+ result [not_masked ] = cat_core ([x [not_masked ] for x in all_cols ],
2266+ sep )
2267+ elif na_rep is not None and union_mask .any ():
2268+ # fill NaNs with na_rep in case there are actually any NaNs
2269+ all_cols = [np .where (nm , na_rep , col )
2270+ for nm , col in zip (na_masks , all_cols )]
2271+ result = cat_core (all_cols , sep )
2272+ else :
2273+ # no NaNs - can just concatenate
2274+ result = cat_core (all_cols , sep )
23342275
23352276 if isinstance (self ._orig , Index ):
2336- res = Index (res , name = self ._orig .name )
2277+ result = Index (result , name = self ._orig .name )
23372278 else : # Series
2338- res = Series (res , index = data .index , name = self ._orig .name )
2339- return res
2279+ result = Series (result , index = data .index , name = self ._orig .name )
2280+ return result
23402281
23412282 _shared_docs ['str_split' ] = ("""
23422283 Split strings around given separator/delimiter.
0 commit comments