33
44from pandas .compat import zip
55from pandas .core .dtypes .generic import ABCSeries , ABCIndex
6- from pandas .core .dtypes .missing import isna , notna
6+ from pandas .core .dtypes .missing import isna
77from pandas .core .dtypes .common import (
88 is_bool_dtype ,
99 is_categorical_dtype ,
3636_shared_docs = dict ()
3737
3838
39- def _get_array_list (arr , others ):
40- """
41- Auxiliary function for :func:`str_cat`
42-
43- Parameters
44- ----------
45- arr : ndarray
46- The left-most ndarray of the concatenation
47- others : list, ndarray, Series
48- The rest of the content to concatenate. If list of list-likes,
49- all elements must be passable to ``np.asarray``.
50-
51- Returns
52- -------
53- list
54- List of all necessary arrays
55- """
56- from pandas .core .series import Series
57-
58- if len (others ) and isinstance (com .values_from_object (others )[0 ],
59- (list , np .ndarray , Series )):
60- arrays = [arr ] + list (others )
61- else :
62- arrays = [arr , others ]
63-
64- return [np .asarray (x , dtype = object ) for x in arrays ]
65-
66-
67- def str_cat (arr , others = None , sep = None , na_rep = None ):
68- """
def interleave_sep(all_cols, sep):
    """
    Auxiliary function for :meth:`str.cat`

    Places the separator between every pair of neighbouring columns so
    that the resulting sequence can be reduced in a single pass.

    Parameters
    ----------
    all_cols : list of numpy arrays
        List of arrays to be concatenated with sep
    sep : string
        The separator string for concatenating the columns

    Returns
    -------
    list
        The list of arrays interleaved with sep; to be fed to np.sum.
        When `sep` is the empty string, `all_cols` itself is returned.
    """
    if sep == '':
        # an empty separator contributes nothing, so skip the interleaving
        return all_cols

    # n columns need n - 1 separators: columns sit at the even positions,
    # separators at the odd ones (an empty input gives an empty range)
    total = 2 * len(all_cols) - 1
    return [sep if pos % 2 else all_cols[pos // 2] for pos in range(total)]
14761
14862
14963def _na_map (f , arr , na_result = np .nan , dtype = object ):
@@ -2172,6 +2086,8 @@ def cat(self, others=None, sep=None, na_rep=None, join=None):
21722086
21732087 if isinstance (others , compat .string_types ):
21742088 raise ValueError ("Did you mean to supply a `sep` keyword?" )
2089+ if sep is None :
2090+ sep = ''
21752091
21762092 if isinstance (self ._orig , Index ):
21772093 data = Series (self ._orig , index = self ._orig )
@@ -2180,9 +2096,13 @@ def cat(self, others=None, sep=None, na_rep=None, join=None):
21802096
21812097 # concatenate Series/Index with itself if no "others"
21822098 if others is None :
2183- result = str_cat (data , others = others , sep = sep , na_rep = na_rep )
2184- return self ._wrap_result (result ,
2185- use_codes = (not self ._is_categorical ))
2099+ data = data .astype (object ).values
2100+ mask = isna (data )
2101+ if mask .any ():
2102+ if na_rep is None :
2103+ return sep .join (data [~ mask ])
2104+ return sep .join (np .where (mask , na_rep , data ))
2105+ return sep .join (data )
21862106
21872107 try :
21882108 # turn anything in "others" into lists of Series
@@ -2209,23 +2129,42 @@ def cat(self, others=None, sep=None, na_rep=None, join=None):
22092129 "'outer'|'inner'|'right'`. The future default will "
22102130 "be `join='left'`." , FutureWarning , stacklevel = 2 )
22112131
2212- # align if required
2213- if join is not None :
2132+ # if join is None, _get_series_list already aligned indexes
2133+ join = 'left' if join is None else join
2134+
2135+ if any (not data .index .equals (x .index ) for x in others ):
22142136 # Need to add keys for uniqueness in case of duplicate columns
22152137 others = concat (others , axis = 1 ,
22162138 join = (join if join == 'inner' else 'outer' ),
2217- keys = range (len (others )))
2139+ keys = range (len (others )), copy = False )
22182140 data , others = data .align (others , join = join )
22192141 others = [others [x ] for x in others ] # again list of Series
22202142
2221- # str_cat discards index
2222- res = str_cat (data , others = others , sep = sep , na_rep = na_rep )
2143+ all_cols = [x .astype (object ).values for x in [data ] + others ]
2144+ masks = np .array ([isna (x ) for x in all_cols ])
2145+ union_mask = np .logical_or .reduce (masks , axis = 0 )
2146+
2147+ if na_rep is None and union_mask .any ():
2148+ result = np .empty (len (data ), dtype = object )
2149+ np .putmask (result , union_mask , np .nan )
2150+
2151+ not_masked = ~ union_mask
2152+ all_cols = interleave_sep ([x [not_masked ] for x in all_cols ], sep )
2153+
2154+ result [not_masked ] = np .sum (all_cols , axis = 0 )
2155+ elif na_rep is not None and union_mask .any ():
2156+ # fill NaNs
2157+ all_cols = [np .where (masks [i ], na_rep , all_cols [i ])
2158+ for i in range (len (all_cols ))]
2159+ result = np .sum (interleave_sep (all_cols , sep ), axis = 0 )
2160+ else : # no NaNs
2161+ result = np .sum (interleave_sep (all_cols , sep ), axis = 0 )
22232162
22242163 if isinstance (self ._orig , Index ):
2225- res = Index (res , name = self ._orig .name )
2164+ result = Index (result , name = self ._orig .name )
22262165 else : # Series
2227- res = Series (res , index = data .index , name = self ._orig .name )
2228- return res
2166+ result = Series (result , index = data .index , name = self ._orig .name )
2167+ return result
22292168
22302169 _shared_docs ['str_split' ] = ("""
22312170 Split strings around given separator/delimiter.
0 commit comments