@@ -149,6 +149,11 @@ def test_multiindex(self):
149149
150150
151151class TestGetDummies (tm .TestCase ):
152+
def setUp(self):
    """Build the small mixed-type frame stored on ``self.df``.

    Columns 'A' and 'B' hold strings and column 'C' holds integers.
    """
    columns = {
        'A': ['a', 'b', 'a'],
        'B': ['b', 'b', 'c'],
        'C': [1, 2, 3],
    }
    self.df = DataFrame(columns)
156+
152157 def test_basic (self ):
153158 s_list = list ('abc' )
154159 s_series = Series (s_list )
@@ -209,6 +214,114 @@ def test_unicode(self): # See GH 6885 - get_dummies chokes on unicode values
209214 u ('letter_%s' ) % eacute : {0 : 0.0 , 1 : 1.0 , 2 : 1.0 }})
210215 assert_frame_equal (res , exp )
211216
def test_dataframe_dummies_all_obj(self):
    """Encode a frame restricted to the string columns 'A' and 'B'.

    Each column is expected to expand into ``<column>_<value>``
    indicator columns.
    """
    string_only = self.df[['A', 'B']]
    actual = get_dummies(string_only)
    wanted = DataFrame({
        'A_a': [1., 0, 1],
        'A_b': [0., 1, 0],
        'B_b': [1., 1, 0],
        'B_c': [0., 0, 1],
    })
    assert_frame_equal(actual, wanted)
223+
def test_dataframe_dummies_mix_default(self):
    """Encode the full mixed frame with default arguments.

    The expected frame keeps 'C' as the first column, followed by the
    indicator columns derived from 'A' and then 'B'.
    """
    actual = get_dummies(self.df)
    column_order = ['C', 'A_a', 'A_b', 'B_b', 'B_c']
    wanted = DataFrame({
        'C': [1, 2, 3],
        'A_a': [1., 0, 1],
        'A_b': [0., 1, 0],
        'B_b': [1., 1, 0],
        'B_c': [0., 0, 1],
    })
    wanted = wanted[column_order]
    assert_frame_equal(actual, wanted)
232+
def test_dataframe_dummies_prefix_list(self):
    """Pass ``prefix`` as a list with one entry per encoded column."""
    frame = DataFrame({
        'A': ['a', 'b', 'a'],
        'B': ['b', 'b', 'c'],
        'C': [1, 2, 3],
    })
    actual = get_dummies(frame, prefix=['from_A', 'from_B'])

    column_order = ['C', 'from_A_a', 'from_A_b', 'from_B_b', 'from_B_c']
    wanted = DataFrame({
        'C': [1, 2, 3],
        'from_A_a': [1., 0, 1],
        'from_A_b': [0., 1, 0],
        'from_B_b': [1., 1, 0],
        'from_B_c': [0., 0, 1],
    })
    wanted = wanted[column_order]
    assert_frame_equal(actual, wanted)
244+
def test_datafrmae_dummies_prefix_str(self):
    """Pass a single string as ``prefix`` for every encoded column.

    The expected frame deliberately contains the duplicated label
    'bad_b', since both 'A' and 'B' contribute a 'b' value.
    """
    # NOTE(review): the method name misspells "dataframe"; it is kept
    # unchanged here so that existing test selectors still match.
    # Not that you should do this...
    actual = get_dummies(self.df, prefix='bad')

    labels = ['C', 'bad_a', 'bad_b', 'bad_b', 'bad_c']
    rows = [
        [1, 1., 0., 1., 0.],
        [2, 0., 1., 1., 0.],
        [3, 1., 0., 0., 1.],
    ]
    wanted = DataFrame(rows, columns=labels)
    assert_frame_equal(actual, wanted)
254+
def test_dataframe_dummies_subset(self):
    """Encode only column 'A' by passing ``columns=['A']``.

    The expected frame still carries 'B' and 'C' with their original
    values alongside the 'from_A_*' indicator columns.
    """
    actual = get_dummies(self.df, prefix=['from_A'], columns=['A'])
    wanted = DataFrame({
        'from_A_a': [1., 0, 1],
        'from_A_b': [0., 1, 0],
        'B': ['b', 'b', 'c'],
        'C': [1, 2, 3],
    })
    assert_frame_equal(actual, wanted)
262+
def test_dataframe_dummies_prefix_sep(self):
    """Exercise ``prefix_sep`` given as a string, a list and a dict."""
    frame = self.df

    # A single separator string is used for both encoded columns.
    column_order = ['C', 'A..a', 'A..b', 'B..b', 'B..c']
    wanted = DataFrame({
        'C': [1, 2, 3],
        'A..a': [1., 0, 1],
        'A..b': [0., 1, 0],
        'B..b': [1., 1, 0],
        'B..c': [0., 0, 1],
    })
    wanted = wanted[column_order]
    assert_frame_equal(get_dummies(frame, prefix_sep='..'), wanted)

    # A list gives one separator per encoded column: '..' for A, '__' for B.
    renames = {'B..b': 'B__b', 'B..c': 'B__c'}
    wanted = wanted.rename(columns=renames)
    assert_frame_equal(get_dummies(frame, prefix_sep=['..', '__']), wanted)

    # A dict keyed by column name is expected to give the same frame.
    by_column = {'A': '..', 'B': '__'}
    assert_frame_equal(get_dummies(frame, prefix_sep=by_column), wanted)
278+
def test_dataframe_dummies_prefix_bad_length(self):
    """A one-element ``prefix`` list for the fixture frame must raise ValueError."""
    short_prefixes = ['too few']
    with tm.assertRaises(ValueError):
        get_dummies(self.df, prefix=short_prefixes)
282+
def test_dataframe_dummies_prefix_sep_bad_length(self):
    """A one-element ``prefix_sep`` list for the fixture frame must raise ValueError."""
    short_separators = ['bad']
    with tm.assertRaises(ValueError):
        get_dummies(self.df, prefix_sep=short_separators)
286+
def test_dataframe_dummies_prefix_dict(self):
    """Pass ``prefix`` as a dict mapping column name to prefix."""
    frame = DataFrame({
        'A': ['a', 'b', 'a'],
        'B': ['b', 'b', 'c'],
        'C': [1, 2, 3],
    })
    prefix_by_column = {'A': 'from_A', 'B': 'from_B'}
    actual = get_dummies(frame, prefix=prefix_by_column)

    wanted = DataFrame({
        'from_A_a': [1., 0, 1],
        'from_A_b': [0., 1, 0],
        'from_B_b': [1., 1, 0],
        'from_B_c': [0., 0, 1],
        'C': [1, 2, 3],
    })
    assert_frame_equal(actual, wanted)
296+
def test_dataframe_dummies_with_na(self):
    """Check ``dummy_na=True`` and ``dummy_na=False`` on a frame with an all-NaN row."""
    # Appends a fourth row (label 3) of NaN to the fixture frame in place.
    self.df.loc[3, :] = [np.nan, np.nan, np.nan]
    frame = self.df

    # With dummy_na=True an extra '<column>_nan' indicator is expected.
    with_nan_cols = ['C', 'A_a', 'A_b', 'A_nan', 'B_b', 'B_c', 'B_nan']
    wanted = DataFrame({
        'C': [1, 2, 3, np.nan],
        'A_a': [1., 0, 1, 0],
        'A_b': [0., 1, 0, 0],
        'A_nan': [0., 0, 0, 1],
        'B_b': [1., 1, 0, 0],
        'B_c': [0., 0, 1, 0],
        'B_nan': [0., 0, 0, 1],
    })
    wanted = wanted[with_nan_cols]
    assert_frame_equal(get_dummies(frame, dummy_na=True), wanted)

    # With dummy_na=False the same frame minus the '_nan' columns is expected.
    without_nan_cols = ['C', 'A_a', 'A_b', 'B_b', 'B_c']
    wanted = wanted[without_nan_cols]
    assert_frame_equal(get_dummies(frame, dummy_na=False), wanted)
311+
def test_dataframe_dummies_with_categorical(self):
    """Encode the fixture frame after adding a Categorical column 'cat'.

    The expected frame has 'cat_x' and 'cat_y' indicator columns after
    those derived from 'A' and 'B'.
    """
    # Adds the column to the fixture frame in place.
    self.df['cat'] = pd.Categorical(['x', 'y', 'y'])
    actual = get_dummies(self.df)

    column_order = ['C', 'A_a', 'A_b', 'B_b', 'B_c', 'cat_x', 'cat_y']
    wanted = DataFrame({
        'C': [1, 2, 3],
        'A_a': [1., 0, 1],
        'A_b': [0., 1, 0],
        'B_b': [1., 1, 0],
        'B_c': [0., 0, 1],
        'cat_x': [1., 0, 0],
        'cat_y': [0., 1, 1],
    })
    wanted = wanted[column_order]
    assert_frame_equal(actual, wanted)
323+
324+
212325class TestConvertDummies (tm .TestCase ):
213326 def test_convert_dummies (self ):
214327 df = DataFrame ({'A' : ['foo' , 'bar' , 'foo' , 'bar' ,
@@ -218,8 +331,9 @@ def test_convert_dummies(self):
218331 'C' : np .random .randn (8 ),
219332 'D' : np .random .randn (8 )})
220333
221- result = convert_dummies (df , ['A' , 'B' ])
222- result2 = convert_dummies (df , ['A' , 'B' ], prefix_sep = '.' )
334+ with tm .assert_produces_warning (FutureWarning ):
335+ result = convert_dummies (df , ['A' , 'B' ])
336+ result2 = convert_dummies (df , ['A' , 'B' ], prefix_sep = '.' )
223337
224338 expected = DataFrame ({'A_foo' : [1 , 0 , 1 , 0 , 1 , 0 , 1 , 1 ],
225339 'A_bar' : [0 , 1 , 0 , 1 , 0 , 1 , 0 , 0 ],
0 commit comments