@@ -160,12 +160,6 @@ def f():
160160
161161 self .assertRaises (ValueError , f )
162162
163- def f ():
164- with tm .assert_produces_warning (FutureWarning ):
165- Categorical ([1 , 2 ], [1 , 2 , np .nan , np .nan ])
166-
167- self .assertRaises (ValueError , f )
168-
169163 # The default should be unordered
170164 c1 = Categorical (["a" , "b" , "c" , "a" ])
171165 self .assertFalse (c1 .ordered )
@@ -222,29 +216,19 @@ def f():
222216 cat = pd .Categorical ([np .nan , 1. , 2. , 3. ])
223217 self .assertTrue (is_float_dtype (cat .categories ))
224218
225- # Deprecating NaNs in categoires (GH #10748)
226- # preserve int as far as possible by converting to object if NaN is in
227- # categories
228- with tm .assert_produces_warning (FutureWarning ):
229- cat = pd .Categorical ([np .nan , 1 , 2 , 3 ],
230- categories = [np .nan , 1 , 2 , 3 ])
231- self .assertTrue (is_object_dtype (cat .categories ))
232-
233219 # This doesn't work -> this would probably need some kind of "remember
234220 # the original type" feature to try to cast the array interface result
235221 # to...
236222
237223 # vals = np.asarray(cat[cat.notnull()])
238224 # self.assertTrue(is_integer_dtype(vals))
239- with tm .assert_produces_warning (FutureWarning ):
240- cat = pd .Categorical ([np .nan , "a" , "b" , "c" ],
241- categories = [np .nan , "a" , "b" , "c" ])
242- self .assertTrue (is_object_dtype (cat .categories ))
243- # but don't do it for floats
244- with tm .assert_produces_warning (FutureWarning ):
245- cat = pd .Categorical ([np .nan , 1. , 2. , 3. ],
246- categories = [np .nan , 1. , 2. , 3. ])
247- self .assertTrue (is_float_dtype (cat .categories ))
225+
226+ # Cannot have NaN in categories
227+ def f ():
228+ pd .Categorical ([np .nan , "a" , "b" , "c" ],
229+ categories = [np .nan , "a" , "b" , "c" ])
230+
231+ self .assertRaises (ValueError , f )
248232
249233 # corner cases
250234 cat = pd .Categorical ([1 ])
@@ -418,6 +402,12 @@ def f():
418402
419403 self .assertRaises (ValueError , f )
420404
405+ # NaN is not allowed among the categories
406+ def f ():
407+ Categorical .from_codes ([0 , 1 , 2 ], ["a" , "b" , np .nan ])
408+
409+ self .assertRaises (ValueError , f )
410+
421411 # too negative
422412 def f ():
423413 Categorical .from_codes ([- 2 , 1 , 2 ], ["a" , "b" , "c" ])
@@ -649,30 +639,6 @@ def test_describe(self):
649639 name = 'categories' ))
650640 tm .assert_frame_equal (desc , expected )
651641
652- # NA as a category
653- with tm .assert_produces_warning (FutureWarning ):
654- cat = pd .Categorical (["a" , "c" , "c" , np .nan ],
655- categories = ["b" , "a" , "c" , np .nan ])
656- result = cat .describe ()
657-
658- expected = DataFrame ([[0 , 0 ], [1 , 0.25 ], [2 , 0.5 ], [1 , 0.25 ]],
659- columns = ['counts' , 'freqs' ],
660- index = pd .CategoricalIndex (['b' , 'a' , 'c' , np .nan ],
661- name = 'categories' ))
662- tm .assert_frame_equal (result , expected , check_categorical = False )
663-
664- # NA as an unused category
665- with tm .assert_produces_warning (FutureWarning ):
666- cat = pd .Categorical (["a" , "c" , "c" ],
667- categories = ["b" , "a" , "c" , np .nan ])
668- result = cat .describe ()
669-
670- exp_idx = pd .CategoricalIndex (
671- ['b' , 'a' , 'c' , np .nan ], name = 'categories' )
672- expected = DataFrame ([[0 , 0 ], [1 , 1 / 3. ], [2 , 2 / 3. ], [0 , 0 ]],
673- columns = ['counts' , 'freqs' ], index = exp_idx )
674- tm .assert_frame_equal (result , expected , check_categorical = False )
675-
676642 def test_print (self ):
677643 expected = ["[a, b, b, a, a, c, c, c]" ,
678644 "Categories (3, object): [a < b < c]" ]
@@ -1119,90 +1085,18 @@ def test_nan_handling(self):
11191085 self .assert_numpy_array_equal (c ._codes ,
11201086 np .array ([0 , - 1 , - 1 , 0 ], dtype = np .int8 ))
11211087
1122- # If categories have nan included, the code should point to that
1123- # instead
1124- with tm .assert_produces_warning (FutureWarning ):
1125- c = Categorical (["a" , "b" , np .nan , "a" ],
1126- categories = ["a" , "b" , np .nan ])
1127- self .assert_index_equal (c .categories , Index (["a" , "b" , np .nan ]))
1128- self .assert_numpy_array_equal (c ._codes ,
1129- np .array ([0 , 1 , 2 , 0 ], dtype = np .int8 ))
1130- c [1 ] = np .nan
1131- self .assert_index_equal (c .categories , Index (["a" , "b" , np .nan ]))
1132- self .assert_numpy_array_equal (c ._codes ,
1133- np .array ([0 , 2 , 2 , 0 ], dtype = np .int8 ))
1134-
1135- # Changing categories should also make the replaced category np.nan
1136- c = Categorical (["a" , "b" , "c" , "a" ])
1137- with tm .assert_produces_warning (FutureWarning ):
1138- c .categories = ["a" , "b" , np .nan ] # noqa
1139-
1140- self .assert_index_equal (c .categories , Index (["a" , "b" , np .nan ]))
1141- self .assert_numpy_array_equal (c ._codes ,
1142- np .array ([0 , 1 , 2 , 0 ], dtype = np .int8 ))
1143-
11441088 # NaN in the values is not added to the categories; it is encoded
11451089 # with the sentinel code -1
11461090 c = Categorical (["a" , "b" , np .nan , "a" ])
11471091 self .assert_index_equal (c .categories , Index (["a" , "b" ]))
11481092 self .assert_numpy_array_equal (c ._codes ,
11491093 np .array ([0 , 1 , - 1 , 0 ], dtype = np .int8 ))
1150- with tm .assert_produces_warning (FutureWarning ):
1151- c .set_categories (["a" , "b" , np .nan ], rename = True , inplace = True )
1152-
1153- self .assert_index_equal (c .categories , Index (["a" , "b" , np .nan ]))
1154- self .assert_numpy_array_equal (c ._codes ,
1155- np .array ([0 , 1 , - 1 , 0 ], dtype = np .int8 ))
1156- c [1 ] = np .nan
1157- self .assert_index_equal (c .categories , Index (["a" , "b" , np .nan ]))
1158- self .assert_numpy_array_equal (c ._codes ,
1159- np .array ([0 , 2 , - 1 , 0 ], dtype = np .int8 ))
1160-
1161- # Remove null categories (GH 10156)
1162- cases = [([1.0 , 2.0 , np .nan ], [1.0 , 2.0 ]),
1163- (['a' , 'b' , None ], ['a' , 'b' ]),
1164- ([pd .Timestamp ('2012-05-01' ), pd .NaT ],
1165- [pd .Timestamp ('2012-05-01' )])]
1166-
1167- null_values = [np .nan , None , pd .NaT ]
1168-
1169- for with_null , without in cases :
1170- with tm .assert_produces_warning (FutureWarning ):
1171- base = Categorical ([], with_null )
1172- expected = Categorical ([], without )
1173-
1174- for nullval in null_values :
1175- result = base .remove_categories (nullval )
1176- self .assert_categorical_equal (result , expected )
1177-
1178- # Different null values are indistinguishable
1179- for i , j in [(0 , 1 ), (0 , 2 ), (1 , 2 )]:
1180- nulls = [null_values [i ], null_values [j ]]
1181-
1182- def f ():
1183- with tm .assert_produces_warning (FutureWarning ):
1184- Categorical ([], categories = nulls )
1185-
1186- self .assertRaises (ValueError , f )
11871094
11881095 def test_isnull (self ):
11891096 exp = np .array ([False , False , True ])
11901097 c = Categorical (["a" , "b" , np .nan ])
11911098 res = c .isnull ()
1192- self .assert_numpy_array_equal (res , exp )
1193-
1194- with tm .assert_produces_warning (FutureWarning ):
1195- c = Categorical (["a" , "b" , np .nan ], categories = ["a" , "b" , np .nan ])
1196- res = c .isnull ()
1197- self .assert_numpy_array_equal (res , exp )
11981099
1199- # test both nan in categories and as -1
1200- exp = np .array ([True , False , True ])
1201- c = Categorical (["a" , "b" , np .nan ])
1202- with tm .assert_produces_warning (FutureWarning ):
1203- c .set_categories (["a" , "b" , np .nan ], rename = True , inplace = True )
1204- c [0 ] = np .nan
1205- res = c .isnull ()
12061100 self .assert_numpy_array_equal (res , exp )
12071101
12081102 def test_codes_immutable (self ):
@@ -1487,45 +1381,10 @@ def test_slicing_directly(self):
14871381
14881382 def test_set_item_nan (self ):
14891383 cat = pd .Categorical ([1 , 2 , 3 ])
1490- exp = pd .Categorical ([1 , np .nan , 3 ], categories = [1 , 2 , 3 ])
1491- cat [1 ] = np .nan
1492- tm .assert_categorical_equal (cat , exp )
1493-
1494- # if nan in categories, the proper code should be set!
1495- cat = pd .Categorical ([1 , 2 , 3 , np .nan ], categories = [1 , 2 , 3 ])
1496- with tm .assert_produces_warning (FutureWarning ):
1497- cat .set_categories ([1 , 2 , 3 , np .nan ], rename = True , inplace = True )
14981384 cat [1 ] = np .nan
1499- exp = np .array ([0 , 3 , 2 , - 1 ], dtype = np .int8 )
1500- self .assert_numpy_array_equal (cat .codes , exp )
1501-
1502- cat = pd .Categorical ([1 , 2 , 3 , np .nan ], categories = [1 , 2 , 3 ])
1503- with tm .assert_produces_warning (FutureWarning ):
1504- cat .set_categories ([1 , 2 , 3 , np .nan ], rename = True , inplace = True )
1505- cat [1 :3 ] = np .nan
1506- exp = np .array ([0 , 3 , 3 , - 1 ], dtype = np .int8 )
1507- self .assert_numpy_array_equal (cat .codes , exp )
1508-
1509- cat = pd .Categorical ([1 , 2 , 3 , np .nan ], categories = [1 , 2 , 3 ])
1510- with tm .assert_produces_warning (FutureWarning ):
1511- cat .set_categories ([1 , 2 , 3 , np .nan ], rename = True , inplace = True )
1512- cat [1 :3 ] = [np .nan , 1 ]
1513- exp = np .array ([0 , 3 , 0 , - 1 ], dtype = np .int8 )
1514- self .assert_numpy_array_equal (cat .codes , exp )
1515-
1516- cat = pd .Categorical ([1 , 2 , 3 , np .nan ], categories = [1 , 2 , 3 ])
1517- with tm .assert_produces_warning (FutureWarning ):
1518- cat .set_categories ([1 , 2 , 3 , np .nan ], rename = True , inplace = True )
1519- cat [1 :3 ] = [np .nan , np .nan ]
1520- exp = np .array ([0 , 3 , 3 , - 1 ], dtype = np .int8 )
1521- self .assert_numpy_array_equal (cat .codes , exp )
15221385
1523- cat = pd .Categorical ([1 , 2 , np .nan , 3 ], categories = [1 , 2 , 3 ])
1524- with tm .assert_produces_warning (FutureWarning ):
1525- cat .set_categories ([1 , 2 , 3 , np .nan ], rename = True , inplace = True )
1526- cat [pd .isnull (cat )] = np .nan
1527- exp = np .array ([0 , 1 , 3 , 2 ], dtype = np .int8 )
1528- self .assert_numpy_array_equal (cat .codes , exp )
1386+ exp = pd .Categorical ([1 , np .nan , 3 ], categories = [1 , 2 , 3 ])
1387+ tm .assert_categorical_equal (cat , exp )
15291388
15301389 def test_shift (self ):
15311390 # GH 9416
@@ -2026,33 +1885,12 @@ def test_sideeffects_free(self):
20261885
20271886 def test_nan_handling (self ):
20281887
2029- # Nans are represented as -1 in labels
1888+ # NaNs are represented as -1 in labels
20301889 s = Series (Categorical (["a" , "b" , np .nan , "a" ]))
20311890 self .assert_index_equal (s .cat .categories , Index (["a" , "b" ]))
20321891 self .assert_numpy_array_equal (s .values .codes ,
20331892 np .array ([0 , 1 , - 1 , 0 ], dtype = np .int8 ))
20341893
2035- # If categories have nan included, the label should point to that
2036- # instead
2037- with tm .assert_produces_warning (FutureWarning ):
2038- s2 = Series (Categorical (["a" , "b" , np .nan , "a" ],
2039- categories = ["a" , "b" , np .nan ]))
2040-
2041- exp_cat = Index (["a" , "b" , np .nan ])
2042- self .assert_index_equal (s2 .cat .categories , exp_cat )
2043- self .assert_numpy_array_equal (s2 .values .codes ,
2044- np .array ([0 , 1 , 2 , 0 ], dtype = np .int8 ))
2045-
2046- # Changing categories should also make the replaced category np.nan
2047- s3 = Series (Categorical (["a" , "b" , "c" , "a" ]))
2048- with tm .assert_produces_warning (FutureWarning , check_stacklevel = False ):
2049- s3 .cat .categories = ["a" , "b" , np .nan ]
2050-
2051- exp_cat = Index (["a" , "b" , np .nan ])
2052- self .assert_index_equal (s3 .cat .categories , exp_cat )
2053- self .assert_numpy_array_equal (s3 .values .codes ,
2054- np .array ([0 , 1 , 2 , 0 ], dtype = np .int8 ))
2055-
20561894 def test_cat_accessor (self ):
20571895 s = Series (Categorical (["a" , "b" , np .nan , "a" ]))
20581896 self .assert_index_equal (s .cat .categories , Index (["a" , "b" ]))
0 commit comments