|
1 | 1 | # -*- coding: utf-8 -*- |
2 | 2 | # pylint: disable=E1101,E1103,W0232 |
3 | 3 |
|
4 | | -import os |
5 | 4 | import sys |
6 | 5 | from datetime import datetime |
7 | 6 | from distutils.version import LooseVersion |
@@ -2906,54 +2905,41 @@ def test_value_counts(self): |
2906 | 2905 | tm.assert_series_equal(res, exp) |
2907 | 2906 |
|
2908 | 2907 | def test_value_counts_with_nan(self): |
2909 | | - # https://github.com/pydata/pandas/issues/9443 |
| 2908 | + # see gh-9443 |
2910 | 2909 |
|
| 2910 | + # sanity check |
2911 | 2911 | s = pd.Series(["a", "b", "a"], dtype="category") |
2912 | | - tm.assert_series_equal( |
2913 | | - s.value_counts(dropna=True), |
2914 | | - pd.Series([2, 1], index=pd.CategoricalIndex(["a", "b"]))) |
2915 | | - tm.assert_series_equal( |
2916 | | - s.value_counts(dropna=False), |
2917 | | - pd.Series([2, 1], index=pd.CategoricalIndex(["a", "b"]))) |
| 2912 | + exp = pd.Series([2, 1], index=pd.CategoricalIndex(["a", "b"])) |
2918 | 2913 |
|
2919 | | - s = pd.Series(["a", "b", None, "a", None, None], dtype="category") |
2920 | | - tm.assert_series_equal( |
2921 | | - s.value_counts(dropna=True), |
2922 | | - pd.Series([2, 1], index=pd.CategoricalIndex(["a", "b"]))) |
2923 | | - tm.assert_series_equal( |
2924 | | - s.value_counts(dropna=False), |
2925 | | - pd.Series([3, 2, 1], index=pd.CategoricalIndex([np.nan, "a", "b"]))) |
2926 | | - # When we aren't sorting by counts, and np.nan isn't a |
2927 | | - # category, it should be last. |
2928 | | - tm.assert_series_equal( |
2929 | | - s.value_counts(dropna=False, sort=False), |
2930 | | - pd.Series([2, 1, 3], |
2931 | | - index=pd.CategoricalIndex(["a", "b", np.nan]))) |
| 2914 | + res = s.value_counts(dropna=True) |
| 2915 | + tm.assert_series_equal(res, exp) |
2932 | 2916 |
|
2933 | | - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): |
2934 | | - s = pd.Series(pd.Categorical(["a", "b", "a"], |
2935 | | - categories=["a", "b", np.nan])) |
| 2917 | + res = s.value_counts(dropna=False) |
| 2918 | + tm.assert_series_equal(res, exp) |
2936 | 2919 |
|
2937 | | - # internal categories are different because of NaN |
2938 | | - exp = pd.Series([2, 1], index=pd.CategoricalIndex(["a", "b"])) |
2939 | | - tm.assert_series_equal(s.value_counts(dropna=True), exp, |
2940 | | - check_categorical=False) |
2941 | | - exp = pd.Series([2, 1, 0], |
2942 | | - index=pd.CategoricalIndex(["a", "b", np.nan])) |
2943 | | - tm.assert_series_equal(s.value_counts(dropna=False), exp, |
2944 | | - check_categorical=False) |
| 2920 | + # same Series via two different constructions --> same behaviour |
| 2921 | + series = [ |
| 2922 | + pd.Series(["a", "b", None, "a", None, None], dtype="category"), |
| 2923 | + pd.Series(pd.Categorical(["a", "b", None, "a", None, None], |
| 2924 | + categories=["a", "b"])) |
| 2925 | + ] |
2945 | 2926 |
|
2946 | | - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): |
2947 | | - s = pd.Series(pd.Categorical(["a", "b", None, "a", None, None], |
2948 | | - categories=["a", "b", np.nan])) |
| 2927 | + for s in series: |
| 2928 | + # None is treated as a missing value (NaN), so dropna=True excludes its count |
| 2929 | + exp = pd.Series([2, 1], index=pd.CategoricalIndex(["a", "b"])) |
| 2930 | + res = s.value_counts(dropna=True) |
| 2931 | + tm.assert_series_equal(res, exp) |
2949 | 2932 |
|
2950 | | - exp = pd.Series([2, 1], index=pd.CategoricalIndex(["a", "b"])) |
2951 | | - tm.assert_series_equal(s.value_counts(dropna=True), exp, |
2952 | | - check_categorical=False) |
2953 | | - exp = pd.Series([3, 2, 1], |
2954 | | - index=pd.CategoricalIndex([np.nan, "a", "b"])) |
2955 | | - tm.assert_series_equal(s.value_counts(dropna=False), exp, |
2956 | | - check_categorical=False) |
| 2933 | + # with dropna=False the missing values are counted, and the result is sorted by count |
| 2934 | + exp = pd.Series([3, 2, 1], index=pd.CategoricalIndex([np.nan, "a", "b"])) |
| 2935 | + res = s.value_counts(dropna=False) |
| 2936 | + tm.assert_series_equal(res, exp) |
| 2937 | + |
| 2938 | + # When we aren't sorting by counts, and np.nan isn't a |
| 2939 | + # category, it should be last. |
| 2940 | + exp = pd.Series([2, 1, 3], index=pd.CategoricalIndex(["a", "b", np.nan])) |
| 2941 | + res = s.value_counts(dropna=False, sort=False) |
| 2942 | + tm.assert_series_equal(res, exp) |
2957 | 2943 |
|
2958 | 2944 | def test_groupby(self): |
2959 | 2945 |
|
@@ -4113,16 +4099,11 @@ def f(): |
4113 | 4099 | res = df.dropna() |
4114 | 4100 | tm.assert_frame_equal(res, df_exp_drop_all) |
4115 | 4101 |
|
4116 | | - # make sure that fillna takes both missing values and NA categories |
4117 | | - # into account |
4118 | | - c = Categorical(["a", "b", np.nan]) |
4119 | | - with tm.assert_produces_warning(FutureWarning): |
4120 | | - c.set_categories(["a", "b", np.nan], rename=True, inplace=True) |
4121 | | - |
4122 | | - c[0] = np.nan |
| 4102 | + # make sure that fillna takes missing values into account |
| 4103 | + c = Categorical([np.nan, "b", np.nan], categories=["a", "b"]) |
4123 | 4104 | df = pd.DataFrame({"cats": c, "vals": [1, 2, 3]}) |
4124 | 4105 |
|
4125 | | - cat_exp = Categorical(["a", "b", "a"], categories=["a", "b", np.nan]) |
| 4106 | + cat_exp = Categorical(["a", "b", "a"], categories=["a", "b"]) |
4126 | 4107 | df_exp = pd.DataFrame({"cats": cat_exp, "vals": [1, 2, 3]}) |
4127 | 4108 |
|
4128 | 4109 | res = df.fillna("a") |
|
0 commit comments