Skip to content

Commit 16faf64

Browse files
SaladRaider authored and kunalgosar committed
Implement multiple axis for dropna (#13)
* Implement multiple axis for dropna
* Add multiple axis dropna test
* Fix using dummy_frame in dropna
* Clean up dropna multiple axis tests
* Remove unnecessary axis modification
* Clean up dropna tests
1 parent b94adb4 commit 16faf64

File tree

2 files changed

+57
-12
lines changed

2 files changed

+57
-12
lines changed

python/ray/dataframe/dataframe.py

Lines changed: 14 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -776,9 +776,20 @@ def dropna(self, axis=0, how='any', thresh=None, subset=None,
776776
DataFrame with the dropna applied.
777777
"""
778778
if is_list_like(axis):
779-
raise NotImplementedError(
780-
"To contribute to Pandas on Ray, please visit "
781-
"github.com/ray-project/ray.")
779+
result = self
780+
# TODO(kunalgosar): this builds an intermediate dataframe,
781+
# which does unnecessary computation
782+
for ax in axis:
783+
result = result.dropna(
784+
axis=ax, how=how, thresh=thresh, subset=subset)
785+
if not inplace:
786+
return result
787+
788+
return self._update_inplace(
789+
block_partitions=result._block_partitions,
790+
columns=result.columns,
791+
index=result.index
792+
)
782793

783794
axis = pd.DataFrame()._get_axis_number(axis)
784795
inplace = validate_bool_kwarg(inplace, "inplace")

python/ray/dataframe/test/test_dataframe.py

Lines changed: 43 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -839,6 +839,8 @@ def test_dense_nan_df():
839839

840840
test_dropna(ray_df, pd_df)
841841
test_dropna_inplace(ray_df, pd_df)
842+
test_dropna_multiple_axes(ray_df, pd_df)
843+
test_dropna_multiple_axes_inplace(ray_df, pd_df)
842844

843845

844846
@pytest.fixture
@@ -1297,16 +1299,17 @@ def test_drop_duplicates():
12971299

12981300
@pytest.fixture
12991301
def test_dropna(ray_df, pd_df):
1300-
ray_df_equals_pandas(ray_df.dropna(axis=1, how='all'),
1301-
pd_df.dropna(axis=1, how='all'))
1302+
assert ray_df_equals_pandas(ray_df.dropna(axis=1, how='all'),
1303+
pd_df.dropna(axis=1, how='all'))
13021304

1303-
ray_df_equals_pandas(ray_df.dropna(axis=1, how='any'),
1304-
pd_df.dropna(axis=1, how='any'))
1305+
assert ray_df_equals_pandas(ray_df.dropna(axis=1, how='any'),
1306+
pd_df.dropna(axis=1, how='any'))
13051307

1306-
ray_df_equals_pandas(ray_df.dropna(axis=0, how='all'),
1307-
pd_df.dropna(axis=0, how='all'))
1308+
assert ray_df_equals_pandas(ray_df.dropna(axis=0, how='all'),
1309+
pd_df.dropna(axis=0, how='all'))
13081310

1309-
ray_df_equals_pandas(ray_df.dropna(thresh=2), pd_df.dropna(thresh=2))
1311+
assert ray_df_equals_pandas(ray_df.dropna(thresh=2),
1312+
pd_df.dropna(thresh=2))
13101313

13111314

13121315
@pytest.fixture
@@ -1317,12 +1320,43 @@ def test_dropna_inplace(ray_df, pd_df):
13171320
ray_df.dropna(thresh=2, inplace=True)
13181321
pd_df.dropna(thresh=2, inplace=True)
13191322

1320-
ray_df_equals_pandas(ray_df, pd_df)
1323+
assert ray_df_equals_pandas(ray_df, pd_df)
13211324

13221325
ray_df.dropna(axis=1, how='any', inplace=True)
13231326
pd_df.dropna(axis=1, how='any', inplace=True)
13241327

1325-
ray_df_equals_pandas(ray_df, pd_df)
1328+
assert ray_df_equals_pandas(ray_df, pd_df)
1329+
1330+
1331+
@pytest.fixture
1332+
def test_dropna_multiple_axes(ray_df, pd_df):
1333+
assert ray_df_equals_pandas(
1334+
ray_df.dropna(how='all', axis=[0, 1]),
1335+
pd_df.dropna(how='all', axis=[0, 1])
1336+
)
1337+
assert ray_df_equals_pandas(
1338+
ray_df.dropna(how='all', axis=(0, 1)),
1339+
pd_df.dropna(how='all', axis=(0, 1))
1340+
)
1341+
1342+
1343+
@pytest.fixture
1344+
def test_dropna_multiple_axes_inplace(ray_df, pd_df):
1345+
ray_df_copy = ray_df.copy()
1346+
pd_df_copy = pd_df.copy()
1347+
1348+
ray_df_copy.dropna(how='all', axis=[0, 1], inplace=True)
1349+
pd_df_copy.dropna(how='all', axis=[0, 1], inplace=True)
1350+
1351+
assert ray_df_equals_pandas(ray_df_copy, pd_df_copy)
1352+
1353+
ray_df_copy = ray_df.copy()
1354+
pd_df_copy = pd_df.copy()
1355+
1356+
ray_df_copy.dropna(how='all', axis=(0, 1), inplace=True)
1357+
pd_df_copy.dropna(how='all', axis=(0, 1), inplace=True)
1358+
1359+
assert ray_df_equals_pandas(ray_df_copy, pd_df_copy)
13261360

13271361

13281362
def test_duplicated():

0 commit comments

Comments
 (0)