|
2 | 2 |
|
3 | 3 | import warnings
|
4 | 4 | from pathlib import Path
|
5 |
| -from typing import Any, Callable, Iterable, Literal, Type |
| 5 | +from typing import Any, Callable, Iterable, Type |
6 | 6 |
|
7 | 7 | import dask.dataframe as dd
|
8 | 8 | import hats as hc
|
|
11 | 11 | from hats.catalog.catalog_collection import CatalogCollection
|
12 | 12 | from hats.catalog.healpix_dataset.healpix_dataset import HealpixDataset as HCHealpixDataset
|
13 | 13 | from hats.catalog.index.index_catalog import IndexCatalog as HCIndexCatalog
|
14 |
| -from pandas._libs import lib |
15 |
| -from pandas._typing import AnyAll, Axis, IndexLabel, Renamer |
16 |
| -from pandas.api.extensions import no_default |
| 14 | +from pandas._typing import Renamer |
17 | 15 | from typing_extensions import Self
|
18 | 16 | from upath import UPath
|
19 | 17 |
|
@@ -146,33 +144,6 @@ def rename(self, columns: Renamer) -> Catalog:
|
146 | 144 | catalog.margin = self.margin.rename(columns)
|
147 | 145 | return catalog
|
148 | 146 |
|
149 |
| - def assign(self, **kwargs) -> Catalog: |
150 |
| - """Assigns new columns to a catalog |
151 |
| -
|
152 |
| - Args: |
153 |
| - **kwargs: Arguments to pass to the assign method. This dictionary |
154 |
| - should contain the column names as keys and either a |
155 |
| - function or a 1-D Dask array as their corresponding value. |
156 |
| -
|
157 |
| - Returns: |
158 |
| - The catalog containing both the old columns and the newly created columns |
159 |
| -
|
160 |
| - Examples: |
161 |
| - Create a new column using a function:: |
162 |
| -
|
163 |
| - catalog = Catalog(...) |
164 |
| - catalog = catalog.assign(new_col=lambda df: df['existing_col'] * 2) |
165 |
| -
|
166 |
| - Add a column from a 1-D Dask array:: |
167 |
| -
|
168 |
| - import dask.array as da |
169 |
| - new_data = da.arange(...) |
170 |
| - catalog = catalog.assign(new_col=new_data) |
171 |
| - """ |
172 |
| - self._check_unloaded_columns(list(kwargs.keys())) |
173 |
| - ddf = self._ddf.assign(**kwargs) |
174 |
| - return self._create_updated_dataset(ddf=ddf) |
175 |
| - |
176 | 147 | def crossmatch(
|
177 | 148 | self,
|
178 | 149 | other: Catalog,
|
@@ -951,81 +922,6 @@ def nest_lists(
|
951 | 922 | )
|
952 | 923 | return catalog
|
953 | 924 |
|
954 |
| - def dropna( |
955 |
| - self, |
956 |
| - *, |
957 |
| - axis: Axis = 0, |
958 |
| - how: AnyAll | lib.NoDefault = no_default, |
959 |
| - thresh: int | lib.NoDefault = no_default, |
960 |
| - on_nested: bool = False, |
961 |
| - subset: IndexLabel | None = None, |
962 |
| - ignore_index: bool = False, |
963 |
| - ) -> Catalog: |
964 |
| - """Remove missing values for one layer of nested columns in the catalog. |
965 |
| -
|
966 |
| - Parameters |
967 |
| - ---------- |
968 |
| - axis : {0 or 'index', 1 or 'columns'}, default 0 |
969 |
| - Determine if rows or columns which contain missing values are |
970 |
| - removed. |
971 |
| -
|
972 |
| - * 0, or 'index' : Drop rows which contain missing values. |
973 |
| - * 1, or 'columns' : Drop columns which contain missing value. |
974 |
| -
|
975 |
| - Only a single axis is allowed. |
976 |
| -
|
977 |
| - how : {'any', 'all'}, default 'any' |
978 |
| - Determine if row or column is removed from catalog, when we have |
979 |
| - at least one NA or all NA. |
980 |
| -
|
981 |
| - * 'any' : If any NA values are present, drop that row or column. |
982 |
| - * 'all' : If all values are NA, drop that row or column. |
983 |
| - thresh : int, optional |
984 |
| - Require that many non-NA values. Cannot be combined with how. |
985 |
| - on_nested : str or bool, optional |
986 |
| - If not False, applies the call to the nested dataframe in the |
987 |
| - column with label equal to the provided string. If specified, |
988 |
| - the nested dataframe should align with any columns given in |
989 |
| - `subset`. |
990 |
| - subset : column label or sequence of labels, optional |
991 |
| - Labels along other axis to consider, e.g. if you are dropping rows |
992 |
| - these would be a list of columns to include. |
993 |
| -
|
994 |
| - Access nested columns using `nested_df.nested_col` (where |
995 |
| - `nested_df` refers to a particular nested dataframe and |
996 |
| - `nested_col` is a column of that nested dataframe). |
997 |
| - ignore_index : bool, default ``False`` |
998 |
| - If ``True``, the resulting axis will be labeled 0, 1, …, n - 1. |
999 |
| -
|
1000 |
| - Returns |
1001 |
| - ------- |
1002 |
| - Catalog |
1003 |
| - Catalog with NA entries dropped from it. |
1004 |
| -
|
1005 |
| - Notes |
1006 |
| - ----- |
1007 |
| - Operations that target a particular nested structure return a dataframe |
1008 |
| - with rows of that particular nested structure affected. |
1009 |
| -
|
1010 |
| - Values for `on_nested` and `subset` should be consistent in pointing |
1011 |
| - to a single layer, multi-layer operations are not supported at this |
1012 |
| - time. |
1013 |
| - """ |
1014 |
| - self._check_unloaded_columns(subset) |
1015 |
| - catalog = super().dropna( |
1016 |
| - axis=axis, how=how, thresh=thresh, on_nested=on_nested, subset=subset, ignore_index=ignore_index |
1017 |
| - ) |
1018 |
| - if self.margin is not None: |
1019 |
| - catalog.margin = self.margin.dropna( |
1020 |
| - axis=axis, |
1021 |
| - how=how, |
1022 |
| - thresh=thresh, |
1023 |
| - on_nested=on_nested, |
1024 |
| - subset=subset, |
1025 |
| - ignore_index=ignore_index, |
1026 |
| - ) |
1027 |
| - return catalog |
1028 |
| - |
1029 | 925 | def reduce(self, func, *args, meta=None, append_columns=False, infer_nesting=True, **kwargs) -> Catalog:
|
1030 | 926 | """
|
1031 | 927 | Takes a function and applies it to each top-level row of the Catalog.
|
@@ -1092,55 +988,6 @@ def reduce(self, func, *args, meta=None, append_columns=False, infer_nesting=Tru
|
1092 | 988 | )
|
1093 | 989 | return catalog
|
1094 | 990 |
|
1095 |
| - def sort_nested_values( |
1096 |
| - self, |
1097 |
| - by: str | list[str], |
1098 |
| - ascending: bool | list[bool] = True, |
1099 |
| - na_position: Literal["first"] | Literal["last"] = "last", |
1100 |
| - ignore_index: bool | None = False, |
1101 |
| - **options, |
1102 |
| - ) -> Catalog: |
1103 |
| - # pylint: disable=duplicate-code |
1104 |
| - """Sort nested columns for each row in the catalog. |
1105 |
| -
|
1106 |
| - Note that this does NOT sort rows, only nested values within rows. |
1107 |
| -
|
1108 |
| - Args: |
1109 |
| - by: str or list[str] |
1110 |
| - Column(s) to sort by. |
1111 |
| - ascending: bool or list[bool], optional |
1112 |
| - Sort ascending vs. descending. Defaults to True. Specify list for |
1113 |
| - multiple sort orders. If this is a list of bools, must match the |
1114 |
| - length of the `by`. |
1115 |
| - na_position: {‘last’, ‘first’}, optional |
1116 |
| - Puts NaNs at the beginning if ‘first’, puts NaN at the end if |
1117 |
| - ‘last’. Defaults to ‘last’. |
1118 |
| - ignore_index: bool, optional |
1119 |
| - If True, the resulting axis will be labeled 0, 1, …, n - 1. |
1120 |
| - Defaults to False. |
1121 |
| - **options: keyword arguments, optional |
1122 |
| - Additional options to pass to the sorting function. |
1123 |
| -
|
1124 |
| - Returns: |
1125 |
| - A new catalog where the specified nested columns are sorted. |
1126 |
| - """ |
1127 |
| - catalog = super().sort_nested_values( |
1128 |
| - by=by, |
1129 |
| - ascending=ascending, |
1130 |
| - na_position=na_position, |
1131 |
| - ignore_index=ignore_index, |
1132 |
| - **options, |
1133 |
| - ) |
1134 |
| - if self.margin is not None: |
1135 |
| - catalog.margin = self.margin.sort_nested_values( |
1136 |
| - by=by, |
1137 |
| - ascending=ascending, |
1138 |
| - na_position=na_position, |
1139 |
| - ignore_index=ignore_index, |
1140 |
| - **options, |
1141 |
| - ) |
1142 |
| - return catalog |
1143 |
| - |
1144 | 991 | def to_hats(
|
1145 | 992 | self,
|
1146 | 993 | base_catalog_path: str | Path | UPath,
|
|
0 commit comments