Skip to content

Commit 5c0d2c1

Browse files
authored
Merge 56f01a0 into fe17e19
2 parents fe17e19 + 56f01a0 commit 5c0d2c1

File tree

6 files changed

+19
-11
lines changed

6 files changed

+19
-11
lines changed

src/nested_pandas/datasets/generation.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -33,7 +33,7 @@ def generate_data(n_base, n_layer, seed=None) -> NestedFrame:
3333

3434
# Generate base data
3535
base_data = {"a": randomstate.random(n_base), "b": randomstate.random(n_base) * 2}
36-
base_nf = NestedFrame(data=base_data)
36+
base_nf = NestedFrame(data=base_data).convert_dtypes(dtype_backend="pyarrow")
3737

3838
# In case of int, create a single nested layer called "nested"
3939
if isinstance(n_layer, int):
@@ -50,6 +50,7 @@ def generate_data(n_base, n_layer, seed=None) -> NestedFrame:
5050
"index": np.arange(layer_size * n_base) % n_base,
5151
}
5252
layer_nf = NestedFrame(data=layer_data).set_index("index")
53+
layer_nf = layer_nf.convert_dtypes(dtype_backend="pyarrow")
5354
base_nf = base_nf.add_nested(layer_nf, key)
5455
return base_nf
5556
else:

src/nested_pandas/nestedframe/core.py

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -660,7 +660,7 @@ def drop(
660660
>>> nf
661661
a b nested
662662
0 0.417022 0.184677 [{flux: 31.551563, band: 'r'}; …] (5 rows)
663-
1 0.720324 0.372520 [{flux: 68.650093, band: 'g'}; …] (5 rows)
663+
1 0.720324 0.37252 [{flux: 68.650093, band: 'g'}; …] (5 rows)
664664
2 0.000114 0.691121 [{flux: 83.462567, band: 'g'}; …] (5 rows)
665665
3 0.302333 0.793535 [{flux: 1.828828, band: 'g'}; …] (5 rows)
666666
4 0.146756 1.077633 [{flux: 75.014431, band: 'g'}; …] (5 rows)
@@ -984,7 +984,7 @@ def query(self, expr: str, *, inplace: bool = False, **kwargs) -> NestedFrame |
984984
>>> nf
985985
a b nested
986986
0 0.417022 0.184677 [{t: 13.40935, flux: 98.886109, band: 'g'}; …]...
987-
1 0.720324 0.372520 [{t: 13.70439, flux: 68.650093, band: 'g'}; …]...
987+
1 0.720324 0.37252 [{t: 13.70439, flux: 68.650093, band: 'g'}; …]...
988988
2 0.000114 0.691121 [{t: 11.173797, flux: 28.044399, band: 'r'}; …...
989989
3 0.302333 0.793535 [{t: 17.562349, flux: 1.828828, band: 'g'}; …]...
990990
4 0.146756 1.077633 [{t: 17.527783, flux: 13.002857, band: 'r'}; …...
@@ -1168,7 +1168,7 @@ def dropna(
11681168
>>> nf
11691169
a b nested
11701170
0 0.417022 0.184677 None
1171-
1 0.720324 0.372520 [{t: 19.365232, flux: 90.85955, band: 'r'}]
1171+
1 0.720324 0.37252 [{t: 19.365232, flux: 90.85955, band: 'r'}]
11721172
2 0.000114 0.691121 [{t: 19.157791, flux: 14.672857, band: 'r'}]
11731173
3 0.302333 0.793535 None
11741174
4 0.146756 1.077633 None
@@ -1177,7 +1177,7 @@ def dropna(
11771177
>>> # dropna removes rows with those emptied dataframes
11781178
>>> nf.dropna(subset="nested")
11791179
a b nested
1180-
1 0.720324 0.372520 [{t: 19.365232, flux: 90.85955, band: 'r'}]
1180+
1 0.720324 0.37252 [{t: 19.365232, flux: 90.85955, band: 'r'}]
11811181
2 0.000114 0.691121 [{t: 19.157791, flux: 14.672857, band: 'r'}]
11821182
11831183
@@ -1188,15 +1188,15 @@ def dropna(
11881188
>>> nf.dropna(on_nested="nested")
11891189
a b nested
11901190
0 0.417022 0.184677 [{t: 8.38389, flux: 31.551563, band: 'r'}; …] ...
1191-
1 0.720324 0.372520 [{t: 13.70439, flux: 68.650093, band: 'g'}; …]...
1191+
1 0.720324 0.37252 [{t: 13.70439, flux: 68.650093, band: 'g'}; …]...
11921192
2 0.000114 0.691121 [{t: 4.089045, flux: 83.462567, band: 'g'}; …]...
11931193
3 0.302333 0.793535 [{t: 17.562349, flux: 1.828828, band: 'g'}; …]...
11941194
4 0.146756 1.077633 [{t: 0.547752, flux: 75.014431, band: 'g'}; …]...
11951195
>>> # or on a specific nested column
11961196
>>> nf.dropna(subset="nested.t")
11971197
a b nested
11981198
0 0.417022 0.184677 [{t: 8.38389, flux: 31.551563, band: 'r'}; …] ...
1199-
1 0.720324 0.372520 [{t: 13.70439, flux: 68.650093, band: 'g'}; …]...
1199+
1 0.720324 0.37252 [{t: 13.70439, flux: 68.650093, band: 'g'}; …]...
12001200
2 0.000114 0.691121 [{t: 4.089045, flux: 83.462567, band: 'g'}; …]...
12011201
3 0.302333 0.793535 [{t: 17.562349, flux: 1.828828, band: 'g'}; …]...
12021202
4 0.146756 1.077633 [{t: 0.547752, flux: 75.014431, band: 'g'}; …]...
@@ -1307,7 +1307,7 @@ def sort_values(
13071307
>>> nf.sort_values(by="nested.band")
13081308
a b nested
13091309
0 0.417022 0.184677 [{t: 13.40935, flux: 98.886109, band: 'g'}; …]...
1310-
1 0.720324 0.372520 [{t: 13.70439, flux: 68.650093, band: 'g'}; …]...
1310+
1 0.720324 0.37252 [{t: 13.70439, flux: 68.650093, band: 'g'}; …]...
13111311
2 0.000114 0.691121 [{t: 4.089045, flux: 83.462567, band: 'g'}; …]...
13121312
3 0.302333 0.793535 [{t: 17.562349, flux: 1.828828, band: 'g'}; …]...
13131313
4 0.146756 1.077633 [{t: 0.547752, flux: 75.014431, band: 'g'}; …]...

src/nested_pandas/series/accessor.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -580,7 +580,7 @@ def to_flatten_inner(self, field: str) -> pd.Series:
580580
>>> nf
581581
a b inner id
582582
0 0.417022 0.184677 [{t: 8.38389, flux: 80.074457, band: 'r'}; …] ... 0
583-
1 0.720324 0.372520 [{t: 13.70439, flux: 96.826158, band: 'g'}; …]... 0
583+
1 0.720324 0.37252 [{t: 13.70439, flux: 96.826158, band: 'g'}; …]... 0
584584
2 0.000114 0.691121 [{t: 4.089045, flux: 31.342418, band: 'g'}; …]... 0
585585
3 0.302333 0.793535 [{t: 17.562349, flux: 69.232262, band: 'r'}; …... 1
586586
4 0.146756 1.077633 [{t: 0.547752, flux: 87.638915, band: 'g'}; …]... 1

src/nested_pandas/series/ext_array.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -762,7 +762,11 @@ def _convert_struct_scalar_to_df(
762762
copy=copy,
763763
name=name,
764764
)
765-
return pd.DataFrame(series, copy=False)
765+
766+
res_df = pd.DataFrame(series, copy=False)
767+
#non_nested = [col for col in res_df.columns if not isinstance(res_df[col].dtype, NestedDtype)]
768+
#res_df[non_nested] = res_df[non_nested].convert_dtypes(dtype_backend="pyarrow")
769+
return res_df
766770

767771
@property
768772
def _list_storage(self):

src/nested_pandas/utils/utils.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -39,7 +39,7 @@ def count_nested(df, nested, by=None, join=True) -> NestedFrame:
3939
>>> count_nested(nf, "nested")
4040
a b nested n_nested
4141
0 0.417022 0.184677 [{t: 8.38389, flux: 10.233443, band: 'g'}; …] ... 10
42-
1 0.720324 0.372520 [{t: 13.70439, flux: 41.405599, band: 'g'}; …]... 10
42+
1 0.720324 0.37252 [{t: 13.70439, flux: 41.405599, band: 'g'}; …]... 10
4343
2 0.000114 0.691121 [{t: 4.089045, flux: 69.440016, band: 'g'}; …]... 10
4444
3 0.302333 0.793535 [{t: 17.562349, flux: 41.417927, band: 'g'}; …... 10
4545
4 0.146756 1.077633 [{t: 0.547752, flux: 4.995346, band: 'r'}; …] ... 10

tests/nested_pandas/series/test_ext_array.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -709,11 +709,14 @@ def test___getitem___with_integer():
709709
nf = generate_data(10, 3)
710710
# repeat index 3 and nest on it
711711
nf["id"] = [0, 1, 2, 3, 3, 4, 5, 6, 7, 8]
712+
#nf["id"] = nf["id"].astype(pd.ArrowDtype(pa.int64()))
713+
712714
nnf = NestedFrame.from_flat(nf, base_columns=[], on="id", name="outer")
713715
ext_array = nnf["outer"].array
714716

715717
actual = ext_array[3]
716718
desired = pd.DataFrame(nf.query("id == 3").drop("id", axis=1)).reset_index(drop=True)
719+
#import pdb;pdb.set_trace()
717720

718721
assert_frame_equal(actual, desired)
719722

0 commit comments

Comments
 (0)