|
1 | 1 | import numpy as np |
2 | 2 | import pytest |
3 | 3 |
|
4 | | -from pandas import ( |
5 | | - DataFrame, |
6 | | - Index, |
7 | | - MultiIndex, |
8 | | - Series, |
9 | | - isna, |
10 | | - notna, |
11 | | -) |
| 4 | +from pandas import Series |
12 | 5 | import pandas._testing as tm |
13 | 6 |
|
14 | 7 |
|
15 | | -def test_expanding_corr(series): |
16 | | - A = series.dropna() |
17 | | - B = (A + np.random.randn(len(A)))[:-5] |
18 | | - |
19 | | - result = A.expanding().corr(B) |
20 | | - |
21 | | - rolling_result = A.rolling(window=len(A), min_periods=1).corr(B) |
22 | | - |
23 | | - tm.assert_almost_equal(rolling_result, result) |
24 | | - |
25 | | - |
26 | | -def test_expanding_count(series): |
27 | | - result = series.expanding(min_periods=0).count() |
28 | | - tm.assert_almost_equal( |
29 | | - result, series.rolling(window=len(series), min_periods=0).count() |
30 | | - ) |
31 | | - |
32 | | - |
33 | | -def test_expanding_quantile(series): |
34 | | - result = series.expanding().quantile(0.5) |
35 | | - |
36 | | - rolling_result = series.rolling(window=len(series), min_periods=1).quantile(0.5) |
37 | | - |
38 | | - tm.assert_almost_equal(result, rolling_result) |
39 | | - |
40 | | - |
41 | | -def test_expanding_cov(series): |
42 | | - A = series |
43 | | - B = (A + np.random.randn(len(A)))[:-5] |
44 | | - |
45 | | - result = A.expanding().cov(B) |
46 | | - |
47 | | - rolling_result = A.rolling(window=len(A), min_periods=1).cov(B) |
48 | | - |
49 | | - tm.assert_almost_equal(rolling_result, result) |
50 | | - |
51 | | - |
52 | | -def test_expanding_cov_pairwise(frame): |
53 | | - result = frame.expanding().cov() |
54 | | - |
55 | | - rolling_result = frame.rolling(window=len(frame), min_periods=1).cov() |
56 | | - |
57 | | - tm.assert_frame_equal(result, rolling_result) |
58 | | - |
59 | | - |
60 | | -def test_expanding_corr_pairwise(frame): |
61 | | - result = frame.expanding().corr() |
62 | | - |
63 | | - rolling_result = frame.rolling(window=len(frame), min_periods=1).corr() |
64 | | - tm.assert_frame_equal(result, rolling_result) |
65 | | - |
66 | | - |
67 | | -@pytest.mark.parametrize( |
68 | | - "func,static_comp", |
69 | | - [("sum", np.sum), ("mean", np.mean), ("max", np.max), ("min", np.min)], |
70 | | - ids=["sum", "mean", "max", "min"], |
71 | | -) |
72 | | -def test_expanding_func(func, static_comp, frame_or_series): |
73 | | - data = frame_or_series(np.array(list(range(10)) + [np.nan] * 10)) |
74 | | - result = getattr(data.expanding(min_periods=1, axis=0), func)() |
75 | | - assert isinstance(result, frame_or_series) |
76 | | - |
77 | | - if frame_or_series is Series: |
78 | | - tm.assert_almost_equal(result[10], static_comp(data[:11])) |
79 | | - else: |
80 | | - tm.assert_series_equal( |
81 | | - result.iloc[10], static_comp(data[:11]), check_names=False |
82 | | - ) |
83 | | - |
84 | | - |
85 | | -@pytest.mark.parametrize( |
86 | | - "func,static_comp", |
87 | | - [("sum", np.sum), ("mean", np.mean), ("max", np.max), ("min", np.min)], |
88 | | - ids=["sum", "mean", "max", "min"], |
89 | | -) |
90 | | -def test_expanding_min_periods(func, static_comp): |
91 | | - ser = Series(np.random.randn(50)) |
92 | | - |
93 | | - result = getattr(ser.expanding(min_periods=30, axis=0), func)() |
94 | | - assert result[:29].isna().all() |
95 | | - tm.assert_almost_equal(result.iloc[-1], static_comp(ser[:50])) |
96 | | - |
97 | | - # min_periods is working correctly |
98 | | - result = getattr(ser.expanding(min_periods=15, axis=0), func)() |
99 | | - assert isna(result.iloc[13]) |
100 | | - assert notna(result.iloc[14]) |
101 | | - |
102 | | - ser2 = Series(np.random.randn(20)) |
103 | | - result = getattr(ser2.expanding(min_periods=5, axis=0), func)() |
104 | | - assert isna(result[3]) |
105 | | - assert notna(result[4]) |
106 | | - |
107 | | - # min_periods=0 |
108 | | - result0 = getattr(ser.expanding(min_periods=0, axis=0), func)() |
109 | | - result1 = getattr(ser.expanding(min_periods=1, axis=0), func)() |
110 | | - tm.assert_almost_equal(result0, result1) |
111 | | - |
112 | | - result = getattr(ser.expanding(min_periods=1, axis=0), func)() |
113 | | - tm.assert_almost_equal(result.iloc[-1], static_comp(ser[:50])) |
114 | | - |
115 | | - |
116 | | -def test_expanding_apply(engine_and_raw, frame_or_series): |
117 | | - engine, raw = engine_and_raw |
118 | | - data = frame_or_series(np.array(list(range(10)) + [np.nan] * 10)) |
119 | | - result = data.expanding(min_periods=1).apply( |
120 | | - lambda x: x.mean(), raw=raw, engine=engine |
121 | | - ) |
122 | | - assert isinstance(result, frame_or_series) |
123 | | - |
124 | | - if frame_or_series is Series: |
125 | | - tm.assert_almost_equal(result[9], np.mean(data[:11])) |
126 | | - else: |
127 | | - tm.assert_series_equal(result.iloc[9], np.mean(data[:11]), check_names=False) |
128 | | - |
129 | | - |
130 | | -def test_expanding_min_periods_apply(engine_and_raw): |
131 | | - engine, raw = engine_and_raw |
132 | | - ser = Series(np.random.randn(50)) |
133 | | - |
134 | | - result = ser.expanding(min_periods=30).apply( |
135 | | - lambda x: x.mean(), raw=raw, engine=engine |
136 | | - ) |
137 | | - assert result[:29].isna().all() |
138 | | - tm.assert_almost_equal(result.iloc[-1], np.mean(ser[:50])) |
139 | | - |
140 | | - # min_periods is working correctly |
141 | | - result = ser.expanding(min_periods=15).apply( |
142 | | - lambda x: x.mean(), raw=raw, engine=engine |
143 | | - ) |
144 | | - assert isna(result.iloc[13]) |
145 | | - assert notna(result.iloc[14]) |
146 | | - |
147 | | - ser2 = Series(np.random.randn(20)) |
148 | | - result = ser2.expanding(min_periods=5).apply( |
149 | | - lambda x: x.mean(), raw=raw, engine=engine |
150 | | - ) |
151 | | - assert isna(result[3]) |
152 | | - assert notna(result[4]) |
153 | | - |
154 | | - # min_periods=0 |
155 | | - result0 = ser.expanding(min_periods=0).apply( |
156 | | - lambda x: x.mean(), raw=raw, engine=engine |
157 | | - ) |
158 | | - result1 = ser.expanding(min_periods=1).apply( |
159 | | - lambda x: x.mean(), raw=raw, engine=engine |
160 | | - ) |
161 | | - tm.assert_almost_equal(result0, result1) |
162 | | - |
163 | | - result = ser.expanding(min_periods=1).apply( |
164 | | - lambda x: x.mean(), raw=raw, engine=engine |
165 | | - ) |
166 | | - tm.assert_almost_equal(result.iloc[-1], np.mean(ser[:50])) |
167 | | - |
168 | | - |
169 | 8 | @pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4]) |
170 | 9 | @pytest.mark.parametrize("f", [lambda v: Series(v).sum(), np.nansum]) |
171 | 10 | def test_expanding_apply_consistency_sum_nans(consistency_data, min_periods, f): |
@@ -334,202 +173,3 @@ def test_expanding_consistency_var_debiasing_factors(consistency_data, min_perio |
334 | 173 | x.expanding().count() - 1.0 |
335 | 174 | ).replace(0.0, np.nan) |
336 | 175 | tm.assert_equal(var_unbiased_x, var_biased_x * var_debiasing_factors_x) |
337 | | - |
338 | | - |
339 | | -@pytest.mark.parametrize( |
340 | | - "f", |
341 | | - [ |
342 | | - lambda x: (x.expanding(min_periods=5).cov(x, pairwise=True)), |
343 | | - lambda x: (x.expanding(min_periods=5).corr(x, pairwise=True)), |
344 | | - ], |
345 | | -) |
346 | | -def test_moment_functions_zero_length_pairwise(f): |
347 | | - |
348 | | - df1 = DataFrame() |
349 | | - df2 = DataFrame(columns=Index(["a"], name="foo"), index=Index([], name="bar")) |
350 | | - df2["a"] = df2["a"].astype("float64") |
351 | | - |
352 | | - df1_expected = DataFrame( |
353 | | - index=MultiIndex.from_product([df1.index, df1.columns]), columns=Index([]) |
354 | | - ) |
355 | | - df2_expected = DataFrame( |
356 | | - index=MultiIndex.from_product([df2.index, df2.columns], names=["bar", "foo"]), |
357 | | - columns=Index(["a"], name="foo"), |
358 | | - dtype="float64", |
359 | | - ) |
360 | | - |
361 | | - df1_result = f(df1) |
362 | | - tm.assert_frame_equal(df1_result, df1_expected) |
363 | | - |
364 | | - df2_result = f(df2) |
365 | | - tm.assert_frame_equal(df2_result, df2_expected) |
366 | | - |
367 | | - |
368 | | -@pytest.mark.parametrize( |
369 | | - "f", |
370 | | - [ |
371 | | - lambda x: x.expanding().count(), |
372 | | - lambda x: x.expanding(min_periods=5).cov(x, pairwise=False), |
373 | | - lambda x: x.expanding(min_periods=5).corr(x, pairwise=False), |
374 | | - lambda x: x.expanding(min_periods=5).max(), |
375 | | - lambda x: x.expanding(min_periods=5).min(), |
376 | | - lambda x: x.expanding(min_periods=5).sum(), |
377 | | - lambda x: x.expanding(min_periods=5).mean(), |
378 | | - lambda x: x.expanding(min_periods=5).std(), |
379 | | - lambda x: x.expanding(min_periods=5).var(), |
380 | | - lambda x: x.expanding(min_periods=5).skew(), |
381 | | - lambda x: x.expanding(min_periods=5).kurt(), |
382 | | - lambda x: x.expanding(min_periods=5).quantile(0.5), |
383 | | - lambda x: x.expanding(min_periods=5).median(), |
384 | | - lambda x: x.expanding(min_periods=5).apply(sum, raw=False), |
385 | | - lambda x: x.expanding(min_periods=5).apply(sum, raw=True), |
386 | | - ], |
387 | | -) |
388 | | -def test_moment_functions_zero_length(f): |
389 | | - # GH 8056 |
390 | | - s = Series(dtype=np.float64) |
391 | | - s_expected = s |
392 | | - df1 = DataFrame() |
393 | | - df1_expected = df1 |
394 | | - df2 = DataFrame(columns=["a"]) |
395 | | - df2["a"] = df2["a"].astype("float64") |
396 | | - df2_expected = df2 |
397 | | - |
398 | | - s_result = f(s) |
399 | | - tm.assert_series_equal(s_result, s_expected) |
400 | | - |
401 | | - df1_result = f(df1) |
402 | | - tm.assert_frame_equal(df1_result, df1_expected) |
403 | | - |
404 | | - df2_result = f(df2) |
405 | | - tm.assert_frame_equal(df2_result, df2_expected) |
406 | | - |
407 | | - |
408 | | -def test_expanding_apply_empty_series(engine_and_raw): |
409 | | - engine, raw = engine_and_raw |
410 | | - ser = Series([], dtype=np.float64) |
411 | | - tm.assert_series_equal( |
412 | | - ser, ser.expanding().apply(lambda x: x.mean(), raw=raw, engine=engine) |
413 | | - ) |
414 | | - |
415 | | - |
416 | | -def test_expanding_apply_min_periods_0(engine_and_raw): |
417 | | - # GH 8080 |
418 | | - engine, raw = engine_and_raw |
419 | | - s = Series([None, None, None]) |
420 | | - result = s.expanding(min_periods=0).apply(lambda x: len(x), raw=raw, engine=engine) |
421 | | - expected = Series([1.0, 2.0, 3.0]) |
422 | | - tm.assert_series_equal(result, expected) |
423 | | - |
424 | | - |
425 | | -def test_expanding_cov_diff_index(): |
426 | | - # GH 7512 |
427 | | - s1 = Series([1, 2, 3], index=[0, 1, 2]) |
428 | | - s2 = Series([1, 3], index=[0, 2]) |
429 | | - result = s1.expanding().cov(s2) |
430 | | - expected = Series([None, None, 2.0]) |
431 | | - tm.assert_series_equal(result, expected) |
432 | | - |
433 | | - s2a = Series([1, None, 3], index=[0, 1, 2]) |
434 | | - result = s1.expanding().cov(s2a) |
435 | | - tm.assert_series_equal(result, expected) |
436 | | - |
437 | | - s1 = Series([7, 8, 10], index=[0, 1, 3]) |
438 | | - s2 = Series([7, 9, 10], index=[0, 2, 3]) |
439 | | - result = s1.expanding().cov(s2) |
440 | | - expected = Series([None, None, None, 4.5]) |
441 | | - tm.assert_series_equal(result, expected) |
442 | | - |
443 | | - |
444 | | -def test_expanding_corr_diff_index(): |
445 | | - # GH 7512 |
446 | | - s1 = Series([1, 2, 3], index=[0, 1, 2]) |
447 | | - s2 = Series([1, 3], index=[0, 2]) |
448 | | - result = s1.expanding().corr(s2) |
449 | | - expected = Series([None, None, 1.0]) |
450 | | - tm.assert_series_equal(result, expected) |
451 | | - |
452 | | - s2a = Series([1, None, 3], index=[0, 1, 2]) |
453 | | - result = s1.expanding().corr(s2a) |
454 | | - tm.assert_series_equal(result, expected) |
455 | | - |
456 | | - s1 = Series([7, 8, 10], index=[0, 1, 3]) |
457 | | - s2 = Series([7, 9, 10], index=[0, 2, 3]) |
458 | | - result = s1.expanding().corr(s2) |
459 | | - expected = Series([None, None, None, 1.0]) |
460 | | - tm.assert_series_equal(result, expected) |
461 | | - |
462 | | - |
463 | | -def test_expanding_cov_pairwise_diff_length(): |
464 | | - # GH 7512 |
465 | | - df1 = DataFrame([[1, 5], [3, 2], [3, 9]], columns=Index(["A", "B"], name="foo")) |
466 | | - df1a = DataFrame( |
467 | | - [[1, 5], [3, 9]], index=[0, 2], columns=Index(["A", "B"], name="foo") |
468 | | - ) |
469 | | - df2 = DataFrame( |
470 | | - [[5, 6], [None, None], [2, 1]], columns=Index(["X", "Y"], name="foo") |
471 | | - ) |
472 | | - df2a = DataFrame( |
473 | | - [[5, 6], [2, 1]], index=[0, 2], columns=Index(["X", "Y"], name="foo") |
474 | | - ) |
475 | | - # TODO: xref gh-15826 |
476 | | - # .loc is not preserving the names |
477 | | - result1 = df1.expanding().cov(df2, pairwise=True).loc[2] |
478 | | - result2 = df1.expanding().cov(df2a, pairwise=True).loc[2] |
479 | | - result3 = df1a.expanding().cov(df2, pairwise=True).loc[2] |
480 | | - result4 = df1a.expanding().cov(df2a, pairwise=True).loc[2] |
481 | | - expected = DataFrame( |
482 | | - [[-3.0, -6.0], [-5.0, -10.0]], |
483 | | - columns=Index(["A", "B"], name="foo"), |
484 | | - index=Index(["X", "Y"], name="foo"), |
485 | | - ) |
486 | | - tm.assert_frame_equal(result1, expected) |
487 | | - tm.assert_frame_equal(result2, expected) |
488 | | - tm.assert_frame_equal(result3, expected) |
489 | | - tm.assert_frame_equal(result4, expected) |
490 | | - |
491 | | - |
492 | | -def test_expanding_corr_pairwise_diff_length(): |
493 | | - # GH 7512 |
494 | | - df1 = DataFrame( |
495 | | - [[1, 2], [3, 2], [3, 4]], columns=["A", "B"], index=Index(range(3), name="bar") |
496 | | - ) |
497 | | - df1a = DataFrame( |
498 | | - [[1, 2], [3, 4]], index=Index([0, 2], name="bar"), columns=["A", "B"] |
499 | | - ) |
500 | | - df2 = DataFrame( |
501 | | - [[5, 6], [None, None], [2, 1]], |
502 | | - columns=["X", "Y"], |
503 | | - index=Index(range(3), name="bar"), |
504 | | - ) |
505 | | - df2a = DataFrame( |
506 | | - [[5, 6], [2, 1]], index=Index([0, 2], name="bar"), columns=["X", "Y"] |
507 | | - ) |
508 | | - result1 = df1.expanding().corr(df2, pairwise=True).loc[2] |
509 | | - result2 = df1.expanding().corr(df2a, pairwise=True).loc[2] |
510 | | - result3 = df1a.expanding().corr(df2, pairwise=True).loc[2] |
511 | | - result4 = df1a.expanding().corr(df2a, pairwise=True).loc[2] |
512 | | - expected = DataFrame( |
513 | | - [[-1.0, -1.0], [-1.0, -1.0]], columns=["A", "B"], index=Index(["X", "Y"]) |
514 | | - ) |
515 | | - tm.assert_frame_equal(result1, expected) |
516 | | - tm.assert_frame_equal(result2, expected) |
517 | | - tm.assert_frame_equal(result3, expected) |
518 | | - tm.assert_frame_equal(result4, expected) |
519 | | - |
520 | | - |
521 | | -def test_expanding_apply_args_kwargs(engine_and_raw): |
522 | | - def mean_w_arg(x, const): |
523 | | - return np.mean(x) + const |
524 | | - |
525 | | - engine, raw = engine_and_raw |
526 | | - |
527 | | - df = DataFrame(np.random.rand(20, 3)) |
528 | | - |
529 | | - expected = df.expanding().apply(np.mean, engine=engine, raw=raw) + 20.0 |
530 | | - |
531 | | - result = df.expanding().apply(mean_w_arg, engine=engine, raw=raw, args=(20,)) |
532 | | - tm.assert_frame_equal(result, expected) |
533 | | - |
534 | | - result = df.expanding().apply(mean_w_arg, raw=raw, kwargs={"const": 20}) |
535 | | - tm.assert_frame_equal(result, expected) |
0 commit comments