Skip to content

Commit 8ba49f1

Browse files
committed
add CI estimation to viz.fit_line and probplot
1 parent d18d085 commit 8ba49f1

File tree

2 files changed

+197
-74
lines changed

2 files changed

+197
-74
lines changed

probscale/tests/test_viz.py

Lines changed: 157 additions & 63 deletions
Original file line number | Diff line number | Diff line change
@@ -64,94 +64,183 @@ def setup(self):
6464
3.23039631, 4.23953492, 4.25892247, 4.5834766 , 6.53100725
6565
])
6666

67-
self.known_y_linlin = numpy.array([-0.896506, 21.12622])
68-
self.known_y_linlog = numpy.array([2.801908, 27.649589])
69-
self.known_y_linprob = numpy.array([8.47617988, 98.53407669])
70-
self.known_y_loglin = numpy.array([-2.57620461, 1.66767934])
71-
self.known_y_loglog = numpy.array([0.0468154, 5.73261406])
72-
self.known_y_logprob = numpy.array([0.489822, 95.246099])
73-
self.known_y_problin = numpy.array([-0.896506, 21.12622])
74-
self.known_y_problog = numpy.array([2.801908, 27.649589])
75-
self.known_y_probprob = numpy.array([1.944938, 98.055062])
67+
self.known_y_linlin_no_ci = numpy.array([-0.896506, 21.12622])
68+
self.known_y_linlin = numpy.array([-0.8965, 6.4370, 9.7360, 12.8837, 17.7706])
69+
self.known_y_linlog = numpy.array([2.8019, 6.0052, 8.4619, 11.7375, 19.5072])
70+
self.known_y_linprob = numpy.array([8.4762, 23.0079, 40.0813, 57.6156, 94.6629])
71+
self.known_y_loglin = numpy.array([-2.576205, -0.7402 , -0.034269, 0.426663, 1.395386])
72+
self.known_y_loglog = numpy.array([0.0468154, 0.37470676, 0.83369069, 1.40533704, 4.21100704])
73+
self.known_y_logprob = numpy.array([0.48982206, 22.957763, 48.63313552, 66.518853, 91.86591714])
74+
self.known_y_problin = numpy.array([-0.89650596, 6.43698357, 9.73601589, 12.88372926, 17.77058661])
75+
self.known_y_problog = numpy.array([2.80190754, 6.00524156, 8.46190468, 11.73746612, 19.50723532])
76+
self.known_y_probprob = numpy.array([2.106935, 24.925853, 47.268638, 69.562842, 92.127085])
7677

7778
self.custom_xhat = [-2, -1, 0, 1, 2]
7879
self.known_custom_yhat = numpy.array([-0.56601826, 4.77441944, 10.11485714,
7980
15.45529485, 20.79573255])
8081

81-
def test_xlinear_ylinear(self):
82+
def check_res(self, res, known_res, atol=0.000001, rtol=0.0001):
    """Assert that a fit-results mapping matches the expected one.

    Parameters
    ----------
    res : dict-like
        Results under test. Must provide the keys ``'intercept'``,
        ``'slope'``, ``'yhat_lo'`` and ``'yhat_hi'``.
    known_res : dict-like
        Expected values under the same four keys. A ``None`` value for
        ``'yhat_lo'`` means no confidence band is expected at all.
    atol : float, optional
        Strict upper bound on the absolute error allowed for the scalar
        ``'intercept'`` and ``'slope'`` entries.
    rtol : float, optional
        Relative tolerance handed to ``nptest.assert_allclose`` when the
        lower and upper bands are compared.

    Raises
    ------
    AssertionError
        If any entry of ``res`` disagrees with ``known_res``.
    """
    for key in ('intercept', 'slope'):
        assert abs(res[key] - known_res[key]) < atol, (
            "{}: got {!r}, expected {!r} (atol={})".format(
                key, res[key], known_res[key], atol
            )
        )

    # The expected lower band alone decides which branch is checked; when it
    # is None, both bands of the result must be absent as well.
    if known_res['yhat_lo'] is None:
        assert res['yhat_hi'] is None, "yhat_hi should be None without a CI"
        assert res['yhat_lo'] is None, "yhat_lo should be None without a CI"
    else:
        nptest.assert_allclose(res['yhat_lo'], known_res['yhat_lo'], rtol=rtol)
        nptest.assert_allclose(res['yhat_hi'], known_res['yhat_hi'], rtol=rtol)
91+
92+
@seed
93+
def test_xlinear_ylinear_no_ci(self):
94+
known_y_linlin_no_ci = numpy.array([
95+
-0.89650596, 1.20256093, 2.45912768, 3.39459245,
96+
4.15976331, 4.81895346, 5.40596572, 5.94094748,
97+
6.43698357, 6.90313142, 7.34598503, 7.77055185,
98+
8.18077912, 8.57988686, 8.97059045, 9.35525614,
99+
9.73601589, 10.11485714, 10.49369839, 10.87445814,
100+
11.25912384, 11.64982743, 12.04893516, 12.45916243,
101+
12.88372926, 13.32658287, 13.79273071, 14.2887668 ,
102+
14.82374857, 15.41076083, 16.06995097, 16.83512184,
103+
17.77058661, 19.02715336, 21.12622025
104+
])
82105
scales = {'fitlogs': None, 'fitprobs': None}
83106
x, y = self.zscores, self.data
84107
x_, y_, res = viz.fit_line(x, y, **scales)
85-
nptest.assert_array_almost_equal(y_, self.known_y_linlin)
86-
assert isinstance(res, numpy.ndarray)
87-
108+
nptest.assert_array_almost_equal(y_, known_y_linlin_no_ci)
109+
known_res = {
110+
'slope': 5.3404377026700995,
111+
'intercept': 10.114857142857147,
112+
'yhat_lo': None,
113+
'yhat_hi': None,
114+
}
115+
self.check_res(res, known_res)
116+
117+
@seed
def test_xlinear_ylinear(self):
    """Check ``viz.fit_line`` with no log/prob transforms and ``estimate_ci=True``.

    The fit is evaluated at every 8th z-score; the fitted values, slope,
    intercept and both confidence bands are compared with stored references.
    """
    # NOTE(review): `seed` presumably pins the RNG state so the CI bands are
    # reproducible -- confirm against its definition.
    zscores, data = self.zscores, self.data
    _, fitted, results = viz.fit_line(
        zscores,
        data,
        xhat=zscores[::8],
        estimate_ci=True,
        fitlogs=None,
        fitprobs=None,
    )
    nptest.assert_allclose(fitted, self.known_y_linlin, rtol=0.0001)

    expected = dict(
        slope=5.3404377026700995,
        intercept=10.114857142857147,
        yhat_lo=numpy.array([-2.9223, 5.4807, 9.109, 12.0198, 16.2376]),
        yhat_hi=numpy.array([0.4983, 7.0448, 10.2715, 13.4877, 18.8306]),
    )
    self.check_res(results, expected)
130+
131+
@seed
88132
def test_xlinear_ylog(self):
89133
scales = {'fitlogs': 'y', 'fitprobs': None}
90134
x, y = self.zscores, self.data
91-
x_, y_, res = viz.fit_line(x, y, **scales)
92-
nptest.assert_array_almost_equal(y_, self.known_y_linlog)
93-
assert isinstance(res, numpy.ndarray)
94-
135+
x_, y_, res = viz.fit_line(x, y, xhat=x[::8], estimate_ci=True, **scales)
136+
nptest.assert_allclose(y_, self.known_y_linlog, rtol=0.0001)
137+
known_res = {
138+
'slope': 0.55515014824534514,
139+
'intercept': 2.1749556618678434,
140+
'yhat_lo': numpy.array([ 2.4355, 5.6436, 8.1653, 11.3136, 18.1000]),
141+
'yhat_hi': numpy.array([ 3.1348, 6.3072, 8.7495, 12.2324, 21.2824]),
142+
}
143+
self.check_res(res, known_res)
144+
145+
@seed
95146
def test_xlinear_yprob(self):
96147
scales = {'fitlogs': None, 'fitprobs': 'y'}
97148
x, y = self.data, self.probs
98-
x_, y_, res = viz.fit_line(x, y, **scales)
99-
nptest.assert_array_almost_equal(y_, self.known_y_linprob)
100-
assert isinstance(res, numpy.ndarray)
101-
149+
x_, y_, res = viz.fit_line(x, y, xhat=x[::8], estimate_ci=True, **scales)
150+
nptest.assert_allclose(y_, self.known_y_linprob, rtol=0.0001)
151+
known_res = {
152+
'slope': 0.16920340891421964,
153+
'intercept': -1.7114683092517717,
154+
'yhat_lo': numpy.array([ 5.6382, 18.9842, 36.0326, 54.0282, 92.8391]),
155+
'yhat_hi': numpy.array([ 12.6284, 28.2687, 44.6934, 61.8816, 97.1297]),
156+
}
157+
self.check_res(res, known_res)
158+
159+
@seed
102160
def test_xlog_ylinear(self):
103161
scales = {'fitlogs': 'x', 'fitprobs': None}
104162
x, y = self.data, self.zscores
105-
x_, y_, res = viz.fit_line(x, y, **scales)
106-
nptest.assert_array_almost_equal(y_, self.known_y_loglin)
107-
assert isinstance(res, numpy.ndarray)
108-
163+
x_, y_, res = viz.fit_line(x, y, xhat=x[::8], estimate_ci=True, **scales)
164+
nptest.assert_allclose(y_, self.known_y_loglin, rtol=0.0001)
165+
known_res = {
166+
'slope': 1.7385543724819053,
167+
'intercept': -3.7812786758946122,
168+
'yhat_lo': numpy.array([-2.88948 , -0.846565, -0.093696, 0.360738, 1.255963]),
169+
'yhat_hi': numpy.array([-2.310246, -0.63795 , 0.024143, 0.494404, 1.561183]),
170+
}
171+
self.check_res(res, known_res)
172+
173+
@seed
109174
def test_xlog_ylog(self):
110175
scales = {'fitlogs': 'both', 'fitprobs': None}
111176
x, y = self.data, self.y
112-
x_, y_, res = viz.fit_line(x, y, **scales)
113-
nptest.assert_array_almost_equal(y_, self.known_y_loglog)
114-
assert isinstance(res, numpy.ndarray)
115-
177+
x_, y_, res = viz.fit_line(x, y, xhat=x[::8], estimate_ci=True, **scales)
178+
nptest.assert_allclose(y_, self.known_y_loglog, rtol=0.0001)
179+
known_res = {
180+
'slope': 1.9695339470891058,
181+
'intercept': -4.4267200322534261,
182+
'yhat_lo': numpy.array([ 0.033559, 0.32797 , 0.777473, 1.331504, 3.811647]),
183+
'yhat_hi': numpy.array([ 0.061867, 0.422956, 0.892383, 1.48953 , 4.842235]),
184+
}
185+
self.check_res(res, known_res)
186+
187+
@seed
116188
def test_xlog_yprob(self):
117189
scales = {'fitlogs': 'x', 'fitprobs': 'y'}
118190
x, y = self.data, self.probs
119-
x_, y_, res = viz.fit_line(x, y, **scales)
120-
nptest.assert_array_almost_equal(y_, self.known_y_logprob)
121-
assert isinstance(res, numpy.ndarray)
122-
191+
x_, y_, res = viz.fit_line(x, y, xhat=x[::8], estimate_ci=True, **scales)
192+
nptest.assert_allclose(y_, self.known_y_logprob, rtol=0.0001)
193+
known_res = {
194+
'slope': 1.7385543724819046,
195+
'intercept': -3.7812786758946113,
196+
'yhat_lo': numpy.array([0.187555, 19.859832, 46.267537, 64.085292, 89.551801]),
197+
'yhat_hi': numpy.array([1.030230, 26.174702, 50.963065, 68.949137, 94.089655]),
198+
}
199+
self.check_res(res, known_res)
200+
201+
@seed
123202
def test_xprob_ylinear(self):
124203
scales = {'fitlogs': None, 'fitprobs': 'x'}
125204
x, y = self.probs, self.data
126-
x_, y_, res = viz.fit_line(x, y, **scales)
127-
nptest.assert_array_almost_equal(y_, self.known_y_problin)
128-
assert isinstance(res, numpy.ndarray)
129-
205+
x_, y_, res = viz.fit_line(x, y, xhat=x[::8], estimate_ci=True, **scales)
206+
nptest.assert_allclose(y_, self.known_y_problin, rtol=0.0001)
207+
known_res = {
208+
'slope': 5.3404377026700995,
209+
'intercept': 10.114857142857147,
210+
'yhat_lo': numpy.array([-2.92233134, 5.48065673, 9.1090198 , 12.01977856, 16.23762957]),
211+
'yhat_hi': numpy.array([ 0.49826723, 7.04480065, 10.27146083, 13.48770383, 18.83061329]),
212+
}
213+
self.check_res(res, known_res)
214+
215+
@seed
130216
def test_xprob_ylog(self):
131217
scales = {'fitlogs': 'y', 'fitprobs': 'x'}
132218
x, y = self.probs, self.data
133-
x_, y_, res = viz.fit_line(x, y, **scales)
134-
nptest.assert_array_almost_equal(y_, self.known_y_problog)
135-
assert isinstance(res, numpy.ndarray)
136-
219+
x_, y_, res = viz.fit_line(x, y, xhat=x[::8], estimate_ci=True, **scales)
220+
nptest.assert_allclose(y_, self.known_y_problog, rtol=0.0001)
221+
known_res = {
222+
'intercept': 2.1749556618678434,
223+
'slope': 0.55515014824534525,
224+
'yhat_lo': numpy.array([2.43550106, 5.6436203 , 8.16525601, 11.31358231, 18.09998664]),
225+
'yhat_hi': numpy.array([3.13484803, 6.30722509, 8.74945323, 12.23244498, 21.28240831]),
226+
}
227+
self.check_res(res, known_res)
228+
229+
@seed
137230
def test_xprob_yprob(self):
138-
p2 = numpy.array([
139-
1.94493789, 4.7424475 , 7.57359631, 10.40452018,
140-
13.23476893, 16.06435006, 18.89337556, 21.72197005,
141-
24.55024455, 27.37829018, 30.20617837, 33.03396313,
142-
35.86168383, 38.68936789, 41.51703325, 44.34469064,
143-
47.17234553, 50. , 52.82765447, 55.65530936,
144-
58.48296675, 61.31063211, 64.13831617, 66.96603687,
145-
69.79382163, 72.62170982, 75.44975545, 78.27802995,
146-
81.10662444, 83.93564994, 86.76523107, 89.59547982,
147-
92.42640369, 95.2575525 , 98.05506211
148-
])
231+
p2 = self.probs + numpy.random.uniform(-1, 1, size=len(self.probs))
149232

150233
scales = {'fitlogs': None, 'fitprobs': 'both'}
151234
x, y = self.probs, p2,
152-
x_, y_, res = viz.fit_line(x, y, **scales)
153-
nptest.assert_array_almost_equal(y_, self.known_y_probprob)
154-
assert isinstance(res, numpy.ndarray)
235+
x_, y_, res = viz.fit_line(x, y, xhat=x[::8], estimate_ci=True, **scales)
236+
nptest.assert_allclose(y_, self.known_y_probprob, rtol=0.0001)
237+
known_res = {
238+
'slope': 0.98467862838225351,
239+
'intercept': 0.0013327049076583583,
240+
'yhat_lo': numpy.array([1.96759603, 24.66922946, 46.88723664, 68.88913508, 91.58436332]),
241+
'yhat_hi': numpy.array([2.28593917, 25.24921351, 47.60781632, 70.11543855, 92.54803847]),
242+
}
243+
self.check_res(res, known_res)
155244

156245
def test_bad_fitlogs(self):
157246
with pytest.raises(ValueError):
@@ -468,7 +557,8 @@ def test_probplot_pp(plot_data):
468557
@pytest.mark.mpl_image_compare(baseline_dir=BASELINE_DIR, tolerance=10)
469558
def test_probplot_prob_bestfit(plot_data):
470559
fig, ax = plt.subplots()
471-
fig = viz.probplot(plot_data, ax=ax, datalabel='Test xlabel', bestfit=True, datascale='log')
560+
fig = viz.probplot(plot_data, ax=ax, datalabel='Test xlabel', bestfit=True,
561+
datascale='log', estimate_ci=True)
472562
assert isinstance(fig, plt.Figure)
473563
return fig
474564

@@ -477,7 +567,8 @@ def test_probplot_prob_bestfit(plot_data):
477567
def test_probplot_qq_bestfit(plot_data):
478568
fig, ax = plt.subplots()
479569
fig = viz.probplot(plot_data, ax=ax, plottype='qq', bestfit=True,
480-
problabel='Test label', datascale='log')
570+
problabel='Test label', datascale='log',
571+
estimate_ci=True)
481572
return fig
482573

483574

@@ -488,7 +579,8 @@ def test_probplot_pp_bestfit(plot_data):
488579
line_kws = {'linestyle': '--', 'linewidth': 3}
489580
fig = viz.probplot(plot_data, ax=ax, plottype='pp', datascale='linear',
490581
datalabel='test x', bestfit=True, problabel='test y',
491-
scatter_kws=scatter_kws, line_kws=line_kws)
582+
scatter_kws=scatter_kws, line_kws=line_kws,
583+
estimate_ci=True)
492584
return fig
493585

494586

@@ -521,7 +613,7 @@ def test_probplot_pp_probax_y(plot_data):
521613
def test_probplot_prob_bestfit_probax_y(plot_data):
522614
fig, ax = plt.subplots()
523615
fig = viz.probplot(plot_data, ax=ax, datalabel='Test xlabel', bestfit=True,
524-
datascale='log', probax='y')
616+
datascale='log', probax='y', estimate_ci=True)
525617
assert isinstance(fig, plt.Figure)
526618
return fig
527619

@@ -530,7 +622,7 @@ def test_probplot_prob_bestfit_probax_y(plot_data):
530622
def test_probplot_qq_bestfit_probax_y(plot_data):
531623
fig, ax = plt.subplots()
532624
fig = viz.probplot(plot_data, ax=ax, plottype='qq', bestfit=True, problabel='Test label',
533-
datascale='log', probax='y')
625+
datascale='log', probax='y', estimate_ci=True)
534626
return fig
535627

536628

@@ -541,7 +633,7 @@ def test_probplot_pp_bestfit_probax_y(plot_data):
541633
line_kws = {'linestyle': '--', 'linewidth': 3}
542634
fig = viz.probplot(plot_data, ax=ax, plottype='pp', datascale='linear', probax='y',
543635
datalabel='test x', bestfit=True, problabel='test y',
544-
scatter_kws=scatter_kws, line_kws=line_kws)
636+
scatter_kws=scatter_kws, line_kws=line_kws, estimate_ci=True)
545637
return fig
546638

547639

@@ -555,7 +647,8 @@ def test_probplot_beta_dist_best_fit_y(plot_data):
555647
ax1.set_ylim(bottom=0.5, top=98)
556648

557649
fig = viz.probplot(plot_data, ax=ax2, datalabel='Default (norm)',
558-
bestfit=True, datascale='log', probax='y')
650+
bestfit=True, datascale='log', probax='y',
651+
estimate_ci=True)
559652
ax2.set_ylim(bottom=0.5, top=98)
560653

561654
assert isinstance(fig, plt.Figure)
@@ -572,7 +665,8 @@ def test_probplot_beta_dist_best_fit_x(plot_data):
572665
ax1.set_xlim(left=0.5, right=98)
573666

574667
fig = viz.probplot(plot_data, ax=ax2, problabel='Default (norm)',
575-
bestfit=True, datascale='log', probax='x')
668+
bestfit=True, datascale='log', probax='x',
669+
estimate_ci=True)
576670
ax2.set_xlim(left=0.5, right=98)
577671

578672
assert isinstance(fig, plt.Figure)

0 commit comments

Comments
 (0)