Skip to content

Commit 9afcf12

Browse files
Peng authored and yanboliang committed
[SPARK-20764][ML][PYSPARK][FOLLOWUP] Fix visibility discrepancy with numInstances and degreesOfFreedom in LR and GLR - Python version
## What changes were proposed in this pull request?

Add test cases for PR-18062

## How was this patch tested?

The existing UT

Author: Peng <[email protected]>

Closes #18068 from mpjlu/moreTest.
1 parent d76633e commit 9afcf12

File tree

1 file changed

+6
-2
lines changed

1 file changed

+6
-2
lines changed

python/pyspark/ml/tests.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1066,6 +1066,7 @@ def test_linear_regression_summary(self):
10661066
self.assertAlmostEqual(s.r2, 1.0, 2)
10671067
self.assertTrue(isinstance(s.residuals, DataFrame))
10681068
self.assertEqual(s.numInstances, 2)
1069+
self.assertEqual(s.degreesOfFreedom, 1)
10691070
devResiduals = s.devianceResiduals
10701071
self.assertTrue(isinstance(devResiduals, list) and isinstance(devResiduals[0], float))
10711072
coefStdErr = s.coefficientStandardErrors
@@ -1075,7 +1076,8 @@ def test_linear_regression_summary(self):
10751076
pValues = s.pValues
10761077
self.assertTrue(isinstance(pValues, list) and isinstance(pValues[0], float))
10771078
# test evaluation (with training dataset) produces a summary with same values
1078-
# one check is enough to verify a summary is returned, Scala version runs full test
1079+
# one check is enough to verify a summary is returned
1080+
# The child class LinearRegressionTrainingSummary runs full test
10791081
sameSummary = model.evaluate(df)
10801082
self.assertAlmostEqual(sameSummary.explainedVariance, s.explainedVariance)
10811083

@@ -1093,6 +1095,7 @@ def test_glr_summary(self):
10931095
self.assertEqual(s.numIterations, 1) # this should default to a single iteration of WLS
10941096
self.assertTrue(isinstance(s.predictions, DataFrame))
10951097
self.assertEqual(s.predictionCol, "prediction")
1098+
self.assertEqual(s.numInstances, 2)
10961099
self.assertTrue(isinstance(s.residuals(), DataFrame))
10971100
self.assertTrue(isinstance(s.residuals("pearson"), DataFrame))
10981101
coefStdErr = s.coefficientStandardErrors
@@ -1111,7 +1114,8 @@ def test_glr_summary(self):
11111114
self.assertTrue(isinstance(s.nullDeviance, float))
11121115
self.assertTrue(isinstance(s.dispersion, float))
11131116
# test evaluation (with training dataset) produces a summary with same values
1114-
# one check is enough to verify a summary is returned, Scala version runs full test
1117+
# one check is enough to verify a summary is returned
1118+
# The child class GeneralizedLinearRegressionTrainingSummary runs full test
11151119
sameSummary = model.evaluate(df)
11161120
self.assertAlmostEqual(sameSummary.deviance, s.deviance)
11171121

0 commit comments

Comments
 (0)