address comments and add a few more doctests

apache · Sep 18, 2019 · bc1d9e1 · bc1d9e1
1 parent 95f88f5
commit bc1d9e1
Show file tree

Hide file tree

Showing 2 changed files with 37 additions and 11 deletions.
diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py
@@ -160,8 +160,10 @@ class LinearSVC(JavaClassifier, HasMaxIter, HasRegParam, HasTol,
     ...     Row(label=0.0, features=Vectors.dense(1.0, 2.0, 3.0))]).toDF()
     >>> svm = LinearSVC(maxIter=5, regParam=0.01)
     >>> model = svm.fit(df)
-    >>> model.setPredictionCol("prediction")
+    >>> model.setPredictionCol("newPrediction")
     LinearSVC...
+    >>> model.getPredictionCol()
+    'newPrediction'
     >>> model.coefficients
     DenseVector([0.0, -0.2792, -0.1833])
     >>> model.intercept
@@ -174,7 +176,7 @@ class LinearSVC(JavaClassifier, HasMaxIter, HasRegParam, HasTol,
     >>> model.predict(test0.head().features)
     1.0
     >>> result = model.transform(test0).head()
-    >>> result.prediction
+    >>> result.newPrediction
     1.0
     >>> result.rawPrediction
     DenseVector([-1.4831, 1.4831])
@@ -282,6 +284,10 @@ class LogisticRegression(JavaProbabilisticClassifier, HasMaxIter, HasRegParam, H
     >>> blorModel = blor.fit(bdf)
     >>> blorModel.setFeaturesCol("features")
     LogisticRegressionModel...
+    >>> blorModel.setProbabilityCol("newProbability")
+    LogisticRegressionModel...
+    >>> blorModel.getProbabilityCol()
+    'newProbability'
     >>> blorModel.coefficients
     DenseVector([-1.080..., -0.646...])
     >>> blorModel.intercept
@@ -300,7 +306,7 @@ class LogisticRegression(JavaProbabilisticClassifier, HasMaxIter, HasRegParam, H
     >>> result = blorModel.transform(test0).head()
     >>> result.prediction
     1.0
-    >>> result.probability
+    >>> result.newProbability
     DenseVector([0.02..., 0.97...])
     >>> result.rawPrediction
     DenseVector([-3.54..., 3.54...])
@@ -1179,6 +1185,10 @@ class RandomForestClassifier(JavaProbabilisticClassifier, HasSeed, RandomForestP
     'indexed'
     >>> model.setFeaturesCol("features")
     RandomForestClassificationModel...
+    >>> model.setRawPredictionCol("newRawPrediction")
+    RandomForestClassificationModel...
+    >>> model.getRawPredictionCol()
+    'newRawPrediction'
     >>> model.featureImportances
     SparseVector(1, {0: 1.0})
     >>> allclose(model.treeWeights, [1.0, 1.0, 1.0])
@@ -1191,7 +1201,7 @@ class RandomForestClassifier(JavaProbabilisticClassifier, HasSeed, RandomForestP
     0.0
     >>> numpy.argmax(result.probability)
     0
-    >>> numpy.argmax(result.rawPrediction)
+    >>> numpy.argmax(result.newRawPrediction)
     0
     >>> result.leafId
     DenseVector([0.0, 0.0, 0.0])
@@ -1417,6 +1427,10 @@ class GBTClassifier(JavaProbabilisticClassifier, GBTClassifierParams, HasCheckpo
     'indexed'
     >>> model.setFeaturesCol("features")
     GBTClassificationModel...
+    >>> model.setThresholds([0.3, 0.7])
+    GBTClassificationModel...
+    >>> model.getThresholds()
+    [0.3, 0.7]
     >>> model.featureImportances
     SparseVector(1, {0: 1.0})
     >>> allclose(model.treeWeights, [1.0, 0.1, 0.1, 0.1, 0.1])
@@ -1650,6 +1664,10 @@ class NaiveBayes(JavaProbabilisticClassifier, HasThresholds, HasWeightCol,
     >>> model = nb.fit(df)
     >>> model.setFeaturesCol("features")
     NaiveBayes_...
+    >>> model.setLabelCol("newLabel")
+    NaiveBayes_...
+    >>> model.getLabelCol()
+    'newLabel'
     >>> model.pi
     DenseVector([-0.81..., -0.58...])
     >>> model.theta
@@ -2003,6 +2021,8 @@ class OneVsRest(Estimator, OneVsRestParams, HasParallelism, JavaMLReadable, Java
     >>> ovr = OneVsRest(classifier=lr)
     >>> ovr.getRawPredictionCol()
     'rawPrediction'
+    >>> ovr.setPredictionCol("newPrediction")
+    OneVsRest...
     >>> model = ovr.fit(df)
     >>> model.models[0].coefficients
     DenseVector([0.5..., -1.0..., 3.4..., 4.2...])
@@ -2013,21 +2033,21 @@ class OneVsRest(Estimator, OneVsRestParams, HasParallelism, JavaMLReadable, Java
     >>> [x.intercept for x in model.models]
     [-2.7..., -2.5..., -1.3...]
     >>> test0 = sc.parallelize([Row(features=Vectors.dense(-1.0, 0.0, 1.0, 1.0))]).toDF()
-    >>> model.transform(test0).head().prediction
+    >>> model.transform(test0).head().newPrediction
     0.0
     >>> test1 = sc.parallelize([Row(features=Vectors.sparse(4, [0], [1.0]))]).toDF()
-    >>> model.transform(test1).head().prediction
+    >>> model.transform(test1).head().newPrediction
     2.0
     >>> test2 = sc.parallelize([Row(features=Vectors.dense(0.5, 0.4, 0.3, 0.2))]).toDF()
-    >>> model.transform(test2).head().prediction
+    >>> model.transform(test2).head().newPrediction
     0.0
     >>> model_path = temp_path + "/ovr_model"
     >>> model.save(model_path)
     >>> model2 = OneVsRestModel.load(model_path)
-    >>> model2.transform(test0).head().prediction
+    >>> model2.transform(test0).head().newPrediction
     0.0
     >>> model.transform(test2).columns
-    ['features', 'rawPrediction', 'prediction']
+    ['features', 'rawPrediction', 'newPrediction']
 
     .. versionadded:: 2.0.0
     """

diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py
@@ -68,17 +68,19 @@ class LinearRegression(JavaPredictor, HasMaxIter, HasRegParam, HasTol, HasElasti
     >>> model = lr.fit(df)
     >>> model.setFeaturesCol("features")
     LinearRegression...
+    >>> model.setPredictionCol("newPrediction")
+    LinearRegression...
     >>> test0 = spark.createDataFrame([(Vectors.dense(-1.0),)], ["features"])
     >>> abs(model.predict(test0.head().features) - (-1.0)) < 0.001
     True
-    >>> abs(model.transform(test0).head().prediction - (-1.0)) < 0.001
+    >>> abs(model.transform(test0).head().newPrediction - (-1.0)) < 0.001
     True
     >>> abs(model.coefficients[0] - 1.0) < 0.001
     True
     >>> abs(model.intercept - 0.0) < 0.001
     True
     >>> test1 = spark.createDataFrame([(Vectors.sparse(1, [0], [1.0]),)], ["features"])
-    >>> abs(model.transform(test1).head().prediction - 1.0) < 0.001
+    >>> abs(model.transform(test1).head().newPrediction - 1.0) < 0.001
     True
     >>> lr.setParams("vector")
     Traceback (most recent call last):
@@ -477,6 +479,10 @@ class IsotonicRegression(JavaPredictor, HasWeightCol, JavaMLWritable, JavaMLRead
     >>> model = ir.fit(df)
     >>> model.setFeaturesCol("features")
     IsotonicRegression...
+    >>> model.setLabelCol("newLabel")
+    IsotonicRegression...
+    >>> model.getLabelCol()
+    'newLabel'
     >>> test0 = spark.createDataFrame([(Vectors.dense(-1.0),)], ["features"])
     >>> model.transform(test0).head().prediction
     0.0