Skip to content

Commit

Permalink
address comments and add a few more doctests
Browse files Browse the repository at this point in the history
  • Loading branch information
huaxingao committed Sep 18, 2019
1 parent 95f88f5 commit bc1d9e1
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 11 deletions.
38 changes: 29 additions & 9 deletions python/pyspark/ml/classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,8 +160,10 @@ class LinearSVC(JavaClassifier, HasMaxIter, HasRegParam, HasTol,
... Row(label=0.0, features=Vectors.dense(1.0, 2.0, 3.0))]).toDF()
>>> svm = LinearSVC(maxIter=5, regParam=0.01)
>>> model = svm.fit(df)
- >>> model.setPredictionCol("prediction")
+ >>> model.setPredictionCol("newPrediction")
LinearSVC...
>>> model.getPredictionCol()
'newPrediction'
>>> model.coefficients
DenseVector([0.0, -0.2792, -0.1833])
>>> model.intercept
Expand All @@ -174,7 +176,7 @@ class LinearSVC(JavaClassifier, HasMaxIter, HasRegParam, HasTol,
>>> model.predict(test0.head().features)
1.0
>>> result = model.transform(test0).head()
- >>> result.prediction
+ >>> result.newPrediction
1.0
>>> result.rawPrediction
DenseVector([-1.4831, 1.4831])
Expand Down Expand Up @@ -282,6 +284,10 @@ class LogisticRegression(JavaProbabilisticClassifier, HasMaxIter, HasRegParam, H
>>> blorModel = blor.fit(bdf)
>>> blorModel.setFeaturesCol("features")
LogisticRegressionModel...
>>> blorModel.setProbabilityCol("newProbability")
LogisticRegressionModel...
>>> blorModel.getProbabilityCol()
'newProbability'
>>> blorModel.coefficients
DenseVector([-1.080..., -0.646...])
>>> blorModel.intercept
Expand All @@ -300,7 +306,7 @@ class LogisticRegression(JavaProbabilisticClassifier, HasMaxIter, HasRegParam, H
>>> result = blorModel.transform(test0).head()
>>> result.prediction
1.0
- >>> result.probability
+ >>> result.newProbability
DenseVector([0.02..., 0.97...])
>>> result.rawPrediction
DenseVector([-3.54..., 3.54...])
Expand Down Expand Up @@ -1179,6 +1185,10 @@ class RandomForestClassifier(JavaProbabilisticClassifier, HasSeed, RandomForestP
'indexed'
>>> model.setFeaturesCol("features")
RandomForestClassificationModel...
>>> model.setRawPredictionCol("newRawPrediction")
RandomForestClassificationModel...
>>> model.getRawPredictionCol()
'newRawPrediction'
>>> model.featureImportances
SparseVector(1, {0: 1.0})
>>> allclose(model.treeWeights, [1.0, 1.0, 1.0])
Expand All @@ -1191,7 +1201,7 @@ class RandomForestClassifier(JavaProbabilisticClassifier, HasSeed, RandomForestP
0.0
>>> numpy.argmax(result.probability)
0
- >>> numpy.argmax(result.rawPrediction)
+ >>> numpy.argmax(result.newRawPrediction)
0
>>> result.leafId
DenseVector([0.0, 0.0, 0.0])
Expand Down Expand Up @@ -1417,6 +1427,10 @@ class GBTClassifier(JavaProbabilisticClassifier, GBTClassifierParams, HasCheckpo
'indexed'
>>> model.setFeaturesCol("features")
GBTClassificationModel...
>>> model.setThresholds([0.3, 0.7])
GBTClassificationModel...
>>> model.getThresholds()
[0.3, 0.7]
>>> model.featureImportances
SparseVector(1, {0: 1.0})
>>> allclose(model.treeWeights, [1.0, 0.1, 0.1, 0.1, 0.1])
Expand Down Expand Up @@ -1650,6 +1664,10 @@ class NaiveBayes(JavaProbabilisticClassifier, HasThresholds, HasWeightCol,
>>> model = nb.fit(df)
>>> model.setFeaturesCol("features")
NaiveBayes_...
>>> model.setLabelCol("newLabel")
NaiveBayes_...
>>> model.getLabelCol()
'newLabel'
>>> model.pi
DenseVector([-0.81..., -0.58...])
>>> model.theta
Expand Down Expand Up @@ -2003,6 +2021,8 @@ class OneVsRest(Estimator, OneVsRestParams, HasParallelism, JavaMLReadable, Java
>>> ovr = OneVsRest(classifier=lr)
>>> ovr.getRawPredictionCol()
'rawPrediction'
>>> ovr.setPredictionCol("newPrediction")
OneVsRest...
>>> model = ovr.fit(df)
>>> model.models[0].coefficients
DenseVector([0.5..., -1.0..., 3.4..., 4.2...])
Expand All @@ -2013,21 +2033,21 @@ class OneVsRest(Estimator, OneVsRestParams, HasParallelism, JavaMLReadable, Java
>>> [x.intercept for x in model.models]
[-2.7..., -2.5..., -1.3...]
>>> test0 = sc.parallelize([Row(features=Vectors.dense(-1.0, 0.0, 1.0, 1.0))]).toDF()
- >>> model.transform(test0).head().prediction
+ >>> model.transform(test0).head().newPrediction
0.0
>>> test1 = sc.parallelize([Row(features=Vectors.sparse(4, [0], [1.0]))]).toDF()
- >>> model.transform(test1).head().prediction
+ >>> model.transform(test1).head().newPrediction
2.0
>>> test2 = sc.parallelize([Row(features=Vectors.dense(0.5, 0.4, 0.3, 0.2))]).toDF()
- >>> model.transform(test2).head().prediction
+ >>> model.transform(test2).head().newPrediction
0.0
>>> model_path = temp_path + "/ovr_model"
>>> model.save(model_path)
>>> model2 = OneVsRestModel.load(model_path)
- >>> model2.transform(test0).head().prediction
+ >>> model2.transform(test0).head().newPrediction
0.0
>>> model.transform(test2).columns
- ['features', 'rawPrediction', 'prediction']
+ ['features', 'rawPrediction', 'newPrediction']
.. versionadded:: 2.0.0
"""
Expand Down
10 changes: 8 additions & 2 deletions python/pyspark/ml/regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,17 +68,19 @@ class LinearRegression(JavaPredictor, HasMaxIter, HasRegParam, HasTol, HasElasti
>>> model = lr.fit(df)
>>> model.setFeaturesCol("features")
LinearRegression...
>>> model.setPredictionCol("newPrediction")
LinearRegression...
>>> test0 = spark.createDataFrame([(Vectors.dense(-1.0),)], ["features"])
>>> abs(model.predict(test0.head().features) - (-1.0)) < 0.001
True
- >>> abs(model.transform(test0).head().prediction - (-1.0)) < 0.001
+ >>> abs(model.transform(test0).head().newPrediction - (-1.0)) < 0.001
True
>>> abs(model.coefficients[0] - 1.0) < 0.001
True
>>> abs(model.intercept - 0.0) < 0.001
True
>>> test1 = spark.createDataFrame([(Vectors.sparse(1, [0], [1.0]),)], ["features"])
- >>> abs(model.transform(test1).head().prediction - 1.0) < 0.001
+ >>> abs(model.transform(test1).head().newPrediction - 1.0) < 0.001
True
>>> lr.setParams("vector")
Traceback (most recent call last):
Expand Down Expand Up @@ -477,6 +479,10 @@ class IsotonicRegression(JavaPredictor, HasWeightCol, JavaMLWritable, JavaMLRead
>>> model = ir.fit(df)
>>> model.setFeaturesCol("features")
IsotonicRegression...
>>> model.setLabelCol("newLabel")
IsotonicRegression...
>>> model.getLabelCol()
'newLabel'
>>> test0 = spark.createDataFrame([(Vectors.dense(-1.0),)], ["features"])
>>> model.transform(test0).head().prediction
0.0
Expand Down

0 comments on commit bc1d9e1

Please sign in to comment.