Commit f5b028e

[SPARK-9864] [DOC] [MLlib] [SQL] Replace since in scaladoc to Since annotation

Author: MechCoder <[email protected]>

Closes #8352 from MechCoder/since.
MechCoder authored and mengxr committed Aug 21, 2015
1 parent d89cc38 commit f5b028e
Showing 68 changed files with 692 additions and 862 deletions.
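The pattern is the same in every hunk below: the free-text "@since x.y.z" scaladoc tag is deleted and an @Since("x.y.z") annotation is attached to the member itself. A minimal, self-contained sketch of the before/after shape (Greeter and its methods are hypothetical; the local Since class merely stands in for Spark's org.apache.spark.annotation.Since and assumes it takes the version string as its single constructor argument):

import scala.annotation.StaticAnnotation

// Stand-in for org.apache.spark.annotation.Since (an assumption for this
// sketch; the real annotation is defined inside Spark itself).
class Since(version: String) extends StaticAnnotation

object Greeter {

  /**
   * Old style: the version lives only in comment text, invisible to tooling.
   * @since 1.0.0
   */
  def greetOld(name: String): String = s"Hello, $name"

  /** New style: the same fact expressed as a real annotation on the member. */
  @Since("1.0.0")
  def greetNew(name: String): String = s"Hello, $name"
}

Because the annotation survives compilation as part of the member's signature, doc generation can render the version consistently instead of depending on hand-written comment text.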
mllib/src/main/scala/org/apache/spark/mllib/classification/ClassificationModel.scala
@@ -19,7 +19,7 @@ package org.apache.spark.mllib.classification
 
 import org.json4s.{DefaultFormats, JValue}
 
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.api.java.JavaRDD
 import org.apache.spark.mllib.linalg.Vector
 import org.apache.spark.rdd.RDD
@@ -36,25 +36,25 @@ trait ClassificationModel extends Serializable {
    *
    * @param testData RDD representing data points to be predicted
    * @return an RDD[Double] where each entry contains the corresponding prediction
-   * @since 0.8.0
    */
+  @Since("0.8.0")
   def predict(testData: RDD[Vector]): RDD[Double]
 
   /**
    * Predict values for a single data point using the model trained.
    *
    * @param testData array representing a single data point
    * @return predicted category from the trained model
-   * @since 0.8.0
    */
+  @Since("0.8.0")
   def predict(testData: Vector): Double
 
   /**
    * Predict values for examples stored in a JavaRDD.
    * @param testData JavaRDD representing data points to be predicted
    * @return a JavaRDD[java.lang.Double] where each entry contains the corresponding prediction
-   * @since 0.8.0
    */
+  @Since("0.8.0")
   def predict(testData: JavaRDD[Vector]): JavaRDD[java.lang.Double] =
     predict(testData.rdd).toJavaRDD().asInstanceOf[JavaRDD[java.lang.Double]]
 }
mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala
@@ -18,7 +18,7 @@
 package org.apache.spark.mllib.classification
 
 import org.apache.spark.SparkContext
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.mllib.classification.impl.GLMClassificationModel
 import org.apache.spark.mllib.linalg.BLAS.dot
 import org.apache.spark.mllib.linalg.{DenseVector, Vector}
@@ -85,8 +85,8 @@ class LogisticRegressionModel (
    * in Binary Logistic Regression. An example with prediction score greater than or equal to
    * this threshold is identified as an positive, and negative otherwise. The default value is 0.5.
    * It is only used for binary classification.
-   * @since 1.0.0
    */
+  @Since("1.0.0")
   @Experimental
   def setThreshold(threshold: Double): this.type = {
     this.threshold = Some(threshold)
@@ -97,17 +97,17 @@
    * :: Experimental ::
    * Returns the threshold (if any) used for converting raw prediction scores into 0/1 predictions.
    * It is only used for binary classification.
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   @Experimental
   def getThreshold: Option[Double] = threshold
 
   /**
    * :: Experimental ::
    * Clears the threshold so that `predict` will output raw prediction scores.
    * It is only used for binary classification.
-   * @since 1.0.0
    */
+  @Since("1.0.0")
   @Experimental
   def clearThreshold(): this.type = {
     threshold = None
@@ -158,29 +158,23 @@ class LogisticRegressionModel (
     }
   }
 
-  /**
-   * @since 1.3.0
-   */
+  @Since("1.3.0")
   override def save(sc: SparkContext, path: String): Unit = {
     GLMClassificationModel.SaveLoadV1_0.save(sc, path, this.getClass.getName,
       numFeatures, numClasses, weights, intercept, threshold)
   }
 
   override protected def formatVersion: String = "1.0"
 
-  /**
-   * @since 1.4.0
-   */
+  @Since("1.4.0")
   override def toString: String = {
     s"${super.toString}, numClasses = ${numClasses}, threshold = ${threshold.getOrElse("None")}"
   }
 }
 
 object LogisticRegressionModel extends Loader[LogisticRegressionModel] {
 
-  /**
-   * @since 1.3.0
-   */
+  @Since("1.3.0")
   override def load(sc: SparkContext, path: String): LogisticRegressionModel = {
     val (loadedClassName, version, metadata) = Loader.loadMetadata(sc, path)
     // Hard-code class name string in case it changes in the future
@@ -261,8 +255,8 @@ object LogisticRegressionWithSGD {
    * @param miniBatchFraction Fraction of data to be used per iteration.
    * @param initialWeights Initial set of weights to be used. Array should be equal in size to
    *        the number of features in the data.
-   * @since 1.0.0
    */
+  @Since("1.0.0")
   def train(
       input: RDD[LabeledPoint],
       numIterations: Int,
@@ -284,8 +278,8 @@
    * @param stepSize Step size to be used for each iteration of gradient descent.
    * @param miniBatchFraction Fraction of data to be used per iteration.
-   * @since 1.0.0
    */
+  @Since("1.0.0")
   def train(
       input: RDD[LabeledPoint],
       numIterations: Int,
@@ -306,8 +300,8 @@
    * @param numIterations Number of iterations of gradient descent to run.
    * @return a LogisticRegressionModel which has the weights and offset from training.
-   * @since 1.0.0
    */
+  @Since("1.0.0")
   def train(
       input: RDD[LabeledPoint],
       numIterations: Int,
@@ -324,8 +318,8 @@
    * @param input RDD of (label, array of features) pairs.
    * @param numIterations Number of iterations of gradient descent to run.
    * @return a LogisticRegressionModel which has the weights and offset from training.
-   * @since 1.0.0
    */
+  @Since("1.0.0")
   def train(
       input: RDD[LabeledPoint],
       numIterations: Int): LogisticRegressionModel = {
@@ -361,8 +355,8 @@ class LogisticRegressionWithLBFGS
    * Set the number of possible outcomes for k classes classification problem in
    * Multinomial Logistic Regression.
    * By default, it is binary logistic regression so k will be set to 2.
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   @Experimental
   def setNumClasses(numClasses: Int): this.type = {
     require(numClasses > 1)
mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala
@@ -25,6 +25,7 @@ import org.json4s.JsonDSL._
 import org.json4s.jackson.JsonMethods._
 
 import org.apache.spark.{Logging, SparkContext, SparkException}
+import org.apache.spark.annotation.Since
 import org.apache.spark.mllib.linalg.{BLAS, DenseMatrix, DenseVector, SparseVector, Vector}
 import org.apache.spark.mllib.regression.LabeledPoint
 import org.apache.spark.mllib.util.{Loader, Saveable}
@@ -444,8 +445,8 @@ object NaiveBayes {
    *
    * @param input RDD of `(label, array of features)` pairs. Every vector should be a frequency
    *              vector or a count vector.
-   * @since 0.9.0
    */
+  @Since("0.9.0")
   def train(input: RDD[LabeledPoint]): NaiveBayesModel = {
     new NaiveBayes().run(input)
   }
@@ -460,8 +461,8 @@
    * @param input RDD of `(label, array of features)` pairs. Every vector should be a frequency
    *              vector or a count vector.
    * @param lambda The smoothing parameter
-   * @since 0.9.0
    */
+  @Since("0.9.0")
   def train(input: RDD[LabeledPoint], lambda: Double): NaiveBayesModel = {
     new NaiveBayes(lambda, Multinomial).run(input)
   }
@@ -483,8 +484,8 @@
    *
    * @param modelType The type of NB model to fit from the enumeration NaiveBayesModels, can be
    *                  multinomial or bernoulli
-   * @since 0.9.0
    */
+  @Since("0.9.0")
   def train(input: RDD[LabeledPoint], lambda: Double, modelType: String): NaiveBayesModel = {
     require(supportedModelTypes.contains(modelType),
       s"NaiveBayes was created with an unknown modelType: $modelType.")
mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala
@@ -18,7 +18,7 @@
 package org.apache.spark.mllib.classification
 
 import org.apache.spark.SparkContext
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.mllib.classification.impl.GLMClassificationModel
 import org.apache.spark.mllib.linalg.Vector
 import org.apache.spark.mllib.optimization._
@@ -46,8 +46,8 @@ class SVMModel (
    * Sets the threshold that separates positive predictions from negative predictions. An example
    * with prediction score greater than or equal to this threshold is identified as an positive,
    * and negative otherwise. The default value is 0.0.
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   @Experimental
   def setThreshold(threshold: Double): this.type = {
     this.threshold = Some(threshold)
@@ -57,16 +57,16 @@
   /**
    * :: Experimental ::
    * Returns the threshold (if any) used for converting raw prediction scores into 0/1 predictions.
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   @Experimental
   def getThreshold: Option[Double] = threshold
 
   /**
    * :: Experimental ::
    * Clears the threshold so that `predict` will output raw prediction scores.
-   * @since 1.0.0
    */
+  @Since("1.0.0")
   @Experimental
   def clearThreshold(): this.type = {
     threshold = None
@@ -84,29 +84,23 @@
     }
   }
 
-  /**
-   * @since 1.3.0
-   */
+  @Since("1.3.0")
   override def save(sc: SparkContext, path: String): Unit = {
     GLMClassificationModel.SaveLoadV1_0.save(sc, path, this.getClass.getName,
       numFeatures = weights.size, numClasses = 2, weights, intercept, threshold)
   }
 
   override protected def formatVersion: String = "1.0"
 
-  /**
-   * @since 1.4.0
-   */
+  @Since("1.4.0")
   override def toString: String = {
     s"${super.toString}, numClasses = 2, threshold = ${threshold.getOrElse("None")}"
   }
 }
 
 object SVMModel extends Loader[SVMModel] {
 
-  /**
-   * @since 1.3.0
-   */
+  @Since("1.3.0")
   override def load(sc: SparkContext, path: String): SVMModel = {
     val (loadedClassName, version, metadata) = Loader.loadMetadata(sc, path)
     // Hard-code class name string in case it changes in the future
@@ -185,8 +179,8 @@ object SVMWithSGD {
    * @param miniBatchFraction Fraction of data to be used per iteration.
    * @param initialWeights Initial set of weights to be used. Array should be equal in size to
    *        the number of features in the data.
-   * @since 0.8.0
    */
+  @Since("0.8.0")
   def train(
       input: RDD[LabeledPoint],
       numIterations: Int,
@@ -209,8 +203,8 @@
    * @param stepSize Step size to be used for each iteration of gradient descent.
    * @param regParam Regularization parameter.
    * @param miniBatchFraction Fraction of data to be used per iteration.
-   * @since 0.8.0
    */
+  @Since("0.8.0")
   def train(
       input: RDD[LabeledPoint],
       numIterations: Int,
@@ -231,8 +225,8 @@
    * @param regParam Regularization parameter.
    * @param numIterations Number of iterations of gradient descent to run.
    * @return a SVMModel which has the weights and offset from training.
-   * @since 0.8.0
    */
+  @Since("0.8.0")
   def train(
       input: RDD[LabeledPoint],
       numIterations: Int,
@@ -250,8 +244,8 @@
    * @param input RDD of (label, array of features) pairs.
    * @param numIterations Number of iterations of gradient descent to run.
    * @return a SVMModel which has the weights and offset from training.
-   * @since 0.8.0
    */
+  @Since("0.8.0")
   def train(input: RDD[LabeledPoint], numIterations: Int): SVMModel = {
     train(input, numIterations, 1.0, 0.01, 1.0)
   }
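For reference, a version-tagging annotation of this kind fits in a few lines of Scala. This is a hedged sketch, not Spark's actual definition (the package name, visibility, and exact meta-annotations here are assumptions); the meta-annotations let the tag sit on constructor parameters, fields, and generated accessors as well as on methods and classes:

package sketch.annotation // hypothetical; Spark's version lives in org.apache.spark.annotation

import scala.annotation.StaticAnnotation
import scala.annotation.meta.{beanGetter, beanSetter, field, getter, param, setter}

// Records the version in which an API element was first added. As a
// StaticAnnotation it is retained in the Scala signature, so scaladoc
// generation and API-audit tooling can read it back programmatically,
// which a free-form @since line in a comment does not allow.
@param @field @getter @setter @beanGetter @beanSetter
class Since(version: String) extends StaticAnnotation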
