[SPARK-10239] [SPARK-10244] [MLLIB] update since versions in mllib.pmml and mllib.util

Same as #8421 but for `mllib.pmml` and `mllib.util`.

cc dbtsai

Author: Xiangrui Meng <[email protected]>

Closes #8430 from mengxr/SPARK-10239 and squashes the following commits:

a189acf [Xiangrui Meng] update since versions in mllib.pmml and mllib.util
mengxr authored and DB Tsai committed Aug 25, 2015
1 parent 9205907 commit 00ae4be
Showing 9 changed files with 41 additions and 11 deletions.
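
For context, the `@Since` annotation records the Spark version in which a public API first shipped and is surfaced in the generated API docs. A minimal sketch of the pattern the diff applies throughout; the object and member names here are hypothetical, not part of the commit:

```scala
import org.apache.spark.annotation.{DeveloperApi, Since}

/**
 * :: DeveloperApi ::
 * Hypothetical example: `@Since` goes on the type and on each public member.
 */
@DeveloperApi
@Since("1.4.0")
object ExampleUtil {

  /** A member added in a later release carries its own, later version. */
  @Since("1.5.0")
  def exampleMethod(): Unit = {}
}
```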
mllib/src/main/scala/org/apache/spark/mllib/pmml/PMMLExportable.scala
@@ -23,7 +23,7 @@ import javax.xml.transform.stream.StreamResult
import org.jpmml.model.JAXBUtil

import org.apache.spark.SparkContext
-import org.apache.spark.annotation.{DeveloperApi, Experimental}
+import org.apache.spark.annotation.{DeveloperApi, Experimental, Since}
import org.apache.spark.mllib.pmml.export.PMMLModelExportFactory

/**
@@ -33,6 +33,7 @@ import org.apache.spark.mllib.pmml.export.PMMLModelExportFactory
* developed by the Data Mining Group (www.dmg.org).
*/
@DeveloperApi
@Since("1.4.0")
trait PMMLExportable {

/**
@@ -48,6 +49,7 @@ trait PMMLExportable {
* Export the model to a local file in PMML format
*/
@Experimental
@Since("1.4.0")
def toPMML(localPath: String): Unit = {
toPMML(new StreamResult(new File(localPath)))
}
@@ -57,6 +59,7 @@ trait PMMLExportable {
* Export the model to a directory on a distributed file system in PMML format
*/
@Experimental
@Since("1.4.0")
def toPMML(sc: SparkContext, path: String): Unit = {
val pmml = toPMML()
sc.parallelize(Array(pmml), 1).saveAsTextFile(path)
@@ -67,6 +70,7 @@ trait PMMLExportable {
* Export the model to the OutputStream in PMML format
*/
@Experimental
@Since("1.4.0")
def toPMML(outputStream: OutputStream): Unit = {
toPMML(new StreamResult(outputStream))
}
@@ -76,6 +80,7 @@ trait PMMLExportable {
* Export the model to a String in PMML format
*/
@Experimental
@Since("1.4.0")
def toPMML(): String = {
val writer = new StringWriter
toPMML(new StreamResult(writer))
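
As a usage note (not part of this diff): models that mix in `PMMLExportable`, such as `KMeansModel`, gain all four export targets above. A minimal sketch, assuming a live `SparkContext` named `sc` and illustrative output paths:

```scala
import org.apache.spark.mllib.clustering.KMeans
import org.apache.spark.mllib.linalg.Vectors

// Train a small model; KMeansModel mixes in PMMLExportable.
val data = sc.parallelize(Seq(
  Vectors.dense(0.0, 0.0), Vectors.dense(1.0, 1.0),
  Vectors.dense(9.0, 8.0), Vectors.dense(8.0, 9.0)))
val model = KMeans.train(data, 2, 10) // k = 2, maxIterations = 10

model.toPMML("/tmp/kmeans.xml")      // local file
model.toPMML(sc, "/tmp/kmeans-pmml") // distributed file system
val pmml: String = model.toPMML()    // in-memory String
```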
mllib/src/main/scala/org/apache/spark/mllib/util/DataValidators.scala
@@ -17,23 +17,25 @@

package org.apache.spark.mllib.util

-import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.Logging
-import org.apache.spark.rdd.RDD
+import org.apache.spark.annotation.{DeveloperApi, Since}
import org.apache.spark.mllib.regression.LabeledPoint
+import org.apache.spark.rdd.RDD

/**
* :: DeveloperApi ::
* A collection of methods used to validate data before applying ML algorithms.
*/
@DeveloperApi
@Since("0.8.0")
object DataValidators extends Logging {

/**
* Function to check if labels used for classification are either zero or one.
*
* @return True if labels are all zero or one, false otherwise.
*/
@Since("1.0.0")
val binaryLabelValidator: RDD[LabeledPoint] => Boolean = { data =>
val numInvalid = data.filter(x => x.label != 1.0 && x.label != 0.0).count()
if (numInvalid != 0) {
@@ -48,6 +50,7 @@ object DataValidators extends Logging {
*
* @return True if labels are all in the range of {0, 1, ..., k-1}, false otherwise.
*/
@Since("1.3.0")
def multiLabelValidator(k: Int): RDD[LabeledPoint] => Boolean = { data =>
val numInvalid = data.filter(x =>
x.label - x.label.toInt != 0.0 || x.label < 0 || x.label > k - 1).count()
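
A usage sketch for the two validators (not part of the diff), assuming a live `SparkContext` named `sc`:

```scala
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.mllib.util.DataValidators

val points = sc.parallelize(Seq(
  LabeledPoint(0.0, Vectors.dense(1.0, 2.0)),
  LabeledPoint(1.0, Vectors.dense(3.0, 4.0))))

// true: every label is 0.0 or 1.0
val binaryOk = DataValidators.binaryLabelValidator(points)

// true: every label falls in {0, 1, 2}
val multiOk = DataValidators.multiLabelValidator(3)(points)
```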
mllib/src/main/scala/org/apache/spark/mllib/util/KMeansDataGenerator.scala
@@ -19,8 +19,8 @@ package org.apache.spark.mllib.util

import scala.util.Random

-import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.SparkContext
+import org.apache.spark.annotation.{DeveloperApi, Since}
import org.apache.spark.rdd.RDD

/**
@@ -30,6 +30,7 @@ import org.apache.spark.rdd.RDD
* cluster with scale 1 around each center.
*/
@DeveloperApi
@Since("0.8.0")
object KMeansDataGenerator {

/**
@@ -42,6 +43,7 @@ object KMeansDataGenerator {
* @param r Scaling factor for the distribution of the initial centers
* @param numPartitions Number of partitions of the generated RDD; default 2
*/
@Since("0.8.0")
def generateKMeansRDD(
sc: SparkContext,
numPoints: Int,
@@ -62,6 +64,7 @@ object KMeansDataGenerator {
}
}

@Since("0.8.0")
def main(args: Array[String]) {
if (args.length < 6) {
// scalastyle:off println
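
A usage sketch (not part of the diff), following the parameter list in the scaladoc above and assuming a live `SparkContext` named `sc`:

```scala
import org.apache.spark.mllib.util.KMeansDataGenerator

// 1000 points around 5 centers in 3 dimensions; the initial centers are
// drawn with scaling factor 10.0, and the RDD gets 2 partitions.
val points = KMeansDataGenerator.generateKMeansRDD(sc, 1000, 5, 3, 10.0, 2)
points.take(3).foreach(p => println(p.mkString(", ")))
```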
mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala
@@ -22,11 +22,11 @@

import com.github.fommil.netlib.BLAS.{getInstance => blas}

-import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.SparkContext
-import org.apache.spark.rdd.RDD
+import org.apache.spark.annotation.{DeveloperApi, Since}
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.LabeledPoint
+import org.apache.spark.rdd.RDD

/**
* :: DeveloperApi ::
@@ -35,6 +35,7 @@ import org.apache.spark.mllib.regression.LabeledPoint
* response variable `Y`.
*/
@DeveloperApi
@Since("0.8.0")
object LinearDataGenerator {

/**
@@ -46,6 +47,7 @@ object LinearDataGenerator {
* @param seed Random seed
* @return Java List of input.
*/
@Since("0.8.0")
def generateLinearInputAsList(
intercept: Double,
weights: Array[Double],
@@ -68,6 +70,7 @@ object LinearDataGenerator {
* @param eps Epsilon scaling factor.
* @return Seq of input.
*/
@Since("0.8.0")
def generateLinearInput(
intercept: Double,
weights: Array[Double],
@@ -92,6 +95,7 @@ object LinearDataGenerator {
* @param eps Epsilon scaling factor.
* @return Seq of input.
*/
@Since("0.8.0")
def generateLinearInput(
intercept: Double,
weights: Array[Double],
@@ -132,6 +136,7 @@ object LinearDataGenerator {
*
* @return RDD of LabeledPoint containing sample data.
*/
@Since("0.8.0")
def generateLinearRDD(
sc: SparkContext,
nexamples: Int,
@@ -151,6 +156,7 @@ object LinearDataGenerator {
data
}

@Since("0.8.0")
def main(args: Array[String]) {
if (args.length < 2) {
// scalastyle:off println
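
A usage sketch (not part of the diff), assuming a live `SparkContext` named `sc`:

```scala
import org.apache.spark.mllib.util.LinearDataGenerator

// 10000 LabeledPoints with 10 features, noise scaling eps = 0.1,
// spread over 2 partitions.
val examples = LinearDataGenerator.generateLinearRDD(sc, 10000, 10, 0.1, 2)
examples.take(2).foreach(println)
```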
mllib/src/main/scala/org/apache/spark/mllib/util/LogisticRegressionDataGenerator.scala
@@ -19,7 +19,7 @@ package org.apache.spark.mllib.util

import scala.util.Random

-import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.annotation.{Since, DeveloperApi}
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.mllib.regression.LabeledPoint
@@ -31,6 +31,7 @@ import org.apache.spark.mllib.linalg.Vectors
* with probability `probOne` and scales features for positive examples by `eps`.
*/
@DeveloperApi
@Since("0.8.0")
object LogisticRegressionDataGenerator {

/**
@@ -43,6 +44,7 @@ object LogisticRegressionDataGenerator {
* @param nparts Number of partitions of the generated RDD. Default value is 2.
* @param probOne Probability that a label is 1 (and not 0). Default value is 0.5.
*/
@Since("0.8.0")
def generateLogisticRDD(
sc: SparkContext,
nexamples: Int,
@@ -62,6 +64,7 @@ object LogisticRegressionDataGenerator {
data
}

@Since("0.8.0")
def main(args: Array[String]) {
if (args.length != 5) {
// scalastyle:off println
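
A usage sketch (not part of the diff), matching the scaladoc above and assuming a live `SparkContext` named `sc`:

```scala
import org.apache.spark.mllib.util.LogisticRegressionDataGenerator

// 10000 examples with 5 features; positive examples scaled by eps = 3.0;
// 2 partitions; each label is 1 with probability 0.5.
val lrData =
  LogisticRegressionDataGenerator.generateLogisticRDD(sc, 10000, 5, 3.0, 2, 0.5)
```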
mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala
@@ -23,7 +23,7 @@ import scala.language.postfixOps
import scala.util.Random

import org.apache.spark.SparkContext
-import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.annotation.{Since, DeveloperApi}
import org.apache.spark.mllib.linalg.{BLAS, DenseMatrix}
import org.apache.spark.rdd.RDD

@@ -52,7 +52,9 @@ import org.apache.spark.rdd.RDD
* testSampFact (Double) Percentage of training data to use as test data.
*/
@DeveloperApi
@Since("0.8.0")
object MFDataGenerator {
@Since("0.8.0")
def main(args: Array[String]) {
if (args.length < 2) {
// scalastyle:off println
mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala
@@ -36,6 +36,7 @@ import org.apache.spark.streaming.dstream.DStream
/**
* Helper methods to load, save and pre-process data used in ML Lib.
*/
@Since("0.8.0")
object MLUtils {

private[mllib] lazy val EPSILON = {
@@ -168,6 +169,7 @@ object MLUtils {
*
* @see [[org.apache.spark.mllib.util.MLUtils#loadLibSVMFile]]
*/
@Since("1.0.0")
def saveAsLibSVMFile(data: RDD[LabeledPoint], dir: String) {
// TODO: allow to specify label precision and feature precision.
val dataStr = data.map { case LabeledPoint(label, features) =>
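
A usage sketch for the annotated `MLUtils` methods (not part of the diff), assuming a live `SparkContext` named `sc` and an existing `RDD[LabeledPoint]` named `examples`:

```scala
import org.apache.spark.mllib.util.MLUtils

// Round-trip a dataset through the LIBSVM text format; the output
// directory must not already exist.
MLUtils.saveAsLibSVMFile(examples, "/tmp/sample-libsvm")
val reloaded = MLUtils.loadLibSVMFile(sc, "/tmp/sample-libsvm")
```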
mllib/src/main/scala/org/apache/spark/mllib/util/SVMDataGenerator.scala
@@ -21,20 +21,22 @@ import scala.util.Random

import com.github.fommil.netlib.BLAS.{getInstance => blas}

-import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.SparkContext
-import org.apache.spark.rdd.RDD
+import org.apache.spark.annotation.{DeveloperApi, Since}
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.LabeledPoint
+import org.apache.spark.rdd.RDD

/**
* :: DeveloperApi ::
* Generate sample data used for SVM. This class generates uniform random values
* for the features and adds Gaussian noise with weight 0.1 to generate labels.
*/
@DeveloperApi
@Since("0.8.0")
object SVMDataGenerator {

@Since("0.8.0")
def main(args: Array[String]) {
if (args.length < 2) {
// scalastyle:off println
mllib/src/main/scala/org/apache/spark/mllib/util/modelSaveLoad.scala
@@ -24,7 +24,7 @@ import org.json4s._
import org.json4s.jackson.JsonMethods._

import org.apache.spark.SparkContext
-import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.annotation.{DeveloperApi, Since}
import org.apache.spark.sql.catalyst.ScalaReflection
import org.apache.spark.sql.types.{DataType, StructField, StructType}

@@ -35,6 +35,7 @@ import org.apache.spark.sql.types.{DataType, StructField, StructType}
* This should be inherited by the class which implements model instances.
*/
@DeveloperApi
@Since("1.3.0")
trait Saveable {

/**
@@ -50,6 +51,7 @@ trait Saveable {
* @param path Path specifying the directory in which to save this model.
* If the directory already exists, this method throws an exception.
*/
@Since("1.3.0")
def save(sc: SparkContext, path: String): Unit

/** Current version of model save/load format. */
@@ -64,6 +66,7 @@ trait Saveable {
* This should be inherited by an object paired with the model class.
*/
@DeveloperApi
@Since("1.3.0")
trait Loader[M <: Saveable] {

/**
@@ -75,6 +78,7 @@ trait Loader[M &lt;: Saveable] {
* @param path Path specifying the directory to which the model was saved.
* @return Model instance
*/
@Since("1.3.0")
def load(sc: SparkContext, path: String): M

}
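
A usage sketch of the `Saveable`/`Loader` pair (not part of the diff): concrete models implement `save`, and their companion objects implement `load`. Assumes a live `SparkContext` named `sc` and an existing `RDD[LabeledPoint]` named `trainingData`:

```scala
import org.apache.spark.mllib.classification.{LogisticRegressionModel, LogisticRegressionWithSGD}

// Saveable: persist the model (throws if the directory already exists).
val model = LogisticRegressionWithSGD.train(trainingData, 100)
model.save(sc, "/tmp/lr-model")

// Loader: the companion object restores an equivalent model.
val sameModel = LogisticRegressionModel.load(sc, "/tmp/lr-model")
```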
