Skip to content

Commit

Permalink
add doc groups to spark.ml components
Browse files — browse the repository at this point in the history
  • Loading branch information
mengxr committed Feb 13, 2015
1 parent 9f31db0 commit febed9a
Show file tree
Hide file tree
Showing 13 changed files with 235 additions and 26 deletions.
3 changes: 3 additions & 0 deletions mllib/src/main/scala/org/apache/spark/ml/Transformer.scala
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,10 @@ abstract class Transformer extends PipelineStage with Params {
private[ml] abstract class UnaryTransformer[IN, OUT, T <: UnaryTransformer[IN, OUT, T]]
extends Transformer with HasInputCol with HasOutputCol with Logging {

/** @group setParam */
def setInputCol(value: String): T = set(inputCol, value).asInstanceOf[T]

/** @group setParam */
def setOutputCol(value: String): T = set(outputCol, value).asInstanceOf[T]

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ private[spark] abstract class Classifier[
extends Predictor[FeaturesType, E, M]
with ClassifierParams {

/** @group setParam */
def setRawPredictionCol(value: String): E =
set(rawPredictionCol, value).asInstanceOf[E]

Expand All @@ -87,6 +88,7 @@ private[spark]
abstract class ClassificationModel[FeaturesType, M <: ClassificationModel[FeaturesType, M]]
extends PredictionModel[FeaturesType, M] with ClassifierParams {

/** @group setParam */
def setRawPredictionCol(value: String): M = set(rawPredictionCol, value).asInstanceOf[M]

/** Number of classes (values which the label can take). */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,13 @@ class LogisticRegression
setMaxIter(100)
setThreshold(0.5)

/** @group setParam */
def setRegParam(value: Double): this.type = set(regParam, value)

/** @group setParam */
def setMaxIter(value: Int): this.type = set(maxIter, value)

/** @group setParam */
def setThreshold(value: Double): this.type = set(threshold, value)

override protected def train(dataset: DataFrame, paramMap: ParamMap): LogisticRegressionModel = {
Expand Down Expand Up @@ -93,6 +98,7 @@ class LogisticRegressionModel private[ml] (

setThreshold(0.5)

/** @group setParam */
def setThreshold(value: Double): this.type = set(threshold, value)

private val margin: Vector => Double = (features) => {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ private[spark] abstract class ProbabilisticClassifier[
M <: ProbabilisticClassificationModel[FeaturesType, M]]
extends Classifier[FeaturesType, E, M] with ProbabilisticClassifierParams {

/** @group setParam */
def setProbabilityCol(value: String): E = set(probabilityCol, value).asInstanceOf[E]
}

Expand All @@ -82,6 +83,7 @@ private[spark] abstract class ProbabilisticClassificationModel[
M <: ProbabilisticClassificationModel[FeaturesType, M]]
extends ClassificationModel[FeaturesType, M] with ProbabilisticClassifierParams {

/** @group setParam */
def setProbabilityCol(value: String): M = set(probabilityCol, value).asInstanceOf[M]

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,23 @@ import org.apache.spark.sql.types.DoubleType
class BinaryClassificationEvaluator extends Evaluator with Params
with HasRawPredictionCol with HasLabelCol {

/** param for metric name in evaluation */
/**
* param for metric name in evaluation
* @group param
*/
val metricName: Param[String] = new Param(this, "metricName",
"metric name in evaluation (areaUnderROC|areaUnderPR)", Some("areaUnderROC"))

/** @group getParam */
def getMetricName: String = get(metricName)

/** @group setParam */
def setMetricName(value: String): this.type = set(metricName, value)

/** @group setParam */
def setScoreCol(value: String): this.type = set(rawPredictionCol, value)

/** @group setParam */
def setLabelCol(value: String): this.type = set(labelCol, value)

override def evaluate(dataset: DataFrame, paramMap: ParamMap): Double = {
Expand Down
11 changes: 9 additions & 2 deletions mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,18 @@ import org.apache.spark.sql.types.DataType
@AlphaComponent
class HashingTF extends UnaryTransformer[Iterable[_], Vector, HashingTF] {

/** number of features */
/**
* number of features
* @group param
*/
val numFeatures = new IntParam(this, "numFeatures", "number of features", Some(1 << 18))
def setNumFeatures(value: Int) = set(numFeatures, value)

/** @group getParam */
def getNumFeatures: Int = get(numFeatures)

/** @group setParam */
def setNumFeatures(value: Int) = set(numFeatures, value)

override protected def createTransformFunc(paramMap: ParamMap): Iterable[_] => Vector = {
val hashingTF = new feature.HashingTF(paramMap(numFeatures))
hashingTF.transform
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,10 @@ private[feature] trait StandardScalerParams extends Params with HasInputCol with
@AlphaComponent
class StandardScaler extends Estimator[StandardScalerModel] with StandardScalerParams {

/** @group setParam */
def setInputCol(value: String): this.type = set(inputCol, value)

/** @group setParam */
def setOutputCol(value: String): this.type = set(outputCol, value)

override def fit(dataset: DataFrame, paramMap: ParamMap): StandardScalerModel = {
Expand Down Expand Up @@ -75,7 +78,10 @@ class StandardScalerModel private[ml] (
scaler: feature.StandardScalerModel)
extends Model[StandardScalerModel] with StandardScalerParams {

/** @group setParam */
def setInputCol(value: String): this.type = set(inputCol, value)

/** @group setParam */
def setOutputCol(value: String): this.type = set(outputCol, value)

override def transform(dataset: DataFrame, paramMap: ParamMap): DataFrame = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,13 @@ private[spark] abstract class Predictor[
M <: PredictionModel[FeaturesType, M]]
extends Estimator[M] with PredictorParams {

/** @group setParam */
def setLabelCol(value: String): Learner = set(labelCol, value).asInstanceOf[Learner]

/** @group setParam */
def setFeaturesCol(value: String): Learner = set(featuresCol, value).asInstanceOf[Learner]

/** @group setParam */
def setPredictionCol(value: String): Learner = set(predictionCol, value).asInstanceOf[Learner]

override def fit(dataset: DataFrame, paramMap: ParamMap): M = {
Expand Down Expand Up @@ -160,8 +165,10 @@ private[spark] abstract class Predictor[
private[spark] abstract class PredictionModel[FeaturesType, M <: PredictionModel[FeaturesType, M]]
extends Model[M] with PredictorParams {

/** @group setParam */
def setFeaturesCol(value: String): M = set(featuresCol, value).asInstanceOf[M]

/** @group setParam */
def setPredictionCol(value: String): M = set(predictionCol, value).asInstanceOf[M]

/**
Expand Down
14 changes: 14 additions & 0 deletions mllib/src/main/scala/org/apache/spark/ml/package.scala
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,19 @@ package org.apache.spark
/**
 * Spark ML is an ALPHA component that adds a new set of machine learning APIs to let users quickly
 * assemble and configure practical machine learning pipelines.
 *
 * Scaladoc group definitions shared by all spark.ml components: members tagged with
 * `@group param` / `@group setParam` / `@group getParam` are collected under the sections
 * declared here. Lower `@groupprio` values are rendered earlier on the generated page,
 * so the "Parameters" group (-5) appears first.
 *
 * @groupname param Parameters
 * @groupdesc param A list of (hyper-)parameter keys this algorithm can take. Users can set and get
 *            the parameter values through setters and getters, respectively.
 * @groupprio param -5
 *
 * @groupname setParam Parameter setters
 * @groupprio setParam 5
 *
 * @groupname getParam Parameter getters
 * @groupprio getParam 6
 *
 * @groupname Ungrouped Members
 * @groupprio Ungrouped 0
 */
package object ml
70 changes: 60 additions & 10 deletions mllib/src/main/scala/org/apache/spark/ml/param/sharedParams.scala
Original file line number Diff line number Diff line change
Expand Up @@ -24,67 +24,117 @@ package org.apache.spark.ml.param
*/

private[ml] trait HasRegParam extends Params {

  /**
   * Param for the regularization parameter.
   * @group param
   */
  val regParam: DoubleParam = new DoubleParam(this, "regParam", "regularization parameter")

  /**
   * Gets the current value of [[regParam]].
   * @group getParam
   */
  def getRegParam: Double = get(regParam)
}

private[ml] trait HasMaxIter extends Params {

  /**
   * Param for the maximum number of iterations.
   * @group param
   */
  val maxIter: IntParam = new IntParam(this, "maxIter", "max number of iterations")

  /**
   * Gets the current value of [[maxIter]].
   * @group getParam
   */
  def getMaxIter: Int = get(maxIter)
}

private[ml] trait HasFeaturesCol extends Params {

  /**
   * Param for the features column name (default: "features").
   * @group param
   */
  val featuresCol: Param[String] =
    new Param(this, "featuresCol", "features column name", Some("features"))

  /**
   * Gets the current value of [[featuresCol]].
   * @group getParam
   */
  def getFeaturesCol: String = get(featuresCol)
}

private[ml] trait HasLabelCol extends Params {

  /**
   * Param for the label column name (default: "label").
   * @group param
   */
  val labelCol: Param[String] = new Param(this, "labelCol", "label column name", Some("label"))

  /**
   * Gets the current value of [[labelCol]].
   * @group getParam
   */
  def getLabelCol: String = get(labelCol)
}

private[ml] trait HasPredictionCol extends Params {

  /**
   * Param for the prediction column name (default: "prediction").
   * @group param
   */
  val predictionCol: Param[String] =
    new Param(this, "predictionCol", "prediction column name", Some("prediction"))

  /**
   * Gets the current value of [[predictionCol]].
   * @group getParam
   */
  def getPredictionCol: String = get(predictionCol)
}

private[ml] trait HasRawPredictionCol extends Params {

  /**
   * Param for the raw prediction (a.k.a. confidence) column name
   * (default: "rawPrediction").
   * @group param
   */
  val rawPredictionCol: Param[String] =
    new Param(this, "rawPredictionCol", "raw prediction (a.k.a. confidence) column name",
      Some("rawPrediction"))

  /**
   * Gets the current value of [[rawPredictionCol]].
   * @group getParam
   */
  def getRawPredictionCol: String = get(rawPredictionCol)
}

private[ml] trait HasProbabilityCol extends Params {

  /**
   * Param for the predicted class conditional probabilities column name
   * (default: "probability").
   * @group param
   */
  val probabilityCol: Param[String] =
    new Param(this, "probabilityCol", "column name for predicted class conditional probabilities",
      Some("probability"))

  /**
   * Gets the current value of [[probabilityCol]].
   * @group getParam
   */
  def getProbabilityCol: String = get(probabilityCol)
}

private[ml] trait HasThreshold extends Params {

  /**
   * Param for the threshold used in (binary) prediction (no default).
   * @group param
   */
  val threshold: DoubleParam = new DoubleParam(this, "threshold", "threshold in prediction")

  /**
   * Gets the current value of [[threshold]].
   * @group getParam
   */
  def getThreshold: Double = get(threshold)
}

private[ml] trait HasInputCol extends Params {

  /**
   * Param for the input column name (no default).
   * @group param
   */
  val inputCol: Param[String] = new Param(this, "inputCol", "input column name")

  /**
   * Gets the current value of [[inputCol]].
   * @group getParam
   */
  def getInputCol: String = get(inputCol)
}

private[ml] trait HasOutputCol extends Params {

  /**
   * Param for the output column name (no default).
   * @group param
   */
  val outputCol: Param[String] = new Param(this, "outputCol", "output column name")

  /**
   * Gets the current value of [[outputCol]].
   * @group getParam
   */
  def getOutputCol: String = get(outputCol)
}
Loading

0 comments on commit febed9a

Please sign in to comment.