Skip to content

Commit

Permalink
change default numFeatures to 2^20 in HashingTF
Browse files: browse the repository at this point in the history
change annotation from DeveloperApi to Experimental in Normalizer and StandardScaler
  • Loading branch information
mengxr committed Aug 6, 2014
1 parent 883e122 commit 773c1a9
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,12 @@ import org.apache.spark.util.Utils
* :: Experimental ::
* Maps a sequence of terms to their term frequencies using the hashing trick.
*
* @param numFeatures number of features (default: 1000000)
* @param numFeatures number of features (default: 2^20^)
*/
@Experimental
class HashingTF(val numFeatures: Int) extends Serializable {

def this() = this(1000000)
def this() = this(1 << 20)

/**
* Returns the index of the input term.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,11 @@ package org.apache.spark.mllib.feature

import breeze.linalg.{DenseVector => BDV, SparseVector => BSV}

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.annotation.Experimental
import org.apache.spark.mllib.linalg.{Vector, Vectors}

/**
* :: DeveloperApi ::
* :: Experimental ::
* Normalizes samples individually to unit L^p^ norm
*
* For any 1 <= p < Double.PositiveInfinity, normalizes samples using
Expand All @@ -33,7 +33,7 @@ import org.apache.spark.mllib.linalg.{Vector, Vectors}
*
* @param p Normalization in L^p^ space, p = 2 by default.
*/
@DeveloperApi
@Experimental
class Normalizer(p: Double) extends VectorTransformer {

def this() = this(2)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,22 +19,22 @@ package org.apache.spark.mllib.feature

import breeze.linalg.{DenseVector => BDV, SparseVector => BSV, Vector => BV}

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.annotation.Experimental
import org.apache.spark.mllib.linalg.{Vector, Vectors}
import org.apache.spark.mllib.rdd.RDDFunctions._
import org.apache.spark.mllib.stat.MultivariateOnlineSummarizer
import org.apache.spark.rdd.RDD

/**
* :: DeveloperApi ::
* :: Experimental ::
* Standardizes features by removing the mean and scaling to unit variance using column summary
* statistics on the samples in the training set.
*
* @param withMean False by default. Centers the data with mean before scaling. It will build a
* dense output, so this does not work on sparse input and will raise an exception.
* @param withStd True by default. Scales the data to unit standard deviation.
*/
@DeveloperApi
@Experimental
class StandardScaler(withMean: Boolean, withStd: Boolean) extends VectorTransformer {

def this() = this(false, true)
Expand Down

0 comments on commit 773c1a9

Please sign in to comment.