Word2Vec: re-indent the `minCount` field and its `setMinCount` setter inside
class Word2Vec, and add a Scaladoc comment to the setter. No logic changes.
NOTE(review): line breaks and leading whitespace were reconstructed from a
whitespace-collapsed copy of this patch; verify the exact indentation against
the upstream commit before applying.

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
index 77b229355d7af..249c75b10ecf3 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
@@ -121,14 +121,17 @@ class Word2Vec extends Serializable with Logging {
 
   /** context words from [-window, window] */
   private val window = 5
-
-/** minimum frequency to consider a vocabulary word */
-private var minCount = 5
-
-def setMinCount(minCount: Int): this.type = {
-this.minCount = minCount
-this
-}
+
+  /** minimum frequency to consider a vocabulary word */
+  private var minCount = 5
+
+  /** Sets the minimum frequency a token must appear to be included in the word2vec model's
+   * vocabulary (default: 5).
+   */
+  def setMinCount(minCount: Int): this.type = {
+    this.minCount = minCount
+    this
+  }
 
   private var trainWordsCount = 0
   private var vocabSize = 0