Skip to content

Commit

Permalink
add minSplits to libSVMFile
Browse files (browse the repository at this point in the history)
  • Loading branch information
mengxr committed Apr 1, 2014
1 parent da25e24 commit f7da54b
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 3 deletions.
7 changes: 6 additions & 1 deletion mllib/src/main/scala/org/apache/spark/mllib/MLContext.scala
Original file line number Diff line number Diff line change
Expand Up @@ -39,16 +39,21 @@ class MLContext(val sparkContext: SparkContext) {
* where the feature indices are converted to zero-based.
*
* @param path file or directory path in any Hadoop-supported file system URI
* @param minSplits min number of partitions, default: sparkContext.defaultMinSplits
* @param numFeatures number of features, which will be determined from the input data if a
* non-positive value is given. The default value is 0.
* @param labelParser parser for labels, default: _.toDouble
* @return labeled data stored as an RDD[LabeledPoint]
*/
def libSVMFile(
path: String,
minSplits: Int = sparkContext.defaultMinSplits,
numFeatures: Int = 0,
labelParser: String => Double = _.toDouble): RDD[LabeledPoint] = {
val parsed = sparkContext.textFile(path).map(_.trim).filter(!_.isEmpty).map(_.split(' '))
val parsed = sparkContext.textFile(path, minSplits)
.map(_.trim)
.filter(!_.isEmpty)
.map(_.split(' '))
// Determine number of features.
val d = if (numFeatures > 0) {
numFeatures
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@ class MLContextSuite extends FunSuite with LocalSparkContext {

val mlc = MLContext(sc)

val pointsWithNumFeatures = mlc.libSVMFile(tempDir.toURI.toString, 6).collect()
val pointsWithoutNumFeatures = mlc.libSVMFile(tempDir.toURI.toString, 0).collect()
val pointsWithNumFeatures = mlc.libSVMFile(tempDir.toURI.toString, numFeatures = 6).collect()
val pointsWithoutNumFeatures = mlc.libSVMFile(tempDir.toURI.toString).collect()

for (points <- Seq(pointsWithNumFeatures, pointsWithoutNumFeatures)) {
assert(points.length === 3)
Expand Down

0 comments on commit f7da54b

Please sign in to comment.