Skip to content

Commit

Permalink
[SPARK-8525][MLLIB] fix LabeledPoint parser when there is a whitespace on specific position
Browse files Browse the repository at this point in the history
  • Loading branch information
fe2s committed Jun 23, 2015
1 parent 47c1d56 commit c1abc2b
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ package org.apache.spark.mllib.util

import java.util.StringTokenizer

import org.apache.commons.lang.StringUtils.isBlank

import scala.collection.mutable.{ArrayBuilder, ListBuffer}

import org.apache.spark.SparkException
Expand Down Expand Up @@ -98,6 +100,8 @@ private[mllib] object NumericParser {
}
} else if (token == ")") {
parsing = false
} else if (isBlank(token)){
// ignore whitespaces between delim chars, e.g. ", ["
} else {
// expecting a number
items.append(parseDouble(token))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,11 @@ class LabeledPointSuite extends SparkFunSuite {
}
}

test("parse labeled points with whitespaces") {
  // The input has a space after each comma, including the one right before "[".
  val expected = LabeledPoint(0.0, Vectors.dense(1.0, 2.0))
  val actual = LabeledPoint.parse("(0.0, [1.0, 2.0])")
  assert(actual === expected)
}

test("parse labeled points with v0.9 format") {
val point = LabeledPoint.parse("1.0,1.0 0.0 -2.0")
assert(point === LabeledPoint(1.0, Vectors.dense(1.0, 0.0, -2.0)))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,4 +37,11 @@ class NumericParserSuite extends SparkFunSuite {
}
}
}

test("parser with whitespaces") {
  // Same whitespace-after-comma input as a tuple of (number, array); the result is
  // inspected positionally, so it is cast to a Seq before indexing.
  val fields = NumericParser.parse("(0.0, [1.0, 2.0])").asInstanceOf[Seq[_]]

  val label = fields(0).asInstanceOf[Double]
  assert(label === 0.0)

  val features = fields(1).asInstanceOf[Array[Double]]
  assert(features === Array(1.0, 2.0))
}
}

0 comments on commit c1abc2b

Please sign in to comment.