From 6e4f8ca6ea4bab6ffb851d70c33741df714f1d5f Mon Sep 17 00:00:00 2001
From: Yuhao Yang
Date: Mon, 1 Jun 2015 19:22:17 +0800
Subject: [PATCH] add check for ascending order

---
Review notes. They sit between the three-dash separator and the first
"diff --git" line, so they are not part of the commit message and are
skipped when the patch is applied.

* MLUtils.scala: the single require(...) on indices(0) is replaced by a
  while loop over every parsed index. The loop throws
  IllegalArgumentException("indices should be one-based and in ascending
  order") as soon as an index is <= the one before it. `previous` starts
  at -1, so the first index must be >= 0 and each later index must be
  strictly greater than its predecessor; repeated indices are rejected too.
* Assumption to confirm: the "one-based" wording relies on the indices
  having already been shifted down by one before this hunk. That
  conversion is not visible in the hunk context.
* MLUtilsSuite.scala: the renamed zero-based test and the new
  ascending-order test both name IllegalArgumentException in their titles
  but assert intercept[SparkException]. Presumably the exception is raised
  inside the job started by collect() and reaches the caller wrapped;
  confirm before relying on the test titles.
* The unused local `pointsWithoutNumFeatures` is dropped from the existing
  test.
* The new test feeds the record "0 3:4.0 2:5.0 6:6.0", whose second index
  is smaller than its first.

 .../org/apache/spark/mllib/util/MLUtils.scala | 15 +++++++++++--
 .../spark/mllib/util/MLUtilsSuite.scala       | 21 +++++++++++++++++--
 2 files changed, 32 insertions(+), 4 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala
index 5ac3672a52d64..5e56945981f9c 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala
@@ -82,8 +82,19 @@ object MLUtils {
           val value = indexAndValue(1).toDouble
           (index, value)
         }.unzip
-        require(indices.size == 0 || indices(0) >= 0,
-          "indices should be one-based in LIBSVM format")
+
+        // check if indices is one-based and in ascending order
+        var previous = -1
+        var i = 0
+        val indicesLength = indices.size
+        while (i < indicesLength) {
+          if (indices(i) <= previous) {
+            throw new IllegalArgumentException("indices should be one-based and in ascending order")
+          }
+          previous = indices(i)
+          i += 1
+        }
+
         (label, indices.toArray, values.toArray)
       }
 
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/util/MLUtilsSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/util/MLUtilsSuite.scala
index fcb7bb434d1c7..27050bde16ef3 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/util/MLUtilsSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/util/MLUtilsSuite.scala
@@ -110,7 +110,7 @@ class MLUtilsSuite extends FunSuite with MLlibTestSparkContext {
     Utils.deleteRecursively(tempDir)
   }
 
-  test("loadLibSVMFile throws SparkException when passing a zero-based vector") {
+  test("loadLibSVMFile throws IllegalArgumentException when indices is zero-based") {
     val lines =
       """
         |0
@@ -122,7 +122,24 @@ class MLUtilsSuite extends FunSuite with MLlibTestSparkContext {
     val path = tempDir.toURI.toString
 
     intercept[SparkException] {
-      val pointsWithoutNumFeatures = loadLibSVMFile(sc, path).collect()
+      loadLibSVMFile(sc, path).collect()
+    }
+    Utils.deleteRecursively(tempDir)
+  }
+
+  test("loadLibSVMFile throws IllegalArgumentException when indices is not in ascending order") {
+    val lines =
+      """
+        |0
+        |0 3:4.0 2:5.0 6:6.0
+      """.stripMargin
+    val tempDir = Utils.createTempDir()
+    val file = new File(tempDir.getPath, "part-00000")
+    Files.write(lines, file, Charsets.US_ASCII)
+    val path = tempDir.toURI.toString
+
+    intercept[SparkException] {
+      loadLibSVMFile(sc, path).collect()
     }
     Utils.deleteRecursively(tempDir)
   }