Skip to content

Commit

Permalink
add VectorRDDs with a converter from RDD[Array[Double]]
Browse files Browse the repository at this point in the history
  • Loading branch information
mengxr committed Mar 21, 2014
1 parent b28ba2f commit 238ba34
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.mllib.linalg.rdd

import org.apache.spark.rdd.RDD
import org.apache.spark.mllib.linalg.{Vectors, Vector}

/**
 * Helpers for building an `RDD[Vector]` from other RDD representations.
 */
object VectorRDDs {

  /**
   * Builds an `RDD[Vector]` from an `RDD[Array[Double]]` by applying `Vectors.dense`
   * to every element.
   *
   * @param rdd the input RDD whose elements are arrays of doubles
   * @return an RDD holding one `Vector` per input array, produced by `rdd.map`
   */
  def fromArrayRDD(rdd: RDD[Array[Double]]): RDD[Vector] = {
    // Explicit lambda rather than a bare method reference, so the call stays
    // unambiguous if `Vectors.dense` has more than one overload.
    rdd.map { values => Vectors.dense(values) }
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.mllib.linalg.rdd

import org.scalatest.FunSuite

import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.util.LocalSparkContext

/**
 * Tests for [[VectorRDDs]].
 */
class VectorRDDsSuite extends FunSuite with LocalSparkContext {

  test("from array rdd") {
    val data = Seq(Array(1.0, 2.0), Array(3.0, 4.0))
    // `sc` is not defined in this class; presumably it is supplied by LocalSparkContext.
    val arrayRdd = sc.parallelize(data, 2)
    val vectorRdd = VectorRDDs.fromArrayRDD(arrayRdd)

    // Expected vectors are computed from the local fixture, so the check does not
    // depend on a second Spark job over the input RDD.
    val expected = data.map(values => Vectors.dense(values))

    // Compare strict sequences rather than views: a view is only guaranteed to compare
    // element-wise on Scala versions where views are `Seq`s (2.12 and earlier); newer
    // collections fall back to reference equality for views.
    assert(expected === vectorRdd.collect().toSeq)
  }
}

0 comments on commit 238ba34

Please sign in to comment.