Skip to content

Commit

Permalink
[SPARK-7620] [ML] [MLLIB] Removed calling size, length in while condition to avoid extra JVM call
Browse files Browse the repository at this point in the history

Author: DB Tsai <[email protected]>

Closes apache#6137 from dbtsai/clean and squashes the following commits:

185816d [DB Tsai] fix compilation issue
f418d08 [DB Tsai] first commit
  • Loading branch information
DB Tsai authored and mengxr committed May 14, 2015
1 parent d5f18de commit d3db2fd
Show file tree
Hide file tree
Showing 13 changed files with 73 additions and 44 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,8 @@ class LogisticRegressionModel private[ml] (
rawPrediction match {
case dv: DenseVector =>
var i = 0
while (i < dv.size) {
val size = dv.size
while (i < size) {
dv.values(i) = 1.0 / (1.0 + math.exp(-dv.values(i)))
i += 1
}
Expand Down Expand Up @@ -357,7 +358,8 @@ private[classification] class MultiClassSummarizer extends Serializable {
def histogram: Array[Long] = {
val result = Array.ofDim[Long](numClasses)
var i = 0
while (i < result.length) {
val len = result.length
while (i < len) {
result(i) = distinctMap.getOrElse(i, 0L)
i += 1
}
Expand Down Expand Up @@ -480,7 +482,8 @@ private class LogisticAggregator(
var i = 0
val localThisGradientSumArray = this.gradientSumArray
val localOtherGradientSumArray = other.gradientSumArray
while (i < localThisGradientSumArray.length) {
val len = localThisGradientSumArray.length
while (i < len) {
localThisGradientSumArray(i) += localOtherGradientSumArray(i)
i += 1
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,8 @@ private[feature] object Bucketizer {
false
} else {
var i = 0
while (i < splits.length - 1) {
val n = splits.length - 1
while (i < n) {
if (splits(i) >= splits(i + 1)) return false
i += 1
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,8 @@ private object VectorIndexer {

private def addDenseVector(dv: DenseVector): Unit = {
var i = 0
while (i < dv.size) {
val size = dv.size
while (i < size) {
if (featureValueSets(i).size <= maxCategories) {
featureValueSets(i).add(dv(i))
}
Expand All @@ -201,7 +202,8 @@ private object VectorIndexer {
// TODO: This might be able to handle 0's more efficiently.
var vecIndex = 0 // index into vector
var k = 0 // index into non-zero elements
while (vecIndex < sv.size) {
val size = sv.size
while (vecIndex < size) {
val featureValue = if (k < sv.indices.length && vecIndex == sv.indices(k)) {
k += 1
sv.values(k - 1)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,8 @@ class LinearRegression extends Regressor[Vector, LinearRegression, LinearRegress
val weights = {
val rawWeights = state.x.toArray.clone()
var i = 0
while (i < rawWeights.length) {
val len = rawWeights.length
while (i < len) {
rawWeights(i) *= { if (featuresStd(i) != 0.0) yStd / featuresStd(i) else 0.0 }
i += 1
}
Expand Down Expand Up @@ -307,7 +308,8 @@ private class LeastSquaresAggregator(
val weightsArray = weights.toArray.clone()
var sum = 0.0
var i = 0
while (i < weightsArray.length) {
val len = weightsArray.length
while (i < len) {
if (featuresStd(i) != 0.0) {
weightsArray(i) /= featuresStd(i)
sum += weightsArray(i) * featuresMean(i)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ class ChiSqSelectorModel (val selectedFeatures: Array[Int]) extends VectorTransf

protected def isSorted(array: Array[Int]): Boolean = {
var i = 1
while (i < array.length) {
val len = array.length
while (i < len) {
if (array(i) < array(i-1)) return false
i += 1
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,8 @@ class L1Updater extends Updater {
// Apply proximal operator (soft thresholding)
val shrinkageVal = regParam * thisIterStepSize
var i = 0
while (i < brzWeights.length) {
val len = brzWeights.length
while (i < len) {
val wi = brzWeights(i)
brzWeights(i) = signum(wi) * max(0.0, abs(wi) - shrinkageVal)
i += 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,8 @@ class IsotonicRegressionModel (
/** Asserts the input array is monotone with the given ordering. */
private def assertOrdered(xs: Array[Double])(implicit ord: Ordering[Double]): Unit = {
var i = 1
while (i < xs.length) {
val len = xs.length
while (i < len) {
require(ord.compare(xs(i - 1), xs(i)) <= 0,
s"Elements (${xs(i - 1)}, ${xs(i)}) are not ordered.")
i += 1
Expand Down Expand Up @@ -329,11 +330,12 @@ class IsotonicRegression private (private var isotonic: Boolean) extends Seriali
}

var i = 0
while (i < input.length) {
val len = input.length
while (i < len) {
var j = i

// Find monotonicity violating sequence, if any.
while (j < input.length - 1 && input(j)._1 > input(j + 1)._1) {
while (j < len - 1 && input(j)._1 > input(j + 1)._1) {
j = j + 1
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,23 +70,30 @@ class MultivariateOnlineSummarizer extends MultivariateStatisticalSummary with S
require(n == sample.size, s"Dimensions mismatch when adding new sample." +
s" Expecting $n but got ${sample.size}.")

val localCurrMean= currMean
val localCurrM2n = currM2n
val localCurrM2 = currM2
val localCurrL1 = currL1
val localNnz = nnz
val localCurrMax = currMax
val localCurrMin = currMin
sample.foreachActive { (index, value) =>
if (value != 0.0) {
if (currMax(index) < value) {
currMax(index) = value
if (localCurrMax(index) < value) {
localCurrMax(index) = value
}
if (currMin(index) > value) {
currMin(index) = value
if (localCurrMin(index) > value) {
localCurrMin(index) = value
}

val prevMean = currMean(index)
val prevMean = localCurrMean(index)
val diff = value - prevMean
currMean(index) = prevMean + diff / (nnz(index) + 1.0)
currM2n(index) += (value - currMean(index)) * diff
currM2(index) += value * value
currL1(index) += math.abs(value)
localCurrMean(index) = prevMean + diff / (localNnz(index) + 1.0)
localCurrM2n(index) += (value - localCurrMean(index)) * diff
localCurrM2(index) += value * value
localCurrL1(index) += math.abs(value)

nnz(index) += 1.0
localNnz(index) += 1.0
}
}

Expand Down Expand Up @@ -130,14 +137,14 @@ class MultivariateOnlineSummarizer extends MultivariateStatisticalSummary with S
}
} else if (totalCnt == 0 && other.totalCnt != 0) {
this.n = other.n
this.currMean = other.currMean.clone
this.currM2n = other.currM2n.clone
this.currM2 = other.currM2.clone
this.currL1 = other.currL1.clone
this.currMean = other.currMean.clone()
this.currM2n = other.currM2n.clone()
this.currM2 = other.currM2.clone()
this.currL1 = other.currL1.clone()
this.totalCnt = other.totalCnt
this.nnz = other.nnz.clone
this.currMax = other.currMax.clone
this.currMin = other.currMin.clone
this.nnz = other.nnz.clone()
this.currMax = other.currMax.clone()
this.currMin = other.currMin.clone()
}
this
}
Expand Down Expand Up @@ -165,7 +172,8 @@ class MultivariateOnlineSummarizer extends MultivariateStatisticalSummary with S
if (denominator > 0.0) {
val deltaMean = currMean
var i = 0
while (i < currM2n.size) {
val len = currM2n.length
while (i < len) {
realVariance(i) =
currM2n(i) + deltaMean(i) * deltaMean(i) * nnz(i) * (totalCnt - nnz(i)) / totalCnt
realVariance(i) /= denominator
Expand Down Expand Up @@ -211,7 +219,8 @@ class MultivariateOnlineSummarizer extends MultivariateStatisticalSummary with S
val realMagnitude = Array.ofDim[Double](n)

var i = 0
while (i < currM2.size) {
val len = currM2.length
while (i < len) {
realMagnitude(i) = math.sqrt(currM2(i))
i += 1
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -205,8 +205,10 @@ private[stat] object ChiSqTest extends Logging {
val colSums = new Array[Double](numCols)
val rowSums = new Array[Double](numRows)
val colMajorArr = counts.toArray
val colMajorArrLen = colMajorArr.length

var i = 0
while (i < colMajorArr.size) {
while (i < colMajorArrLen) {
val elem = colMajorArr(i)
if (elem < 0.0) {
throw new IllegalArgumentException("Contingency table cannot contain negative entries.")
Expand All @@ -220,7 +222,7 @@ private[stat] object ChiSqTest extends Logging {
// second pass to collect statistic
var statistic = 0.0
var j = 0
while (j < colMajorArr.size) {
while (j < colMajorArrLen) {
val col = j / numRows
val colSum = colSums(col)
if (colSum == 0.0) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -111,11 +111,12 @@ private[tree] abstract class ImpurityCalculator(val stats: Array[Double]) {
* Add the stats from another calculator into this one, modifying and returning this calculator.
*/
def add(other: ImpurityCalculator): ImpurityCalculator = {
require(stats.size == other.stats.size,
require(stats.length == other.stats.length,
s"Two ImpurityCalculator instances cannot be added with different counts sizes." +
s" Sizes are ${stats.size} and ${other.stats.size}.")
s" Sizes are ${stats.length} and ${other.stats.length}.")
var i = 0
while (i < other.stats.size) {
val len = other.stats.length
while (i < len) {
stats(i) += other.stats(i)
i += 1
}
Expand All @@ -127,11 +128,12 @@ private[tree] abstract class ImpurityCalculator(val stats: Array[Double]) {
* calculator.
*/
def subtract(other: ImpurityCalculator): ImpurityCalculator = {
require(stats.size == other.stats.size,
require(stats.length == other.stats.length,
s"Two ImpurityCalculator instances cannot be subtracted with different counts sizes." +
s" Sizes are ${stats.size} and ${other.stats.size}.")
s" Sizes are ${stats.length} and ${other.stats.length}.")
var i = 0
while (i < other.stats.size) {
val len = other.stats.length
while (i < len) {
stats(i) -= other.stats(i)
i += 1
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,8 @@ object LinearDataGenerator {

x.foreach { v =>
var i = 0
while (i < v.length) {
val len = v.length
while (i < len) {
v(i) = (v(i) - 0.5) * math.sqrt(12.0 * xVariance(i)) + xMean(i)
i += 1
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,8 @@ private object BucketizerSuite extends FunSuite {
def linearSearchForBuckets(splits: Array[Double], feature: Double): Double = {
require(feature >= splits.head)
var i = 0
while (i < splits.length - 1) {
val n = splits.length - 1
while (i < n) {
if (feature < splits(i + 1)) return i
i += 1
}
Expand All @@ -138,7 +139,8 @@ private object BucketizerSuite extends FunSuite {
s" ${splits.mkString(", ")}")
}
var i = 0
while (i < splits.length - 1) {
val n = splits.length - 1
while (i < n) {
// Split i should fall in bucket i.
testFeature(splits(i), i)
// Value between splits i,i+1 should be in i, which is also true if the (i+1)-th split is inf.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,8 @@ object LogisticRegressionSuite {
// This doesn't work if `vector` is a sparse vector.
val vectorArray = vector.toArray
var i = 0
while (i < vectorArray.length) {
val len = vectorArray.length
while (i < len) {
vectorArray(i) = vectorArray(i) * math.sqrt(xVariance(i)) + xMean(i)
i += 1
}
Expand Down

0 comments on commit d3db2fd

Please sign in to comment.