Normalization in Game - part 3:
 - Unit tests for normalization
 - New "unit" test for GameEstimator while testing Game normalization
 - Small improvement in build files
 - Small cleanups
fastier-li committed Mar 14, 2017
1 parent 049e5e5 commit d0924f8
Showing 143 changed files with 713 additions and 810 deletions.
8 changes: 0 additions & 8 deletions .gitignore
@@ -17,13 +17,5 @@ photon-ml/LOGISTIC*
photon-ml/LINEAR*
photon-ml/POISSON*
photon-ml/SMOOTHED*
<<<<<<< HEAD
derby.log
metastore_db/
=======
derby.log # when running spark-shell here
metastore_db # also when running spark-shell here
derby.log
metastore_db/

>>>>>>> Normalization in Game - part 2:
4 changes: 0 additions & 4 deletions build-scripts/rat.gradle
@@ -16,10 +16,6 @@
* specific language governing permissions and limitations
* under the License.
*/

import org.gradle.api.Plugin
import org.gradle.api.Project
import org.gradle.api.Task
import org.gradle.api.internal.project.IsolatedAntBuilder

apply plugin: RatPlugin
9 changes: 8 additions & 1 deletion build.gradle
@@ -47,6 +47,11 @@ allprojects {

apply from: 'build-scripts/rat.gradle'

// The gradle variables defined here are visible in sub-projects
ext {
sparkVersion = '1.6.2'
}

rat {
excludes = [
'*.patch',
@@ -79,12 +84,14 @@
'**/README.md',
'LICENSE',
'NOTICE',
'derby.log',
'metastore_db/**',
'gradle.properties',
'log4j.properties',
'photon-api/src/integTest/resources/**',
'photon-api/src/test/resources/**',
'photon-client/src/integTest/resources/**',
'tests.sh'
'travis/tests.sh'
]
}
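The new ext block above is the mechanism the in-line comment describes: properties declared on the root project are visible to every sub-project. As an illustration only (no such line is part of this commit), a sub-project could pin its Spark dependency against it; the compile configuration and the spark-core_2.10 coordinates below are assumptions:

// Hypothetical sub-project build.gradle (sketch): reads the root project's ext.
dependencies {
    // Resolves to 1.6.2 via the sparkVersion defined in the root build.gradle.
    compile "org.apache.spark:spark-core_2.10:$sparkVersion"
}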

@@ -16,6 +16,7 @@ package com.linkedin.photon.build.plugins

import org.gradle.api.Plugin
import org.gradle.api.initialization.Settings

import static ScalaUtils.getScalaVersionSuffix

/**
@@ -16,6 +16,7 @@ package com.linkedin.photon.build.plugins

import org.gradle.api.Plugin
import org.gradle.api.Project

import static ScalaUtils.getScalaVersionSuffix

/**
8 changes: 4 additions & 4 deletions dev-scripts/libsvm_text_to_trainingexample_avro.py
@@ -18,13 +18,13 @@
[Usage]:
python libsvm_text_to_trainingexample_avro.py [input_path] [output_schema_path] [output_path] (optional: -r for regression)
"""
import avro.schema
import getopt
import os
import sys
import getopt
from avro.datafile import DataFileWriter
from avro.io import DatumWriter

import avro.schema
from avro.datafile import DataFileReader, DataFileWriter
from avro.io import DatumReader, DatumWriter

def main():
if len(sys.argv) <= 1:
@@ -17,8 +17,8 @@ package com.linkedin.photon.ml.evaluation
import java.util.Random

import org.mockito.Mockito._
import org.testng.annotations.Test
import org.testng.Assert.assertEquals
import org.testng.annotations.Test

import com.linkedin.photon.ml.test.SparkTestUtils

@@ -67,16 +67,10 @@ class DistributedObjectiveFunctionTest extends SparkTestUtils {
def getDifferentiableFunctions: Array[Array[Object]] = diffTasks.flatMap {
case TaskType.LOGISTIC_REGRESSION =>
treeAggregateDepths.flatMap { treeAggDepth =>
def lossFuncBuilder = (sc: SparkContext) => DistributedGLMLossFunction.create(
NO_REG_CONFIGURATION_MOCK,
LogisticLossFunction,
sc,
treeAggDepth)
def lossFuncWithL2Builder = (sc: SparkContext) => DistributedGLMLossFunction.create(
L2_REG_CONFIGURATION_MOCK,
LogisticLossFunction,
sc,
treeAggDepth)
def lossFuncBuilder = (sc: SparkContext) =>
DistributedGLMLossFunction(sc, NO_REG_CONFIGURATION_MOCK, treeAggDepth)(LogisticLossFunction)
def lossFuncWithL2Builder = (sc: SparkContext) =>
DistributedGLMLossFunction(sc, L2_REG_CONFIGURATION_MOCK, treeAggDepth)(LogisticLossFunction)

binaryClassificationDataSetGenerationFuncs.flatMap { dataGenFunc =>
Seq[(Object, Object)]((lossFuncBuilder, dataGenFunc), (lossFuncWithL2Builder, dataGenFunc))
@@ -85,16 +79,10 @@ class DistributedObjectiveFunctionTest extends SparkTestUtils {

case TaskType.LINEAR_REGRESSION =>
treeAggregateDepths.flatMap { treeAggDepth =>
def lossFuncBuilder = (sc: SparkContext) => DistributedGLMLossFunction.create(
NO_REG_CONFIGURATION_MOCK,
SquaredLossFunction,
sc,
treeAggDepth)
def lossFuncWithL2Builder = (sc: SparkContext) => DistributedGLMLossFunction.create(
L2_REG_CONFIGURATION_MOCK,
SquaredLossFunction,
sc,
treeAggDepth)
def lossFuncBuilder = (sc: SparkContext) =>
DistributedGLMLossFunction(sc, NO_REG_CONFIGURATION_MOCK, treeAggDepth)(SquaredLossFunction)
def lossFuncWithL2Builder = (sc: SparkContext) =>
DistributedGLMLossFunction(sc, L2_REG_CONFIGURATION_MOCK, treeAggDepth)(SquaredLossFunction)

linearRegressionDataSetGenerationFuncs.flatMap { dataGenFunc =>
Seq[(Object, Object)]((lossFuncBuilder, dataGenFunc), (lossFuncWithL2Builder, dataGenFunc))
@@ -103,16 +91,10 @@ class DistributedObjectiveFunctionTest extends SparkTestUtils {

case TaskType.POISSON_REGRESSION =>
treeAggregateDepths.flatMap { treeAggDepth =>
def lossFuncBuilder = (sc: SparkContext) => DistributedGLMLossFunction.create(
NO_REG_CONFIGURATION_MOCK,
PoissonLossFunction,
sc,
treeAggDepth)
def lossFuncWithL2Builder = (sc: SparkContext) => DistributedGLMLossFunction.create(
L2_REG_CONFIGURATION_MOCK,
PoissonLossFunction,
sc,
treeAggDepth)
def lossFuncBuilder = (sc: SparkContext) =>
DistributedGLMLossFunction(sc, NO_REG_CONFIGURATION_MOCK, treeAggDepth)(PoissonLossFunction)
def lossFuncWithL2Builder = (sc: SparkContext) =>
DistributedGLMLossFunction(sc, L2_REG_CONFIGURATION_MOCK, treeAggDepth)(PoissonLossFunction)

poissonRegressionDataSetGenerationFuncs.flatMap { dataGenFunc =>
Seq[(Object, Object)]((lossFuncBuilder, dataGenFunc), (lossFuncWithL2Builder, dataGenFunc))
@@ -121,14 +103,8 @@ class DistributedObjectiveFunctionTest extends SparkTestUtils {

case TaskType.SMOOTHED_HINGE_LOSS_LINEAR_SVM =>
treeAggregateDepths.flatMap { treeAggDepth =>
def lossFuncBuilder = (sc: SparkContext) => DistributedSmoothedHingeLossFunction.create(
NO_REG_CONFIGURATION_MOCK,
sc,
treeAggDepth)
def lossFuncWithL2Builder = (sc: SparkContext) => DistributedSmoothedHingeLossFunction.create(
L2_REG_CONFIGURATION_MOCK,
sc,
treeAggDepth)
def lossFuncBuilder = (sc: SparkContext) => DistributedSmoothedHingeLossFunction(sc, NO_REG_CONFIGURATION_MOCK, treeAggDepth)
def lossFuncWithL2Builder = (sc: SparkContext) => DistributedSmoothedHingeLossFunction(sc, L2_REG_CONFIGURATION_MOCK, treeAggDepth)

binaryClassificationDataSetGenerationFuncs.flatMap { dataGenFunc =>
Seq[(Object, Object)]((lossFuncBuilder, dataGenFunc), (lossFuncWithL2Builder, dataGenFunc))
@@ -149,16 +125,10 @@ class DistributedObjectiveFunctionTest extends SparkTestUtils {
def getTwiceDifferentiableFunctions: Array[Array[Object]] = twiceDiffTasks.flatMap {
case TaskType.LOGISTIC_REGRESSION =>
treeAggregateDepths.flatMap { treeAggDepth =>
def lossFuncBuilder = (sc: SparkContext) => DistributedGLMLossFunction.create(
NO_REG_CONFIGURATION_MOCK,
LogisticLossFunction,
sc,
treeAggDepth)
def lossFuncWithL2Builder = (sc: SparkContext) => DistributedGLMLossFunction.create(
L2_REG_CONFIGURATION_MOCK,
LogisticLossFunction,
sc,
treeAggDepth)
def lossFuncBuilder = (sc: SparkContext) =>
DistributedGLMLossFunction(sc, NO_REG_CONFIGURATION_MOCK, treeAggDepth)(LogisticLossFunction)
def lossFuncWithL2Builder = (sc: SparkContext) =>
DistributedGLMLossFunction(sc, L2_REG_CONFIGURATION_MOCK, treeAggDepth)(LogisticLossFunction)

binaryClassificationDataSetGenerationFuncs.flatMap { dataGenFunc =>
Seq((lossFuncBuilder, dataGenFunc), (lossFuncWithL2Builder, dataGenFunc))
@@ -167,16 +137,10 @@ class DistributedObjectiveFunctionTest extends SparkTestUtils {

case TaskType.LINEAR_REGRESSION =>
treeAggregateDepths.flatMap { treeAggDepth =>
def lossFuncBuilder = (sc: SparkContext) => DistributedGLMLossFunction.create(
NO_REG_CONFIGURATION_MOCK,
SquaredLossFunction,
sc,
treeAggDepth)
def lossFuncWithL2Builder = (sc: SparkContext) => DistributedGLMLossFunction.create(
L2_REG_CONFIGURATION_MOCK,
SquaredLossFunction,
sc,
treeAggDepth)
def lossFuncBuilder = (sc: SparkContext) =>
DistributedGLMLossFunction(sc, NO_REG_CONFIGURATION_MOCK, treeAggDepth)(SquaredLossFunction)
def lossFuncWithL2Builder = (sc: SparkContext) =>
DistributedGLMLossFunction(sc, L2_REG_CONFIGURATION_MOCK, treeAggDepth)(SquaredLossFunction)

linearRegressionDataSetGenerationFuncs.flatMap { dataGenFunc =>
Seq((lossFuncBuilder, dataGenFunc), (lossFuncWithL2Builder, dataGenFunc))
@@ -185,16 +149,10 @@ class DistributedObjectiveFunctionTest extends SparkTestUtils {

case TaskType.POISSON_REGRESSION =>
treeAggregateDepths.flatMap { treeAggDepth =>
def lossFuncBuilder = (sc: SparkContext) => DistributedGLMLossFunction.create(
NO_REG_CONFIGURATION_MOCK,
PoissonLossFunction,
sc,
treeAggDepth)
def lossFuncWithL2Builder = (sc: SparkContext) => DistributedGLMLossFunction.create(
L2_REG_CONFIGURATION_MOCK,
PoissonLossFunction,
sc,
treeAggDepth)
def lossFuncBuilder = (sc: SparkContext) =>
DistributedGLMLossFunction(sc, NO_REG_CONFIGURATION_MOCK, treeAggDepth)(PoissonLossFunction)
def lossFuncWithL2Builder = (sc: SparkContext) =>
DistributedGLMLossFunction(sc, L2_REG_CONFIGURATION_MOCK, treeAggDepth)(PoissonLossFunction)

poissonRegressionDataSetGenerationFuncs.flatMap { dataGenFunc =>
Seq((lossFuncBuilder, dataGenFunc), (lossFuncWithL2Builder, dataGenFunc))
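
Every hunk in this file makes the same mechanical change: the DistributedGLMLossFunction.create(...) and DistributedSmoothedHingeLossFunction.create(...) factories become companion-object apply methods, with the GLM variant taking its pointwise loss in a curried second parameter list. A minimal compilable sketch of that shape, using stand-in types since the real photon-ml classes are not part of this diff:

object FactoryShapeSketch {

  // Stand-ins (assumptions) for the real photon-ml types.
  trait PointwiseLossFunction
  object LogisticLossFunction extends PointwiseLossFunction
  class SparkContextStub
  case class GLMOptimizationConfiguration()

  class DistributedGLMLossFunction private (
      sc: SparkContextStub,
      configuration: GLMOptimizationConfiguration,
      treeAggregateDepth: Int,
      lossFunction: PointwiseLossFunction)

  object DistributedGLMLossFunction {
    // Curried apply matching call sites such as:
    //   DistributedGLMLossFunction(sc, config, treeAggDepth)(LogisticLossFunction)
    def apply(
        sc: SparkContextStub,
        configuration: GLMOptimizationConfiguration,
        treeAggregateDepth: Int)(
        lossFunction: PointwiseLossFunction): DistributedGLMLossFunction =
      new DistributedGLMLossFunction(sc, configuration, treeAggregateDepth, lossFunction)
  }
}

Currying keeps the Spark wiring in the first argument list while the loss, which is what varies across task types, is supplied separately.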
@@ -25,7 +25,7 @@ import com.linkedin.photon.ml.supervised.regression.PoissonRegressionModel
import com.linkedin.photon.ml.test.SparkTestUtils

/**
* Integration tests for GameModel.
* Integration tests for GAMEModel.
*/
class GAMEModelTest extends SparkTestUtils {

@@ -238,7 +238,7 @@ class GAMEModelTest extends SparkTestUtils {
val glmRE2RDD = sc.parallelize(List(("RE2Item1", glmRE21), ("RE2Item2", glmRE22), ("RE2Item3", glmRE23)))
val RE2Model = new RandomEffectModel(glmRE2RDD, "REModel2", "RE2Features")

// This Game model has 1 fixed effect, and 2 different random effect models
// This GAME model has 1 fixed effect, and 2 different random effect models
GAMEModel(("fixed", FEModel), ("RE1", RE1Model), ("RE2", RE2Model))
}

@@ -276,7 +276,7 @@ class GAMEModelTest extends SparkTestUtils {
val glmRE2RDD = sc.parallelize(List(("RE2Item1", glmRE21), ("RE2Item2", glmRE22), ("RE2Item3", glmRE23)))
val RE2Model = new RandomEffectModel(glmRE2RDD, "REModel2", "RE2Features")

// This Game model has 1 fixed effect, and 2 different random effect models
// This GAME model has 1 fixed effect, and 2 different random effect models
GAMEModel(("fixed", FEModel), ("RE1", RE1Model), ("RE2", RE2Model))
}
}
@@ -60,7 +60,7 @@ class MatrixFactorizationModelTest extends SparkTestUtils {
val rowRange = 0 until numRows
val colRange = 0 until numCols

// generate the synthetic game data and scores
// generate the synthetic GAME data and scores
val (gameData, syntheticScores) = rowRange.zip(colRange).map { case (row, col) =>
val rowId = row.toString
val colId = col.toString
@@ -174,25 +174,13 @@ class NormalizationContextTest extends SparkTestUtils {
val configuration = GLMOptimizationConfiguration()
val objectiveFunction = taskType match {
case TaskType.LOGISTIC_REGRESSION =>
DistributedGLMLossFunction.create(
configuration,
LogisticLossFunction,
sc,
treeAggregateDepth = 1)
DistributedGLMLossFunction(sc, configuration, treeAggregateDepth = 1)(LogisticLossFunction)

case TaskType.LINEAR_REGRESSION =>
DistributedGLMLossFunction.create(
configuration,
SquaredLossFunction,
sc,
treeAggregateDepth = 1)
DistributedGLMLossFunction(sc, configuration, treeAggregateDepth = 1)(SquaredLossFunction)

case TaskType.POISSON_REGRESSION =>
DistributedGLMLossFunction.create(
configuration,
PoissonLossFunction,
sc,
treeAggregateDepth = 1)
DistributedGLMLossFunction(sc, configuration, treeAggregateDepth = 1)(PoissonLossFunction)
}
val optimizerNorm = optimizerType match {
case OptimizerType.LBFGS =>
@@ -21,14 +21,13 @@ import org.apache.spark.rdd.RDD
import org.testng.Assert._
import org.testng.annotations.Test

import com.linkedin.photon.ml.{ModelTraining, TaskType}
import com.linkedin.photon.ml.data.LabeledPoint
import com.linkedin.photon.ml.model.Coefficients
import com.linkedin.photon.ml.optimization.{L2RegularizationContext, OptimizerType}
import com.linkedin.photon.ml.stat.BasicStatisticalSummary
import com.linkedin.photon.ml.supervised.classification.{BinaryClassifier, LogisticRegressionModel}
import com.linkedin.photon.ml.test.SparkTestUtils

import com.linkedin.photon.ml.stat.BasicStatisticalSummary
import com.linkedin.photon.ml.{ModelTraining, TaskType}

/**
* All feature normalizations are affine transformations, so the resulting models without regularization should be
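
The truncated class comment above states a standard fact. As a sketch (the exact transform photon-ml applies is not shown in this diff), suppose normalization maps a feature vector x to x' = D(x - s), with D an invertible diagonal scaling and s a shift. A linear model (w', b') trained on normalized features scores

\[
w'^{\top} D (x - s) + b' \;=\; (D w')^{\top} x + \bigl(b' - w'^{\top} D s\bigr),
\]

so w = D w' and b = b' - w'^{\top} D s give an identical model on the raw features. Because (w', b') -> (w, b) is a bijection, an unregularized loss attains the same optimum in either parametrization; L1/L2 penalties are not affine-invariant, which is why the equivalence holds only without regularization.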
@@ -275,11 +275,7 @@ class DistributedOptimizationProblemTest extends SparkTestUtils {
doReturn(Some(statesTracker)).when(optimizer).getStateTracker

val configuration = GLMOptimizationConfiguration()
val objective = DistributedGLMLossFunction.create(
configuration,
lossFunction,
sc,
treeAggregateDepth = 1)
val objective = DistributedGLMLossFunction(sc, configuration, treeAggregateDepth = 1)(lossFunction)

val optimizationProblem = new DistributedOptimizationProblem(
optimizer,
@@ -323,11 +319,7 @@
val configuration = GLMOptimizationConfiguration(
regularizationContext = L2RegularizationContext,
regularizationWeight = regularizationWeight)
val objective = DistributedGLMLossFunction.create(
configuration,
lossFunction,
sc,
treeAggregateDepth = 1)
val objective = DistributedGLMLossFunction(sc, configuration, treeAggregateDepth = 1)(lossFunction)

val optimizationProblem = new DistributedOptimizationProblem(
optimizer,
@@ -17,8 +17,8 @@ package com.linkedin.photon.ml.optimization
import scala.collection.Map

import breeze.linalg.Vector
import org.testng.annotations.Test
import org.testng.Assert.assertEquals
import org.testng.annotations.Test

import com.linkedin.photon.ml.test.SparkTestUtils
import com.linkedin.photon.ml.util.FunctionValuesConverged