Merge pull request #292 from navinrathore/MoreBlockingFunctions261
More blocking functions
sonalgoyal authored May 26, 2022
2 parents 16f16c4 + 2946eb4 commit 9204493
Showing 39 changed files with 1,044 additions and 0 deletions.
19 changes: 19 additions & 0 deletions core/src/main/java/zingg/hash/HashFunctionRegistry.java
@@ -22,10 +22,29 @@ public static HashFunction getFunction(String key) {
init(new Last2Chars());
init(new Last3Chars());
init(new Round());
init(new TruncateDoubleTo1Place());
init(new TruncateDoubleTo2Places());
init(new TruncateDoubleTo3Places());
init(new LastWord());
init(new First2CharsBox());
init(new First3CharsBox());
init(new IsNullOrEmpty());
init(new LessThanZeroDbl());
init(new LessThanZeroInt());
init(new TrimLast1DigitDbl());
init(new TrimLast2DigitsDbl());
init(new TrimLast3DigitsDbl());
init(new TrimLast1DigitInt());
init(new TrimLast2DigitsInt());
init(new TrimLast3DigitsInt());
init(new RangeBetween0And10Int());
init(new RangeBetween10And100Int());
init(new RangeBetween100And1000Int());
init(new RangeBetween1000And10000Int());
init(new RangeBetween0And10Dbl());
init(new RangeBetween10And100Dbl());
init(new RangeBetween100And1000Dbl());
init(new RangeBetween1000And10000Dbl());
}

public static void init(HashFunction fn) {
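For context on how these functions are picked up: the registry pairs init(HashFunction fn) with getFunction(String key), and each new function passes its key name to the HashFunction constructor (for example "lessThanZeroDbl" or "rangeBetween0And10Int"). Assuming the registry keys each function by that constructor name, a lookup would be a minimal sketch like:

import zingg.hash.HashFunction;
import zingg.hash.HashFunctionRegistry;

public class RegistryLookupSketch {
    public static void main(String[] args) {
        // hypothetical lookup; assumes keys match the names passed to the HashFunction constructor
        HashFunction fn = HashFunctionRegistry.getFunction("rangeBetween0And10Int");
        System.out.println(fn); // the registered RangeBetween0And10Int instance, if keyed by name
    }
}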
25 changes: 25 additions & 0 deletions core/src/main/java/zingg/hash/LessThanZeroDbl.java
@@ -0,0 +1,25 @@
package zingg.hash;

import org.apache.spark.sql.Row;
import org.apache.spark.sql.api.java.UDF1;
import org.apache.spark.sql.types.DataTypes;

public class LessThanZeroDbl extends HashFunction implements UDF1<Double, Boolean> {
public LessThanZeroDbl() {
super("lessThanZeroDbl", DataTypes.DoubleType, DataTypes.BooleanType, true);
}

@Override
public Boolean call(Double field) {
Boolean r = false;
if (field != null) {
r = field < 0;
}
return r;
}

public Object apply(Row ds, String column) {
return call((Double) ds.getAs(column));
}

}
25 changes: 25 additions & 0 deletions core/src/main/java/zingg/hash/LessThanZeroInt.java
@@ -0,0 +1,25 @@
package zingg.hash;

import org.apache.spark.sql.Row;
import org.apache.spark.sql.api.java.UDF1;
import org.apache.spark.sql.types.DataTypes;

public class LessThanZeroInt extends HashFunction implements UDF1<Integer, Boolean> {
public LessThanZeroInt() {
super("lessThanZeroInt", DataTypes.IntegerType, DataTypes.BooleanType, true);
}

@Override
public Boolean call(Integer field) {
Boolean r = false;
if (field != null) {
r = field < 0;
}
return r;
}

public Object apply(Row ds, String column) {
return call((Integer) ds.getAs(column));
}

}
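Both less-than-zero predicates are null-safe: a null input hashes to false rather than throwing. Since they implement Spark's UDF1, they can be exercised directly and could in principle be registered as Spark SQL UDFs; the sketch below is illustrative only, with a stand-in local SparkSession, and is not necessarily how Zingg wires these functions internally:

import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.DataTypes;
import zingg.hash.LessThanZeroDbl;

public class LessThanZeroSketch {
    public static void main(String[] args) {
        LessThanZeroDbl lessThanZeroDbl = new LessThanZeroDbl();
        System.out.println(lessThanZeroDbl.call(-3.5)); // true
        System.out.println(lessThanZeroDbl.call(2.0));  // false
        System.out.println(lessThanZeroDbl.call(null)); // false: nulls are treated as not-negative
        // optional: register as a Spark SQL UDF (local session used here only for illustration)
        SparkSession spark = SparkSession.builder().master("local").appName("sketch").getOrCreate();
        spark.udf().register("lessThanZeroDbl", lessThanZeroDbl, DataTypes.BooleanType);
    }
}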
9 changes: 9 additions & 0 deletions core/src/main/java/zingg/hash/RangeBetween0And10Dbl.java
@@ -0,0 +1,9 @@
package zingg.hash;

public class RangeBetween0And10Dbl extends RangeDbl {

public RangeBetween0And10Dbl() {
super(0, 10);
}

}
9 changes: 9 additions & 0 deletions core/src/main/java/zingg/hash/RangeBetween0And10Int.java
@@ -0,0 +1,9 @@
package zingg.hash;

public class RangeBetween0And10Int extends RangeInt {

public RangeBetween0And10Int() {
super(0, 10);
}

}
9 changes: 9 additions & 0 deletions core/src/main/java/zingg/hash/RangeBetween1000And10000Dbl.java
@@ -0,0 +1,9 @@
package zingg.hash;

public class RangeBetween1000And10000Dbl extends RangeDbl {

public RangeBetween1000And10000Dbl() {
super(1000, 10000);
}

}
9 changes: 9 additions & 0 deletions core/src/main/java/zingg/hash/RangeBetween1000And10000Int.java
@@ -0,0 +1,9 @@
package zingg.hash;

public class RangeBetween1000And10000Int extends RangeInt {

public RangeBetween1000And10000Int() {
super(1000, 10000);
}

}
9 changes: 9 additions & 0 deletions core/src/main/java/zingg/hash/RangeBetween100And1000Dbl.java
@@ -0,0 +1,9 @@
package zingg.hash;

public class RangeBetween100And1000Dbl extends RangeDbl {

public RangeBetween100And1000Dbl() {
super(100, 1000);
}

}
9 changes: 9 additions & 0 deletions core/src/main/java/zingg/hash/RangeBetween100And1000Int.java
@@ -0,0 +1,9 @@
package zingg.hash;

public class RangeBetween100And1000Int extends RangeInt {

public RangeBetween100And1000Int() {
super(100, 1000);
}

}
9 changes: 9 additions & 0 deletions core/src/main/java/zingg/hash/RangeBetween10And100Dbl.java
@@ -0,0 +1,9 @@
package zingg.hash;

public class RangeBetween10And100Dbl extends RangeDbl {

public RangeBetween10And100Dbl() {
super(10, 100);
}

}
9 changes: 9 additions & 0 deletions core/src/main/java/zingg/hash/RangeBetween10And100Int.java
@@ -0,0 +1,9 @@
package zingg.hash;

public class RangeBetween10And100Int extends RangeInt {

public RangeBetween10And100Int() {
super(10, 100);
}

}
30 changes: 30 additions & 0 deletions core/src/main/java/zingg/hash/RangeDbl.java
@@ -0,0 +1,30 @@
package zingg.hash;

import org.apache.spark.sql.Row;
import org.apache.spark.sql.api.java.UDF1;
import org.apache.spark.sql.types.DataTypes;

public class RangeDbl extends HashFunction implements UDF1<Double, Integer> {
int lowerLimit;
int upperLimit;

public RangeDbl(int lower, int upper) {
super("rangeBetween" + lower + "And" + upper + "Dbl", DataTypes.DoubleType, DataTypes.IntegerType, true);
this.lowerLimit = lower;
this.upperLimit = upper;
}

@Override
public Integer call(Double field) {
int withinRange = 0;
if (field != null && field >= lowerLimit && field < upperLimit) { // null check avoids NPE on unboxing
withinRange = 1;
}
return withinRange;
}

public Object apply(Row ds, String column) {
return call((Double) ds.getAs(column));
}

}
30 changes: 30 additions & 0 deletions core/src/main/java/zingg/hash/RangeInt.java
@@ -0,0 +1,30 @@
package zingg.hash;

import org.apache.spark.sql.Row;
import org.apache.spark.sql.api.java.UDF1;
import org.apache.spark.sql.types.DataTypes;

public class RangeInt extends HashFunction implements UDF1<Integer, Integer> {
int lowerLimit;
int upperLimit;

public RangeInt(int lower, int upper) {
super("rangeBetween" + lower + "And" + upper + "Int", DataTypes.IntegerType, DataTypes.IntegerType, true);
this.lowerLimit = lower;
this.upperLimit = upper;
}

@Override
public Integer call(Integer field) {
int withinRange = 0;
if (field != null && field >= lowerLimit && field < upperLimit) { // null check avoids NPE on unboxing
withinRange = 1;
}
return withinRange;
}

public Object apply(Row ds, String column) {
return call((Integer) ds.getAs(column));
}

}
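RangeInt and RangeDbl bucket a value into 1 when it falls in [lower, upper) and 0 otherwise, so the lower bound is inclusive and the upper bound exclusive; the concrete subclasses only fix the limits. A small sketch of the resulting buckets:

import zingg.hash.RangeBetween10And100Int;

public class RangeBucketSketch {
    public static void main(String[] args) {
        RangeBetween10And100Int range = new RangeBetween10And100Int();
        System.out.println(range.call(10));  // 1: lower bound is inclusive
        System.out.println(range.call(99));  // 1
        System.out.println(range.call(100)); // 0: upper bound is exclusive
        System.out.println(range.call(5));   // 0
    }
}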
9 changes: 9 additions & 0 deletions core/src/main/java/zingg/hash/TrimLast1DigitDbl.java
@@ -0,0 +1,9 @@
package zingg.hash;

public class TrimLast1DigitDbl extends TrimLastDigitsDbl {

public TrimLast1DigitDbl() {
super(1);
}

}
9 changes: 9 additions & 0 deletions core/src/main/java/zingg/hash/TrimLast1DigitInt.java
@@ -0,0 +1,9 @@
package zingg.hash;

public class TrimLast1DigitInt extends TrimLastDigitsInt {

public TrimLast1DigitInt() {
super(1);
}

}
9 changes: 9 additions & 0 deletions core/src/main/java/zingg/hash/TrimLast2DigitsDbl.java
@@ -0,0 +1,9 @@
package zingg.hash;

public class TrimLast2DigitsDbl extends TrimLastDigitsDbl {

public TrimLast2DigitsDbl() {
super(2);
}

}
9 changes: 9 additions & 0 deletions core/src/main/java/zingg/hash/TrimLast2DigitsInt.java
@@ -0,0 +1,9 @@
package zingg.hash;

public class TrimLast2DigitsInt extends TrimLastDigitsInt {

public TrimLast2DigitsInt() {
super(2);
}

}
9 changes: 9 additions & 0 deletions core/src/main/java/zingg/hash/TrimLast3DigitsDbl.java
@@ -0,0 +1,9 @@
package zingg.hash;

public class TrimLast3DigitsDbl extends TrimLastDigitsDbl {

public TrimLast3DigitsDbl() {
super(3);
}

}
9 changes: 9 additions & 0 deletions core/src/main/java/zingg/hash/TrimLast3DigitsInt.java
@@ -0,0 +1,9 @@
package zingg.hash;

public class TrimLast3DigitsInt extends TrimLastDigitsInt {

public TrimLast3DigitsInt() {
super(3);
}

}
30 changes: 30 additions & 0 deletions core/src/main/java/zingg/hash/TrimLastDigitsDbl.java
@@ -0,0 +1,30 @@
package zingg.hash;

import org.apache.spark.sql.Row;
import org.apache.spark.sql.api.java.UDF1;
import org.apache.spark.sql.types.DataTypes;

public class TrimLastDigitsDbl extends HashFunction implements UDF1<Double, Double> {
int numDigits;
static final int[] POWERS_OF_10 = {1, 10, 100, 1000, 10000, 100000};
public TrimLastDigitsDbl(int count) {
super("trimLast" + count + "DigitsDbl", DataTypes.DoubleType, DataTypes.DoubleType, true);
this.numDigits = count;
}

@Override
public Double call(Double field) {
Double r = null;
if (field == null) {
r = field;
} else {
r = Math.floor(field / POWERS_OF_10[numDigits]);
}
return r;
}

public Object apply(Row ds, String column) {
return call((Double) ds.getAs(column));
}

}
30 changes: 30 additions & 0 deletions core/src/main/java/zingg/hash/TrimLastDigitsInt.java
@@ -0,0 +1,30 @@
package zingg.hash;

import org.apache.spark.sql.Row;
import org.apache.spark.sql.api.java.UDF1;
import org.apache.spark.sql.types.DataTypes;

public class TrimLastDigitsInt extends HashFunction implements UDF1<Integer, Integer> {
int numDigits;
static final int[] POWERS_OF_10 = {1, 10, 100, 1000, 10000, 100000};
public TrimLastDigitsInt(int count) {
super("trimLast" + count + "DigitsInt", DataTypes.IntegerType, DataTypes.IntegerType, true);
this.numDigits = count;
}

@Override
public Integer call(Integer field) {
Integer r = null;
if (field == null) {
r = field;
} else {
r = field / POWERS_OF_10[numDigits];
}
return r;
}

public Object apply(Row ds, String column) {
return call((Integer) ds.getAs(column));
}

}
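The trim functions drop the last n base-10 digits: the Int variant uses integer division by 10^n, while the Dbl variant divides and applies Math.floor, so its result stays a Double. A sketch, assuming the sample values shown:

import zingg.hash.TrimLast1DigitDbl;
import zingg.hash.TrimLast2DigitsInt;

public class TrimDigitsSketch {
    public static void main(String[] args) {
        System.out.println(new TrimLast2DigitsInt().call(12345));  // 123
        System.out.println(new TrimLast2DigitsInt().call(45));     // 0 when fewer digits remain
        System.out.println(new TrimLast1DigitDbl().call(1234.56)); // 123.0 (divide by 10, then floor)
    }
}

Note that the two variants diverge on negative inputs: integer division truncates toward zero, while Math.floor rounds toward negative infinity.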
31 changes: 31 additions & 0 deletions core/src/main/java/zingg/hash/TruncateDouble.java
@@ -0,0 +1,31 @@
package zingg.hash;

import org.apache.spark.sql.Row;
import org.apache.spark.sql.api.java.UDF1;
import org.apache.spark.sql.types.DataTypes;

public class TruncateDouble extends HashFunction implements UDF1<Double, Double> {
int numDecimalPlaces;
static final int[] POWERS_OF_10 = {1, 10, 100, 1000, 10000, 100000};
public TruncateDouble(int numDecimalPlaces) {
super("truncateDoubleTo" + numDecimalPlaces + "Places", DataTypes.DoubleType, DataTypes.DoubleType, true);
this.numDecimalPlaces = numDecimalPlaces;
}

@Override
public Double call(Double field) {
Double r = null;
if (field == null) {
r = field;
} else {
r = Math.floor(field * POWERS_OF_10[numDecimalPlaces]) / POWERS_OF_10[numDecimalPlaces];
}
return r;
}

@Override
public Object apply(Row ds, String column) {
return call((Double) ds.getAs(column));
}

}
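TruncateDouble keeps n decimal places by scaling up by 10^n, flooring, and scaling back down; because it floors, negative values step away from zero rather than simply losing digits, and nulls pass through unchanged. A sketch using the base class directly (equivalent to TruncateDoubleTo2Places below):

import zingg.hash.TruncateDouble;

public class TruncateSketch {
    public static void main(String[] args) {
        TruncateDouble trunc = new TruncateDouble(2);
        System.out.println(trunc.call(3.14159));  // 3.14
        System.out.println(trunc.call(-3.14159)); // -3.15: floor moves negatives away from zero
        System.out.println(trunc.call(null));     // null passes through unchanged
    }
}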
9 changes: 9 additions & 0 deletions core/src/main/java/zingg/hash/TruncateDoubleTo1Place.java
@@ -0,0 +1,9 @@
package zingg.hash;

public class TruncateDoubleTo1Place extends TruncateDouble {

public TruncateDoubleTo1Place() {
super(1);
}

}
9 changes: 9 additions & 0 deletions core/src/main/java/zingg/hash/TruncateDoubleTo2Places.java
@@ -0,0 +1,9 @@
package zingg.hash;

public class TruncateDoubleTo2Places extends TruncateDouble {

public TruncateDoubleTo2Places() {
super(2);
}

}