Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

More blocking functions #292

Merged
merged 5 commits into from
May 26, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions core/src/main/java/zingg/hash/HashFunctionRegistry.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,29 @@ public static HashFunction getFunction(String key) {
init(new Last2Chars());
init(new Last3Chars());
init(new Round());
init(new TruncateDoubleTo1Place());
init(new TruncateDoubleTo2Places());
init(new TruncateDoubleTo3Places());
init(new LastWord());
init(new First2CharsBox());
init(new First3CharsBox());
init(new IsNullOrEmpty());
init(new LessThanZeroDbl());
init(new LessThanZeroInt());
init(new TrimLast1DigitDbl());
init(new TrimLast2DigitsDbl());
init(new TrimLast3DigitsDbl());
init(new TrimLast1DigitInt());
init(new TrimLast2DigitsInt());
init(new TrimLast3DigitsInt());
init(new RangeBetween0And10Int());
init(new RangeBetween10And100Int());
init(new RangeBetween100And1000Int());
init(new RangeBetween1000And10000Int());
init(new RangeBetween0And10Dbl());
init(new RangeBetween10And100Dbl());
init(new RangeBetween100And1000Dbl());
init(new RangeBetween1000And10000Dbl());
}

public static void init(HashFunction fn) {
Expand Down
25 changes: 25 additions & 0 deletions core/src/main/java/zingg/hash/LessThanZeroDbl.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
package zingg.hash;

import org.apache.spark.sql.Row;
import org.apache.spark.sql.api.java.UDF1;
import org.apache.spark.sql.types.DataTypes;

/**
 * Hash function over a nullable {@code Double} that reports whether the value is negative.
 *
 * <p>Constructed with the string {@code "lessThanZeroDbl"}, a double input type and a
 * boolean result type. It is also a Spark {@code UDF1<Double, Boolean>}.
 */
public class LessThanZeroDbl extends HashFunction implements UDF1<Double, Boolean> {

    public LessThanZeroDbl() {
        super("lessThanZeroDbl", DataTypes.DoubleType, DataTypes.BooleanType, true);
    }

    /**
     * Tests the sign of a value.
     *
     * @param field the value to test; may be {@code null}
     * @return {@code true} only when {@code field} is non-null and strictly below zero;
     *         {@code false} for {@code null}, zero and positive values
     */
    @Override
    public Boolean call(Double field) {
        // The null test short-circuits, so unboxing only happens for real values.
        return field != null && field < 0;
    }

    /**
     * Reads {@code column} from the row as a {@code Double} and delegates to {@link #call(Double)}.
     *
     * @param ds the row to read from
     * @param column the name of the column holding the value
     * @return the {@code Boolean} produced by {@link #call(Double)}
     */
    public Object apply(Row ds, String column) {
        Double value = ds.getAs(column);
        return call(value);
    }

}
25 changes: 25 additions & 0 deletions core/src/main/java/zingg/hash/LessThanZeroInt.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
package zingg.hash;

import org.apache.spark.sql.Row;
import org.apache.spark.sql.api.java.UDF1;
import org.apache.spark.sql.types.DataTypes;

/**
 * Hash function over a nullable {@code Integer} that reports whether the value is negative.
 *
 * <p>Constructed with the string {@code "lessThanZeroInt"}, an integer input type and a
 * boolean result type. It is also a Spark {@code UDF1<Integer, Boolean>}.
 */
public class LessThanZeroInt extends HashFunction implements UDF1<Integer, Boolean> {

    public LessThanZeroInt() {
        super("lessThanZeroInt", DataTypes.IntegerType, DataTypes.BooleanType, true);
    }

    /**
     * Tests the sign of a value.
     *
     * @param field the value to test; may be {@code null}
     * @return {@code true} only when {@code field} is non-null and strictly below zero;
     *         {@code false} for {@code null}, zero and positive values
     */
    @Override
    public Boolean call(Integer field) {
        // The null test short-circuits, so unboxing only happens for real values.
        return field != null && field < 0;
    }

    /**
     * Reads {@code column} from the row as an {@code Integer} and delegates to {@link #call(Integer)}.
     *
     * @param ds the row to read from
     * @param column the name of the column holding the value
     * @return the {@code Boolean} produced by {@link #call(Integer)}
     */
    public Object apply(Row ds, String column) {
        Integer value = ds.getAs(column);
        return call(value);
    }

}
9 changes: 9 additions & 0 deletions core/src/main/java/zingg/hash/RangeBetween0And10Dbl.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package zingg.hash;

/**
 * {@link RangeDbl} preconfigured with the bounds 0 and 10: the inherited {@code call}
 * yields 1 when the value is {@code >= 0} and {@code < 10}, and 0 for any other number.
 */
public class RangeBetween0And10Dbl extends RangeDbl {

/** Passes the fixed bounds (lower 0, upper 10) to the {@link RangeDbl} constructor. */
public RangeBetween0And10Dbl() {
super(0, 10);
}

}
9 changes: 9 additions & 0 deletions core/src/main/java/zingg/hash/RangeBetween0And10Int.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package zingg.hash;

/**
 * {@link RangeInt} preconfigured with the bounds 0 and 10: the inherited {@code call}
 * yields 1 when the value is {@code >= 0} and {@code < 10}, and 0 for any other number.
 */
public class RangeBetween0And10Int extends RangeInt {

/** Passes the fixed bounds (lower 0, upper 10) to the {@link RangeInt} constructor. */
public RangeBetween0And10Int() {
super(0, 10);
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package zingg.hash;

/**
 * {@link RangeDbl} preconfigured with the bounds 1000 and 10000: the inherited {@code call}
 * yields 1 when the value is {@code >= 1000} and {@code < 10000}, and 0 for any other number.
 */
public class RangeBetween1000And10000Dbl extends RangeDbl {

/** Passes the fixed bounds (lower 1000, upper 10000) to the {@link RangeDbl} constructor. */
public RangeBetween1000And10000Dbl() {
super(1000, 10000);
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package zingg.hash;

/**
 * {@link RangeInt} preconfigured with the bounds 1000 and 10000: the inherited {@code call}
 * yields 1 when the value is {@code >= 1000} and {@code < 10000}, and 0 for any other number.
 */
public class RangeBetween1000And10000Int extends RangeInt {

/** Passes the fixed bounds (lower 1000, upper 10000) to the {@link RangeInt} constructor. */
public RangeBetween1000And10000Int() {
super(1000, 10000);
}

}
9 changes: 9 additions & 0 deletions core/src/main/java/zingg/hash/RangeBetween100And1000Dbl.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package zingg.hash;

/**
 * {@link RangeDbl} preconfigured with the bounds 100 and 1000: the inherited {@code call}
 * yields 1 when the value is {@code >= 100} and {@code < 1000}, and 0 for any other number.
 */
public class RangeBetween100And1000Dbl extends RangeDbl {

/** Passes the fixed bounds (lower 100, upper 1000) to the {@link RangeDbl} constructor. */
public RangeBetween100And1000Dbl() {
super(100, 1000);
}

}
9 changes: 9 additions & 0 deletions core/src/main/java/zingg/hash/RangeBetween100And1000Int.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package zingg.hash;

/**
 * {@link RangeInt} preconfigured with the bounds 100 and 1000: the inherited {@code call}
 * yields 1 when the value is {@code >= 100} and {@code < 1000}, and 0 for any other number.
 */
public class RangeBetween100And1000Int extends RangeInt {

/** Passes the fixed bounds (lower 100, upper 1000) to the {@link RangeInt} constructor. */
public RangeBetween100And1000Int() {
super(100, 1000);
}

}
9 changes: 9 additions & 0 deletions core/src/main/java/zingg/hash/RangeBetween10And100Dbl.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package zingg.hash;

/**
 * {@link RangeDbl} preconfigured with the bounds 10 and 100: the inherited {@code call}
 * yields 1 when the value is {@code >= 10} and {@code < 100}, and 0 for any other number.
 */
public class RangeBetween10And100Dbl extends RangeDbl {

/** Passes the fixed bounds (lower 10, upper 100) to the {@link RangeDbl} constructor. */
public RangeBetween10And100Dbl() {
super(10, 100);
}

}
9 changes: 9 additions & 0 deletions core/src/main/java/zingg/hash/RangeBetween10And100Int.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package zingg.hash;

/**
 * {@link RangeInt} preconfigured with the bounds 10 and 100: the inherited {@code call}
 * yields 1 when the value is {@code >= 10} and {@code < 100}, and 0 for any other number.
 */
public class RangeBetween10And100Int extends RangeInt {

/** Passes the fixed bounds (lower 10, upper 100) to the {@link RangeInt} constructor. */
public RangeBetween10And100Int() {
super(10, 100);
}

}
30 changes: 30 additions & 0 deletions core/src/main/java/zingg/hash/RangeDbl.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
package zingg.hash;

import org.apache.spark.sql.Row;
import org.apache.spark.sql.api.java.UDF1;
import org.apache.spark.sql.types.DataTypes;

/**
 * Hash function that flags whether a {@code Double} falls inside a half-open range
 * {@code [lowerLimit, upperLimit)}.
 *
 * <p>The string passed to the {@code HashFunction} constructor is
 * {@code "rangeBetween" + lower + "And" + upper + "Dbl"}, with a double input type and an
 * integer result type. It is also a Spark {@code UDF1<Double, Integer>}.
 */
public class RangeDbl extends HashFunction implements UDF1<Double, Integer> {
    /** Inclusive lower bound of the range. */
    int lowerLimit;
    /** Exclusive upper bound of the range. */
    int upperLimit;

    /**
     * Creates a range check over {@code [lower, upper)}.
     *
     * @param lower inclusive lower bound
     * @param upper exclusive upper bound
     */
    public RangeDbl(int lower, int upper) {
        super("rangeBetween" + lower + "And" + upper + "Dbl", DataTypes.DoubleType, DataTypes.IntegerType, true);
        this.lowerLimit = lower;
        this.upperLimit = upper;
    }

    /**
     * Tests whether a value lies in the configured range.
     *
     * @param field the value to test; may be {@code null}
     * @return 1 when {@code lowerLimit <= field < upperLimit}; 0 otherwise, including when
     *         {@code field} is {@code null}
     */
    @Override
    public Integer call(Double field) {
        // Comparing a null Double auto-unboxes it and throws NullPointerException.
        // A missing value is reported as "not in range", in line with the sibling
        // LessThanZero functions that map null to false.
        if (field == null) {
            return 0;
        }
        return (field >= lowerLimit && field < upperLimit) ? 1 : 0;
    }

    /**
     * Reads {@code column} from the row as a {@code Double} and delegates to {@link #call(Double)}.
     *
     * @param ds the row to read from
     * @param column the name of the column holding the value
     * @return the {@code Integer} produced by {@link #call(Double)}
     */
    public Object apply(Row ds, String column) {
        return call((Double) ds.getAs(column));
    }

}
30 changes: 30 additions & 0 deletions core/src/main/java/zingg/hash/RangeInt.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
package zingg.hash;

import org.apache.spark.sql.Row;
import org.apache.spark.sql.api.java.UDF1;
import org.apache.spark.sql.types.DataTypes;

/**
 * Hash function that flags whether an {@code Integer} falls inside a half-open range
 * {@code [lowerLimit, upperLimit)}.
 *
 * <p>The string passed to the {@code HashFunction} constructor is
 * {@code "rangeBetween" + lower + "And" + upper + "Int"}, with an integer input type and an
 * integer result type. It is also a Spark {@code UDF1<Integer, Integer>}.
 */
public class RangeInt extends HashFunction implements UDF1<Integer, Integer> {
    /** Inclusive lower bound of the range. */
    int lowerLimit;
    /** Exclusive upper bound of the range. */
    int upperLimit;

    /**
     * Creates a range check over {@code [lower, upper)}.
     *
     * @param lower inclusive lower bound
     * @param upper exclusive upper bound
     */
    public RangeInt(int lower, int upper) {
        super("rangeBetween" + lower + "And" + upper + "Int", DataTypes.IntegerType, DataTypes.IntegerType, true);
        this.lowerLimit = lower;
        this.upperLimit = upper;
    }

    /**
     * Tests whether a value lies in the configured range.
     *
     * @param field the value to test; may be {@code null}
     * @return 1 when {@code lowerLimit <= field < upperLimit}; 0 otherwise, including when
     *         {@code field} is {@code null}
     */
    @Override
    public Integer call(Integer field) {
        // Comparing a null Integer auto-unboxes it and throws NullPointerException.
        // A missing value is reported as "not in range", in line with the sibling
        // LessThanZero functions that map null to false.
        if (field == null) {
            return 0;
        }
        return (field >= lowerLimit && field < upperLimit) ? 1 : 0;
    }

    /**
     * Reads {@code column} from the row as an {@code Integer} and delegates to {@link #call(Integer)}.
     *
     * @param ds the row to read from
     * @param column the name of the column holding the value
     * @return the {@code Integer} produced by {@link #call(Integer)}
     */
    public Object apply(Row ds, String column) {
        return call((Integer) ds.getAs(column));
    }

}
9 changes: 9 additions & 0 deletions core/src/main/java/zingg/hash/TrimLast1DigitDbl.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package zingg.hash;

/**
 * {@link TrimLastDigitsDbl} with a digit count of 1: the inherited {@code call} divides a
 * non-null value by 10 and applies {@code Math.floor}; a null value stays null.
 *
 * <p>NOTE(review): the base constructor builds the string {@code "trimLast1DigitsDbl"}
 * (plural "Digits"), which differs from this class's name. Confirm that is the intended key.
 */
public class TrimLast1DigitDbl extends TrimLastDigitsDbl {

/** Passes the fixed digit count 1 to the {@link TrimLastDigitsDbl} constructor. */
public TrimLast1DigitDbl() {
super(1);
}

}
9 changes: 9 additions & 0 deletions core/src/main/java/zingg/hash/TrimLast1DigitInt.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package zingg.hash;

/**
 * {@link TrimLastDigitsInt} with a digit count of 1: the inherited {@code call}
 * integer-divides a non-null value by 10; a null value stays null.
 *
 * <p>NOTE(review): the base constructor builds the string {@code "trimLast1DigitsInt"}
 * (plural "Digits"), which differs from this class's name. Confirm that is the intended key.
 */
public class TrimLast1DigitInt extends TrimLastDigitsInt {

/** Passes the fixed digit count 1 to the {@link TrimLastDigitsInt} constructor. */
public TrimLast1DigitInt() {
super(1);
}

}
9 changes: 9 additions & 0 deletions core/src/main/java/zingg/hash/TrimLast2DigitsDbl.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package zingg.hash;

/**
 * {@link TrimLastDigitsDbl} with a digit count of 2: the inherited {@code call} divides a
 * non-null value by 100 and applies {@code Math.floor}; a null value stays null.
 */
public class TrimLast2DigitsDbl extends TrimLastDigitsDbl {

/** Passes the fixed digit count 2 to the {@link TrimLastDigitsDbl} constructor. */
public TrimLast2DigitsDbl() {
super(2);
}

}
9 changes: 9 additions & 0 deletions core/src/main/java/zingg/hash/TrimLast2DigitsInt.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package zingg.hash;

/**
 * {@link TrimLastDigitsInt} with a digit count of 2: the inherited {@code call}
 * integer-divides a non-null value by 100; a null value stays null.
 */
public class TrimLast2DigitsInt extends TrimLastDigitsInt {

/** Passes the fixed digit count 2 to the {@link TrimLastDigitsInt} constructor. */
public TrimLast2DigitsInt() {
super(2);
}

}
9 changes: 9 additions & 0 deletions core/src/main/java/zingg/hash/TrimLast3DigitsDbl.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package zingg.hash;

/**
 * {@link TrimLastDigitsDbl} with a digit count of 3: the inherited {@code call} divides a
 * non-null value by 1000 and applies {@code Math.floor}; a null value stays null.
 */
public class TrimLast3DigitsDbl extends TrimLastDigitsDbl {

/** Passes the fixed digit count 3 to the {@link TrimLastDigitsDbl} constructor. */
public TrimLast3DigitsDbl() {
super(3);
}

}
9 changes: 9 additions & 0 deletions core/src/main/java/zingg/hash/TrimLast3DigitsInt.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package zingg.hash;

/**
 * {@link TrimLastDigitsInt} with a digit count of 3: the inherited {@code call}
 * integer-divides a non-null value by 1000; a null value stays null.
 */
public class TrimLast3DigitsInt extends TrimLastDigitsInt {

/** Passes the fixed digit count 3 to the {@link TrimLastDigitsInt} constructor. */
public TrimLast3DigitsInt() {
super(3);
}

}
30 changes: 30 additions & 0 deletions core/src/main/java/zingg/hash/TrimLastDigitsDbl.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
package zingg.hash;

import org.apache.spark.sql.Row;
import org.apache.spark.sql.api.java.UDF1;
import org.apache.spark.sql.types.DataTypes;

/**
 * Hash function that scales a {@code Double} down by a power of ten and floors the result.
 *
 * <p>The string passed to the {@code HashFunction} constructor is
 * {@code "trimLast" + count + "DigitsDbl"}, with double input and result types. It is also
 * a Spark {@code UDF1<Double, Double>}.
 */
public class TrimLastDigitsDbl extends HashFunction implements UDF1<Double, Double> {
    /** Index into {@link #POWERS_OF_10} selecting the divisor. */
    int numDigits;
    /** Divisors for digit counts 0 through 5. */
    static final int[] POWERS_OF_10 = {1, 10, 100, 1000, 10000, 100000};

    /**
     * @param count index into {@link #POWERS_OF_10}; the table has entries for 0 through 5,
     *              so other values fail with an index error when {@code call} runs
     */
    public TrimLastDigitsDbl(int count) {
        super("trimLast" + count + "DigitsDbl", DataTypes.DoubleType, DataTypes.DoubleType, true);
        this.numDigits = count;
    }

    /**
     * Divides the value by {@code 10^numDigits} and rounds down with {@code Math.floor}.
     *
     * @param field the value to reduce; may be {@code null}
     * @return the floored quotient, or {@code null} when {@code field} is {@code null}
     */
    @Override
    public Double call(Double field) {
        if (field == null) {
            return null;
        }
        final int divisor = POWERS_OF_10[numDigits];
        return Math.floor(field / divisor);
    }

    /**
     * Reads {@code column} from the row as a {@code Double} and delegates to {@link #call(Double)}.
     *
     * @param ds the row to read from
     * @param column the name of the column holding the value
     * @return the {@code Double} produced by {@link #call(Double)}
     */
    public Object apply(Row ds, String column) {
        Double value = ds.getAs(column);
        return call(value);
    }

}
30 changes: 30 additions & 0 deletions core/src/main/java/zingg/hash/TrimLastDigitsInt.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
package zingg.hash;

import org.apache.spark.sql.Row;
import org.apache.spark.sql.api.java.UDF1;
import org.apache.spark.sql.types.DataTypes;

/**
 * Hash function that integer-divides an {@code Integer} by a power of ten.
 *
 * <p>The string passed to the {@code HashFunction} constructor is
 * {@code "trimLast" + count + "DigitsInt"}, with integer input and result types. It is also
 * a Spark {@code UDF1<Integer, Integer>}.
 */
public class TrimLastDigitsInt extends HashFunction implements UDF1<Integer, Integer> {
    /** Index into {@link #POWERS_OF_10} selecting the divisor. */
    int numDigits;
    /** Divisors for digit counts 0 through 5. */
    static final int[] POWERS_OF_10 = {1, 10, 100, 1000, 10000, 100000};

    /**
     * @param count index into {@link #POWERS_OF_10}; the table has entries for 0 through 5,
     *              so other values fail with an index error when {@code call} runs
     */
    public TrimLastDigitsInt(int count) {
        super("trimLast" + count + "DigitsInt", DataTypes.IntegerType, DataTypes.IntegerType, true);
        this.numDigits = count;
    }

    /**
     * Divides the value by {@code 10^numDigits} using {@code int} division, which
     * truncates toward zero.
     *
     * @param field the value to reduce; may be {@code null}
     * @return the quotient, or {@code null} when {@code field} is {@code null}
     */
    @Override
    public Integer call(Integer field) {
        if (field == null) {
            return null;
        }
        final int divisor = POWERS_OF_10[numDigits];
        return field / divisor;
    }

    /**
     * Reads {@code column} from the row as an {@code Integer} and delegates to {@link #call(Integer)}.
     *
     * @param ds the row to read from
     * @param column the name of the column holding the value
     * @return the {@code Integer} produced by {@link #call(Integer)}
     */
    public Object apply(Row ds, String column) {
        Integer value = ds.getAs(column);
        return call(value);
    }

}
31 changes: 31 additions & 0 deletions core/src/main/java/zingg/hash/TruncateDouble.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
package zingg.hash;

import org.apache.spark.sql.Row;
import org.apache.spark.sql.api.java.UDF1;
import org.apache.spark.sql.types.DataTypes;

public class TruncateDouble extends HashFunction implements UDF1<Double, Double> {
int numDecimalPlaces;
static final int[] POWERS_OF_10 = {1, 10, 100, 1000, 10000, 100000};
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why use this? just say 10 exp the argument.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

/**
 * Creates a truncating function for the given number of decimal places.
 *
 * <p>Passes the string {@code "truncateDoubleTo" + numDecimalPlaces + "Places"} together with
 * double input and result types to the superclass constructor, then stores the count.
 *
 * @param numDecimalPlaces number of decimal places to keep; {@code call} uses it as an index
 *                         into {@code POWERS_OF_10}, which has entries for 0 through 5
 */
public TruncateDouble(int numDecimalPlaces) {
super("truncateDoubleTo" + numDecimalPlaces + "Places", DataTypes.DoubleType, DataTypes.DoubleType, true);
this.numDecimalPlaces = numDecimalPlaces;
}

/**
 * Truncates a value to {@code numDecimalPlaces} decimal places by scaling up,
 * flooring, and scaling back down.
 *
 * @param field the value to truncate; may be {@code null}
 * @return the truncated value, or {@code null} when {@code field} is {@code null}
 */
@Override
public Double call(Double field) {
    if (field == null) {
        return null;
    }
    final int scale = POWERS_OF_10[numDecimalPlaces];
    return Math.floor(field * scale) / scale;
}

/**
 * Reads {@code column} from the row as a {@code Double} and delegates to {@code call}.
 *
 * @param ds the row to read from
 * @param column the name of the column holding the value
 * @return the {@code Double} produced by {@code call}
 */
@Override
public Object apply(Row ds, String column) {
    Double value = ds.getAs(column);
    return call(value);
}

}
9 changes: 9 additions & 0 deletions core/src/main/java/zingg/hash/TruncateDoubleTo1Place.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package zingg.hash;

/**
 * {@link TruncateDouble} with one decimal place: the inherited {@code call} computes
 * {@code Math.floor(v * 10) / 10} for a non-null value; a null value stays null.
 *
 * <p>NOTE(review): the base constructor builds the string {@code "truncateDoubleTo1Places"}
 * (plural "Places"), which differs from this class's name. Confirm that is the intended key.
 */
public class TruncateDoubleTo1Place extends TruncateDouble {

/** Passes the fixed decimal-place count 1 to the {@link TruncateDouble} constructor. */
public TruncateDoubleTo1Place() {
super(1);
}

}
9 changes: 9 additions & 0 deletions core/src/main/java/zingg/hash/TruncateDoubleTo2Places.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package zingg.hash;

/**
 * {@link TruncateDouble} with two decimal places: the inherited {@code call} computes
 * {@code Math.floor(v * 100) / 100} for a non-null value; a null value stays null.
 */
public class TruncateDoubleTo2Places extends TruncateDouble {

/** Passes the fixed decimal-place count 2 to the {@link TruncateDouble} constructor. */
public TruncateDoubleTo2Places() {
super(2);
}

}
Loading