Skip to content

Commit

Permalink
Merge pull request #291 from RavirajBaraiya/MatchTypeEmail
Browse files Browse the repository at this point in the history
Match type email
  • Loading branch information
sonalgoyal authored May 26, 2022
2 parents 7129eff + 75455b7 commit c38df65
Show file tree
Hide file tree
Showing 7 changed files with 185 additions and 2 deletions.
5 changes: 5 additions & 0 deletions client/src/main/java/zingg/client/MatchType.java
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,11 @@ public enum MatchType implements Serializable {
*/
PINCODE("PINCODE"),

/**
* An email type that compares only the local part of the address (the text before the '@') and ignores the domain.
*/
EMAIL("EMAIL"),

/**
* Long descriptive text, usually more than a couple of words for example
* product descriptions
Expand Down
4 changes: 4 additions & 0 deletions core/src/main/java/zingg/feature/StringFeature.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import zingg.similarity.function.AJaroWinklerFunction;
import zingg.similarity.function.AffineGapSimilarityFunction;
import zingg.similarity.function.CheckBlankOrNullFunction;
import zingg.similarity.function.EmailMatchTypeFunction;
import zingg.similarity.function.JaccSimFunction;
import zingg.similarity.function.JaroWinklerFunction;
import zingg.similarity.function.NumbersJaccardFunction;
Expand Down Expand Up @@ -46,6 +47,9 @@ public void init(FieldDefinition f) {
if(f.getMatchType().contains(MatchType.PINCODE)){
addSimFunction(new PinCodeMatchTypeFunction());
}
if(f.getMatchType().contains(MatchType.EMAIL)){
addSimFunction(new EmailMatchTypeFunction());
}
if (f.getMatchType().contains(MatchType.NUMERIC_WITH_UNITS)) {
addSimFunction(new ProductCodeFunction());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,11 @@ public AffineGapSimilarityFunction(String s) {
super(s);
gap = new SAffineGap();
}

/**
 * Scores the two strings by handing them, unchanged, to the superclass
 * implementation of {@code call}.
 *
 * @param first  the first string to compare
 * @param second the second string to compare
 * @return the score produced by the superclass
 */
@Override
public Double call(String first, String second) {
    final Double inheritedScore = super.call(first, second);
    return inheritedScore;
}

@Override
public String getUid() {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
package zingg.similarity.function;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.spark.ml.util.Identifiable$;


/**
 * Similarity function for e-mail addresses that compares only the local part
 * (the text before the first {@code '@'}) and ignores the domain.
 *
 * <p>The local parts are extracted and then passed to the inherited
 * {@code call(String, String)} implementation, which produces the score.
 */
public class EmailMatchTypeFunction extends StringSimilarityDistanceFunction {

    public static final Log LOG = LogFactory
            .getLog(EmailMatchTypeFunction.class);

    /** Creates the function with the default name {@code "EmailMatchTypeFunction"}. */
    public EmailMatchTypeFunction() {
        this("EmailMatchTypeFunction");
    }

    /**
     * Creates the function with an explicit name.
     *
     * @param s the name passed to the superclass constructor
     */
    public EmailMatchTypeFunction(String s) {
        super(s);
    }

    /**
     * Scores two e-mail addresses on their local parts only.
     *
     * @param first  the first e-mail address; may be {@code null}
     * @param second the second e-mail address; may be {@code null}
     * @return the score the superclass computes for the two local parts;
     *         {@code null} inputs are forwarded as {@code null} so the
     *         superclass decides how missing values are scored
     */
    @Override
    public Double call(String first, String second) {
        return super.call(localPart(first), localPart(second));
    }

    /**
     * Returns the text before the first {@code '@'}.
     *
     * <p>Uses {@code indexOf} rather than {@code split("@", 0)[0]}: split
     * throws on {@code null} and returns an empty array for inputs made up
     * solely of {@code '@'} characters (e.g. {@code "@"}), which made the
     * {@code [0]} access fail.
     *
     * @param email the raw value; may be {@code null}
     * @return {@code null} for {@code null} input, the whole value when it
     *         contains no {@code '@'}, otherwise the prefix before the first one
     */
    private static String localPart(String email) {
        if (email == null) {
            return null;
        }
        int at = email.indexOf('@');
        return at < 0 ? email : email.substring(0, at);
    }
}
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
package zingg.similarity.function;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import com.wcohen.ss.*;

public abstract class StringSimilarityDistanceFunction extends StringSimilarityFunction{

protected AbstractStringDistance gap;

public static final Log LOG = LogFactory
.getLog(StringSimilarityDistanceFunction.class);

public StringSimilarityDistanceFunction(String name) {
super(name);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ public StringSimilarityFunction(String name) {

/**
 * Scores two strings by case-insensitive equality.
 *
 * @param first  the first value; may be {@code null}
 * @param second the second value; may be {@code null}
 * @return {@code 1.0} when either value is {@code null} or blank, or when
 *         the two values are equal ignoring case; {@code 0.0} otherwise
 */
@Override
public Double call(String first, String second) {
    // A null or blank value scores 1.0. The earlier guards that returned 0.0
    // were superseded by these; leaving both in place made the 1.0 guards
    // unreachable.
    if (first == null || first.trim().isEmpty()) {
        return 1d;
    }
    if (second == null || second.trim().isEmpty()) {
        return 1d;
    }
    return first.equalsIgnoreCase(second) ? 1d : 0d;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
package zingg.similarity.function;


import java.util.Arrays;

import org.junit.jupiter.api.Test;

import static org.junit.jupiter.api.Assertions.assertEquals;

/**
 * Tests for {@link EmailMatchTypeFunction}.
 *
 * <p>Most cases check that the e-mail function gives the same score as
 * {@link AffineGapSimilarityFunction} applied to the text before the
 * {@code '@'} of each address; the remaining cases expect a score of 1.0.
 */
public class TestEmailMatchTypeFunction {

    /**
     * Builds the expectation input for one address: {@code null} stays
     * {@code null}, anything else is cut at the {@code '@'}.
     */
    private static String beforeAt(String email) {
        return email == null ? null : email.split("@", 0)[0];
    }

    /**
     * Asserts that the e-mail function scores the raw addresses exactly as the
     * affine-gap function scores their pre-'@' parts.
     */
    private static void assertSameAsAffineGapOnLocalParts(String first, String second) {
        EmailMatchTypeFunction emailFn = new EmailMatchTypeFunction();
        AffineGapSimilarityFunction affineGap = new AffineGapSimilarityFunction();
        Double wanted = affineGap.call(beforeAt(first), beforeAt(second));
        Double actual = emailFn.call(first, second);
        assertEquals(wanted, actual);
    }

    /** Asserts that the e-mail function scores the pair as 1.0. */
    private static void assertFullScore(String first, String second) {
        EmailMatchTypeFunction emailFn = new EmailMatchTypeFunction();
        assertEquals(1d, emailFn.call(first, second));
    }

    @Test
    public void testFirstEntryNull() {
        assertSameAsAffineGapOnLocalParts(null, "xyz321@pqr.co");
    }

    @Test
    public void testFirstEntryEmpty() {
        assertSameAsAffineGapOnLocalParts("", "xyz321@pqr.co");
    }

    @Test
    public void testSecondEntryNull() {
        assertSameAsAffineGapOnLocalParts("xyz321@pqr.co", null);
    }

    @Test
    public void testSecondEntryEmpty() {
        assertSameAsAffineGapOnLocalParts("xyz321@pqr.co", "");
    }

    @Test
    public void testBothEmpty() {
        assertSameAsAffineGapOnLocalParts("", "");
    }

    @Test
    public void testBothNull() {
        assertFullScore(null, null);
    }

    @Test
    public void testBothExact() {
        assertFullScore("xyz321@pqr.co", "xyz321@pqr.co");
    }

    @Test
    public void testbothDifferent() {
        assertSameAsAffineGapOnLocalParts("xyz321@pqr.co", "pqr981@abc.in");
    }

    @Test
    public void testFirstPartMatch() {
        assertFullScore("pqr981@abc.in", "pqr981@xyz.com");
    }

    @Test
    public void testFirstPartDifferentSecondPartMatch() {
        assertSameAsAffineGapOnLocalParts("pqr981@xyz.com", "aqr981@xyz.com");
    }

}

0 comments on commit c38df65

Please sign in to comment.