Skip to content

Commit

Permalink
Conversion of the Java DataFrame to a pandas DataFrame
Browse files Browse the repository at this point in the history
  • Loading branch information
sonalgoyal committed May 30, 2022
1 parent 68b22bd commit d624965
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 6 deletions.
Binary file modified python/phases/__pycache__/zingg.cpython-39.pyc
Binary file not shown.
20 changes: 14 additions & 6 deletions python/phases/assessModel.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
from zingg import *
from pyspark.sql import DataFrame
import pandas as pd
import seaborn as sn
import matplotlib.pyplot as plt
from IPython.display import display

# Set up the job arguments and two field definitions, one per column.
# Each FieldDefinition receives three values: an identifier string, a type
# string that itself contains double quotes ("\"string\""), and a one-element
# list holding the JVM-side constant zingg.client.MatchType.FUZZY reached
# through sc._jvm.
# NOTE(review): `sc`, `Arguments` and `FieldDefinition` are not defined in the
# visible lines; presumably they come from `from zingg import *` -- confirm.
args = Arguments()
fname = FieldDefinition("fname","\"string\"",[sc._jvm.zingg.client.MatchType.FUZZY])
lname = FieldDefinition("lname","\"string\"",[sc._jvm.zingg.client.MatchType.FUZZY])
Expand All @@ -20,10 +23,15 @@
# Build the client from the arguments and options, initialise it, then run it.
# NOTE(review): `options` is assigned in lines collapsed out of this view.
client = Client(args, options)
client.init()
client.execute()
# NOTE(review): this page shows a diff without +/- markers. Judging by the
# hunk header (-20,10 +23,15), the next six statements look like the
# pre-commit version that this change removes -- confirm against the repo.
# They call select("*").toPandas() directly on whatever getMarkedRecords()
# returns and pass the result to the stat helpers. `value` is not defined in
# the visible lines.
mark_spark = client.getMarkedRecords()
mark = mark_spark.select("*").toPandas()
marked = client.getMarkedRecordsStat(mark, value)
matched_marked = client.getMatchedMarkedRecordsStat(mark)
unmatched_marked = client.getUnmatchedMarkedRecordsStat(mark)
unsure_marked = client.getUnsureMarkedRecordsStat(mark)
# NOTE(review): the statements from here on look like the post-commit version.
# The object returned by getMarkedRecords() is wrapped in a
# pyspark.sql.DataFrame (together with sqlContext) before toPandas() is
# called. The variable name suggests the raw return value is a JVM-side
# DataFrame handle -- confirm. The two print() calls show the object before
# and after wrapping; display() shows the resulting pandas frame.
# NOTE(review): `sqlContext` is not defined in the visible lines.
jMarkedDF = client.getMarkedRecords()
print(jMarkedDF)
markedDF = DataFrame(jMarkedDF, sqlContext)
print(markedDF)
pMarkedDF = markedDF.toPandas()
display(pMarkedDF)

# The stat helper calls below are disabled. They still refer to `mark` and
# `value`, which the statements directly above do not assign.
#marked = client.getMarkedRecordsStat(mark, value)
#matched_marked = client.getMatchedMarkedRecordsStat(mark)
#unmatched_marked = client.getUnmatchedMarkedRecordsStat(mark)
#unsure_marked = client.getUnsureMarkedRecordsStat(mark)

0 comments on commit d624965

Please sign in to comment.