-
Notifications
You must be signed in to change notification settings - Fork 120
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #270 from navinrathore/generateDocs263
GenerateDocs becomes independent of Data
- Loading branch information
Showing
14 changed files
with
304 additions
and
100 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
39 changes: 39 additions & 0 deletions
39
core/src/main/java/zingg/documenter/DataColDocumenter.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
package zingg.documenter; | ||
|
||
import org.apache.commons.logging.Log; | ||
import org.apache.commons.logging.LogFactory; | ||
import org.apache.spark.sql.Dataset; | ||
import org.apache.spark.sql.Row; | ||
import org.apache.spark.sql.SparkSession; | ||
import org.apache.spark.sql.types.StructField; | ||
|
||
import zingg.client.Arguments; | ||
import zingg.client.ZinggClientException; | ||
|
||
public class DataColDocumenter extends DocumenterBase { | ||
protected static String name = "zingg.DataColDocumenter"; | ||
public static final Log LOG = LogFactory.getLog(DataColDocumenter.class); | ||
StopWordsDocumenter stopWordsDoc; | ||
|
||
public DataColDocumenter(SparkSession spark, Arguments args) { | ||
super(spark, args); | ||
stopWordsDoc = new StopWordsDocumenter(spark, args); | ||
} | ||
|
||
public void process(Dataset<Row> data) throws ZinggClientException { | ||
createStopWordsDocuments(data); | ||
} | ||
|
||
private void createStopWordsDocuments(Dataset<Row> data) throws ZinggClientException { | ||
if (!data.isEmpty()) { | ||
String columnsDir = args.getZinggDocDir(); | ||
checkAndCreateDir(columnsDir); | ||
|
||
for (StructField field: data.schema().fields()) { | ||
stopWordsDoc.createStopWordsDocument(data, field.name(), columnsDir); | ||
} | ||
} else { | ||
LOG.info("No Stop Words document generated"); | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
package zingg.documenter; | ||
|
||
import java.util.ArrayList; | ||
import java.util.HashMap; | ||
import java.util.List; | ||
import java.util.Map; | ||
|
||
import org.apache.commons.logging.Log; | ||
import org.apache.commons.logging.LogFactory; | ||
import org.apache.spark.sql.Dataset; | ||
import org.apache.spark.sql.Row; | ||
import org.apache.spark.sql.SparkSession; | ||
import org.apache.spark.sql.types.StructField; | ||
|
||
import zingg.client.Arguments; | ||
import zingg.client.ZinggClientException; | ||
import zingg.util.PipeUtil; | ||
|
||
public class DataDocumenter extends DocumenterBase { | ||
protected static String name = "zingg.DataDocumenter"; | ||
protected static String TEMPLATE_TITLE = "Data Documentation"; | ||
private final String DATA_DOC_TEMPLATE = "dataDocTemplate.ftlh"; | ||
|
||
public static final Log LOG = LogFactory.getLog(DataDocumenter.class); | ||
private DataColDocumenter dataColDoc; | ||
private Dataset<Row> data; | ||
|
||
public DataDocumenter(SparkSession spark, Arguments args) { | ||
super(spark, args); | ||
data = spark.emptyDataFrame(); | ||
dataColDoc = new DataColDocumenter(spark, args); | ||
} | ||
|
||
public void process() throws ZinggClientException { | ||
try { | ||
LOG.info("Data document generation starts"); | ||
|
||
try { | ||
data = PipeUtil.read(spark, false, false, args.getData()); | ||
LOG.info("Read input data : " + data.count()); | ||
} catch (ZinggClientException e) { | ||
LOG.warn("No data has been found"); | ||
} | ||
if (!data.isEmpty()) { | ||
createDataDocument(); | ||
dataColDoc.process(data); | ||
} else { | ||
LOG.info("No data document generated"); | ||
} | ||
LOG.info("Data document generation finishes"); | ||
} catch (Exception e) { | ||
e.printStackTrace(); | ||
throw new ZinggClientException(e.getMessage()); | ||
} | ||
} | ||
|
||
private void createDataDocument() throws ZinggClientException { | ||
if (!data.isEmpty()) { | ||
Map<String, Object> root = new HashMap<String, Object>(); | ||
root.put(TemplateFields.TITLE, TEMPLATE_TITLE); | ||
root.put(TemplateFields.MODEL_ID, args.getModelId()); | ||
|
||
List<String[]> list = new ArrayList<String[]> (); | ||
for (StructField field: data.schema().fields()) { | ||
String[] row = new String [3]; | ||
row[0] = field.name(); | ||
row[1] = field.dataType().toString(); | ||
row[2] = field.nullable()? "true": "false"; | ||
list.add(row); | ||
} | ||
root.put(TemplateFields.DATA_FIELDS_LIST, list); | ||
|
||
writeDocument(DATA_DOC_TEMPLATE, root, args.getZinggDataDocFile()); | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
55 changes: 55 additions & 0 deletions
55
core/src/main/java/zingg/documenter/ModelColDocumenter.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
package zingg.documenter; | ||
|
||
import java.util.HashMap; | ||
import java.util.Map; | ||
|
||
import org.apache.commons.logging.Log; | ||
import org.apache.commons.logging.LogFactory; | ||
import org.apache.spark.sql.Dataset; | ||
import org.apache.spark.sql.Row; | ||
import org.apache.spark.sql.SparkSession; | ||
import org.apache.spark.sql.types.StructField; | ||
|
||
import zingg.client.Arguments; | ||
import zingg.client.ZinggClientException; | ||
|
||
public class ModelColDocumenter extends DocumenterBase { | ||
protected static String name = "zingg.ModelColDocumenter"; | ||
public static final Log LOG = LogFactory.getLog(ModelColDocumenter.class); | ||
|
||
private final String COLUMN_DOC_TEMPLATE = "columnDocTemplate.ftlh"; | ||
private final String Z_COLUMN_TEMPLATE = "zColumnTemplate.ftlh"; | ||
|
||
public ModelColDocumenter(SparkSession spark, Arguments args) { | ||
super(spark, args); | ||
} | ||
|
||
public void process(Dataset<Row> data) throws ZinggClientException { | ||
createColumnDocuments(data); | ||
} | ||
|
||
private void createColumnDocuments(Dataset<Row> data) throws ZinggClientException { | ||
LOG.info("Column Documents generation starts"); | ||
if (!data.isEmpty()) { | ||
String columnsDir = args.getZinggDocDir(); | ||
checkAndCreateDir(columnsDir); | ||
for (StructField field: data.schema().fields()) { | ||
prepareAndWriteColumnDocument(field.name(), columnsDir); | ||
} | ||
} | ||
LOG.info("Column Documents generation finishes"); | ||
} | ||
|
||
private void prepareAndWriteColumnDocument(String fieldName, String columnsDir) throws ZinggClientException { | ||
Map<String, Object> root = new HashMap<String, Object>(); | ||
root.put(TemplateFields.TITLE, fieldName); | ||
root.put(TemplateFields.MODEL_ID, args.getModelId()); | ||
|
||
String filenameHTML = columnsDir + fieldName + ".html"; | ||
if (isZColumn(fieldName)) { | ||
writeDocument(Z_COLUMN_TEMPLATE, root, filenameHTML); | ||
} else { | ||
writeDocument(COLUMN_DOC_TEMPLATE, root, filenameHTML); | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.