Commit f1cfae9

Spark version update

1 parent 0277808

File tree

3 files changed: 28 additions & 10 deletions

  README.md
  build.sbt
  src/main/scala/com/techmonad/pipeline/DataPipeline.scala

README.md
Lines changed: 19 additions & 0 deletions

```diff
@@ -1 +1,20 @@
 # spark-data-pipeline
+
+#### Elasticsearch Setup
+i) [Download](https://www.elastic.co/downloads/elasticsearch) Elasticsearch 6.3.0 or a later version and unzip it.
+
+ii) Run the following command:
+
+    $ bin/elasticsearch
+
+
+
+#### Getting Started
+
+Clone and run in local mode:
+
+    $ git clone git@github.com:techmonad/spark-data-pipeline.git
+    $ cd spark-data-pipeline
+    $ sbt run
+
+
```
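Before `sbt run`, it is worth confirming that the node started with `bin/elasticsearch` is actually reachable. A minimal sketch in Scala, assuming the default HTTP port 9200 and no authentication (the `EsHealthCheck` object is illustrative, not part of the repo):

```scala
// Quick reachability probe for a local Elasticsearch node.
// Assumes the default HTTP port 9200 and no authentication.
object EsHealthCheck {
  def main(args: Array[String]): Unit = {
    // The root endpoint replies with a small JSON document containing
    // the cluster name and version when the node is up.
    val response = scala.io.Source.fromURL("http://localhost:9200").mkString
    println(response)
  }
}
```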

build.sbt
Lines changed: 5 additions & 4 deletions

```diff
@@ -2,14 +2,15 @@ name := "spark-data-pipeline"
 
 version := "1.0"
 
-scalaVersion := "2.11.8"
+scalaVersion := "2.11.11"
 
 
 libraryDependencies ++= Seq(
-  "org.apache.spark" %% "spark-core" % "2.1.0",
-  "org.elasticsearch" %% "elasticsearch-spark-20" % "5.6.0",
+  "org.apache.spark" %% "spark-core" % "2.3.1",
+  "com.univocity" % "univocity-parsers" % "2.6.4",
+  "org.elasticsearch" %% "elasticsearch-spark-20" % "6.3.0",
   "edu.stanford.nlp" % "stanford-corenlp" % "3.6.0" artifacts(Artifact("stanford-corenlp", "models"), Artifact("stanford-corenlp")),
   "ch.qos.logback" % "logback-classic" % "1.2.3",
-  "org.json4s" %% "json4s-native" % "3.5.0",
+  "org.json4s" %% "json4s-native" % "3.5.4",
   "org.scalatest" %% "scalatest" % "3.0.1"
 )
```
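The newly added `univocity-parsers` dependency is a standalone CSV parsing library, presumably backing the project's `CSVReader`. A minimal sketch of its core API, independent of the repo's actual reader code:

```scala
import java.io.StringReader
import scala.collection.JavaConverters._
import com.univocity.parsers.csv.{CsvParser, CsvParserSettings}

object CsvParseSketch {
  def main(args: Array[String]): Unit = {
    val settings = new CsvParserSettings()
    settings.setHeaderExtractionEnabled(true) // treat the first row as a header
    val parser = new CsvParser(settings)
    // parseAll returns a java.util.List[Array[String]], one array per data row
    val rows = parser.parseAll(new StringReader("id,text\n1,hello\n2,world"))
    rows.asScala.foreach(row => println(row.mkString(" | ")))
  }
}
```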

src/main/scala/com/techmonad/pipeline/DataPipeline.scala
Lines changed: 4 additions & 6 deletions

```diff
@@ -36,7 +36,7 @@ object DataPipeline {
     }
   }
 
-  private def applySource(source: Source)(implicit sc: SparkContext) = {
+  private def applySource(source: Source)(implicit sc: SparkContext): RDD[Record] = {
     CSVReader.read(source.path)
   }
 
@@ -54,16 +54,14 @@ object DataPipeline {
     transformations match {
       case Nil => rdd
       case head :: tail =>
-        applyTransformation(
-          Transformations.get(head).map { v => rdd.map(v.transform) }.getOrElse(rdd)
-          , tail)
+        applyTransformation(Transformations.get(head).map { v => rdd.map(v.transform) }.getOrElse(rdd), tail)
     }
 
-  private def applySchemaValidation(rdd: RDD[Record], validations: List[String]) = {
+  private def applySchemaValidation(rdd: RDD[Record], validations: List[String]): RDD[Record] = {
     applyValidation(rdd, validations)
   }
 
-  private def applySink(rdd: RDD[Record], sink: Sink) =
+  private def applySink(rdd: RDD[Record], sink: Sink): ESPersistenceRDD =
     sink.`type` match {
       case "ES" => new ESPersistenceRDD(rdd)
     }
```
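The collapsed `applyTransformation` reads as what it is: a recursive walk over a list of transformation names, mapping each resolvable transformation over the RDD and leaving the RDD unchanged for names the registry does not know. A self-contained model of that shape, with hypothetical `Record`, `Transformation`, and `Transformations` stand-ins for types this diff does not show:

```scala
import org.apache.spark.rdd.RDD

case class Record(data: Map[String, String]) // hypothetical stand-in

trait Transformation extends Serializable {
  def transform(record: Record): Record
}

object Transformations {
  // Hypothetical registry; the real project resolves names elsewhere.
  private val registry: Map[String, Transformation] = Map.empty
  def get(name: String): Option[Transformation] = registry.get(name)
}

object TransformationModel {
  // Recursively applies each named transformation in order; unknown
  // names are skipped rather than failing the pipeline.
  @annotation.tailrec
  def applyTransformation(rdd: RDD[Record], names: List[String]): RDD[Record] =
    names match {
      case Nil => rdd
      case head :: tail =>
        applyTransformation(
          Transformations.get(head).map(t => rdd.map(t.transform)).getOrElse(rdd),
          tail)
    }
}
```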

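The diff also pins `applySink`'s return type to `ESPersistenceRDD`, whose internals are not part of this commit. For reference, the standard way to write an RDD to Elasticsearch with the `elasticsearch-spark-20` connector (now at 6.3.0) is the implicit `saveToEs` extension; a minimal sketch, assuming a local node and an illustrative `documents/doc` index/type:

```scala
import org.apache.spark.{SparkConf, SparkContext}
import org.elasticsearch.spark._ // adds saveToEs to RDDs

object EsSinkSketch {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setAppName("es-sink-sketch")
      .setMaster("local[*]")
      .set("es.nodes", "localhost")        // the node from the README setup
      .set("es.port", "9200")
      .set("es.index.auto.create", "true") // create the index on first write
    val sc = new SparkContext(conf)
    val docs = sc.makeRDD(Seq(
      Map("id" -> "1", "text" -> "hello"),
      Map("id" -> "2", "text" -> "world")))
    docs.saveToEs("documents/doc") // index/type names are illustrative
    sc.stop()
  }
}
```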