Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move populate test data to compile scope #5138

Merged
merged 1 commit into from
Jan 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions .github/workflows/populate-it-data.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@ jobs:
run: scripts/gha_setup.sh
env:
CLOUDSQL_SQLSERVER_PASSWORD: ${{ secrets.CLOUDSQL_SQLSERVER_PASSWORD }}
- name: Temporarily remove conflicting Scala classes # some IT test classes won't compile because they depend on macros based on test data that's missing
run: find integration/src/test/scala/com/spotify/scio/bigquery -type f ! -name 'PopulateTestData.scala' -delete
- name: Populate BQ test data
run: sbt "integration/Test/runMain com.spotify.scio.bigquery.PopulateTestData"
- name: Populate GCS test data
run: sbt "integration/Test/runMain com.spotify.scio.PopulateTestData"
run: sbt "integration/runMain com.spotify.scio.PopulateTestData"
- name: Populate BQ test data
run: sbt "integration/runMain com.spotify.scio.bigquery.PopulateTestData"
- name: Populate SQL test data
run: sbt "integration/runMain com.spotify.scio.jdbc.PopulateTestData"
28 changes: 21 additions & 7 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -1415,14 +1415,14 @@ lazy val `scio-redis` = project
lazy val integration = project
.in(file("integration"))
.dependsOn(
`scio-core` % "test->provided,test",
`scio-core` % "compile",
`scio-avro` % "test->test",
`scio-test` % "test->test",
`scio-cassandra3` % "test->test",
`scio-elasticsearch8` % "test->test",
`scio-extra` % "test->test",
`scio-google-cloud-platform` % "test->test",
`scio-jdbc` % "test->test",
`scio-google-cloud-platform` % "compile;test->test",
`scio-jdbc` % "compile;test->test",
`scio-neo4j` % "test->test",
`scio-smb` % "test->provided,test"
)
Expand All @@ -1433,17 +1433,31 @@ lazy val integration = project
publish / skip := true,
mimaPreviousArtifacts := Set.empty,
libraryDependencies ++= Seq(
// compile
"com.google.api-client" % "google-api-client" % googleApiClientVersion,
"com.google.apis" % "google-api-services-bigquery" % googleApiServicesBigQueryVersion,
"com.google.guava" % "guava" % guavaVersion,
"com.google.http-client" % "google-http-client" % googleHttpClientsVersion,
"com.google.protobuf" % "protobuf-java" % protobufVersion,
"com.microsoft.sqlserver" % "mssql-jdbc" % "12.4.2.jre11",
"joda-time" % "joda-time" % jodaTimeVersion,
"org.apache.avro" % "avro" % avroVersion,
"org.apache.beam" % "beam-sdks-java-core" % beamVersion,
"org.apache.beam" % "beam-sdks-java-io-google-cloud-platform" % beamVersion,
"org.slf4j" % "slf4j-api" % slf4jVersion,
// runtime
"com.google.cloud.sql" % "cloud-sql-connector-jdbc-sqlserver" % "1.15.0" % Runtime,
"org.apache.beam" % "beam-runners-direct-java" % beamVersion % Runtime,
"org.slf4j" % "slf4j-simple" % slf4jVersion % Runtime,
// test
"com.dimafeng" %% "testcontainers-scala-elasticsearch" % testContainersVersion % Test,
"com.dimafeng" %% "testcontainers-scala-neo4j" % testContainersVersion % Test,
"com.dimafeng" %% "testcontainers-scala-scalatest" % testContainersVersion % Test,
"com.fasterxml.jackson.core" % "jackson-databind" % jacksonVersion % Test,
"com.fasterxml.jackson.module" %% "jackson-module-scala" % jacksonVersion % Test,
"com.google.cloud.sql" % "cloud-sql-connector-jdbc-sqlserver" % "1.15.0" % Test,
"com.microsoft.sqlserver" % "mssql-jdbc" % "12.4.2.jre11" % Test,
"com.spotify" %% "magnolify-datastore" % magnolifyVersion % Test,
"org.apache.beam" % "beam-sdks-java-io-google-cloud-platform" % beamVersion % Test,
"org.slf4j" % "slf4j-simple" % slf4jVersion % Test
"org.apache.beam" % "beam-runners-google-cloud-dataflow-java" % beamVersion % Test,
"org.apache.beam" % "beam-sdks-java-io-google-cloud-platform" % beamVersion % Test
)
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,10 @@ import scala.jdk.CollectionConverters._
object PopulateTestData {
private lazy val log = LoggerFactory.getLogger(getClass)

def main(args: Array[String]): Unit = {
populateFiles("data-integration-test-eu")
populateSql()
}
def main(args: Array[String]): Unit =
populate("data-integration-test-eu")

def populateFiles(bucket: String): Unit = {
def populate(bucket: String): Unit = {
FileSystems.setDefaultPipelineOptions(PipelineOptionsFactory.create())

val root = Paths.get("src/test/resources")
Expand All @@ -53,39 +51,4 @@ object PopulateTestData {
log.info(s"Populated file $resourceId.")
}
}

// See https://learn.microsoft.com/en-us/sql/connect/ado-net/sql/compare-guid-uniqueidentifier-values?view=sql-server-ver16
def populateSql(): Unit = {
import com.spotify.scio.jdbc.sharded.JdbcUtils
import com.spotify.scio.jdbc.JdbcIOIT._

val conn = JdbcUtils.createConnection(connection)
try {
val stmt = conn.createStatement()
val query =
s"""DROP TABLE IF EXISTS $tableId;
|CREATE TABLE $tableId
|(
| guid UNIQUEIDENTIFIER
| CONSTRAINT guid_default DEFAULT
| NEWSEQUENTIALID() ROWGUIDCOL,
| name VARCHAR(60),
|
| CONSTRAINT guid_pk PRIMARY KEY (guid)
|);
|INSERT INTO $tableId (guid, name)
|VALUES
| (CAST('3AAAAAAA-BBBB-CCCC-DDDD-2EEEEEEEEEEE' AS UNIQUEIDENTIFIER), 'Bob'),
| (CAST('2AAAAAAA-BBBB-CCCC-DDDD-1EEEEEEEEEEE' AS UNIQUEIDENTIFIER), 'Alice'),
| (CAST('1AAAAAAA-BBBB-CCCC-DDDD-3EEEEEEEEEEE' AS UNIQUEIDENTIFIER), 'Carol');
|""".stripMargin
try {
stmt.execute(query)
} finally {
stmt.close()
}
} finally {
conn.close()
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
/*
* Copyright 2024 Spotify AB.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package com.spotify.scio.jdbc

import com.spotify.scio.jdbc.sharded.JdbcUtils

/**
 * Command-line entry point that (re)creates the `employee` table through
 * `SqlServer.connection` and fills it with three fixed rows.
 */
object PopulateTestData {

  /** Runs [[populate]]; `args` is not read. */
  def main(args: Array[String]): Unit = populate()

  /**
   * Drops the `employee` table if it exists, recreates it with a `guid` primary key and a `name`
   * column, and inserts three rows with hard-coded GUIDs.
   *
   * The statement and the connection are both closed even when execution throws.
   *
   * See https://learn.microsoft.com/en-us/sql/connect/ado-net/sql/compare-guid-uniqueidentifier-values?view=sql-server-ver16
   */
  def populate(): Unit = {
    // Whole setup script, sent to the server as a single batch.
    val script =
      """DROP TABLE IF EXISTS employee;
        |CREATE TABLE employee
        |(
        | guid UNIQUEIDENTIFIER
        | CONSTRAINT guid_default DEFAULT
        | NEWSEQUENTIALID() ROWGUIDCOL,
        | name VARCHAR(60),
        |
        | CONSTRAINT guid_pk PRIMARY KEY (guid)
        |);
        |INSERT INTO employee (guid, name)
        |VALUES
        | (CAST('3AAAAAAA-BBBB-CCCC-DDDD-2EEEEEEEEEEE' AS UNIQUEIDENTIFIER), 'Bob'),
        | (CAST('2AAAAAAA-BBBB-CCCC-DDDD-1EEEEEEEEEEE' AS UNIQUEIDENTIFIER), 'Alice'),
        | (CAST('1AAAAAAA-BBBB-CCCC-DDDD-3EEEEEEEEEEE' AS UNIQUEIDENTIFIER), 'Carol');
        |""".stripMargin

    val connection = JdbcUtils.createConnection(SqlServer.connection)
    try {
      val statement = connection.createStatement()
      try statement.execute(script)
      finally statement.close()
    } finally connection.close()
  }
}
21 changes: 21 additions & 0 deletions integration/src/main/scala/com/spotify/scio/jdbc/SqlServer.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
package com.spotify.scio.jdbc

/** Identifiers and JDBC connection options for the SQL Server instance referenced below. */
object SqlServer {
  val projectId = "data-integration-test"
  val regionId = "us-central1"
  val sqlServerInstanceId = "scio-sql-server-it"
  val databaseId = "shard-it"
  val username = "sqlserver"

  /** Read from the `cloudsql.sqlserver.password` JVM system property; `None` when it is unset. */
  val password = sys.props.get("cloudsql.sqlserver.password")

  /** Connection options built from the values above, using the SQL Server JDBC driver class. */
  val connection = {
    // Instance coordinates in the `project:region:instance` form passed to the socket factory.
    val socketFactoryArg = s"$projectId:$regionId:$sqlServerInstanceId"
    val jdbcUrl =
      "jdbc:sqlserver://localhost;" +
        "socketFactoryClass=com.google.cloud.sql.sqlserver.SocketFactory;" +
        s"socketFactoryConstructorArg=$socketFactoryArg;" +
        s"databaseName=$databaseId;" +
        "encrypt=false" // otherwise we'll have to generate certificates

    JdbcConnectionOptions(
      username,
      password,
      jdbcUrl,
      classOf[com.microsoft.sqlserver.jdbc.SQLServerDriver]
    )
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ import org.scalatest.matchers.should.Matchers
import scala.concurrent.Await
import scala.concurrent.duration.Duration

// scio-test/it:runMain com.spotify.scio.PopulateTestData to re-populate data for integration tests
// integration/runMain com.spotify.scio.PopulateTestData to re-populate data for integration tests
final class AvroTapIT extends AnyFlatSpec with Matchers {
private val schema = new Parser().parse("""{
| "type" : "record",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ import org.scalatest.matchers.should.Matchers
import scala.annotation.StaticAnnotation
import scala.reflect.runtime.universe._

// scio-test/it:runMain com.spotify.scio.PopulateTestData to re-populate data for integration tests
// integration/runMain com.spotify.scio.PopulateTestData to re-populate data for integration tests
object AvroTypeIT {
@AvroType.fromPath(
"gs://data-integration-test-eu/avro-integration-test/folder-a/folder-b/shakespeare.avro"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ import org.scalatest.flatspec.AnyFlatSpec
import scala.jdk.CollectionConverters._
import scala.util.Success

// scio-test/it:runMain com.spotify.scio.PopulateTestData to re-populate data for integration tests
// integration/runMain com.spotify.scio.PopulateTestData to re-populate data for integration tests
class BigQueryClientIT extends AnyFlatSpec with Matchers {
private[this] val bq = BigQuery.defaultInstance()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import com.spotify.scio.bigquery.client.BigQuery
import org.scalatest.matchers.should.Matchers
import org.scalatest.flatspec.AnyFlatSpec

// scio-test/it:runMain com.spotify.scio.PopulateTestData to re-populate data for integration tests
// integration/runMain com.spotify.scio.PopulateTestData to re-populate data for integration tests
class BigQueryPartitionUtilIT extends AnyFlatSpec with Matchers {
val bq: BigQuery = BigQuery.defaultInstance()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ import org.scalatest.matchers.should.Matchers

import scala.jdk.CollectionConverters._

// scio-test/it:runMain com.spotify.scio.PopulateTestData to re-populate data for integration tests
// integration/runMain com.spotify.scio.PopulateTestData to re-populate data for integration tests
class BigQueryStorageIT extends AnyFlatSpec with Matchers {
import BigQueryStorageIT._

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ object BigQueryTypeIT {
class ShakespeareWithSequentialAnnotations
}

// scio-test/it:runMain com.spotify.scio.PopulateTestData to re-populate data for integration tests
// integration/runMain com.spotify.scio.PopulateTestData to re-populate data for integration tests
class BigQueryTypeIT extends AnyFlatSpec with Matchers {
import BigQueryTypeIT._

Expand Down
28 changes: 5 additions & 23 deletions integration/src/test/scala/com/spotify/scio/jdbc/JdbcIOIT.scala
Original file line number Diff line number Diff line change
Expand Up @@ -22,36 +22,18 @@ import com.spotify.scio.testing.PipelineSpec
import org.apache.beam.sdk.options.PipelineOptionsFactory

object JdbcIOIT {
val projectId = "data-integration-test"
val regionId = "us-central1"
val sqlServerInstanceId = "scio-sql-server-it"
val databaseId = "shard-it"
val tableId = "employee"
val username = "sqlserver"
val password = sys.props.get("cloudsql.sqlserver.password")

val connection = JdbcConnectionOptions(
username,
password,
s"jdbc:sqlserver://localhost;" +
"socketFactoryClass=com.google.cloud.sql.sqlserver.SocketFactory;" +
s"socketFactoryConstructorArg=$projectId:$regionId:$sqlServerInstanceId;" +
s"databaseName=$databaseId;" +
"encrypt=false", // otherwise we'll have to generate certificates
classOf[com.microsoft.sqlserver.jdbc.SQLServerDriver]
)

val shardColumn = "guid"
final case class Employee(guid: String, name: String)
}

// integration/runMain com.spotify.scio.jdbc.PopulateTestData to re-populate data for integration tests
class JdbcIOIT extends PipelineSpec {
import JdbcIOIT._

"JdbcIO" should "shard SQL Server on gid" in {
val readOptions = JdbcShardedReadOptions(
connection,
tableId,
shardColumn,
SqlServer.connection,
"employee",
"guid",
Shard.range[SqlServerUuidLowerString],
rs => Employee(rs.getString(1), rs.getString(2)),
numShards = 3
Expand Down