[BEAM-7209][BEAM-9351][BEAM-9428] Upgrade Hive to version 3.1.3 #17749

Merged · 5 commits · Jun 15, 2022
1 change: 1 addition & 0 deletions CHANGES.md
@@ -59,6 +59,7 @@
## I/Os

* Support for X source added (Java/Python) ([BEAM-X](https://issues.apache.org/jira/browse/BEAM-X)).
+* Upgraded to Hive 3.1.3 for HCatalogIO. Users can still provide their own version of Hive. (Java) ([Issue-19554](https://github.com/apache/beam/issues/19554)).

## New Features / Improvements

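As context for this changelog entry — a minimal, hypothetical sketch of reading a Hive table through HCatalogIO after the upgrade (the metastore URI, database, and table names are assumed placeholders, not part of this PR):

```java
import java.util.HashMap;
import java.util.Map;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.hcatalog.HCatalogIO;
import org.apache.beam.sdk.values.PCollection;
import org.apache.hive.hcatalog.data.HCatRecord;

public class HCatalogReadSketch {
  public static void main(String[] args) {
    Pipeline pipeline = Pipeline.create();

    // Assumed metastore location; point this at a real Thrift URI.
    Map<String, String> configProperties = new HashMap<>();
    configProperties.put("hive.metastore.uris", "thrift://localhost:9083");

    PCollection<HCatRecord> records =
        pipeline.apply(
            HCatalogIO.read()
                .withConfigProperties(configProperties)
                .withDatabase("default") // optional; "default" is assumed if unset
                .withTable("my_table"));

    pipeline.run().waitUntilFinish();
  }
}
```

As the entry notes, users who need a Hive version other than the bundled 3.1.3 can still override the Hive dependencies in their own build.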
@@ -53,12 +53,6 @@ class Repositories {
url "https://packages.confluent.io/maven/"
content { includeGroup "io.confluent" }
}
-
-// For pentaho dependencies.
-maven {
-  url "https://public.nexus.pentaho.org/repository/omni"
-  content { includeGroup "org.pentaho" }
-}
}

// Apply a plugin which provides the 'updateOfflineRepository' task that creates an offline
@@ -76,7 +70,6 @@ class Repositories {
maven { url "https://plugins.gradle.org/m2/" }
maven { url "https://repo.spring.io/plugins-release" }
maven { url "https://packages.confluent.io/maven/" }
maven { url "https://public.nexus.pentaho.org/repository/omni" }
maven { url project.offlineRepositoryRoot }
}
includeSources = false
24 changes: 10 additions & 14 deletions sdks/java/extensions/sql/hcatalog/build.gradle
@@ -25,22 +25,17 @@ applyJavaNature(
],
)

def hive_version = "2.1.0"
def hive_version = "3.1.3"
def netty_version = "4.1.51.Final"

-configurations.all {
-  resolutionStrategy {
-    // Pin log4j as workaround for CVE-2021-44228
-    // HIVE-25804 should address this upstream, but only in 4.0
-    // TODO(BEAM-9351): Upgrade Hive and remove this pin
-    def log4j_version = "2.17.1"
-    force "org.apache.logging.log4j:log4j-api:${log4j_version}"
-    force "org.apache.logging.log4j:log4j-core:${log4j_version}"
-    force "org.apache.logging.log4j:log4j-slf4j-impl:${log4j_version}"
-    force "org.apache.logging.log4j:log4j-1.2-api:${log4j_version}"
-    force "org.apache.logging.log4j:log4j-web:${log4j_version}"
-  }
-}
+/*
+ * We need to rely on manually specifying these evaluationDependsOn to ensure that
+ * the following projects are evaluated before we evaluate this project. This is because
+ * we are attempting to reference the "sourceSets.test.output" directly.
+ * TODO: use the testFixtures feature introduced in Gradle 5.6 instead of
+ * the test outputs directly.
+ */
+evaluationDependsOn(":sdks:java:io:hcatalog")

dependencies {
implementation project(":sdks:java:extensions:sql")
@@ -49,6 +44,7 @@ dependencies {
implementation "com.alibaba:fastjson:1.2.69"
implementation library.java.vendored_guava_26_0_jre

+testImplementation project(":sdks:java:io:hcatalog").sourceSets.test.output
// Needed for HCatalogTableProvider tests,
// they use HCat* types
testImplementation "io.netty:netty-all:$netty_version"
@@ -235,7 +235,7 @@ public void testJoinMultipleExtraProvidersWithImplicitHiveDB() throws Exception
pipeline.run();
}

-private void reCreateTestTable() throws Exception {
+private void reCreateTestTable() {
service.executeQuery("drop table " + TEST_TABLE);
service.executeQuery("create table " + TEST_TABLE + "(f_str string, f_int int)");
}
25 changes: 2 additions & 23 deletions sdks/java/io/hcatalog/build.gradle
@@ -37,34 +37,13 @@ def hadoopVersions = [

hadoopVersions.each {kv -> configurations.create("hadoopVersion$kv.key")}

def hive_version = "2.1.0"

test {
// TODO: Get tests to run. Known issues:
// * calcite-avatica bundles w/o repackaging Jackson (CALCITE-1110)
// * hive-exec bundles w/o repackaging Guava (HIVE-13690)
ignoreFailures true
}

configurations.all {
resolutionStrategy {
// Pin log4j as workaround for CVE-2021-44228
// HIVE-25804 should address this upstream, but only in 4.0
// TODO(BEAM-9351): Upgrade Hive and remove this pin
def log4j_version = "2.17.1"
force "org.apache.logging.log4j:log4j-api:${log4j_version}"
force "org.apache.logging.log4j:log4j-core:${log4j_version}"
force "org.apache.logging.log4j:log4j-slf4j-impl:${log4j_version}"
force "org.apache.logging.log4j:log4j-1.2-api:${log4j_version}"
force "org.apache.logging.log4j:log4j-web:${log4j_version}"
}
}
def hive_version = "3.1.3"

/*
* We need to rely on manually specifying these evaluationDependsOn to ensure that
* the following projects are evaluated before we evaluate this project. This is because
* we are attempting to reference the "sourceSets.test.output" directly.
- * TODO: Swap to generating test artifacts which we can then rely on instead of
+ * TODO: use the testFixtures feature introduced in Gradle 5.6 instead of
* the test outputs directly.
*/
evaluationDependsOn(":sdks:java:io:common")
@@ -62,7 +62,6 @@
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList;
import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
-import org.apache.hadoop.hive.ql.CommandNeedRetryException;
import org.apache.hive.hcatalog.data.DefaultHCatRecord;
import org.apache.hive.hcatalog.data.HCatRecord;
import org.apache.hive.hcatalog.data.transfer.ReaderContext;
@@ -369,7 +368,7 @@ private void reCreateTestTable() {
service.executeQuery("create table " + TEST_TABLE + "(mycol1 string, mycol2 int)");
}

-private void reCreateTestTableForUnboundedReads() throws CommandNeedRetryException {
+private void reCreateTestTableForUnboundedReads() {
service.executeQuery("drop table " + TEST_TABLE);
service.executeQuery(
"create table "
@@ -26,8 +26,9 @@
import org.apache.beam.sdk.annotations.Internal;
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.ql.CommandNeedRetryException;
-import org.apache.hadoop.hive.ql.Driver;
+import org.apache.hadoop.hive.ql.DriverFactory;
+import org.apache.hadoop.hive.ql.IDriver;
+import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse;
import org.apache.hadoop.hive.ql.session.SessionState;

/**
@@ -40,7 +41,7 @@
*/
@Internal
public final class EmbeddedMetastoreService implements AutoCloseable {
-private final Driver driver;
+private final IDriver driver;
private final HiveConf hiveConf;
private final SessionState sessionState;

@@ -64,21 +65,19 @@ public EmbeddedMetastoreService(String baseDirPath) throws IOException {
hiveConf.setBoolVar(HiveConf.ConfVars.HIVEOPTIMIZEMETADATAQUERIES, true);
hiveConf.setVar(
HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER,
"org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd."
+ "SQLStdHiveAuthorizerFactory");
"org.apache.hadoop.hive.ql.security.authorization.DefaultHiveAuthorizationProvider");
hiveConf.set("test.tmp.dir", hiveDirPath);

System.setProperty("derby.stream.error.file", "/dev/null");
-driver = new Driver(hiveConf);
+driver = DriverFactory.newDriver(hiveConf);
sessionState = SessionState.start(new SessionState(hiveConf));
}

/** Executes the passed query on the embedded metastore service. */
public void executeQuery(String query) {
-try {
-  driver.run(query);
-} catch (CommandNeedRetryException e) {
-  throw new RuntimeException(e);
+CommandProcessorResponse response = driver.run(query);
+if (response.failed()) {
+  throw new RuntimeException(response.getException());
}
}

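For orientation, a hedged sketch of how a test might drive the reworked utility — the scratch directory is an assumed placeholder, and the point is that executeQuery failures now surface as RuntimeException (via CommandProcessorResponse.failed()) rather than the checked CommandNeedRetryException that Hive 3 removed:

```java
// Sketch only: exercises the IDriver-based EmbeddedMetastoreService.
static void recreateTestTable() throws Exception {
  try (EmbeddedMetastoreService service =
      new EmbeddedMetastoreService("/tmp/beam-hcatalog-test")) { // assumed path
    service.executeQuery("drop table if exists test_tbl");
    service.executeQuery("create table test_tbl (f_str string, f_int int)");
  } // close() releases the embedded metastore resources
}
```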
@@ -25,6 +25,12 @@
<description>Internal marker for test. Used for masking env-dependent values</description>
</property>

<!-- Properties for test folders -->
<property>
<name>mapreduce.jobtracker.staging.root.dir</name>
<value>${test.tmp.dir}/cli/mapred/staging</value>
</property>

<!-- Hive Configuration can either be stored in this file or in the hadoop configuration files -->
<!-- that are implied by Hadoop setup variables. -->
<!-- Aside from Hadoop setup variables - this file is provided as a convenience so that Hive -->
Expand Down Expand Up @@ -63,9 +69,14 @@
<value>true</value>
</property>

<property>
<name>hive.metastore.schema.verification</name>
<value>false</value>
</property>

<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:derby:;databaseName=${test.tmp.dir}/junit_metastore_db;create=true</value>
<value>jdbc:derby:memory:${test.tmp.dir}/junit_metastore_db;create=true</value>
</property>

<property>
Expand Down Expand Up @@ -136,7 +147,7 @@

<property>
<name>hive.exec.pre.hooks</name>
<value>org.apache.hadoop.hive.ql.hooks.PreExecutePrinter, org.apache.hadoop.hive.ql.hooks.EnforceReadOnlyTables</value>
<value>org.apache.hadoop.hive.ql.hooks.PreExecutePrinter, org.apache.hadoop.hive.ql.hooks.EnforceReadOnlyTables, org.apache.hadoop.hive.ql.hooks.MaterializedViewRegistryPropertiesHook</value>
<description>Pre Execute Hook for Tests</description>
</property>

Expand Down Expand Up @@ -260,6 +271,12 @@
<value>hive_admin_user</value>
</property>

<property>
<name>hive.security.authorization.manager</name>
<value>org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactoryForTest</value>
<description>The Hive client authorization manager class name.</description>
</property>

<property>
<name>hive.llap.io.cache.orc.size</name>
<value>8388608</value>
Expand Down Expand Up @@ -291,11 +308,36 @@
<value>true</value>
</property>


<property>
<name>hive.llap.io.allocator.direct</name>
<value>false</value>
</property>

<property>
<name>hive.stats.column.autogather</name>
<value>true</value>
</property>

<property>
<name>hive.materializedview.rewriting</name>
<value>true</value>
</property>

<property>
<name>hive.stats.fetch.bitvector</name>
<value>true</value>
</property>


<property>
<name>yarn.nodemanager.disk-health-checker.max-disk-utilization-per-disk-percentage</name>
<value>99</value>
</property>

<property>
<name>hive.query.results.cache.enabled</name>
<value>false</value>
</property>


</configuration>
1 change: 0 additions & 1 deletion vendor/calcite-1_28_0/build.gradle
@@ -57,7 +57,6 @@ List<String> packagesToRelocate = [
"org.apiguardian.api",
"org.codehaus",
"org.objectweb",
"org.pentaho",
"org.yaml",
]
