Skip to content

Commit

Permalink
Improve modularity of presto-hive-common module
Browse files Browse the repository at this point in the history
Refactoring presto-hive-common module by creating a separate module for
files required by presto-parquet and presto-cache modules to reduce
dependency clutter
  • Loading branch information
abhiseksaikia authored and arhimondr committed May 22, 2024
1 parent b4721ff commit b258543
Show file tree
Hide file tree
Showing 28 changed files with 237 additions and 18 deletions.
7 changes: 7 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@
<module>presto-parquet</module>
<module>presto-rcfile</module>
<module>presto-hive</module>
<module>presto-hdfs-core</module>
<module>presto-hive-common</module>
<module>presto-hive-hadoop2</module>
<module>presto-hive-metastore</module>
Expand Down Expand Up @@ -312,6 +313,12 @@
<version>${project.version}</version>
</dependency>

<dependency>
<groupId>com.facebook.presto</groupId>
<artifactId>presto-hdfs-core</artifactId>
<version>${project.version}</version>
</dependency>

<dependency>
<groupId>com.facebook.presto</groupId>
<artifactId>presto-hive-common</artifactId>
Expand Down
2 changes: 1 addition & 1 deletion presto-cache/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@

<dependency>
<groupId>com.facebook.presto</groupId>
<artifactId>presto-hive-common</artifactId>
<artifactId>presto-hdfs-core</artifactId>
</dependency>

<dependency>
Expand Down
5 changes: 5 additions & 0 deletions presto-delta/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,11 @@
<artifactId>presto-hive-common</artifactId>
</dependency>

<dependency>
<groupId>com.facebook.presto</groupId>
<artifactId>presto-hdfs-core</artifactId>
</dependency>

<dependency>
<groupId>com.facebook.presto</groupId>
<artifactId>presto-parquet</artifactId>
Expand Down
93 changes: 93 additions & 0 deletions presto-hdfs-core/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<parent>
<artifactId>presto-root</artifactId>
<groupId>com.facebook.presto</groupId>
<version>0.288-SNAPSHOT</version>
</parent>

<properties>
<air.main.basedir>${project.parent.basedir}</air.main.basedir>
</properties>

<artifactId>presto-hdfs-core</artifactId>
<dependencies>
<dependency>
<groupId>com.facebook.presto</groupId>
<artifactId>presto-common</artifactId>
</dependency>

<dependency>
<groupId>com.google.code.findbugs</groupId>
<artifactId>jsr305</artifactId>
<optional>true</optional>
</dependency>

<dependency>
<groupId>io.airlift</groupId>
<artifactId>slice</artifactId>
</dependency>

<dependency>
<groupId>com.facebook.presto</groupId>
<artifactId>presto-spi</artifactId>
</dependency>

<dependency>
<groupId>com.facebook.presto.hadoop</groupId>
<artifactId>hadoop-apache2</artifactId>
</dependency>

<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
</dependency>

<dependency>
<groupId>io.airlift</groupId>
<artifactId>units</artifactId>
</dependency>

<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-annotations</artifactId>
</dependency>

<dependency>
<groupId>org.weakref</groupId>
<artifactId>jmxutils</artifactId>
</dependency>

<dependency>
<groupId>com.facebook.drift</groupId>
<artifactId>drift-api</artifactId>
</dependency>

<!-- for testing -->
<dependency>
<groupId>org.testng</groupId>
<artifactId>testng</artifactId>
<scope>test</scope>
</dependency>

<dependency>
<groupId>com.facebook.drift</groupId>
<artifactId>drift-codec</artifactId>
<scope>test</scope>
</dependency>

<dependency>
<groupId>com.facebook.drift</groupId>
<artifactId>drift-transport-netty</artifactId>
<scope>test</scope>
</dependency>

<dependency>
<groupId>com.facebook.airlift</groupId>
<artifactId>http-client</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.hive;

import com.facebook.airlift.http.client.thrift.ThriftProtocolUtils;
import com.facebook.drift.codec.ThriftCodec;
import com.facebook.drift.codec.ThriftCodecManager;
import io.airlift.slice.DynamicSliceOutput;
import io.airlift.slice.SliceOutput;
import io.airlift.slice.Slices;
import io.airlift.units.DataSize;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.testng.annotations.Test;

import java.io.IOException;
import java.util.Optional;

import static com.facebook.drift.transport.netty.codec.Protocol.FB_COMPACT;
import static com.facebook.presto.hive.HiveFileInfo.createHiveFileInfo;
import static io.airlift.units.DataSize.Unit.BYTE;
import static java.lang.Math.toIntExact;
import static org.testng.Assert.assertEquals;

public class TestHiveFileInfo
{
@Test
public void testThriftRoundTrip()
throws IOException
{
ThriftCodec<HiveFileInfo> hiveFileInfoThriftCodec = new ThriftCodecManager().getCodec(HiveFileInfo.class);
HiveFileInfo hiveFileInfo = createHiveFileInfo(new LocatedFileStatus(
100,
false,
0,
0L,
0L,
0L,
null,
null,
null,
null,
new Path("test"),
new org.apache.hadoop.fs.BlockLocation[] {new BlockLocation(new String[1], new String[] {"localhost"}, 0, 100)}),
Optional.empty());

int thriftBufferSize = toIntExact(new DataSize(1000, BYTE).toBytes());
SliceOutput dynamicSliceOutput = new DynamicSliceOutput(thriftBufferSize);
ThriftProtocolUtils.write(hiveFileInfo, hiveFileInfoThriftCodec, FB_COMPACT, dynamicSliceOutput);
byte[] serialized = dynamicSliceOutput.slice().getBytes();

HiveFileInfo copy = ThriftProtocolUtils.read(hiveFileInfoThriftCodec, FB_COMPACT, Slices.wrappedBuffer(serialized).getInput());
assertEquals(copy, hiveFileInfo);
}
}
9 changes: 2 additions & 7 deletions presto-hive-common/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -80,19 +80,14 @@
<artifactId>jackson-annotations</artifactId>
</dependency>

<dependency>
<groupId>org.weakref</groupId>
<artifactId>jmxutils</artifactId>
</dependency>

<dependency>
<groupId>javax.inject</groupId>
<artifactId>javax.inject</artifactId>
</dependency>

<dependency>
<groupId>com.facebook.drift</groupId>
<artifactId>drift-api</artifactId>
<groupId>com.facebook.presto</groupId>
<artifactId>presto-hdfs-core</artifactId>
</dependency>

<!-- for testing -->
Expand Down
5 changes: 5 additions & 0 deletions presto-hive-metastore/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,11 @@
<artifactId>presto-hive-common</artifactId>
</dependency>

<dependency>
<groupId>com.facebook.presto</groupId>
<artifactId>presto-hdfs-core</artifactId>
</dependency>

<dependency>
<groupId>com.facebook.airlift</groupId>
<artifactId>configuration</artifactId>
Expand Down
5 changes: 5 additions & 0 deletions presto-hive/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,11 @@
<artifactId>presto-hive-common</artifactId>
</dependency>

<dependency>
<groupId>com.facebook.presto</groupId>
<artifactId>presto-hdfs-core</artifactId>
</dependency>

<dependency>
<groupId>com.facebook.presto</groupId>
<artifactId>presto-hive-metastore</artifactId>
Expand Down
5 changes: 5 additions & 0 deletions presto-hudi/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,11 @@
<artifactId>presto-hive-common</artifactId>
</dependency>

<dependency>
<groupId>com.facebook.presto</groupId>
<artifactId>presto-hdfs-core</artifactId>
</dependency>

<dependency>
<groupId>com.facebook.presto</groupId>
<artifactId>presto-hive-metastore</artifactId>
Expand Down
5 changes: 5 additions & 0 deletions presto-iceberg/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,11 @@
<artifactId>presto-hive-common</artifactId>
</dependency>

<dependency>
<groupId>com.facebook.presto</groupId>
<artifactId>presto-hdfs-core</artifactId>
</dependency>

<dependency>
<groupId>org.apache.avro</groupId>
<artifactId>avro</artifactId>
Expand Down
6 changes: 0 additions & 6 deletions presto-parquet/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -100,12 +100,6 @@
<artifactId>slice</artifactId>
</dependency>

<!-- For Presto Hive warning codes -->
<dependency>
<groupId>com.facebook.presto</groupId>
<artifactId>presto-hive-common</artifactId>
</dependency>

<!-- for testing -->
<dependency>
<groupId>com.facebook.presto</groupId>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.parquet;

import com.facebook.presto.spi.WarningCode;
import com.facebook.presto.spi.WarningCodeSupplier;

public enum ParquetWarningCode
implements WarningCodeSupplier
{
PARQUET_FILE_STATISTICS_CORRUPTION(1),
/**/;
private final WarningCode warningCode;

public static final int WARNING_CODE_MASK = 0x0100_0000;

ParquetWarningCode(int code)
{
warningCode = new WarningCode(code + WARNING_CODE_MASK, name());
}

@Override
public WarningCode toWarningCode()
{
return warningCode;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@
import static com.facebook.presto.common.type.SmallintType.SMALLINT;
import static com.facebook.presto.common.type.TinyintType.TINYINT;
import static com.facebook.presto.common.type.Varchars.isVarcharType;
import static com.facebook.presto.hive.HiveWarningCode.HIVE_FILE_STATISTICS_CORRUPTION;
import static com.facebook.presto.parquet.ParquetWarningCode.PARQUET_FILE_STATISTICS_CORRUPTION;
import static com.facebook.presto.parquet.predicate.PredicateUtils.isStatisticsOverflow;
import static com.google.common.base.Preconditions.checkArgument;
import static java.lang.Float.floatToRawIntBits;
Expand Down Expand Up @@ -123,7 +123,7 @@ public static Domain getDomain(
}
catch (Exception exception) {
if (warningCollector.isPresent()) {
warningCollector.get().add(new PrestoWarning(HIVE_FILE_STATISTICS_CORRUPTION, format("Corrupted statistics for column \"%s\" in Parquet file \"%s\": [%s]", column.toString(), id, statistics)));
warningCollector.get().add(new PrestoWarning(PARQUET_FILE_STATISTICS_CORRUPTION, format("Corrupted statistics for column \"%s\" in Parquet file \"%s\": [%s]", column.toString(), id, statistics)));
}
return Domain.create(ValueSet.all(type), hasNullValue);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,8 @@
import static com.facebook.presto.common.type.TinyintType.TINYINT;
import static com.facebook.presto.common.type.VarcharType.createUnboundedVarcharType;
import static com.facebook.presto.common.type.VarcharType.createVarcharType;
import static com.facebook.presto.hive.HiveWarningCode.HIVE_FILE_STATISTICS_CORRUPTION;
import static com.facebook.presto.parquet.ParquetEncoding.PLAIN_DICTIONARY;
import static com.facebook.presto.parquet.ParquetWarningCode.PARQUET_FILE_STATISTICS_CORRUPTION;
import static com.facebook.presto.parquet.predicate.TupleDomainParquetPredicate.getDomain;
import static com.facebook.presto.parquet.predicate.TupleDomainParquetPredicate.getRange;
import static io.airlift.slice.Slices.EMPTY_SLICE;
Expand Down Expand Up @@ -456,7 +456,7 @@ private boolean assertStatsCorruptionWarning(TestingWarningCollector collector,

List<PrestoWarning> warnings = collector.getWarnings();
assertEquals(warnings.size(), 2);
assertEquals(warnings.get(0).getWarningCode(), HIVE_FILE_STATISTICS_CORRUPTION.toWarningCode());
assertEquals(warnings.get(0).getWarningCode(), PARQUET_FILE_STATISTICS_CORRUPTION.toWarningCode());

return true;
}
Expand Down

0 comments on commit b258543

Please sign in to comment.