Skip to content

Commit

Permalink
Recognize row IDs hidden in Hadoop partitions
Browse files Browse the repository at this point in the history
  • Loading branch information
elharo committed May 20, 2024
1 parent 83d3c2b commit e80152c
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
import com.facebook.presto.spi.security.PrincipalType;
import com.facebook.presto.spi.security.RoleGrant;
import com.facebook.presto.spi.security.SelectedRole;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.AbstractIterator;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
Expand Down Expand Up @@ -118,6 +119,7 @@
import static com.google.common.collect.ImmutableSet.toImmutableSet;
import static java.lang.Math.round;
import static java.lang.String.format;
import static java.nio.charset.StandardCharsets.ISO_8859_1;
import static java.util.Locale.ENGLISH;
import static java.util.Objects.requireNonNull;
import static java.util.stream.Collectors.toList;
Expand All @@ -135,6 +137,7 @@ public final class ThriftMetastoreUtil
// Role name constants.
private static final String PUBLIC_ROLE_NAME = "public";
private static final String ADMIN_ROLE_NAME = "admin";
// NOTE(review): presumably the partition parameter key read by getLastDataCommitTime — confirm against that method.
public static final String LAST_DATA_COMMIT_TIME = "lastDataCommitTime";
// Partition parameter key whose string value getRowIDPartitionComponent converts to bytes using ISO-8859-1.
public static final String ROW_ID_PARTITION_COMPONENT_KEY = "rowIDPartitionComponent";

// Private constructor: this final class is not meant to be instantiated.
private ThriftMetastoreUtil() {}

Expand Down Expand Up @@ -511,6 +514,7 @@ public static Partition fromMetastoreApiPartition(org.apache.hadoop.hive.metasto
.collect(toList()))
.setParameters(partition.getParameters())
.setCreateTime(partition.getCreateTime())
.setRowIdPartitionComponent(getRowIDPartitionComponent(partition))
.setLastDataCommitTime(getLastDataCommitTime(partition));

// mutate apache partition to Presto partition
Expand Down Expand Up @@ -887,4 +891,17 @@ private static OptionalDouble getAverageColumnLength(OptionalLong totalSizeInByt
}
return OptionalDouble.empty();
}

/**
 * Extracts the row ID partition component stored in a metastore partition's parameters.
 *
 * <p>The value is looked up under {@link #ROW_ID_PARTITION_COMPONENT_KEY} and converted to
 * bytes with ISO-8859-1, so every character in the range U+0000..U+00FF yields exactly one byte
 * with the same numeric value.
 *
 * @param partition the Hive metastore API partition to inspect
 * @return the decoded bytes, or {@link Optional#empty()} when the partition has no parameters,
 *         the key is absent, or the key is mapped to {@code null}
 */
@VisibleForTesting
static Optional<byte[]> getRowIDPartitionComponent(org.apache.hadoop.hive.metastore.api.Partition partition)
{
    if (!partition.isSetParameters()) {
        return Optional.empty();
    }

    // A single get() covers both "key missing" and "key mapped to null"; the previous
    // containsKey()/get() pair dereferenced a null value and threw NullPointerException.
    String encoded = partition.getParameters().get(ROW_ID_PARTITION_COMPONENT_KEY);
    return Optional.ofNullable(encoded)
            .map(value -> value.getBytes(ISO_8859_1));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
import static org.apache.hadoop.hive.serde.serdeConstants.DOUBLE_TYPE_NAME;
import static org.apache.hadoop.hive.serde.serdeConstants.STRING_TYPE_NAME;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertFalse;

public class TestThriftHiveMetastoreUtil
{
Expand Down Expand Up @@ -388,4 +389,15 @@ public void testGetLastDataCommitTimeFromParams()
partition.setParameters(ImmutableMap.of("lastDataCommitTime", "a"));
assertEquals(ThriftMetastoreUtil.getLastDataCommitTime(partition), 0);
}

/**
 * Checks that no row ID partition component is reported for a partition without parameters,
 * and that a stored value is converted to one byte per character.
 */
@Test
public void testGetRowIDPartitionComponent()
{
    Partition hivePartition = new Partition();

    // No parameters set: nothing to extract
    boolean presentWithoutParameters = ThriftMetastoreUtil.getRowIDPartitionComponent(hivePartition).isPresent();
    assertFalse(presentWithoutParameters);

    // Characters 0x00, 0x01 and 0xFF must come back as the bytes 0, 1 and -1 (255 unsigned)
    hivePartition.setParameters(ImmutableMap.of("rowIDPartitionComponent", "\u0000\u0001\u00FF"));
    byte[] actualBytes = ThriftMetastoreUtil.getRowIDPartitionComponent(hivePartition).get();
    byte[] expectedBytes = new byte[] {0, 1, (byte) 255};
    assertEquals(actualBytes, expectedBytes);
}
}

0 comments on commit e80152c

Please sign in to comment.