Skip to content

Commit

Permalink
[iceberg] Tag columns with partition info in DESCRIBE and SHOW COLUMNS
Browse files Browse the repository at this point in the history
  • Loading branch information
imjalpreet committed May 27, 2024
1 parent 2625380 commit bb51436
Show file tree
Hide file tree
Showing 4 changed files with 100 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@
import static com.facebook.presto.iceberg.IcebergErrorCode.ICEBERG_INVALID_SNAPSHOT_ID;
import static com.facebook.presto.iceberg.IcebergMetadataColumn.DATA_SEQUENCE_NUMBER;
import static com.facebook.presto.iceberg.IcebergMetadataColumn.FILE_PATH;
import static com.facebook.presto.iceberg.IcebergPartitionType.ALL;
import static com.facebook.presto.iceberg.IcebergSessionProperties.isPushdownFilterEnabled;
import static com.facebook.presto.iceberg.IcebergTableProperties.DELETE_MODE;
import static com.facebook.presto.iceberg.IcebergTableProperties.FILE_FORMAT_PROPERTY;
Expand All @@ -123,6 +124,7 @@
import static com.facebook.presto.iceberg.IcebergUtil.getColumns;
import static com.facebook.presto.iceberg.IcebergUtil.getDeleteMode;
import static com.facebook.presto.iceberg.IcebergUtil.getFileFormat;
import static com.facebook.presto.iceberg.IcebergUtil.getPartitionFields;
import static com.facebook.presto.iceberg.IcebergUtil.getPartitionKeyColumnHandles;
import static com.facebook.presto.iceberg.IcebergUtil.getPartitionSpecsIncludingValidData;
import static com.facebook.presto.iceberg.IcebergUtil.getPartitions;
Expand Down Expand Up @@ -499,16 +501,30 @@ public boolean isLegacyGetLayoutSupported(ConnectorSession session, ConnectorTab

protected List<ColumnMetadata> getColumnMetadatas(Table table)
{
Map<String, List<String>> partitionFields = getPartitionFields(table.spec(), ALL);
return table.schema().columns().stream()
.map(column -> ColumnMetadata.builder()
.setName(column.name())
.setType(toPrestoType(column.type(), typeManager))
.setComment(Optional.ofNullable(column.doc()))
.setHidden(false)
.setExtraInfo(Optional.ofNullable(
partitionFields.containsKey(column.name()) ?
columnExtraInfo(partitionFields.get(column.name())) :
null))
.build())
.collect(toImmutableList());
}

private static String columnExtraInfo(List<String> partitionTransforms)
{
if (partitionTransforms.size() == 1 && partitionTransforms.get(0).equals("identity")) {
return "partition key";
}

return "partition by " + partitionTransforms.stream().collect(Collectors.joining(", "));
}

protected ImmutableMap<String, Object> createMetadataProperties(Table icebergTable)
{
ImmutableMap.Builder<String, Object> properties = ImmutableMap.builder();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.iceberg;

public enum IcebergPartitionType
{
IDENTITY,
ALL,
}
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@
import static com.facebook.presto.iceberg.IcebergErrorCode.ICEBERG_INVALID_PARTITION_VALUE;
import static com.facebook.presto.iceberg.IcebergErrorCode.ICEBERG_INVALID_SNAPSHOT_ID;
import static com.facebook.presto.iceberg.IcebergErrorCode.ICEBERG_INVALID_TABLE_TIMESTAMP;
import static com.facebook.presto.iceberg.IcebergPartitionType.IDENTITY;
import static com.facebook.presto.iceberg.IcebergSessionProperties.isMergeOnReadModeEnabled;
import static com.facebook.presto.iceberg.IcebergTableProperties.getCommitRetries;
import static com.facebook.presto.iceberg.IcebergTableProperties.getFormatVersion;
Expand Down Expand Up @@ -272,16 +273,36 @@ public static long getSnapshotIdAsOfTime(Table table, long millisUtc)
.snapshotId();
}

public static List<IcebergColumnHandle> getColumns(Schema schema, PartitionSpec partitionSpec, TypeManager typeManager)
public static Map<String, List<String>> getPartitionFields(PartitionSpec partitionSpec, IcebergPartitionType partitionType)
{
List<String> partitionFieldNames = new ArrayList<>();
for (int i = 0; i < partitionSpec.fields().size(); i++) {
PartitionField field = partitionSpec.fields().get(i);
if (field.transform().isIdentity()) {
partitionFieldNames.add(field.name());
}
Map<String, List<String>> partitionFields = new HashMap<>();

switch (partitionType) {
case IDENTITY:
for (int i = 0; i < partitionSpec.fields().size(); i++) {
PartitionField field = partitionSpec.fields().get(i);
if (field.transform().isIdentity()) {
partitionFields.put(field.name(), ImmutableList.of(field.transform().toString()));
}
}
break;
case ALL:
for (int i = 0; i < partitionSpec.fields().size(); i++) {
PartitionField field = partitionSpec.fields().get(i);
String sourceColumnName = partitionSpec.schema().findColumnName(field.sourceId());
partitionFields.computeIfAbsent(sourceColumnName, k -> new ArrayList<>())
.add(field.transform().toString());
}
break;
}

return partitionFields;
}

public static List<IcebergColumnHandle> getColumns(Schema schema, PartitionSpec partitionSpec, TypeManager typeManager)
{
Set<String> partitionFieldNames = getPartitionFields(partitionSpec, IDENTITY).keySet();

return schema.columns().stream()
.map(column -> partitionFieldNames.contains(column.name()) ? IcebergColumnHandle.create(column, typeManager, PARTITION_KEY) : IcebergColumnHandle.create(column, typeManager, REGULAR))
.collect(toImmutableList());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -492,6 +492,42 @@ public void testTruncateTableWithDeleteFiles()
}
}

@Test
public void testShowColumnsForPartitionedTable()
{
// Partitioned Table with only identity partitions
getQueryRunner().execute("CREATE TABLE show_columns_only_identity_partition " +
"(id int," +
" name varchar," +
" team varchar) WITH (partitioning = ARRAY['team'])");

MaterializedResult actual = computeActual("SHOW COLUMNS FROM show_columns_only_identity_partition");

MaterializedResult expectedParametrizedVarchar = resultBuilder(getSession(), VARCHAR, VARCHAR, VARCHAR, VARCHAR)
.row("id", "integer", "", "")
.row("name", "varchar", "", "")
.row("team", "varchar", "partition key", "")
.build();

assertEquals(actual, expectedParametrizedVarchar);

// Partitioned Table with non identity partition transforms
getQueryRunner().execute("CREATE TABLE show_columns_with_non_identity_partition " +
"(id int," +
" name varchar," +
" team varchar) WITH (partitioning = ARRAY['truncate(team, 1)', 'team'])");

actual = computeActual("SHOW COLUMNS FROM show_columns_with_non_identity_partition");

expectedParametrizedVarchar = resultBuilder(getSession(), VARCHAR, VARCHAR, VARCHAR, VARCHAR)
.row("id", "integer", "", "")
.row("name", "varchar", "", "")
.row("team", "varchar", "partition by truncate[1], identity", "")
.build();

assertEquals(actual, expectedParametrizedVarchar);
}

@Override
public void testShowColumns()
{
Expand Down

0 comments on commit bb51436

Please sign in to comment.