Skip to content

Commit

Permalink
Preserve case for RowType's field name and JSON content when CAST
Browse files Browse the repository at this point in the history
Preserve the case of double quoted field names in RowType. When casting
from ``JSON`` to ``ROW``, treat the double quoted field name as case
sensitive and unquoted field name as case insensitive in field
matching.

Add a config property `legacy_json_cast` to control and support the
legacy behavior which do not enforce the case when matching.
  • Loading branch information
hantangwangd committed Apr 11, 2024
1 parent 7bbb2de commit a7a560a
Show file tree
Hide file tree
Showing 20 changed files with 414 additions and 76 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.common;

public class InvalidTypeDefinitionException
extends RuntimeException
{
public InvalidTypeDefinitionException(String message)
{
this(message, null);
}

public InvalidTypeDefinitionException(Throwable throwable)
{
this(null, throwable);
}

public InvalidTypeDefinitionException(String message, Throwable cause)
{
super(message, cause);
}

@Override
public String getMessage()
{
return super.getMessage();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ public class SqlFunctionProperties
private final Locale sessionLocale;
private final String sessionUser;
private final boolean fieldNamesInJsonCastEnabled;
private final boolean legacyJsonCast;
private final Map<String, String> extraCredentials;

private SqlFunctionProperties(
Expand All @@ -46,6 +47,7 @@ private SqlFunctionProperties(
Locale sessionLocale,
String sessionUser,
boolean fieldNamesInJsonCastEnabled,
boolean legacyJsonCast,
Map<String, String> extraCredentials)
{
this.parseDecimalLiteralAsDouble = parseDecimalLiteralAsDouble;
Expand All @@ -57,6 +59,7 @@ private SqlFunctionProperties(
this.sessionLocale = requireNonNull(sessionLocale, "sessionLocale is null");
this.sessionUser = requireNonNull(sessionUser, "sessionUser is null");
this.fieldNamesInJsonCastEnabled = fieldNamesInJsonCastEnabled;
this.legacyJsonCast = legacyJsonCast;
this.extraCredentials = requireNonNull(extraCredentials, "extraCredentials is null");
}

Expand Down Expand Up @@ -111,6 +114,11 @@ public boolean isFieldNamesInJsonCastEnabled()
return fieldNamesInJsonCastEnabled;
}

public boolean isLegacyJsonCast()
{
return legacyJsonCast;
}

@Override
public boolean equals(Object o)
{
Expand All @@ -129,13 +137,16 @@ public boolean equals(Object o)
Objects.equals(sessionStartTime, that.sessionStartTime) &&
Objects.equals(sessionLocale, that.sessionLocale) &&
Objects.equals(sessionUser, that.sessionUser) &&
Objects.equals(extraCredentials, that.extraCredentials);
Objects.equals(extraCredentials, that.extraCredentials) &&
Objects.equals(legacyJsonCast, that.legacyJsonCast);
}

@Override
public int hashCode()
{
return Objects.hash(parseDecimalLiteralAsDouble, legacyRowFieldOrdinalAccessEnabled, timeZoneKey, legacyTimestamp, legacyMapSubscript, sessionStartTime, sessionLocale, sessionUser, extraCredentials);
return Objects.hash(parseDecimalLiteralAsDouble, legacyRowFieldOrdinalAccessEnabled, timeZoneKey,
legacyTimestamp, legacyMapSubscript, sessionStartTime, sessionLocale, sessionUser,
extraCredentials, legacyJsonCast);
}

public static Builder builder()
Expand All @@ -154,6 +165,7 @@ public static class Builder
private Locale sessionLocale;
private String sessionUser;
private boolean fieldNamesInJsonCastEnabled;
private boolean legacyJsonCast;
private Map<String, String> extraCredentials = emptyMap();

private Builder() {}
Expand Down Expand Up @@ -218,9 +230,26 @@ public Builder setFieldNamesInJsonCastEnabled(boolean fieldNamesInJsonCastEnable
return this;
}

public Builder setLegacyJsonCast(boolean legacyJsonCast)
{
this.legacyJsonCast = legacyJsonCast;
return this;
}

public SqlFunctionProperties build()
{
return new SqlFunctionProperties(parseDecimalLiteralAsDouble, legacyRowFieldOrdinalAccessEnabled, timeZoneKey, legacyTimestamp, legacyMapSubscript, sessionStartTime, sessionLocale, sessionUser, fieldNamesInJsonCastEnabled, extraCredentials);
return new SqlFunctionProperties(
parseDecimalLiteralAsDouble,
legacyRowFieldOrdinalAccessEnabled,
timeZoneKey,
legacyTimestamp,
legacyMapSubscript,
sessionStartTime,
sessionLocale,
sessionUser,
fieldNamesInJsonCastEnabled,
legacyJsonCast,
extraCredentials);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,11 @@ public static Field field(Type type)
return new Field(Optional.empty(), type);
}

public static Field field(String name, Type type, boolean delimited)
{
return new Field(Optional.of(name), type, delimited);
}

private static TypeSignature makeSignature(List<Field> fields)
{
int size = fields.size();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import com.facebook.drift.annotations.ThriftConstructor;
import com.facebook.drift.annotations.ThriftField;
import com.facebook.drift.annotations.ThriftStruct;
import com.facebook.presto.common.InvalidTypeDefinitionException;
import com.facebook.presto.common.QualifiedObjectName;
import com.facebook.presto.common.type.BigintEnumType.LongEnumMap;
import com.facebook.presto.common.type.VarcharEnumType.VarcharEnumMap;
Expand Down Expand Up @@ -510,6 +511,7 @@ private static TypeSignature parseRowTypeSignature(String signature, Set<String>

List<TypeSignatureParameter> fields = new ArrayList<>();

Set<String> distinctFieldNames = new HashSet<>();
for (int i = StandardTypes.ROW.length() + 1; i < signature.length(); i++) {
char c = signature.charAt(i);
switch (state) {
Expand Down Expand Up @@ -556,13 +558,19 @@ else if (c == ')' && bracketLevel > 1) {
}
else if (c == ')') {
verify(tokenStart >= 0, "Expect tokenStart to be non-negative");
fields.add(parseTypeOrNamedType(signature.substring(tokenStart, i).trim(), literalParameters));
TypeSignatureParameter parameter = parseTypeOrNamedType(signature.substring(tokenStart, i).trim(), literalParameters);
parameter.getNamedTypeSignature().getName()
.ifPresent(fieldName -> checkDuplicateAndAdd(distinctFieldNames, fieldName));
fields.add(parameter);
tokenStart = -1;
state = RowTypeSignatureParsingState.FINISHED;
}
else if (c == ',' && bracketLevel == 1) {
verify(tokenStart >= 0, "Expect tokenStart to be non-negative");
fields.add(parseTypeOrNamedType(signature.substring(tokenStart, i).trim(), literalParameters));
TypeSignatureParameter parameter = parseTypeOrNamedType(signature.substring(tokenStart, i).trim(), literalParameters);
parameter.getNamedTypeSignature().getName()
.ifPresent(fieldName -> checkDuplicateAndAdd(distinctFieldNames, fieldName));
fields.add(parameter);
tokenStart = -1;
state = RowTypeSignatureParsingState.START_OF_FIELD;
}
Expand All @@ -578,6 +586,7 @@ else if (c == ')' && bracketLevel > 1) {
else if (c == ')') {
verify(tokenStart >= 0, "Expect tokenStart to be non-negative");
verify(delimitedColumnName != null, "Expect delimitedColumnName to be non-null");
checkDuplicateAndAdd(distinctFieldNames, delimitedColumnName);
fields.add(TypeSignatureParameter.of(new NamedTypeSignature(
Optional.of(new RowFieldName(delimitedColumnName, true)),
parseTypeSignature(signature.substring(tokenStart, i).trim(), literalParameters))));
Expand All @@ -588,6 +597,7 @@ else if (c == ')') {
else if (c == ',' && bracketLevel == 1) {
verify(tokenStart >= 0, "Expect tokenStart to be non-negative");
verify(delimitedColumnName != null, "Expect delimitedColumnName to be non-null");
checkDuplicateAndAdd(distinctFieldNames, delimitedColumnName);
fields.add(TypeSignatureParameter.of(new NamedTypeSignature(
Optional.of(new RowFieldName(delimitedColumnName, true)),
parseTypeSignature(signature.substring(tokenStart, i).trim(), literalParameters))));
Expand All @@ -609,6 +619,13 @@ else if (c == ',' && bracketLevel == 1) {
return new TypeSignature(signature.substring(0, StandardTypes.ROW.length()), fields);
}

private static void checkDuplicateAndAdd(Set<String> fieldNames, String fieldName)
{
if (!fieldNames.add(fieldName)) {
throw new InvalidTypeDefinitionException("Duplicate field: " + fieldName);
}
}

private static TypeSignatureParameter parseTypeOrNamedType(String typeOrNamedType, Set<String> literalParameters)
{
int split = typeOrNamedType.indexOf(' ');
Expand Down
13 changes: 13 additions & 0 deletions presto-docs/src/main/sphinx/admin/properties.rst
Original file line number Diff line number Diff line change
Expand Up @@ -989,3 +989,16 @@ Logging Properties
* **Default value:** ``100MB``

The maximum file size for the log file of the HTTP server.


Legacy Compatible Properties
------------------------------

``legacy_json_cast``
^^^^^^^^^^^^^^^^^^^^^

* **Type:** ``boolean``
* **Default value:** ``false``

When casting from ``JSON`` to ``ROW``, by default the case of double quoted field names in RowType is strictly enforced when matching, and unquoted field name in RowType is case-insensitive when matching.
To ignore the case of field names in RowType when casting from ``JSON`` to ``ROW`` (for legacy support), set config property ``legacy_json_cast`` to ``true``. After setting the property, the matching would be always case-insensitive.
25 changes: 25 additions & 0 deletions presto-docs/src/main/sphinx/functions/json.rst
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,31 @@ Cast from JSON
SELECT CAST(JSON '{"k1": [1, 23], "k2": 456}' AS MAP(VARCHAR, JSON)); -- {k1 = JSON '[1,23]', k2 = JSON '456'}
SELECT CAST(JSON '[null]' AS ARRAY(JSON)); -- [JSON 'null']

.. note::

When casting from ``JSON`` to ``ROW``, by default the case of double quoted field names
in RowType is strictly enforced when matching, and unquoted field name in RowType is
case-insensitive when matching. For example::

SELECT CAST(JSON '{"v1":123,"V2":"abc","v3":true}' AS ROW(v1 BIGINT, v2 VARCHAR, v3 BOOLEAN)); -- {v1=123, v2=abc, v3=true}
SELECT CAST(JSON '{"v1":123,"V2":"abc","v3":true}' AS ROW(v1 BIGINT, "V2" VARCHAR, v3 BOOLEAN)); -- {v1=123, V2=abc, v3=true}
SELECT CAST(JSON '{"v1":123,"V2":"abc","v3":true}' AS ROW(v1 BIGINT, "v2" VARCHAR, v3 BOOLEAN)); -- {v1=123, v2=null, v3=true}
SELECT CAST(JSON '{"v1":123,"V2":"abc", "v2":"abc2","v3":true}' AS ROW(v1 BIGINT, v2 VARCHAR, "V2" VARCHAR, v3 BOOLEAN)); -- {v1=123, v2=abc2, V2=abc, v3=true}

If the name of a field does not match (including case sensitivity), the value is null.

To ignore the case of field names in RowType when casting from ``JSON`` to ``ROW``
(for example, for legacy support) set config property ``legacy_json_cast`` to ``true``
in the coordinator and the worker's `config properties <../admin/properties.html#legacy-compatible-properties>`_.
After setting the property, the matching would be case-insensitive as the following example::

SELECT CAST(JSON '{"v1":123,"V2":"abc","v3":true}' AS ROW(v1 BIGINT, v2 VARCHAR, v3 BOOLEAN)); -- {v1=123, v2=abc, v3=true}
SELECT CAST(JSON '{"v1":123,"V2":"abc","v3":true}' AS ROW(v1 BIGINT, "V2" VARCHAR, "V3" BOOLEAN)); -- {v1=123, V2=abc, V3=true}

The following statement returns an error due to duplicate field names in RowType::

SELECT CAST(JSON '{"v1":123,"V2":"abc","v2":"abc2","v3":true}' AS ROW(v1 BIGINT, "V2" VARCHAR, v2 VARCHAR, "V3" BOOLEAN));

.. note:: When casting from ``JSON`` to ``ROW``, both JSON array and JSON object are supported.

JSON Functions
Expand Down
3 changes: 3 additions & 0 deletions presto-main/src/main/java/com/facebook/presto/Session.java
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
import java.util.TimeZone;
import java.util.stream.Collectors;

import static com.facebook.presto.SystemSessionProperties.LEGACY_JSON_CAST;
import static com.facebook.presto.SystemSessionProperties.isFieldNameInJsonCastEnabled;
import static com.facebook.presto.SystemSessionProperties.isLegacyMapSubscript;
import static com.facebook.presto.SystemSessionProperties.isLegacyRowFieldOrdinalAccessEnabled;
Expand Down Expand Up @@ -496,6 +497,7 @@ public ConnectorSession toConnectorSession()

public SqlFunctionProperties getSqlFunctionProperties()
{
boolean legacyJsonCast = this.sessionPropertyManager.decodeSystemPropertyValue(LEGACY_JSON_CAST, null, Boolean.class);
return SqlFunctionProperties.builder()
.setTimeZoneKey(timeZoneKey)
.setLegacyRowFieldOrdinalAccessEnabled(isLegacyRowFieldOrdinalAccessEnabled(this))
Expand All @@ -506,6 +508,7 @@ public SqlFunctionProperties getSqlFunctionProperties()
.setSessionLocale(getLocale())
.setSessionUser(getUser())
.setFieldNamesInJsonCastEnabled(isFieldNameInJsonCastEnabled(this))
.setLegacyJsonCast(legacyJsonCast)
.setExtraCredentials(identity.getExtraCredentials())
.build();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,7 @@ public final class SystemSessionProperties
public static final String ADD_PARTIAL_NODE_FOR_ROW_NUMBER_WITH_LIMIT = "add_partial_node_for_row_number_with_limit";
public static final String REWRITE_CASE_TO_MAP_ENABLED = "rewrite_case_to_map_enabled";
public static final String FIELD_NAMES_IN_JSON_CAST_ENABLED = "field_names_in_json_cast_enabled";
public static final String LEGACY_JSON_CAST = "legacy_json_cast";
public static final String PULL_EXPRESSION_FROM_LAMBDA_ENABLED = "pull_expression_from_lambda_enabled";
public static final String REWRITE_CONSTANT_ARRAY_CONTAINS_TO_IN_EXPRESSION = "rewrite_constant_array_contains_to_in_expression";
public static final String INFER_INEQUALITY_PREDICATES = "infer_inequality_predicates";
Expand Down Expand Up @@ -1754,6 +1755,11 @@ public SystemSessionProperties(
"Include field names in json output when casting rows",
featuresConfig.isFieldNamesInJsonCastEnabled(),
false),
booleanProperty(
LEGACY_JSON_CAST,
"Keep the legacy json cast behavior, do not reserve the case for field names when casting to row type",
featuresConfig.isLegacyJsonCast(),
false),
booleanProperty(
OPTIMIZE_JOIN_PROBE_FOR_EMPTY_BUILD_RUNTIME,
"Optimize join probe at runtime if build side is empty",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ public static Block toArray(ArrayType arrayType, BlockBuilderAppender elementApp
}
BlockBuilder blockBuilder = arrayType.getElementType().createBlockBuilder(null, 20);
while (jsonParser.nextToken() != JsonToken.END_ARRAY) {
elementAppender.append(jsonParser, blockBuilder);
elementAppender.append(jsonParser, blockBuilder, properties);
}
if (jsonParser.nextToken() != null) {
throw new JsonCastException(format("Unexpected trailing token: %s", jsonParser.getText()));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -103,9 +103,9 @@ public static Block toMap(MapType mapType, BlockBuilderAppender keyAppender, Blo
HashTable hashTable = new HashTable(mapType.getKeyType(), singleMapBlockBuilder);
int position = 0;
while (jsonParser.nextToken() != JsonToken.END_OBJECT) {
keyAppender.append(jsonParser, singleMapBlockBuilder);
keyAppender.append(jsonParser, singleMapBlockBuilder, properties);
jsonParser.nextToken();
valueAppender.append(jsonParser, singleMapBlockBuilder);
valueAppender.append(jsonParser, singleMapBlockBuilder, properties);

// Duplicate key detection is required even if the JSON is valid.
// For example: CAST(JSON '{"1": 1, "01": 2}' AS MAP<INTEGER, INTEGER>).
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,6 @@

import java.lang.invoke.MethodHandle;
import java.util.List;
import java.util.Map;
import java.util.Optional;

import static com.facebook.presto.common.type.TypeSignature.parseTypeSignature;
import static com.facebook.presto.operator.scalar.ScalarFunctionImplementationChoice.ArgumentProperty.valueTypeArgumentProperty;
Expand All @@ -48,7 +46,6 @@
import static com.facebook.presto.util.JsonUtil.JSON_FACTORY;
import static com.facebook.presto.util.JsonUtil.canCastFromJson;
import static com.facebook.presto.util.JsonUtil.createJsonParser;
import static com.facebook.presto.util.JsonUtil.getFieldNameToIndex;
import static com.facebook.presto.util.JsonUtil.parseJsonToSingleRowBlock;
import static com.facebook.presto.util.JsonUtil.truncateIfNecessaryForErrorMessage;
import static com.facebook.presto.util.Reflection.methodHandle;
Expand All @@ -61,7 +58,7 @@ public class JsonToRowCast
extends SqlOperator
{
public static final JsonToRowCast JSON_TO_ROW = new JsonToRowCast();
private static final MethodHandle METHOD_HANDLE = methodHandle(JsonToRowCast.class, "toRow", RowType.class, BlockBuilderAppender[].class, Optional.class, SqlFunctionProperties.class, Slice.class);
private static final MethodHandle METHOD_HANDLE = methodHandle(JsonToRowCast.class, "toRow", RowType.class, BlockBuilderAppender[].class, SqlFunctionProperties.class, Slice.class);

private JsonToRowCast()
{
Expand All @@ -83,7 +80,7 @@ public BuiltInScalarFunctionImplementation specialize(BoundVariables boundVariab
BlockBuilderAppender[] fieldAppenders = rowFields.stream()
.map(rowField -> createBlockBuilderAppender(rowField.getType()))
.toArray(BlockBuilderAppender[]::new);
MethodHandle methodHandle = METHOD_HANDLE.bindTo(rowType).bindTo(fieldAppenders).bindTo(getFieldNameToIndex(rowFields));
MethodHandle methodHandle = METHOD_HANDLE.bindTo(rowType).bindTo(fieldAppenders);
return new BuiltInScalarFunctionImplementation(
true,
ImmutableList.of(valueTypeArgumentProperty(RETURN_NULL_ON_NULL)),
Expand All @@ -94,7 +91,6 @@ public BuiltInScalarFunctionImplementation specialize(BoundVariables boundVariab
public static Block toRow(
RowType rowType,
BlockBuilderAppender[] fieldAppenders,
Optional<Map<String, Integer>> fieldNameToIndex,
SqlFunctionProperties properties,
Slice json)
{
Expand All @@ -113,7 +109,8 @@ public static Block toRow(
jsonParser,
(SingleRowBlockWriter) rowBlockBuilder.beginBlockEntry(),
fieldAppenders,
fieldNameToIndex);
rowType,
properties);
rowBlockBuilder.closeEntry();

if (jsonParser.nextToken() != null) {
Expand Down
Loading

0 comments on commit a7a560a

Please sign in to comment.