Skip to content

Commit

Permalink
Revert "Revert "Preserve case for RowType's field name and JSON conte…
Browse files Browse the repository at this point in the history
…nt when ``CAST``""

This reverts commit 9c13a6a.
  • Loading branch information
hantangwangd committed Apr 26, 2024
1 parent fdc97c1 commit 9a20c79
Show file tree
Hide file tree
Showing 20 changed files with 419 additions and 76 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.common;

public class InvalidTypeDefinitionException
extends RuntimeException
{
public InvalidTypeDefinitionException(String message)
{
this(message, null);
}

public InvalidTypeDefinitionException(Throwable throwable)
{
this(null, throwable);
}

public InvalidTypeDefinitionException(String message, Throwable cause)
{
super(message, cause);
}

@Override
public String getMessage()
{
return super.getMessage();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ public class SqlFunctionProperties
private final Locale sessionLocale;
private final String sessionUser;
private final boolean fieldNamesInJsonCastEnabled;
private final boolean legacyJsonCast;
private final Map<String, String> extraCredentials;

private SqlFunctionProperties(
Expand All @@ -46,6 +47,7 @@ private SqlFunctionProperties(
Locale sessionLocale,
String sessionUser,
boolean fieldNamesInJsonCastEnabled,
boolean legacyJsonCast,
Map<String, String> extraCredentials)
{
this.parseDecimalLiteralAsDouble = parseDecimalLiteralAsDouble;
Expand All @@ -57,6 +59,7 @@ private SqlFunctionProperties(
this.sessionLocale = requireNonNull(sessionLocale, "sessionLocale is null");
this.sessionUser = requireNonNull(sessionUser, "sessionUser is null");
this.fieldNamesInJsonCastEnabled = fieldNamesInJsonCastEnabled;
this.legacyJsonCast = legacyJsonCast;
this.extraCredentials = requireNonNull(extraCredentials, "extraCredentials is null");
}

Expand Down Expand Up @@ -111,6 +114,11 @@ public boolean isFieldNamesInJsonCastEnabled()
return fieldNamesInJsonCastEnabled;
}

public boolean isLegacyJsonCast()
{
return legacyJsonCast;
}

@Override
public boolean equals(Object o)
{
Expand All @@ -129,13 +137,16 @@ public boolean equals(Object o)
Objects.equals(sessionStartTime, that.sessionStartTime) &&
Objects.equals(sessionLocale, that.sessionLocale) &&
Objects.equals(sessionUser, that.sessionUser) &&
Objects.equals(extraCredentials, that.extraCredentials);
Objects.equals(extraCredentials, that.extraCredentials) &&
Objects.equals(legacyJsonCast, that.legacyJsonCast);
}

@Override
public int hashCode()
{
return Objects.hash(parseDecimalLiteralAsDouble, legacyRowFieldOrdinalAccessEnabled, timeZoneKey, legacyTimestamp, legacyMapSubscript, sessionStartTime, sessionLocale, sessionUser, extraCredentials);
return Objects.hash(parseDecimalLiteralAsDouble, legacyRowFieldOrdinalAccessEnabled, timeZoneKey,
legacyTimestamp, legacyMapSubscript, sessionStartTime, sessionLocale, sessionUser,
extraCredentials, legacyJsonCast);
}

public static Builder builder()
Expand All @@ -154,6 +165,7 @@ public static class Builder
private Locale sessionLocale;
private String sessionUser;
private boolean fieldNamesInJsonCastEnabled;
private boolean legacyJsonCast;
private Map<String, String> extraCredentials = emptyMap();

private Builder() {}
Expand Down Expand Up @@ -218,9 +230,26 @@ public Builder setFieldNamesInJsonCastEnabled(boolean fieldNamesInJsonCastEnable
return this;
}

public Builder setLegacyJsonCast(boolean legacyJsonCast)
{
this.legacyJsonCast = legacyJsonCast;
return this;
}

public SqlFunctionProperties build()
{
return new SqlFunctionProperties(parseDecimalLiteralAsDouble, legacyRowFieldOrdinalAccessEnabled, timeZoneKey, legacyTimestamp, legacyMapSubscript, sessionStartTime, sessionLocale, sessionUser, fieldNamesInJsonCastEnabled, extraCredentials);
return new SqlFunctionProperties(
parseDecimalLiteralAsDouble,
legacyRowFieldOrdinalAccessEnabled,
timeZoneKey,
legacyTimestamp,
legacyMapSubscript,
sessionStartTime,
sessionLocale,
sessionUser,
fieldNamesInJsonCastEnabled,
legacyJsonCast,
extraCredentials);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,11 @@ public static Field field(Type type)
return new Field(Optional.empty(), type);
}

public static Field field(String name, Type type, boolean delimited)
{
return new Field(Optional.of(name), type, delimited);
}

private static TypeSignature makeSignature(List<Field> fields)
{
int size = fields.size();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import com.facebook.drift.annotations.ThriftConstructor;
import com.facebook.drift.annotations.ThriftField;
import com.facebook.drift.annotations.ThriftStruct;
import com.facebook.presto.common.InvalidTypeDefinitionException;
import com.facebook.presto.common.QualifiedObjectName;
import com.facebook.presto.common.type.BigintEnumType.LongEnumMap;
import com.facebook.presto.common.type.VarcharEnumType.VarcharEnumMap;
Expand Down Expand Up @@ -510,6 +511,7 @@ private static TypeSignature parseRowTypeSignature(String signature, Set<String>

List<TypeSignatureParameter> fields = new ArrayList<>();

Set<String> distinctFieldNames = new HashSet<>();
for (int i = StandardTypes.ROW.length() + 1; i < signature.length(); i++) {
char c = signature.charAt(i);
switch (state) {
Expand Down Expand Up @@ -556,13 +558,19 @@ else if (c == ')' && bracketLevel > 1) {
}
else if (c == ')') {
verify(tokenStart >= 0, "Expect tokenStart to be non-negative");
fields.add(parseTypeOrNamedType(signature.substring(tokenStart, i).trim(), literalParameters));
TypeSignatureParameter parameter = parseTypeOrNamedType(signature.substring(tokenStart, i).trim(), literalParameters);
parameter.getNamedTypeSignature().getName()
.ifPresent(fieldName -> checkDuplicateAndAdd(distinctFieldNames, fieldName));
fields.add(parameter);
tokenStart = -1;
state = RowTypeSignatureParsingState.FINISHED;
}
else if (c == ',' && bracketLevel == 1) {
verify(tokenStart >= 0, "Expect tokenStart to be non-negative");
fields.add(parseTypeOrNamedType(signature.substring(tokenStart, i).trim(), literalParameters));
TypeSignatureParameter parameter = parseTypeOrNamedType(signature.substring(tokenStart, i).trim(), literalParameters);
parameter.getNamedTypeSignature().getName()
.ifPresent(fieldName -> checkDuplicateAndAdd(distinctFieldNames, fieldName));
fields.add(parameter);
tokenStart = -1;
state = RowTypeSignatureParsingState.START_OF_FIELD;
}
Expand All @@ -578,6 +586,7 @@ else if (c == ')' && bracketLevel > 1) {
else if (c == ')') {
verify(tokenStart >= 0, "Expect tokenStart to be non-negative");
verify(delimitedColumnName != null, "Expect delimitedColumnName to be non-null");
checkDuplicateAndAdd(distinctFieldNames, delimitedColumnName);
fields.add(TypeSignatureParameter.of(new NamedTypeSignature(
Optional.of(new RowFieldName(delimitedColumnName, true)),
parseTypeSignature(signature.substring(tokenStart, i).trim(), literalParameters))));
Expand All @@ -588,6 +597,7 @@ else if (c == ')') {
else if (c == ',' && bracketLevel == 1) {
verify(tokenStart >= 0, "Expect tokenStart to be non-negative");
verify(delimitedColumnName != null, "Expect delimitedColumnName to be non-null");
checkDuplicateAndAdd(distinctFieldNames, delimitedColumnName);
fields.add(TypeSignatureParameter.of(new NamedTypeSignature(
Optional.of(new RowFieldName(delimitedColumnName, true)),
parseTypeSignature(signature.substring(tokenStart, i).trim(), literalParameters))));
Expand All @@ -609,6 +619,13 @@ else if (c == ',' && bracketLevel == 1) {
return new TypeSignature(signature.substring(0, StandardTypes.ROW.length()), fields);
}

private static void checkDuplicateAndAdd(Set<String> fieldNames, String fieldName)
{
if (!fieldNames.add(fieldName)) {
throw new InvalidTypeDefinitionException("Duplicate field: " + fieldName);
}
}

private static TypeSignatureParameter parseTypeOrNamedType(String typeOrNamedType, Set<String> literalParameters)
{
int split = typeOrNamedType.indexOf(' ');
Expand Down
13 changes: 13 additions & 0 deletions presto-docs/src/main/sphinx/admin/properties.rst
Original file line number Diff line number Diff line change
Expand Up @@ -989,3 +989,16 @@ Logging Properties
* **Default value:** ``100MB``

The maximum file size for the log file of the HTTP server.


Legacy Compatible Properties
------------------------------

``legacy_json_cast``
^^^^^^^^^^^^^^^^^^^^^

* **Type:** ``boolean``
* **Default value:** ``true``

When casting from ``JSON`` to ``ROW``, ignore the case of field names in ``RowType`` for legacy support so that the matching is case-insensitive.
Set ``legacy_json_cast`` to ``false`` to strictly enforce the case-sensitivity of double quoted field names in ``RowType`` when matching. Matching for unquoted field names remains case-insensitive.
24 changes: 24 additions & 0 deletions presto-docs/src/main/sphinx/functions/json.rst
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,30 @@ Cast from JSON
SELECT CAST(JSON '{"k1": [1, 23], "k2": 456}' AS MAP(VARCHAR, JSON)); -- {k1 = JSON '[1,23]', k2 = JSON '456'}
SELECT CAST(JSON '[null]' AS ARRAY(JSON)); -- [JSON 'null']

.. note::

When casting from ``JSON`` to ``ROW``, for legacy support the case of double quoted field names
in ``RowType`` is ignored when matching. For example::

SELECT CAST(JSON '{"v1":123,"V2":"abc","v3":true}' AS ROW(v1 BIGINT, v2 VARCHAR, v3 BOOLEAN)); -- {v1=123, v2=abc, v3=true}
SELECT CAST(JSON '{"v1":123,"V2":"abc","v3":true}' AS ROW(v1 BIGINT, "V2" VARCHAR, "V3" BOOLEAN)); -- {v1=123, V2=abc, V3=true}

The following statement returns an error due to duplicate field::

SELECT CAST(JSON '{"v1":123,"V2":"abc","v2":"abc2","v3":true}' AS ROW(v1 BIGINT, "V2" VARCHAR, v2 VARCHAR, "V3" BOOLEAN));

To enforce the case of field names in ``RowType`` when casting from ``JSON`` to ``ROW``, set the configuration property ``legacy_json_cast`` to ``false``
in the coordinator and the worker's `configuration properties <../admin/properties.html#legacy-compatible-properties>`_.
After setting the property, the matching is case-sensitive for double quoted field names and
remains case-insensitive for unquoted field names. For example::

SELECT CAST(JSON '{"v1":123,"V2":"abc","v3":true}' AS ROW(v1 BIGINT, v2 VARCHAR, v3 BOOLEAN)); -- {v1=123, v2=abc, v3=true}
SELECT CAST(JSON '{"v1":123,"V2":"abc","v3":true}' AS ROW(v1 BIGINT, "V2" VARCHAR, v3 BOOLEAN)); -- {v1=123, V2=abc, v3=true}
SELECT CAST(JSON '{"v1":123,"V2":"abc","v3":true}' AS ROW(v1 BIGINT, "v2" VARCHAR, v3 BOOLEAN)); -- {v1=123, v2=null, v3=true}
SELECT CAST(JSON '{"v1":123,"V2":"abc", "v2":"abc2","v3":true}' AS ROW(v1 BIGINT, v2 VARCHAR, "V2" VARCHAR, v3 BOOLEAN)); -- {v1=123, v2=abc2, V2=abc, v3=true}

If the name of a field does not match (including case sensitivity), the value is ``null``.

.. note:: When casting from ``JSON`` to ``ROW``, both JSON array and JSON object are supported.

JSON Functions
Expand Down
3 changes: 3 additions & 0 deletions presto-main/src/main/java/com/facebook/presto/Session.java
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
import java.util.TimeZone;
import java.util.stream.Collectors;

import static com.facebook.presto.SystemSessionProperties.LEGACY_JSON_CAST;
import static com.facebook.presto.SystemSessionProperties.isFieldNameInJsonCastEnabled;
import static com.facebook.presto.SystemSessionProperties.isLegacyMapSubscript;
import static com.facebook.presto.SystemSessionProperties.isLegacyRowFieldOrdinalAccessEnabled;
Expand Down Expand Up @@ -496,6 +497,7 @@ public ConnectorSession toConnectorSession()

public SqlFunctionProperties getSqlFunctionProperties()
{
boolean legacyJsonCast = this.sessionPropertyManager.decodeSystemPropertyValue(LEGACY_JSON_CAST, null, Boolean.class);
return SqlFunctionProperties.builder()
.setTimeZoneKey(timeZoneKey)
.setLegacyRowFieldOrdinalAccessEnabled(isLegacyRowFieldOrdinalAccessEnabled(this))
Expand All @@ -506,6 +508,7 @@ public SqlFunctionProperties getSqlFunctionProperties()
.setSessionLocale(getLocale())
.setSessionUser(getUser())
.setFieldNamesInJsonCastEnabled(isFieldNameInJsonCastEnabled(this))
.setLegacyJsonCast(legacyJsonCast)
.setExtraCredentials(identity.getExtraCredentials())
.build();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,7 @@ public final class SystemSessionProperties
public static final String ADD_PARTIAL_NODE_FOR_ROW_NUMBER_WITH_LIMIT = "add_partial_node_for_row_number_with_limit";
public static final String REWRITE_CASE_TO_MAP_ENABLED = "rewrite_case_to_map_enabled";
public static final String FIELD_NAMES_IN_JSON_CAST_ENABLED = "field_names_in_json_cast_enabled";
public static final String LEGACY_JSON_CAST = "legacy_json_cast";
public static final String PULL_EXPRESSION_FROM_LAMBDA_ENABLED = "pull_expression_from_lambda_enabled";
public static final String REWRITE_CONSTANT_ARRAY_CONTAINS_TO_IN_EXPRESSION = "rewrite_constant_array_contains_to_in_expression";
public static final String INFER_INEQUALITY_PREDICATES = "infer_inequality_predicates";
Expand Down Expand Up @@ -1753,6 +1754,11 @@ public SystemSessionProperties(
"Include field names in json output when casting rows",
featuresConfig.isFieldNamesInJsonCastEnabled(),
false),
booleanProperty(
LEGACY_JSON_CAST,
"Keep the legacy json cast behavior, do not reserve the case for field names when casting to row type",
featuresConfig.isLegacyJsonCast(),
false),
booleanProperty(
OPTIMIZE_JOIN_PROBE_FOR_EMPTY_BUILD_RUNTIME,
"Optimize join probe at runtime if build side is empty",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ public static Block toArray(ArrayType arrayType, BlockBuilderAppender elementApp
}
BlockBuilder blockBuilder = arrayType.getElementType().createBlockBuilder(null, 20);
while (jsonParser.nextToken() != JsonToken.END_ARRAY) {
elementAppender.append(jsonParser, blockBuilder);
elementAppender.append(jsonParser, blockBuilder, properties);
}
if (jsonParser.nextToken() != null) {
throw new JsonCastException(format("Unexpected trailing token: %s", jsonParser.getText()));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -103,9 +103,9 @@ public static Block toMap(MapType mapType, BlockBuilderAppender keyAppender, Blo
HashTable hashTable = new HashTable(mapType.getKeyType(), singleMapBlockBuilder);
int position = 0;
while (jsonParser.nextToken() != JsonToken.END_OBJECT) {
keyAppender.append(jsonParser, singleMapBlockBuilder);
keyAppender.append(jsonParser, singleMapBlockBuilder, properties);
jsonParser.nextToken();
valueAppender.append(jsonParser, singleMapBlockBuilder);
valueAppender.append(jsonParser, singleMapBlockBuilder, properties);

// Duplicate key detection is required even if the JSON is valid.
// For example: CAST(JSON '{"1": 1, "01": 2}' AS MAP<INTEGER, INTEGER>).
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,6 @@

import java.lang.invoke.MethodHandle;
import java.util.List;
import java.util.Map;
import java.util.Optional;

import static com.facebook.presto.common.type.TypeSignature.parseTypeSignature;
import static com.facebook.presto.operator.scalar.ScalarFunctionImplementationChoice.ArgumentProperty.valueTypeArgumentProperty;
Expand All @@ -48,7 +46,6 @@
import static com.facebook.presto.util.JsonUtil.JSON_FACTORY;
import static com.facebook.presto.util.JsonUtil.canCastFromJson;
import static com.facebook.presto.util.JsonUtil.createJsonParser;
import static com.facebook.presto.util.JsonUtil.getFieldNameToIndex;
import static com.facebook.presto.util.JsonUtil.parseJsonToSingleRowBlock;
import static com.facebook.presto.util.JsonUtil.truncateIfNecessaryForErrorMessage;
import static com.facebook.presto.util.Reflection.methodHandle;
Expand All @@ -61,7 +58,7 @@ public class JsonToRowCast
extends SqlOperator
{
public static final JsonToRowCast JSON_TO_ROW = new JsonToRowCast();
private static final MethodHandle METHOD_HANDLE = methodHandle(JsonToRowCast.class, "toRow", RowType.class, BlockBuilderAppender[].class, Optional.class, SqlFunctionProperties.class, Slice.class);
private static final MethodHandle METHOD_HANDLE = methodHandle(JsonToRowCast.class, "toRow", RowType.class, BlockBuilderAppender[].class, SqlFunctionProperties.class, Slice.class);

private JsonToRowCast()
{
Expand All @@ -83,7 +80,7 @@ public BuiltInScalarFunctionImplementation specialize(BoundVariables boundVariab
BlockBuilderAppender[] fieldAppenders = rowFields.stream()
.map(rowField -> createBlockBuilderAppender(rowField.getType()))
.toArray(BlockBuilderAppender[]::new);
MethodHandle methodHandle = METHOD_HANDLE.bindTo(rowType).bindTo(fieldAppenders).bindTo(getFieldNameToIndex(rowFields));
MethodHandle methodHandle = METHOD_HANDLE.bindTo(rowType).bindTo(fieldAppenders);
return new BuiltInScalarFunctionImplementation(
true,
ImmutableList.of(valueTypeArgumentProperty(RETURN_NULL_ON_NULL)),
Expand All @@ -94,7 +91,6 @@ public BuiltInScalarFunctionImplementation specialize(BoundVariables boundVariab
public static Block toRow(
RowType rowType,
BlockBuilderAppender[] fieldAppenders,
Optional<Map<String, Integer>> fieldNameToIndex,
SqlFunctionProperties properties,
Slice json)
{
Expand All @@ -113,7 +109,8 @@ public static Block toRow(
jsonParser,
(SingleRowBlockWriter) rowBlockBuilder.beginBlockEntry(),
fieldAppenders,
fieldNameToIndex);
rowType,
properties);
rowBlockBuilder.closeEntry();

if (jsonParser.nextToken() != null) {
Expand Down
Loading

0 comments on commit 9a20c79

Please sign in to comment.