Skip to content

Commit

Permalink
Add support for fractional weights in approx_percentile
Browse files Browse the repository at this point in the history
  • Loading branch information
Michael Chirico authored and martint committed Jul 25, 2019
1 parent 91e2514 commit b87425e
Show file tree
Hide file tree
Showing 11 changed files with 101 additions and 54 deletions.
26 changes: 13 additions & 13 deletions presto-docs/src/main/sphinx/functions/aggregate.rst
Original file line number Diff line number Diff line change
Expand Up @@ -182,29 +182,29 @@ Approximate Aggregate Functions
.. function:: approx_percentile(x, w, percentage) -> [same as x]

Returns the approximate weighted percentile for all input values of ``x``
using the per-item weight ``w`` at the percentage ``p``. The weight must be
an integer value of at least one. It is effectively a replication count for
the value ``x`` in the percentile set. The value of ``p`` must be between
zero and one and must be constant for all input rows.
using the per-item weight ``w`` at the percentage ``p``. Weights must be
strictly positive. Integer-value weights can be thought of as a replication
count for the value ``x`` in the percentile set. The value of ``p`` must be
between zero and one and must be constant for all input rows.

.. function:: approx_percentile(x, w, percentage, accuracy) -> [same as x]

Returns the approximate weighted percentile for all input values of ``x``
using the per-item weight ``w`` at the percentage ``p``, with a maximum rank
error of ``accuracy``. The weight must be an integer value of at least one.
It is effectively a replication count for the value ``x`` in the percentile
set. The value of ``p`` must be between zero and one and must be constant
for all input rows. ``accuracy`` must be a value greater than zero and less
than one, and it must be constant for all input rows.
error of ``accuracy``. Weights must be strictly positive. Integer-value
weights can be thought of as a replication count for the value ``x`` in the
percentile set. The value of ``p`` must be between zero and one and must be
constant for all input rows. ``accuracy`` must be a value greater than zero
and less than one, and it must be constant for all input rows.

.. function:: approx_percentile(x, w, percentages) -> array<[same as x]>

Returns the approximate weighted percentile for all input values of ``x``
using the per-item weight ``w`` at each of the given percentages specified
in the array. The weight must be an integer value of at least one. It is
effectively a replication count for the value ``x`` in the percentile set.
Each element of the array must be between zero and one, and the array must
be constant for all input rows.
in the array. Weights must be strictly positive. Integer-value weights can
be thought of as a replication count for the value ``x`` in the percentile
set. Each element of the array must be between zero and one, and the array
must be constant for all input rows.

.. function:: approx_set(x) -> HyperLogLog
:noindex:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,13 +43,13 @@ public static void input(@AggregationState DigestAndPercentileState state, @SqlT
}

/**
 * Weighted input for DOUBLE values with a single percentile.
 * Maps {@code value} through {@code doubleToSortableLong} and forwards it, together with
 * {@code weight} and {@code percentile}, to
 * {@code ApproximateLongPercentileAggregations.weightedInput}.
 */
@InputFunction
public static void weightedInput(@AggregationState DigestAndPercentileState state, @SqlType(StandardTypes.DOUBLE) double value, @SqlType(StandardTypes.BIGINT) long weight, @SqlType(StandardTypes.DOUBLE) double percentile)
public static void weightedInput(@AggregationState DigestAndPercentileState state, @SqlType(StandardTypes.DOUBLE) double value, @SqlType(StandardTypes.DOUBLE) double weight, @SqlType(StandardTypes.DOUBLE) double percentile)
{
ApproximateLongPercentileAggregations.weightedInput(state, doubleToSortableLong(value), weight, percentile);
}

/**
 * Weighted input for DOUBLE values with a single percentile and an explicit accuracy.
 * Maps {@code value} through {@code doubleToSortableLong} and forwards it, together with
 * {@code weight}, {@code percentile} and {@code accuracy}, to
 * {@code ApproximateLongPercentileAggregations.weightedInput}.
 */
@InputFunction
public static void weightedInput(@AggregationState DigestAndPercentileState state, @SqlType(StandardTypes.DOUBLE) double value, @SqlType(StandardTypes.BIGINT) long weight, @SqlType(StandardTypes.DOUBLE) double percentile, @SqlType(StandardTypes.DOUBLE) double accuracy)
public static void weightedInput(@AggregationState DigestAndPercentileState state, @SqlType(StandardTypes.DOUBLE) double value, @SqlType(StandardTypes.DOUBLE) double weight, @SqlType(StandardTypes.DOUBLE) double percentile, @SqlType(StandardTypes.DOUBLE) double accuracy)
{
ApproximateLongPercentileAggregations.weightedInput(state, doubleToSortableLong(value), weight, percentile, accuracy);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ public static void input(@AggregationState DigestAndPercentileArrayState state,
}

/**
 * Weighted input for DOUBLE values with an array of percentiles.
 * Maps {@code value} through {@code doubleToSortableLong} and forwards it, together with
 * {@code weight} and {@code percentilesArrayBlock}, to
 * {@code ApproximateLongPercentileArrayAggregations.weightedInput}.
 */
@InputFunction
public static void weightedInput(@AggregationState DigestAndPercentileArrayState state, @SqlType(StandardTypes.DOUBLE) double value, @SqlType(StandardTypes.BIGINT) long weight, @SqlType("array(double)") Block percentilesArrayBlock)
public static void weightedInput(@AggregationState DigestAndPercentileArrayState state, @SqlType(StandardTypes.DOUBLE) double value, @SqlType(StandardTypes.DOUBLE) double weight, @SqlType("array(double)") Block percentilesArrayBlock)
{
ApproximateLongPercentileArrayAggregations.weightedInput(state, doubleToSortableLong(value), weight, percentilesArrayBlock);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ public static void input(@AggregationState DigestAndPercentileState state, @SqlT
}

@InputFunction
public static void weightedInput(@AggregationState DigestAndPercentileState state, @SqlType(StandardTypes.BIGINT) long value, @SqlType(StandardTypes.BIGINT) long weight, @SqlType(StandardTypes.DOUBLE) double percentile)
public static void weightedInput(@AggregationState DigestAndPercentileState state, @SqlType(StandardTypes.BIGINT) long value, @SqlType(StandardTypes.DOUBLE) double weight, @SqlType(StandardTypes.DOUBLE) double percentile)
{
checkWeight(weight);

Expand All @@ -75,7 +75,7 @@ public static void weightedInput(@AggregationState DigestAndPercentileState stat
}

@InputFunction
public static void weightedInput(@AggregationState DigestAndPercentileState state, @SqlType(StandardTypes.BIGINT) long value, @SqlType(StandardTypes.BIGINT) long weight, @SqlType(StandardTypes.DOUBLE) double percentile, @SqlType(StandardTypes.DOUBLE) double accuracy)
public static void weightedInput(@AggregationState DigestAndPercentileState state, @SqlType(StandardTypes.BIGINT) long value, @SqlType(StandardTypes.DOUBLE) double weight, @SqlType(StandardTypes.DOUBLE) double percentile, @SqlType(StandardTypes.DOUBLE) double accuracy)
{
checkWeight(weight);

Expand Down Expand Up @@ -133,7 +133,7 @@ public static void output(@AggregationState DigestAndPercentileState state, Bloc
}
}

/**
 * Validates a percentile weight: passes the condition {@code weight > 0} to
 * {@code checkCondition} with error code {@code INVALID_FUNCTION_ARGUMENT}.
 * NOTE(review): checkCondition presumably throws when the condition is false; confirm
 * against its definition.
 */
private static void checkWeight(long weight)
private static void checkWeight(double weight)
{
// For a double weight, NaN fails this comparison while positive infinity passes it.
checkCondition(weight > 0, INVALID_FUNCTION_ARGUMENT, "percentile weight must be > 0");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ public static void input(@AggregationState DigestAndPercentileArrayState state,
}

@InputFunction
public static void weightedInput(@AggregationState DigestAndPercentileArrayState state, @SqlType(StandardTypes.BIGINT) long value, @SqlType(StandardTypes.BIGINT) long weight, @SqlType("array(double)") Block percentilesArrayBlock)
public static void weightedInput(@AggregationState DigestAndPercentileArrayState state, @SqlType(StandardTypes.BIGINT) long value, @SqlType(StandardTypes.DOUBLE) double weight, @SqlType("array(double)") Block percentilesArrayBlock)
{
initializePercentilesArray(state, percentilesArrayBlock);
initializeDigest(state);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,13 @@ public static void input(@AggregationState DigestAndPercentileState state, @SqlT
}

/**
 * Weighted input for REAL values with a single percentile.
 * The REAL value arrives in a {@code long}; its low 32 bits are reinterpreted as a float
 * with {@code intBitsToFloat}, mapped through {@code floatToSortableInt}, and forwarded
 * with {@code weight} and {@code percentile} to
 * {@code ApproximateLongPercentileAggregations.weightedInput}.
 */
@InputFunction
public static void weightedInput(@AggregationState DigestAndPercentileState state, @SqlType(StandardTypes.REAL) long value, @SqlType(StandardTypes.BIGINT) long weight, @SqlType(StandardTypes.DOUBLE) double percentile)
public static void weightedInput(@AggregationState DigestAndPercentileState state, @SqlType(StandardTypes.REAL) long value, @SqlType(StandardTypes.DOUBLE) double weight, @SqlType(StandardTypes.DOUBLE) double percentile)
{
ApproximateLongPercentileAggregations.weightedInput(state, floatToSortableInt(intBitsToFloat((int) value)), weight, percentile);
}

/**
 * Weighted input for REAL values with a single percentile and an explicit accuracy.
 * The REAL value arrives in a {@code long}; its low 32 bits are reinterpreted as a float
 * with {@code intBitsToFloat}, mapped through {@code floatToSortableInt}, and forwarded
 * with {@code weight}, {@code percentile} and {@code accuracy} to
 * {@code ApproximateLongPercentileAggregations.weightedInput}.
 */
@InputFunction
public static void weightedInput(@AggregationState DigestAndPercentileState state, @SqlType(StandardTypes.REAL) long value, @SqlType(StandardTypes.BIGINT) long weight, @SqlType(StandardTypes.DOUBLE) double percentile, @SqlType(StandardTypes.DOUBLE) double accuracy)
public static void weightedInput(@AggregationState DigestAndPercentileState state, @SqlType(StandardTypes.REAL) long value, @SqlType(StandardTypes.DOUBLE) double weight, @SqlType(StandardTypes.DOUBLE) double percentile, @SqlType(StandardTypes.DOUBLE) double accuracy)
{
ApproximateLongPercentileAggregations.weightedInput(state, floatToSortableInt(intBitsToFloat((int) value)), weight, percentile, accuracy);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ public static void input(@AggregationState DigestAndPercentileArrayState state,
}

/**
 * Weighted input for REAL values with an array of percentiles.
 * The REAL value arrives in a {@code long}; its low 32 bits are reinterpreted as a float
 * with {@code intBitsToFloat}, mapped through {@code floatToSortableInt}, and forwarded
 * with {@code weight} and {@code percentilesArrayBlock} to
 * {@code ApproximateLongPercentileArrayAggregations.weightedInput}.
 */
@InputFunction
public static void weightedInput(@AggregationState DigestAndPercentileArrayState state, @SqlType(StandardTypes.REAL) long value, @SqlType(StandardTypes.BIGINT) long weight, @SqlType("array(double)") Block percentilesArrayBlock)
public static void weightedInput(@AggregationState DigestAndPercentileArrayState state, @SqlType(StandardTypes.REAL) long value, @SqlType(StandardTypes.DOUBLE) double weight, @SqlType("array(double)") Block percentilesArrayBlock)
{
ApproximateLongPercentileArrayAggregations.weightedInput(state, floatToSortableInt(intBitsToFloat((int) value)), weight, percentilesArrayBlock);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -405,6 +405,15 @@ public static Block createLongRepeatBlock(int value, int length)
return builder.build();
}

/**
 * Builds a block by writing {@code value} with the DOUBLE type {@code length} times.
 *
 * @param value the double written at every position
 * @param length number of positions to write; also passed as the builder's fixed size
 * @return the block produced by the builder after all writes
 */
public static Block createDoubleRepeatBlock(double value, int length)
{
    BlockBuilder blockBuilder = DOUBLE.createFixedSizeBlockBuilder(length);
    int remaining = length;
    while (remaining > 0) {
        DOUBLE.writeDouble(blockBuilder, value);
        remaining--;
    }
    return blockBuilder.build();
}

public static Block createTimestampsWithTimezoneBlock(Long... values)
{
BlockBuilder builder = TIMESTAMP_WITH_TIME_ZONE.createFixedSizeBlockBuilder(values.length);
Expand Down
Loading

0 comments on commit b87425e

Please sign in to comment.