Fix smb secondary key usages in cherry-picked 0.11.x branch (#4627)

* Fix smb secondary key usages in cherry-picked 0.11.x branch

* fmt check
clairemcginty authored Dec 8, 2022
1 parent 6ba7e85 commit c4c51d9
Showing 8 changed files with 0 additions and 100 deletions.

@@ -21,7 +21,6 @@
 
 import com.fasterxml.jackson.annotation.JsonCreator;
 import com.fasterxml.jackson.annotation.JsonIgnore;
-import com.fasterxml.jackson.annotation.JsonInclude;
 import com.fasterxml.jackson.annotation.JsonProperty;
 import java.util.Arrays;
 import java.util.Map;

@@ -18,7 +18,6 @@
 package org.apache.beam.sdk.extensions.smb;
 
 import com.fasterxml.jackson.annotation.JsonIgnore;
-import com.fasterxml.jackson.annotation.JsonInclude;
 import com.fasterxml.jackson.annotation.JsonProperty;
 import com.fasterxml.jackson.annotation.JsonTypeInfo;
 import com.fasterxml.jackson.core.JsonProcessingException;

@@ -21,7 +21,6 @@
 
 import com.fasterxml.jackson.annotation.JsonCreator;
 import com.fasterxml.jackson.annotation.JsonIgnore;
-import com.fasterxml.jackson.annotation.JsonInclude;
 import com.fasterxml.jackson.annotation.JsonProperty;
 import com.google.api.services.bigquery.model.TableRow;
 import java.util.Arrays;

@@ -18,7 +18,6 @@
 package org.apache.beam.sdk.extensions.smb;
 
 import com.fasterxml.jackson.annotation.JsonCreator;
-import com.fasterxml.jackson.annotation.JsonInclude;
 import com.fasterxml.jackson.annotation.JsonProperty;
 import com.google.protobuf.ByteString;
 import org.apache.beam.sdk.coders.CannotProvideCoderException;

@@ -19,8 +19,6 @@
 
 import static org.apache.beam.sdk.transforms.display.DisplayDataMatchers.hasDisplayItem;
 
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.nio.charset.Charset;
 import java.util.ArrayList;
@@ -375,29 +373,6 @@ public void testKeyTypeCheckingUnionTypes()
             illegalUnionSchema2));
   }
 
-  @Test
-  public void skipsNullSecondaryKeys()
-      throws CannotProvideCoderException, NonDeterministicException, IOException {
-    final AvroBucketMetadata<String, Void, GenericRecord> metadata =
-        new AvroBucketMetadata<>(
-            4,
-            1,
-            String.class,
-            "favorite_color",
-            null,
-            null,
-            HashType.MURMUR3_32,
-            SortedBucketIO.DEFAULT_FILENAME_PREFIX,
-            AvroGeneratedUser.SCHEMA$);
-
-    final ByteArrayOutputStream os = new ByteArrayOutputStream();
-    BucketMetadata.to(metadata, os);
-
-    Assert.assertFalse(os.toString().contains("keyFieldSecondary"));
-    Assert.assertNull(
-        ((AvroBucketMetadata) BucketMetadata.from(os.toString())).getKeyClassSecondary());
-  }
-
   private static Schema createUnionRecordOfTypes(Schema.Type... types) {
     final List<Schema> typeSchemas = new ArrayList<>();
     Arrays.asList(types).forEach(t -> typeSchemas.add(Schema.create(t)));

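The deleted skipsNullSecondaryKeys test (the same removal repeats in the JSON and TensorFlow test files below) constructed metadata with null secondary-key arguments, serialized it with BucketMetadata.to, asserted that keyFieldSecondary never reached the output, and round-tripped it through BucketMetadata.from to check that getKeyClassSecondary() decoded back to null. Those constructor parameters and accessors belong to the secondary-key feature being backed out here, so the test presumably cannot compile on the 0.11.x branch once the fix lands. For reference, a self-contained sketch of the same serialize-then-inspect pattern in plain Jackson and JUnit 4, with no Beam SMB types involved:

import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.io.ByteArrayOutputStream;
import org.junit.Assert;
import org.junit.Test;

public class SerializedFormTest {
  // Hypothetical metadata shape; only the optional field matters here.
  static class Metadata {
    public String keyField = "favorite_color";

    @JsonInclude(JsonInclude.Include.NON_NULL)
    public String keyFieldSecondary;
  }

  @Test
  public void skipsNullOptionalField() throws Exception {
    final ObjectMapper mapper = new ObjectMapper();
    final ByteArrayOutputStream os = new ByteArrayOutputStream();
    mapper.writeValue(os, new Metadata());

    // The null field is absent from the bytes, and decodes back to null.
    Assert.assertFalse(os.toString().contains("keyFieldSecondary"));
    Assert.assertNull(mapper.readValue(os.toString(), Metadata.class).keyFieldSecondary);
  }
}
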
@@ -136,32 +136,6 @@ public void testFilenamePrefixDefault() throws Exception {
         ((TensorFlowBucketMetadata) BucketMetadata.from(serializedTensorflow)).getFilenamePrefix());
   }
 
-  @Test
-  public void testOldBucketMetadataIgnoresExtraFields() throws Exception {
-    final int futureVersion = BucketMetadata.CURRENT_VERSION + 1;
-    final String serializedAvro =
-        "{\"type\":\"org.apache.beam.sdk.extensions.smb.AvroBucketMetadata\",\"version\":"
-            + futureVersion
-            + ",\"numBuckets\":2,\"numShards\":1,\"keyClass\":\"java.lang.String\",\"hashType\":\"MURMUR3_32\",\"keyField\":\"user_id\", \"extra_field\":\"foo\"}";
-    final String serializedJson =
-        "{\"type\":\"org.apache.beam.sdk.extensions.smb.JsonBucketMetadata\",\"version\":"
-            + futureVersion
-            + ",\"numBuckets\":2,\"numShards\":1,\"keyClass\":\"java.lang.String\",\"hashType\":\"MURMUR3_32\",\"keyField\":\"user_id\", \"extra_field\":\"foo\"}";
-    final String serializedTensorflow =
-        "{\"type\":\"org.apache.beam.sdk.extensions.smb.TensorFlowBucketMetadata\",\"version\":"
-            + futureVersion
-            + ",\"numBuckets\":2,\"numShards\":1,\"keyClass\":\"java.lang.String\",\"hashType\":\"MURMUR3_32\",\"keyField\":\"user_id\", \"extra_field\":\"foo\"}";
-
-    // Assert that no exception is thrown decoding.
-    Assert.assertEquals(
-        ((JsonBucketMetadata) BucketMetadata.from(serializedJson)).getVersion(), futureVersion);
-    Assert.assertEquals(
-        ((AvroBucketMetadata) BucketMetadata.from(serializedAvro)).getVersion(), futureVersion);
-    Assert.assertEquals(
-        ((TensorFlowBucketMetadata) BucketMetadata.from(serializedTensorflow)).getVersion(),
-        futureVersion);
-  }
-
   @Test
   public void testNullKeyEncoding() throws Exception {
     final TestBucketMetadata m =

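The hunk above removes testOldBucketMetadataIgnoresExtraFields, a forward-compatibility check: metadata JSON written by a future version, carrying a higher version number and an unrecognized extra_field, had to decode on the current version without throwing. The test presumably arrived with the same cherry-picked secondary-key work and is rolled back along with it. Jackson supplies this kind of tolerance via @JsonIgnoreProperties(ignoreUnknown = true) on the class, or by disabling FAIL_ON_UNKNOWN_PROPERTIES on the mapper; a minimal sketch with a hypothetical VersionedMetadata stand-in:

import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.fasterxml.jackson.databind.ObjectMapper;

public class ForwardCompatDemo {
  // Unknown properties in the input are silently skipped during decoding.
  @JsonIgnoreProperties(ignoreUnknown = true)
  static class VersionedMetadata {
    public int version;
    public String keyField;
  }

  public static void main(String[] args) throws Exception {
    final String json = "{\"version\":99,\"keyField\":\"user_id\",\"extra_field\":\"foo\"}";
    final VersionedMetadata m = new ObjectMapper().readValue(json, VersionedMetadata.class);
    System.out.println(m.version); // 99; extra_field was ignored, no exception
  }
}

Whether the real BucketMetadata uses the annotation or a mapper setting is not visible in this diff; the sketch only illustrates the behavior the test asserted.
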
@@ -20,11 +20,7 @@
 import static org.apache.beam.sdk.transforms.display.DisplayDataMatchers.hasDisplayItem;
 
 import com.google.api.services.bigquery.model.TableRow;
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
 import java.util.Arrays;
-import org.apache.beam.sdk.coders.CannotProvideCoderException;
-import org.apache.beam.sdk.coders.Coder;
 import org.apache.beam.sdk.coders.StringUtf8Coder;
 import org.apache.beam.sdk.extensions.smb.BucketMetadata.HashType;
 import org.apache.beam.sdk.transforms.display.DisplayData;
@@ -178,26 +174,4 @@ public void testSameSourceCompatibility() throws Exception {
     Assert.assertTrue(metadata2.isPartitionCompatible(metadata3));
     Assert.assertFalse(metadata3.isPartitionCompatible(metadata4));
   }
-
-  @Test
-  public void skipsNullSecondaryKeys()
-      throws CannotProvideCoderException, Coder.NonDeterministicException, IOException {
-    final JsonBucketMetadata<String, Void> metadata =
-        new JsonBucketMetadata<>(
-            4,
-            1,
-            String.class,
-            "favorite_color",
-            null,
-            null,
-            HashType.MURMUR3_32,
-            SortedBucketIO.DEFAULT_FILENAME_PREFIX);
-
-    final ByteArrayOutputStream os = new ByteArrayOutputStream();
-    BucketMetadata.to(metadata, os);
-
-    Assert.assertFalse(os.toString().contains("keyFieldSecondary"));
-    Assert.assertNull(
-        ((JsonBucketMetadata) BucketMetadata.from(os.toString())).getKeyClassSecondary());
-  }
 }

@@ -21,12 +21,8 @@
 import static org.apache.beam.sdk.transforms.display.DisplayDataMatchers.hasDisplayItem;
 
 import com.google.protobuf.ByteString;
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
 import java.nio.charset.Charset;
 import org.apache.beam.sdk.coders.ByteArrayCoder;
-import org.apache.beam.sdk.coders.CannotProvideCoderException;
-import org.apache.beam.sdk.coders.Coder;
 import org.apache.beam.sdk.coders.Coder.NonDeterministicException;
 import org.apache.beam.sdk.transforms.display.DisplayData;
@@ -184,19 +180,4 @@ public void testSameSourceCompatibility() throws Exception {
     Assert.assertTrue(metadata2.isPartitionCompatible(metadata3));
     Assert.assertFalse(metadata3.isPartitionCompatible(metadata4));
   }
-
-  @Test
-  public void skipsNullSecondaryKeys()
-      throws CannotProvideCoderException, Coder.NonDeterministicException, IOException {
-    final TensorFlowBucketMetadata<String, Void> metadata =
-        new TensorFlowBucketMetadata<>(
-            4, 1, String.class, "bar", HashType.MURMUR3_32, SortedBucketIO.DEFAULT_FILENAME_PREFIX);
-
-    final ByteArrayOutputStream os = new ByteArrayOutputStream();
-    BucketMetadata.to(metadata, os);
-
-    Assert.assertFalse(os.toString().contains("keyFieldSecondary"));
-    Assert.assertNull(
-        ((TensorFlowBucketMetadata) BucketMetadata.from(os.toString())).getKeyClassSecondary());
-  }
 }
