-
Notifications
You must be signed in to change notification settings - Fork 513
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add opt-in Logical type support to 0.11.x branch (#4792)
* Support logical types in SpecificRecord Parquet writes (#4772) * Reproduce SpecificRecords/logicalType bug * Fix bug * Cleanup test * logical types must be added from worker VM, not launcher * Fix for both reads and writes * cleanup * Don't set supplier if user has explicitly configured one * Test for custom logical type provider * Pass dataModel to AvroParquetWriter.Builder * Only override .withDataModel if Configuration key exists * Isolate SpecificData instances * fix test * update copyright year * cleanup * Add support for scio-smb Parquet writes * Manage config defaults via core-site.xml * Fix formatting * Should be a standalone class * add test * +header * newline * Add remaining Converters * simplify SpecificData creation * Correctly encode logical types in GenericRecord * remove unused import * Make Parquet logical type support opt-in (#4782) * Make logical type support for Parquet-Avro opt-in * Update documentation * conf->jobConf * scalafmt * Update scio-parquet/src/main/scala/com/spotify/scio/parquet/avro/ParquetAvroIO.scala Co-authored-by: Michel Davit <micheld@spotify.com> * Add parens --------- Co-authored-by: Michel Davit <micheld@spotify.com> --------- Co-authored-by: Michel Davit <micheld@spotify.com>
- Loading branch information
1 parent
c4c51d9
commit 5de2e2a
Showing
9 changed files
with
408 additions
and
9 deletions.
There are no files selected for viewing
40 changes: 40 additions & 0 deletions
40
scio-parquet/src/main/java/com/spotify/scio/parquet/avro/LogicalTypeSupplier.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
/* | ||
* Copyright 2023 Spotify AB. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, | ||
* software distributed under the License is distributed on an | ||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
* KIND, either express or implied. See the License for the | ||
* specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
|
||
package com.spotify.scio.parquet.avro; | ||
|
||
import org.apache.avro.Conversions; | ||
import org.apache.avro.data.TimeConversions; | ||
import org.apache.avro.generic.GenericData; | ||
import org.apache.avro.specific.SpecificData; | ||
import org.apache.parquet.avro.SpecificDataSupplier; | ||
|
||
/** A SpecificDataSupplier that supplies built-in conversions for Avro LogicalTypes. */ | ||
public class LogicalTypeSupplier extends SpecificDataSupplier { | ||
@Override | ||
public GenericData get() { | ||
SpecificData specificData = new SpecificData(); | ||
specificData.addLogicalTypeConversion(new TimeConversions.DateConversion()); | ||
specificData.addLogicalTypeConversion(new TimeConversions.TimeConversion()); | ||
specificData.addLogicalTypeConversion(new TimeConversions.TimestampConversion()); | ||
specificData.addLogicalTypeConversion(new TimeConversions.TimeMicrosConversion()); | ||
specificData.addLogicalTypeConversion(new TimeConversions.TimestampMicrosConversion()); | ||
specificData.addLogicalTypeConversion(new Conversions.DecimalConversion()); | ||
specificData.addLogicalTypeConversion(new Conversions.UUIDConversion()); | ||
return specificData; | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
40 changes: 40 additions & 0 deletions
40
scio-parquet/src/main/scala/com/spotify/scio/parquet/package.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
/* | ||
* Copyright 2022 Spotify AB | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package com.spotify.scio | ||
|
||
import org.apache.hadoop.conf.Configuration | ||
|
||
package object parquet {

  /** Factory for Hadoop `Configuration` instances populated from key/value pairs. */
  object ParquetConfiguration {

    /**
     * Creates a new `Configuration` and applies every entry to it, in the order given.
     *
     * Booleans, floats, doubles, ints, longs and strings are written with the matching typed
     * setter. A `Class` is written with `setClass`, passing the class as both the value and the
     * required interface. Any other value is stored as its `toString` representation.
     *
     * @param entries
     *   key/value pairs to set on the new configuration
     * @return
     *   the populated configuration
     */
    def of(entries: (String, Any)*): Configuration = {
      val configuration = new Configuration()
      entries.foreach {
        case (key, flag: Boolean)   => configuration.setBoolean(key, flag)
        case (key, number: Float)   => configuration.setFloat(key, number)
        case (key, number: Double)  => configuration.setDouble(key, number)
        case (key, number: Int)     => configuration.setInt(key, number)
        case (key, number: Long)    => configuration.setLong(key, number)
        case (key, text: String)    => configuration.set(key, text)
        case (key, clazz: Class[_]) => configuration.setClass(key, clazz, clazz)
        case (key, other)           => configuration.set(key, other.toString)
      }
      configuration
    }
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
39 changes: 39 additions & 0 deletions
39
scio-smb/src/main/java/org/apache/beam/sdk/extensions/smb/AvroLogicalTypeSupplier.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
/* | ||
* Copyright 2023 Spotify AB. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, | ||
* software distributed under the License is distributed on an | ||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
* KIND, either express or implied. See the License for the | ||
* specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
|
||
package org.apache.beam.sdk.extensions.smb; | ||
|
||
import org.apache.avro.Conversions; | ||
import org.apache.avro.data.TimeConversions; | ||
import org.apache.avro.generic.GenericData; | ||
import org.apache.avro.specific.SpecificData; | ||
import org.apache.parquet.avro.SpecificDataSupplier; | ||
|
||
public class AvroLogicalTypeSupplier extends SpecificDataSupplier { | ||
@Override | ||
public GenericData get() { | ||
SpecificData specificData = new SpecificData(); | ||
specificData.addLogicalTypeConversion(new TimeConversions.DateConversion()); | ||
specificData.addLogicalTypeConversion(new TimeConversions.TimeConversion()); | ||
specificData.addLogicalTypeConversion(new TimeConversions.TimestampConversion()); | ||
specificData.addLogicalTypeConversion(new TimeConversions.TimeMicrosConversion()); | ||
specificData.addLogicalTypeConversion(new TimeConversions.TimestampMicrosConversion()); | ||
specificData.addLogicalTypeConversion(new Conversions.DecimalConversion()); | ||
specificData.addLogicalTypeConversion(new Conversions.UUIDConversion()); | ||
return specificData; | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.