Skip to content

Commit

Permalink
typelevel#804 - encoding for Set derivatives as well - test build, 2.…
Browse files Browse the repository at this point in the history
…13 forced changes, compilation issue with toSeq():GenSeq
  • Loading branch information
chris-twiner committed Mar 20, 2024
1 parent 9e45d92 commit e7881c0
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 78 deletions.
2 changes: 1 addition & 1 deletion dataset/src/main/scala/frameless/CollectionCaster.scala
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,6 @@ case class SeqCaster[C[X] <: Iterable[X], Y](child: Expression)
ctx: CodegenContext,
ev: ExprCode
): ExprCode =
defineCodeGen(ctx, ev, c => toSeqOr(s"$c.toSeq()", s"$c"))
defineCodeGen(ctx, ev, c => toSeqOr(s"$c.toVector()", s"$c"))

}
61 changes: 11 additions & 50 deletions dataset/src/main/scala/frameless/TypedEncoder.scala
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,6 @@ import org.apache.spark.unsafe.types.UTF8String
import shapeless._
import shapeless.ops.hlist.IsHCons

import scala.collection.generic.CanBuildFrom

import scala.collection.immutable.{ ListSet, TreeSet }

abstract class TypedEncoder[T](
Expand Down Expand Up @@ -512,71 +510,53 @@ object TypedEncoder {

object CollectionConversion {

implicit def seqToSeq[Y](
implicit
cbf: CanBuildFrom[Nothing, Y, Seq[Y]]
) = new CollectionConversion[Seq, Seq, Y] {
implicit def seqToSeq[Y] = new CollectionConversion[Seq, Seq, Y] {
override def convert(c: Seq[Y]): Seq[Y] = c
}

implicit def seqToVector[Y](
implicit
cbf: CanBuildFrom[Nothing, Y, Vector[Y]]
) = new CollectionConversion[Seq, Vector, Y] {
implicit def seqToVector[Y] = new CollectionConversion[Seq, Vector, Y] {
override def convert(c: Seq[Y]): Vector[Y] = c.toVector
}

implicit def seqToList[Y](
implicit
cbf: CanBuildFrom[Nothing, Y, List[Y]]
) = new CollectionConversion[Seq, List, Y] {
implicit def seqToList[Y] = new CollectionConversion[Seq, List, Y] {
override def convert(c: Seq[Y]): List[Y] = c.toList
}

implicit def setToSet[Y](
implicit
cbf: CanBuildFrom[Nothing, Y, Set[Y]]
) = new CollectionConversion[Set, Set, Y] {
implicit def setToSet[Y] = new CollectionConversion[Set, Set, Y] {
override def convert(c: Set[Y]): Set[Y] = c
}

implicit def setToTreeSet[Y](
implicit
cbf: CanBuildFrom[Nothing, Y, TreeSet[Y]]
ordering: Ordering[Y]
) = new CollectionConversion[Set, TreeSet, Y] {
override def convert(c: Set[Y]): TreeSet[Y] = c.to[TreeSet]
override def convert(c: Set[Y]): TreeSet[Y] = TreeSet.newBuilder.++=(c).result()
}

implicit def setToListSet[Y](
implicit
cbf: CanBuildFrom[Nothing, Y, ListSet[Y]]
) = new CollectionConversion[Set, ListSet, Y] {
override def convert(c: Set[Y]): ListSet[Y] = c.to[ListSet]
implicit def setToListSet[Y] = new CollectionConversion[Set, ListSet, Y] {
override def convert(c: Set[Y]): ListSet[Y] = ListSet.newBuilder.++=(c).result()
}
}

implicit def seqEncoder[C[X] <: Seq[X], T](
implicit
i0: Lazy[RecordFieldEncoder[T]],
i1: ClassTag[C[T]],
i2: CollectionConversion[Seq, C, T],
i3: CanBuildFrom[Nothing, T, C[T]]
i2: CollectionConversion[Seq, C, T]
) = collectionEncoder[Seq, C, T]

implicit def setEncoder[C[X] <: Set[X], T](
implicit
i0: Lazy[RecordFieldEncoder[T]],
i1: ClassTag[C[T]],
i2: CollectionConversion[Set, C, T],
i3: CanBuildFrom[Nothing, T, C[T]]
i2: CollectionConversion[Set, C, T]
) = collectionEncoder[Set, C, T]

def collectionEncoder[O[_], C[X], T](
implicit
i0: Lazy[RecordFieldEncoder[T]],
i1: ClassTag[C[T]],
i2: CollectionConversion[O, C, T],
i3: CanBuildFrom[Nothing, T, C[T]]
i2: CollectionConversion[O, C, T]
): TypedEncoder[C[T]] = new TypedEncoder[C[T]] {
private lazy val encodeT = i0.value.encoder

Expand Down Expand Up @@ -618,25 +598,6 @@ object TypedEncoder {
override def toString: String = s"collectionEncoder($jvmRepr)"
}

/**
* @param i1 implicit lazy `RecordFieldEncoder[T]` to encode individual elements of the set.
* @param i2 implicit `ClassTag[Set[T]]` to provide runtime information about the set type.
* @tparam T the element type of the set.
* @return a `TypedEncoder` instance for `Set[T]`.
*
* implicit def setEncoder[C[X] <: Seq[X], T](
* implicit
* i1: shapeless.Lazy[RecordFieldEncoder[T]],
* i2: ClassTag[Set[T]],
* i3: CollectionConversion[Set, C, T],
* i4: CanBuildFrom[Nothing, T, C[T]]
* ): TypedEncoder[Set[T]] = {
* implicit val inj: Injection[Set[T], Seq[T]] = Injection(_.toSeq, _.toSet)
*
* TypedEncoder.usingInjection
* }
*/

/**
* @tparam A the key type
* @tparam B the value type
Expand Down
67 changes: 41 additions & 26 deletions dataset/src/test/scala/frameless/EncoderTests.scala
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package frameless

import scala.collection.immutable.{ Set, TreeSet }
import scala.collection.immutable.{ListSet, Set, TreeSet}
import org.scalatest.matchers.should.Matchers

object EncoderTests {
Expand All @@ -10,9 +10,7 @@ object EncoderTests {
case class DurationRow(d: java.time.Duration)
case class PeriodRow(p: java.time.Period)

case class VectorOfObject(a: Vector[X1[Int]])

case class TreeSetOfObjects(a: TreeSet[X1[Int]])
case class ContainerOf[CC[X] <: Iterable[X]](a: CC[X1[Int]])
}

class EncoderTests extends TypedDatasetSuite with Matchers {
Expand All @@ -36,31 +34,48 @@ class EncoderTests extends TypedDatasetSuite with Matchers {
implicitly[TypedEncoder[PeriodRow]]
}

test("It should encode a Vector of Objects") {
evalCodeGens {
implicit val e = implicitly[TypedEncoder[VectorOfObject]]
implicit val te = TypedExpressionEncoder[VectorOfObject]
implicit val xe = implicitly[TypedEncoder[X1[VectorOfObject]]]
implicit val xte = TypedExpressionEncoder[X1[VectorOfObject]]
val v = (1 to 20).map(X1(_)).toVector
val ds = {
sqlContext.createDataset(Seq(X1[VectorOfObject](VectorOfObject(v))))
}
ds.head.a.a shouldBe v
/**
 * Round-trips a collection of `X1[Int]` through a Spark dataset and checks that it
 * comes back equal to what went in.
 *
 * The whole body is handed to `evalCodeGens`, so it is evaluated twice and the pair of
 * results is returned (presumably once interpreted and once with code generation
 * forced -- confirm against `evalCodeGens` in the test package object).
 *
 * @param toType converts the generated `Seq[X1[Int]]` (values 1 to 20) into the
 *               collection type `C` under test
 * @param ce     encoder for `C[X1[Int]]`; requiring it here forces the implicit
 *               derivation for the collection type at each call site
 * @tparam C     the collection type constructor under test
 * @return a pair of `Unit`s, one per evaluation performed by `evalCodeGens`
 */
def performCollection[C[X] <: Iterable[X]](toType: Seq[X1[Int]] => C[X1[Int]])(implicit ce: TypedEncoder[C[X1[Int]]]): (Unit,Unit) = evalCodeGens {

// Summon every encoder layer explicitly: the bare collection, the collection wrapped
// in ContainerOf, and that wrapped again in X1. A missing derivation then fails at
// compile time on the specific layer. NOTE(review): the declaration order matters for
// implicit resolution of the later vals -- do not reorder without checking.
implicit val cte = TypedExpressionEncoder[C[X1[Int]]]
implicit val e = implicitly[TypedEncoder[ContainerOf[C]]]
implicit val te = TypedExpressionEncoder[ContainerOf[C]]
implicit val xe = implicitly[TypedEncoder[X1[ContainerOf[C]]]]
implicit val xte = TypedExpressionEncoder[X1[ContainerOf[C]]]
// Expected value: X1(1) .. X1(20) converted into the collection type under test.
val v = toType((1 to 20).map(X1(_)))
// Single-row dataset holding the nested container.
val ds = {
sqlContext.createDataset(Seq(X1[ContainerOf[C]](ContainerOf[C](v))))
}
// Read the row back and compare the decoded collection with the original.
ds.head.a.a shouldBe v
// Discard the matcher result so the block has type Unit, matching (Unit, Unit).
()
}

// Round-trips a plain Seq through the encoder.
//
// The conversion argument must be a real function value. The previous form,
// `performCollection[Seq](_)`, is placeholder syntax for
// `f => performCollection[Seq](f)`: the test body only built a function and
// returned it, so the round-trip was never executed and the test passed vacuously.
test("It should serde a Seq of Objects") {
  // The generated data is already a Seq, so no conversion is needed.
  performCollection[Seq](identity)
}

test("It should encode a TreeSet of Objects") {
evalCodeGens {
implicit val e = implicitly[TypedEncoder[TreeSetOfObjects]]
implicit val te = TypedExpressionEncoder[TreeSetOfObjects]
implicit val xe = implicitly[TypedEncoder[X1[TreeSetOfObjects]]]
implicit val xte = TypedExpressionEncoder[X1[TreeSetOfObjects]]
val v = (1 to 20).map(X1(_)).to[TreeSet]
val ds = {
sqlContext.createDataset(Seq(X1[TreeSetOfObjects](TreeSetOfObjects(v))))
}
ds.head.a.a shouldBe v
// Round-trips a plain Set through the encoder.
//
// The conversion argument must be a real function value. The previous form,
// `performCollection[Set](_)`, is placeholder syntax for
// `f => performCollection[Set](f)`: the test body only built a function and
// returned it, so the round-trip was never executed and the test passed vacuously.
test("It should serde a Set of Objects") {
  // Convert the generated Seq into the Set under test.
  performCollection[Set](_.toSet)
}

// Round-trips a Vector through the encoder.
test("It should serde a Vector of Objects") {
  performCollection[Vector](elements => elements.toVector)
}

// Round-trips a TreeSet through the encoder. TreeSet is sorted, so an Ordering for
// the element type has to be available both to build the expected value and to
// resolve the Set-to-TreeSet conversion.
test("It should serde a TreeSet of Objects") {
  // only needed for 2.12
  // Orders X1[Int] by its wrapped Int using the standard Int ordering.
  implicit val ordering: Ordering[X1[Int]] =
    Ordering.by((x: X1[Int]) => x.a)

  performCollection[TreeSet] { elements =>
    (TreeSet.newBuilder[X1[Int]] ++= elements).result()
  }
}

// Round-trips a List through the encoder.
test("It should serde a List of Objects") {
  performCollection[List](elements => elements.toList)
}

// Round-trips a ListSet through the encoder. The builder is used rather than a
// version-specific conversion method.
test("It should serde a ListSet of Objects") {
  performCollection[ListSet] { elements =>
    (ListSet.newBuilder[X1[Int]] ++= elements).result()
  }
}
}
2 changes: 1 addition & 1 deletion dataset/src/test/scala/frameless/package.scala
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ package object frameless {
* @return
*/
def evalCodeGens[T](f: => T): (T, T) =
(forceCodeGen(f), forceInterpreted(f))
(forceInterpreted(f), forceCodeGen(f))

/**
* Sets all SQL configurations specified in `pairs`, calls `f`, and then restores all SQL
Expand Down

0 comments on commit e7881c0

Please sign in to comment.