diff --git a/dataset/src/main/scala/frameless/TypedDataset.scala b/dataset/src/main/scala/frameless/TypedDataset.scala index 3f6c80a5..261e7c25 100644 --- a/dataset/src/main/scala/frameless/TypedDataset.scala +++ b/dataset/src/main/scala/frameless/TypedDataset.scala @@ -626,7 +626,7 @@ class TypedDataset[T] protected[frameless](val dataset: Dataset[T])(implicit val * @tparam V value type of column in T * @return */ - def drop[Out, TRep <: HList, Removed <: HList, ValuesFromRemoved <: HList, V] + def dropTupled[Out, TRep <: HList, Removed <: HList, ValuesFromRemoved <: HList, V] (column: Witness.Lt[Symbol]) (implicit i0: LabelledGeneric.Aux[T, TRep], @@ -643,6 +643,23 @@ class TypedDataset[T] protected[frameless](val dataset: Dataset[T])(implicit val TypedDataset.create[Out](dropped) } + /** + * Drops columns as necessary to return `U` + * + * @example + * {{{ + * case class X(i: Int, j: Int, k: Boolean) + * case class Y(i: Int, k: Boolean) + * val f: TypedDataset[X] = ??? + * val fNew: TypedDataset[Y] = f.drop[Y] + * }}} + * + * @tparam U the output type + * + * @see [[frameless.TypedDataset#project]] + */ + def drop[U](implicit projector: SmartProject[T,U]): TypedDataset[U] = project[U] + /** Prepends a new column to the Dataset. * * {{{ @@ -719,11 +736,11 @@ class TypedDataset[T] protected[frameless](val dataset: Dataset[T])(implicit val * @tparam NewKeys the keys of NewFields as an HList * @tparam NewKey the first, and only, key in NewKey * - * @see [[frameless.TypedDataset.withColumnApply#apply]] + * @see [[frameless.TypedDataset.WithColumnApply#apply]] */ - def withColumn[U] = new withColumnApply[U] + def withColumn[U] = new WithColumnApply[U] - class withColumnApply[U] { + class WithColumnApply[U] { def apply[A, TRep <: HList, URep <: HList, UKeys <: HList, NewFields <: HList, NewKeys <: HList, NewKey <: Symbol] (ca: TypedColumn[T, A]) (implicit diff --git a/dataset/src/test/scala/frameless/DropTest.scala b/dataset/src/test/scala/frameless/DropTest.scala index 5c030b27..3e5a0d73 100644 --- a/dataset/src/test/scala/frameless/DropTest.scala +++ b/dataset/src/test/scala/frameless/DropTest.scala @@ -2,62 +2,48 @@ package frameless import org.scalacheck.Prop import org.scalacheck.Prop._ +import shapeless.test.illTyped class DropTest extends TypedDatasetSuite { - test("drop five columns") { - def prop[A: TypedEncoder](value: A): Prop = { - val d5 = TypedDataset.create(X5(value, value, value, value, value) :: Nil) - val d4 = d5.drop('a) //drops first column - val d3 = d4.drop('_4) //drops last column - val d2 = d3.drop('_2) //drops middle column - val d1 = d2.drop('_2) + import DropTest._ - Tuple1(value) ?= d1.collect().run().head + test("fail to compile on missing value") { + val f: TypedDataset[X] = TypedDataset.create(X(1, 1, false) :: X(1, 1, false) :: X(1, 10, false) :: Nil) + illTyped { + """val fNew: TypedDataset[XMissing] = f.drop[XMissing]('j)""" } - - check(prop[Int] _) - check(prop[Long] _) - check(prop[String] _) - check(prop[SQLDate] _) - check(prop[Option[X1[Boolean]]] _) } - test("drop first column") { - def prop[A: TypedEncoder](value: A): Prop = { - val d3 = TypedDataset.create(X3(value, value, value) :: Nil) - val d2 = d3.drop('a) - - (value, value) ?= d2.collect().run().head + test("fail to compile on different column name") { + val f: TypedDataset[X] = TypedDataset.create(X(1, 1, false) :: X(1, 1, false) :: X(1, 10, false) :: Nil) + illTyped { + """val fNew: TypedDataset[XDifferentColumnName] = f.drop[XDifferentColumnName]('j)""" } - - check(prop[Int] _) - check(prop[Long] _) - check(prop[String] _) - check(prop[SQLDate] _) - check(prop[Option[X1[Boolean]]] _) } - test("drop middle column") { - def prop[A: TypedEncoder](value: A): Prop = { - val d3 = TypedDataset.create(X3(value, value, value) :: Nil) - val d2 = d3.drop('b) - - (value, value) ?= d2.collect().run().head + test("fail to compile on added column name") { + val f: TypedDataset[X] = TypedDataset.create(X(1, 1, false) :: X(1, 1, false) :: X(1, 10, false) :: Nil) + illTyped { + """val fNew: TypedDataset[XAdded] = f.drop[XAdded]('j)""" } + } - check(prop[Int] _) - check(prop[Long] _) - check(prop[String] _) - check(prop[SQLDate] _) - check(prop[Option[X1[Boolean]]] _) + test("remove column in the middle") { + val f: TypedDataset[X] = TypedDataset.create(X(1, 1, false) :: X(1, 1, false) :: X(1, 10, false) :: Nil) + val fNew: TypedDataset[XGood] = f.drop[XGood] + + fNew.collect().run().foreach(xg => assert(xg === XGood(1, false))) } - test("drop last column") { + test("drop four columns") { def prop[A: TypedEncoder](value: A): Prop = { - val d3 = TypedDataset.create(X3(value, value, value) :: Nil) - val d2 = d3.drop('c) + val d5 = TypedDataset.create(X5(value, value, value, value, value) :: Nil) + val d4 = d5.drop[X4[A, A, A, A]] + val d3 = d4.drop[X3[A, A, A]] + val d2 = d3.drop[X2[A, A]] + val d1 = d2.drop[X1[A]] - (value, value) ?= d2.collect().run().head + X1(value) ?= d1.collect().run().head } check(prop[Int] _) @@ -67,3 +53,11 @@ class DropTest extends TypedDatasetSuite { check(prop[Option[X1[Boolean]]] _) } } + +object DropTest { + case class X(i: Int, j: Int, k: Boolean) + case class XMissing(i: Int) + case class XDifferentColumnName(ij: Int, k: Boolean) + case class XAdded(i: Int, j: Int, k: Boolean, l: Int) + case class XGood(i: Int, k: Boolean) +} diff --git a/dataset/src/test/scala/frameless/DropTupledTest.scala b/dataset/src/test/scala/frameless/DropTupledTest.scala new file mode 100644 index 00000000..ff0158b9 --- /dev/null +++ b/dataset/src/test/scala/frameless/DropTupledTest.scala @@ -0,0 +1,69 @@ +package frameless + +import org.scalacheck.Prop +import org.scalacheck.Prop._ + +class DropTupledTest extends TypedDatasetSuite { + test("drop five columns") { + def prop[A: TypedEncoder](value: A): Prop = { + val d5 = TypedDataset.create(X5(value, value, value, value, value) :: Nil) + val d4 = d5.dropTupled('a) //drops first column + val d3 = d4.dropTupled('_4) //drops last column + val d2 = d3.dropTupled('_2) //drops middle column + val d1 = d2.dropTupled('_2) + + Tuple1(value) ?= d1.collect().run().head + } + + check(prop[Int] _) + check(prop[Long] _) + check(prop[String] _) + check(prop[SQLDate] _) + check(prop[Option[X1[Boolean]]] _) + } + + test("drop first column") { + def prop[A: TypedEncoder](value: A): Prop = { + val d3 = TypedDataset.create(X3(value, value, value) :: Nil) + val d2 = d3.dropTupled('a) + + (value, value) ?= d2.collect().run().head + } + + check(prop[Int] _) + check(prop[Long] _) + check(prop[String] _) + check(prop[SQLDate] _) + check(prop[Option[X1[Boolean]]] _) + } + + test("drop middle column") { + def prop[A: TypedEncoder](value: A): Prop = { + val d3 = TypedDataset.create(X3(value, value, value) :: Nil) + val d2 = d3.dropTupled('b) + + (value, value) ?= d2.collect().run().head + } + + check(prop[Int] _) + check(prop[Long] _) + check(prop[String] _) + check(prop[SQLDate] _) + check(prop[Option[X1[Boolean]]] _) + } + + test("drop last column") { + def prop[A: TypedEncoder](value: A): Prop = { + val d3 = TypedDataset.create(X3(value, value, value) :: Nil) + val d2 = d3.dropTupled('c) + + (value, value) ?= d2.collect().run().head + } + + check(prop[Int] _) + check(prop[Long] _) + check(prop[String] _) + check(prop[SQLDate] _) + check(prop[Option[X1[Boolean]]] _) + } +}