Skip to content

Commit

Permalink
Add latest API to SCollection
Browse files Browse the repository at this point in the history
  • Loading branch information
RustedBones committed Sep 18, 2024
1 parent cd3c597 commit 9279c84
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 3 deletions.
10 changes: 10 additions & 0 deletions scio-core/src/main/scala/com/spotify/scio/values/SCollection.scala
Original file line number Diff line number Diff line change
Expand Up @@ -807,6 +807,16 @@ sealed trait SCollection[T] extends PCollectionWrapper[T] {
def min(implicit ord: Ordering[T]): SCollection[T] =
this.reduce(ord.min)


/**
* Return the latest of this SCollection according to its event time, or null if there are no elements.
* @return
* a new SCollection with the latest element
* @group transform
*/
def latest: SCollection[T] =
this.pApply(Latest.globally())

/**
* Compute the SCollection's data distribution using approximate `N`-tiles.
* @return
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,10 @@ import com.spotify.scio.ScioContext
import com.spotify.scio.util.Functions
import com.spotify.scio.coders.Coder
import com.twitter.algebird.{Aggregator, Monoid, MonoidAggregator, Semigroup}
import org.apache.beam.sdk.transforms.{Combine, Top}
import org.apache.beam.sdk.transforms.{Combine, Latest, Mean, Top}
import org.apache.beam.sdk.values.PCollection

import java.lang.{Iterable => JIterable}

import java.lang.{Double => JDouble, Iterable => JIterable}
import scala.jdk.CollectionConverters._

/**
Expand Down Expand Up @@ -116,6 +115,29 @@ class SCollectionWithFanout[T] private[values] (coll: SCollection[T], fanout: In
)
}

/** [[SCollection.min]] with fan out. */
def min(implicit ord: Ordering[T]): SCollection[T] =
this.reduce(ord.min)

/** [[SCollection.max]] with fan out. */
def max(implicit ord: Ordering[T]): SCollection[T] =
this.reduce(ord.max)

/** [[SCollection.mean]] with fan out. */
def mean(implicit ev: Numeric[T]): SCollection[Double] = {
val e = ev // defeat closure
coll.transform { in =>
in.map(e.toDouble)
.asInstanceOf[SCollection[JDouble]]
.pApply(Mean.globally().withFanout(fanout))
.asInstanceOf[SCollection[Double]]
}
}

/** [[SCollection.latest]] with fan out. */
def latest: SCollection[T] =
coll.pApply(Latest.globally())

/** [[SCollection.top]] with fan out. */
def top(num: Int)(implicit ord: Ordering[T]): SCollection[Iterable[T]] = {
coll.transform { in =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -527,6 +527,14 @@ class SCollectionTest extends PipelineSpec {
}
}

it should "support latest" in {
runWithContext { sc =>
def latest(elems: Long*): SCollection[Long] =
sc.parallelize(elems).timestampBy(Instant.ofEpochMilli).latest
latest(1L, 2L, 3L) should containSingleValue(3L)
}
}

it should "support quantilesApprox()" in {
runWithContext { sc =>
val p = sc.parallelize(0 to 100).quantilesApprox(5)
Expand Down

0 comments on commit 9279c84

Please sign in to comment.