
Commit 6223ffa

IC-40 Removes Aggregations progress
1 parent d8d16be commit 6223ffa

File tree: 2 files changed (+16, −66 lines)

src/main/scala/com/intenthq/action_processor/integrations/aggregations/Aggregate.scala

Lines changed: 13 additions & 63 deletions
@@ -2,27 +2,23 @@ package com.intenthq.action_processor.integrations.aggregations
 
 import java.util.concurrent.ConcurrentMap
 
-import scala.concurrent.duration.{DurationInt, FiniteDuration, NANOSECONDS}
+import scala.jdk.CollectionConverters._
 
-import cats.effect.{Blocker, ContextShift, IO, Resource, Sync, SyncIO, Timer}
+import cats.effect.{Blocker, ContextShift, IO, Resource, SyncIO}
 
 import com.intenthq.action_processor.integrations.config.MapDbSettings
 import com.intenthq.action_processor.integrations.feeds.FeedContext
 import com.intenthq.action_processor.integrations.repositories.MapDBRepository
 
+import org.mapdb.{DataInput2, DataOutput2, HTreeMap, Serializer}
 import org.mapdb.elsa.{ElsaMaker, ElsaSerializer}
 import org.mapdb.serializer.GroupSerializerObjectArray
-import org.mapdb.{DataInput2, DataOutput2, HTreeMap, Serializer}
-
-import scala.jdk.CollectionConverters._
-
-import cats.implicits._
 
 object Aggregate {
 
   private lazy val blocker = Blocker[SyncIO].allocated.unsafeRunSync()._1
   private val ec = scala.concurrent.ExecutionContext.global
   implicit private val contextShift: ContextShift[IO] = IO.contextShift(ec)
-  implicit private val timer: Timer[IO] = IO.timer(ec)
 
   def noop[I]: fs2.Pipe[IO, I, (I, Long)] = _.map(_ -> 1L)
 
@@ -45,26 +41,30 @@ object Aggregate {
 
   def aggregateByKeys[I, K](feedContext: FeedContext[IO], keys: I => List[K], counter: I => Long): fs2.Pipe[IO, I, (K, Long)] =
     sourceStream => {
+
+      // This pipe aggregates all the elements and returns a single Map as an aggregate repository
       val aggregateInRepository: fs2.Pipe[IO, I, ConcurrentMap[K, Long]] =
         in => {
           fs2.Stream
             .resource[IO, ConcurrentMap[K, Long]](loadAggRepository(feedContext.mapDbSettings)(blocker))
-            .flatTap { aggRepository =>
+            .flatMap { aggRepository =>
               fs2.Stream.eval_(IO.delay(println("Starting aggregation"))) ++
-                in.evalTap { o =>
+                in.evalMapChunk { o =>
                   IO.delay {
                     keys(o).foreach { value =>
                       val previousCounter = aggRepository.getOrDefault(value, 0L)
                       aggRepository.put(value, counter(o) + previousCounter)
                     }
+                    aggRepository
                   }
-                }.through(AggregationsProgress.showAggregationProgress(5.seconds))
-                  .as(1)
-                  .foldMonoid
-                  .evalMap(n => IO.delay(println(s"Finished aggregation of $n rows")))
+                }
+                  // Returns the last aggRepository with the counter of elements
+                  .fold((aggRepository, 0L)) { case ((_, previousRows), aggRepository) => (aggRepository, previousRows + 1) }
+                  .evalMapChunk { case (aggRepository, n) => IO.delay(println(s"Finished aggregation of $n rows")).as(aggRepository) }
             }
         }
 
+      // Streams the given aggregate repository entries
      val streamAggRepository: fs2.Pipe[IO, ConcurrentMap[K, Long], (K, Long)] =
        _.flatMap(aggRepository => fs2.Stream.iterable(aggRepository.asScala))
 
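For context, the new pipeline threads the mutable repository through the stream and counts rows with a single fold, instead of routing through the removed progress pipe. Below is a minimal, self-contained sketch of that fold-based pattern, assuming fs2 2.x and cats-effect 2 (FoldAggregationSketch and the toy data are illustrative, not the project's API):

import java.util.concurrent.{ConcurrentHashMap, ConcurrentMap}

import cats.effect.{ExitCode, IO, IOApp}

object FoldAggregationSketch extends IOApp {
  def run(args: List[String]): IO[ExitCode] = {
    val repo: ConcurrentMap[String, Long] = new ConcurrentHashMap[String, Long]()
    fs2.Stream
      .emits(List("a", "b", "a", "c", "a"))
      .covary[IO]
      // Update the shared map and re-emit it, like the new evalMapChunk above
      .evalMapChunk { key =>
        IO.delay {
          repo.put(key, repo.getOrDefault(key, 0L) + 1L)
          repo
        }
      }
      // Keep the last repository while counting rows, like the new fold above
      .fold((repo, 0L)) { case ((_, rows), r) => (r, rows + 1) }
      .evalMap { case (r, n) => IO.delay(println(s"Finished aggregation of $n rows: $r")) }
      .compile
      .drain
      .as(ExitCode.Success)
  }
}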
@@ -74,53 +74,3 @@ object Aggregate {
   def aggregateByKey[I, K](feedContext: FeedContext[IO], key: I => K, counter: I => Long): fs2.Pipe[IO, I, (K, Long)] =
     aggregateByKeys(feedContext, key.andThen(List(_)), counter)
 }
-
-object AggregationsProgress {
-  def showAggregationProgress[F[_]: Sync: Timer, O](duration: FiniteDuration): fs2.Pipe[F, O, O] = { in =>
-    val startTime = System.nanoTime()
-    var lastTime = System.nanoTime()
-    var lastRow = 0L
-    def formatTime(duration: FiniteDuration): String = {
-      val durationSecs = duration.toSeconds
-      f"${durationSecs / 3600}%d:${(durationSecs % 3600) / 60}%02d:${durationSecs % 60}%02d"
-    }
-    in.through(showProgress(duration) {
-      case (totalRows, o) =>
-        Sync[F].delay {
-          val now = System.nanoTime()
-          val totalTime = FiniteDuration(now - startTime, NANOSECONDS)
-          val partialTime = FiniteDuration(now - lastTime, NANOSECONDS)
-          val partialRows = totalRows - lastRow
-          lastTime = System.nanoTime()
-          lastRow = totalRows
-
-          println(f"\nRow #$totalRows: ${o.toString} ")
-          println(f"Partial time: ${formatTime(partialTime)}. Total time: ${formatTime(totalTime)}")
-          println(
-            f"Partial speed: ${partialRows.toFloat / partialTime.toSeconds}%.2f rows/sec. Total Speed: ${totalRows.toFloat / totalTime.toSeconds}%.2f rows/sec"
-          )
-        }
-    })
-  }
-
-  def showProgress[F[_]: Sync: Timer, O](every: FiniteDuration)(output: (Long, O) => F[Unit]): fs2.Pipe[F, O, O] = { source =>
-    val ticks = fs2.Stream.every[F](every)
-    source
-      // Based on zipWithIndex but starting at 1
-      .scanChunks(1L) { (index, c) =>
-        var idx = index
-        val out = c.map { o =>
-          val r = (o, idx)
-          idx += 1
-          r
-        }
-        (idx, out)
-      }
-      .zipWith(ticks)((_, _))
-      .evalMap {
-        case ((v, index), isTick) =>
-          (if (isTick) output(index, v) else Sync[F].unit) >> Sync[F].pure(v)
-      }
-  }
-}
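The removed showProgress was built on fs2.Stream.every, which emits false until the given interval has elapsed and then true once, so zipping it against the indexed source runs the reporting effect only for elements that land on a tick (the removed code also hand-rolled a 1-based index via scanChunks). A stripped-down sketch of that trick, assuming fs2 2.x and cats-effect 2 (ProgressSketch is illustrative only):

import scala.concurrent.duration._

import cats.effect.{ExitCode, IO, IOApp}

object ProgressSketch extends IOApp {
  def run(args: List[String]): IO[ExitCode] =
    fs2.Stream
      .range(1, 1000)
      .covary[IO]
      .zipWithIndex
      // Stream.every emits true at most once per elapsed interval
      .zipWith(fs2.Stream.every[IO](1.second))((_, _))
      .evalMap { case ((o, idx), isTick) =>
        (if (isTick) IO.delay(println(s"Row #$idx: $o")) else IO.unit) >> IO.pure(o)
      }
      .compile
      .drain
      .as(ExitCode.Success)
}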

src/main/scala/com/intenthq/action_processor/integrations/config/MapDbSettings.scala

Lines changed: 3 additions & 3 deletions
@@ -8,9 +8,9 @@ object MapDbSettings {
 
   val Default: MapDbSettings = MapDbSettings(
     dbPath = Paths.get("/tmp"),
-    startDbSize = 5L * 1024,
-    incSize = 1L * 1024,
-    segments = 16,
+    startDbSize = 512 * 1024 * 1024, // 512MB
+    incSize = 512 * 1024 * 1024, // 512MB
+    segments = 8,
     nodeSize = 128,
     levels = 4
   )
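The new defaults replace the old 5KB starting size and 1KB increment with 512MB pre-allocation and 512MB growth steps, and halve the HTreeMap segments from 16 to 8. MapDBRepository is not part of this diff, so the wiring below is only a hedged sketch of how such settings typically map onto MapDB 3.x's DBMaker (the file name is an assumption); segments, nodeSize, and levels would belong to the HTreeMap layout configuration rather than to DBMaker:

import java.nio.file.Paths

import org.mapdb.DBMaker

object MapDbSettingsSketch {
  def main(args: Array[String]): Unit = {
    val db = DBMaker
      .fileDB(Paths.get("/tmp", "aggregations.db").toFile)
      .allocateStartSize(512L * 1024 * 1024) // startDbSize: pre-allocate 512MB up front
      .allocateIncrement(512L * 1024 * 1024) // incSize: grow the store in 512MB steps
      .fileMmapEnableIfSupported()           // use memory-mapped files where the OS allows
      .make()
    try println("MapDB store created at /tmp/aggregations.db")
    finally db.close()
  }
}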
