
Commit e17adfe

Merlin Rabens (merlinrabens) authored and committed
Next iteration
1 parent b70ccb6 commit e17adfe

File tree

5 files changed: +100 -49 lines

src/main/scala/com/intenthq/action_processor/integrations/serializations/csv/Csv.scala

Lines changed: 2 additions & 1 deletion
@@ -19,7 +19,6 @@ object Csv {
   def dispatch[A](ctx: SealedTrait[Csv, A]): Csv[A] = (a: A) => ctx.dispatch(a)(sub => sub.typeclass.toCSV(sub.cast(a)))
 
   implicit def csvOpt[T: Csv]: Csv[Option[T]] = (a: Option[T]) => a.fold(Array[String](""))(Csv[T].toCSV)
-  implicit def deriveCsv[A]: Csv[A] = macro Magnolia.gen[A]
   implicit val csvStr: Csv[String] = (a: String) => Array(a)
   implicit val csvInt: Csv[Int] = (a: Int) => Array(a.toString)
   implicit val csvLong: Csv[Long] = (a: Long) => Array(a.toString)
@@ -31,4 +30,6 @@ object Csv {
   implicit val csvLocalTime: Csv[LocalTime] = (a: LocalTime) => Array(a.toString)
   implicit val csvInstant: Csv[Instant] = (a: Instant) => Array(a.toString)
   implicit val csvLocalDateTime: Csv[LocalDateTime] = (a: LocalDateTime) => Array(a.toString)
+
+  implicit def deriveCsv[A]: Csv[A] = macro Magnolia.gen[A]
 }
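
A hedged usage sketch (not part of the commit) of how the reordered instances are meant to compose: with the Magnolia-backed deriveCsv now declared after the field-level instances, a case class picks up csvStr, csvInt and csvOpt for its fields. The Row case class, the Csv.apply summoner and the expected output below are assumptions inferred from the code above.

// Illustrative only: derive a Csv instance for a simple case class.
import com.intenthq.action_processor.integrations.serializations.csv.Csv

object CsvDerivationSketch {
  final case class Row(name: String, age: Int, nickname: Option[String])

  def main(args: Array[String]): Unit = {
    // Csv[Row] is assumed to resolve via the deriveCsv macro; the field
    // instances csvStr, csvInt and csvOpt produce the individual columns.
    val columns: Array[String] = Csv[Row].toCSV(Row("Ada", 36, None))
    println(columns.mkString(",")) // expected "Ada,36," if field arrays are concatenated
  }
}
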
src/main/scala/com/intenthq/action_processor/integrationsV2/CsvFeed.scala

Lines changed: 31 additions & 0 deletions

@@ -0,0 +1,31 @@
+package com.intenthq.action_processor.integrationsV2
+
+import java.io.StringReader
+
+import cats.effect.{IO, Resource}
+import cats.implicits.catsSyntaxApplicativeId
+import com.intenthq.action_processor.integrations.SourceContext
+
+import scala.jdk.CollectionConverters._
+import de.siegmar.fastcsv.reader.CsvReader
+import fs2.Stream
+
+abstract class CsvFeed extends Feed[String, String] {
+  protected val csvResource: String
+
+  protected lazy val csvReader: CsvReader = new CsvReader
+
+  protected def csvParse(line: String): IO[Iterable[String]] =
+    Resource.fromAutoCloseable(IO.delay(new StringReader(line))).use { sr =>
+      Option(csvReader.parse(sr))
+        .flatMap(parser => Option(parser.nextRow().getFields.asScala))
+        .getOrElse(Iterable.empty[String])
+        .pure[IO]
+    }
+
+  override def inputStream(feedContext: FeedContext): Stream[IO, String]
+}
+
+new LocalFileCsvFeed(){
+  override csvResource = "file.csv"
+}
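
The dangling `new LocalFileCsvFeed()` snippet at the end of the new file hints at the intended shape of a concrete subclass. A minimal sketch under that assumption (the class, its resource handling and the serialize body are illustrative, not part of the commit):

package com.intenthq.action_processor.integrationsV2

import java.nio.charset.StandardCharsets

import cats.effect.IO

// Hypothetical concrete feed: streams a classpath CSV resource line by line
// and emits every line unaggregated (via the NoAggregate mixin from Feed.scala).
class LocalFileCsvFeed extends CsvFeed with NoAggregate[String] {

  override protected val csvResource: String = "file.csv"

  override def inputStream(feedContext: FeedContext): fs2.Stream[IO, String] =
    // For brevity the source is not closed; real code would bracket it in a Resource.
    fs2.Stream.fromIterator[IO](scala.io.Source.fromResource(csvResource).getLines())

  override def serialize(line: String, counter: Long): Array[Byte] =
    (line + "\n").getBytes(StandardCharsets.UTF_8)
}
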

src/main/scala/com/intenthq/action_processor/integrationsV2/Feed.scala

Lines changed: 30 additions & 48 deletions
@@ -4,6 +4,7 @@ import java.nio.charset.StandardCharsets
 import java.util.concurrent.{ConcurrentHashMap, ConcurrentMap}
 
 import cats.effect.{ContextShift, IO, Resource}
+import com.intenthq.action_processor.integrations.serializations.csv.CsvSerialization
 import doobie.implicits.{toDoobieStreamOps, toSqlInterpolator}
 import doobie.util.query.Query0
 import doobie.util.transactor.Transactor
@@ -23,27 +24,31 @@ import scala.jdk.CollectionConverters.IteratorHasAsScala
 
 object Aggregate {
 
-  def apply[I, K]( /*feedContext: FeedContext,*/ key: I => K, counter: I => Long): fs2.Pipe[IO, I, (K, Long)] =
+  def noop[I]: fs2.Pipe[IO, I, (I, Long)] = _.map(_ -> 1L)
+
+  private def loadAggRepository[K]: Resource[IO, ConcurrentMap[K, Long]] =
+    Resource.pure(new ConcurrentHashMap[K, Long]())
+
+  def aggregateByKey[I, K]( /*feedContext: FeedContext,*/ key: I => K, counter: I => Long): fs2.Pipe[IO, I, (K, Long)] =
     sourceStream => {
-      val repository: ConcurrentMap[K, Long] = new ConcurrentHashMap[K, Long]()
 
-      def put(o: I): IO[Unit] =
+      def put(aggRepository: ConcurrentMap[K, Long], o: I): IO[Unit] =
         IO.delay {
-          val previousCounter = repository.getOrDefault(key(o), 0L)
-          repository.put(key(o), counter(o) + previousCounter)
+          val previousCounter = aggRepository.getOrDefault(key(o), 0L)
+          aggRepository.put(key(o), counter(o) + previousCounter)
         }.void
 
-      def streamKeyValue: fs2.Stream[IO, (K, Long)] =
+      def streamKeyValue(aggRepository: ConcurrentMap[K, Long]): fs2.Stream[IO, (K, Long)] =
         fs2.Stream
           .fromIterator[IO](
-            repository
+            aggRepository
              .entrySet()
              .iterator()
              .asScala
          )
          .map(e => (e.getKey, e.getValue))
 
-      fs2.Stream.resource[IO, ConcurrentMap[K, Long]](Resource.liftF(IO.delay(repository))).flatMap { _ =>
+      fs2.Stream.resource[IO, ConcurrentMap[K, Long]](loadAggRepository).flatMap { aggRepository =>
         sourceStream.evalMap { i =>
           put(i)
         }.drain ++ streamKeyValue
@@ -53,65 +58,42 @@
 
 trait Feed[I, A] {
   def inputStream(feedContext: FeedContext): fs2.Stream[IO, I]
-  def transform(feedContext: FeedContext): fs2.Pipe[IO, I, A]
-  def serialize(a: A): Array[Byte]
+  def transform(feedContext: FeedContext): fs2.Pipe[IO, I, (A, Long)]
+  def serialize(a: A, counter: Long): Array[Byte]
 
   final def stream(processorContext: FeedContext): fs2.Stream[IO, Array[Byte]] =
     inputStream(processorContext)
       .through(transform(processorContext))
-      .map(serialize)
-}
-
-abstract class SQLFeed[I, O] extends Feed[I, O] {
-  protected val jdbcUrl: String
-
-  protected val driver: String
-
-  protected def query(feedContext: FeedContext): Query0[I]
-
-  override def inputStream(feedContext: FeedContext): fs2.Stream[IO, I] =
-    query(feedContext)
-      .streamWithChunkSize(chunkSize)
-      .transact[IO](transactor)
-
-  implicit private val contextShift: ContextShift[IO] = IO.contextShift(scala.concurrent.ExecutionContext.global)
-
-  protected def createTransactor: Aux[IO, Unit] = Transactor.fromDriverManager[IO](driver, jdbcUrl)
-
-  protected lazy val transactor: Transactor[IO] = createTransactor
-
-  protected val chunkSize: Int = doobie.util.query.DefaultChunkSize
+      .map { case (a, counter) => serialize(a, counter) }
 }
 
-abstract class Hive[I, O] extends SQLFeed[I, O] {
-
-  override protected val jdbcUrl: String = ""
-
-  override protected val driver: String = ""
-
+trait NoAggregate[I] { self: Feed[I, I] =>
+  override def transform(feedContext: FeedContext): fs2.Pipe[IO, I, (I, Long)] = Aggregate.noop
 }
 
 object Main {
   def main(args: Array[String]): Unit = {
 
-    class NoAggCase extends Hive[Int, String] {
+    case class Person(name: String, address: String, score: Int) {
+      lazy val aggregateKey = new AggregatedPerson(name, address)
+    }
+    case class AggregatedPerson(name: String, address: String)
 
-      override protected def query(feedContext: FeedContext): Query0[Int] = sql"1".query[Int]
+    class PersonFeed extends HiveFeed[Person, Person] with NoAggregate[Person] {
 
-      override def transform(feedContext: FeedContext): Pipe[IO, Int, String] = s => s.map(_.toString)
+      override protected def query(feedContext: FeedContext): Query0[Person] = sql"SELECT 'Nic Cage', 9000".query[Person]
 
-      override def serialize(a: String): Array[Byte] = a.getBytes(StandardCharsets.UTF_8)
+      override def serialize(a: Person, counter: Long): Array[Byte] = CsvSerialization.serialize(a).unsafeRunSync()
     }
 
-    class AggCase extends Hive[Int, (String, Long)] {
+    class PersonsAggregatedByScoreFeed extends HiveFeed[Person, AggregatedPerson] {
 
-      override protected def query(feedContext: FeedContext): Query0[Int] = sql"1".query[Int]
+      override protected def query(feedContext: FeedContext): Query0[Person] = sql"SELECT 'Nic Cage', 9000".query[Person]
 
-      override def transform(feedContext: FeedContext): Pipe[IO, Int, (String, Long)] = Aggregate.apply(_.toString, _ => 1L)
+      override def transform(feedContext: FeedContext): Pipe[IO, Person, (AggregatedPerson, Long)] =
+        Aggregate.aggregateByKey[Person, AggregatedPerson](_.aggregateKey, _.score)
 
-      override def serialize(a: (String, Long)): Array[Byte] = a._1.getBytes(StandardCharsets.UTF_8)
+      override def serialize(a: AggregatedPerson, counter: Long): Array[Byte] = CsvSerialization.serialize((a, counter)).unsafeRunSync()
     }
-
-    new AggCase().stream(new FeedContext()).compile.drain.unsafeRunSync()
   }
 }
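
A hedged sketch (not part of the commit) of how the reworked aggregation pipe is intended to be driven on its own: aggregateByKey drains the source stream into the per-run ConcurrentMap, summing the counter for each key, and only then emits the (key, total) pairs. The Event case class and the standalone runner below are assumptions.

import cats.effect.IO
import com.intenthq.action_processor.integrationsV2.Aggregate

object AggregateByKeySketch {
  final case class Event(name: String, amount: Long)

  def main(args: Array[String]): Unit = {
    val events = fs2.Stream.emits(Seq(Event("a", 1L), Event("b", 2L), Event("a", 3L))).covary[IO]

    // Count by event name, summing the amount field per key.
    val totals: List[(String, Long)] =
      events
        .through(Aggregate.aggregateByKey[Event, String](_.name, _.amount))
        .compile
        .toList
        .unsafeRunSync()

    println(totals) // e.g. List((a,4), (b,2)); map iteration order is not guaranteed
  }
}
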
src/main/scala/com/intenthq/action_processor/integrationsV2/HiveFeed.scala

Lines changed: 9 additions & 0 deletions

@@ -0,0 +1,9 @@
+package com.intenthq.action_processor.integrationsV2
+
+import com.intenthq.action_processor.integrations.{JavaLegacyTimeMeta, TimeMeta}
+
+import scala.util.Properties
+
+abstract class HiveFeed[I, O] extends SQLFeed[I, O]("org.apache.hive.jdbc.HiveDriver") with TimeMeta with JavaLegacyTimeMeta {
+  override protected val jdbcUrl: String = Properties.envOrElse("HIVE_JDBC_URL", "jdbc:hive2://localhost:10000")
+}
src/main/scala/com/intenthq/action_processor/integrationsV2/SQLFeed.scala

Lines changed: 28 additions & 0 deletions

@@ -0,0 +1,28 @@
+package com.intenthq.action_processor.integrationsV2
+
+import cats.effect.{ContextShift, IO}
+import com.intenthq.action_processor.integrations.SQLSource
+import doobie.implicits.toDoobieStreamOps
+import doobie.util.query.Query0
+import doobie.util.transactor.Transactor
+import doobie.util.transactor.Transactor.Aux
+
+abstract class SQLFeed[I, O](driver: String, parallelism: Int = SQLSource.DefaultParallelism) extends Feed[I, O] {
+
+  protected val jdbcUrl: String
+
+  protected def query(feedContext: FeedContext): Query0[I]
+
+  override def inputStream(feedContext: FeedContext): fs2.Stream[IO, I] =
+    query(feedContext)
+      .streamWithChunkSize(chunkSize)
+      .transact[IO](transactor)
+
+  implicit private val contextShift: ContextShift[IO] = IO.contextShift(scala.concurrent.ExecutionContext.global)
+
+  protected def createTransactor: Aux[IO, Unit] = Transactor.fromDriverManager[IO](driver, jdbcUrl)
+
+  protected lazy val transactor: Transactor[IO] = createTransactor
+
+  protected val chunkSize: Int = doobie.util.query.DefaultChunkSize
+}
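
With the driver now a constructor parameter, supporting another engine only means pinning the driver class and a default jdbcUrl, mirroring HiveFeed above. A hypothetical Postgres variant (class name, env var and URL are assumptions, not part of the commit):

package com.intenthq.action_processor.integrationsV2

import scala.util.Properties

// Illustrative only: the JDBC driver travels through SQLFeed's constructor,
// so a concrete family of feeds just fixes the driver and a default URL.
abstract class PostgresFeed[I, O] extends SQLFeed[I, O]("org.postgresql.Driver") {
  override protected val jdbcUrl: String =
    Properties.envOrElse("POSTGRES_JDBC_URL", "jdbc:postgresql://localhost:5432/feeds")
}
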
