Skip to content

Commit eef282b

Browse files
committed
feat: enhance TopicWriter and TopicReader with compression options and improve handling of payloads
Signed-off-by: Vladislav Polyakov <polRk@ydb.tech>
1 parent 3c460e6 commit eef282b

File tree

8 files changed

+76
-18
lines changed

8 files changed

+76
-18
lines changed

.changeset/pre.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
"sweet-carpets-talk",
3737
"tall-colts-follow",
3838
"tender-symbols-drive",
39+
"violet-words-wave",
3940
"wicked-carrots-own",
4041
"yellow-banks-burn"
4142
]

.changeset/violet-words-wave.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
'@ydbjs/topic': patch
3+
---
4+
5+
enhance TopicWriter and TopicReader with compression options

package-lock.json

Lines changed: 5 additions & 5 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
"publish-packages": "turbo run clean build test attw && changeset version && changeset publish"
1919
},
2020
"dependencies": {
21-
"@types/node": "^22.13.10"
21+
"@types/node": "^22.15.29"
2222
},
2323
"devDependencies": {
2424
"oxlint": "^0.16.4",

packages/topic/CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
11
# @ydbjs/topic
22

3+
## 6.0.0-alpha.11
4+
5+
### Patch Changes
6+
7+
- Enhance TopicWriter and TopicReader with compression options
8+
39
## 6.0.0-alpha.10
410

511
### Patch Changes

packages/topic/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "@ydbjs/topic",
3-
"version": "6.0.0-alpha.10",
3+
"version": "6.0.0-alpha.11",
44
"description": "",
55
"license": "Apache-2.0",
66
"type": "module",

packages/topic/src/reader.ts

Lines changed: 55 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ import { nextTick } from "node:process";
55
import { create, protoInt64, toJson } from "@bufbuild/protobuf";
66
import { type Duration, DurationSchema, type Timestamp, timestampDate, timestampFromDate } from "@bufbuild/protobuf/wkt";
77
import { StatusIds_StatusCode } from "@ydbjs/api/operation";
8-
import { type OffsetsRange, OffsetsRangeSchema, type StreamReadMessage_CommitOffsetRequest_PartitionCommitOffset, StreamReadMessage_CommitOffsetRequest_PartitionCommitOffsetSchema, type StreamReadMessage_FromClient, StreamReadMessage_FromClientSchema, type StreamReadMessage_FromServer, StreamReadMessage_FromServerSchema, type StreamReadMessage_InitRequest_TopicReadSettings, StreamReadMessage_InitRequest_TopicReadSettingsSchema, type StreamReadMessage_ReadResponse, TopicServiceDefinition } from "@ydbjs/api/topic";
8+
import { Codec, type OffsetsRange, OffsetsRangeSchema, type StreamReadMessage_CommitOffsetRequest_PartitionCommitOffset, StreamReadMessage_CommitOffsetRequest_PartitionCommitOffsetSchema, type StreamReadMessage_FromClient, StreamReadMessage_FromClientSchema, type StreamReadMessage_FromServer, StreamReadMessage_FromServerSchema, type StreamReadMessage_InitRequest_TopicReadSettings, StreamReadMessage_InitRequest_TopicReadSettingsSchema, type StreamReadMessage_ReadResponse, TopicServiceDefinition } from "@ydbjs/api/topic";
99
import type { Driver } from "@ydbjs/core";
1010
import { YDBError } from "@ydbjs/error";
1111
import { type RetryConfig, retry } from "@ydbjs/retry";
@@ -91,15 +91,33 @@ type TopicCommitPromise = {
9191
}
9292

9393
export type TopicReaderOptions<Payload = Uint8Array> = {
94+
// Topic path or an array of topic sources.
9495
topic: string | TopicReaderSource | TopicReaderSource[]
96+
// Consumer name.
9597
consumer: string
98+
// Maximum size of the internal buffer in bytes.
99+
// If not provided, the default is 1MB.
96100
maxBufferBytes?: bigint
101+
// How often to update the token in milliseconds.
97102
updateTokenIntervalMs?: number
98-
103+
// Compression options for the payload.
104+
compression?: {
105+
// Custom decompression function that can be used to decompress the payload before emitting it.
106+
decompress?(codec: Codec, payload: Uint8Array): Uint8Array | Promise<Uint8Array>
107+
}
108+
// Custom decode function to decode the payload.
109+
// If not provided, the payload will be returned as is.
110+
// The decode function is called after decompression, if compression is used.
99111
decode?(payload: Uint8Array): Payload
100-
112+
// Hooks for partition session events.
113+
// Called when a partition session is started.
114+
// It can be used to initialize the partition session, for example, to set the read offset.
101115
onPartitionSessionStart?: onPartitionSessionStartCallback
116+
// Called when a partition session is stopped.
117+
// It can be used to commit the offsets for the partition session.
102118
onPartitionSessionStop?: onPartitionSessionStopCallback
119+
// Called when a commit offset response is received from the server.
120+
// This callback is called after the offsets are committed to the server.
103121
onCommittedOffset?: onCommittedOffsetCallback
104122
}
105123

@@ -250,6 +268,11 @@ export class TopicReader<Payload = Uint8Array> implements Disposable {
250268
}
251269
}
252270

271+
// If there are no pending commits for this partition session, remove it from the map.
272+
if (pendingCommits && pendingCommits.length === 0) {
273+
this.#pendingCommits.delete(partitionSession.partitionSessionId);
274+
}
275+
253276
this.#fromClientEmitter.emit('message', create(StreamReadMessage_FromClientSchema, {
254277
clientMessage: {
255278
case: 'startPartitionSessionResponse',
@@ -576,7 +599,7 @@ export class TopicReader<Payload = Uint8Array> implements Disposable {
576599
read(options: { limit?: number, waitMs?: number, signal?: AbortSignal } = {}): AsyncIterable<TopicMessage<Payload>[]> {
577600
let limit = options.limit || Infinity,
578601
signal = options.signal,
579-
waitMs = options.waitMs || 60000;
602+
waitMs = options.waitMs || 60_000;
580603

581604
// Check if the reader has been disposed, cannot read with disposed reader
582605
if (this.#disposed) {
@@ -750,14 +773,34 @@ export class TopicReader<Payload = Uint8Array> implements Disposable {
750773
break;
751774
}
752775

776+
let data = msg.data;
777+
if (batch.codec !== Codec.UNSPECIFIED) {
778+
if (!this.#options.compression || !this.#options.compression.decompress) {
779+
dbg('error: cannot decompress message with codec %s, no decompression function provided', batch.codec);
780+
781+
throw new Error(`Cannot decompress message with codec ${batch.codec}, no decompression function provided`);
782+
}
783+
784+
// Decompress the message data using the provided decompress function
785+
try {
786+
// eslint-disable-next-line no-await-in-loop
787+
data = await this.#options.compression.decompress(batch.codec, msg.data);
788+
} catch (err) {
789+
dbg('error: decompression failed for message with codec %s: %O', batch.codec, err);
790+
791+
throw new Error(`Decompression failed for message with codec ${batch.codec}`, { cause: err });
792+
}
793+
}
794+
753795
// Process the message
754796
let message: TopicMessage<Payload> = {
755797
partitionSessionId: partitionSession.partitionSessionId,
756798
partitionId: partitionSession.partitionId,
757799
producerId: batch.producerId,
758800
seqNo: msg.seqNo,
759801
offset: msg.offset,
760-
payload: this.#options.decode!(msg.data),
802+
payload: this.#options.decode?.(data) || data as Payload,
803+
uncompressedSize: msg.uncompressedSize,
761804
createdAt: msg.createdAt ? timestampDate(msg.createdAt) : undefined,
762805
writtenAt: batch.writtenAt ? timestampDate(batch.writtenAt) : undefined,
763806
metadataItems: msg.metadataItems ? Object.fromEntries(msg.metadataItems.map(item => [item.key, item.value])) : undefined,
@@ -940,10 +983,14 @@ export class TopicReader<Payload = Uint8Array> implements Disposable {
940983
* This method should be called when the reader is no longer needed to prevent memory leaks.
941984
*/
942985
dispose() {
943-
dbg('disposing TopicReader for consumer %s', this.#options.consumer)
986+
if (this.#disposed) {
987+
return; // Already disposed, nothing to do
988+
}
989+
this.#disposed = true;
990+
dbg('disposing TopicReader for consumer %s', this.#options.consumer);
944991

945-
this.#disposed = true
946992
this.#buffer.length = 0 // Clear the buffer to release memory
993+
this.#freeBufferSize = this.#maxBufferSize; // Reset free buffer size to max buffer size
947994

948995
for (let partitionSession of this.#partitionSessions.values()) {
949996
// Stop all partition sessions gracefully
@@ -963,11 +1010,11 @@ export class TopicReader<Payload = Uint8Array> implements Disposable {
9631010

9641011
this.#pendingCommits.clear() // Clear pending commits to release memory
9651012

966-
this.#controller.abort()
9671013
this.#fromClientEmitter.removeAllListeners()
9681014
this.#fromServerEmitter.removeAllListeners()
9691015

9701016
clearInterval(this.#updateTokenTicker)
1017+
this.#controller.abort()
9711018
}
9721019

9731020
[Symbol.dispose]() {

packages/topic/src/writer.ts

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,6 @@ export type TopicWriterOptions<Payload = Uint8Array> = {
9090
// Compression options for the payload.
9191
compression?: {
9292
codec: Codec,
93-
// TODO: Codec options, like compression level, etc.
9493
// Minimum raw size to compress, if the payload is smaller than this size, it will not be compressed.
9594
// This is useful to avoid compressing small payloads that do not benefit from compression.
9695
// Default is 1024 bytes.
@@ -100,7 +99,7 @@ export type TopicWriterOptions<Payload = Uint8Array> = {
10099
// If not provided, the default compression function will be used.
101100
// The default compression function will use the codec specified in the options.
102101
// If the codec is Codec.RAW, the payload will not be compressed.
103-
compress?(payload: Uint8Array): Uint8Array
102+
compress?(payload: Uint8Array): Uint8Array | Promise<Uint8Array>
104103
}
105104
// Custom encoding function that can be used to transform the payload before compressing or sending it to the topic.
106105
encode?(payload: Payload): Uint8Array
@@ -484,7 +483,7 @@ export class TopicWriter<Payload = Uint8Array> implements Disposable, AsyncDispo
484483
if (this.#options.compression.codec !== Codec.RAW && data.length >= (this.#options.compression.minRawSize || MIN_RAW_SIZE)) {
485484
// Use custom compression function if provided, otherwise use default compression
486485
data = this.#options.compression.compress
487-
? this.#options.compression.compress(data)
486+
? await this.#options.compression.compress(data)
488487
: data; // Default to raw if no compression is applied
489488
} else {
490489
// If the payload is smaller than the minimum size, do not compress it

0 commit comments

Comments
 (0)