Skip to content

Commit 0cd5b7f

Browse files
Schema Registry 5: Protobuf (#458)
* Refactor test_do_not_auto_register_schemas * Add JSON Schema case to test_do_not_auto_register_schemas * Rearrange imports in protobuf.py * Remove obsolete black extend-exclude entry * Rename and recompile Test Protobuf schema This eliminates the annoying PytestCollectionWarning. It is named `Root` to distinguish it from a nested schema that will be introduced later. Additionally instruct black to not correct *_pb2.py files. Note: force-exclude is required, or else black run via pre-commit would still check the file. * Add Nested Protobuf schema Also added generate.sh helper script if we ever need to regenerate protobufs again. * Create serialization test cases with Nested Protobuf schema * Check exception text in test_schema_registry_serialize_error * Rename exception_text parameter to match for consistency * Pass `include` from `BaseSettings.as_dict` to `BaseSettings.model_dump` Protobuf accepts more configuration attributes than Avro and JSON Schema. This change ensures that class constructors do not raise errors due to unrecognized parameters. * Separate parsing and serialization steps in ProtobufSerializer * Catch and handle TypeError from google.protobuf.json_format.ParseDict Original error is not very helpful: TypeError: 'Nested' object is not iterable * Integrate Schema Registry with Protobuf * Update quixstreams/utils/settings.py --------- Co-authored-by: Daniil Gusev <daniil@quix.io>
1 parent f021321 commit 0cd5b7f

File tree

16 files changed

+621
-135
lines changed

16 files changed

+621
-135
lines changed

pyproject.toml

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -53,11 +53,7 @@ ignore-init-method-arguments = true
5353
line-length = 88
5454
target-version = ['py38']
5555
include = '\.pyi?$'
56-
extend-exclude = '''
57-
/(
58-
env
59-
)/
60-
'''
56+
force-exclude = '.*_pb2\.py$'
6157

6258
[tool.isort]
6359
atomic = true

quixstreams/models/serializers/avro.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,14 @@ def __init__(
7070

7171
serialization_config = {}
7272
if schema_registry_serialization_config:
73-
serialization_config = schema_registry_serialization_config.as_dict()
73+
serialization_config = schema_registry_serialization_config.as_dict(
74+
include={
75+
"auto_register_schemas",
76+
"normalize_schemas",
77+
"use_latest_version",
78+
"subject_name_strategy",
79+
},
80+
)
7481

7582
self._schema_registry_serializer = _AvroSerializer(
7683
schema_registry_client=SchemaRegistryClient(client_config),

quixstreams/models/serializers/json.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,14 @@ def __init__(
7474

7575
serialization_config = {}
7676
if schema_registry_serialization_config:
77-
serialization_config = schema_registry_serialization_config.as_dict()
77+
serialization_config = schema_registry_serialization_config.as_dict(
78+
include={
79+
"auto_register_schemas",
80+
"normalize_schemas",
81+
"use_latest_version",
82+
"subject_name_strategy",
83+
},
84+
)
7885

7986
self._schema_registry_serializer = _JSONSerializer(
8087
schema_str=json.dumps(schema),

quixstreams/models/serializers/protobuf.py

Lines changed: 111 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,21 @@
1-
from typing import Union, Mapping, Iterable, Dict
1+
from typing import Dict, Iterable, Mapping, Optional, Union
2+
3+
from confluent_kafka.schema_registry import SchemaRegistryClient, SchemaRegistryError
4+
from confluent_kafka.schema_registry.protobuf import (
5+
ProtobufDeserializer as _ProtobufDeserializer,
6+
ProtobufSerializer as _ProtobufSerializer,
7+
)
8+
from confluent_kafka.serialization import SerializationError as _SerializationError
9+
from google.protobuf.json_format import MessageToDict, ParseDict, ParseError
10+
from google.protobuf.message import DecodeError, EncodeError, Message
211

3-
from .base import Serializer, Deserializer, SerializationContext
12+
from .base import Deserializer, SerializationContext, Serializer
413
from .exceptions import SerializationError
14+
from .schema_registry import (
15+
SchemaRegistryClientConfig,
16+
SchemaRegistrySerializationConfig,
17+
)
518

6-
from google.protobuf.message import Message, DecodeError, EncodeError
7-
from google.protobuf.json_format import MessageToDict, ParseDict, ParseError
819

920
__all__ = ("ProtobufSerializer", "ProtobufDeserializer")
1021

@@ -15,6 +26,10 @@ def __init__(
1526
msg_type: Message,
1627
deterministic: bool = False,
1728
ignore_unknown_fields: bool = False,
29+
schema_registry_client_config: Optional[SchemaRegistryClientConfig] = None,
30+
schema_registry_serialization_config: Optional[
31+
SchemaRegistrySerializationConfig
32+
] = None,
1833
):
1934
"""
2035
Serializer that returns data in protobuf format.
@@ -26,26 +41,68 @@ def __init__(
2641
Default - `False`
2742
:param ignore_unknown_fields: If True, do not raise errors for unknown fields.
2843
Default - `False`
44+
:param schema_registry_client_config: If provided, serialization is offloaded to Confluent's ProtobufSerializer.
45+
Default - `None`
46+
:param schema_registry_serialization_config: Additional configuration for Confluent's ProtobufSerializer.
47+
Default - `None`
48+
>***NOTE:*** `schema_registry_client_config` must also be set.
2949
"""
3050
super().__init__()
3151
self._msg_type = msg_type
3252

3353
self._deterministic = deterministic
3454
self._ignore_unknown_fields = ignore_unknown_fields
3555

56+
self._schema_registry_serializer = None
57+
if schema_registry_client_config:
58+
client_config = schema_registry_client_config.as_dict(
59+
plaintext_secrets=True,
60+
)
61+
62+
if schema_registry_serialization_config:
63+
serialization_config = schema_registry_serialization_config.as_dict()
64+
else:
65+
# The use.deprecated.format has been mandatory since Confluent Kafka version 1.8.2.
66+
# https://github.com/confluentinc/confluent-kafka-python/releases/tag/v1.8.2
67+
serialization_config = SchemaRegistrySerializationConfig().as_dict(
68+
include={"use_deprecated_format"},
69+
)
70+
71+
self._schema_registry_serializer = _ProtobufSerializer(
72+
msg_type=msg_type,
73+
schema_registry_client=SchemaRegistryClient(client_config),
74+
conf=serialization_config,
75+
)
76+
3677
def __call__(
3778
self, value: Union[Dict, Message], ctx: SerializationContext
3879
) -> Union[str, bytes]:
80+
if isinstance(value, self._msg_type):
81+
msg = value
82+
else:
83+
try:
84+
msg = ParseDict(
85+
value,
86+
self._msg_type(),
87+
ignore_unknown_fields=self._ignore_unknown_fields,
88+
)
89+
except TypeError as exc:
90+
raise SerializationError(
91+
"Value to serialize must be of type "
92+
f"`{self._msg_type}` or dict, not `{type(value)}`."
93+
) from exc
94+
except ParseError as exc:
95+
raise SerializationError(str(exc)) from exc
96+
97+
if self._schema_registry_serializer is not None:
98+
try:
99+
return self._schema_registry_serializer(msg, ctx)
100+
except (SchemaRegistryError, _SerializationError) as exc:
101+
raise SerializationError(str(exc)) from exc
39102

40103
try:
41-
if isinstance(value, self._msg_type):
42-
return value.SerializeToString(deterministic=self._deterministic)
43-
44-
msg = self._msg_type()
45-
return ParseDict(
46-
value, msg, ignore_unknown_fields=self._ignore_unknown_fields
47-
).SerializeToString(deterministic=self._deterministic)
48-
except (EncodeError, ParseError) as exc:
104+
return msg.SerializeToString(deterministic=self._deterministic)
105+
except EncodeError as exc:
49106
raise SerializationError(str(exc)) from exc
50107

51108

@@ -56,6 +113,10 @@ def __init__(
56113
use_integers_for_enums: bool = False,
57114
preserving_proto_field_name: bool = False,
58115
to_dict: bool = True,
116+
schema_registry_client_config: Optional[SchemaRegistryClientConfig] = None,
117+
schema_registry_serialization_config: Optional[
118+
SchemaRegistrySerializationConfig
119+
] = None,
59120
):
60121
"""
61122
Deserializer that parses protobuf data into a dictionary suitable for a StreamingDataframe.
@@ -71,6 +132,11 @@ def __init__(
71132
Default - `False`
72133
:param to_dict: If false, return the protobuf message instead of a dict.
73134
Default - `True`
135+
:param schema_registry_client_config: If provided, deserialization is offloaded to Confluent's ProtobufDeserializer.
136+
Default - `None`
137+
:param schema_registry_serialization_config: Additional configuration for Confluent's ProtobufDeserializer.
138+
Default - `None`
139+
>***NOTE:*** `schema_registry_client_config` must also be set.
74140
"""
75141
super().__init__()
76142
self._msg_type = msg_type
@@ -79,15 +145,42 @@ def __init__(
79145
self._use_integers_for_enums = use_integers_for_enums
80146
self._preserving_proto_field_name = preserving_proto_field_name
81147

148+
# Confluent's ProtobufDeserializer is not utilizing the
149+
# Schema Registry. However, we still accept a fully qualified
150+
# SchemaRegistryClientConfig to maintain a unified API and ensure
151+
# future compatibility in case we choose to bypass Confluent
152+
# and interact with the Schema Registry directly.
153+
# On the other hand, ProtobufDeserializer requires
154+
# conf dict with a single key: `use.deprecated.format`.
155+
self._schema_registry_deserializer = None
156+
if schema_registry_client_config:
157+
158+
# The use.deprecated.format has been mandatory since Confluent Kafka version 1.8.2.
159+
# https://github.com/confluentinc/confluent-kafka-python/releases/tag/v1.8.2
160+
serialization_config = (
161+
schema_registry_serialization_config
162+
or SchemaRegistrySerializationConfig()
163+
).as_dict(include={"use_deprecated_format"})
164+
165+
self._schema_registry_deserializer = _ProtobufDeserializer(
166+
message_type=msg_type,
167+
conf=serialization_config,
168+
)
169+
82170
def __call__(
83171
self, value: bytes, ctx: SerializationContext
84172
) -> Union[Iterable[Mapping], Mapping, Message]:
85-
msg = self._msg_type()
86-
87-
try:
88-
msg.ParseFromString(value)
89-
except DecodeError as exc:
90-
raise SerializationError(str(exc)) from exc
173+
if self._schema_registry_deserializer is not None:
174+
try:
175+
msg = self._schema_registry_deserializer(value, ctx)
176+
except (_SerializationError, DecodeError) as exc:
177+
raise SerializationError(str(exc)) from exc
178+
else:
179+
msg = self._msg_type()
180+
try:
181+
msg.ParseFromString(value)
182+
except DecodeError as exc:
183+
raise SerializationError(str(exc)) from exc
91184

92185
if not self._to_dict:
93186
return msg

quixstreams/models/serializers/schema_registry.py

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,11 @@
11
from typing import Callable, Optional
22

33
from pydantic import SecretStr
4-
from confluent_kafka.schema_registry import topic_subject_name_strategy
4+
from confluent_kafka.schema_registry import (
5+
reference_subject_name_strategy,
6+
SchemaReference,
7+
topic_subject_name_strategy,
8+
)
59

610
from quixstreams.utils.settings import BaseSettings
711
from quixstreams.models.serializers import SerializationContext
@@ -12,6 +16,7 @@
1216
]
1317

1418
SubjectNameStrategy = Callable[[SerializationContext, str], str]
19+
ReferenceSubjectNameStrategy = Callable[[SerializationContext, SchemaReference], str]
1520

1621

1722
class SchemaRegistryClientConfig(BaseSettings):
@@ -57,9 +62,25 @@ class SchemaRegistrySerializationConfig(BaseSettings):
5762
Defines how Schema Registry subject names are constructed. Standard naming
5863
strategies are defined in the confluent_kafka.schema_registry namespace.
5964
Defaults to topic_subject_name_strategy.
65+
:param skip_known_types: Whether or not to skip known types when resolving
66+
schema dependencies. Defaults to False.
67+
:param reference_subject_name_strategy: Defines how Schema Registry subject names
68+
for schema references are constructed. Defaults to reference_subject_name_strategy.
69+
:param use_deprecated_format: Specifies whether the Protobuf serializer should
70+
serialize message indexes without zig-zag encoding. This option must be explicitly
71+
configured as older and newer Protobuf producers are incompatible.
72+
If the consumers of the topic being produced to are using confluent-kafka-python <1.8,
73+
then this property must be set to True until all old consumers have been upgraded.
6074
"""
6175

6276
auto_register_schemas: bool = True
6377
normalize_schemas: bool = False
6478
use_latest_version: bool = False
6579
subject_name_strategy: SubjectNameStrategy = topic_subject_name_strategy
80+
81+
# Protobuf-only attributes
82+
skip_known_types: bool = False
83+
reference_subject_name_strategy: ReferenceSubjectNameStrategy = (
84+
reference_subject_name_strategy
85+
)
86+
use_deprecated_format: bool = False

quixstreams/utils/settings.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
from typing import Optional, Set
2+
13
from pydantic import AliasGenerator, SecretStr
24
from pydantic_settings import BaseSettings as _BaseSettings, SettingsConfigDict
35

@@ -13,14 +15,17 @@ class BaseSettings(_BaseSettings):
1315
),
1416
)
1517

16-
def as_dict(self, plaintext_secrets: bool = False) -> dict:
18+
def as_dict(
19+
self, plaintext_secrets: bool = False, include: Optional[Set[str]] = None
20+
) -> dict:
1721
"""
1822
Dump any non-empty config values as a dictionary.
1923
2024
:param plaintext_secrets: whether secret values are plaintext or obscured (***)
25+
:param include: optional list of fields to be included in the dictionary
2126
:return: a dictionary
2227
"""
23-
dump = self.model_dump(by_alias=True, exclude_none=True)
28+
dump = self.model_dump(by_alias=True, exclude_none=True, include=include)
2429
if plaintext_secrets:
2530
for field, value in dump.items():
2631
if isinstance(value, SecretStr):
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# This helper script will convert *.proto into *_pb2.py
2+
# and fix the annoying absolute import problem
3+
# (more at https://github.com/protocolbuffers/protobuf/issues/1491)
4+
5+
# Usage:
6+
# $ cd tests/test_quixstreams/test_models/protobuf
7+
# $ ./generate.sh
8+
9+
# Generate Python code for all .proto files in the current directory
10+
for proto_file in *.proto; do
11+
protoc --python_out=. "$proto_file"
12+
done
13+
14+
# Fix the import paths in the generated *_pb2.py files
15+
for pb2_file in *_pb2.py; do
16+
if [[ "$OSTYPE" == "darwin"* ]]; then
17+
# macOS version of sed (BSD sed)
18+
sed -i '' -E 's/^import ([a-zA-Z_][a-zA-Z0-9_]*)_pb2 as (.*)/from . import \1_pb2 as \2/' "$pb2_file"
19+
else
20+
# GNU sed (Linux, etc.)
21+
sed -i -E 's/^import ([a-zA-Z_][a-zA-Z0-9_]*)_pb2 as (.*)/from . import \1_pb2 as \2/' "$pb2_file"
22+
fi
23+
done
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
syntax = "proto3";
2+
3+
import "google/protobuf/timestamp.proto";
4+
5+
package schemas.v1;
6+
7+
message Nested {
8+
int32 id = 1;
9+
google.protobuf.Timestamp time = 2;
10+
}

tests/test_quixstreams/test_models/test_serializers/protobuf/nested_pb2.py

Lines changed: 37 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
syntax = "proto3";
2+
3+
import "nested.proto";
4+
5+
package schemas.v1;
6+
7+
enum Letter {
8+
A = 0;
9+
B = 1;
10+
}
11+
12+
message Root {
13+
string name = 1;
14+
int32 id = 2;
15+
Letter enum = 3;
16+
Nested nested = 4;
17+
}

0 commit comments

Comments
 (0)