
Commit 11ebb77

Authored by renovate-bot, gcf-owl-bot[bot], and davidcavazos
chore(deps): update apache/beam_python3.11_sdk docker tag to v2.59.0 (#12550)
* chore(deps): update apache/beam_python3.11_sdk docker tag to v2.59.0
* 🦉 Updates from OwlBot post-processor
  See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md
* match requirement version to docker version
* use beam 2.58 since 2.59 is not out yet

Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
Co-authored-by: David Cavazos <dcavazos@google.com>
1 parent 3d5ea06 commit 11ebb77

File tree

8 files changed (+53, -54 lines)


dataflow/snippets/Dockerfile

Lines changed: 1 addition & 1 deletion
@@ -22,7 +22,7 @@ FROM ubuntu:focal
 
 WORKDIR /pipeline
 
-COPY --from=apache/beam_python3.11_sdk:2.57.0 /opt/apache/beam /opt/apache/beam
+COPY --from=apache/beam_python3.11_sdk:2.58.0 /opt/apache/beam /opt/apache/beam
 ENTRYPOINT [ "/opt/apache/beam/boot" ]
 
 COPY requirements.txt .
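The COPY --from line vendors the Apache Beam SDK out of the official SDK image, and the boot entrypoint lets Dataflow run this image as a custom SDK container. A minimal sketch of how such an image might be referenced when launching a pipeline; the project, region, bucket, and image path below are placeholders, not values from this repository:

# Sketch only: point Dataflow at a custom SDK container built from a
# Dockerfile like the one above. All resource names here are placeholders.
import apache_beam as beam
from apache_beam.options.pipeline_options import PipelineOptions

options = PipelineOptions(
    runner="DataflowRunner",
    project="my-project",
    region="us-central1",
    temp_location="gs://my-bucket/tmp",
    sdk_container_image="us-docker.pkg.dev/my-project/repo/kafka-pipeline:latest",
)

with beam.Pipeline(options=options) as pipeline:
    pipeline | "Create" >> beam.Create(["hello"]) | "Print" >> beam.Map(print)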

dataflow/snippets/batch_write_storage.py

Lines changed: 3 additions & 1 deletion
@@ -24,7 +24,7 @@
 from typing_extensions import Self
 
 
-def write_to_cloud_storage(argv : List[str] = None) -> None:
+def write_to_cloud_storage(argv: List[str] = None) -> None:
     # Parse the pipeline options passed into the application.
     class MyOptions(PipelineOptions):
         @classmethod
@@ -41,6 +41,8 @@ def _add_argparse_args(cls: Self, parser: argparse.ArgumentParser) -> None:
             | "Create elements" >> beam.Create(wordsList)
             | "Write Files" >> WriteToText(options.output, file_name_suffix=".txt")
         )
+
+
 # [END dataflow_batch_write_to_storage]
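For context, the snippet is driven entirely by the --output pipeline option, so it can be invoked the same way the test further down does; a minimal sketch, assuming the module is importable from the dataflow/snippets directory and using a placeholder bucket:

# Sketch: invoke the snippet directly; the bucket name is a placeholder.
import sys

from batch_write_storage import write_to_cloud_storage

sys.argv = ["", "--output=gs://my-bucket/output/out-"]
write_to_cloud_storage()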

dataflow/snippets/read_kafka.py

Lines changed: 9 additions & 11 deletions
@@ -25,7 +25,6 @@
 
 
 def read_from_kafka() -> None:
-
     # Parse the pipeline options passed into the application. Example:
     # --topic=$KAFKA_TOPIC --bootstrap_server=$BOOTSTRAP_SERVER
     # --output=$CLOUD_STORAGE_BUCKET --streaming
@@ -34,34 +33,33 @@ def read_from_kafka() -> None:
     class MyOptions(PipelineOptions):
         @staticmethod
         def _add_argparse_args(parser: argparse.ArgumentParser) -> None:
-            parser.add_argument('--topic')
-            parser.add_argument('--bootstrap_server')
-            parser.add_argument('--output')
+            parser.add_argument("--topic")
+            parser.add_argument("--bootstrap_server")
+            parser.add_argument("--output")
 
     options = MyOptions()
     with beam.Pipeline(options=options) as pipeline:
         (
             pipeline
             # Read messages from an Apache Kafka topic.
             | ReadFromKafka(
-                consumer_config={
-                    "bootstrap.servers": options.bootstrap_server
-                },
+                consumer_config={"bootstrap.servers": options.bootstrap_server},
                 topics=[options.topic],
                 with_metadata=False,
                 max_num_records=5,
-                start_read_time=0
+                start_read_time=0,
             )
             # The previous step creates a key-value collection, keyed by message ID.
             # The values are the message payloads.
             | beam.Values()
             # Subdivide the output into fixed 5-second windows.
             | beam.WindowInto(window.FixedWindows(5))
             | WriteToText(
-                file_path_prefix=options.output,
-                file_name_suffix='.txt',
-                num_shards=1)
+                file_path_prefix=options.output, file_name_suffix=".txt", num_shards=1
+            )
         )
+
+
 # [END dataflow_kafka_read]
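Because the read is bounded by max_num_records=5, the pipeline drains after five messages. Those messages can be published with kafka-python, much as the test file below does; a minimal sketch, assuming a local broker and an existing topic (both names are placeholders):

# Sketch: publish a few test messages for the pipeline to consume.
# The broker address and topic name are placeholders.
from kafka import KafkaProducer

producer = KafkaProducer(bootstrap_servers="localhost:9092")
for i in range(5):
    producer.send("my-topic", f"event-{i}".encode())
producer.flush()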

dataflow/snippets/requirements.txt

Lines changed: 1 addition & 1 deletion
@@ -1,2 +1,2 @@
-apache-beam[gcp]==2.50.0
+apache-beam[gcp]==2.58.0
 kafka-python==2.0.2
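Per the commit message, the apache-beam pin is kept in step with the SDK container tag in the Dockerfile (both 2.58.0, since 2.59.0 had not been released yet). A purely illustrative sanity check of the installed version:

# Illustrative check: the installed Beam version should match the
# apache/beam_python3.11_sdk tag used in the Dockerfile.
import apache_beam

assert apache_beam.__version__ == "2.58.0", apache_beam.__version__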

dataflow/snippets/tests/test_batch_write_storage.py

Lines changed: 2 additions & 2 deletions
@@ -22,7 +22,7 @@
 from ..batch_write_storage import write_to_cloud_storage
 
 
-bucket_name = f'test-bucket-{uuid.uuid4()}'
+bucket_name = f"test-bucket-{uuid.uuid4()}"
 storage_client = storage.Client()
 
 
@@ -39,7 +39,7 @@ def setup_and_teardown() -> None:
 
 
 def test_write_to_cloud_storage(setup_and_teardown: None) -> None:
-    sys.argv = ['', f'--output=gs://{bucket_name}/output/out-']
+    sys.argv = ["", f"--output=gs://{bucket_name}/output/out-"]
     write_to_cloud_storage()
 
     blobs = list(storage_client.list_blobs(bucket_name))

dataflow/snippets/tests/test_read_kafka.py

Lines changed: 21 additions & 17 deletions
@@ -25,16 +25,18 @@
 import pytest
 
 
-BOOTSTRAP_SERVER = 'localhost:9092'
-TOPIC_NAME = f'topic-{uuid.uuid4()}'
-CONTAINER_IMAGE_NAME = 'kafka-pipeline:1'
+BOOTSTRAP_SERVER = "localhost:9092"
+TOPIC_NAME = f"topic-{uuid.uuid4()}"
+CONTAINER_IMAGE_NAME = "kafka-pipeline:1"
 
 
-@pytest.fixture(scope='module', autouse=True)
+@pytest.fixture(scope="module", autouse=True)
 def kafka_container() -> None:
     # Start a containerized Kafka server.
     docker_client = docker.from_env()
-    container = docker_client.containers.run('apache/kafka:3.7.0', network_mode='host', detach=True)
+    container = docker_client.containers.run(
+        "apache/kafka:3.7.0", network_mode="host", detach=True
+    )
     try:
         create_topic()
         yield
@@ -48,41 +50,43 @@ def create_topic() -> None:
         try:
             client = KafkaAdminClient(bootstrap_servers=BOOTSTRAP_SERVER)
             topics = []
-            topics.append(NewTopic(name=TOPIC_NAME, num_partitions=1, replication_factor=1))
+            topics.append(
+                NewTopic(name=TOPIC_NAME, num_partitions=1, replication_factor=1)
+            )
             client.create_topics(topics)
             break
         except NoBrokersAvailable:
             time.sleep(5)
 
 
 def test_read_from_kafka(tmp_path: Path) -> None:
-
-    file_name_prefix = f'output-{uuid.uuid4()}'
-    file_name = f'{tmp_path}/{file_name_prefix}-00000-of-00001.txt'
+    file_name_prefix = f"output-{uuid.uuid4()}"
+    file_name = f"{tmp_path}/{file_name_prefix}-00000-of-00001.txt"
 
     # Send some messages to Kafka
     producer = KafkaProducer(bootstrap_servers=BOOTSTRAP_SERVER)
     for i in range(0, 5):
-        message = f'event-{i}'
+        message = f"event-{i}"
         producer.send(TOPIC_NAME, message.encode())
 
     # Build a container image for the pipeline.
     client = docker.from_env()
-    client.images.build(path='./', tag=CONTAINER_IMAGE_NAME)
+    client.images.build(path="./", tag=CONTAINER_IMAGE_NAME)
 
     # Run the pipeline.
     client.containers.run(
         image=CONTAINER_IMAGE_NAME,
-        command=f'/pipeline/read_kafka.py --output /out/{file_name_prefix} --bootstrap_server {BOOTSTRAP_SERVER} --topic {TOPIC_NAME}',
-        volumes=['/var/run/docker.sock:/var/run/docker.sock', f'{tmp_path}/:/out'],
-        network_mode='host',
-        entrypoint='python')
+        command=f"/pipeline/read_kafka.py --output /out/{file_name_prefix} --bootstrap_server {BOOTSTRAP_SERVER} --topic {TOPIC_NAME}",
+        volumes=["/var/run/docker.sock:/var/run/docker.sock", f"{tmp_path}/:/out"],
+        network_mode="host",
+        entrypoint="python",
+    )
 
     # Verify the pipeline wrote the Kafka messages to the output file.
-    with open(file_name, 'r') as f:
+    with open(file_name, "r") as f:
         text = f.read()
         for i in range(0, 5):
-            assert f'event-{i}' in text
+            assert f"event-{i}" in text
 
 
 if __name__ == "__main__":

dataflow/snippets/tests/test_write_pubsub.py

Lines changed: 6 additions & 7 deletions
@@ -25,8 +25,8 @@
 from ..write_pubsub import write_to_pubsub
 
 
-topic_id = f'test-topic-{uuid.uuid4()}'
-subscription_id = f'{topic_id}-sub'
+topic_id = f"test-topic-{uuid.uuid4()}"
+subscription_id = f"{topic_id}-sub"
 project_id = os.environ["GOOGLE_CLOUD_PROJECT"]
 
 publisher = pubsub_v1.PublisherClient()
@@ -48,8 +48,7 @@ def setup_and_teardown() -> None:
         )
         yield
     finally:
-        subscriber.delete_subscription(
-            request={"subscription": subscription_path})
+        subscriber.delete_subscription(request={"subscription": subscription_path})
         publisher.delete_topic(request={"topic": topic_path})
 
 
@@ -76,7 +75,7 @@ def read_messages() -> None:
             request={"subscription": subscription_path, "ack_ids": ack_ids}
         )
 
-        if (len(received_messages) >= NUM_MESSAGES):
+        if len(received_messages) >= NUM_MESSAGES:
             break
 
         time.sleep(5)
@@ -86,10 +85,10 @@ def read_messages() -> None:
 
 
 def test_write_to_pubsub(setup_and_teardown: None) -> None:
     topic_path = publisher.topic_path(project_id, topic_id)
-    with patch("sys.argv", ["", '--streaming', f'--topic={topic_path}']):
+    with patch("sys.argv", ["", "--streaming", f"--topic={topic_path}"]):
         write_to_pubsub()
 
     # Read from Pub/Sub to verify the pipeline successfully wrote messages.
     # Duplicate reads are possible.
     messages = read_messages()
-    assert (len(messages) >= NUM_MESSAGES)
+    assert len(messages) >= NUM_MESSAGES

dataflow/snippets/write_pubsub.py

Lines changed: 10 additions & 14 deletions
@@ -32,17 +32,13 @@ def item_to_message(item: Dict[str, Any]) -> PubsubMessage:
     # https://cloud.google.com/dataflow/docs/guides/common-errors#name-error
     from apache_beam.io import PubsubMessage
 
-    attributes = {
-        'buyer': item['name'],
-        'timestamp': str(item['ts'])
-    }
-    data = bytes(item['product'], 'utf-8')
+    attributes = {"buyer": item["name"], "timestamp": str(item["ts"])}
+    data = bytes(item["product"], "utf-8")
 
     return PubsubMessage(data=data, attributes=attributes)
 
 
 def write_to_pubsub(argv: List[str] = None) -> None:
-
     # Parse the pipeline options passed into the application. Example:
     # --topic=$TOPIC_PATH --streaming
     # For more information, see
@@ -54,10 +50,10 @@ def _add_argparse_args(cls: Self, parser: argparse.ArgumentParser) -> None:
             parser.add_argument("--topic", required=True)
 
     example_data = [
-        {'name': 'Robert', 'product': 'TV', 'ts': 1613141590000},
-        {'name': 'Maria', 'product': 'Phone', 'ts': 1612718280000},
-        {'name': 'Juan', 'product': 'Laptop', 'ts': 1611618000000},
-        {'name': 'Rebeca', 'product': 'Video game', 'ts': 1610000000000}
+        {"name": "Robert", "product": "TV", "ts": 1613141590000},
+        {"name": "Maria", "product": "Phone", "ts": 1612718280000},
+        {"name": "Juan", "product": "Laptop", "ts": 1611618000000},
+        {"name": "Rebeca", "product": "Video game", "ts": 1610000000000},
     ]
     options = MyOptions()
 
@@ -66,12 +62,12 @@ def _add_argparse_args(cls: Self, parser: argparse.ArgumentParser) -> None:
             pipeline
             | "Create elements" >> beam.Create(example_data)
             | "Convert to Pub/Sub messages" >> beam.Map(item_to_message)
-            | WriteToPubSub(
-                topic=options.topic,
-                with_attributes=True)
+            | WriteToPubSub(topic=options.topic, with_attributes=True)
         )
 
-    print('Pipeline ran successfully.')
+    print("Pipeline ran successfully.")
+
+
 # [END dataflow_pubsub_write_with_attributes]
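As a quick illustration of what the reformatted item_to_message helper produces (not part of this change), it can be called directly on one of the example records; this assumes the module is importable from the dataflow/snippets directory:

# Sketch: inspect the PubsubMessage built from one example record.
from write_pubsub import item_to_message

msg = item_to_message({"name": "Robert", "product": "TV", "ts": 1613141590000})
print(msg.data)        # b'TV'
print(msg.attributes)  # {'buyer': 'Robert', 'timestamp': '1613141590000'}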
