
Commit eb22237

dervoeti and razvan authored
chore(nifi): Bump to 2.2.0 (#167)
* chore(nifi): Bump to 2.2.0
* Update demos/data-lakehouse-iceberg-trino-spark/create-nifi-ingestion-job.yaml
  Co-authored-by: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com>
* fix: delete unneeded json files / introduce SplitRecord to fix earthquake demo
* temp changes for testing
* fix: use main branch instead of release branch

Co-authored-by: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com>
1 parent 1e56f47 commit eb22237

File tree

17 files changed (+1397 −15789 lines)


.github/workflows/dev_nifi.yaml

Lines changed: 1 addition & 1 deletion

@@ -5,7 +5,7 @@ env:
   IMAGE_NAME: nifi
   # TODO (@NickLarsenNZ): Use a versioned image with stackable0.0.0-dev or stackableXX.X.X so that
   # the demo is reproducable for the release and it will be automatically replaced for the release branch.
-  IMAGE_VERSION: 1.28.1-postgresql
+  IMAGE_VERSION: 2.2.0-postgresql
   REGISTRY_PATH: stackable
   DOCKERFILE_PATH: "demos/signal-processing/Dockerfile-nifi"
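For orientation, these env values are typically joined into the full image reference the workflow builds; a minimal Python sketch of that composition, assuming the oci.stackable.tech registry host and a registry/path/name:version layout (both assumptions, not shown in this hunk):

# Sketch only: how REGISTRY_PATH, IMAGE_NAME and the bumped IMAGE_VERSION
# could combine into an image reference. The registry host is an assumption.
registry = "oci.stackable.tech"  # assumed, not part of this diff
registry_path = "stackable"
image_name = "nifi"
image_version = "2.2.0-postgresql"  # bumped from 1.28.1-postgresql by this commit

image_ref = f"{registry}/{registry_path}/{image_name}:{image_version}"
print(image_ref)  # -> oci.stackable.tech/stackable/nifi:2.2.0-postgresql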

demos/data-lakehouse-iceberg-trino-spark/LakehouseKafkaIngest.json

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

demos/data-lakehouse-iceberg-trino-spark/LakehouseKafkaIngest.xml

Lines changed: 0 additions & 7893 deletions
This file was deleted.

demos/data-lakehouse-iceberg-trino-spark/create-nifi-ingestion-job.yaml

Lines changed: 84 additions & 12 deletions
@@ -14,7 +14,12 @@ spec:
       containers:
         - name: create-nifi-ingestion-job
          image: oci.stackable.tech/sdp/testing-tools:0.2.0-stackable0.0.0-dev
-          command: ["bash", "-c", "curl -O https://raw.githubusercontent.com/stackabletech/demos/main/demos/data-lakehouse-iceberg-trino-spark/LakehouseKafkaIngest.xml && python -u /tmp/script/script.py"]
+          command:
+            - bash
+            - -euo
+            - pipefail
+            - -c
+            - python -u /tmp/script/script.py
           volumeMounts:
             - name: script
               mountPath: /tmp/script
@@ -41,9 +46,8 @@ metadata:
   name: create-nifi-ingestion-job-script
 data:
   script.py: |
-    from nipyapi.canvas import get_root_pg_id, schedule_process_group, list_all_controllers, schedule_controller
+    from nipyapi.canvas import get_root_pg_id, schedule_process_group, list_all_controllers, schedule_controller, update_controller
     from nipyapi.security import service_login
-    from nipyapi.templates import get_template, upload_template, deploy_template
     import nipyapi
     import os
     import urllib3
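Note that the new import line pulls in update_controller even though none of the hunks shown here call it; a hypothetical usage, for readers wondering what it is for (the property name is a placeholder, and the nipyapi host/login setup from the script is assumed):

from nipyapi import canvas, nifi

# Hypothetical: adjust a property on a still-disabled controller service
# before enabling it. "some-property" is a placeholder, not from this commit.
pg_id = canvas.get_root_pg_id()
controller = canvas.list_all_controllers(pg_id)[0]
update = nifi.ControllerServiceDTO(properties={"some-property": "new-value"})
canvas.update_controller(controller, update)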
@@ -52,26 +56,94 @@ data:
     ENDPOINT = f"https://nifi-node-default-0.nifi-node-default.{os.environ['NAMESPACE']}.svc.cluster.local:8443" # For local testing / developing replace it, afterwards change back to f"https://nifi-node-default-0.nifi-node-default.{os.environ['NAMESPACE']}.svc.cluster.local:8443"
     USERNAME = "admin"
     PASSWORD = open("/nifi-admin-credentials-secret/admin").read()
-    TEMPLATE_NAME = "LakehouseKafkaIngest"
-    TEMPLATE_FILE = f"{TEMPLATE_NAME}.xml"

     urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

     nipyapi.config.nifi_config.host = f"{ENDPOINT}/nifi-api"
     nipyapi.config.nifi_config.verify_ssl = False

-    print("Logging in")
+    print(f"Logging in as {USERNAME}")
     service_login(username=USERNAME, password=PASSWORD)
     print("Logged in")

-    pg_id = get_root_pg_id()
+    organization = "stackabletech"
+    repository = "demos"
+    branch = "main"
+    version = "main"
+    directory = "demos/data-lakehouse-iceberg-trino-spark"
+    flow_name = "LakehouseKafkaIngest"
+
+    # Check if the GitHub flow registry client already exists
+    flow_registry_clients = nipyapi.nifi.ControllerApi().get_flow_registry_clients().registries
+
+    github_client = None
+    for client in flow_registry_clients:
+        if client.component.name == "GitHubFlowRegistryClient":
+            github_client = client
+            print("Found existing GitHub flow registry client")
+            break

-    upload_template(pg_id, TEMPLATE_FILE)
+    if not github_client:
+        print("Creating new GitHub flow registry client")
+        github_client = nipyapi.nifi.ControllerApi().create_flow_registry_client(
+            body={
+                "revision": {"version": 0},
+                "component": {
+                    "name": "GitHubFlowRegistryClient",
+                    "type": "org.apache.nifi.github.GitHubFlowRegistryClient",
+                    "properties": {
+                        "Repository Owner": organization,
+                        "Repository Name": repository,
+                    },
+                    "bundle": {
+                        "group": "org.apache.nifi",
+                        "artifact": "nifi-github-nar",
+                        "version": "2.2.0",
+                    },
+                },
+            }
+        )
+
+    pg_id = get_root_pg_id()

-    template_id = get_template(TEMPLATE_NAME).id
-    deploy_template(pg_id, template_id, 200, 0)
+    try:
+        # Create process group from the file in the Git repo
+        nipyapi.nifi.ProcessGroupsApi().create_process_group(
+            id=pg_id,
+            body={
+                "revision": {"version": 0},
+                "component": {
+                    "position": {"x": 300, "y": 10},
+                    "versionControlInformation": {
+                        "registryId": github_client.component.id,
+                        "flowId": flow_name,
+                        "bucketId": directory,
+                        "branch": branch,
+                        "version": version,
+                    },
+                },
+            },
+        )
+    except ValueError as e:
+        # Ignore, because nipyapi can't handle non-int versions yet
+        if "invalid literal for int() with base 10" in str(e):
+            print("Ignoring ValueError")
+        else:
+            raise e

-    for controller in list_all_controllers():
-        schedule_controller(controller, scheduled=True)
+    # Scheduling the `Kafka3ConnectionService` fails, if it is started before `StandardRestrictedSSLContextService`, since it depends on it
+    # To work around this, we try to schedule the controllers multiple times
+    # If `Kafka3ConnectionService` is started before `StandardRestrictedSSLContextService`, scheduling it will fail in the first iteration
+    # But it should succeed in the second attempt, since by then `StandardRestrictedSSLContextService` is started
+    max_retries = 2
+    for _ in range(max_retries):
+        controllers = list_all_controllers(pg_id)
+        for controller in controllers:
+            if controller.component.state != "ENABLED":
+                try:
+                    schedule_controller(controller, scheduled=True)
+                    print(f"Scheduled controller: {controller.component.name}")
+                except Exception as e:
+                    print(f"Failed to schedule controller {controller.component.name}: {e}")

     schedule_process_group(pg_id, scheduled=True)
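The fixed two-pass loop above relies on the dependency chain being at most one level deep (Kafka3ConnectionService depends only on StandardRestrictedSSLContextService). A minimal sketch of a more general variant that retries until every controller service reports ENABLED or a retry budget runs out; the function name, max_retries, and the delay are illustrative assumptions, not part of the commit:

import time
from nipyapi.canvas import get_root_pg_id, list_all_controllers, schedule_controller

def enable_all_controllers(pg_id, max_retries=5, delay_seconds=5):
    # Re-list controllers on each pass so state reflects dependencies enabled earlier.
    for attempt in range(max_retries):
        pending = [c for c in list_all_controllers(pg_id) if c.component.state != "ENABLED"]
        if not pending:
            return True
        for controller in pending:
            try:
                schedule_controller(controller, scheduled=True)
            except Exception as exc:
                # Likely a dependency (e.g. the SSL context service) is not enabled yet; retry next pass.
                print(f"Attempt {attempt + 1}: could not enable {controller.component.name}: {exc}")
        time.sleep(delay_seconds)
    return False

# Usage, assuming the nipyapi host/login are configured as in the script above:
# enable_all_controllers(get_root_pg_id())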

demos/nifi-kafka-druid-earthquake-data/IngestEarthquakesToKafka.json

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

0 commit comments
