@@ -35,13 +35,14 @@ GLUE_IAM_ROLE=""
 AWS_ACCOUNT=""
 KEYS_PER_TILE=0
 ROWS_PER_WORKER=250000
-# Target type might be keyspace or parquet
 TARGET_TYPE=keyspaces
 SKIP_GLUE_CONNECTOR=false
 SKIP_KEYSPACES_LEDGER=false
 JSON_MAPPING=""
 REPLICATION_POINT_IN_TIME=0
 REPLICATION_STATS_ENABLED=false
+GLUE_MONITORING=false
+SAFE_MODE=true
 OS=$(uname -a | awk '{print $1}')
 
 # Progress bar configuration
@@ -90,6 +91,7 @@ log() {
 
 if [[ "$OS" == Linux || "$OS" == Darwin ]]; then
   log "OS: $OS"
+  log "AWS CLI: $(aws --version)"
 else
   log "ERROR: Please run this script in AWS CloudShell or Linux/Darwin"
   exit 1
@@ -193,10 +195,10 @@ function uploader_helper() {
   local next_pos=$3
   local final_pos=$4
   check_file_exists "$path_to_conf/$artifact_name"
-  progress $curr_pos $final_pos "Uploading $artifact_name"
+  progress $curr_pos $final_pos "Uploading $artifact_name"
   if ls "$path_to_conf/$artifact_name" > /dev/null
   then
-    progress $next_pos $final_pos "Uploading $artifact_name"
+    progress $next_pos $final_pos "Uploading $artifact_name"
     aws s3 cp "$path_to_conf"/"$artifact_name" "$S3_LANDING_ZONE"/artifacts/"$artifact_name" > /dev/null
   else
     log "ERROR: $path_to_conf/$artifact_name not found"
@@ -228,14 +230,14 @@ function barrier() {
 }
 
 function Usage_Exit {
-  log "$0 [--state init/run/request-stop|--tiles number of tiles|--landing-zone s3Uri|--writetime-column col3|\
+  echo "$0 [--state init/run/request-stop|--tiles number of tiles|--landing-zone s3Uri|--writetime-column col3|\
   --src-keyspace keyspace_name|--src-table table_name|--trg-keyspace keyspace_name|--trg-table table_name]"
-  log "Script version:" ${MIGRATOR_VERSION}
-  log "init - Deploy CQLReplicator Glue job, and download jars"
-  log "run - Start migration process"
-  log "stats - Upload progress. Only for historical workload"
-  log "request-stop - Stop migration process"
-  log "cleanup - Delete all CQLReplicator artifacts"
+  echo "Script version:" ${MIGRATOR_VERSION}
+  echo "init - Deploy CQLReplicator Glue job, and download jars"
+  echo "run - Start migration process"
+  echo "stats - Upload progress. Only for historical workload"
+  echo "request-stop - Stop migration process"
+  echo "cleanup - Delete all the CQLReplicator's artifacts"
   exit 1
 }
 
@@ -246,9 +248,9 @@ function Clean_Up {
   aws s3 rb "$S3_LANDING_ZONE"
   local connection_name
   connection_name=$(aws glue get-job --job-name CQLReplicator --query 'Job.Connections.Connections[0]' --output text)
-  aws glue delete-connection --connection-name "$connection_name" --region "$AWS_REGION"
-  aws glue delete-connection --connection-name cql-replicator-memorydb-integration --region "$AWS_REGION" > /dev/null
-  aws glue delete-connection --connection-name cql-replicator-opensearch-integration --region "$AWS_REGION" > /dev/null
+  aws glue delete-connection --connection-name "$connection_name" --region "$AWS_REGION" > /dev/null 2>&1
+  aws glue delete-connection --connection-name cql-replicator-memorydb-integration --region "$AWS_REGION" > /dev/null 2>&1
+  aws glue delete-connection --connection-name cql-replicator-opensearch-integration --region "$AWS_REGION" > /dev/null 2>&1
   aws glue delete-job --job-name CQLReplicator --region "$AWS_REGION"
   if [[ $SKIP_KEYSPACES_LEDGER == false ]]; then
     aws keyspaces delete-keyspace --keyspace-name migration --region "$AWS_REGION"
@@ -335,8 +337,13 @@ function Init {
 
   # Create Glue Connector
   local glue_conn_name
+  local enhanced_monitoring=""
+  if [[ "$GLUE_MONITORING" == true ]]; then
+    enhanced_monitoring=',"--enable-continuous-cloudwatch-log":"true","--enable-continuous-log-filter":"true","--enable-metrics":"true","--enable-observability-metrics":"true"'
+  fi
+
   if [[ $SKIP_GLUE_CONNECTOR == false ]]; then
-    progress 3 5 "Creating Glue connector and CQLReplicator job"
+    progress 3 5 "Creating Glue artifacts"
     glue_conn_name=$(echo cql-replicator-"$(uuidgen)" | tr '[:upper:]' '[:lower:]')
     aws glue create-connection --connection-input '{
       "Name":"'$glue_conn_name'",
@@ -390,11 +397,12 @@ function Init {
       "--extra-jars":"'$S3_LANDING_ZONE'/artifacts/jedis-4.4.6.jar,'$S3_LANDING_ZONE'/artifacts/spark-cassandra-connector-assembly_2.12-3.4.1.jar,'$S3_LANDING_ZONE'/artifacts/resilience4j-retry-1.7.1.jar,'$S3_LANDING_ZONE'/artifacts/resilience4j-core-1.7.1.jar,'$S3_LANDING_ZONE'/artifacts/vavr-0.10.4.jar,'$S3_LANDING_ZONE'/artifacts/aws-sigv4-auth-cassandra-java-driver-plugin-4.0.9.jar,'$S3_LANDING_ZONE'/artifacts/opensearch-spark-30_2.12-1.0.1.jar",
       "--conf":"spark.files='$S3_LANDING_ZONE'/artifacts/KeyspacesConnector.conf,'$S3_LANDING_ZONE'/artifacts/CassandraConnector.conf --conf spark.serializer=org.apache.spark.serializer.KryoSerializer --conf spark.sql.extensions=com.datastax.spark.connector.CassandraSparkExtensions --conf spark.kryoserializer.buffer.max=128m --conf spark.rdd.compress=true --conf spark.cleaner.periodicGC.interval=1min --conf spark.kryo.referenceTracking=false --conf spark.cleaner.referenceTracking.cleanCheckpoints=true --conf spark.task.maxFailures=64",
       "--class":"GlueApp"
+      '$enhanced_monitoring'
       }' > /dev/null
   fi
 
   if [[ $SKIP_GLUE_CONNECTOR == true ]]; then
-    progress 3 5 "Creating CQLReplicator job"
+    progress 3 5 "Creating Glue artifacts"
     aws glue create-job \
       --name "CQLReplicator" \
       --role "$GLUE_IAM_ROLE" \
@@ -411,16 +419,17 @@ function Init {
       "--extra-jars":"'$S3_LANDING_ZONE'/artifacts/jedis-4.4.6.jar,'$S3_LANDING_ZONE'/artifacts/spark-cassandra-connector-assembly_2.12-3.4.1.jar,'$S3_LANDING_ZONE'/artifacts/resilience4j-retry-1.7.1.jar,'$S3_LANDING_ZONE'/artifacts/resilience4j-core-1.7.1.jar,'$S3_LANDING_ZONE'/artifacts/vavr-0.10.4.jar,'$S3_LANDING_ZONE'/artifacts/aws-sigv4-auth-cassandra-java-driver-plugin-4.0.9.jar,'$S3_LANDING_ZONE'/artifacts/opensearch-spark-30_2.12-1.0.1.jar",
       "--conf":"spark.files='$S3_LANDING_ZONE'/artifacts/KeyspacesConnector.conf,'$S3_LANDING_ZONE'/artifacts/CassandraConnector.conf --conf spark.serializer=org.apache.spark.serializer.KryoSerializer --conf spark.sql.extensions=com.datastax.spark.connector.CassandraSparkExtensions --conf spark.kryoserializer.buffer.max=128m --conf spark.rdd.compress=true --conf spark.cleaner.periodicGC.interval=1min --conf spark.kryo.referenceTracking=false --conf spark.cleaner.referenceTracking.cleanCheckpoints=true --conf spark.task.maxFailures=64",
       "--class":"GlueApp"
+      '$enhanced_monitoring'
       }' > /dev/null
   fi
 
   if [[ $SKIP_KEYSPACES_LEDGER == true ]]; then
-    progress 4 5 "Skipping CQLReplicator's internal keyspace"
-    progress 5 5 "Skipping CQLReplicator's internal table"
+    progress 4 5 "Skipping CQLReplicator's internal keyspace"
+    progress 5 5 "Skipping CQLReplicator's internal table"
   fi
 
   if [[ $SKIP_KEYSPACES_LEDGER == false ]]; then
-    progress 4 5 "Creating CQLReplicator's internal resources"
+    progress 4 5 "Creating CQLReplicator's internal resources"
     # Create a keyspace - migration
     aws keyspaces create-keyspace --keyspace-name migration --region "$AWS_REGION" > /dev/null
     sleep 20
@@ -438,7 +447,7 @@ function Init {
       { "name": "offload_status", "type": "text" } ],
       "partitionKeys": [ { "name": "ks" }, { "name": "tbl" } ],
       "clusteringKeys": [ { "name": "tile", "orderBy": "ASC" }, { "name": "ver", "orderBy": "ASC" } ] }' --region "$AWS_REGION" > /dev/null
-    progress 5 5 "Creating CQLReplicator's internal resources"
+    progress 5 5 "Created the CQLReplicator internal resources"
   fi
 
   log "Deploy is completed"
@@ -463,6 +472,7 @@ function Start_Discovery {
   log "TTL COLUMN:" $TTL_COLUMN
   log "ROWS PER DPU:" $ROWS_PER_WORKER
   log "START REPLICATING FROM: $REPLICATION_POINT_IN_TIME (0 is disabled)"
+  log "SAFE MODE: $SAFE_MODE"
   local workers=$(( 1 + TILES / 2 ))
   log "Checking if the discovery job is already running..."
   check_discovery_runs "true"
@@ -478,6 +488,7 @@ function Start_Discovery {
     "--TARGET_KS":"'$TARGET_KS'",
     "--TARGET_TBL":"'$TARGET_TBL'",
     "--WRITETIME_COLUMN":"'$WRITETIME_COLUMN'",
+    "--SAFE_MODE":"'$SAFE_MODE'",
     "--OFFLOAD_LARGE_OBJECTS":"'$OFFLOAD_LARGE_OBJECTS_B64'",
     "--REPLICATION_POINT_IN_TIME":"'$REPLICATION_POINT_IN_TIME'",
     "--TTL_COLUMN":"'$TTL_COLUMN'"}' --output text)
@@ -504,6 +515,7 @@ function Start_Replication {
     "--TARGET_KS":"'$TARGET_KS'",
     "--TARGET_TBL":"'$TARGET_TBL'",
     "--WRITETIME_COLUMN":"'$WRITETIME_COLUMN'",
+    "--SAFE_MODE":"'$SAFE_MODE'",
     "--OFFLOAD_LARGE_OBJECTS":"'$OFFLOAD_LARGE_OBJECTS_B64'",
     "--REPLICATION_POINT_IN_TIME":"'$REPLICATION_POINT_IN_TIME'",
     "--TTL_COLUMN":"'$TTL_COLUMN'"}' --output text)
@@ -705,6 +717,14 @@ while (( "$#" )); do
       REPLICATION_STATS_ENABLED=true
       shift 1
       ;;
+    --enhanced-monitoring-enabled)
+      GLUE_MONITORING=true
+      shift 1
+      ;;
+    --safe-mode-disabled)
+      SAFE_MODE=false
+      shift 1
+      ;;
     --)
       shift
       break
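
The two new switches map onto the GLUE_MONITORING and SAFE_MODE variables introduced at the top of the diff. A minimal usage sketch, assuming the script file is named cqlreplicator.sh (the file name is not shown in this diff) and using placeholder keyspace/table names and S3 URI; only flags that appear in the Usage_Exit text or in this diff are used:

# Deploy the Glue job with continuous CloudWatch logging and observability metrics (sets GLUE_MONITORING=true):
./cqlreplicator.sh --state init --landing-zone s3://my-landing-zone --enhanced-monitoring-enabled

# Start the migration with safe mode turned off (sets SAFE_MODE=false, passed to the Glue job as --SAFE_MODE):
./cqlreplicator.sh --state run --tiles 8 --src-keyspace ks1 --src-table tbl1 --trg-keyspace ks1 --trg-table tbl1 --safe-mode-disabled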