@@ -349,34 +349,156 @@ export class SentinelFramework extends DockerBase {
349
349
) ;
350
350
}
351
351
352
- return [
353
- ...await Promise . all ( promises )
354
- ]
352
+ const sentinels = await Promise . all ( promises ) ;
353
+
354
+ await this . validateSentinelConfiguration ( sentinels ) ;
355
+
356
+ return sentinels ;
357
+ }
358
+
359
/**
 * Polls the given sentinels until each of them is configured, or gives up.
 *
 * A sentinel counts as configured when its client is ready, it returns
 * master information for `this.config.sentinelName`, and it lists at least
 * `sentinels.length - 1` other sentinels for that name (everyone but itself).
 *
 * @param sentinels - the sentinels to check.
 * @throws Error when the condition is not met after 10 attempts spaced
 *   2 seconds apart. If an unexpected exception interrupted an attempt, the
 *   most recent one is appended to the message.
 */
private async validateSentinelConfiguration(
  sentinels: Awaited<ReturnType<SentinelFramework['spawnRedisSentinelSentinelDocker']>>[]
): Promise<void> {
  const maxRetries = 10;
  const retryDelay = 2000;
  const expectedPeers = sentinels.length - 1;

  // Defined once, outside the retry loop, so no closure is created per attempt.
  const isConfigured = async (sentinel: (typeof sentinels)[number]): Promise<boolean> => {
    if (!sentinel.client.isReady) {
      return false;
    }

    // A failing command only means "not there yet", so it is mapped to a
    // negative answer rather than aborting the attempt.
    const masterInfo = await sentinel.client.sentinel
      .sentinelMaster(this.config.sentinelName)
      .catch(() => null);
    if (!masterInfo) {
      return false;
    }

    const knownSentinels = await sentinel.client.sentinel
      .sentinelSentinels(this.config.sentinelName)
      .catch(() => []);
    return knownSentinels.length >= expectedPeers;
  };

  let lastError: unknown;

  for (let retry = 0; retry < maxRetries; retry++) {
    // Sleep only between attempts: never before the first one and never
    // after the last one, so a failure is reported without a wasted delay.
    if (retry > 0) {
      await setTimeout(retryDelay);
    }

    try {
      let allConfigured = true;
      // Checked one at a time, stopping at the first sentinel that is not ready.
      for (const sentinel of sentinels) {
        if (!(await isConfigured(sentinel))) {
          allConfigured = false;
          break;
        }
      }

      if (allConfigured) {
        return;
      }
    } catch (err: unknown) {
      // Keep retrying, but remember the failure so it can be reported.
      lastError = err;
    }
  }

  const reason = lastError === undefined
    ? ''
    : ` (last error: ${lastError instanceof Error ? lastError.message : String(lastError)})`;
  throw new Error('Sentinel configuration did not propagate correctly within the timeout period' + reason);
}
356
413
357
/**
 * Restarts every Redis node and sentinel whose port is reported as available
 * by `isPortAvailable` (presumably meaning nothing is listening there — TODO
 * confirm against the helper), then checks that the clients respond via
 * `verifyNodesResponsive`.
 *
 * @throws Error when a port is still available after 5 restart attempts, or
 *   when `verifyNodesResponsive` rejects.
 */
async getAllRunning(): Promise<void> {
  for (const port of this.getAllNodesPort()) {
    await this.restartUntilListening(port, id => this.restartNode(id), {
      thrown: 'Redis node',
      logged: 'Redis node'
    });
  }

  for (const port of this.getAllSentinelsPort()) {
    await this.restartUntilListening(port, id => this.restartSentinel(id), {
      thrown: 'Sentinel node',
      logged: 'Sentinel'
    });
  }

  await this.verifyNodesResponsive();
}

/**
 * Calls `restart` for `port` until `isPortAvailable(port)` stops returning true.
 *
 * @param port - port of the node or sentinel; its string form is passed to `restart`.
 * @param restart - restart routine to invoke for this port.
 * @param labels - wording used in the thrown error and in the logged error.
 * @throws Error when the port is still available after 5 restart attempts.
 */
private async restartUntilListening(
  port: number,
  restart: (id: string) => Promise<unknown>,
  labels: { thrown: string; logged: string }
): Promise<void> {
  const MAX_RETRIES = 5;
  const RETRY_DELAY = 500;

  for (let retries = 0; await isPortAvailable(port); retries++) {
    if (retries >= MAX_RETRIES) {
      throw new Error(`Failed to restart ${labels.thrown} at port ${port} after ${MAX_RETRIES} attempts`);
    }

    try {
      await restart(port.toString());
    } catch (err: unknown) {
      console.error(`Error restarting ${labels.logged} at port ${port}:`, err);
    }

    // Pause after every attempt, failed ones included. Otherwise a restart
    // that throws would be retried back-to-back and use up all attempts
    // without any delay in between.
    await setTimeout(RETRY_DELAY);
  }
}
461
+
462
/**
 * Checks the client of every entry in `#nodeMap` and `#sentinelMap`.
 * Clients that are already ready are left alone; the others are given up to
 * 10 attempts, 2 seconds apart, to connect and answer a PING.
 *
 * @throws Error naming the port of the first node or sentinel whose client
 *   is still unresponsive after the final attempt.
 */
private async verifyNodesResponsive(): Promise<void> {
  for (const nodeInfo of this.#nodeMap.values()) {
    await this.ensureClientResponsive(nodeInfo.client, `Node at port ${nodeInfo.docker.port}`);
  }

  for (const sentinelInfo of this.#sentinelMap.values()) {
    await this.ensureClientResponsive(sentinelInfo.client, `Sentinel at port ${sentinelInfo.docker.port}`);
  }
}

/**
 * Brings a single client to a state where it answers PING.
 *
 * @param client - the client to check; only the members listed in the type are used.
 * @param label - description placed at the start of the error message.
 * @throws Error when the client does not answer within 10 attempts.
 */
private async ensureClientResponsive(
  client: {
    readonly isOpen: boolean;
    readonly isReady: boolean;
    connect(): Promise<unknown>;
    ping(): Promise<unknown>;
  },
  label: string
): Promise<void> {
  const MAX_ATTEMPTS = 10;
  const ATTEMPT_DELAY = 2000;

  if (client.isReady) {
    return;
  }

  for (let attempt = 1; attempt <= MAX_ATTEMPTS; attempt++) {
    let responsive = false;

    try {
      // Only open the socket when it is closed. On a client that is open but
      // still reconnecting, connect() is expected to reject (see the client
      // documentation), which would fail every attempt even though the
      // client can recover by itself.
      if (!client.isOpen) {
        await client.connect();
      }

      // Ping only once the client reports ready, so the command cannot sit
      // in a queue waiting for a reconnect and stall the attempt.
      if (client.isReady) {
        await client.ping();
        responsive = true;
      }
    } catch {
      // A failed connect or ping just counts as an unsuccessful attempt.
    }

    if (responsive) {
      return;
    }
    if (attempt === MAX_ATTEMPTS) {
      throw new Error(`${label} is not responsive after ${MAX_ATTEMPTS} attempts`);
    }
    await setTimeout(ATTEMPT_DELAY);
  }
}
@@ -486,8 +608,16 @@ export class SentinelFramework extends DockerBase {
486
608
if ( node === undefined ) {
487
609
throw new Error ( "unknown node: " + id ) ;
488
610
}
489
-
611
+
612
+ let masterPort : number | null = null ;
613
+ try {
614
+ masterPort = await this . getMasterPort ( ) ;
615
+ } catch ( err ) {
616
+ console . log ( `Could not determine master before restarting node ${ id } : ${ err } ` ) ;
617
+ }
618
+
490
619
await this . dockerStart ( node . docker . dockerId ) ;
620
+
491
621
if ( ! node . client . isOpen ) {
492
622
node . client = await RedisClient . create ( {
493
623
password : this . config . password ,
@@ -496,6 +626,17 @@ export class SentinelFramework extends DockerBase {
496
626
}
497
627
} ) . on ( "error" , ( ) => { } ) . connect ( ) ;
498
628
}
629
+
630
+ // Wait for node to be ready
631
+ await setTimeout ( 500 ) ;
632
+
633
+ if ( masterPort && node . docker . port !== masterPort ) {
634
+ try {
635
+ await node . client . replicaOf ( '127.0.0.1' , masterPort ) ;
636
+ } catch ( err ) {
637
+ console . error ( `Failed to reconfigure node ${ id } as replica: ${ err } ` ) ;
638
+ }
639
+ }
499
640
}
500
641
501
642
async stopSentinel ( id : string ) {
0 commit comments