Skip to content

2 nodes of galera cluster (3 nodes totally) restart periodically #398

@ybxiang

Description

@ybxiang

Dear experts:

I use the docker-stack.yml below to start a Galera cluster with 3 nodes, but 2 of the 3 nodes restart periodically:

version: '3.7'

services:

  # First Galera node. It is the only service that is passed
  # --wsrep-new-cluster (Galera's bootstrap flag); the other services only
  # list it in their gcomm:// address.
  mariadb01:
    image: mariadb:10.5
    # Folded scalar: the lines below are joined with single spaces, yielding
    # one argument string that is appended to the image entrypoint.
    command: >-
      --wsrep-new-cluster
      --binlog-format=ROW
      --wsrep-on=1
      --wsrep-cluster-name=terra-mariadb-cluster
      --wsrep-cluster-address=gcomm://mariadb02,mariadb03,mariadb01
      --wsrep-forced-binlog-format=ROW
      --wsrep-provider=/usr/lib/galera/libgalera_smm.so
      --wsrep-sst-method=rsync
      --wsrep-node-address=mariadb01
      --wsrep-node-name=server1
      --server-id=1
      --bind-address=0.0.0.0
      --default-storage-engine=InnoDB
      --innodb-autoinc-lock-mode=2
    environment:
      MYSQL_ROOT_PASSWORD: "test-root"
    networks:
      - terra-overlay-net
    # Host directory next to the stack file, bind-mounted as the datadir.
    volumes:
      - ./mariadb01-data:/var/lib/mysql
    deploy:
      mode: replicated
      replicas: 1

  # Second Galera node. Started without --wsrep-new-cluster, so it joins via
  # the gcomm:// address list and requests an rsync SST.
  mariadb02:
    image: mariadb:10.5
    # NOTE(review): the Compose v3 reference states that depends_on is ignored
    # by `docker stack deploy`, so this presumably does not delay start-up
    # until mariadb01 is up -- confirm against the deployment method in use.
    depends_on:
      - mariadb01
    # Folded scalar: the lines below are joined with single spaces, yielding
    # one argument string that is appended to the image entrypoint.
    command: >-
      --binlog-format=ROW
      --wsrep-on=1
      --wsrep-cluster-name=terra-mariadb-cluster
      --wsrep-cluster-address=gcomm://mariadb01,mariadb03,mariadb02
      --wsrep-forced-binlog-format=ROW
      --wsrep-provider=/usr/lib/galera/libgalera_smm.so
      --wsrep-sst-method=rsync
      --wsrep-node-address=mariadb02
      --wsrep-node-name=server2
      --server-id=2
      --bind-address=0.0.0.0
      --default-storage-engine=InnoDB
      --innodb-autoinc-lock-mode=2
    environment:
      MYSQL_ROOT_PASSWORD: "test-root"
    networks:
      - terra-overlay-net
    # Host directory next to the stack file, bind-mounted as the datadir.
    volumes:
      - ./mariadb02-data:/var/lib/mysql
    deploy:
      mode: replicated
      replicas: 1

  # Third Galera node. Started without --wsrep-new-cluster, so it joins via
  # the gcomm:// address list and requests an rsync SST.
  mariadb03:
    image: mariadb:10.5
    # NOTE(review): the Compose v3 reference states that depends_on is ignored
    # by `docker stack deploy`, so this presumably does not delay start-up
    # until mariadb01 is up -- confirm against the deployment method in use.
    depends_on:
      - mariadb01
    # Folded scalar: the lines below are joined with single spaces, yielding
    # one argument string that is appended to the image entrypoint.
    command: >-
      --binlog-format=ROW
      --wsrep-on=1
      --wsrep-cluster-name=terra-mariadb-cluster
      --wsrep-cluster-address=gcomm://mariadb01,mariadb02,mariadb03
      --wsrep-forced-binlog-format=ROW
      --wsrep-provider=/usr/lib/galera/libgalera_smm.so
      --wsrep-sst-method=rsync
      --wsrep-node-address=mariadb03
      --wsrep-node-name=server3
      --server-id=3
      --bind-address=0.0.0.0
      --default-storage-engine=InnoDB
      --innodb-autoinc-lock-mode=2
    environment:
      MYSQL_ROOT_PASSWORD: "test-root"
    networks:
      - terra-overlay-net
    # Host directory next to the stack file, bind-mounted as the datadir.
    volumes:
      - ./mariadb03-data:/var/lib/mysql
    deploy:
      mode: replicated
      replicas: 1

# Pre-existing overlay network shared by the three nodes. `external: true`
# makes the stack attach to it instead of creating it, so creation-time
# settings such as the driver are fixed by `docker network create`; the
# Compose v3 reference says `external` must not be combined with `driver`,
# hence no `driver:` key here.
networks:
  terra-overlay-net:
    name: terra-overlay-net
    external: true

The commands to create the network and start the service stack:

# Wipe and recreate the three per-node data directories so that every node
# starts from an empty datadir.
echo "prepare clean data directories ******************************************"
data_dirs="mariadb01-data mariadb02-data mariadb03-data"
# Intentionally unquoted: the list must split into three separate arguments.
rm -rf $data_dirs
sleep 1
mkdir $data_dirs

# Drop the overlay network, prune every unused network, then recreate the
# overlay as attachable with a fixed subnet.
echo "prepare fresh overlay network *******************************************"
overlay_net=terra-overlay-net
docker network rm "$overlay_net"
docker network prune -f
sleep 1
docker network create \
  --driver overlay \
  --attachable \
  --subnet 172.16.238.0/24 \
  "$overlay_net"
sleep 1

# Deploy the stack described by docker-stack.yml.
echo "start services **********************************************************"
docker stack deploy -c docker-stack.yml terra-mariadb-cluster

But mariadb02 and mariadb03 restart periodically with the errors below:

2021-10-09  7:08:04 0 [Note] WSREP: Flow-control interval: [23, 23]
WSREP_SST: [INFO] previous SST is not completed, waiting for it to exit (20211009 07:08:04.838)
WSREP_SST: [INFO] previous SST is not completed, waiting for it to exit (20211009 07:08:05.849)
WSREP_SST: [ERROR] previous SST script still running. (20211009 07:08:05.852)
2021-10-09  7:08:05 0 [ERROR] WSREP: Failed to read 'ready <addr>' from: wsrep_sst_rsync --role 'joiner' --address 'mariadb02' --datadir '/var/lib/mysql/' --parent '1' --mysqld-args --binlog-format=ROW --wsrep-on=1 --wsrep-cluster-name=terra-mariadb-cluster --wsrep-cluster-address=gcomm://mariadb01,mariadb03,mariadb02 --wsrep-forced-binlog-format=ROW --wsrep-provider=/usr/lib/galera/libgalera_smm.so --wsrep-sst-method=rsync --wsrep-node-address=mariadb02 --wsrep-node-name=server2 --server-id=2 --bind-address=0.0.0.0 --default-storage-engine=InnoDB --innodb-autoinc-lock-mode=2
 Read: '(null)'
2021-10-09  7:08:05 0 [ERROR] WSREP: Process completed with error: wsrep_sst_rsync --role 'joiner' --address 'mariadb02' --datadir '/var/lib/mysql/' --parent '1' --mysqld-args --binlog-format=ROW --wsrep-on=1 --wsrep-cluster-name=terra-mariadb-cluster --wsrep-cluster-address=gcomm://mariadb01,mariadb03,mariadb02 --wsrep-forced-binlog-format=ROW --wsrep-provider=/usr/lib/galera/libgalera_smm.so --wsrep-sst-method=rsync --wsrep-node-address=mariadb02 --wsrep-node-name=server2 --server-id=2 --bind-address=0.0.0.0 --default-storage-engine=InnoDB --innodb-autoinc-lock-mode=2: 114 (Operation already in progress)
2021-10-09  7:08:05 1 [ERROR] WSREP: Failed to prepare for 'rsync' SST. Unrecoverable.
2021-10-09  7:08:05 1 [ERROR] WSREP: SST request callback failed. This is unrecoverable, restart required.

Are there any possible bugs in the docker-stack.yml?
I spent more than 1 week on this issue, please help me!

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions