Skip to content

Commit 3c80ebb

Browse files
committed
Merge tag 'md-6.11-20240712' of git://git.kernel.org/pub/scm/linux/kernel/git/song/md into for-6.11/block
Pull MD fixes from Song:

"Changes in this set are:

 1. md-cluster fixes by Heming Zhao;
 2. raid1 fix by Mateusz Jończyk."

* tag 'md-6.11-20240712' of git://git.kernel.org/pub/scm/linux/kernel/git/song/md:
  md/raid1: set max_sectors during early return from choose_slow_rdev()
  md-cluster: fix no recovery job when adding/re-adding a disk
  md-cluster: fix hanging issue while a new disk adding
2 parents 3c1743a + 36a5c03 commit 3c80ebb

File tree

4 files changed

+56
-13
lines changed

4 files changed

+56
-13
lines changed

drivers/md/md-cluster.c

Lines changed: 39 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,7 @@
1515

1616
#define LVB_SIZE 64
1717
#define NEW_DEV_TIMEOUT 5000
18+
#define WAIT_DLM_LOCK_TIMEOUT (30 * HZ)
1819

1920
struct dlm_lock_resource {
2021
dlm_lockspace_t *ls;
@@ -56,6 +57,7 @@ struct resync_info {
5657
#define MD_CLUSTER_ALREADY_IN_CLUSTER 6
5758
#define MD_CLUSTER_PENDING_RECV_EVENT 7
5859
#define MD_CLUSTER_HOLDING_MUTEX_FOR_RECVD 8
60+
#define MD_CLUSTER_WAITING_FOR_SYNC 9
5961

6062
struct md_cluster_info {
6163
struct mddev *mddev; /* the md device which md_cluster_info belongs to */
@@ -91,6 +93,7 @@ struct md_cluster_info {
9193
sector_t sync_hi;
9294
};
9395

96+
/* For compatibility, add the new msg_type at the end. */
9497
enum msg_type {
9598
METADATA_UPDATED = 0,
9699
RESYNCING,
@@ -100,6 +103,7 @@ enum msg_type {
100103
BITMAP_NEEDS_SYNC,
101104
CHANGE_CAPACITY,
102105
BITMAP_RESIZE,
106+
RESYNCING_START,
103107
};
104108

105109
struct cluster_msg {
@@ -130,8 +134,13 @@ static int dlm_lock_sync(struct dlm_lock_resource *res, int mode)
130134
0, sync_ast, res, res->bast);
131135
if (ret)
132136
return ret;
133-
wait_event(res->sync_locking, res->sync_locking_done);
137+
ret = wait_event_timeout(res->sync_locking, res->sync_locking_done,
138+
WAIT_DLM_LOCK_TIMEOUT);
134139
res->sync_locking_done = false;
140+
if (!ret) {
141+
pr_err("locking DLM '%s' timeout!\n", res->name);
142+
return -EBUSY;
143+
}
135144
if (res->lksb.sb_status == 0)
136145
res->mode = mode;
137146
return res->lksb.sb_status;
@@ -455,6 +464,7 @@ static void process_suspend_info(struct mddev *mddev,
455464
clear_bit(MD_RESYNCING_REMOTE, &mddev->recovery);
456465
remove_suspend_info(mddev, slot);
457466
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
467+
clear_bit(MD_CLUSTER_WAITING_FOR_SYNC, &cinfo->state);
458468
md_wakeup_thread(mddev->thread);
459469
return;
460470
}
@@ -525,6 +535,7 @@ static int process_add_new_disk(struct mddev *mddev, struct cluster_msg *cmsg)
525535
res = -1;
526536
}
527537
clear_bit(MD_CLUSTER_WAITING_FOR_NEWDISK, &cinfo->state);
538+
set_bit(MD_CLUSTER_WAITING_FOR_SYNC, &cinfo->state);
528539
return res;
529540
}
530541

@@ -593,6 +604,9 @@ static int process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg)
593604
case CHANGE_CAPACITY:
594605
set_capacity_and_notify(mddev->gendisk, mddev->array_sectors);
595606
break;
607+
case RESYNCING_START:
608+
clear_bit(MD_CLUSTER_WAITING_FOR_SYNC, &mddev->cluster_info->state);
609+
break;
596610
case RESYNCING:
597611
set_bit(MD_RESYNCING_REMOTE, &mddev->recovery);
598612
process_suspend_info(mddev, le32_to_cpu(msg->slot),
@@ -743,15 +757,15 @@ static void unlock_comm(struct md_cluster_info *cinfo)
743757
*/
744758
static int __sendmsg(struct md_cluster_info *cinfo, struct cluster_msg *cmsg)
745759
{
746-
int error;
760+
int error, unlock_error;
747761
int slot = cinfo->slot_number - 1;
748762

749763
cmsg->slot = cpu_to_le32(slot);
750764
/*get EX on Message*/
751765
error = dlm_lock_sync(cinfo->message_lockres, DLM_LOCK_EX);
752766
if (error) {
753767
pr_err("md-cluster: failed to get EX on MESSAGE (%d)\n", error);
754-
goto failed_message;
768+
return error;
755769
}
756770

757771
memcpy(cinfo->message_lockres->lksb.sb_lvbptr, (void *)cmsg,
@@ -781,14 +795,10 @@ static int __sendmsg(struct md_cluster_info *cinfo, struct cluster_msg *cmsg)
781795
}
782796

783797
failed_ack:
784-
error = dlm_unlock_sync(cinfo->message_lockres);
785-
if (unlikely(error != 0)) {
798+
while ((unlock_error = dlm_unlock_sync(cinfo->message_lockres)))
786799
pr_err("md-cluster: failed convert to NL on MESSAGE(%d)\n",
787-
error);
788-
/* in case the message can't be released due to some reason */
789-
goto failed_ack;
790-
}
791-
failed_message:
800+
unlock_error);
801+
792802
return error;
793803
}
794804

@@ -1343,6 +1353,23 @@ static void resync_info_get(struct mddev *mddev, sector_t *lo, sector_t *hi)
13431353
spin_unlock_irq(&cinfo->suspend_lock);
13441354
}
13451355

1356+
static int resync_status_get(struct mddev *mddev)
1357+
{
1358+
struct md_cluster_info *cinfo = mddev->cluster_info;
1359+
1360+
return test_bit(MD_CLUSTER_WAITING_FOR_SYNC, &cinfo->state);
1361+
}
1362+
1363+
static int resync_start_notify(struct mddev *mddev)
1364+
{
1365+
struct md_cluster_info *cinfo = mddev->cluster_info;
1366+
struct cluster_msg cmsg = {0};
1367+
1368+
cmsg.type = cpu_to_le32(RESYNCING_START);
1369+
1370+
return sendmsg(cinfo, &cmsg, 0);
1371+
}
1372+
13461373
static int resync_info_update(struct mddev *mddev, sector_t lo, sector_t hi)
13471374
{
13481375
struct md_cluster_info *cinfo = mddev->cluster_info;
@@ -1577,6 +1604,8 @@ static const struct md_cluster_operations cluster_ops = {
15771604
.resync_start = resync_start,
15781605
.resync_finish = resync_finish,
15791606
.resync_info_update = resync_info_update,
1607+
.resync_start_notify = resync_start_notify,
1608+
.resync_status_get = resync_status_get,
15801609
.resync_info_get = resync_info_get,
15811610
.metadata_update_start = metadata_update_start,
15821611
.metadata_update_finish = metadata_update_finish,

drivers/md/md-cluster.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,8 @@ struct md_cluster_operations {
1414
int (*leave)(struct mddev *mddev);
1515
int (*slot_number)(struct mddev *mddev);
1616
int (*resync_info_update)(struct mddev *mddev, sector_t lo, sector_t hi);
17+
int (*resync_start_notify)(struct mddev *mddev);
18+
int (*resync_status_get)(struct mddev *mddev);
1719
void (*resync_info_get)(struct mddev *mddev, sector_t *lo, sector_t *hi);
1820
int (*metadata_update_start)(struct mddev *mddev);
1921
int (*metadata_update_finish)(struct mddev *mddev);

drivers/md/md.c

Lines changed: 14 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -8978,7 +8978,8 @@ void md_do_sync(struct md_thread *thread)
89788978
* This will mean we have to start checking from the beginning again.
89798979
*
89808980
*/
8981-
8981+
if (mddev_is_clustered(mddev))
8982+
md_cluster_ops->resync_start_notify(mddev);
89828983
do {
89838984
int mddev2_minor = -1;
89848985
mddev->curr_resync = MD_RESYNC_DELAYED;
@@ -9992,8 +9993,18 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
99929993
*/
99939994
if (rdev2->raid_disk == -1 && role != MD_DISK_ROLE_SPARE &&
99949995
!(le32_to_cpu(sb->feature_map) &
9995-
MD_FEATURE_RESHAPE_ACTIVE)) {
9996-
rdev2->saved_raid_disk = role;
9996+
MD_FEATURE_RESHAPE_ACTIVE) &&
9997+
!md_cluster_ops->resync_status_get(mddev)) {
9998+
/*
9999+
* -1 to make raid1_add_disk() set conf->fullsync
10000+
* to 1. This could avoid skipping sync when the
10001+
* remote node is down during resyncing.
10002+
*/
10003+
if ((le32_to_cpu(sb->feature_map)
10004+
& MD_FEATURE_RECOVERY_OFFSET))
10005+
rdev2->saved_raid_disk = -1;
10006+
else
10007+
rdev2->saved_raid_disk = role;
999710008
ret = remove_and_add_spares(mddev, rdev2);
999810009
pr_info("Activated spare: %pg\n",
999910010
rdev2->bdev);

drivers/md/raid1.c

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -680,6 +680,7 @@ static int choose_slow_rdev(struct r1conf *conf, struct r1bio *r1_bio,
680680
len = r1_bio->sectors;
681681
read_len = raid1_check_read_range(rdev, this_sector, &len);
682682
if (read_len == r1_bio->sectors) {
683+
*max_sectors = read_len;
683684
update_read_sectors(conf, disk, this_sector, read_len);
684685
return disk;
685686
}

0 commit comments

Comments
 (0)