15
15
16
16
#define LVB_SIZE 64
17
17
#define NEW_DEV_TIMEOUT 5000
18
/*
 * Upper bound handed to wait_event_timeout() in dlm_lock_sync() while
 * waiting for a DLM lock request to complete; when it elapses the caller
 * logs a timeout and returns -EBUSY.
 * NOTE(review): 30 * HZ is presumably 30 seconds expressed in jiffies -
 * confirm against the kernel's HZ definition.
 */
+ #define WAIT_DLM_LOCK_TIMEOUT (30 * HZ)
18
19
19
20
struct dlm_lock_resource {
20
21
dlm_lockspace_t * ls ;
@@ -56,6 +57,7 @@ struct resync_info {
56
57
#define MD_CLUSTER_ALREADY_IN_CLUSTER 6
57
58
#define MD_CLUSTER_PENDING_RECV_EVENT 7
58
59
#define MD_CLUSTER_HOLDING_MUTEX_FOR_RECVD 8
60
/*
 * Bit number in md_cluster_info->state. In this file it is set at the end
 * of process_add_new_disk(), cleared in process_suspend_info() and when a
 * RESYNCING_START message is processed, and tested by resync_status_get().
 */
+ #define MD_CLUSTER_WAITING_FOR_SYNC 9
59
61
60
62
struct md_cluster_info {
61
63
struct mddev * mddev ; /* the md device which md_cluster_info belongs to */
@@ -91,6 +93,7 @@ struct md_cluster_info {
91
93
sector_t sync_hi ;
92
94
};
93
95
96
+ /* For compatibility, add the new msg_type at the end. */
94
97
enum msg_type {
95
98
METADATA_UPDATED = 0 ,
96
99
RESYNCING ,
@@ -100,6 +103,7 @@ enum msg_type {
100
103
BITMAP_NEEDS_SYNC ,
101
104
CHANGE_CAPACITY ,
102
105
BITMAP_RESIZE ,
106
+ RESYNCING_START ,
103
107
};
104
108
105
109
struct cluster_msg {
@@ -130,8 +134,13 @@ static int dlm_lock_sync(struct dlm_lock_resource *res, int mode)
130
134
0 , sync_ast , res , res -> bast );
131
135
if (ret )
132
136
return ret ;
133
- wait_event (res -> sync_locking , res -> sync_locking_done );
137
+ ret = wait_event_timeout (res -> sync_locking , res -> sync_locking_done ,
138
+ WAIT_DLM_LOCK_TIMEOUT );
134
139
res -> sync_locking_done = false;
140
+ if (!ret ) {
141
+ pr_err ("locking DLM '%s' timeout!\n" , res -> name );
142
+ return - EBUSY ;
143
+ }
135
144
if (res -> lksb .sb_status == 0 )
136
145
res -> mode = mode ;
137
146
return res -> lksb .sb_status ;
@@ -455,6 +464,7 @@ static void process_suspend_info(struct mddev *mddev,
455
464
clear_bit (MD_RESYNCING_REMOTE , & mddev -> recovery );
456
465
remove_suspend_info (mddev , slot );
457
466
set_bit (MD_RECOVERY_NEEDED , & mddev -> recovery );
467
+ clear_bit (MD_CLUSTER_WAITING_FOR_SYNC , & cinfo -> state );
458
468
md_wakeup_thread (mddev -> thread );
459
469
return ;
460
470
}
@@ -525,6 +535,7 @@ static int process_add_new_disk(struct mddev *mddev, struct cluster_msg *cmsg)
525
535
res = -1 ;
526
536
}
527
537
clear_bit (MD_CLUSTER_WAITING_FOR_NEWDISK , & cinfo -> state );
538
+ set_bit (MD_CLUSTER_WAITING_FOR_SYNC , & cinfo -> state );
528
539
return res ;
529
540
}
530
541
@@ -593,6 +604,9 @@ static int process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg)
593
604
case CHANGE_CAPACITY :
594
605
set_capacity_and_notify (mddev -> gendisk , mddev -> array_sectors );
595
606
break ;
607
+ case RESYNCING_START :
608
+ clear_bit (MD_CLUSTER_WAITING_FOR_SYNC , & mddev -> cluster_info -> state );
609
+ break ;
596
610
case RESYNCING :
597
611
set_bit (MD_RESYNCING_REMOTE , & mddev -> recovery );
598
612
process_suspend_info (mddev , le32_to_cpu (msg -> slot ),
@@ -743,15 +757,15 @@ static void unlock_comm(struct md_cluster_info *cinfo)
743
757
*/
744
758
static int __sendmsg (struct md_cluster_info * cinfo , struct cluster_msg * cmsg )
745
759
{
746
- int error ;
760
+ int error , unlock_error ;
747
761
int slot = cinfo -> slot_number - 1 ;
748
762
749
763
cmsg -> slot = cpu_to_le32 (slot );
750
764
/*get EX on Message*/
751
765
error = dlm_lock_sync (cinfo -> message_lockres , DLM_LOCK_EX );
752
766
if (error ) {
753
767
pr_err ("md-cluster: failed to get EX on MESSAGE (%d)\n" , error );
754
- goto failed_message ;
768
+ return error ;
755
769
}
756
770
757
771
memcpy (cinfo -> message_lockres -> lksb .sb_lvbptr , (void * )cmsg ,
@@ -781,14 +795,10 @@ static int __sendmsg(struct md_cluster_info *cinfo, struct cluster_msg *cmsg)
781
795
}
782
796
783
797
failed_ack :
784
- error = dlm_unlock_sync (cinfo -> message_lockres );
785
- if (unlikely (error != 0 )) {
798
+ while ((unlock_error = dlm_unlock_sync (cinfo -> message_lockres )))
786
799
pr_err ("md-cluster: failed convert to NL on MESSAGE(%d)\n" ,
787
- error );
788
- /* in case the message can't be released due to some reason */
789
- goto failed_ack ;
790
- }
791
- failed_message :
800
+ unlock_error );
801
+
792
802
return error ;
793
803
}
794
804
@@ -1343,6 +1353,23 @@ static void resync_info_get(struct mddev *mddev, sector_t *lo, sector_t *hi)
1343
1353
spin_unlock_irq (& cinfo -> suspend_lock );
1344
1354
}
1345
1355
1356
/*
 * resync_status_get() - report whether MD_CLUSTER_WAITING_FOR_SYNC is set.
 * @mddev: array whose cluster_info state is inspected.
 *
 * Returns the result of test_bit() on cinfo->state: non-zero while the
 * bit is set, zero otherwise. Within this file the bit is set by
 * process_add_new_disk() and cleared either in process_suspend_info() or
 * when a RESYNCING_START message is handled by process_recvd_msg().
 *
 * NOTE(review): mddev->cluster_info is dereferenced without a NULL check;
 * presumably callers only reach this through cluster_ops on a clustered
 * array - confirm against the caller.
 */
+ static int resync_status_get (struct mddev * mddev )
1357
+ {
1358
+ struct md_cluster_info * cinfo = mddev -> cluster_info ;
1359
+
1360
+ return test_bit (MD_CLUSTER_WAITING_FOR_SYNC , & cinfo -> state );
1361
+ }
1362
+
1363
/*
 * resync_start_notify() - send a RESYNCING_START cluster message.
 * @mddev: array whose cluster_info is used to send the message.
 *
 * Builds a zero-initialised cluster_msg, sets only its type (converted to
 * little-endian) and passes it to sendmsg(). Returns whatever sendmsg()
 * returns. On the receiving side, process_recvd_msg() reacts to
 * RESYNCING_START by clearing MD_CLUSTER_WAITING_FOR_SYNC.
 *
 * NOTE(review): the meaning of the third sendmsg() argument (0 here) is
 * not visible in this excerpt - confirm against sendmsg()'s definition.
 */
+ static int resync_start_notify (struct mddev * mddev )
1364
+ {
1365
+ struct md_cluster_info * cinfo = mddev -> cluster_info ;
1366
+ struct cluster_msg cmsg = {0 }; /* every field except type stays zero */
1367
+
1368
+ cmsg .type = cpu_to_le32 (RESYNCING_START );
1369
+
1370
+ return sendmsg (cinfo , & cmsg , 0 );
1371
+ }
1372
+
1346
1373
static int resync_info_update (struct mddev * mddev , sector_t lo , sector_t hi )
1347
1374
{
1348
1375
struct md_cluster_info * cinfo = mddev -> cluster_info ;
@@ -1577,6 +1604,8 @@ static const struct md_cluster_operations cluster_ops = {
1577
1604
.resync_start = resync_start ,
1578
1605
.resync_finish = resync_finish ,
1579
1606
.resync_info_update = resync_info_update ,
1607
+ .resync_start_notify = resync_start_notify ,
1608
+ .resync_status_get = resync_status_get ,
1580
1609
.resync_info_get = resync_info_get ,
1581
1610
.metadata_update_start = metadata_update_start ,
1582
1611
.metadata_update_finish = metadata_update_finish ,
0 commit comments