@@ -1407,7 +1407,6 @@ static int add_to_waiters(struct dlm_lkb *lkb, int mstype, int to_nodeid)
1407
1407
{
1408
1408
struct dlm_ls * ls = lkb -> lkb_resource -> res_ls ;
1409
1409
int error = 0 ;
1410
- int wc ;
1411
1410
1412
1411
mutex_lock (& ls -> ls_waiters_mutex );
1413
1412
@@ -1429,17 +1428,20 @@ static int add_to_waiters(struct dlm_lkb *lkb, int mstype, int to_nodeid)
1429
1428
error = - EBUSY ;
1430
1429
goto out ;
1431
1430
}
1432
- wc = atomic_inc_return ( & lkb -> lkb_wait_count ) ;
1431
+ lkb -> lkb_wait_count ++ ;
1433
1432
hold_lkb (lkb );
1434
1433
1435
1434
log_debug (ls , "addwait %x cur %d overlap %d count %d f %x" ,
1436
- lkb -> lkb_id , lkb -> lkb_wait_type , mstype , wc ,
1437
- dlm_iflags_val (lkb ));
1435
+ lkb -> lkb_id , lkb -> lkb_wait_type , mstype ,
1436
+ lkb -> lkb_wait_count , dlm_iflags_val (lkb ));
1438
1437
goto out ;
1439
1438
}
1440
1439
1441
- wc = atomic_fetch_inc (& lkb -> lkb_wait_count );
1442
- DLM_ASSERT (!wc , dlm_print_lkb (lkb ); printk ("wait_count %d\n" , wc ););
1440
+ DLM_ASSERT (!lkb -> lkb_wait_count ,
1441
+ dlm_print_lkb (lkb );
1442
+ printk ("wait_count %d\n" , lkb -> lkb_wait_count ););
1443
+
1444
+ lkb -> lkb_wait_count ++ ;
1443
1445
lkb -> lkb_wait_type = mstype ;
1444
1446
lkb -> lkb_wait_nodeid = to_nodeid ; /* for debugging */
1445
1447
hold_lkb (lkb );
@@ -1502,7 +1504,7 @@ static int _remove_from_waiters(struct dlm_lkb *lkb, int mstype,
1502
1504
log_debug (ls , "remwait %x convert_reply zap overlap_cancel" ,
1503
1505
lkb -> lkb_id );
1504
1506
lkb -> lkb_wait_type = 0 ;
1505
- atomic_dec ( & lkb -> lkb_wait_count ) ;
1507
+ lkb -> lkb_wait_count -- ;
1506
1508
unhold_lkb (lkb );
1507
1509
goto out_del ;
1508
1510
}
@@ -1529,15 +1531,16 @@ static int _remove_from_waiters(struct dlm_lkb *lkb, int mstype,
1529
1531
if (overlap_done && lkb -> lkb_wait_type ) {
1530
1532
log_error (ls , "remwait error %x reply %d wait_type %d overlap" ,
1531
1533
lkb -> lkb_id , mstype , lkb -> lkb_wait_type );
1532
- atomic_dec ( & lkb -> lkb_wait_count ) ;
1534
+ lkb -> lkb_wait_count -- ;
1533
1535
unhold_lkb (lkb );
1534
1536
lkb -> lkb_wait_type = 0 ;
1535
1537
}
1536
1538
1537
- DLM_ASSERT (atomic_read ( & lkb -> lkb_wait_count ) , dlm_print_lkb (lkb ););
1539
+ DLM_ASSERT (lkb -> lkb_wait_count , dlm_print_lkb (lkb ););
1538
1540
1539
1541
clear_bit (DLM_IFL_RESEND_BIT , & lkb -> lkb_iflags );
1540
- if (atomic_dec_and_test (& lkb -> lkb_wait_count ))
1542
+ lkb -> lkb_wait_count -- ;
1543
+ if (!lkb -> lkb_wait_count )
1541
1544
list_del_init (& lkb -> lkb_wait_reply );
1542
1545
unhold_lkb (lkb );
1543
1546
return 0 ;
@@ -2666,7 +2669,7 @@ static int validate_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
2666
2669
goto out ;
2667
2670
2668
2671
/* lock not allowed if there's any op in progress */
2669
- if (lkb -> lkb_wait_type || atomic_read ( & lkb -> lkb_wait_count ) )
2672
+ if (lkb -> lkb_wait_type || lkb -> lkb_wait_count )
2670
2673
goto out ;
2671
2674
2672
2675
if (is_overlap (lkb ))
@@ -2728,7 +2731,7 @@ static int validate_unlock_args(struct dlm_lkb *lkb, struct dlm_args *args)
2728
2731
2729
2732
/* normal unlock not allowed if there's any op in progress */
2730
2733
if (!(args -> flags & (DLM_LKF_CANCEL | DLM_LKF_FORCEUNLOCK )) &&
2731
- (lkb -> lkb_wait_type || atomic_read ( & lkb -> lkb_wait_count ) ))
2734
+ (lkb -> lkb_wait_type || lkb -> lkb_wait_count ))
2732
2735
goto out ;
2733
2736
2734
2737
/* an lkb may be waiting for an rsb lookup to complete where the
@@ -5011,21 +5014,32 @@ static struct dlm_lkb *find_resend_waiter(struct dlm_ls *ls)
5011
5014
return lkb ;
5012
5015
}
5013
5016
5014
- /* Deal with lookups and lkb's marked RESEND from _pre. We may now be the
5015
- master or dir-node for r. Processing the lkb may result in it being placed
5016
- back on waiters. */
5017
-
5018
- /* We do this after normal locking has been enabled and any saved messages
5019
- (in requestqueue) have been processed. We should be confident that at
5020
- this point we won't get or process a reply to any of these waiting
5021
- operations. But, new ops may be coming in on the rsbs/locks here from
5022
- userspace or remotely. */
5023
-
5024
- /* there may have been an overlap unlock/cancel prior to recovery or after
5025
- recovery. if before, the lkb may still have a pos wait_count; if after, the
5026
- overlap flag would just have been set and nothing new sent. we can be
5027
- confident here than any replies to either the initial op or overlap ops
5028
- prior to recovery have been received. */
5017
+ /*
5018
+ * Forced state reset for locks that were in the middle of remote operations
5019
+ * when recovery happened (i.e. lkbs that were on the waiters list, waiting
5020
+ * for a reply from a remote operation.) The lkbs remaining on the waiters
5021
+ * list need to be reevaluated; some may need resending to a different node
5022
+ * than previously, and some may now need local handling rather than remote.
5023
+ *
5024
+ * First, the lkb state for the voided remote operation is forcibly reset,
5025
+ * equivalent to what remove_from_waiters() would normally do:
5026
+ * . lkb removed from ls_waiters list
5027
+ * . lkb wait_type cleared
5028
+ * . lkb wait_count cleared
5029
+ * . lkb ref count decremented for each wait_count (almost always 1,
5030
+ * but possibly 2 in case of cancel/unlock overlapping, which means
5031
+ * two remote replies were being expected for the lkb.)
5032
+ *
5033
+ * Second, the lkb is reprocessed like an original operation would be,
5034
+ * by passing it to _request_lock or _convert_lock, which will either
5035
+ * process the lkb operation locally, or send it to a remote node again
5036
+ * and put the lkb back onto the waiters list.
5037
+ *
5038
+ * When reprocessing the lkb, we may find that it's flagged for an overlapping
5039
+ * force-unlock or cancel, either from before recovery began, or after recovery
5040
+ * finished. If this is the case, the unlock/cancel is done directly, and the
5041
+ * original operation is not initiated again (no _request_lock/_convert_lock.)
5042
+ */
5029
5043
5030
5044
int dlm_recover_waiters_post (struct dlm_ls * ls )
5031
5045
{
@@ -5040,6 +5054,11 @@ int dlm_recover_waiters_post(struct dlm_ls *ls)
5040
5054
break ;
5041
5055
}
5042
5056
5057
+ /*
5058
+ * Find an lkb from the waiters list that's been affected by
5059
+ * recovery node changes, and needs to be reprocessed. Does
5060
+ * hold_lkb(), adding a refcount.
5061
+ */
5043
5062
lkb = find_resend_waiter (ls );
5044
5063
if (!lkb )
5045
5064
break ;
@@ -5048,6 +5067,11 @@ int dlm_recover_waiters_post(struct dlm_ls *ls)
5048
5067
hold_rsb (r );
5049
5068
lock_rsb (r );
5050
5069
5070
+ /*
5071
+ * If the lkb has been flagged for a force unlock or cancel,
5072
+ * then the reprocessing below will be replaced by just doing
5073
+ * the unlock/cancel directly.
5074
+ */
5051
5075
mstype = lkb -> lkb_wait_type ;
5052
5076
oc = test_and_clear_bit (DLM_IFL_OVERLAP_CANCEL_BIT ,
5053
5077
& lkb -> lkb_iflags );
@@ -5061,22 +5085,40 @@ int dlm_recover_waiters_post(struct dlm_ls *ls)
5061
5085
r -> res_nodeid , lkb -> lkb_nodeid , lkb -> lkb_wait_nodeid ,
5062
5086
dlm_dir_nodeid (r ), oc , ou );
5063
5087
5064
- /* At this point we assume that we won't get a reply to any
5065
- previous op or overlap op on this lock. First, do a big
5066
- remove_from_waiters() for all previous ops. */
5088
+ /*
5089
+ * No reply to the pre-recovery operation will now be received,
5090
+ * so a forced equivalent of remove_from_waiters() is needed to
5091
+ * reset the waiters state that was in place before recovery.
5092
+ */
5067
5093
5068
5094
clear_bit (DLM_IFL_RESEND_BIT , & lkb -> lkb_iflags );
5095
+
5096
+ /* Forcibly clear wait_type */
5069
5097
lkb -> lkb_wait_type = 0 ;
5070
- /* drop all wait_count references we still
5071
- * hold a reference for this iteration.
5098
+
5099
+ /*
5100
+ * Forcibly reset wait_count and associated refcount. The
5101
+ * wait_count will almost always be 1, but in case of an
5102
+ * overlapping unlock/cancel it could be 2: see where
5103
+ * add_to_waiters() finds the lkb is already on the waiters
5104
+ * list and does lkb_wait_count++; hold_lkb().
5072
5105
*/
5073
- while (!atomic_dec_and_test (& lkb -> lkb_wait_count ))
5106
+ while (lkb -> lkb_wait_count ) {
5107
+ lkb -> lkb_wait_count -- ;
5074
5108
unhold_lkb (lkb );
5109
+ }
5075
5110
5111
+ /* Forcibly remove from waiters list */
5076
5112
mutex_lock (& ls -> ls_waiters_mutex );
5077
5113
list_del_init (& lkb -> lkb_wait_reply );
5078
5114
mutex_unlock (& ls -> ls_waiters_mutex );
5079
5115
5116
+ /*
5117
+ * The lkb is now clear of all prior waiters state and can be
5118
+ * processed locally, or sent to remote node again, or directly
5119
+ * cancelled/unlocked.
5120
+ */
5121
+
5080
5122
if (oc || ou ) {
5081
5123
/* do an unlock or cancel instead of resending */
5082
5124
switch (mstype ) {
0 commit comments