@@ -50,7 +50,7 @@ static char *sb_writers_name[SB_FREEZE_LEVELS] = {
 	"sb_internal",
 };
 
-static inline void super_lock(struct super_block *sb, bool excl)
+static inline void __super_lock(struct super_block *sb, bool excl)
 {
 	if (excl)
 		down_write(&sb->s_umount);
@@ -66,14 +66,9 @@ static inline void super_unlock(struct super_block *sb, bool excl)
 		up_read(&sb->s_umount);
 }
 
-static inline void super_lock_excl(struct super_block *sb)
+static inline void __super_lock_excl(struct super_block *sb)
 {
-	super_lock(sb, true);
-}
-
-static inline void super_lock_shared(struct super_block *sb)
-{
-	super_lock(sb, false);
+	__super_lock(sb, true);
 }
 
 static inline void super_unlock_excl(struct super_block *sb)
@@ -86,6 +81,99 @@ static inline void super_unlock_shared(struct super_block *sb)
 	super_unlock(sb, false);
 }
 
+static inline bool wait_born(struct super_block *sb)
+{
+	unsigned int flags;
+
+	/*
+	 * Pairs with smp_store_release() in super_wake() and ensures
+	 * that we see SB_BORN or SB_DYING after we're woken.
+	 */
+	flags = smp_load_acquire(&sb->s_flags);
+	return flags & (SB_BORN | SB_DYING);
+}
+
+/**
+ * super_lock - wait for superblock to become ready and lock it
+ * @sb: superblock to wait for
+ * @excl: whether exclusive access is required
+ *
+ * If the superblock has neither passed through vfs_get_tree() nor
+ * generic_shutdown_super() yet, wait for it to happen. Either superblock
+ * creation will succeed and SB_BORN is set by vfs_get_tree() or we're
+ * woken and we'll see SB_DYING.
+ *
+ * The caller must have acquired a temporary reference on @sb->s_count.
+ *
+ * Return: true if SB_BORN was set, false if SB_DYING was set. The
+ * function acquires s_umount and returns with it held.
+ */
+static __must_check bool super_lock(struct super_block *sb, bool excl)
+{
+
+	lockdep_assert_not_held(&sb->s_umount);
+
+relock:
+	__super_lock(sb, excl);
+
+	/*
+	 * Has gone through generic_shutdown_super() in the meantime.
+	 * @sb->s_root is NULL and @sb->s_active is 0. No one needs to
+	 * grab a reference to this. Tell them so.
+	 */
+	if (sb->s_flags & SB_DYING)
+		return false;
+
+	/* Has called ->get_tree() successfully. */
+	if (sb->s_flags & SB_BORN)
+		return true;
+
+	super_unlock(sb, excl);
+
+	/* wait until the superblock is ready or dying */
+	wait_var_event(&sb->s_flags, wait_born(sb));
+
+	/*
+	 * Neither SB_BORN nor SB_DYING are ever unset so we never loop.
+	 * Just reacquire @sb->s_umount for the caller.
+	 */
+	goto relock;
+}
+
+/* wait and acquire read-side of @sb->s_umount */
+static inline bool super_lock_shared(struct super_block *sb)
+{
+	return super_lock(sb, false);
+}
+
+/* wait and acquire write-side of @sb->s_umount */
+static inline bool super_lock_excl(struct super_block *sb)
+{
+	return super_lock(sb, true);
+}
+
+/* wake waiters */
+#define SUPER_WAKE_FLAGS (SB_BORN | SB_DYING)
+static void super_wake(struct super_block *sb, unsigned int flag)
+{
+	WARN_ON_ONCE((flag & ~SUPER_WAKE_FLAGS));
+	WARN_ON_ONCE(hweight32(flag & SUPER_WAKE_FLAGS) > 1);
+
+	/*
+	 * Pairs with smp_load_acquire() in super_lock() to make sure
+	 * all initializations in the superblock are seen by the user
+	 * seeing SB_BORN set.
+	 */
+	smp_store_release(&sb->s_flags, sb->s_flags | flag);
+	/*
+	 * Pairs with the barrier in prepare_to_wait_event() to make sure
+	 * ___wait_var_event() either sees SB_BORN set or
+	 * waitqueue_active() check in wake_up_var() sees the waiter.
+	 */
+	smp_mb();
+	wake_up_var(&sb->s_flags);
+}
+
 /*
  * One thing we have to be careful of with a per-sb shrinker is that we don't
  * drop the last active reference to the superblock from within the shrinker.
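
The heart of the series is the handshake between super_wake() and super_lock(): the setup path fully initializes the superblock, then publishes SB_BORN (or SB_DYING) with release semantics, and every waiter re-reads the flags with acquire semantics so that all earlier initialization is guaranteed visible once the flag is observed. Below is a minimal userspace sketch of that publish/wait pattern using C11 atomics and pthreads in place of wait_var_event()/wake_up_var(); the names sb_sim, sim_wake() and sim_wait_born() are invented for illustration and nothing here is kernel API.

/*
 * Userspace analogue of the SB_BORN/SB_DYING handshake above.
 * Build with: cc -std=c11 -pthread demo.c
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define BORN  (1u << 0)
#define DYING (1u << 1)

struct sb_sim {
	int payload;                /* stands in for the superblock contents */
	_Atomic unsigned int flags; /* stands in for sb->s_flags */
	pthread_mutex_t lock;
	pthread_cond_t cond;
};

/*
 * Publisher side, mirroring super_wake(): the release store guarantees
 * that every write to @payload made before it is visible to any thread
 * whose acquire load observes the flag.
 */
static void sim_wake(struct sb_sim *sb, unsigned int flag)
{
	pthread_mutex_lock(&sb->lock);
	atomic_fetch_or_explicit(&sb->flags, flag, memory_order_release);
	pthread_cond_broadcast(&sb->cond);
	pthread_mutex_unlock(&sb->lock);
}

/* Waiter side, mirroring wait_var_event(&sb->s_flags, wait_born(sb)). */
static bool sim_wait_born(struct sb_sim *sb)
{
	unsigned int flags;

	pthread_mutex_lock(&sb->lock);
	while (!((flags = atomic_load_explicit(&sb->flags,
					       memory_order_acquire)) &
		 (BORN | DYING)))
		pthread_cond_wait(&sb->cond, &sb->lock);
	pthread_mutex_unlock(&sb->lock);
	return flags & BORN;
}

static void *setup_thread(void *arg)
{
	struct sb_sim *sb = arg;

	sb->payload = 42;   /* "fill_super()": initialize before publishing */
	sim_wake(sb, BORN); /* publish: waiters may now read @payload */
	return NULL;
}

int main(void)
{
	struct sb_sim sb = {
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.cond = PTHREAD_COND_INITIALIZER,
	};
	pthread_t t;

	pthread_create(&t, NULL, setup_thread, &sb);
	if (sim_wait_born(&sb))
		printf("born, payload=%d\n", sb.payload);
	pthread_join(&t, NULL);
	return 0;
}

In this sketch the condition variable's mutex already provides the ordering; the kernel version takes no lock on the wake side, which is exactly why the explicit smp_store_release()/smp_mb() pairing above is load-bearing.
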
@@ -393,7 +481,7 @@ EXPORT_SYMBOL(deactivate_locked_super);
 void deactivate_super(struct super_block *s)
 {
 	if (!atomic_add_unless(&s->s_active, -1, 1)) {
-		super_lock_excl(s);
+		__super_lock_excl(s);
 		deactivate_locked_super(s);
 	}
 }
@@ -415,10 +503,12 @@ EXPORT_SYMBOL(deactivate_super);
  */
 static int grab_super(struct super_block *s) __releases(sb_lock)
 {
+	bool born;
+
 	s->s_count++;
 	spin_unlock(&sb_lock);
-	super_lock_excl(s);
-	if ((s->s_flags & SB_BORN) && atomic_inc_not_zero(&s->s_active)) {
+	born = super_lock_excl(s);
+	if (born && atomic_inc_not_zero(&s->s_active)) {
 		put_super(s);
 		return 1;
 	}
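
grab_super() now learns from super_lock_excl() whether the superblock was ever born, but the second half of the test is unchanged: atomic_inc_not_zero() takes a reference only while s_active is still non-zero, so a superblock that has already dropped its last active reference cannot be revived. A portable C11 sketch of that inc-not-zero idiom (not the kernel's actual implementation):

#include <stdatomic.h>
#include <stdbool.h>

static bool inc_not_zero(_Atomic int *count)
{
	int old = atomic_load_explicit(count, memory_order_relaxed);

	do {
		if (old == 0)
			return false; /* already dead: refuse to revive */
	} while (!atomic_compare_exchange_weak_explicit(count, &old, old + 1,
							memory_order_acquire,
							memory_order_relaxed));
	return true;
}

int main(void)
{
	_Atomic int live = 1, dead = 0;

	return inc_not_zero(&live) && !inc_not_zero(&dead) ? 0 : 1;
}
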
@@ -447,8 +537,8 @@ static int grab_super(struct super_block *s) __releases(sb_lock)
 bool super_trylock_shared(struct super_block *sb)
 {
 	if (down_read_trylock(&sb->s_umount)) {
-		if (!hlist_unhashed(&sb->s_instances) &&
-		    sb->s_root && (sb->s_flags & SB_BORN))
+		if (!(sb->s_flags & SB_DYING) && sb->s_root &&
+		    (sb->s_flags & SB_BORN))
 			return true;
 		super_unlock_shared(sb);
 	}
@@ -475,7 +565,7 @@ bool super_trylock_shared(struct super_block *sb)
 void retire_super(struct super_block *sb)
 {
 	WARN_ON(!sb->s_bdev);
-	super_lock_excl(sb);
+	__super_lock_excl(sb);
 	if (sb->s_iflags & SB_I_PERSB_BDI) {
 		bdi_unregister(sb->s_bdi);
 		sb->s_iflags &= ~SB_I_PERSB_BDI;
@@ -557,6 +647,13 @@ void generic_shutdown_super(struct super_block *sb)
 	/* should be initialized for __put_super_and_need_restart() */
 	hlist_del_init(&sb->s_instances);
 	spin_unlock(&sb_lock);
+	/*
+	 * Broadcast to everyone that grabbed a temporary reference to this
+	 * superblock before we removed it from @fs_supers that the superblock
+	 * is dying. Every walker of @fs_supers outside of sget{_fc}() will now
+	 * discard this superblock and treat it as dead.
+	 */
+	super_wake(sb, SB_DYING);
 	super_unlock_excl(sb);
 	if (sb->s_bdi != &noop_backing_dev_info) {
 		if (sb->s_iflags & SB_I_PERSB_BDI)
@@ -631,6 +728,11 @@ struct super_block *sget_fc(struct fs_context *fc,
 	s->s_type = fc->fs_type;
 	s->s_iflags |= fc->s_iflags;
 	strscpy(s->s_id, s->s_type->name, sizeof(s->s_id));
+	/*
+	 * Make the superblock visible on @super_blocks and @fs_supers.
+	 * It's in a nascent state and users should wait on SB_BORN or
+	 * SB_DYING to be set.
+	 */
 	list_add_tail(&s->s_list, &super_blocks);
 	hlist_add_head(&s->s_instances, &s->s_type->fs_supers);
 	spin_unlock(&sb_lock);
@@ -740,7 +842,8 @@ static void __iterate_supers(void (*f)(struct super_block *))
 
 	spin_lock(&sb_lock);
 	list_for_each_entry(sb, &super_blocks, s_list) {
-		if (hlist_unhashed(&sb->s_instances))
+		/* Pairs with memory barrier in super_wake(). */
+		if (smp_load_acquire(&sb->s_flags) & SB_DYING)
 			continue;
 		sb->s_count++;
 		spin_unlock(&sb_lock);
@@ -770,13 +873,13 @@ void iterate_supers(void (*f)(struct super_block *, void *), void *arg)
 
 	spin_lock(&sb_lock);
 	list_for_each_entry(sb, &super_blocks, s_list) {
-		if (hlist_unhashed(&sb->s_instances))
-			continue;
+		bool born;
+
 		sb->s_count++;
 		spin_unlock(&sb_lock);
 
-		super_lock_shared(sb);
-		if (sb->s_root && (sb->s_flags & SB_BORN))
+		born = super_lock_shared(sb);
+		if (born && sb->s_root)
 			f(sb, arg);
 		super_unlock_shared(sb);
 
@@ -806,11 +909,13 @@ void iterate_supers_type(struct file_system_type *type,
 
 	spin_lock(&sb_lock);
 	hlist_for_each_entry(sb, &type->fs_supers, s_instances) {
+		bool born;
+
 		sb->s_count++;
 		spin_unlock(&sb_lock);
 
-		super_lock_shared(sb);
-		if (sb->s_root && (sb->s_flags & SB_BORN))
+		born = super_lock_shared(sb);
+		if (born && sb->s_root)
 			f(sb, arg);
 		super_unlock_shared(sb);
 
@@ -841,14 +946,11 @@ struct super_block *get_active_super(struct block_device *bdev)
 	if (!bdev)
 		return NULL;
 
-restart:
 	spin_lock(&sb_lock);
 	list_for_each_entry(sb, &super_blocks, s_list) {
-		if (hlist_unhashed(&sb->s_instances))
-			continue;
 		if (sb->s_bdev == bdev) {
 			if (!grab_super(sb))
-				goto restart;
+				return NULL;
 			super_unlock_excl(sb);
 			return sb;
 		}
@@ -862,22 +964,21 @@ struct super_block *user_get_super(dev_t dev, bool excl)
 	struct super_block *sb;
 
 	spin_lock(&sb_lock);
-rescan:
 	list_for_each_entry(sb, &super_blocks, s_list) {
-		if (hlist_unhashed(&sb->s_instances))
-			continue;
 		if (sb->s_dev == dev) {
+			bool born;
+
 			sb->s_count++;
 			spin_unlock(&sb_lock);
-			super_lock(sb, excl);
 			/* still alive? */
-			if (sb->s_root && (sb->s_flags & SB_BORN))
+			born = super_lock(sb, excl);
+			if (born && sb->s_root)
 				return sb;
 			super_unlock(sb, excl);
 			/* nope, got unmounted */
 			spin_lock(&sb_lock);
 			__put_super(sb);
-			goto rescan;
+			break;
 		}
 	}
 	spin_unlock(&sb_lock);
@@ -921,7 +1022,7 @@ int reconfigure_super(struct fs_context *fc)
 	if (!hlist_empty(&sb->s_pins)) {
 		super_unlock_excl(sb);
 		group_pin_kill(&sb->s_pins);
-		super_lock_excl(sb);
+		__super_lock_excl(sb);
 		if (!sb->s_root)
 			return 0;
 		if (sb->s_writers.frozen != SB_UNFROZEN)
@@ -984,9 +1085,9 @@ int reconfigure_super(struct fs_context *fc)
 
 static void do_emergency_remount_callback(struct super_block *sb)
 {
-	super_lock_excl(sb);
-	if (sb->s_root && sb->s_bdev && (sb->s_flags & SB_BORN) &&
-	    !sb_rdonly(sb)) {
+	bool born = super_lock_excl(sb);
+
+	if (born && sb->s_root && sb->s_bdev && !sb_rdonly(sb)) {
 		struct fs_context *fc;
 
 		fc = fs_context_for_reconfigure(sb->s_root,
@@ -1020,8 +1121,9 @@ void emergency_remount(void)
 
 static void do_thaw_all_callback(struct super_block *sb)
 {
-	super_lock_excl(sb);
-	if (sb->s_root && sb->s_flags & SB_BORN) {
+	bool born = super_lock_excl(sb);
+
+	if (born && sb->s_root) {
 		emergency_thaw_bdev(sb);
 		thaw_super_locked(sb);
 	} else {
@@ -1212,9 +1314,9 @@ EXPORT_SYMBOL(get_tree_keyed);
  */
 static bool super_lock_shared_active(struct super_block *sb)
 {
-	super_lock_shared(sb);
-	if (!sb->s_root ||
-	    (sb->s_flags & (SB_ACTIVE | SB_BORN)) != (SB_ACTIVE | SB_BORN)) {
+	bool born = super_lock_shared(sb);
+
+	if (!born || !sb->s_root || !(sb->s_flags & SB_ACTIVE)) {
 		super_unlock_shared(sb);
 		return false;
 	}
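
The rewritten condition is equivalent to the old combined mask test because a true return from super_lock_shared() already guarantees SB_BORN, leaving only SB_ACTIVE to check. A standalone demonstration, with placeholder flag values rather than the kernel's real ones (those live in include/linux/fs.h):

#include <assert.h>

#define SB_BORN   (1u << 0) /* placeholder values, illustration only */
#define SB_ACTIVE (1u << 1)

int main(void)
{
	unsigned int active = SB_BORN | SB_ACTIVE;
	unsigned int inactive = SB_BORN;

	/* old form: both bits must be set at once */
	assert((active & (SB_ACTIVE | SB_BORN)) == (SB_ACTIVE | SB_BORN));
	assert((inactive & (SB_ACTIVE | SB_BORN)) != (SB_ACTIVE | SB_BORN));
	/* new form: SB_BORN already known, so test SB_ACTIVE alone */
	assert(active & SB_ACTIVE);
	assert(!(inactive & SB_ACTIVE));
	return 0;
}
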
@@ -1374,7 +1476,7 @@ int get_tree_bdev(struct fs_context *fc,
 	 */
 	super_unlock_excl(s);
 	error = setup_bdev_super(s, fc->sb_flags, fc);
-	super_lock_excl(s);
+	__super_lock_excl(s);
 	if (!error)
 		error = fill_super(s, fc);
 	if (error) {
@@ -1426,7 +1528,7 @@ struct dentry *mount_bdev(struct file_system_type *fs_type,
 	 */
 	super_unlock_excl(s);
 	error = setup_bdev_super(s, flags, NULL);
-	super_lock_excl(s);
+	__super_lock_excl(s);
 	if (!error)
 		error = fill_super(s, data, flags & SB_SILENT ? 1 : 0);
 	if (error) {
@@ -1566,13 +1668,13 @@ int vfs_get_tree(struct fs_context *fc)
 	WARN_ON(!sb->s_bdi);
 
 	/*
-	 * Write barrier is for super_cache_count(). We place it before setting
-	 * SB_BORN as the data dependency between the two functions is the
-	 * superblock structure contents that we just set up, not the SB_BORN
-	 * flag.
+	 * super_wake() contains a memory barrier which also takes care of
+	 * ordering for super_cache_count(). We place it before setting
+	 * SB_BORN as the data dependency between the two functions is
+	 * the superblock structure contents that we just set up, not
+	 * the SB_BORN flag.
 	 */
-	smp_wmb();
-	sb->s_flags |= SB_BORN;
+	super_wake(sb, SB_BORN);
 
 	error = security_sb_set_mnt_opts(sb, fc->security, 0, NULL);
 	if (unlikely(error)) {
@@ -1715,7 +1817,7 @@ int freeze_super(struct super_block *sb)
 	int ret;
 
 	atomic_inc(&sb->s_active);
-	super_lock_excl(sb);
+	__super_lock_excl(sb);
 	if (sb->s_writers.frozen != SB_UNFROZEN) {
 		deactivate_locked_super(sb);
 		return -EBUSY;
@@ -1737,7 +1839,7 @@ int freeze_super(struct super_block *sb)
 	/* Release s_umount to preserve sb_start_write -> s_umount ordering */
 	super_unlock_excl(sb);
 	sb_wait_write(sb, SB_FREEZE_WRITE);
-	super_lock_excl(sb);
+	__super_lock_excl(sb);
 
 	/* Now we go and block page faults... */
 	sb->s_writers.frozen = SB_FREEZE_PAGEFAULT;
@@ -1820,7 +1922,7 @@ static int thaw_super_locked(struct super_block *sb)
  */
 int thaw_super(struct super_block *sb)
 {
-	super_lock_excl(sb);
+	__super_lock_excl(sb);
 	return thaw_super_locked(sb);
 }
 EXPORT_SYMBOL(thaw_super);