Skip to content

Commit 0d19d9e

Browse files
committed
Merge tag 'ext4_for_linus-6.8-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
Pull ext4 updates from Ted Ts'o:

"Various ext4 bug fixes and cleanups. The fixes are mostly in the fstrim and mballoc code paths. Also enable dioread_nolock in the case where the block size is less than the page size (dioread_nolock has been default in the bs == ps case for quite some time)"

* tag 'ext4_for_linus-6.8-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  ext4: fix inconsistent between segment fstrim and full fstrim
  ext4: fallback to complex scan if aligned scan doesn't work
  ext4: convert ext4_da_do_write_end() to take a folio
  ext4: allow for the last group to be marked as trimmed
  ext4: move ext4_check_bdev_write_error() into nojournal mode
  jbd2: abort journal when detecting metadata writeback error of fs dev
  jbd2: remove unused 'JBD2_CHECKPOINT_IO_ERROR' and 'j_atomic_flags'
  jbd2: replace journal state flag by checking errseq
  jbd2: add errseq to detect client fs's bdev writeback error
  ext4: improving calculation of 'fe_{len|start}' in mb_find_extent()
  ext4: clarify handling of unwritten bh in __ext4_block_zero_page_range()
  ext4: treat end of range as exclusive in ext4_zero_range()
  ext4: enable dioread_nolock as default for bs < ps case
  ext4: delete redundant calculations in ext4_mb_get_buddy_page_lock()
  ext4: reduce unnecessary memory allocation in alloc_flex_gd()
  ext4: avoid online resizing failures due to oversized flex bg
  ext4: remove unnecessary check from alloc_flex_gd()
  ext4: unify the type of flexbg_size to unsigned int
2 parents 6bd593b + 68da4c4 commit 0d19d9e

File tree

11 files changed

+140
-101
lines changed

11 files changed

+140
-101
lines changed

fs/ext4/ext4_jbd2.c

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -235,16 +235,15 @@ int __ext4_journal_get_write_access(const char *where, unsigned int line,
235235

236236
might_sleep();
237237

238-
ext4_check_bdev_write_error(sb);
239-
240238
if (ext4_handle_valid(handle)) {
241239
err = jbd2_journal_get_write_access(handle, bh);
242240
if (err) {
243241
ext4_journal_abort_handle(where, line, __func__, bh,
244242
handle, err);
245243
return err;
246244
}
247-
}
245+
} else
246+
ext4_check_bdev_write_error(sb);
248247
if (trigger_type == EXT4_JTR_NONE || !ext4_has_metadata_csum(sb))
249248
return 0;
250249
BUG_ON(trigger_type >= EXT4_JOURNAL_TRIGGER_COUNT);

fs/ext4/extents.c

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4523,7 +4523,8 @@ static long ext4_zero_range(struct file *file, loff_t offset,
45234523
* Round up offset. This is not fallocate, we need to zero out
45244524
* blocks, so convert interior block aligned part of the range to
45254525
* unwritten and possibly manually zero out unaligned parts of the
4526-
* range.
4526+
* range. Here, start and partial_begin are inclusive, end and
4527+
* partial_end are exclusive.
45274528
*/
45284529
start = round_up(offset, 1 << blkbits);
45294530
end = round_down((offset + len), 1 << blkbits);
@@ -4609,7 +4610,8 @@ static long ext4_zero_range(struct file *file, loff_t offset,
46094610
* disk in case of crash before zeroing trans is committed.
46104611
*/
46114612
if (ext4_should_journal_data(inode)) {
4612-
ret = filemap_write_and_wait_range(mapping, start, end);
4613+
ret = filemap_write_and_wait_range(mapping, start,
4614+
end - 1);
46134615
if (ret) {
46144616
filemap_invalidate_unlock(mapping);
46154617
goto out_mutex;

fs/ext4/inode.c

Lines changed: 16 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -2947,7 +2947,7 @@ static int ext4_da_should_update_i_disksize(struct folio *folio,
29472947

29482948
static int ext4_da_do_write_end(struct address_space *mapping,
29492949
loff_t pos, unsigned len, unsigned copied,
2950-
struct page *page)
2950+
struct folio *folio)
29512951
{
29522952
struct inode *inode = mapping->host;
29532953
loff_t old_size = inode->i_size;
@@ -2958,12 +2958,13 @@ static int ext4_da_do_write_end(struct address_space *mapping,
29582958
* block_write_end() will mark the inode as dirty with I_DIRTY_PAGES
29592959
* flag, which all that's needed to trigger page writeback.
29602960
*/
2961-
copied = block_write_end(NULL, mapping, pos, len, copied, page, NULL);
2961+
copied = block_write_end(NULL, mapping, pos, len, copied,
2962+
&folio->page, NULL);
29622963
new_i_size = pos + copied;
29632964

29642965
/*
2965-
* It's important to update i_size while still holding page lock,
2966-
* because page writeout could otherwise come in and zero beyond
2966+
* It's important to update i_size while still holding folio lock,
2967+
* because folio writeout could otherwise come in and zero beyond
29672968
* i_size.
29682969
*
29692970
* Since we are holding inode lock, we are sure i_disksize <=
@@ -2981,14 +2982,14 @@ static int ext4_da_do_write_end(struct address_space *mapping,
29812982

29822983
i_size_write(inode, new_i_size);
29832984
end = (new_i_size - 1) & (PAGE_SIZE - 1);
2984-
if (copied && ext4_da_should_update_i_disksize(page_folio(page), end)) {
2985+
if (copied && ext4_da_should_update_i_disksize(folio, end)) {
29852986
ext4_update_i_disksize(inode, new_i_size);
29862987
disksize_changed = true;
29872988
}
29882989
}
29892990

2990-
unlock_page(page);
2991-
put_page(page);
2991+
folio_unlock(folio);
2992+
folio_put(folio);
29922993

29932994
if (old_size < pos)
29942995
pagecache_isize_extended(inode, old_size, pos);
@@ -3027,10 +3028,10 @@ static int ext4_da_write_end(struct file *file,
30273028
return ext4_write_inline_data_end(inode, pos, len, copied,
30283029
folio);
30293030

3030-
if (unlikely(copied < len) && !PageUptodate(page))
3031+
if (unlikely(copied < len) && !folio_test_uptodate(folio))
30313032
copied = 0;
30323033

3033-
return ext4_da_do_write_end(mapping, pos, len, copied, &folio->page);
3034+
return ext4_da_do_write_end(mapping, pos, len, copied, folio);
30343035
}
30353036

30363037
/*
@@ -3630,6 +3631,12 @@ void ext4_set_aops(struct inode *inode)
36303631
inode->i_mapping->a_ops = &ext4_aops;
36313632
}
36323633

3634+
/*
3635+
* Here we can't skip an unwritten buffer even though it usually reads zero
3636+
* because it might have data in pagecache (eg, if called from ext4_zero_range,
3637+
* ext4_punch_hole, etc) which needs to be properly zeroed out. Otherwise a
3638+
* racing writeback can come later and flush the stale pagecache to disk.
3639+
*/
36333640
static int __ext4_block_zero_page_range(handle_t *handle,
36343641
struct address_space *mapping, loff_t from, loff_t length)
36353642
{

fs/ext4/mballoc.c

Lines changed: 37 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -1456,9 +1456,8 @@ static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
14561456
return 0;
14571457
}
14581458

1459-
block++;
1460-
pnum = block / blocks_per_page;
1461-
page = find_or_create_page(inode->i_mapping, pnum, gfp);
1459+
/* blocks_per_page == 1, hence we need another page for the buddy */
1460+
page = find_or_create_page(inode->i_mapping, block + 1, gfp);
14621461
if (!page)
14631462
return -ENOMEM;
14641463
BUG_ON(page->mapping != inode->i_mapping);
@@ -1958,8 +1957,7 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
19581957
static int mb_find_extent(struct ext4_buddy *e4b, int block,
19591958
int needed, struct ext4_free_extent *ex)
19601959
{
1961-
int next = block;
1962-
int max, order;
1960+
int max, order, next;
19631961
void *buddy;
19641962

19651963
assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
@@ -1977,16 +1975,12 @@ static int mb_find_extent(struct ext4_buddy *e4b, int block,
19771975

19781976
/* find actual order */
19791977
order = mb_find_order_for_block(e4b, block);
1980-
block = block >> order;
19811978

1982-
ex->fe_len = 1 << order;
1983-
ex->fe_start = block << order;
1979+
ex->fe_len = (1 << order) - (block & ((1 << order) - 1));
1980+
ex->fe_start = block;
19841981
ex->fe_group = e4b->bd_group;
19851982

1986-
/* calc difference from given start */
1987-
next = next - ex->fe_start;
1988-
ex->fe_len -= next;
1989-
ex->fe_start += next;
1983+
block = block >> order;
19901984

19911985
while (needed > ex->fe_len &&
19921986
mb_find_buddy(e4b, order, &max)) {
@@ -2895,14 +2889,19 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
28952889
ac->ac_groups_scanned++;
28962890
if (cr == CR_POWER2_ALIGNED)
28972891
ext4_mb_simple_scan_group(ac, &e4b);
2898-
else if ((cr == CR_GOAL_LEN_FAST ||
2899-
cr == CR_BEST_AVAIL_LEN) &&
2900-
sbi->s_stripe &&
2901-
!(ac->ac_g_ex.fe_len %
2902-
EXT4_B2C(sbi, sbi->s_stripe)))
2903-
ext4_mb_scan_aligned(ac, &e4b);
2904-
else
2905-
ext4_mb_complex_scan_group(ac, &e4b);
2892+
else {
2893+
bool is_stripe_aligned = sbi->s_stripe &&
2894+
!(ac->ac_g_ex.fe_len %
2895+
EXT4_B2C(sbi, sbi->s_stripe));
2896+
2897+
if ((cr == CR_GOAL_LEN_FAST ||
2898+
cr == CR_BEST_AVAIL_LEN) &&
2899+
is_stripe_aligned)
2900+
ext4_mb_scan_aligned(ac, &e4b);
2901+
2902+
if (ac->ac_status == AC_STATUS_CONTINUE)
2903+
ext4_mb_complex_scan_group(ac, &e4b);
2904+
}
29062905

29072906
ext4_unlock_group(sb, group);
29082907
ext4_mb_unload_buddy(&e4b);
@@ -6735,11 +6734,16 @@ __acquires(bitlock)
67356734
static ext4_grpblk_t ext4_last_grp_cluster(struct super_block *sb,
67366735
ext4_group_t grp)
67376736
{
6738-
if (grp < ext4_get_groups_count(sb))
6739-
return EXT4_CLUSTERS_PER_GROUP(sb) - 1;
6740-
return (ext4_blocks_count(EXT4_SB(sb)->s_es) -
6741-
ext4_group_first_block_no(sb, grp) - 1) >>
6742-
EXT4_CLUSTER_BITS(sb);
6737+
unsigned long nr_clusters_in_group;
6738+
6739+
if (grp < (ext4_get_groups_count(sb) - 1))
6740+
nr_clusters_in_group = EXT4_CLUSTERS_PER_GROUP(sb);
6741+
else
6742+
nr_clusters_in_group = (ext4_blocks_count(EXT4_SB(sb)->s_es) -
6743+
ext4_group_first_block_no(sb, grp))
6744+
>> EXT4_CLUSTER_BITS(sb);
6745+
6746+
return nr_clusters_in_group - 1;
67436747
}
67446748

67456749
static bool ext4_trim_interrupted(void)
@@ -6753,13 +6757,15 @@ static int ext4_try_to_trim_range(struct super_block *sb,
67536757
__acquires(ext4_group_lock_ptr(sb, e4b->bd_group))
67546758
__releases(ext4_group_lock_ptr(sb, e4b->bd_group))
67556759
{
6756-
ext4_grpblk_t next, count, free_count;
6760+
ext4_grpblk_t next, count, free_count, last, origin_start;
67576761
bool set_trimmed = false;
67586762
void *bitmap;
67596763

6764+
last = ext4_last_grp_cluster(sb, e4b->bd_group);
67606765
bitmap = e4b->bd_bitmap;
6761-
if (start == 0 && max >= ext4_last_grp_cluster(sb, e4b->bd_group))
6766+
if (start == 0 && max >= last)
67626767
set_trimmed = true;
6768+
origin_start = start;
67636769
start = max(e4b->bd_info->bb_first_free, start);
67646770
count = 0;
67656771
free_count = 0;
@@ -6768,7 +6774,10 @@ __releases(ext4_group_lock_ptr(sb, e4b->bd_group))
67686774
start = mb_find_next_zero_bit(bitmap, max + 1, start);
67696775
if (start > max)
67706776
break;
6771-
next = mb_find_next_bit(bitmap, max + 1, start);
6777+
6778+
next = mb_find_next_bit(bitmap, last + 1, start);
6779+
if (origin_start == 0 && next >= last)
6780+
set_trimmed = true;
67726781

67736782
if ((next - start) >= minblocks) {
67746783
int ret = ext4_trim_extent(sb, start, next - start, e4b);

fs/ext4/resize.c

Lines changed: 33 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -218,35 +218,53 @@ struct ext4_new_flex_group_data {
218218
in the flex group */
219219
__u16 *bg_flags; /* block group flags of groups
220220
in @groups */
221+
ext4_group_t resize_bg; /* number of allocated
222+
new_group_data */
221223
ext4_group_t count; /* number of groups in @groups
222224
*/
223225
};
224226

227+
/*
228+
* Avoiding memory allocation failures due to too many groups added each time.
229+
*/
230+
#define MAX_RESIZE_BG 16384
231+
225232
/*
226233
* alloc_flex_gd() allocates a ext4_new_flex_group_data with size of
227234
* @flexbg_size.
228235
*
229236
* Returns NULL on failure otherwise address of the allocated structure.
230237
*/
231-
static struct ext4_new_flex_group_data *alloc_flex_gd(unsigned long flexbg_size)
238+
static struct ext4_new_flex_group_data *alloc_flex_gd(unsigned int flexbg_size,
239+
ext4_group_t o_group, ext4_group_t n_group)
232240
{
241+
ext4_group_t last_group;
233242
struct ext4_new_flex_group_data *flex_gd;
234243

235244
flex_gd = kmalloc(sizeof(*flex_gd), GFP_NOFS);
236245
if (flex_gd == NULL)
237246
goto out3;
238247

239-
if (flexbg_size >= UINT_MAX / sizeof(struct ext4_new_group_data))
240-
goto out2;
241-
flex_gd->count = flexbg_size;
248+
if (unlikely(flexbg_size > MAX_RESIZE_BG))
249+
flex_gd->resize_bg = MAX_RESIZE_BG;
250+
else
251+
flex_gd->resize_bg = flexbg_size;
252+
253+
/* Avoid allocating large 'groups' array if not needed */
254+
last_group = o_group | (flex_gd->resize_bg - 1);
255+
if (n_group <= last_group)
256+
flex_gd->resize_bg = 1 << fls(n_group - o_group + 1);
257+
else if (n_group - last_group < flex_gd->resize_bg)
258+
flex_gd->resize_bg = 1 << max(fls(last_group - o_group + 1),
259+
fls(n_group - last_group));
242260

243-
flex_gd->groups = kmalloc_array(flexbg_size,
261+
flex_gd->groups = kmalloc_array(flex_gd->resize_bg,
244262
sizeof(struct ext4_new_group_data),
245263
GFP_NOFS);
246264
if (flex_gd->groups == NULL)
247265
goto out2;
248266

249-
flex_gd->bg_flags = kmalloc_array(flexbg_size, sizeof(__u16),
267+
flex_gd->bg_flags = kmalloc_array(flex_gd->resize_bg, sizeof(__u16),
250268
GFP_NOFS);
251269
if (flex_gd->bg_flags == NULL)
252270
goto out1;
@@ -283,7 +301,7 @@ static void free_flex_gd(struct ext4_new_flex_group_data *flex_gd)
283301
*/
284302
static int ext4_alloc_group_tables(struct super_block *sb,
285303
struct ext4_new_flex_group_data *flex_gd,
286-
int flexbg_size)
304+
unsigned int flexbg_size)
287305
{
288306
struct ext4_new_group_data *group_data = flex_gd->groups;
289307
ext4_fsblk_t start_blk;
@@ -384,12 +402,12 @@ static int ext4_alloc_group_tables(struct super_block *sb,
384402
group = group_data[0].group;
385403

386404
printk(KERN_DEBUG "EXT4-fs: adding a flex group with "
387-
"%d groups, flexbg size is %d:\n", flex_gd->count,
405+
"%u groups, flexbg size is %u:\n", flex_gd->count,
388406
flexbg_size);
389407

390408
for (i = 0; i < flex_gd->count; i++) {
391409
ext4_debug(
392-
"adding %s group %u: %u blocks (%d free, %d mdata blocks)\n",
410+
"adding %s group %u: %u blocks (%u free, %u mdata blocks)\n",
393411
ext4_bg_has_super(sb, group + i) ? "normal" :
394412
"no-super", group + i,
395413
group_data[i].blocks_count,
@@ -1605,8 +1623,7 @@ static int ext4_flex_group_add(struct super_block *sb,
16051623

16061624
static int ext4_setup_next_flex_gd(struct super_block *sb,
16071625
struct ext4_new_flex_group_data *flex_gd,
1608-
ext4_fsblk_t n_blocks_count,
1609-
unsigned long flexbg_size)
1626+
ext4_fsblk_t n_blocks_count)
16101627
{
16111628
struct ext4_sb_info *sbi = EXT4_SB(sb);
16121629
struct ext4_super_block *es = sbi->s_es;
@@ -1630,7 +1647,7 @@ static int ext4_setup_next_flex_gd(struct super_block *sb,
16301647
BUG_ON(last);
16311648
ext4_get_group_no_and_offset(sb, n_blocks_count - 1, &n_group, &last);
16321649

1633-
last_group = group | (flexbg_size - 1);
1650+
last_group = group | (flex_gd->resize_bg - 1);
16341651
if (last_group > n_group)
16351652
last_group = n_group;
16361653

@@ -1990,8 +2007,9 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count)
19902007
ext4_fsblk_t o_blocks_count;
19912008
ext4_fsblk_t n_blocks_count_retry = 0;
19922009
unsigned long last_update_time = 0;
1993-
int err = 0, flexbg_size = 1 << sbi->s_log_groups_per_flex;
2010+
int err = 0;
19942011
int meta_bg;
2012+
unsigned int flexbg_size = ext4_flex_bg_size(sbi);
19952013

19962014
/* See if the device is actually as big as what was requested */
19972015
bh = ext4_sb_bread(sb, n_blocks_count - 1, 0);
@@ -2123,7 +2141,7 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count)
21232141
if (err)
21242142
goto out;
21252143

2126-
flex_gd = alloc_flex_gd(flexbg_size);
2144+
flex_gd = alloc_flex_gd(flexbg_size, o_group, n_group);
21272145
if (flex_gd == NULL) {
21282146
err = -ENOMEM;
21292147
goto out;
@@ -2132,8 +2150,7 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count)
21322150
/* Add flex groups. Note that a regular group is a
21332151
* flex group with 1 group.
21342152
*/
2135-
while (ext4_setup_next_flex_gd(sb, flex_gd, n_blocks_count,
2136-
flexbg_size)) {
2153+
while (ext4_setup_next_flex_gd(sb, flex_gd, n_blocks_count)) {
21372154
if (time_is_before_jiffies(last_update_time + HZ * 10)) {
21382155
if (last_update_time)
21392156
ext4_msg(sb, KERN_INFO,

fs/ext4/super.c

Lines changed: 1 addition & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -2793,15 +2793,6 @@ static int ext4_check_opt_consistency(struct fs_context *fc,
27932793
return -EINVAL;
27942794
}
27952795

2796-
if (ctx_test_mount_opt(ctx, EXT4_MOUNT_DIOREAD_NOLOCK)) {
2797-
int blocksize =
2798-
BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
2799-
if (blocksize < PAGE_SIZE)
2800-
ext4_msg(NULL, KERN_WARNING, "Warning: mounting with an "
2801-
"experimental mount option 'dioread_nolock' "
2802-
"for blocksize < PAGE_SIZE");
2803-
}
2804-
28052796
err = ext4_check_test_dummy_encryption(fc, sb);
28062797
if (err)
28072798
return err;
@@ -4410,7 +4401,7 @@ static void ext4_set_def_opts(struct super_block *sb,
44104401
((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0))
44114402
set_opt(sb, DELALLOC);
44124403

4413-
if (sb->s_blocksize == PAGE_SIZE)
4404+
if (sb->s_blocksize <= PAGE_SIZE)
44144405
set_opt(sb, DIOREAD_NOLOCK);
44154406
}
44164407

0 commit comments

Comments
 (0)