Commit e913fc82 authored by Jens Axboe

writeback: fix WB_SYNC_NONE writeback from umount

When umount calls sync_filesystem(), we first do a WB_SYNC_NONE
writeback to kick off writeback of pending dirty inodes, then follow
that up with a WB_SYNC_ALL to wait for it. Since umount already holds
the sb s_umount mutex, WB_SYNC_NONE ends up doing nothing and all
writeback happens as WB_SYNC_ALL. This can greatly slow down umount,
since WB_SYNC_ALL writeback is a data integrity operation and thus
a bigger hammer than simple WB_SYNC_NONE. For barrier aware file systems
it's a lot slower.
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
parent 69b62d01
......@@ -45,6 +45,7 @@ struct wb_writeback_args {
int for_kupdate:1;
int range_cyclic:1;
int for_background:1;
int sb_pinned:1;
};
/*
......@@ -230,6 +231,11 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi,
.sync_mode = WB_SYNC_ALL,
.nr_pages = LONG_MAX,
.range_cyclic = 0,
/*
* Setting sb_pinned is not necessary for WB_SYNC_ALL, but
* let's make it explicitly clear.
*/
.sb_pinned = 1,
};
struct bdi_work work;
......@@ -245,21 +251,23 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi,
* @bdi: the backing device to write from
* @sb: write inodes from this super_block
* @nr_pages: the number of pages to write
* @sb_locked: caller already holds sb umount sem.
*
* Description:
* This does WB_SYNC_NONE opportunistic writeback. The IO is only
* started when this function returns, we make no guarantees on
* completion. Caller need not hold sb s_umount semaphore.
* completion. Caller specifies whether sb umount sem is held already or not.
*
*/
void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb,
long nr_pages)
long nr_pages, int sb_locked)
{
struct wb_writeback_args args = {
.sb = sb,
.sync_mode = WB_SYNC_NONE,
.nr_pages = nr_pages,
.range_cyclic = 1,
.sb_pinned = sb_locked,
};
/*
......@@ -577,7 +585,7 @@ static enum sb_pin_state pin_sb_for_writeback(struct writeback_control *wbc,
/*
* Caller must already hold the ref for this
*/
if (wbc->sync_mode == WB_SYNC_ALL) {
if (wbc->sync_mode == WB_SYNC_ALL || wbc->sb_pinned) {
WARN_ON(!rwsem_is_locked(&sb->s_umount));
return SB_NOT_PINNED;
}
......@@ -751,6 +759,7 @@ static long wb_writeback(struct bdi_writeback *wb,
.for_kupdate = args->for_kupdate,
.for_background = args->for_background,
.range_cyclic = args->range_cyclic,
.sb_pinned = args->sb_pinned,
};
unsigned long oldest_jif;
long wrote = 0;
......@@ -1193,6 +1202,18 @@ static void wait_sb_inodes(struct super_block *sb)
iput(old_inode);
}
/*
 * Shared worker for writeback_inodes_sb() and writeback_inodes_sb_locked().
 *
 * Builds a page budget from the global dirty and unstable-NFS page counts
 * plus the number of in-use inodes, then hands it to bdi_start_writeback()
 * for @sb's backing device.
 *
 * @sb:        super_block whose inodes should be written back
 * @sb_locked: forwarded unchanged to bdi_start_writeback(); non-zero when
 *             the caller already holds the sb umount sem
 */
static void __writeback_inodes_sb(struct super_block *sb, int sb_locked)
{
	unsigned long budget;
	long nr_to_write;

	/* Dirty file pages plus NFS pages that are written but not committed. */
	budget = global_page_state(NR_FILE_DIRTY);
	budget += global_page_state(NR_UNSTABLE_NFS);

	/* Add one unit per inode that is currently in use. */
	budget += inodes_stat.nr_inodes - inodes_stat.nr_unused;

	nr_to_write = budget;
	bdi_start_writeback(sb->s_bdi, sb, nr_to_write, sb_locked);
}
/**
* writeback_inodes_sb - writeback dirty inodes from given super_block
* @sb: the superblock
......@@ -1204,17 +1225,22 @@ static void wait_sb_inodes(struct super_block *sb)
*/
void writeback_inodes_sb(struct super_block *sb)
{
/*
 * NOTE(review): this is a flattened diff view. The six lines below appear
 * to be the pre-patch body: they repeat the computation that now lives in
 * __writeback_inodes_sb() and use the old three-argument form of
 * bdi_start_writeback().
 */
unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
long nr_to_write;
nr_to_write = nr_dirty + nr_unstable +
(inodes_stat.nr_inodes - inodes_stat.nr_unused);
bdi_start_writeback(sb->s_bdi, sb, nr_to_write);
/*
 * Post-patch body: delegate to the shared helper with sb_locked = 0,
 * i.e. without claiming that the caller holds the sb umount sem.
 */
__writeback_inodes_sb(sb, 0);
}
EXPORT_SYMBOL(writeback_inodes_sb);
/**
 * writeback_inodes_sb_locked - writeback dirty inodes from given super_block
 * @sb: the superblock
 *
 * Like writeback_inodes_sb(), except the caller already holds the
 * sb umount sem. The request goes through __writeback_inodes_sb() with
 * sb_locked set to 1, which bdi_start_writeback() stores as sb_pinned in
 * the writeback arguments.
 */
void writeback_inodes_sb_locked(struct super_block *sb)
{
/* Second argument is sb_locked: 1 tells the helper the sb umount sem is held. */
__writeback_inodes_sb(sb, 1);
}
/**
* writeback_inodes_sb_if_idle - start writeback if none underway
* @sb: the superblock
......
......@@ -42,7 +42,7 @@ static int __sync_filesystem(struct super_block *sb, int wait)
if (wait)
sync_inodes_sb(sb);
else
writeback_inodes_sb(sb);
writeback_inodes_sb_locked(sb);
if (sb->s_op->sync_fs)
sb->s_op->sync_fs(sb, wait);
......
......@@ -106,7 +106,7 @@ int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev);
void bdi_unregister(struct backing_dev_info *bdi);
int bdi_setup_and_register(struct backing_dev_info *, char *, unsigned int);
void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb,
long nr_pages);
long nr_pages, int sb_locked);
int bdi_writeback_task(struct bdi_writeback *wb);
int bdi_has_dirty_io(struct backing_dev_info *bdi);
......
......@@ -65,6 +65,15 @@ struct writeback_control {
* so we use a single control to update them
*/
unsigned no_nrwrite_index_update:1;
/*
* For WB_SYNC_ALL, the sb must always be pinned. For WB_SYNC_NONE,
* the writeback code will pin the sb for the caller. However,
* for eg umount, the caller does WB_SYNC_NONE but already has
* the sb pinned. If the below is set, caller already has the
* sb pinned.
*/
unsigned sb_pinned:1;
};
/*
......@@ -73,6 +82,7 @@ struct writeback_control {
struct bdi_writeback;
int inode_wait(void *);
void writeback_inodes_sb(struct super_block *);
void writeback_inodes_sb_locked(struct super_block *);
int writeback_inodes_sb_if_idle(struct super_block *);
void sync_inodes_sb(struct super_block *);
void writeback_inodes_wbc(struct writeback_control *wbc);
......
......@@ -597,7 +597,7 @@ static void balance_dirty_pages(struct address_space *mapping,
(!laptop_mode && ((global_page_state(NR_FILE_DIRTY)
+ global_page_state(NR_UNSTABLE_NFS))
> background_thresh)))
bdi_start_writeback(bdi, NULL, 0);
bdi_start_writeback(bdi, NULL, 0, 0);
}
void set_page_dirty_balance(struct page *page, int page_mkwrite)
......@@ -705,7 +705,7 @@ void laptop_mode_timer_fn(unsigned long data)
*/
if (bdi_has_dirty_io(&q->backing_dev_info))
bdi_start_writeback(&q->backing_dev_info, NULL, nr_pages);
/*
 * Argument order is (bdi, sb, nr_pages, sb_locked). No sb is passed here,
 * so sb_locked is 0 and nr_pages goes in the third slot.
 */
bdi_start_writeback(&q->backing_dev_info, NULL, nr_pages, 0);
}
/*
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment