Commit 4b1cfa2e authored by Qu Wenruo's avatar Qu Wenruo Committed by Greg Kroah-Hartman

btrfs: fiemap: Cache and merge fiemap extent before submit it to user

[ Upstream commit 4751832d ]

[BUG]
Cycle mount btrfs can cause fiemap to return different result.
Like:
 # mount /dev/vdb5 /mnt/btrfs
 # dd if=/dev/zero bs=16K count=4 oflag=dsync of=/mnt/btrfs/file
 # xfs_io -c "fiemap -v" /mnt/btrfs/file
 /mnt/test/file:
 EXT: FILE-OFFSET      BLOCK-RANGE      TOTAL FLAGS
   0: [0..127]:        25088..25215       128   0x1
 # umount /mnt/btrfs
 # mount /dev/vdb5 /mnt/btrfs
 # xfs_io -c "fiemap -v" /mnt/btrfs/file
 /mnt/test/file:
 EXT: FILE-OFFSET      BLOCK-RANGE      TOTAL FLAGS
   0: [0..31]:         25088..25119        32   0x0
   1: [32..63]:        25120..25151        32   0x0
   2: [64..95]:        25152..25183        32   0x0
   3: [96..127]:       25184..25215        32   0x1
But after above fiemap, we get correct merged result if we call fiemap
again.
 # xfs_io -c "fiemap -v" /mnt/btrfs/file
 /mnt/test/file:
 EXT: FILE-OFFSET      BLOCK-RANGE      TOTAL FLAGS
   0: [0..127]:        25088..25215       128   0x1

[REASON]
Btrfs will try to merge extent map when inserting new extent map.

btrfs_fiemap(start=0 len=(u64)-1)
|- extent_fiemap(start=0 len=(u64)-1)
   |- get_extent_skip_holes(start=0 len=64k)
   |  |- btrfs_get_extent_fiemap(start=0 len=64k)
   |     |- btrfs_get_extent(start=0 len=64k)
   |        |  Found on-disk (ino, EXTENT_DATA, 0)
   |        |- add_extent_mapping()
   |        |- Return (em->start=0, len=16k)
   |
   |- fiemap_fill_next_extent(logic=0 phys=X len=16k)
   |
   |- get_extent_skip_holes(start=0 len=64k)
   |  |- btrfs_get_extent_fiemap(start=0 len=64k)
   |     |- btrfs_get_extent(start=16k len=48k)
   |        |  Found on-disk (ino, EXTENT_DATA, 16k)
   |        |- add_extent_mapping()
   |        |  |- try_merge_map()
   |        |     Merge with previous em start=0 len=16k
   |        |     resulting em start=0 len=32k
   |        |- Return (em->start=0, len=32K)    << Merged result
   |- Stripe off the unrelated range (0~16K) of return em
   |- fiemap_fill_next_extent(logic=16K phys=X+16K len=16K)
      ^^^ Causing split fiemap extent.

And since in add_extent_mapping(), em is already merged, in next
fiemap() call, we will get merged result.

[FIX]
Here we introduce a new structure, fiemap_cache, which records previous
fiemap extent.

And will always try to merge current fiemap_cache result before calling
fiemap_fill_next_extent().
Only when we failed to merge current fiemap extent with cached one, we
will call fiemap_fill_next_extent() to submit cached one.

So by this method, we can merge all fiemap extents.

It can also be done in fs/ioctl.c, however the problem is if
fieinfo->fi_extents_max == 0, we have no space to cache previous fiemap
extent.
So I choose to merge it in btrfs.
Signed-off-by: default avatarQu Wenruo <quwenruo@cn.fujitsu.com>
Reviewed-by: default avatarLiu Bo <bo.li.liu@oracle.com>
Reviewed-by: default avatarDavid Sterba <dsterba@suse.com>
Signed-off-by: default avatarDavid Sterba <dsterba@suse.com>
Signed-off-by: default avatarSasha Levin <sashal@kernel.org>
parent 31105815
......@@ -4377,6 +4377,123 @@ static struct extent_map *get_extent_skip_holes(struct inode *inode,
return NULL;
}
/*
* To cache previous fiemap extent
*
* Will be used for merging fiemap extent
*/
struct fiemap_cache {
u64 offset;
u64 phys;
u64 len;
u32 flags;
bool cached;
};
/*
* Helper to submit fiemap extent.
*
* Will try to merge current fiemap extent specified by @offset, @phys,
* @len and @flags with cached one.
* And only when we fails to merge, cached one will be submitted as
* fiemap extent.
*
* Return value is the same as fiemap_fill_next_extent().
*/
static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo,
struct fiemap_cache *cache,
u64 offset, u64 phys, u64 len, u32 flags)
{
int ret = 0;
if (!cache->cached)
goto assign;
/*
* Sanity check, extent_fiemap() should have ensured that new
* fiemap extent won't overlap with cahced one.
* Not recoverable.
*
* NOTE: Physical address can overlap, due to compression
*/
if (cache->offset + cache->len > offset) {
WARN_ON(1);
return -EINVAL;
}
/*
* Only merges fiemap extents if
* 1) Their logical addresses are continuous
*
* 2) Their physical addresses are continuous
* So truly compressed (physical size smaller than logical size)
* extents won't get merged with each other
*
* 3) Share same flags except FIEMAP_EXTENT_LAST
* So regular extent won't get merged with prealloc extent
*/
if (cache->offset + cache->len == offset &&
cache->phys + cache->len == phys &&
(cache->flags & ~FIEMAP_EXTENT_LAST) ==
(flags & ~FIEMAP_EXTENT_LAST)) {
cache->len += len;
cache->flags |= flags;
goto try_submit_last;
}
/* Not mergeable, need to submit cached one */
ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys,
cache->len, cache->flags);
cache->cached = false;
if (ret)
return ret;
assign:
cache->cached = true;
cache->offset = offset;
cache->phys = phys;
cache->len = len;
cache->flags = flags;
try_submit_last:
if (cache->flags & FIEMAP_EXTENT_LAST) {
ret = fiemap_fill_next_extent(fieinfo, cache->offset,
cache->phys, cache->len, cache->flags);
cache->cached = false;
}
return ret;
}
/*
* Sanity check for fiemap cache
*
* All fiemap cache should be submitted by emit_fiemap_extent()
* Iteration should be terminated either by last fiemap extent or
* fieinfo->fi_extents_max.
* So no cached fiemap should exist.
*/
static int check_fiemap_cache(struct btrfs_fs_info *fs_info,
struct fiemap_extent_info *fieinfo,
struct fiemap_cache *cache)
{
int ret;
if (!cache->cached)
return 0;
/* Small and recoverbale problem, only to info developer */
#ifdef CONFIG_BTRFS_DEBUG
WARN_ON(1);
#endif
btrfs_warn(fs_info,
"unhandled fiemap cache detected: offset=%llu phys=%llu len=%llu flags=0x%x",
cache->offset, cache->phys, cache->len, cache->flags);
ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys,
cache->len, cache->flags);
cache->cached = false;
if (ret > 0)
ret = 0;
return ret;
}
int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len, get_extent_t *get_extent)
{
......@@ -4394,6 +4511,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
struct extent_state *cached_state = NULL;
struct btrfs_path *path;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct fiemap_cache cache = { 0 };
int end = 0;
u64 em_start = 0;
u64 em_len = 0;
......@@ -4573,8 +4691,8 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
flags |= FIEMAP_EXTENT_LAST;
end = 1;
}
ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
em_len, flags);
ret = emit_fiemap_extent(fieinfo, &cache, em_start, disko,
em_len, flags);
if (ret) {
if (ret == 1)
ret = 0;
......@@ -4582,6 +4700,8 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
}
}
out_free:
if (!ret)
ret = check_fiemap_cache(root->fs_info, fieinfo, &cache);
free_extent_map(em);
out:
btrfs_free_path(path);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment