Commit 76dda93c authored by Yan, Zheng's avatar Yan, Zheng Committed by Chris Mason

Btrfs: add snapshot/subvolume destroy ioctl

This patch adds snapshot/subvolume destroy ioctl.  A subvolume that isn't being
used and doesn't contains links to other subvolumes can be destroyed.
Signed-off-by: default avatarYan Zheng <zheng.yan@oracle.com>
Signed-off-by: default avatarChris Mason <chris.mason@oracle.com>
parent 4df27c4d
......@@ -839,9 +839,7 @@ struct btrfs_fs_info {
struct mutex transaction_kthread_mutex;
struct mutex cleaner_mutex;
struct mutex chunk_mutex;
struct mutex drop_mutex;
struct mutex volume_mutex;
struct mutex tree_reloc_mutex;
/*
* this protects the ordered operations list only while we are
* processing all of the entries on it. This way we make
......@@ -852,6 +850,10 @@ struct btrfs_fs_info {
struct mutex ordered_operations_mutex;
struct rw_semaphore extent_commit_sem;
struct rw_semaphore subvol_sem;
struct srcu_struct subvol_srcu;
struct list_head trans_list;
struct list_head hashers;
struct list_head dead_roots;
......@@ -2142,6 +2144,7 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct
int btrfs_search_root(struct btrfs_root *root, u64 search_start,
u64 *found_objectid);
int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid);
int btrfs_find_orphan_roots(struct btrfs_root *tree_root);
int btrfs_set_root_node(struct btrfs_root_item *item,
struct extent_buffer *node);
/* dir-item.c */
......@@ -2273,7 +2276,7 @@ int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end);
int btrfs_writepages(struct address_space *mapping,
struct writeback_control *wbc);
int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
struct btrfs_root *new_root, struct dentry *dentry,
struct btrfs_root *new_root,
u64 new_dirid, u64 alloc_hint);
int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
size_t size, struct bio *bio, unsigned long bio_flags);
......@@ -2289,6 +2292,7 @@ int btrfs_write_inode(struct inode *inode, int wait);
void btrfs_dirty_inode(struct inode *inode);
struct inode *btrfs_alloc_inode(struct super_block *sb);
void btrfs_destroy_inode(struct inode *inode);
void btrfs_drop_inode(struct inode *inode);
int btrfs_init_cachep(void);
void btrfs_destroy_cachep(void);
long btrfs_ioctl_trans_end(struct file *file);
......@@ -2306,6 +2310,8 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode);
int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode);
void btrfs_orphan_cleanup(struct btrfs_root *root);
int btrfs_cont_expand(struct inode *inode, loff_t size);
int btrfs_invalidate_inodes(struct btrfs_root *root);
extern struct dentry_operations btrfs_dentry_operations;
/* ioctl.c */
long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
......
......@@ -1378,8 +1378,10 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi)
err = bdi_register(bdi, NULL, "btrfs-%d",
atomic_inc_return(&btrfs_bdi_num));
if (err)
if (err) {
bdi_destroy(bdi);
return err;
}
bdi->ra_pages = default_backing_dev_info.ra_pages;
bdi->unplug_io_fn = btrfs_unplug_io_fn;
......@@ -1469,9 +1471,12 @@ static int cleaner_kthread(void *arg)
break;
vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);
mutex_lock(&root->fs_info->cleaner_mutex);
btrfs_clean_old_snapshots(root);
mutex_unlock(&root->fs_info->cleaner_mutex);
if (!(root->fs_info->sb->s_flags & MS_RDONLY) &&
mutex_trylock(&root->fs_info->cleaner_mutex)) {
btrfs_clean_old_snapshots(root);
mutex_unlock(&root->fs_info->cleaner_mutex);
}
if (freezing(current)) {
refrigerator();
......@@ -1576,7 +1581,26 @@ struct btrfs_root *open_ctree(struct super_block *sb,
err = -ENOMEM;
goto fail;
}
INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS);
ret = init_srcu_struct(&fs_info->subvol_srcu);
if (ret) {
err = ret;
goto fail;
}
ret = setup_bdi(fs_info, &fs_info->bdi);
if (ret) {
err = ret;
goto fail_srcu;
}
fs_info->btree_inode = new_inode(sb);
if (!fs_info->btree_inode) {
err = -ENOMEM;
goto fail_bdi;
}
INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
INIT_LIST_HEAD(&fs_info->trans_list);
INIT_LIST_HEAD(&fs_info->dead_roots);
INIT_LIST_HEAD(&fs_info->hashers);
......@@ -1586,6 +1610,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
spin_lock_init(&fs_info->delalloc_lock);
spin_lock_init(&fs_info->new_trans_lock);
spin_lock_init(&fs_info->ref_cache_lock);
spin_lock_init(&fs_info->fs_roots_radix_lock);
init_completion(&fs_info->kobj_unregister);
fs_info->tree_root = tree_root;
......@@ -1604,11 +1629,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
fs_info->sb = sb;
fs_info->max_extent = (u64)-1;
fs_info->max_inline = 8192 * 1024;
if (setup_bdi(fs_info, &fs_info->bdi))
goto fail_bdi;
fs_info->btree_inode = new_inode(sb);
fs_info->btree_inode->i_ino = 1;
fs_info->btree_inode->i_nlink = 1;
fs_info->metadata_ratio = 8;
fs_info->thread_pool_size = min_t(unsigned long,
......@@ -1620,6 +1640,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
sb->s_blocksize = 4096;
sb->s_blocksize_bits = blksize_bits(4096);
fs_info->btree_inode->i_ino = BTRFS_BTREE_INODE_OBJECTID;
fs_info->btree_inode->i_nlink = 1;
/*
* we set the i_size on the btree inode to the max possible int.
* the real end of the address space is determined by all of
......@@ -1638,6 +1660,11 @@ struct btrfs_root *open_ctree(struct super_block *sb,
BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops;
BTRFS_I(fs_info->btree_inode)->root = tree_root;
memset(&BTRFS_I(fs_info->btree_inode)->location, 0,
sizeof(struct btrfs_key));
BTRFS_I(fs_info->btree_inode)->dummy_inode = 1;
spin_lock_init(&fs_info->block_group_cache_lock);
fs_info->block_group_cache_tree.rb_node = NULL;
......@@ -1648,21 +1675,16 @@ struct btrfs_root *open_ctree(struct super_block *sb,
fs_info->pinned_extents = &fs_info->freed_extents[0];
fs_info->do_barriers = 1;
BTRFS_I(fs_info->btree_inode)->root = tree_root;
memset(&BTRFS_I(fs_info->btree_inode)->location, 0,
sizeof(struct btrfs_key));
insert_inode_hash(fs_info->btree_inode);
mutex_init(&fs_info->trans_mutex);
mutex_init(&fs_info->ordered_operations_mutex);
mutex_init(&fs_info->tree_log_mutex);
mutex_init(&fs_info->drop_mutex);
mutex_init(&fs_info->chunk_mutex);
mutex_init(&fs_info->transaction_kthread_mutex);
mutex_init(&fs_info->cleaner_mutex);
mutex_init(&fs_info->volume_mutex);
mutex_init(&fs_info->tree_reloc_mutex);
init_rwsem(&fs_info->extent_commit_sem);
init_rwsem(&fs_info->subvol_sem);
btrfs_init_free_cluster(&fs_info->meta_alloc_cluster);
btrfs_init_free_cluster(&fs_info->data_alloc_cluster);
......@@ -1941,6 +1963,9 @@ printk("thread pool is %d\n", fs_info->thread_pool_size);
}
}
ret = btrfs_find_orphan_roots(tree_root);
BUG_ON(ret);
if (!(sb->s_flags & MS_RDONLY)) {
ret = btrfs_recover_relocation(tree_root);
BUG_ON(ret);
......@@ -2000,6 +2025,8 @@ fail_iput:
btrfs_mapping_tree_free(&fs_info->mapping_tree);
fail_bdi:
bdi_destroy(&fs_info->bdi);
fail_srcu:
cleanup_srcu_struct(&fs_info->subvol_srcu);
fail:
kfree(extent_root);
kfree(tree_root);
......@@ -2263,6 +2290,10 @@ int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
radix_tree_delete(&fs_info->fs_roots_radix,
(unsigned long)root->root_key.objectid);
spin_unlock(&fs_info->fs_roots_radix_lock);
if (btrfs_root_refs(&root->root_item) == 0)
synchronize_srcu(&fs_info->subvol_srcu);
free_fs_root(root);
return 0;
}
......@@ -2286,6 +2317,20 @@ static int del_fs_roots(struct btrfs_fs_info *fs_info)
struct btrfs_root *gang[8];
int i;
while (!list_empty(&fs_info->dead_roots)) {
gang[0] = list_entry(fs_info->dead_roots.next,
struct btrfs_root, root_list);
list_del(&gang[0]->root_list);
if (gang[0]->in_radix) {
btrfs_free_fs_root(fs_info, gang[0]);
} else {
free_extent_buffer(gang[0]->node);
free_extent_buffer(gang[0]->commit_root);
kfree(gang[0]);
}
}
while (1) {
ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
(void **)gang, 0,
......@@ -2315,9 +2360,6 @@ int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
root_objectid = gang[ret - 1]->root_key.objectid + 1;
for (i = 0; i < ret; i++) {
root_objectid = gang[i]->root_key.objectid;
ret = btrfs_find_dead_roots(fs_info->tree_root,
root_objectid);
BUG_ON(ret);
btrfs_orphan_cleanup(gang[i]);
}
root_objectid++;
......@@ -2405,6 +2447,7 @@ int close_ctree(struct btrfs_root *root)
btrfs_mapping_tree_free(&fs_info->mapping_tree);
bdi_destroy(&fs_info->bdi);
cleanup_srcu_struct(&fs_info->subvol_srcu);
kfree(fs_info->extent_root);
kfree(fs_info->tree_root);
......
......@@ -28,7 +28,7 @@ static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
len = BTRFS_FID_SIZE_NON_CONNECTABLE;
type = FILEID_BTRFS_WITHOUT_PARENT;
fid->objectid = BTRFS_I(inode)->location.objectid;
fid->objectid = inode->i_ino;
fid->root_objectid = BTRFS_I(inode)->root->objectid;
fid->gen = inode->i_generation;
......@@ -60,34 +60,61 @@ static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
}
static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
u64 root_objectid, u32 generation)
u64 root_objectid, u32 generation,
int check_generation)
{
struct btrfs_fs_info *fs_info = btrfs_sb(sb)->fs_info;
struct btrfs_root *root;
struct dentry *dentry;
struct inode *inode;
struct btrfs_key key;
int index;
int err = 0;
if (objectid < BTRFS_FIRST_FREE_OBJECTID)
return ERR_PTR(-ESTALE);
key.objectid = root_objectid;
btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
key.offset = (u64)-1;
root = btrfs_read_fs_root_no_name(btrfs_sb(sb)->fs_info, &key);
if (IS_ERR(root))
return ERR_CAST(root);
index = srcu_read_lock(&fs_info->subvol_srcu);
root = btrfs_read_fs_root_no_name(fs_info, &key);
if (IS_ERR(root)) {
err = PTR_ERR(root);
goto fail;
}
if (btrfs_root_refs(&root->root_item) == 0) {
err = -ENOENT;
goto fail;
}
key.objectid = objectid;
btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
key.offset = 0;
inode = btrfs_iget(sb, &key, root);
if (IS_ERR(inode))
return (void *)inode;
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
goto fail;
}
srcu_read_unlock(&fs_info->subvol_srcu, index);
if (generation != inode->i_generation) {
if (check_generation && generation != inode->i_generation) {
iput(inode);
return ERR_PTR(-ESTALE);
}
return d_obtain_alias(inode);
dentry = d_obtain_alias(inode);
if (!IS_ERR(dentry))
dentry->d_op = &btrfs_dentry_operations;
return dentry;
fail:
srcu_read_unlock(&fs_info->subvol_srcu, index);
return ERR_PTR(err);
}
static struct dentry *btrfs_fh_to_parent(struct super_block *sb, struct fid *fh,
......@@ -111,7 +138,7 @@ static struct dentry *btrfs_fh_to_parent(struct super_block *sb, struct fid *fh,
objectid = fid->parent_objectid;
generation = fid->parent_gen;
return btrfs_get_dentry(sb, objectid, root_objectid, generation);
return btrfs_get_dentry(sb, objectid, root_objectid, generation, 1);
}
static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh,
......@@ -133,66 +160,76 @@ static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh,
root_objectid = fid->root_objectid;
generation = fid->gen;
return btrfs_get_dentry(sb, objectid, root_objectid, generation);
return btrfs_get_dentry(sb, objectid, root_objectid, generation, 1);
}
static struct dentry *btrfs_get_parent(struct dentry *child)
{
struct inode *dir = child->d_inode;
static struct dentry *dentry;
struct btrfs_root *root = BTRFS_I(dir)->root;
struct btrfs_key key;
struct btrfs_path *path;
struct extent_buffer *leaf;
int slot;
u64 objectid;
struct btrfs_root_ref *ref;
struct btrfs_key key;
struct btrfs_key found_key;
int ret;
path = btrfs_alloc_path();
key.objectid = dir->i_ino;
btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY);
key.offset = (u64)-1;
if (dir->i_ino == BTRFS_FIRST_FREE_OBJECTID) {
key.objectid = root->root_key.objectid;
key.type = BTRFS_ROOT_BACKREF_KEY;
key.offset = (u64)-1;
root = root->fs_info->tree_root;
} else {
key.objectid = dir->i_ino;
key.type = BTRFS_INODE_REF_KEY;
key.offset = (u64)-1;
}
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0) {
/* Error */
btrfs_free_path(path);
return ERR_PTR(ret);
if (ret < 0)
goto fail;
BUG_ON(ret == 0);
if (path->slots[0] == 0) {
ret = -ENOENT;
goto fail;
}
path->slots[0]--;
leaf = path->nodes[0];
slot = path->slots[0];
if (ret) {
/* btrfs_search_slot() returns the slot where we'd want to
insert a backref for parent inode #0xFFFFFFFFFFFFFFFF.
The _real_ backref, telling us what the parent inode
_actually_ is, will be in the slot _before_ the one
that btrfs_search_slot() returns. */
if (!slot) {
/* Unless there is _no_ key in the tree before... */
btrfs_free_path(path);
return ERR_PTR(-EIO);
}
slot--;
btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
if (found_key.objectid != key.objectid || found_key.type != key.type) {
ret = -ENOENT;
goto fail;
}
btrfs_item_key_to_cpu(leaf, &key, slot);
if (found_key.type == BTRFS_ROOT_BACKREF_KEY) {
ref = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_root_ref);
key.objectid = btrfs_root_ref_dirid(leaf, ref);
} else {
key.objectid = found_key.offset;
}
btrfs_free_path(path);
if (key.objectid != dir->i_ino || key.type != BTRFS_INODE_REF_KEY)
return ERR_PTR(-EINVAL);
objectid = key.offset;
/* If we are already at the root of a subvol, return the real root */
if (objectid == dir->i_ino)
return dget(dir->i_sb->s_root);
if (found_key.type == BTRFS_ROOT_BACKREF_KEY) {
return btrfs_get_dentry(root->fs_info->sb, key.objectid,
found_key.offset, 0, 0);
}
/* Build a new key for the inode item */
key.objectid = objectid;
btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
return d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root));
dentry = d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root));
if (!IS_ERR(dentry))
dentry->d_op = &btrfs_dentry_operations;
return dentry;
fail:
btrfs_free_path(path);
return ERR_PTR(ret);
}
const struct export_operations btrfs_export_ops = {
......
......@@ -5463,9 +5463,24 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref)
ret = btrfs_del_root(trans, tree_root, &root->root_key);
BUG_ON(ret);
free_extent_buffer(root->node);
free_extent_buffer(root->commit_root);
kfree(root);
if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
ret = btrfs_find_last_root(tree_root, root->root_key.objectid,
NULL, NULL);
BUG_ON(ret < 0);
if (ret > 0) {
ret = btrfs_del_orphan_item(trans, tree_root,
root->root_key.objectid);
BUG_ON(ret);
}
}
if (root->in_radix) {
btrfs_free_fs_root(tree_root->fs_info, root);
} else {
free_extent_buffer(root->node);
free_extent_buffer(root->commit_root);
kfree(root);
}
out:
btrfs_end_transaction(trans, tree_root);
kfree(wc);
......
......@@ -3089,6 +3089,11 @@ void btrfs_delete_inode(struct inode *inode)
}
btrfs_wait_ordered_range(inode, 0, (u64)-1);
if (inode->i_nlink > 0) {
BUG_ON(btrfs_root_refs(&root->root_item) != 0);
goto no_delete;
}
btrfs_i_size_write(inode, 0);
trans = btrfs_join_transaction(root, 1);
......@@ -3225,11 +3230,13 @@ static void inode_tree_add(struct inode *inode)
struct btrfs_inode *entry;
struct rb_node **p;
struct rb_node *parent;
again:
p = &root->inode_tree.rb_node;
parent = NULL;
if (hlist_unhashed(&inode->i_hash))
return;
spin_lock(&root->inode_lock);
while (*p) {
parent = *p;
......@@ -3256,13 +3263,87 @@ again:
static void inode_tree_del(struct inode *inode)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
int empty = 0;
spin_lock(&root->inode_lock);
if (!RB_EMPTY_NODE(&BTRFS_I(inode)->rb_node)) {
rb_erase(&BTRFS_I(inode)->rb_node, &root->inode_tree);
RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node);
empty = RB_EMPTY_ROOT(&root->inode_tree);
}
spin_unlock(&root->inode_lock);
if (empty && btrfs_root_refs(&root->root_item) == 0) {
synchronize_srcu(&root->fs_info->subvol_srcu);
spin_lock(&root->inode_lock);
empty = RB_EMPTY_ROOT(&root->inode_tree);
spin_unlock(&root->inode_lock);
if (empty)
btrfs_add_dead_root(root);
}
}
int btrfs_invalidate_inodes(struct btrfs_root *root)
{
struct rb_node *node;
struct rb_node *prev;
struct btrfs_inode *entry;
struct inode *inode;
u64 objectid = 0;
WARN_ON(btrfs_root_refs(&root->root_item) != 0);
spin_lock(&root->inode_lock);
again:
node = root->inode_tree.rb_node;
prev = NULL;
while (node) {
prev = node;
entry = rb_entry(node, struct btrfs_inode, rb_node);
if (objectid < entry->vfs_inode.i_ino)
node = node->rb_left;
else if (objectid > entry->vfs_inode.i_ino)
node = node->rb_right;
else
break;
}
if (!node) {
while (prev) {
entry = rb_entry(prev, struct btrfs_inode, rb_node);
if (objectid <= entry->vfs_inode.i_ino) {
node = prev;
break;
}
prev = rb_next(prev);
}
}
while (node) {
entry = rb_entry(node, struct btrfs_inode, rb_node);
objectid = entry->vfs_inode.i_ino + 1;
inode = igrab(&entry->vfs_inode);
if (inode) {
spin_unlock(&root->inode_lock);
if (atomic_read(&inode->i_count) > 1)
d_prune_aliases(inode);
/*
* btrfs_drop_inode will remove it from
* the inode cache when its usage count
* hits zero.
*/
iput(inode);
cond_resched();
spin_lock(&root->inode_lock);
goto again;
}
if (cond_resched_lock(&root->inode_lock))
goto again;
node = rb_next(node);
}
spin_unlock(&root->inode_lock);
return 0;
}
static noinline void init_btrfs_i(struct inode *inode)
......@@ -3379,8 +3460,11 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
struct btrfs_root *root = BTRFS_I(dir)->root;
struct btrfs_root *sub_root = root;
struct btrfs_key location;
int index;
int ret;
dentry->d_op = &btrfs_dentry_operations;
if (dentry->d_name.len > BTRFS_NAME_LEN)
return ERR_PTR(-ENAMETOOLONG);
......@@ -3399,6 +3483,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
BUG_ON(location.type != BTRFS_ROOT_ITEM_KEY);
index = srcu_read_lock(&root->fs_info->subvol_srcu);
ret = fixup_tree_root_location(root, dir, dentry,
&location, &sub_root);
if (ret < 0) {
......@@ -3409,9 +3494,24 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
} else {
inode = btrfs_iget(dir->i_sb, &location, sub_root);
}
srcu_read_unlock(&root->fs_info->subvol_srcu, index);
return inode;
}
static int btrfs_dentry_delete(struct dentry *dentry)
{
struct btrfs_root *root;
if (!dentry->d_inode)
return 0;
root = BTRFS_I(dentry->d_inode)->root;
if (btrfs_root_refs(&root->root_item) == 0)
return 1;
return 0;
}
static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
struct nameidata *nd)
{
......@@ -4773,11 +4873,11 @@ out:
* create a new subvolume directory/inode (helper for the ioctl).
*/
int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
struct btrfs_root *new_root, struct dentry *dentry,
struct btrfs_root *new_root,
u64 new_dirid, u64 alloc_hint)
{
struct inode *inode;
int error;
int err;
u64 index = 0;
inode = btrfs_new_inode(trans, new_root, NULL, "..", 2, new_dirid,
......@@ -4790,11 +4890,10 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
inode->i_nlink = 1;
btrfs_i_size_write(inode, 0);
error = btrfs_update_inode(trans, new_root, inode);
if (error)
return error;
err = btrfs_update_inode(trans, new_root, inode);
BUG_ON(err);