Commit cab7076a authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 updates from Ted Ts'o:
 "For this cycle we add support for the shutdown ioctl, which is
  primarily used for testing, but which can be useful on production
  systems when a scratch volume is being destroyed and the data on it
  doesn't need to be saved.

  This found (and we fixed) a number of bugs with ext4's recovery to
  corrupted file system --- the bugs increased the amount of data that
  could be potentially lost, and in the case of the inline data feature,
  could cause the kernel to BUG.

  Also included are a number of other bug fixes, including in ext4's
  fscrypt, DAX, inline data support"

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (26 commits)
  ext4: rename EXT4_IOC_GOINGDOWN to EXT4_IOC_SHUTDOWN
  ext4: fix fencepost in s_first_meta_bg validation
  ext4: don't BUG when truncating encrypted inodes on the orphan list
  ext4: do not use stripe_width if it is not set
  ext4: fix stripe-unaligned allocations
  dax: assert that i_rwsem is held exclusive for writes
  ext4: fix DAX write locking
  ext4: add EXT4_IOC_GOINGDOWN ioctl
  ext4: add shutdown bit and check for it
  ext4: rename s_resize_flags to s_ext4_flags
  ext4: return EROFS if device is r/o and journal replay is needed
  ext4: preserve the needs_recovery flag when the journal is aborted
  jbd2: don't leak modified metadata buffers on an aborted journal
  ext4: fix inline data error paths
  ext4: move halfmd4 into hash.c directly
  ext4: fix use-after-iput when fscrypt contexts are inconsistent
  jbd2: fix use after free in kjournald2()
  ext4: fix data corruption in data=journal mode
  ext4: trim allocation requests to group size
  ext4: replace BUG_ON with WARN_ON in mb_find_extent()
  ...
parents 6c24337f e9be2ac7
......@@ -1086,8 +1086,12 @@ dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
loff_t pos = iocb->ki_pos, ret = 0, done = 0;
unsigned flags = 0;
if (iov_iter_rw(iter) == WRITE)
if (iov_iter_rw(iter) == WRITE) {
lockdep_assert_held_exclusive(&inode->i_rwsem);
flags |= IOMAP_WRITE;
} else {
lockdep_assert_held(&inode->i_rwsem);
}
while (iov_iter_count(iter)) {
ret = iomap_apply(inode, pos, iov_iter_count(iter), flags, ops,
......
......@@ -683,6 +683,16 @@ struct fsxattr {
#define EXT4_IOC_FSGETXATTR FS_IOC_FSGETXATTR
#define EXT4_IOC_FSSETXATTR FS_IOC_FSSETXATTR
#define EXT4_IOC_SHUTDOWN _IOR ('X', 125, __u32)
/*
* Flags for going down operation
*/
#define EXT4_GOING_FLAGS_DEFAULT 0x0 /* going down */
#define EXT4_GOING_FLAGS_LOGFLUSH 0x1 /* flush log but not data */
#define EXT4_GOING_FLAGS_NOLOGFLUSH 0x2 /* don't flush log nor data */
#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
/*
* ioctl commands in 32 bit emulation
......@@ -1403,8 +1413,7 @@ struct ext4_sb_info {
struct journal_s *s_journal;
struct list_head s_orphan;
struct mutex s_orphan_lock;
unsigned long s_resize_flags; /* Flags indicating if there
is a resizer */
unsigned long s_ext4_flags; /* Ext4 superblock flags */
unsigned long s_commit_interval;
u32 s_max_batch_time;
u32 s_min_batch_time;
......@@ -1837,6 +1846,18 @@ static inline bool ext4_has_incompat_features(struct super_block *sb)
return (EXT4_SB(sb)->s_es->s_feature_incompat != 0);
}
/*
* Superblock flags
*/
#define EXT4_FLAGS_RESIZING 0
#define EXT4_FLAGS_SHUTDOWN 1
static inline int ext4_forced_shutdown(struct ext4_sb_info *sbi)
{
return test_bit(EXT4_FLAGS_SHUTDOWN, &sbi->s_ext4_flags);
}
/*
* Default values for user and/or group using reserved blocks
*/
......@@ -3005,7 +3026,7 @@ extern int ext4_inline_data_fiemap(struct inode *inode,
extern int ext4_try_to_evict_inline_data(handle_t *handle,
struct inode *inode,
int needed);
extern void ext4_inline_data_truncate(struct inode *inode, int *has_inline);
extern int ext4_inline_data_truncate(struct inode *inode, int *has_inline);
extern int ext4_convert_inline_data(struct inode *inode);
......@@ -3199,7 +3220,6 @@ static inline void ext4_inode_resume_unlocked_dio(struct inode *inode)
EXT4_WQ_HASH_SZ])
extern wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
#define EXT4_RESIZING 0
extern int ext4_resize_begin(struct super_block *sb);
extern void ext4_resize_end(struct super_block *sb);
......
......@@ -43,6 +43,10 @@ static int ext4_journal_check_start(struct super_block *sb)
journal_t *journal;
might_sleep();
if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
return -EIO;
if (sb->s_flags & MS_RDONLY)
return -EROFS;
WARN_ON(sb->s_writers.frozen == SB_FREEZE_COMPLETE);
......@@ -161,6 +165,13 @@ int __ext4_journal_get_write_access(const char *where, unsigned int line,
might_sleep();
if (ext4_handle_valid(handle)) {
struct super_block *sb;
sb = handle->h_transaction->t_journal->j_private;
if (unlikely(ext4_forced_shutdown(EXT4_SB(sb)))) {
jbd2_journal_abort_handle(handle);
return -EIO;
}
err = jbd2_journal_get_write_access(handle, bh);
if (err)
ext4_journal_abort_handle(where, line, __func__, bh,
......
......@@ -5334,7 +5334,8 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
ext4_lblk_t stop, *iterator, ex_start, ex_end;
/* Let path point to the last extent */
path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, 0);
path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL,
EXT4_EX_NOCACHE);
if (IS_ERR(path))
return PTR_ERR(path);
......@@ -5343,15 +5344,15 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
if (!extent)
goto out;
stop = le32_to_cpu(extent->ee_block) +
ext4_ext_get_actual_len(extent);
stop = le32_to_cpu(extent->ee_block);
/*
* In case of left shift, Don't start shifting extents until we make
* sure the hole is big enough to accommodate the shift.
*/
if (SHIFT == SHIFT_LEFT) {
path = ext4_find_extent(inode, start - 1, &path, 0);
path = ext4_find_extent(inode, start - 1, &path,
EXT4_EX_NOCACHE);
if (IS_ERR(path))
return PTR_ERR(path);
depth = path->p_depth;
......@@ -5383,9 +5384,14 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
else
iterator = &stop;
/* Its safe to start updating extents */
while (start < stop) {
path = ext4_find_extent(inode, *iterator, &path, 0);
/*
* Its safe to start updating extents. Start and stop are unsigned, so
* in case of right shift if extent with 0 block is reached, iterator
* becomes NULL to indicate the end of the loop.
*/
while (iterator && start <= stop) {
path = ext4_find_extent(inode, *iterator, &path,
EXT4_EX_NOCACHE);
if (IS_ERR(path))
return PTR_ERR(path);
depth = path->p_depth;
......@@ -5412,8 +5418,11 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
ext4_ext_get_actual_len(extent);
} else {
extent = EXT_FIRST_EXTENT(path[depth].p_hdr);
*iterator = le32_to_cpu(extent->ee_block) > 0 ?
le32_to_cpu(extent->ee_block) - 1 : 0;
if (le32_to_cpu(extent->ee_block) > 0)
*iterator = le32_to_cpu(extent->ee_block) - 1;
else
/* Beginning is reached, end of the loop */
iterator = NULL;
/* Update path extent in case we need to stop */
while (le32_to_cpu(extent->ee_block) < start)
extent++;
......
......@@ -57,6 +57,9 @@ static ssize_t ext4_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
static ssize_t ext4_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
if (unlikely(ext4_forced_shutdown(EXT4_SB(file_inode(iocb->ki_filp)->i_sb))))
return -EIO;
if (!iov_iter_count(to))
return 0; /* skip atime */
......@@ -175,7 +178,6 @@ ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct inode *inode = file_inode(iocb->ki_filp);
ssize_t ret;
bool overwrite = false;
inode_lock(inode);
ret = ext4_write_checks(iocb, from);
......@@ -188,16 +190,9 @@ ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (ret)
goto out;
if (ext4_overwrite_io(inode, iocb->ki_pos, iov_iter_count(from))) {
overwrite = true;
downgrade_write(&inode->i_rwsem);
}
ret = dax_iomap_rw(iocb, from, &ext4_iomap_ops);
out:
if (!overwrite)
inode_unlock(inode);
else
inode_unlock_shared(inode);
inode_unlock(inode);
if (ret > 0)
ret = generic_write_sync(iocb, ret);
return ret;
......@@ -213,6 +208,9 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
int overwrite = 0;
ssize_t ret;
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
return -EIO;
#ifdef CONFIG_FS_DAX
if (IS_DAX(inode))
return ext4_dax_write_iter(iocb, from);
......@@ -348,6 +346,9 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
{
struct inode *inode = file->f_mapping->host;
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
return -EIO;
if (ext4_encrypted_inode(inode)) {
int err = fscrypt_get_encryption_info(inode);
if (err)
......@@ -375,6 +376,9 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
char buf[64], *cp;
int ret;
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
return -EIO;
if (unlikely(!(sbi->s_mount_flags & EXT4_MF_MNTDIR_SAMPLED) &&
!(sb->s_flags & MS_RDONLY))) {
sbi->s_mount_flags |= EXT4_MF_MNTDIR_SAMPLED;
......
......@@ -100,6 +100,9 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
tid_t commit_tid;
bool needs_barrier = false;
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
return -EIO;
J_ASSERT(ext4_journal_current_handle() == NULL);
trace_ext4_sync_file_enter(file, datasync);
......
......@@ -10,7 +10,8 @@
*/
#include <linux/fs.h>
#include <linux/cryptohash.h>
#include <linux/compiler.h>
#include <linux/bitops.h>
#include "ext4.h"
#define DELTA 0x9E3779B9
......@@ -32,6 +33,74 @@ static void TEA_transform(__u32 buf[4], __u32 const in[])
buf[1] += b1;
}
/* F, G and H are basic MD4 functions: selection, majority, parity */
#define F(x, y, z) ((z) ^ ((x) & ((y) ^ (z))))
#define G(x, y, z) (((x) & (y)) + (((x) ^ (y)) & (z)))
#define H(x, y, z) ((x) ^ (y) ^ (z))
/*
* The generic round function. The application is so specific that
* we don't bother protecting all the arguments with parens, as is generally
* good macro practice, in favor of extra legibility.
* Rotation is separate from addition to prevent recomputation
*/
#define ROUND(f, a, b, c, d, x, s) \
(a += f(b, c, d) + x, a = rol32(a, s))
#define K1 0
#define K2 013240474631UL
#define K3 015666365641UL
/*
* Basic cut-down MD4 transform. Returns only 32 bits of result.
*/
static __u32 half_md4_transform(__u32 buf[4], __u32 const in[8])
{
__u32 a = buf[0], b = buf[1], c = buf[2], d = buf[3];
/* Round 1 */
ROUND(F, a, b, c, d, in[0] + K1, 3);
ROUND(F, d, a, b, c, in[1] + K1, 7);
ROUND(F, c, d, a, b, in[2] + K1, 11);
ROUND(F, b, c, d, a, in[3] + K1, 19);
ROUND(F, a, b, c, d, in[4] + K1, 3);
ROUND(F, d, a, b, c, in[5] + K1, 7);
ROUND(F, c, d, a, b, in[6] + K1, 11);
ROUND(F, b, c, d, a, in[7] + K1, 19);
/* Round 2 */
ROUND(G, a, b, c, d, in[1] + K2, 3);
ROUND(G, d, a, b, c, in[3] + K2, 5);
ROUND(G, c, d, a, b, in[5] + K2, 9);
ROUND(G, b, c, d, a, in[7] + K2, 13);
ROUND(G, a, b, c, d, in[0] + K2, 3);
ROUND(G, d, a, b, c, in[2] + K2, 5);
ROUND(G, c, d, a, b, in[4] + K2, 9);
ROUND(G, b, c, d, a, in[6] + K2, 13);
/* Round 3 */
ROUND(H, a, b, c, d, in[3] + K3, 3);
ROUND(H, d, a, b, c, in[7] + K3, 9);
ROUND(H, c, d, a, b, in[2] + K3, 11);
ROUND(H, b, c, d, a, in[6] + K3, 15);
ROUND(H, a, b, c, d, in[1] + K3, 3);
ROUND(H, d, a, b, c, in[5] + K3, 9);
ROUND(H, c, d, a, b, in[0] + K3, 11);
ROUND(H, b, c, d, a, in[4] + K3, 15);
buf[0] += a;
buf[1] += b;
buf[2] += c;
buf[3] += d;
return buf[1]; /* "most hashed" word */
}
#undef ROUND
#undef K1
#undef K2
#undef K3
#undef F
#undef G
#undef H
/* The old legacy hash */
static __u32 dx_hack_hash_unsigned(const char *name, int len)
......
......@@ -764,6 +764,9 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
if (!dir || !dir->i_nlink)
return ERR_PTR(-EPERM);
if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb))))
return ERR_PTR(-EIO);
if ((ext4_encrypted_inode(dir) ||
DUMMY_ENCRYPTION_ENABLED(EXT4_SB(dir->i_sb))) &&
(S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) {
......
This diff is collapsed.
......@@ -1189,6 +1189,9 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
pgoff_t index;
unsigned from, to;
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
return -EIO;
trace_ext4_write_begin(inode, pos, len, flags);
/*
* Reserve one block more for addition to orphan list in case
......@@ -1330,8 +1333,11 @@ static int ext4_write_end(struct file *file,
if (ext4_has_inline_data(inode)) {
ret = ext4_write_inline_data_end(inode, pos, len,
copied, page);
if (ret < 0)
if (ret < 0) {
unlock_page(page);
put_page(page);
goto errout;
}
copied = ret;
} else
copied = block_write_end(file, mapping, pos,
......@@ -1385,7 +1391,9 @@ static int ext4_write_end(struct file *file,
* set the buffer to be dirty, since in data=journalled mode we need
* to call ext4_handle_dirty_metadata() instead.
*/
static void zero_new_buffers(struct page *page, unsigned from, unsigned to)
static void ext4_journalled_zero_new_buffers(handle_t *handle,
struct page *page,
unsigned from, unsigned to)
{
unsigned int block_start = 0, block_end;
struct buffer_head *head, *bh;
......@@ -1402,7 +1410,7 @@ static void zero_new_buffers(struct page *page, unsigned from, unsigned to)
size = min(to, block_end) - start;
zero_user(page, start, size);
set_buffer_uptodate(bh);
write_end_fn(handle, bh);
}
clear_buffer_new(bh);
}
......@@ -1431,18 +1439,25 @@ static int ext4_journalled_write_end(struct file *file,
BUG_ON(!ext4_handle_valid(handle));
if (ext4_has_inline_data(inode))
copied = ext4_write_inline_data_end(inode, pos, len,
copied, page);
else {
if (copied < len) {
if (!PageUptodate(page))
copied = 0;
zero_new_buffers(page, from+copied, to);
if (ext4_has_inline_data(inode)) {
ret = ext4_write_inline_data_end(inode, pos, len,
copied, page);
if (ret < 0) {
unlock_page(page);
put_page(page);
goto errout;
}
copied = ret;
} else if (unlikely(copied < len) && !PageUptodate(page)) {
copied = 0;
ext4_journalled_zero_new_buffers(handle, page, from, to);
} else {
if (unlikely(copied < len))
ext4_journalled_zero_new_buffers(handle, page,
from + copied, to);
ret = ext4_walk_page_buffers(handle, page_buffers(page), from,
to, &partial, write_end_fn);
from + copied, &partial,
write_end_fn);
if (!partial)
SetPageUptodate(page);
}
......@@ -1468,6 +1483,7 @@ static int ext4_journalled_write_end(struct file *file,
*/
ext4_orphan_add(handle, inode);
errout:
ret2 = ext4_journal_stop(handle);
if (!ret)
ret = ret2;
......@@ -2034,6 +2050,12 @@ static int ext4_writepage(struct page *page,
struct ext4_io_submit io_submit;
bool keep_towrite = false;
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) {
ext4_invalidatepage(page, 0, PAGE_SIZE);
unlock_page(page);
return -EIO;
}
trace_ext4_writepage(page);
size = i_size_read(inode);
if (page->index == size >> PAGE_SHIFT)
......@@ -2409,7 +2431,8 @@ static int mpage_map_and_submit_extent(handle_t *handle,
if (err < 0) {
struct super_block *sb = inode->i_sb;
if (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)
if (ext4_forced_shutdown(EXT4_SB(sb)) ||
EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)
goto invalidate_dirty_pages;
/*
* Let the uper layers retry transient errors.
......@@ -2464,8 +2487,8 @@ static int mpage_map_and_submit_extent(handle_t *handle,
disksize = i_size;
if (disksize > EXT4_I(inode)->i_disksize)
EXT4_I(inode)->i_disksize = disksize;
err2 = ext4_mark_inode_dirty(handle, inode);
up_write(&EXT4_I(inode)->i_data_sem);
err2 = ext4_mark_inode_dirty(handle, inode);
if (err2)
ext4_error(inode->i_sb,
"Failed to mark inode %lu dirty",
......@@ -2631,6 +2654,9 @@ static int ext4_writepages(struct address_space *mapping,
struct blk_plug plug;
bool give_up_on_write = false;
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
return -EIO;
percpu_down_read(&sbi->s_journal_flag_rwsem);
trace_ext4_writepages(inode, wbc);
......@@ -2667,7 +2693,8 @@ static int ext4_writepages(struct address_space *mapping,
* *never* be called, so if that ever happens, we would want
* the stack trace.
*/
if (unlikely(sbi->s_mount_flags & EXT4_MF_FS_ABORTED)) {
if (unlikely(ext4_forced_shutdown(EXT4_SB(mapping->host->i_sb)) ||
sbi->s_mount_flags & EXT4_MF_FS_ABORTED)) {
ret = -EROFS;
goto out_writepages;
}
......@@ -2892,6 +2919,9 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
struct inode *inode = mapping->host;
handle_t *handle;
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
return -EIO;
index = pos >> PAGE_SHIFT;
if (ext4_nonda_switch(inode->i_sb) ||
......@@ -3914,6 +3944,10 @@ static int ext4_block_truncate_page(handle_t *handle,
unsigned blocksize;
struct inode *inode = mapping->host;
/* If we are processing an encrypted inode during orphan list handling */
if (ext4_encrypted_inode(inode) && !fscrypt_has_encryption_key(inode))
return 0;
blocksize = inode->i_sb->s_blocksize;
length = blocksize - (offset & (blocksize - 1));
......@@ -4222,7 +4256,9 @@ int ext4_truncate(struct inode *inode)
if (ext4_has_inline_data(inode)) {
int has_inline = 1;
ext4_inline_data_truncate(inode, &has_inline);
err = ext4_inline_data_truncate(inode, &has_inline);
if (err)
return err;
if (has_inline)
return 0;
}
......@@ -5197,6 +5233,9 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
int orphan = 0;
const unsigned int ia_valid = attr->ia_valid;
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
return -EIO;
error = setattr_prepare(dentry, attr);
if (error)
return error;
......@@ -5483,6 +5522,9 @@ int ext4_mark_iloc_dirty(handle_t *handle,
{
int err = 0;
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
return -EIO;
if (IS_I_VERSION(inode))
inode_inc_iversion(inode);
......@@ -5506,6 +5548,9 @@ ext4_reserve_inode_write(handle_t *handle, struct inode *inode,
{
int err;
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
return -EIO;
err = ext4_get_inode_loc(inode, iloc);
if (!err) {
BUFFER_TRACE(iloc->bh, "get_write_access");
......
......@@ -16,6 +16,7 @@
#include <linux/quotaops.h>
#include <linux/uuid.h>
#include <linux/uaccess.h>
#include <linux/delay.h>
#include "ext4_jbd2.h"
#include "ext4.h"
......@@ -442,6 +443,52 @@ static inline unsigned long ext4_xflags_to_iflags(__u32 xflags)
return iflags;
}
int ext4_shutdown(struct super_block *sb, unsigned long arg)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
__u32 flags;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (get_user(flags, (__u32 __user *)arg))
return -EFAULT;
if (flags > EXT4_GOING_FLAGS_NOLOGFLUSH)
return -EINVAL;
if (ext4_forced_shutdown(sbi))
return 0;
ext4_msg(sb, KERN_ALERT, "shut down requested (%d)", flags);
switch (flags) {
case EXT4_GOING_FLAGS_DEFAULT:
freeze_bdev(sb->s_bdev);
set_bit(EXT4_FLAGS_SHUTDOWN, &sbi->s_ext4_flags);
thaw_bdev(sb->s_bdev, sb);
break;
case EXT4_GOING_FLAGS_LOGFLUSH:
set_bit(EXT4_FLAGS_SHUTDOWN, &sbi->s_ext4_flags);
if (sbi->s_journal && !is_journal_aborted(sbi->s_journal)) {
(void) ext4_force_commit(sb);
jbd2_journal_abort(sbi->s_journal, 0);
}
break;
case EXT4_GOING_FLAGS_NOLOGFLUSH:
set_bit(EXT4_FLAGS_SHUTDOWN, &sbi->s_ext4_flags);
if (sbi->s_journal && !is_journal_aborted(sbi->s_journal)) {
msleep(100);
jbd2_journal_abort(sbi->s_journal, 0);
}
break;
default:
return -EINVAL;
}
clear_opt(sb, DISCARD);
return 0;
}
long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
struct inode *inode = file_inode(filp);
......@@ -893,6 +940,8 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return 0;
}
case EXT4_IOC_SHUTDOWN:
return ext4_shutdown(sb, arg);
default:
return -ENOTTY;
}
......@@ -959,6 +1008,7 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
case EXT4_IOC_SET_ENCRYPTION_POLICY:
case EXT4_IOC_GET_ENCRYPTION_PWSALT:
case EXT4_IOC_GET_ENCRYPTION_POLICY:
case EXT4_IOC_SHUTDOWN:
break;
default:
return -ENOIOCTLCMD;
......
......@@ -1556,7 +1556,17 @@ static int mb_find_extent(struct ext4_buddy *e4b, int block,
ex->fe_len += 1 << order;
}
BUG_ON(ex->fe_start + ex->fe_len > (1 << (e4b->bd_blkbits + 3)));
if (ex->fe_start + ex->fe_len > (1 << (e4b->bd_blkbits + 3))) {
/* Should never happen! (but apparently sometimes does?!?) */
WARN_ON(1);
ext4_error(e4b->bd_sb, "corruption or bug in mb_find_extent "
"block=%d, order=%d needed=%d ex=%u/%d/%d@%u",
block, order, needed, ex->fe_group, ex->fe_start,
ex->fe_len, ex->fe_logical);
ex->fe_len = 0;
ex->fe_start = 0;
ex->fe_group = 0;
}
return ex->fe_len;
}
......@@ -2136,8 +2146,10 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
* We search using buddy data only if the order of the request
* is greater than equal to the sbi_s_mb_order2_reqs
* You can tune it via /sys/fs/ext4/<partition>/mb_order2_req
* We also support searching for power-of-two requests only for
* requests upto maximum buddy size we have constructed.
*/
if (i >= sbi->s_mb_order2_reqs) {
if (i >= sbi->s_mb_order2_reqs && i <= sb->s_blocksize_bits + 2) {
/*
* This should tell if fe_len is exactly power of 2
*/
......@@ -2207,7 +2219,7 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
}
ac->ac_groups_scanned++;
if (cr == 0 && ac->ac_2order < sb->s_blocksize_bits+2)
if (cr == 0)
ext4_mb_simple_scan_group(ac, &e4b);
else if (cr == 1 && sbi->s_stripe &&
!(ac->ac_g_ex.fe_len % sbi->s_stripe))
......@@ -3123,6 +3135,13 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
if (ar->pright && start + size - 1 >= ar->lright)
size -= start + size - ar->lright;
/*
* Trim allocation request for filesystems with artificially small
* groups.