Commit d0164adc authored by Mel Gorman's avatar Mel Gorman Committed by Linus Torvalds

mm, page_alloc: distinguish between being unable to sleep, unwilling to sleep...

mm, page_alloc: distinguish between being unable to sleep, unwilling to sleep and avoiding waking kswapd

__GFP_WAIT has been used to identify atomic context in callers that hold
spinlocks or are in interrupts.  They are expected to be high priority and
have access one of two watermarks lower than "min" which can be referred
to as the "atomic reserve".  __GFP_HIGH users get access to the first
lower watermark and can be called the "high priority reserve".

Over time, callers had a requirement to not block when fallback options
were available.  Some have abused __GFP_WAIT leading to a situation where
an optimisitic allocation with a fallback option can access atomic
reserves.

This patch uses __GFP_ATOMIC to identify callers that are truely atomic,
cannot sleep and have no alternative.  High priority users continue to use
__GFP_HIGH.  __GFP_DIRECT_RECLAIM identifies callers that can sleep and
are willing to enter direct reclaim.  __GFP_KSWAPD_RECLAIM to identify
callers that want to wake kswapd for background reclaim.  __GFP_WAIT is
redefined as a caller that is willing to enter direct reclaim and wake
kswapd for background reclaim.

This patch then converts a number of sites

o __GFP_ATOMIC is used by callers that are high priority and have memory
  pools for those requests. GFP_ATOMIC uses this flag.

o Callers that have a limited mempool to guarantee forward progress clear
  __GFP_DIRECT_RECLAIM but keep __GFP_KSWAPD_RECLAIM. bio allocations fall
  into this category where kswapd will still be woken but atomic reserves
  are not used as there is a one-entry mempool to guarantee progress.

o Callers that are checking if they are non-blocking should use the
  helper gfpflags_allow_blocking() where possible. This is because
  checking for __GFP_WAIT as was done historically now can trigger false
  positives. Some exceptions like dm-crypt.c exist where the code intent
  is clearer if __GFP_DIRECT_RECLAIM is used instead of the helper due to
  flag manipulations.

o Callers that built their own GFP flags instead of starting with GFP_KERNEL
  and friends now also need to specify __GFP_KSWAPD_RECLAIM.

The first key hazard to watch out for is callers that removed __GFP_WAIT
and was depending on access to atomic reserves for inconspicuous reasons.
In some cases it may be appropriate for them to use __GFP_HIGH.

The second key hazard is callers that assembled their own combination of
GFP flags instead of starting with something like GFP_KERNEL.  They may
now wish to specify __GFP_KSWAPD_RECLAIM.  It's almost certainly harmless
if it's missed in most cases as other activity will wake kswapd.
Signed-off-by: 's avatarMel Gorman <mgorman@techsingularity.net>
Acked-by: 's avatarVlastimil Babka <vbabka@suse.cz>
Acked-by: 's avatarMichal Hocko <mhocko@suse.com>
Acked-by: 's avatarJohannes Weiner <hannes@cmpxchg.org>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Vitaly Wool <vitalywool@gmail.com>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: 's avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: 's avatarLinus Torvalds <torvalds@linux-foundation.org>
parent 016c13da
Started Jan 2000 by Kanoj Sarcar <kanoj@sgi.com>
Memory balancing is needed for non __GFP_WAIT as well as for non
__GFP_IO allocations.
Memory balancing is needed for !__GFP_ATOMIC and !__GFP_KSWAPD_RECLAIM as
well as for non __GFP_IO allocations.
There are two reasons to be requesting non __GFP_WAIT allocations:
the caller can not sleep (typically intr context), or does not want
to incur cost overheads of page stealing and possible swap io for
whatever reasons.
The first reason why a caller may avoid reclaim is that the caller can not
sleep due to holding a spinlock or is in interrupt context. The second may
be that the caller is willing to fail the allocation without incurring the
overhead of page reclaim. This may happen for opportunistic high-order
allocation requests that have order-0 fallback options. In such cases,
the caller may also wish to avoid waking kswapd.
__GFP_IO allocation requests are made to prevent file system deadlocks.
......
......@@ -651,12 +651,12 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
if (nommu())
addr = __alloc_simple_buffer(dev, size, gfp, &page);
else if (dev_get_cma_area(dev) && (gfp & __GFP_WAIT))
else if (dev_get_cma_area(dev) && (gfp & __GFP_DIRECT_RECLAIM))
addr = __alloc_from_contiguous(dev, size, prot, &page,
caller, want_vaddr);
else if (is_coherent)
addr = __alloc_simple_buffer(dev, size, gfp, &page);
else if (!(gfp & __GFP_WAIT))
else if (!gfpflags_allow_blocking(gfp))
addr = __alloc_from_pool(size, &page);
else
addr = __alloc_remap_buffer(dev, size, gfp, prot, &page,
......@@ -1363,7 +1363,7 @@ static void *arm_iommu_alloc_attrs(struct device *dev, size_t size,
*handle = DMA_ERROR_CODE;
size = PAGE_ALIGN(size);
if (!(gfp & __GFP_WAIT))
if (!gfpflags_allow_blocking(gfp))
return __iommu_alloc_atomic(dev, size, handle);
/*
......
......@@ -25,7 +25,7 @@
unsigned long xen_get_swiotlb_free_pages(unsigned int order)
{
struct memblock_region *reg;
gfp_t flags = __GFP_NOWARN;
gfp_t flags = __GFP_NOWARN|__GFP_KSWAPD_RECLAIM;
for_each_memblock(memory, reg) {
if (reg->base < (phys_addr_t)0xffffffff) {
......
......@@ -100,7 +100,7 @@ static void *__dma_alloc_coherent(struct device *dev, size_t size,
if (IS_ENABLED(CONFIG_ZONE_DMA) &&
dev->coherent_dma_mask <= DMA_BIT_MASK(32))
flags |= GFP_DMA;
if (dev_get_cma_area(dev) && (flags & __GFP_WAIT)) {
if (dev_get_cma_area(dev) && gfpflags_allow_blocking(flags)) {
struct page *page;
void *addr;
......@@ -148,7 +148,7 @@ static void *__dma_alloc(struct device *dev, size_t size,
size = PAGE_ALIGN(size);
if (!coherent && !(flags & __GFP_WAIT)) {
if (!coherent && !gfpflags_allow_blocking(flags)) {
struct page *page = NULL;
void *addr = __alloc_from_pool(size, &page, flags);
......
......@@ -90,7 +90,7 @@ void *dma_generic_alloc_coherent(struct device *dev, size_t size,
again:
page = NULL;
/* CMA can be used only in the context which permits sleeping */
if (flag & __GFP_WAIT) {
if (gfpflags_allow_blocking(flag)) {
page = dma_alloc_from_contiguous(dev, count, get_order(size));
if (page && page_to_phys(page) + size > dma_mask) {
dma_release_from_contiguous(dev, page, count);
......
......@@ -211,7 +211,7 @@ struct bio_vec *bvec_alloc(gfp_t gfp_mask, int nr, unsigned long *idx,
bvl = mempool_alloc(pool, gfp_mask);
} else {
struct biovec_slab *bvs = bvec_slabs + *idx;
gfp_t __gfp_mask = gfp_mask & ~(__GFP_WAIT | __GFP_IO);
gfp_t __gfp_mask = gfp_mask & ~(__GFP_DIRECT_RECLAIM | __GFP_IO);
/*
* Make this allocation restricted and don't dump info on
......@@ -221,11 +221,11 @@ struct bio_vec *bvec_alloc(gfp_t gfp_mask, int nr, unsigned long *idx,
__gfp_mask |= __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN;
/*
* Try a slab allocation. If this fails and __GFP_WAIT
* Try a slab allocation. If this fails and __GFP_DIRECT_RECLAIM
* is set, retry with the 1-entry mempool
*/
bvl = kmem_cache_alloc(bvs->slab, __gfp_mask);
if (unlikely(!bvl && (gfp_mask & __GFP_WAIT))) {
if (unlikely(!bvl && (gfp_mask & __GFP_DIRECT_RECLAIM))) {
*idx = BIOVEC_MAX_IDX;
goto fallback;
}
......@@ -395,12 +395,12 @@ static void punt_bios_to_rescuer(struct bio_set *bs)
* If @bs is NULL, uses kmalloc() to allocate the bio; else the allocation is
* backed by the @bs's mempool.
*
* When @bs is not NULL, if %__GFP_WAIT is set then bio_alloc will always be
* able to allocate a bio. This is due to the mempool guarantees. To make this
* work, callers must never allocate more than 1 bio at a time from this pool.
* Callers that need to allocate more than 1 bio must always submit the
* previously allocated bio for IO before attempting to allocate a new one.
* Failure to do so can cause deadlocks under memory pressure.
* When @bs is not NULL, if %__GFP_DIRECT_RECLAIM is set then bio_alloc will
* always be able to allocate a bio. This is due to the mempool guarantees.
* To make this work, callers must never allocate more than 1 bio at a time
* from this pool. Callers that need to allocate more than 1 bio must always
* submit the previously allocated bio for IO before attempting to allocate
* a new one. Failure to do so can cause deadlocks under memory pressure.
*
* Note that when running under generic_make_request() (i.e. any block
* driver), bios are not submitted until after you return - see the code in
......@@ -459,13 +459,13 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
* We solve this, and guarantee forward progress, with a rescuer
* workqueue per bio_set. If we go to allocate and there are
* bios on current->bio_list, we first try the allocation
* without __GFP_WAIT; if that fails, we punt those bios we
* would be blocking to the rescuer workqueue before we retry
* with the original gfp_flags.
* without __GFP_DIRECT_RECLAIM; if that fails, we punt those
* bios we would be blocking to the rescuer workqueue before
* we retry with the original gfp_flags.
*/
if (current->bio_list && !bio_list_empty(current->bio_list))
gfp_mask &= ~__GFP_WAIT;
gfp_mask &= ~__GFP_DIRECT_RECLAIM;
p = mempool_alloc(bs->bio_pool, gfp_mask);
if (!p && gfp_mask != saved_gfp) {
......
......@@ -1206,8 +1206,8 @@ static struct request *__get_request(struct request_list *rl, int rw_flags,
* @bio: bio to allocate request for (can be %NULL)
* @gfp_mask: allocation mask
*
* Get a free request from @q. If %__GFP_WAIT is set in @gfp_mask, this
* function keeps retrying under memory pressure and fails iff @q is dead.
* Get a free request from @q. If %__GFP_DIRECT_RECLAIM is set in @gfp_mask,
* this function keeps retrying under memory pressure and fails iff @q is dead.
*
* Must be called with @q->queue_lock held and,
* Returns ERR_PTR on failure, with @q->queue_lock held.
......@@ -1227,7 +1227,7 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
if (!IS_ERR(rq))
return rq;
if (!(gfp_mask & __GFP_WAIT) || unlikely(blk_queue_dying(q))) {
if (!gfpflags_allow_blocking(gfp_mask) || unlikely(blk_queue_dying(q))) {
blk_put_rl(rl);
return rq;
}
......@@ -1305,11 +1305,11 @@ EXPORT_SYMBOL(blk_get_request);
* BUG.
*
* WARNING: When allocating/cloning a bio-chain, careful consideration should be
* given to how you allocate bios. In particular, you cannot use __GFP_WAIT for
* anything but the first bio in the chain. Otherwise you risk waiting for IO
* completion of a bio that hasn't been submitted yet, thus resulting in a
* deadlock. Alternatively bios should be allocated using bio_kmalloc() instead
* of bio_alloc(), as that avoids the mempool deadlock.
* given to how you allocate bios. In particular, you cannot use
* __GFP_DIRECT_RECLAIM for anything but the first bio in the chain. Otherwise
* you risk waiting for IO completion of a bio that hasn't been submitted yet,
* thus resulting in a deadlock. Alternatively bios should be allocated using
* bio_kmalloc() instead of bio_alloc(), as that avoids the mempool deadlock.
* If possible a big IO should be split into smaller parts when allocation
* fails. Partial allocation should not be an error, or you risk a live-lock.
*/
......
......@@ -289,7 +289,7 @@ struct io_context *get_task_io_context(struct task_struct *task,
{
struct io_context *ioc;
might_sleep_if(gfp_flags & __GFP_WAIT);
might_sleep_if(gfpflags_allow_blocking(gfp_flags));
do {
task_lock(task);
......
......@@ -268,7 +268,7 @@ static int bt_get(struct blk_mq_alloc_data *data,
if (tag != -1)
return tag;
if (!(data->gfp & __GFP_WAIT))
if (!gfpflags_allow_blocking(data->gfp))
return -1;
bs = bt_wait_ptr(bt, hctx);
......
......@@ -244,11 +244,11 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp,
ctx = blk_mq_get_ctx(q);
hctx = q->mq_ops->map_queue(q, ctx->cpu);
blk_mq_set_alloc_data(&alloc_data, q, gfp & ~__GFP_WAIT,
blk_mq_set_alloc_data(&alloc_data, q, gfp & ~__GFP_DIRECT_RECLAIM,
reserved, ctx, hctx);
rq = __blk_mq_alloc_request(&alloc_data, rw);
if (!rq && (gfp & __GFP_WAIT)) {
if (!rq && (gfp & __GFP_DIRECT_RECLAIM)) {
__blk_mq_run_hw_queue(hctx);
blk_mq_put_ctx(ctx);
......@@ -1186,7 +1186,7 @@ static struct request *blk_mq_map_request(struct request_queue *q,
ctx = blk_mq_get_ctx(q);
hctx = q->mq_ops->map_queue(q, ctx->cpu);
blk_mq_set_alloc_data(&alloc_data, q,
__GFP_WAIT|GFP_ATOMIC, false, ctx, hctx);
__GFP_WAIT|__GFP_HIGH, false, ctx, hctx);
rq = __blk_mq_alloc_request(&alloc_data, rw);
ctx = alloc_data.ctx;
hctx = alloc_data.hctx;
......
......@@ -357,7 +357,8 @@ drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t secto
}
if (has_payload && data_size) {
page = drbd_alloc_pages(peer_device, nr_pages, (gfp_mask & __GFP_WAIT));
page = drbd_alloc_pages(peer_device, nr_pages,
gfpflags_allow_blocking(gfp_mask));
if (!page)
goto fail;
}
......
......@@ -271,7 +271,7 @@ static struct bio *bio_chain_clone(struct bio *old_chain, gfp_t gfpmask)
goto err_out;
tmp->bi_bdev = NULL;
gfpmask &= ~__GFP_WAIT;
gfpmask &= ~__GFP_DIRECT_RECLAIM;
tmp->bi_next = NULL;
if (!new_chain)
......
......@@ -124,7 +124,8 @@ int cn_netlink_send_mult(struct cn_msg *msg, u16 len, u32 portid, u32 __group,
if (group)
return netlink_broadcast(dev->nls, skb, portid, group,
gfp_mask);
return netlink_unicast(dev->nls, skb, portid, !(gfp_mask&__GFP_WAIT));
return netlink_unicast(dev->nls, skb, portid,
!gfpflags_allow_blocking(gfp_mask));
}
EXPORT_SYMBOL_GPL(cn_netlink_send_mult);
......
......@@ -486,7 +486,7 @@ static int ioctl_get_info(struct client *client, union ioctl_arg *arg)
static int add_client_resource(struct client *client,
struct client_resource *resource, gfp_t gfp_mask)
{
bool preload = !!(gfp_mask & __GFP_WAIT);
bool preload = gfpflags_allow_blocking(gfp_mask);
unsigned long flags;
int ret;
......
......@@ -2215,7 +2215,7 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
*/
mapping = file_inode(obj->base.filp)->i_mapping;
gfp = mapping_gfp_mask(mapping);
gfp |= __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD;
gfp |= __GFP_NORETRY | __GFP_NOWARN;
gfp &= ~(__GFP_IO | __GFP_WAIT);
sg = st->sgl;
st->nents = 0;
......
......@@ -1083,7 +1083,7 @@ static void init_mad(struct ib_sa_mad *mad, struct ib_mad_agent *agent)
static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask)
{
bool preload = !!(gfp_mask & __GFP_WAIT);
bool preload = gfpflags_allow_blocking(gfp_mask);
unsigned long flags;
int ret, id;
......
......@@ -2668,7 +2668,7 @@ static void *alloc_coherent(struct device *dev, size_t size,
page = alloc_pages(flag | __GFP_NOWARN, get_order(size));
if (!page) {
if (!(flag & __GFP_WAIT))
if (!gfpflags_allow_blocking(flag))
return NULL;
page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT,
......
......@@ -3647,7 +3647,7 @@ static void *intel_alloc_coherent(struct device *dev, size_t size,
flags |= GFP_DMA32;
}
if (flags & __GFP_WAIT) {
if (gfpflags_allow_blocking(flags)) {
unsigned int count = size >> PAGE_SHIFT;
page = dma_alloc_from_contiguous(dev, count, order);
......
......@@ -994,7 +994,7 @@ static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size)
struct bio_vec *bvec;
retry:
if (unlikely(gfp_mask & __GFP_WAIT))
if (unlikely(gfp_mask & __GFP_DIRECT_RECLAIM))
mutex_lock(&cc->bio_alloc_lock);
clone = bio_alloc_bioset(GFP_NOIO, nr_iovecs, cc->bs);
......@@ -1010,7 +1010,7 @@ static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size)
if (!page) {
crypt_free_buffer_pages(cc, clone);
bio_put(clone);
gfp_mask |= __GFP_WAIT;
gfp_mask |= __GFP_DIRECT_RECLAIM;
goto retry;
}
......@@ -1027,7 +1027,7 @@ static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size)
}
return_clone:
if (unlikely(gfp_mask & __GFP_WAIT))
if (unlikely(gfp_mask & __GFP_DIRECT_RECLAIM))
mutex_unlock(&cc->bio_alloc_lock);
return clone;
......
......@@ -244,7 +244,7 @@ static int kcopyd_get_pages(struct dm_kcopyd_client *kc,
*pages = NULL;
do {
pl = alloc_pl(__GFP_NOWARN | __GFP_NORETRY);
pl = alloc_pl(__GFP_NOWARN | __GFP_NORETRY | __GFP_KSWAPD_RECLAIM);
if (unlikely(!pl)) {
/* Use reserved pages */
pl = kc->pages;
......
......@@ -1297,7 +1297,7 @@ static struct solo_enc_dev *solo_enc_alloc(struct solo_dev *solo_dev,
solo_enc->vidq.ops = &solo_enc_video_qops;
solo_enc->vidq.mem_ops = &vb2_dma_sg_memops;
solo_enc->vidq.drv_priv = solo_enc;
solo_enc->vidq.gfp_flags = __GFP_DMA32;
solo_enc->vidq.gfp_flags = __GFP_DMA32 | __GFP_KSWAPD_RECLAIM;
solo_enc->vidq.timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC;
solo_enc->vidq.buf_struct_size = sizeof(struct solo_vb2_buf);
solo_enc->vidq.lock = &solo_enc->lock;
......
......@@ -678,7 +678,7 @@ int solo_v4l2_init(struct solo_dev *solo_dev, unsigned nr)
solo_dev->vidq.mem_ops = &vb2_dma_contig_memops;
solo_dev->vidq.drv_priv = solo_dev;
solo_dev->vidq.timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC;
solo_dev->vidq.gfp_flags = __GFP_DMA32;
solo_dev->vidq.gfp_flags = __GFP_DMA32 | __GFP_KSWAPD_RECLAIM;
solo_dev->vidq.buf_struct_size = sizeof(struct solo_vb2_buf);
solo_dev->vidq.lock = &solo_dev->lock;
ret = vb2_queue_init(&solo_dev->vidq);
......
......@@ -979,7 +979,7 @@ int tw68_video_init2(struct tw68_dev *dev, int video_nr)
dev->vidq.ops = &tw68_video_qops;
dev->vidq.mem_ops = &vb2_dma_sg_memops;
dev->vidq.drv_priv = dev;
dev->vidq.gfp_flags = __GFP_DMA32;
dev->vidq.gfp_flags = __GFP_DMA32 | __GFP_KSWAPD_RECLAIM;
dev->vidq.buf_struct_size = sizeof(struct tw68_buf);
dev->vidq.lock = &dev->lock;
dev->vidq.min_buffers_needed = 2;
......
......@@ -1188,8 +1188,7 @@ EXPORT_SYMBOL_GPL(mtd_writev);
*/
void *mtd_kmalloc_up_to(const struct mtd_info *mtd, size_t *size)
{
gfp_t flags = __GFP_NOWARN | __GFP_WAIT |
__GFP_NORETRY | __GFP_NO_KSWAPD;
gfp_t flags = __GFP_NOWARN | __GFP_DIRECT_RECLAIM | __GFP_NORETRY;
size_t min_alloc = max_t(size_t, mtd->writesize, PAGE_SIZE);
void *kbuf;
......
......@@ -691,7 +691,7 @@ static void *bnx2x_frag_alloc(const struct bnx2x_fastpath *fp, gfp_t gfp_mask)
{
if (fp->rx_frag_size) {
/* GFP_KERNEL allocations are used only during initialization */
if (unlikely(gfp_mask & __GFP_WAIT))
if (unlikely(gfpflags_allow_blocking(gfp_mask)))
return (void *)__get_free_page(gfp_mask);
return netdev_alloc_frag(fp->rx_frag_size);
......
......@@ -27,7 +27,7 @@
#include "ion_priv.h"
static gfp_t high_order_gfp_flags = (GFP_HIGHUSER | __GFP_ZERO | __GFP_NOWARN |
__GFP_NORETRY) & ~__GFP_WAIT;
__GFP_NORETRY) & ~__GFP_DIRECT_RECLAIM;
static gfp_t low_order_gfp_flags = (GFP_HIGHUSER | __GFP_ZERO | __GFP_NOWARN);
static const unsigned int orders[] = {8, 4, 0};
static const int num_orders = ARRAY_SIZE(orders);
......
......@@ -95,7 +95,7 @@ do { \
do { \
LASSERT(!in_interrupt() || \
((size) <= LIBCFS_VMALLOC_SIZE && \
((mask) & __GFP_WAIT) == 0)); \
!gfpflags_allow_blocking(mask))); \
} while (0)
#define LIBCFS_ALLOC_POST(ptr, size) \
......
......@@ -2244,7 +2244,7 @@ static int u132_urb_enqueue(struct usb_hcd *hcd, struct urb *urb,
{
struct u132 *u132 = hcd_to_u132(hcd);
if (irqs_disabled()) {
if (__GFP_WAIT & mem_flags) {
if (gfpflags_allow_blocking(mem_flags)) {
printk(KERN_ERR "invalid context for function that might sleep\n");
return -EINVAL;
}
......
......@@ -99,7 +99,7 @@ static int vmlfb_alloc_vram_area(struct vram_area *va, unsigned max_order,
* below the first 16MB.
*/
flags = __GFP_DMA | __GFP_HIGH;
flags = __GFP_DMA | __GFP_HIGH | __GFP_KSWAPD_RECLAIM;
va->logical =
__get_free_pages(flags, --max_order);
} while (va->logical == 0 && max_order > min_order);
......
......@@ -2572,7 +2572,7 @@ int open_ctree(struct super_block *sb,
fs_info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL;
fs_info->avg_delayed_ref_runtime = NSEC_PER_SEC >> 6; /* div by 64 */
/* readahead state */
INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT);
INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
spin_lock_init(&fs_info->reada_lock);
fs_info->thread_pool_size = min_t(unsigned long,
......
......@@ -594,7 +594,7 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
if (bits & (EXTENT_IOBITS | EXTENT_BOUNDARY))
clear = 1;
again:
if (!prealloc && (mask & __GFP_WAIT)) {
if (!prealloc && gfpflags_allow_blocking(mask)) {
/*
* Don't care for allocation failure here because we might end
* up not needing the pre-allocated extent state at all, which
......@@ -718,7 +718,7 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
if (start > end)
goto out;
spin_unlock(&tree->lock);
if (mask & __GFP_WAIT)
if (gfpflags_allow_blocking(mask))
cond_resched();
goto again;
}
......@@ -850,7 +850,7 @@ __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
bits |= EXTENT_FIRST_DELALLOC;
again:
if (!prealloc && (mask & __GFP_WAIT)) {
if (!prealloc && gfpflags_allow_blocking(mask)) {
prealloc = alloc_extent_state(mask);
BUG_ON(!prealloc);
}
......@@ -1028,7 +1028,7 @@ __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
if (start > end)
goto out;
spin_unlock(&tree->lock);
if (mask & __GFP_WAIT)
if (gfpflags_allow_blocking(mask))
cond_resched();
goto again;
}
......@@ -1076,7 +1076,7 @@ int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
btrfs_debug_check_extent_io_range(tree, start, end);
again:
if (!prealloc && (mask & __GFP_WAIT)) {
if (!prealloc && gfpflags_allow_blocking(mask)) {
/*
* Best effort, don't worry if extent state allocation fails
* here for the first iteration. We might have a cached state
......@@ -1253,7 +1253,7 @@ int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
if (start > end)
goto out;
spin_unlock(&tree->lock);
if (mask & __GFP_WAIT)
if (gfpflags_allow_blocking(mask))
cond_resched();
first_iteration = false;
goto again;
......@@ -4319,7 +4319,7 @@ int try_release_extent_mapping(struct extent_map_tree *map,
u64 start = page_offset(page);
u64 end = start + PAGE_CACHE_SIZE - 1;
if ((mask & __GFP_WAIT) &&
if (gfpflags_allow_blocking(mask) &&
page->mapping->host->i_size > 16 * 1024 * 1024) {
u64 len;
while (start <= end) {
......
......@@ -156,8 +156,8 @@ static struct btrfs_device *__alloc_device(void)
spin_lock_init(&dev->reada_lock);
atomic_set(&dev->reada_in_flight, 0);
atomic_set(&dev->dev_stats_ccnt, 0);
INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_WAIT);
INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_WAIT);
INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
return dev;
}
......
......@@ -1058,7 +1058,7 @@ static int bdev_try_to_free_page(struct super_block *sb, struct page *page,
return 0;
if (journal)
return jbd2_journal_try_to_free_buffers(journal, page,
wait & ~__GFP_WAIT);
wait & ~__GFP_DIRECT_RECLAIM);
return try_to_free_buffers(page);
}
......
......@@ -111,7 +111,7 @@ struct fscache_cookie *__fscache_acquire_cookie(
/* radix tree insertion won't use the preallocation pool unless it's
* told it may not wait */
INIT_RADIX_TREE(&cookie->stores, GFP_NOFS & ~__GFP_WAIT);
INIT_RADIX_TREE(&cookie->stores, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
switch (cookie->def->type) {
case FSCACHE_COOKIE_TYPE_INDEX:
......
......@@ -58,7 +58,7 @@ bool release_page_wait_timeout(struct fscache_cookie *cookie, struct page *page)
/*
* decide whether a page can be released, possibly by cancelling a store to it
* - we're allowed to sleep if __GFP_WAIT is flagged
* - we're allowed to sleep if __GFP_DIRECT_RECLAIM is flagged
*/
bool __fscache_maybe_release_page(struct fscache_cookie *cookie,
struct page *page,
......@@ -122,7 +122,7 @@ bool __fscache_maybe_release_page(struct fscache_cookie *cookie,
* allocator as the work threads writing to the cache may all end up
* sleeping on memory allocation, so we may need to impose a timeout
* too. */
if (!(gfp & __GFP_WAIT) || !(gfp & __GFP_FS)) {
if (!(gfp & __GFP_DIRECT_RECLAIM) || !(gfp & __GFP_FS)) {
fscache_stat(&fscache_n_store_vmscan_busy);
return false;
}
......@@ -132,7 +132,7 @@ bool __fscache_maybe_release_page(struct fscache_cookie *cookie,
_debug("fscache writeout timeout page: %p{%lx}",
page, page->index);
gfp &= ~__GFP_WAIT;
gfp &= ~__GFP_DIRECT_RECLAIM;
goto try_again;
}
EXPORT_SYMBOL(__fscache_maybe_release_page);
......
......@@ -1937,8 +1937,8 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
* @journal: journal for operation
* @page: to try and free
* @gfp_mask: we use the mask to detect how hard should we try to release
* buffers. If __GFP_WAIT and __GFP_FS is set, we wait for commit code to
* release the buffers.
* buffers. If __GFP_DIRECT_RECLAIM and __GFP_FS is set, we wait for commit
* code to release the buffers.
*
*
* For all the buffers on this page,
......
......@@ -473,8 +473,8 @@ static int nfs_release_page(struct page *page, gfp_t gfp)
dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page);
/* Always try to initiate a 'commit' if relevant, but only
* wait for it if __GFP_WAIT is set. Even then, only wait 1
* second and only if the 'bdi' is not congested.
* wait for it if the caller allows blocking. Even then,
* only wait 1 second and only if the 'bdi' is not congested.
* Waiting indefinitely can cause deadlocks when the NFS
* server is on this machine, when a new TCP connection is
* needed and in other rare cases. There is no particular
......@@ -484,7 +484,7 @@ static int nfs_release_page(struct page *page, gfp_t gfp)
if (mapping) {
struct nfs_server *nfss = NFS_SERVER(mapping->host);
nfs_commit_inode(mapping->host, 0);
if ((gfp & __GFP_WAIT) &&
if (gfpflags_allow_blocking(gfp) &&
!bdi_write_congested(&nfss->backing_dev_info)) {
wait_on_page_bit_killable_timeout(page, PG_private,
HZ);
......
......@@ -525,7 +525,7 @@ xfs_qm_shrink_scan(
unsigned long freed;
int error;
if ((sc->gfp_mask & (__GFP_FS|__GFP_WAIT)) != (__GFP_FS|__GFP_WAIT))
if ((sc->gfp_mask & (__GFP_FS|__GFP_DIRECT_RECLAIM)) != (__GFP_FS|__GFP_DIRECT_RECLAIM))
return 0;
INIT_LIST_HEAD(&isol.buffers);
......
......@@ -29,12 +29,13 @@ struct vm_area_struct;
#define ___GFP_NOMEMALLOC 0x10000u
#define ___GFP_HARDWALL 0x20000u
#define ___GFP_THISNODE 0x40000u
#define ___GFP_WAIT 0x80000u
#define ___GFP_ATOMIC 0x80000u
#define ___GFP_NOACCOUNT 0x100000u
#define ___GFP_NOTRACK 0x200000u
#define ___GFP_NO_KSWAPD 0x400000u
#define ___GFP_DIRECT_RECLAIM 0x400000u
#define ___GFP_OTHER_NODE 0x800000u
#define ___GFP_WRITE 0x1000000u
#define ___GFP_KSWAPD_RECLAIM 0x2000000u
/* If the above are modified, __GFP_BITS_SHIFT may need updating */
/*
......@@ -71,7 +72,7 @@ struct vm_area_struct;
* __GFP_MOVABLE: Flag that this page will be movable by the page migration
* mechanism or reclaimed
*/
#define __GFP_WAIT ((__force gfp_t)___GFP_WAIT) /* Can wait and reschedule? */
#define __GFP_ATOMIC ((__force gfp_t)___GFP_ATOMIC) /* Caller cannot wait or reschedule */
#define __GFP_HIGH ((__force gfp_t)___GFP_HIGH) /* Should access emergency pools? */
#define __GFP_IO ((__force gfp_t)___GFP_IO) /* Can start physical IO? */
#define __GFP_FS ((__force gfp_t)___GFP_FS) /* Can call down to low-level FS? */
......@@ -94,23 +95,37 @@ struct vm_area_struct;
#define __GFP_NOACCOUNT ((__force gfp_t)___GFP_NOACCOUNT) /* Don't account to kmemcg */
#define __GFP_NOTRACK ((__force gfp_t)___GFP_NOTRACK) /* Don't track with kmemcheck */
#define __GFP_NO_KSWAPD ((__force gfp_t)___GFP_NO_KSWAPD)
#define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) /* On behalf of other node */
#define __GFP_WRITE ((__force gfp_t)___GFP_WRITE) /* Allocator intends to dirty page */
/*
* A caller that is willing to wait may enter direct reclaim and will
* wake kswapd to reclaim pages in the background until the high
* watermark is met. A caller may wish to clear __GFP_DIRECT_RECLAIM to
* avoid unnecessary delays when a fallback option is available but
* still allow kswapd to reclaim in the background. The kswapd flag
* can be cleared when the reclaiming of pages would cause unnecessary
* disruption.
*/
#define __GFP_WAIT ((__force gfp_t)(___GFP_DIRECT_RECLAIM|___GFP_KSWAPD_RECLAIM))
#define __GFP_DIRECT_RECLAIM ((__force gfp_t)___GFP_DIRECT_RECLAIM) /* Caller can reclaim */
#define __GFP_KSWAPD_RECLAIM ((__force gfp_t)___GFP_KSWAPD_RECLAIM) /* kswapd can wake */
/*
* This may seem redundant, but it's a way of annotating false positives vs.
* allocations that simply cannot be supported (e.g. page tables).
*/
#define __GFP_NOTRACK_FALSE_POSITIVE (__GFP_NOTRACK)
#define __GFP_BITS_SHIFT 25 /* Room for N __GFP_FOO bits */
#define __GFP_BITS_SHIFT 26 /* Room for N __GFP_FOO bits */
#define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
/* This equals 0, but use constants in case they ever change */
#define GFP_NOWAIT (GFP_ATOMIC & ~__GFP_HIGH)
/* GFP_ATOMIC means both !wait (__GFP_WAIT not set) and use emergency pool */
#define GFP_ATOMIC (__GFP_HIGH)
/*
* GFP_ATOMIC callers can not sleep, need the allocation to succeed.
* A lower watermark is applied to allow access to "atomic reserves"
*/
#define GFP_ATOMIC (__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM)
#define GFP_NOWAIT (__GFP_KSWAPD_RECLAIM)
#define GFP_NOIO (__GFP_WAIT)
#define GFP_NOFS (__GFP_WAIT | __GFP_IO)
#define GFP_KERNEL (__GFP_WAIT | __GFP_IO | __GFP_FS)
......@@ -119,10 +134,10 @@ struct vm_area_struct;
#define GFP_USER (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL)
#define GFP_HIGHUSER (GFP_USER | __GFP_HIGHMEM)
#define GFP_HIGHUSER_MOVABLE (GFP_HIGHUSER | __GFP_MOVABLE)
#define GFP_IOFS (__GFP_IO | __GFP_FS)
#define GFP_TRANSHUGE (GFP_HIGHUSER_MOVABLE | __GFP_COMP | \
__GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN | \
__GFP_NO_KSWAPD)
#define GFP_IOFS (__GFP_IO | __GFP_FS | __GFP_KSWAPD_RECLAIM)
#define GFP_TRANSHUGE ((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \
__GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN) & \
~__GFP_KSWAPD_RECLAIM)
/* This mask makes up all the page movable related flags */
#define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE)
......@@ -164,6 +179,11 @@ static inline int gfpflags_to_migratetype(const gfp_t gfp_flags)
return (gfp_flags & GFP_MOVABLE_MASK) >> GFP_MOVABLE_SHIFT;
}
static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags)
{
return gfp_flags & __GFP_DIRECT_RECLAIM;
}
#ifdef CONFIG_HIGHMEM
#define OPT_ZONE_HIGHMEM ZONE_HIGHMEM
#else
......
......@@ -1224,7 +1224,7 @@ static inline int skb_cloned(const struct sk_buff *skb)
static inline int skb_unclone(struct sk_buff *skb, gfp_t pri)
{
might_sleep_if(pri & __GFP_WAIT);
might_sleep_if(gfpflags_allow_blocking(pri));
if (skb_cloned(skb))
return pskb_expand_head(skb, 0, 0, pri);
......@@ -1308,7 +1308,7 @@ static inline int skb_shared(const struct sk_buff *skb)
*/
static inline struct sk_buff *skb_share_check(struct sk_buff *skb, gfp_t pri)