Commit 0a835c4f authored by Matthew Wilcox's avatar Matthew Wilcox

Reimplement IDR and IDA using the radix tree

The IDR is very similar to the radix tree.  It has some functionality that
the radix tree did not have (alloc next free, cyclic allocation, a
callback-based for_each, destroy tree), which is readily implementable on
top of the radix tree.  A few small changes were needed in order to use a
tag to represent nodes with free space below them.  More extensive
changes were needed to support storing NULL as a valid entry in an IDR.
Plain radix trees still interpret NULL as a not-present entry.

The IDA is reimplemented as a client of the newly enhanced radix tree.  As
in the current implementation, it uses a bitmap at the last level of the
tree.
Signed-off-by: default avatarMatthew Wilcox <willy@infradead.org>
Signed-off-by: default avatarMatthew Wilcox <mawilcox@microsoft.com>
Tested-by: default avatarKirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Konstantin Khlebnikov <koct9i@gmail.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
parent 0ac398ef
......@@ -12,47 +12,28 @@
#ifndef __IDR_H__
#define __IDR_H__
#include <linux/types.h>
#include <linux/bitops.h>
#include <linux/init.h>
#include <linux/rcupdate.h>
#include <linux/radix-tree.h>
#include <linux/gfp.h>
struct idr {
struct radix_tree_root idr_rt;
unsigned int idr_next;
};
/*
* Using 6 bits at each layer allows us to allocate 7 layers out of each page.
* 8 bits only gave us 3 layers out of every pair of pages, which is less
* efficient except for trees with a largest element between 192-255 inclusive.
* The IDR API does not expose the tagging functionality of the radix tree
* to users. Use tag 0 to track whether a node has free space below it.
*/
#define IDR_BITS 6
#define IDR_SIZE (1 << IDR_BITS)
#define IDR_MASK ((1 << IDR_BITS)-1)
struct idr_layer {
int prefix; /* the ID prefix of this idr_layer */
int layer; /* distance from leaf */
struct idr_layer __rcu *ary[1<<IDR_BITS];
int count; /* When zero, we can release it */
union {
/* A zero bit means "space here" */
DECLARE_BITMAP(bitmap, IDR_SIZE);
struct rcu_head rcu_head;
};
};
#define IDR_FREE 0
struct idr {
struct idr_layer __rcu *hint; /* the last layer allocated from */
struct idr_layer __rcu *top;
int layers; /* only valid w/o concurrent changes */
int cur; /* current pos for cyclic allocation */
spinlock_t lock;
int id_free_cnt;
struct idr_layer *id_free;
};
/* Set the IDR flag and the IDR_FREE tag */
#define IDR_RT_MARKER ((__force gfp_t)(3 << __GFP_BITS_SHIFT))
#define IDR_INIT(name) \
#define IDR_INIT \
{ \
.lock = __SPIN_LOCK_UNLOCKED(name.lock), \
.idr_rt = RADIX_TREE_INIT(IDR_RT_MARKER) \
}
#define DEFINE_IDR(name) struct idr name = IDR_INIT(name)
#define DEFINE_IDR(name) struct idr name = IDR_INIT
/**
* idr_get_cursor - Return the current position of the cyclic allocator
......@@ -62,9 +43,9 @@ struct idr {
* idr_alloc_cyclic() if it is free (otherwise the search will start from
* this position).
*/
static inline unsigned int idr_get_cursor(struct idr *idr)
static inline unsigned int idr_get_cursor(const struct idr *idr)
{
return READ_ONCE(idr->cur);
return READ_ONCE(idr->idr_next);
}
/**
......@@ -77,7 +58,7 @@ static inline unsigned int idr_get_cursor(struct idr *idr)
*/
static inline void idr_set_cursor(struct idr *idr, unsigned int val)
{
WRITE_ONCE(idr->cur, val);
WRITE_ONCE(idr->idr_next, val);
}
/**
......@@ -97,22 +78,31 @@ static inline void idr_set_cursor(struct idr *idr, unsigned int val)
* period).
*/
/*
* This is what we export.
*/
void *idr_find_slowpath(struct idr *idp, int id);
void idr_preload(gfp_t gfp_mask);
int idr_alloc(struct idr *idp, void *ptr, int start, int end, gfp_t gfp_mask);
int idr_alloc_cyclic(struct idr *idr, void *ptr, int start, int end, gfp_t gfp_mask);
int idr_for_each(struct idr *idp,
int idr_alloc(struct idr *, void *entry, int start, int end, gfp_t);
int idr_alloc_cyclic(struct idr *, void *entry, int start, int end, gfp_t);
int idr_for_each(const struct idr *,
int (*fn)(int id, void *p, void *data), void *data);
void *idr_get_next(struct idr *idp, int *nextid);
void *idr_replace(struct idr *idp, void *ptr, int id);
void idr_remove(struct idr *idp, int id);
void idr_destroy(struct idr *idp);
void idr_init(struct idr *idp);
bool idr_is_empty(struct idr *idp);
void *idr_get_next(struct idr *, int *nextid);
void *idr_replace(struct idr *, void *, int id);
void idr_destroy(struct idr *);
static inline void idr_remove(struct idr *idr, int id)
{
radix_tree_delete(&idr->idr_rt, id);
}
static inline void idr_init(struct idr *idr)
{
INIT_RADIX_TREE(&idr->idr_rt, IDR_RT_MARKER);
idr->idr_next = 0;
}
static inline bool idr_is_empty(const struct idr *idr)
{
return radix_tree_empty(&idr->idr_rt) &&
radix_tree_tagged(&idr->idr_rt, IDR_FREE);
}
/**
* idr_preload_end - end preload section started with idr_preload()
......@@ -137,19 +127,14 @@ static inline void idr_preload_end(void)
* This function can be called under rcu_read_lock(), given that the leaf
* pointers lifetimes are correctly managed.
*/
static inline void *idr_find(struct idr *idr, int id)
static inline void *idr_find(const struct idr *idr, int id)
{
struct idr_layer *hint = rcu_dereference_raw(idr->hint);
if (hint && (id & ~IDR_MASK) == hint->prefix)
return rcu_dereference_raw(hint->ary[id & IDR_MASK]);
return idr_find_slowpath(idr, id);
return radix_tree_lookup(&idr->idr_rt, id);
}
/**
* idr_for_each_entry - iterate over an idr's elements of a given type
* @idp: idr handle
* @idr: idr handle
* @entry: the type * to use as cursor
* @id: id entry's key
*
......@@ -157,57 +142,60 @@ static inline void *idr_find(struct idr *idr, int id)
* after normal terminatinon @entry is left with the value NULL. This
* is convenient for a "not found" value.
*/
#define idr_for_each_entry(idp, entry, id) \
for (id = 0; ((entry) = idr_get_next(idp, &(id))) != NULL; ++id)
#define idr_for_each_entry(idr, entry, id) \
for (id = 0; ((entry) = idr_get_next(idr, &(id))) != NULL; ++id)
/**
* idr_for_each_entry - continue iteration over an idr's elements of a given type
* @idp: idr handle
* idr_for_each_entry_continue - continue iteration over an idr's elements of a given type
* @idr: idr handle
* @entry: the type * to use as cursor
* @id: id entry's key
*
* Continue to iterate over list of given type, continuing after
* the current position.
*/
#define idr_for_each_entry_continue(idp, entry, id) \
for ((entry) = idr_get_next((idp), &(id)); \
#define idr_for_each_entry_continue(idr, entry, id) \
for ((entry) = idr_get_next((idr), &(id)); \
entry; \
++id, (entry) = idr_get_next((idp), &(id)))
++id, (entry) = idr_get_next((idr), &(id)))
/*
* IDA - IDR based id allocator, use when translation from id to
* pointer isn't necessary.
*
* IDA_BITMAP_LONGS is calculated to be one less to accommodate
* ida_bitmap->nr_busy so that the whole struct fits in 128 bytes.
*/
#define IDA_CHUNK_SIZE 128 /* 128 bytes per chunk */
#define IDA_BITMAP_LONGS (IDA_CHUNK_SIZE / sizeof(long) - 1)
#define IDA_BITMAP_LONGS (IDA_CHUNK_SIZE / sizeof(long))
#define IDA_BITMAP_BITS (IDA_BITMAP_LONGS * sizeof(long) * 8)
struct ida_bitmap {
long nr_busy;
unsigned long bitmap[IDA_BITMAP_LONGS];
};
struct ida {
struct idr idr;
struct radix_tree_root ida_rt;
struct ida_bitmap *free_bitmap;
};
#define IDA_INIT(name) { .idr = IDR_INIT((name).idr), .free_bitmap = NULL, }
#define DEFINE_IDA(name) struct ida name = IDA_INIT(name)
#define IDA_INIT { \
.ida_rt = RADIX_TREE_INIT(IDR_RT_MARKER | GFP_NOWAIT), \
}
#define DEFINE_IDA(name) struct ida name = IDA_INIT
int ida_pre_get(struct ida *ida, gfp_t gfp_mask);
int ida_get_new_above(struct ida *ida, int starting_id, int *p_id);
void ida_remove(struct ida *ida, int id);
void ida_destroy(struct ida *ida);
void ida_init(struct ida *ida);
int ida_simple_get(struct ida *ida, unsigned int start, unsigned int end,
gfp_t gfp_mask);
void ida_simple_remove(struct ida *ida, unsigned int id);
static inline void ida_init(struct ida *ida)
{
INIT_RADIX_TREE(&ida->ida_rt, IDR_RT_MARKER | GFP_NOWAIT);
ida->free_bitmap = NULL;
}
/**
* ida_get_new - allocate new ID
* @ida: idr handle
......@@ -220,11 +208,8 @@ static inline int ida_get_new(struct ida *ida, int *p_id)
return ida_get_new_above(ida, 0, p_id);
}
static inline bool ida_is_empty(struct ida *ida)
static inline bool ida_is_empty(const struct ida *ida)
{
return idr_is_empty(&ida->idr);
return radix_tree_empty(&ida->ida_rt);
}
void __init idr_init_cache(void);
#endif /* __IDR_H__ */
......@@ -105,7 +105,10 @@ struct radix_tree_node {
unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS];
};
/* root tags are stored in gfp_mask, shifted by __GFP_BITS_SHIFT */
/* The top bits of gfp_mask are used to store the root tags and the IDR flag */
#define ROOT_IS_IDR ((__force gfp_t)(1 << __GFP_BITS_SHIFT))
#define ROOT_TAG_SHIFT (__GFP_BITS_SHIFT + 1)
struct radix_tree_root {
gfp_t gfp_mask;
struct radix_tree_node __rcu *rnode;
......@@ -358,10 +361,14 @@ int radix_tree_split(struct radix_tree_root *, unsigned long index,
unsigned new_order);
int radix_tree_join(struct radix_tree_root *, unsigned long index,
unsigned new_order, void *);
void **idr_get_free(struct radix_tree_root *, struct radix_tree_iter *,
gfp_t, int end);
#define RADIX_TREE_ITER_TAG_MASK 0x00FF /* tag index in lower byte */
#define RADIX_TREE_ITER_TAGGED 0x0100 /* lookup tagged slots */
#define RADIX_TREE_ITER_CONTIG 0x0200 /* stop at first hole */
enum {
RADIX_TREE_ITER_TAG_MASK = 0x0f, /* tag index in lower nybble */
RADIX_TREE_ITER_TAGGED = 0x10, /* lookup tagged slots */
RADIX_TREE_ITER_CONTIG = 0x20, /* stop at first hole */
};
/**
* radix_tree_iter_init - initialize radix tree iterator
......@@ -402,6 +409,40 @@ radix_tree_iter_init(struct radix_tree_iter *iter, unsigned long start)
void **radix_tree_next_chunk(const struct radix_tree_root *,
struct radix_tree_iter *iter, unsigned flags);
/**
* radix_tree_iter_lookup - look up an index in the radix tree
* @root: radix tree root
* @iter: iterator state
* @index: key to look up
*
* If @index is present in the radix tree, this function returns the slot
* containing it and updates @iter to describe the entry. If @index is not
* present, it returns NULL.
*/
static inline void **radix_tree_iter_lookup(const struct radix_tree_root *root,
struct radix_tree_iter *iter, unsigned long index)
{
radix_tree_iter_init(iter, index);
return radix_tree_next_chunk(root, iter, RADIX_TREE_ITER_CONTIG);
}
/**
* radix_tree_iter_find - find a present entry
* @root: radix tree root
* @iter: iterator state
* @index: start location
*
* This function returns the slot containing the entry with the lowest index
* which is at least @index. If @index is larger than any present entry, this
* function returns NULL. The @iter is updated to describe the entry found.
*/
static inline void **radix_tree_iter_find(const struct radix_tree_root *root,
struct radix_tree_iter *iter, unsigned long index)
{
radix_tree_iter_init(iter, index);
return radix_tree_next_chunk(root, iter, 0);
}
/**
* radix_tree_iter_retry - retry this chunk of the iteration
* @iter: iterator state
......
......@@ -553,7 +553,7 @@ asmlinkage __visible void __init start_kernel(void)
if (WARN(!irqs_disabled(),
"Interrupts were enabled *very* early, fixing it\n"))
local_irq_disable();
idr_init_cache();
radix_tree_init();
/*
* Allow workqueue creation and work item queueing/cancelling
......@@ -568,7 +568,6 @@ asmlinkage __visible void __init start_kernel(void)
trace_init();
context_tracking_init();
radix_tree_init();
/* init some links before init_ISA_irqs() */
early_irq_init();
init_IRQ();
......
This diff is collapsed.
This diff is collapsed.
......@@ -2,8 +2,8 @@
CFLAGS += -I. -I../../include -g -O2 -Wall -D_LGPL_SOURCE
LDFLAGS += -lpthread -lurcu
TARGETS = main
OFILES = main.o radix-tree.o linux.o test.o tag_check.o find_bit.o \
regression1.o regression2.o regression3.o multiorder.o \
OFILES = main.o radix-tree.o idr.o linux.o test.o tag_check.o find_bit.o \
regression1.o regression2.o regression3.o multiorder.o idr-test.o \
iteration_check.o benchmark.o
ifdef BENCHMARK
......@@ -23,7 +23,11 @@ vpath %.c ../../lib
$(OFILES): *.h */*.h \
../../include/linux/*.h \
../../include/asm/*.h \
../../../include/linux/radix-tree.h
../../../include/linux/radix-tree.h \
../../../include/linux/idr.h
radix-tree.c: ../../../lib/radix-tree.c
sed -e 's/^static //' -e 's/__always_inline //' -e 's/inline //' < $< > $@
idr.c: ../../../lib/idr.c
sed -e 's/^static //' -e 's/__always_inline //' -e 's/inline //' < $< > $@
/*
* idr-test.c: Test the IDR API
* Copyright (c) 2016 Matthew Wilcox <willy@infradead.org>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#include <linux/idr.h>
#include <linux/slab.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include "test.h"
#define DUMMY_PTR ((void *)0x12)
int item_idr_free(int id, void *p, void *data)
{
struct item *item = p;
assert(item->index == id);
free(p);
return 0;
}
void item_idr_remove(struct idr *idr, int id)
{
struct item *item = idr_find(idr, id);
assert(item->index == id);
idr_remove(idr, id);
free(item);
}
void idr_alloc_test(void)
{
unsigned long i;
DEFINE_IDR(idr);
assert(idr_alloc_cyclic(&idr, DUMMY_PTR, 0, 0x4000, GFP_KERNEL) == 0);
assert(idr_alloc_cyclic(&idr, DUMMY_PTR, 0x3ffd, 0x4000, GFP_KERNEL) == 0x3ffd);
idr_remove(&idr, 0x3ffd);
idr_remove(&idr, 0);
for (i = 0x3ffe; i < 0x4003; i++) {
int id;
struct item *item;
if (i < 0x4000)
item = item_create(i, 0);
else
item = item_create(i - 0x3fff, 0);
id = idr_alloc_cyclic(&idr, item, 1, 0x4000, GFP_KERNEL);
assert(id == item->index);
}
idr_for_each(&idr, item_idr_free, &idr);
idr_destroy(&idr);
}
void idr_replace_test(void)
{
DEFINE_IDR(idr);
idr_alloc(&idr, (void *)-1, 10, 11, GFP_KERNEL);
idr_replace(&idr, &idr, 10);
idr_destroy(&idr);
}
/*
* Unlike the radix tree, you can put a NULL pointer -- with care -- into
* the IDR. Some interfaces, like idr_find() do not distinguish between
* "present, value is NULL" and "not present", but that's exactly what some
* users want.
*/
void idr_null_test(void)
{
int i;
DEFINE_IDR(idr);
assert(idr_is_empty(&idr));
assert(idr_alloc(&idr, NULL, 0, 0, GFP_KERNEL) == 0);
assert(!idr_is_empty(&idr));
idr_remove(&idr, 0);
assert(idr_is_empty(&idr));
assert(idr_alloc(&idr, NULL, 0, 0, GFP_KERNEL) == 0);
assert(!idr_is_empty(&idr));
idr_destroy(&idr);
assert(idr_is_empty(&idr));
for (i = 0; i < 10; i++) {
assert(idr_alloc(&idr, NULL, 0, 0, GFP_KERNEL) == i);
}
assert(idr_replace(&idr, DUMMY_PTR, 3) == NULL);
assert(idr_replace(&idr, DUMMY_PTR, 4) == NULL);
assert(idr_replace(&idr, NULL, 4) == DUMMY_PTR);
assert(idr_replace(&idr, DUMMY_PTR, 11) == ERR_PTR(-ENOENT));
idr_remove(&idr, 5);
assert(idr_alloc(&idr, NULL, 0, 0, GFP_KERNEL) == 5);
idr_remove(&idr, 5);
for (i = 0; i < 9; i++) {
idr_remove(&idr, i);
assert(!idr_is_empty(&idr));
}
idr_remove(&idr, 8);
assert(!idr_is_empty(&idr));
idr_remove(&idr, 9);
assert(idr_is_empty(&idr));
assert(idr_alloc(&idr, NULL, 0, 0, GFP_KERNEL) == 0);
assert(idr_replace(&idr, DUMMY_PTR, 3) == ERR_PTR(-ENOENT));
assert(idr_replace(&idr, DUMMY_PTR, 0) == NULL);
assert(idr_replace(&idr, NULL, 0) == DUMMY_PTR);
idr_destroy(&idr);
assert(idr_is_empty(&idr));
for (i = 1; i < 10; i++) {
assert(idr_alloc(&idr, NULL, 1, 0, GFP_KERNEL) == i);
}
idr_destroy(&idr);
assert(idr_is_empty(&idr));
}
void idr_nowait_test(void)
{
unsigned int i;
DEFINE_IDR(idr);
idr_preload(GFP_KERNEL);
for (i = 0; i < 3; i++) {
struct item *item = item_create(i, 0);
assert(idr_alloc(&idr, item, i, i + 1, GFP_NOWAIT) == i);
}
idr_preload_end();
idr_for_each(&idr, item_idr_free, &idr);
idr_destroy(&idr);
}
void idr_checks(void)
{
unsigned long i;
DEFINE_IDR(idr);
for (i = 0; i < 10000; i++) {
struct item *item = item_create(i, 0);
assert(idr_alloc(&idr, item, 0, 20000, GFP_KERNEL) == i);
}
assert(idr_alloc(&idr, DUMMY_PTR, 5, 30, GFP_KERNEL) < 0);
for (i = 0; i < 5000; i++)
item_idr_remove(&idr, i);
idr_remove(&idr, 3);
idr_for_each(&idr, item_idr_free, &idr);
idr_destroy(&idr);
assert(idr_is_empty(&idr));
idr_remove(&idr, 3);
idr_remove(&idr, 0);
for (i = INT_MAX - 3UL; i < INT_MAX + 1UL; i++) {
struct item *item = item_create(i, 0);
assert(idr_alloc(&idr, item, i, i + 10, GFP_KERNEL) == i);
}
assert(idr_alloc(&idr, DUMMY_PTR, i - 2, i, GFP_KERNEL) == -ENOSPC);
idr_for_each(&idr, item_idr_free, &idr);
idr_destroy(&idr);
idr_destroy(&idr);
assert(idr_is_empty(&idr));
for (i = 1; i < 10000; i++) {
struct item *item = item_create(i, 0);
assert(idr_alloc(&idr, item, 1, 20000, GFP_KERNEL) == i);
}
idr_for_each(&idr, item_idr_free, &idr);
idr_destroy(&idr);
idr_replace_test();
idr_alloc_test();
idr_null_test();
idr_nowait_test();
}
/*
* Check that we get the correct error when we run out of memory doing
* allocations. To ensure we run out of memory, just "forget" to preload.
* The first test is for not having a bitmap available, and the second test
* is for not being able to allocate a level of the radix tree.
*/
void ida_check_nomem(void)
{
DEFINE_IDA(ida);
int id, err;
err = ida_get_new(&ida, &id);
assert(err == -EAGAIN);
err = ida_get_new_above(&ida, 1UL << 30, &id);
assert(err == -EAGAIN);
}
/*
* Check what happens when we fill a leaf and then delete it. This may
* discover mishandling of IDR_FREE.
*/
void ida_check_leaf(void)
{
DEFINE_IDA(ida);
int id;
unsigned long i;
for (i = 0; i < IDA_BITMAP_BITS; i++) {
assert(ida_pre_get(&ida, GFP_KERNEL));
assert(!ida_get_new(&ida, &id));
assert(id == i);
}
ida_destroy(&ida);
assert(ida_is_empty(&ida));
assert(ida_pre_get(&ida, GFP_KERNEL));
assert(!ida_get_new(&ida, &id));
assert(id == 0);
ida_destroy(&ida);
assert(ida_is_empty(&ida));
}
/*
* Check allocations up to and slightly above the maximum allowed (2^31-1) ID.
* Allocating up to 2^31-1 should succeed, and then allocating the next one
* should fail.
*/
void ida_check_max(void)
{
DEFINE_IDA(ida);
int id, err;
unsigned long i, j;
for (j = 1; j < 65537; j *= 2) {
unsigned long base = (1UL << 31) - j;
for (i = 0; i < j; i++) {
assert(ida_pre_get(&ida, GFP_KERNEL));
assert(!ida_get_new_above(&ida, base, &id));
assert(id == base + i);
}
assert(ida_pre_get(&ida, GFP_KERNEL));
err = ida_get_new_above(&ida, base, &id);
assert(err == -ENOSPC);
ida_destroy(&ida);
assert(ida_is_empty(&ida));
rcu_barrier();
}
}
void ida_checks(void)
{
DEFINE_IDA(ida);
int id;
unsigned long i;
radix_tree_cpu_dead(1);
ida_check_nomem();
for (i = 0; i < 10000; i++) {
assert(ida_pre_get(&ida, GFP_KERNEL));
assert(!ida_get_new(&ida, &id));
assert(id == i);
}
ida_remove(&ida, 20);
ida_remove(&ida, 21);
for (i = 0; i < 3; i++) {
assert(ida_pre_get(&ida, GFP_KERNEL));
assert(!ida_get_new(&ida, &id));
if (i == 2)
assert(id == 10000);
}
for (i = 0; i < 5000; i++)
ida_remove(&ida, i);
assert(ida_pre_get(&ida, GFP_KERNEL));
assert(!ida_get_new_above(&ida, 5000, &id));
assert(id == 10001);
ida_destroy(&ida);
assert(ida_is_empty(&ida));
assert(ida_pre_get(&ida, GFP_KERNEL));
assert(!ida_get_new_above(&ida, 1, &id));
assert(id == 1);
ida_remove(&ida, id);
assert(ida_is_empty(&ida));
ida_destroy(&ida);
assert(ida_is_empty(&ida));
assert(ida_pre_get(&ida, GFP_KERNEL));
assert(!ida_get_new_above(&ida, 1, &id));