Commit d080827f authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'libnvdimm-for-4.5' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm

Pull libnvdimm updates from Dan Williams:
 "The bulk of this has appeared in -next and independently received a
  build success notification from the kbuild robot.  The 'for-4.5/block-
  dax' topic branch was rebased over the weekend to drop the "block
  device end-of-life" rework that Al would like to see re-implemented
  with a notifier, and to address bug reports against the badblocks
  integration.

  There is pending feedback against "libnvdimm: Add a poison list and
  export badblocks" received last week.  Linda identified some localized
  fixups that we will handle incrementally.

  Summary:

   - Media error handling: The 'badblocks' implementation that
     originated in md-raid is up-levelled to a generic capability of a
     block device.  This initial implementation is limited to being
     consulted in the pmem block-i/o path.  Later, 'badblocks' will be
     consulted when creating dax mappings.

   - Raw block device dax: For virtualization and other cases that want
     large contiguous mappings of persistent memory, add the capability
     to dax-mmap a block device directly.

   - Increased /dev/mem restrictions: Add an option to treat all
     io-memory as IORESOURCE_EXCLUSIVE, i.e. disable /dev/mem access
     while a driver is actively using an address range.  This behavior
     is controlled via the new CONFIG_IO_STRICT_DEVMEM option and can be
     overridden by the existing "iomem=relaxed" kernel command line
     option.

   - Miscellaneous fixes include a 'pfn'-device huge page alignment fix,
     block device shutdown crash fix, and other small libnvdimm fixes"

* tag 'libnvdimm-for-4.5' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (32 commits)
  block: kill disk_{check|set|clear|alloc}_badblocks
  libnvdimm, pmem: nvdimm_read_bytes() badblocks support
  pmem, dax: disable dax in the presence of bad blocks
  pmem: fail io-requests to known bad blocks
  libnvdimm: convert to statically allocated badblocks
  libnvdimm: don't fail init for full badblocks list
  block, badblocks: introduce devm_init_badblocks
  block: clarify badblocks lifetime
  badblocks: rename badblocks_free to badblocks_exit
  libnvdimm, pmem: move definition of nvdimm_namespace_add_poison to nd.h
  libnvdimm: Add a poison list and export badblocks
  nfit_test: Enable DSMs for all test NFITs
  md: convert to use the generic badblocks code
  block: Add badblock management for gendisks
  badblocks: Add core badblock management code
  block: fix del_gendisk() vs blkdev_ioctl crash
  block: enable dax for raw block devices
  block: introduce bdev_file_inode()
  restrict /dev/mem to idle io memory ranges
  arch: consolidate CONFIG_STRICT_DEVM in lib/Kconfig.debug
  ...
parents cbd88cd4 8b63b6bf
......@@ -2,6 +2,7 @@ config ARM
bool
default y
select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
select ARCH_HAS_DEVMEM_IS_ALLOWED
select ARCH_HAS_ELF_RANDOMIZE
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
select ARCH_HAVE_CUSTOM_GPIO_H
......
......@@ -15,20 +15,6 @@ config ARM_PTDUMP
kernel.
If in doubt, say "N"
config STRICT_DEVMEM
bool "Filter access to /dev/mem"
depends on MMU
---help---
If this option is disabled, you allow userspace (root) access to all
of memory, including kernel and userspace memory. Accidental
access to this is obviously disastrous, but specific access can
be used by people debugging the kernel.
If this option is switched on, the /dev/mem file only allows
userspace access to memory mapped peripherals.
If in doubt, say Y.
# RMK wants arm kernels compiled with frame pointers or stack unwinding.
# If you know what you are doing and are willing to live without stack
# traces, you can get a slightly smaller kernel by setting this option to
......
......@@ -3,6 +3,7 @@ config ARM64
select ACPI_CCA_REQUIRED if ACPI
select ACPI_GENERIC_GSI if ACPI
select ACPI_REDUCED_HARDWARE_ONLY if ACPI
select ARCH_HAS_DEVMEM_IS_ALLOWED
select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
select ARCH_HAS_ELF_RANDOMIZE
select ARCH_HAS_GCOV_PROFILE_ALL
......
......@@ -14,20 +14,6 @@ config ARM64_PTDUMP
kernel.
If in doubt, say "N"
config STRICT_DEVMEM
bool "Filter access to /dev/mem"
depends on MMU
help
If this option is disabled, you allow userspace (root) access to all
of memory, including kernel and userspace memory. Accidental
access to this is obviously disastrous, but specific access can
be used by people debugging the kernel.
If this option is switched on, the /dev/mem file only allows
userspace access to memory mapped peripherals.
If in doubt, say Y.
config PID_IN_CONTEXTIDR
bool "Write the current PID to the CONTEXTIDR register"
help
......
......@@ -10,6 +10,7 @@ config FRV
select HAVE_DEBUG_BUGVERBOSE
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select GENERIC_CPU_DEVICES
select ARCH_HAS_DEVMEM_IS_ALLOWED
select ARCH_WANT_IPC_PARSE_VERSION
select OLD_SIGSUSPEND3
select OLD_SIGACTION
......
......@@ -13,6 +13,7 @@ config M32R
select GENERIC_IRQ_PROBE
select GENERIC_IRQ_SHOW
select GENERIC_ATOMIC64
select ARCH_HAS_DEVMEM_IS_ALLOWED
select ARCH_USES_GETTIMEOFFSET
select MODULES_USE_ELF_RELA
select HAVE_DEBUG_STACKOVERFLOW
......
......@@ -159,6 +159,7 @@ config PPC
select EDAC_SUPPORT
select EDAC_ATOMIC_SCRUB
select ARCH_HAS_DMA_SET_COHERENT_MASK
select ARCH_HAS_DEVMEM_IS_ALLOWED
select HAVE_ARCH_SECCOMP_FILTER
config GENERIC_CSUM
......
......@@ -335,18 +335,6 @@ config PPC_EARLY_DEBUG_CPM_ADDR
platform probing is done, all platforms selected must
share the same address.
config STRICT_DEVMEM
def_bool y
prompt "Filter access to /dev/mem"
help
This option restricts access to /dev/mem. If this option is
disabled, you allow userspace access to all memory, including
kernel and userspace memory. Accidental memory access is likely
to be disastrous.
Memory access is required for experts who want to debug the kernel.
If you are unsure, say Y.
config FAIL_IOMMU
bool "Fault-injection capability for IOMMU"
depends on FAULT_INJECTION
......
......@@ -66,6 +66,7 @@ config S390
def_bool y
select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS
select ARCH_HAS_DEVMEM_IS_ALLOWED
select ARCH_HAS_ELF_RANDOMIZE
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_SG_CHAIN
......
......@@ -5,18 +5,6 @@ config TRACE_IRQFLAGS_SUPPORT
source "lib/Kconfig.debug"
config STRICT_DEVMEM
def_bool y
prompt "Filter access to /dev/mem"
---help---
This option restricts access to /dev/mem. If this option is
disabled, you allow userspace access to all memory, including
kernel and userspace memory. Accidental memory access is likely
to be disastrous.
Memory access is required for experts who want to debug the kernel.
If you are unsure, say Y.
config S390_PTDUMP
bool "Export kernel pagetable layout to userspace via debugfs"
depends on DEBUG_KERNEL
......
......@@ -19,6 +19,7 @@ config TILE
select VIRT_TO_BUS
select SYS_HYPERVISOR
select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS
select ARCH_HAS_DEVMEM_IS_ALLOWED
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select GENERIC_CLOCKEVENTS
select MODULES_USE_ELF_RELA
......@@ -116,9 +117,6 @@ config ARCH_DISCONTIGMEM_DEFAULT
config TRACE_IRQFLAGS_SUPPORT
def_bool y
config STRICT_DEVMEM
def_bool y
# SMP is required for Tilera Linux.
config SMP
def_bool y
......
config UNICORE32
def_bool y
select ARCH_HAS_DEVMEM_IS_ALLOWED
select ARCH_MIGHT_HAVE_PC_PARPORT
select ARCH_MIGHT_HAVE_PC_SERIO
select HAVE_MEMBLOCK
......
......@@ -2,20 +2,6 @@ menu "Kernel hacking"
source "lib/Kconfig.debug"
config STRICT_DEVMEM
bool "Filter access to /dev/mem"
depends on MMU
---help---
If this option is disabled, you allow userspace (root) access to all
of memory, including kernel and userspace memory. Accidental
access to this is obviously disastrous, but specific access can
be used by people debugging the kernel.
If this option is switched on, the /dev/mem file only allows
userspace access to memory mapped peripherals.
If in doubt, say Y.
config EARLY_PRINTK
def_bool DEBUG_OCD
help
......
......@@ -24,6 +24,7 @@ config X86
select ARCH_DISCARD_MEMBLOCK
select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS
select ARCH_HAS_DEVMEM_IS_ALLOWED
select ARCH_HAS_ELF_RANDOMIZE
select ARCH_HAS_FAST_MULTIPLIER
select ARCH_HAS_GCOV_PROFILE_ALL
......
......@@ -5,23 +5,6 @@ config TRACE_IRQFLAGS_SUPPORT
source "lib/Kconfig.debug"
config STRICT_DEVMEM
bool "Filter access to /dev/mem"
---help---
If this option is disabled, you allow userspace (root) access to all
of memory, including kernel and userspace memory. Accidental
access to this is obviously disastrous, but specific access can
be used by people debugging the kernel. Note that with PAT support
enabled, even in this case there are restrictions on /dev/mem
use due to the cache aliasing requirements.
If this option is switched on, the /dev/mem file only allows
userspace access to PCI space and the BIOS code and data regions.
This is sufficient for dosemu and X and all common users of
/dev/mem.
If in doubt, say Y.
config X86_VERBOSE_BOOTUP
bool "Enable verbose x86 bootup info messages"
default y
......
......@@ -8,7 +8,7 @@ obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o blk-sysfs.o \
blk-iopoll.o blk-lib.o blk-mq.o blk-mq-tag.o \
blk-mq-sysfs.o blk-mq-cpu.o blk-mq-cpumap.o ioctl.o \
genhd.o scsi_ioctl.o partition-generic.o ioprio.o \
partitions/
badblocks.o partitions/
obj-$(CONFIG_BOUNCE) += bounce.o
obj-$(CONFIG_BLK_DEV_BSG) += bsg.o
......
This diff is collapsed.
......@@ -20,6 +20,7 @@
#include <linux/idr.h>
#include <linux/log2.h>
#include <linux/pm_runtime.h>
#include <linux/badblocks.h>
#include "blk.h"
......@@ -664,7 +665,6 @@ void del_gendisk(struct gendisk *disk)
kobject_put(disk->part0.holder_dir);
kobject_put(disk->slave_dir);
disk->driverfs_dev = NULL;
if (!sysfs_deprecated)
sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk)));
pm_runtime_set_memalloc_noio(disk_to_dev(disk), false);
......@@ -672,6 +672,31 @@ void del_gendisk(struct gendisk *disk)
}
EXPORT_SYMBOL(del_gendisk);
/* sysfs access to bad-blocks list. */
static ssize_t disk_badblocks_show(struct device *dev,
struct device_attribute *attr,
char *page)
{
struct gendisk *disk = dev_to_disk(dev);
if (!disk->bb)
return sprintf(page, "\n");
return badblocks_show(disk->bb, page, 0);
}
static ssize_t disk_badblocks_store(struct device *dev,
struct device_attribute *attr,
const char *page, size_t len)
{
struct gendisk *disk = dev_to_disk(dev);
if (!disk->bb)
return -ENXIO;
return badblocks_store(disk->bb, page, len, 0);
}
/**
* get_gendisk - get partitioning information for a given device
* @devt: device to get partitioning information for
......@@ -990,6 +1015,8 @@ static DEVICE_ATTR(discard_alignment, S_IRUGO, disk_discard_alignment_show,
static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL);
static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL);
static DEVICE_ATTR(badblocks, S_IRUGO | S_IWUSR, disk_badblocks_show,
disk_badblocks_store);
#ifdef CONFIG_FAIL_MAKE_REQUEST
static struct device_attribute dev_attr_fail =
__ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store);
......@@ -1011,6 +1038,7 @@ static struct attribute *disk_attrs[] = {
&dev_attr_capability.attr,
&dev_attr_stat.attr,
&dev_attr_inflight.attr,
&dev_attr_badblocks.attr,
#ifdef CONFIG_FAIL_MAKE_REQUEST
&dev_attr_fail.attr,
#endif
......
......@@ -4,6 +4,7 @@
#include <linux/gfp.h>
#include <linux/blkpg.h>
#include <linux/hdreg.h>
#include <linux/badblocks.h>
#include <linux/backing-dev.h>
#include <linux/fs.h>
#include <linux/blktrace_api.h>
......@@ -406,6 +407,71 @@ static inline int is_unrecognized_ioctl(int ret)
ret == -ENOIOCTLCMD;
}
#ifdef CONFIG_FS_DAX
bool blkdev_dax_capable(struct block_device *bdev)
{
struct gendisk *disk = bdev->bd_disk;
if (!disk->fops->direct_access)
return false;
/*
* If the partition is not aligned on a page boundary, we can't
* do dax I/O to it.
*/
if ((bdev->bd_part->start_sect % (PAGE_SIZE / 512))
|| (bdev->bd_part->nr_sects % (PAGE_SIZE / 512)))
return false;
/*
* If the device has known bad blocks, force all I/O through the
* driver / page cache.
*
* TODO: support finer grained dax error handling
*/
if (disk->bb && disk->bb->count)
return false;
return true;
}
static int blkdev_daxset(struct block_device *bdev, unsigned long argp)
{
unsigned long arg;
int rc = 0;
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
if (get_user(arg, (int __user *)(argp)))
return -EFAULT;
arg = !!arg;
if (arg == !!(bdev->bd_inode->i_flags & S_DAX))
return 0;
if (arg)
arg = S_DAX;
if (arg && !blkdev_dax_capable(bdev))
return -ENOTTY;
mutex_lock(&bdev->bd_inode->i_mutex);
if (bdev->bd_map_count == 0)
inode_set_flags(bdev->bd_inode, arg, S_DAX);
else
rc = -EBUSY;
mutex_unlock(&bdev->bd_inode->i_mutex);
return rc;
}
#else
static int blkdev_daxset(struct block_device *bdev, int arg)
{
if (arg)
return -ENOTTY;
return 0;
}
#endif
static int blkdev_flushbuf(struct block_device *bdev, fmode_t mode,
unsigned cmd, unsigned long arg)
{
......@@ -568,6 +634,11 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
case BLKTRACESETUP:
case BLKTRACETEARDOWN:
return blk_trace_ioctl(bdev, cmd, argp);
case BLKDAXSET:
return blkdev_daxset(bdev, arg);
case BLKDAXGET:
return put_int(arg, !!(bdev->bd_inode->i_flags & S_DAX));
break;
case IOC_PR_REGISTER:
return blkdev_pr_register(bdev, argp);
case IOC_PR_RESERVE:
......
......@@ -15,6 +15,7 @@
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/ndctl.h>
#include <linux/delay.h>
#include <linux/list.h>
#include <linux/acpi.h>
#include <linux/sort.h>
......@@ -1473,6 +1474,201 @@ static void acpi_nfit_blk_region_disable(struct nvdimm_bus *nvdimm_bus,
/* devm will free nfit_blk */
}
static int ars_get_cap(struct nvdimm_bus_descriptor *nd_desc,
struct nd_cmd_ars_cap *cmd, u64 addr, u64 length)
{
cmd->address = addr;
cmd->length = length;
return nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_CAP, cmd,
sizeof(*cmd));
}
static int ars_do_start(struct nvdimm_bus_descriptor *nd_desc,
struct nd_cmd_ars_start *cmd, u64 addr, u64 length)
{
int rc;
cmd->address = addr;
cmd->length = length;
cmd->type = ND_ARS_PERSISTENT;
while (1) {
rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_START, cmd,
sizeof(*cmd));
if (rc)
return rc;
switch (cmd->status) {
case 0:
return 0;
case 1:
/* ARS unsupported, but we should never get here */
return 0;
case 2:
return -EINVAL;
case 3:
/* ARS is in progress */
msleep(1000);
break;
default:
return -ENXIO;
}
}
}
static int ars_get_status(struct nvdimm_bus_descriptor *nd_desc,
struct nd_cmd_ars_status *cmd)
{
int rc;
while (1) {
rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_STATUS, cmd,
sizeof(*cmd));
if (rc || cmd->status & 0xffff)
return -ENXIO;
/* Check extended status (Upper two bytes) */
switch (cmd->status >> 16) {
case 0:
return 0;
case 1:
/* ARS is in progress */
msleep(1000);
break;
case 2:
/* No ARS performed for the current boot */
return 0;
default:
return -ENXIO;
}
}
}
static int ars_status_process_records(struct nvdimm_bus *nvdimm_bus,
struct nd_cmd_ars_status *ars_status, u64 start)
{
int rc;
u32 i;
/*
* The address field returned by ars_status should be either
* less than or equal to the address we last started ARS for.
* The (start, length) returned by ars_status should also have
* non-zero overlap with the range we started ARS for.
* If this is not the case, bail.
*/
if (ars_status->address > start ||
(ars_status->address + ars_status->length < start))
return -ENXIO;
for (i = 0; i < ars_status->num_records; i++) {
rc = nvdimm_bus_add_poison(nvdimm_bus,
ars_status->records[i].err_address,
ars_status->records[i].length);
if (rc)
return rc;
}
return 0;
}
static int acpi_nfit_find_poison(struct acpi_nfit_desc *acpi_desc,
struct nd_region_desc *ndr_desc)
{
struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
struct nvdimm_bus *nvdimm_bus = acpi_desc->nvdimm_bus;
struct nd_cmd_ars_status *ars_status = NULL;
struct nd_cmd_ars_start *ars_start = NULL;
struct nd_cmd_ars_cap *ars_cap = NULL;
u64 start, len, cur, remaining;
int rc;
ars_cap = kzalloc(sizeof(*ars_cap), GFP_KERNEL);
if (!ars_cap)
return -ENOMEM;
start = ndr_desc->res->start;
len = ndr_desc->res->end - ndr_desc->res->start + 1;
rc = ars_get_cap(nd_desc, ars_cap, start, len);
if (rc)
goto out;
/*
* If ARS is unsupported, or if the 'Persistent Memory Scrub' flag in
* extended status is not set, skip this but continue initialization
*/
if ((ars_cap->status & 0xffff) ||
!(ars_cap->status >> 16 & ND_ARS_PERSISTENT)) {
dev_warn(acpi_desc->dev,
"ARS unsupported (status: 0x%x), won't create an error list\n",
ars_cap->status);
goto out;
}
/*
* Check if a full-range ARS has been run. If so, use those results
* without having to start a new ARS.
*/
ars_status = kzalloc(ars_cap->max_ars_out + sizeof(*ars_status),
GFP_KERNEL);
if (!ars_status) {
rc = -ENOMEM;
goto out;
}
rc = ars_get_status(nd_desc, ars_status);
if (rc)
goto out;
if (ars_status->address <= start &&
(ars_status->address + ars_status->length >= start + len)) {
rc = ars_status_process_records(nvdimm_bus, ars_status, start);
goto out;
}
/*
* ARS_STATUS can overflow if the number of poison entries found is
* greater than the maximum buffer size (ars_cap->max_ars_out)
* To detect overflow, check if the length field of ars_status
* is less than the length we supplied. If so, process the
* error entries we got, adjust the start point, and start again
*/
ars_start = kzalloc(sizeof(*ars_start), GFP_KERNEL);
if (!ars_start)
return -ENOMEM;
cur = start;
remaining = len;
do {
u64 done, end;
rc = ars_do_start(nd_desc, ars_start, cur, remaining);
if (rc)
goto out;
rc = ars_get_status(nd_desc, ars_status);
if (rc)
goto out;
rc = ars_status_process_records(nvdimm_bus, ars_status, cur);
if (rc)
goto out;
end = min(cur + remaining,
ars_status->address + ars_status->length);
done = end - cur;
cur += done;
remaining -= done;
} while (remaining);
out:
kfree(ars_cap);
kfree(ars_start);
kfree(ars_status);
return rc;
}
static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc,
struct nd_mapping *nd_mapping, struct nd_region_desc *ndr_desc,
struct acpi_nfit_memory_map *memdev,
......@@ -1585,6 +1781,13 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc,
nvdimm_bus = acpi_desc->nvdimm_bus;
if (nfit_spa_type(spa) == NFIT_SPA_PM) {
rc = acpi_nfit_find_poison(acpi_desc, ndr_desc);
if (rc) {
dev_err(acpi_desc->dev,
"error while performing ARS to find poison: %d\n",
rc);
return rc;
}
if (!nvdimm_pmem_region_create(nvdimm_bus, ndr_desc))
return -ENOMEM;
} else if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE) {
......
This diff is collapsed.
......@@ -17,6 +17,7 @@
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/badblocks.h>
#include <linux/kobject.h>
#include <linux/list.h>
#include <linux/mm.h>
......@@ -28,13 +29,6 @@
#define MaxSector (~(sector_t)0)
/* Bad block numbers are stored sorted in a single page.
* 64bits is used for each block or extent.
* 54 bits are sector number, 9 bits are extent size,
* 1 bit is an 'acknowledged' flag.
*/
#define MD_MAX_BADBLOCKS (PAGE_SIZE/8)
/*
* MD's 'extended' device
*/
......@@ -117,22 +111,7 @@ struct md_rdev {
struct kernfs_node *sysfs_state; /* handle for 'state'
* sysfs entry */
struct badblocks {
int count; /* count of bad blocks */
int unacked_exist; /* there probably are unacknowledged
* bad blocks. This is only cleared
* when a read discovers none
*/
int shift; /* shift from sectors to block size
* a -ve shift means badblocks are
* disabled.*/
u64 *page; /* badblock list */
int changed;
seqlock_t lock;
sector_t sector;
sector_t size; /* in sectors */
} badblocks;
struct badblocks badblocks;
};
enum flag_bits {
Faulty, /* device is known to have a fault */
......@@ -185,22 +164,11 @@ enum flag_bits {
*/
};
#define BB_LEN_MASK (0x00000000000001FFULL)
#define BB_OFFSET_MASK (0x7FFFFFFFFFFFFE00ULL)
#define BB_ACK_MASK (0x8000000000000000ULL)
#define BB_MAX_LEN 512
#define BB_OFFSET(x) (((x) & BB_OFFSET_MASK) >> 9)
#define BB_LEN(x) (((x) & BB_LEN_MASK) + 1)
#define BB_ACK(x) (!!((x) & BB_ACK_MASK))
#define BB_MAKE(a, l, ack) (((a)<<9) | ((l)-1) | ((u64)(!!(ack)) << 63))
extern int md_is_badblock(struct badblocks *bb, sector_t s, int sectors,
sector_t *first_bad, int *bad_sectors);
static inline int is_badblock(struct md_rdev *rdev, sector_t s, int sectors,
sector_t *first_bad, int *bad_sectors)
{
if (unlikely(rdev->badblocks.count)) {
int rv = md_is_badblock(&rdev->badblocks, rdev->data_offset + s,
int rv = badblocks_check(&rdev->badblocks, rdev->data_offset + s,
sectors,
first_bad, bad_sectors);
if (rv)
......@@ -213,8 +181,6 @@ extern int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
int is_new);
extern int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
int is_new);
extern void md_ack_all_badblocks(struct badblocks *bb);
struct md_cluster_info;
struct mddev {
......
......@@ -11,6 +11,7 @@
* General Public License for more details.
*/
#include <linux/libnvdimm.h>
#include <linux/badblocks.h>
#include <linux/export.h>
#include <linux/module.h>
#include <linux/blkdev.h>
......@@ -325,6 +326,7 @@ struct nvdimm_bus *__nvdimm_bus_register(struct device *parent,
if (!nvdimm_bus)
return NULL;
INIT_LIST_HEAD(&nvdimm_bus->list);
INIT_LIST_HEAD(&nvdimm_bus->poison_list);
init_waitqueue_head(&nvdimm_bus->probe_wait);
nvdimm_bus->id = ida_simple_get(&nd_ida, 0, 0, GFP_KERNEL);
mutex_init(&nvdimm_bus->reconfig_mutex);
......@@ -359,6 +361,172 @@ struct nvdimm_bus *__nvdimm_bus_register(struct device *parent,
}
EXPORT_SYMBOL_GPL(__nvdimm_bus_register);
static void set_badblock(struct badblocks *bb, sector_t s, int num)
{
dev_dbg(bb->dev, "Found a poison range (0x%llx, 0x%llx)\n",
(u64) s * 512, (u64) num * 512);
/* this isn't an error as the hardware will still throw an exception */
if (badblocks_set(bb, s, num, 1))
dev_info_once(bb->dev, "%s: failed for sector %llx\n",
__func__, (u64) s);
}
/**
* __add_badblock_range() - Convert a physical address range to bad sectors
* @bb: badblocks instance to populate
* @ns_offset: namespace offset where the error range begins (in bytes)
* @len: number of bytes of poison to be added
*
* This assumes that the range provided with (ns_offset, len) is within
* the bounds of physical addresses for this namespace, i.e. lies in the
* interval [ns_start, ns_start + ns_size)
*/
static void __add_badblock_range(struct badblocks *bb, u64 ns_offset, u64 len)
{
const unsigned int sector_size = 512;
sector_t start_sector;
u64 num_sectors;
u32 rem;
start_sector = div_u64(ns_offset, sector_size);
num_sectors = div_u64_rem(len, sector_size, &rem);
if (rem)
num_sectors++;
if (unlikely(num_sectors > (u64)INT_MAX)) {
u64 remaining = num_sectors;
sector_t s = start_sector;