Commit 2b76da95 authored by Jens Axboe's avatar Jens Axboe

Merge branch 'nvme-4.14' of git://git.infradead.org/nvme into for-4.14/block-postmerge

Pull NVMe changes from Christoph:

"Below is the current set of NVMe updates for Linux 4.14, now against
 your postmerge branch, and with three more patches.

 The biggest bit comes from Sagi and refactors the RDMA driver to
 prepare for more code sharing in the setup and teardown path.  But we
 have various features and bug fixes from a lot of people as well."
parents cd996fb4 1d5df6af
......@@ -76,6 +76,11 @@ static DEFINE_SPINLOCK(dev_list_lock);
static struct class *nvme_class;
static __le32 nvme_get_log_dw10(u8 lid, size_t size)
{
return cpu_to_le32((((size / 4) - 1) << 16) | lid);
}
int nvme_reset_ctrl(struct nvme_ctrl *ctrl)
{
if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
......@@ -108,7 +113,16 @@ static blk_status_t nvme_error_status(struct request *req)
case NVME_SC_WRITE_FAULT:
case NVME_SC_READ_ERROR:
case NVME_SC_UNWRITTEN_BLOCK:
case NVME_SC_ACCESS_DENIED:
case NVME_SC_READ_ONLY:
return BLK_STS_MEDIUM;
case NVME_SC_GUARD_CHECK:
case NVME_SC_APPTAG_CHECK:
case NVME_SC_REFTAG_CHECK:
case NVME_SC_INVALID_PI:
return BLK_STS_PROTECTION;
case NVME_SC_RESERVATION_CONFLICT:
return BLK_STS_NEXUS;
default:
return BLK_STS_IOERR;
}
......@@ -162,9 +176,10 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
enum nvme_ctrl_state new_state)
{
enum nvme_ctrl_state old_state;
unsigned long flags;
bool changed = false;
spin_lock_irq(&ctrl->lock);
spin_lock_irqsave(&ctrl->lock, flags);
old_state = ctrl->state;
switch (new_state) {
......@@ -225,7 +240,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
if (changed)
ctrl->state = new_state;
spin_unlock_irq(&ctrl->lock);
spin_unlock_irqrestore(&ctrl->lock, flags);
return changed;
}
......@@ -307,7 +322,7 @@ static int nvme_toggle_streams(struct nvme_ctrl *ctrl, bool enable)
memset(&c, 0, sizeof(c));
c.directive.opcode = nvme_admin_directive_send;
c.directive.nsid = cpu_to_le32(0xffffffff);
c.directive.nsid = cpu_to_le32(NVME_NSID_ALL);
c.directive.doper = NVME_DIR_SND_ID_OP_ENABLE;
c.directive.dtype = NVME_DIR_IDENTIFY;
c.directive.tdtype = NVME_DIR_STREAMS;
......@@ -357,7 +372,7 @@ static int nvme_configure_directives(struct nvme_ctrl *ctrl)
if (ret)
return ret;
ret = nvme_get_stream_params(ctrl, &s, 0xffffffff);
ret = nvme_get_stream_params(ctrl, &s, NVME_NSID_ALL);
if (ret)
return ret;
......@@ -768,7 +783,8 @@ static int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
return error;
}
static int nvme_identify_ns_descs(struct nvme_ns *ns, unsigned nsid)
static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid,
u8 *eui64, u8 *nguid, uuid_t *uuid)
{
struct nvme_command c = { };
int status;
......@@ -784,7 +800,7 @@ static int nvme_identify_ns_descs(struct nvme_ns *ns, unsigned nsid)
if (!data)
return -ENOMEM;
status = nvme_submit_sync_cmd(ns->ctrl->admin_q, &c, data,
status = nvme_submit_sync_cmd(ctrl->admin_q, &c, data,
NVME_IDENTIFY_DATA_SIZE);
if (status)
goto free_data;
......@@ -798,33 +814,33 @@ static int nvme_identify_ns_descs(struct nvme_ns *ns, unsigned nsid)
switch (cur->nidt) {
case NVME_NIDT_EUI64:
if (cur->nidl != NVME_NIDT_EUI64_LEN) {
dev_warn(ns->ctrl->device,
dev_warn(ctrl->device,
"ctrl returned bogus length: %d for NVME_NIDT_EUI64\n",
cur->nidl);
goto free_data;
}
len = NVME_NIDT_EUI64_LEN;
memcpy(ns->eui, data + pos + sizeof(*cur), len);
memcpy(eui64, data + pos + sizeof(*cur), len);
break;
case NVME_NIDT_NGUID:
if (cur->nidl != NVME_NIDT_NGUID_LEN) {
dev_warn(ns->ctrl->device,
dev_warn(ctrl->device,
"ctrl returned bogus length: %d for NVME_NIDT_NGUID\n",
cur->nidl);
goto free_data;
}
len = NVME_NIDT_NGUID_LEN;
memcpy(ns->nguid, data + pos + sizeof(*cur), len);
memcpy(nguid, data + pos + sizeof(*cur), len);
break;
case NVME_NIDT_UUID:
if (cur->nidl != NVME_NIDT_UUID_LEN) {
dev_warn(ns->ctrl->device,
dev_warn(ctrl->device,
"ctrl returned bogus length: %d for NVME_NIDT_UUID\n",
cur->nidl);
goto free_data;
}
len = NVME_NIDT_UUID_LEN;
uuid_copy(&ns->uuid, data + pos + sizeof(*cur));
uuid_copy(uuid, data + pos + sizeof(*cur));
break;
default:
/* Skip unnkown types */
......@@ -849,9 +865,10 @@ static int nvme_identify_ns_list(struct nvme_ctrl *dev, unsigned nsid, __le32 *n
return nvme_submit_sync_cmd(dev->admin_q, &c, ns_list, 0x1000);
}
static int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid,
struct nvme_id_ns **id)
static struct nvme_id_ns *nvme_identify_ns(struct nvme_ctrl *ctrl,
unsigned nsid)
{
struct nvme_id_ns *id;
struct nvme_command c = { };
int error;
......@@ -860,15 +877,18 @@ static int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid,
c.identify.nsid = cpu_to_le32(nsid);
c.identify.cns = NVME_ID_CNS_NS;
*id = kmalloc(sizeof(struct nvme_id_ns), GFP_KERNEL);
if (!*id)
return -ENOMEM;
id = kmalloc(sizeof(*id), GFP_KERNEL);
if (!id)
return NULL;
error = nvme_submit_sync_cmd(dev->admin_q, &c, *id,
sizeof(struct nvme_id_ns));
if (error)
kfree(*id);
return error;
error = nvme_submit_sync_cmd(ctrl->admin_q, &c, id, sizeof(*id));
if (error) {
dev_warn(ctrl->device, "Identify namespace failed\n");
kfree(id);
return NULL;
}
return id;
}
static int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
......@@ -1159,32 +1179,21 @@ static void nvme_config_discard(struct nvme_ns *ns)
blk_queue_max_write_zeroes_sectors(ns->queue, UINT_MAX);
}
static int nvme_revalidate_ns(struct nvme_ns *ns, struct nvme_id_ns **id)
static void nvme_report_ns_ids(struct nvme_ctrl *ctrl, unsigned int nsid,
struct nvme_id_ns *id, u8 *eui64, u8 *nguid, uuid_t *uuid)
{
if (nvme_identify_ns(ns->ctrl, ns->ns_id, id)) {
dev_warn(ns->ctrl->dev, "%s: Identify failure\n", __func__);
return -ENODEV;
}
if ((*id)->ncap == 0) {
kfree(*id);
return -ENODEV;
}
if (ns->ctrl->vs >= NVME_VS(1, 1, 0))
memcpy(ns->eui, (*id)->eui64, sizeof(ns->eui));
if (ns->ctrl->vs >= NVME_VS(1, 2, 0))
memcpy(ns->nguid, (*id)->nguid, sizeof(ns->nguid));
if (ns->ctrl->vs >= NVME_VS(1, 3, 0)) {
if (ctrl->vs >= NVME_VS(1, 1, 0))
memcpy(eui64, id->eui64, sizeof(id->eui64));
if (ctrl->vs >= NVME_VS(1, 2, 0))
memcpy(nguid, id->nguid, sizeof(id->nguid));
if (ctrl->vs >= NVME_VS(1, 3, 0)) {
/* Don't treat error as fatal we potentially
* already have a NGUID or EUI-64
*/
if (nvme_identify_ns_descs(ns, ns->ns_id))
dev_warn(ns->ctrl->device,
if (nvme_identify_ns_descs(ctrl, nsid, eui64, nguid, uuid))
dev_warn(ctrl->device,
"%s: Identify Descriptors failed\n", __func__);
}
return 0;
}
static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
......@@ -1225,22 +1234,38 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
static int nvme_revalidate_disk(struct gendisk *disk)
{
struct nvme_ns *ns = disk->private_data;
struct nvme_id_ns *id = NULL;
int ret;
struct nvme_ctrl *ctrl = ns->ctrl;
struct nvme_id_ns *id;
u8 eui64[8] = { 0 }, nguid[16] = { 0 };
uuid_t uuid = uuid_null;
int ret = 0;
if (test_bit(NVME_NS_DEAD, &ns->flags)) {
set_capacity(disk, 0);
return -ENODEV;
}
ret = nvme_revalidate_ns(ns, &id);
if (ret)
return ret;
id = nvme_identify_ns(ctrl, ns->ns_id);
if (!id)
return -ENODEV;
__nvme_revalidate_disk(disk, id);
kfree(id);
if (id->ncap == 0) {
ret = -ENODEV;
goto out;
}
return 0;
nvme_report_ns_ids(ctrl, ns->ns_id, id, eui64, nguid, &uuid);
if (!uuid_equal(&ns->uuid, &uuid) ||
memcmp(&ns->nguid, &nguid, sizeof(ns->nguid)) ||
memcmp(&ns->eui, &eui64, sizeof(ns->eui))) {
dev_err(ctrl->device,
"identifiers changed for nsid %d\n", ns->ns_id);
ret = -ENODEV;
}
out:
kfree(id);
return ret;
}
static char nvme_pr_type(enum pr_type type)
......@@ -1440,7 +1465,7 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl, u64 cap)
ctrl->ctrl_config = NVME_CC_CSS_NVM;
ctrl->ctrl_config |= (page_shift - 12) << NVME_CC_MPS_SHIFT;
ctrl->ctrl_config |= NVME_CC_ARB_RR | NVME_CC_SHN_NONE;
ctrl->ctrl_config |= NVME_CC_AMS_RR | NVME_CC_SHN_NONE;
ctrl->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES;
ctrl->ctrl_config |= NVME_CC_ENABLE;
......@@ -1453,7 +1478,7 @@ EXPORT_SYMBOL_GPL(nvme_enable_ctrl);
int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl)
{
unsigned long timeout = jiffies + (shutdown_timeout * HZ);
unsigned long timeout = jiffies + (ctrl->shutdown_timeout * HZ);
u32 csts;
int ret;
......@@ -1502,6 +1527,23 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
blk_queue_write_cache(q, vwc, vwc);
}
static int nvme_configure_timestamp(struct nvme_ctrl *ctrl)
{
__le64 ts;
int ret;
if (!(ctrl->oncs & NVME_CTRL_ONCS_TIMESTAMP))
return 0;
ts = cpu_to_le64(ktime_to_ms(ktime_get_real()));
ret = nvme_set_features(ctrl, NVME_FEAT_TIMESTAMP, 0, &ts, sizeof(ts),
NULL);
if (ret)
dev_warn_once(ctrl->device,
"could not set timestamp (%d)\n", ret);
return ret;
}
static int nvme_configure_apst(struct nvme_ctrl *ctrl)
{
/*
......@@ -1804,6 +1846,20 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
ctrl->sgls = le32_to_cpu(id->sgls);
ctrl->kas = le16_to_cpu(id->kas);
if (id->rtd3e) {
/* us -> s */
u32 transition_time = le32_to_cpu(id->rtd3e) / 1000000;
ctrl->shutdown_timeout = clamp_t(unsigned int, transition_time,
shutdown_timeout, 60);
if (ctrl->shutdown_timeout != shutdown_timeout)
dev_warn(ctrl->device,
"Shutdown timeout set to %u seconds\n",
ctrl->shutdown_timeout);
} else
ctrl->shutdown_timeout = shutdown_timeout;
ctrl->npss = id->npss;
ctrl->apsta = id->apsta;
prev_apst_enabled = ctrl->apst_enabled;
......@@ -1856,6 +1912,10 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
ret = nvme_configure_apst(ctrl);
if (ret < 0)
return ret;
ret = nvme_configure_timestamp(ctrl);
if (ret < 0)
return ret;
ret = nvme_configure_directives(ctrl);
if (ret < 0)
......@@ -2311,9 +2371,15 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->instance);
if (nvme_revalidate_ns(ns, &id))
id = nvme_identify_ns(ctrl, nsid);
if (!id)
goto out_free_queue;
if (id->ncap == 0)
goto out_free_id;
nvme_report_ns_ids(ctrl, ns->ns_id, id, ns->eui, ns->nguid, &ns->uuid);
if (nvme_nvm_ns_supported(ns, id) &&
nvme_nvm_register(ns, disk_name, node)) {
dev_warn(ctrl->device, "%s: LightNVM init failure\n", __func__);
......@@ -2534,6 +2600,71 @@ static void nvme_async_event_work(struct work_struct *work)
spin_unlock_irq(&ctrl->lock);
}
static bool nvme_ctrl_pp_status(struct nvme_ctrl *ctrl)
{
u32 csts;
if (ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts))
return false;
if (csts == ~0)
return false;
return ((ctrl->ctrl_config & NVME_CC_ENABLE) && (csts & NVME_CSTS_PP));
}
static void nvme_get_fw_slot_info(struct nvme_ctrl *ctrl)
{
struct nvme_command c = { };
struct nvme_fw_slot_info_log *log;
log = kmalloc(sizeof(*log), GFP_KERNEL);
if (!log)
return;
c.common.opcode = nvme_admin_get_log_page;
c.common.nsid = cpu_to_le32(NVME_NSID_ALL);
c.common.cdw10[0] = nvme_get_log_dw10(NVME_LOG_FW_SLOT, sizeof(*log));
if (!nvme_submit_sync_cmd(ctrl->admin_q, &c, log, sizeof(*log)))
dev_warn(ctrl->device,
"Get FW SLOT INFO log error\n");
kfree(log);
}
static void nvme_fw_act_work(struct work_struct *work)
{
struct nvme_ctrl *ctrl = container_of(work,
struct nvme_ctrl, fw_act_work);
unsigned long fw_act_timeout;
if (ctrl->mtfa)
fw_act_timeout = jiffies +
msecs_to_jiffies(ctrl->mtfa * 100);
else
fw_act_timeout = jiffies +
msecs_to_jiffies(admin_timeout * 1000);
nvme_stop_queues(ctrl);
while (nvme_ctrl_pp_status(ctrl)) {
if (time_after(jiffies, fw_act_timeout)) {
dev_warn(ctrl->device,
"Fw activation timeout, reset controller\n");
nvme_reset_ctrl(ctrl);
break;
}
msleep(100);
}
if (ctrl->state != NVME_CTRL_LIVE)
return;
nvme_start_queues(ctrl);
/* read FW slot informationi to clear the AER*/
nvme_get_fw_slot_info(ctrl);
}
void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
union nvme_result *res)
{
......@@ -2560,6 +2691,9 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
dev_info(ctrl->device, "rescanning\n");
nvme_queue_scan(ctrl);
break;
case NVME_AER_NOTICE_FW_ACT_STARTING:
schedule_work(&ctrl->fw_act_work);
break;
default:
dev_warn(ctrl->device, "async event result %08x\n", result);
}
......@@ -2607,6 +2741,7 @@ void nvme_stop_ctrl(struct nvme_ctrl *ctrl)
nvme_stop_keep_alive(ctrl);
flush_work(&ctrl->async_event_work);
flush_work(&ctrl->scan_work);
cancel_work_sync(&ctrl->fw_act_work);
}
EXPORT_SYMBOL_GPL(nvme_stop_ctrl);
......@@ -2670,6 +2805,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
ctrl->quirks = quirks;
INIT_WORK(&ctrl->scan_work, nvme_scan_work);
INIT_WORK(&ctrl->async_event_work, nvme_async_event_work);
INIT_WORK(&ctrl->fw_act_work, nvme_fw_act_work);
ret = nvme_set_instance(ctrl);
if (ret)
......
......@@ -735,6 +735,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
goto out;
}
if (uuid_parse(p, &hostid)) {
pr_err("Invalid hostid %s\n", p);
ret = -EINVAL;
goto out;
}
......
......@@ -220,6 +220,90 @@ static int __nvme_fc_del_ctrl(struct nvme_fc_ctrl *);
static void __nvme_fc_delete_hw_queue(struct nvme_fc_ctrl *,
struct nvme_fc_queue *, unsigned int);
static void
nvme_fc_free_lport(struct kref *ref)
{
struct nvme_fc_lport *lport =
container_of(ref, struct nvme_fc_lport, ref);
unsigned long flags;
WARN_ON(lport->localport.port_state != FC_OBJSTATE_DELETED);
WARN_ON(!list_empty(&lport->endp_list));
/* remove from transport list */
spin_lock_irqsave(&nvme_fc_lock, flags);
list_del(&lport->port_list);
spin_unlock_irqrestore(&nvme_fc_lock, flags);
/* let the LLDD know we've finished tearing it down */
lport->ops->localport_delete(&lport->localport);
ida_simple_remove(&nvme_fc_local_port_cnt, lport->localport.port_num);
ida_destroy(&lport->endp_cnt);
put_device(lport->dev);
kfree(lport);
}
static void
nvme_fc_lport_put(struct nvme_fc_lport *lport)
{
kref_put(&lport->ref, nvme_fc_free_lport);
}
static int
nvme_fc_lport_get(struct nvme_fc_lport *lport)
{
return kref_get_unless_zero(&lport->ref);
}
static struct nvme_fc_lport *
nvme_fc_attach_to_unreg_lport(struct nvme_fc_port_info *pinfo)
{
struct nvme_fc_lport *lport;
unsigned long flags;
spin_lock_irqsave(&nvme_fc_lock, flags);
list_for_each_entry(lport, &nvme_fc_lport_list, port_list) {
if (lport->localport.node_name != pinfo->node_name ||
lport->localport.port_name != pinfo->port_name)
continue;
if (lport->localport.port_state != FC_OBJSTATE_DELETED) {
lport = ERR_PTR(-EEXIST);
goto out_done;
}
if (!nvme_fc_lport_get(lport)) {
/*
* fails if ref cnt already 0. If so,
* act as if lport already deleted
*/
lport = NULL;
goto out_done;
}
/* resume the lport */
lport->localport.port_role = pinfo->port_role;
lport->localport.port_id = pinfo->port_id;
lport->localport.port_state = FC_OBJSTATE_ONLINE;
spin_unlock_irqrestore(&nvme_fc_lock, flags);
return lport;
}
lport = NULL;
out_done:
spin_unlock_irqrestore(&nvme_fc_lock, flags);
return lport;
}
/**
* nvme_fc_register_localport - transport entry point called by an
......@@ -257,6 +341,28 @@ nvme_fc_register_localport(struct nvme_fc_port_info *pinfo,
goto out_reghost_failed;
}
/*
* look to see if there is already a localport that had been
* deregistered and in the process of waiting for all the
* references to fully be removed. If the references haven't
* expired, we can simply re-enable the localport. Remoteports
* and controller reconnections should resume naturally.
*/
newrec = nvme_fc_attach_to_unreg_lport(pinfo);
/* found an lport, but something about its state is bad */
if (IS_ERR(newrec)) {
ret = PTR_ERR(newrec);
goto out_reghost_failed;
/* found existing lport, which was resumed */
} else if (newrec) {
*portptr = &newrec->localport;
return 0;
}
/* nothing found - allocate a new localport struct */
newrec = kmalloc((sizeof(*newrec) + template->local_priv_sz),
GFP_KERNEL);
if (!newrec) {
......@@ -310,44 +416,6 @@ nvme_fc_register_localport(struct nvme_fc_port_info *pinfo,
}
EXPORT_SYMBOL_GPL(nvme_fc_register_localport);
static void
nvme_fc_free_lport(struct kref *ref)
{
struct nvme_fc_lport *lport =
container_of(ref, struct nvme_fc_lport, ref);
unsigned long flags;
WARN_ON(lport->localport.port_state != FC_OBJSTATE_DELETED);
WARN_ON(!list_empty(&lport->endp_list));
/* remove from transport list */
spin_lock_irqsave(&nvme_fc_lock, flags);
list_del(&lport->port_list);
spin_unlock_irqrestore(&nvme_fc_lock, flags);
/* let the LLDD know we've finished tearing it down */
lport->ops->localport_delete(&lport->localport);
ida_simple_remove(&nvme_fc_local_port_cnt, lport->localport.port_num);
ida_destroy(&lport->endp_cnt);
put_device(lport->dev);
kfree(lport);
}
static void
nvme_fc_lport_put(struct nvme_fc_lport *lport)
{
kref_put(&lport->ref, nvme_fc_free_lport);
}
static int
nvme_fc_lport_get(struct nvme_fc_lport *lport)
{
return kref_get_unless_zero(&lport->ref);
}
/**
* nvme_fc_unregister_localport - transport entry point called by an
* LLDD to deregister/remove a previously
......@@ -2731,6 +2799,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
ret = blk_mq_alloc_tag_set(&ctrl->admin_tag_set);
if (ret)
goto out_free_queues;
ctrl->ctrl.admin_tagset = &ctrl->admin_tag_set;
ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set);
if (IS_ERR(ctrl->ctrl.admin_q)) {
......
......@@ -125,6 +125,7 @@ struct nvme_ctrl {
struct kref kref;
int instance;
struct blk_mq_tag_set *tagset;
struct blk_mq_tag_set *admin_tagset;
struct list_head namespaces;
struct mutex namespaces_mutex;
struct device *device; /* char device */
......@@ -142,6 +143,7 @@ struct nvme_ctrl {
u16 cntlid;
u32 ctrl_config;
u16 mtfa;
u32 queue_count;
u64 cap;
......@@ -160,6 +162,7 @@ struct nvme_ctrl {
u16 kas;
u8 npss;
u8 apsta;
unsigned int shutdown_timeout;
unsigned int kato;
bool subsystem;
unsigned long quirks;
......@@ -167,6 +170,7 @@ struct nvme_ctrl {
struct work_struct scan_work;
struct work_struct async_event_work;
struct delayed_work ka_work;
struct work_struct fw_act_work<