Commit 8aa63829 authored by Jens Axboe

Merge branch 'nvme-4.12' of git://git.infradead.org/nvme into for-linus

Christoph writes:

"A couple of fixes for the next rc on the nvme front. Various FC fixes
from James, controller removal fixes from Ming (including a block layer
patch), an APST-related device quirk from Andy, an RDMA fix for small
queue depth devices from Marta, as well as fixes for the lack of
metadata support in non-PCIe drivers and the printk logging format from
me."
parents a8ecdd71 50af47d0
...@@ -628,25 +628,6 @@ void blk_mq_delay_kick_requeue_list(struct request_queue *q, ...@@ -628,25 +628,6 @@ void blk_mq_delay_kick_requeue_list(struct request_queue *q,
} }
EXPORT_SYMBOL(blk_mq_delay_kick_requeue_list); EXPORT_SYMBOL(blk_mq_delay_kick_requeue_list);
void blk_mq_abort_requeue_list(struct request_queue *q)
{
unsigned long flags;
LIST_HEAD(rq_list);
spin_lock_irqsave(&q->requeue_lock, flags);
list_splice_init(&q->requeue_list, &rq_list);
spin_unlock_irqrestore(&q->requeue_lock, flags);
while (!list_empty(&rq_list)) {
struct request *rq;
rq = list_first_entry(&rq_list, struct request, queuelist);
list_del_init(&rq->queuelist);
blk_mq_end_request(rq, -EIO);
}
}
EXPORT_SYMBOL(blk_mq_abort_requeue_list);
struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag) struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
{ {
if (tag < tags->nr_tags) { if (tag < tags->nr_tags) {
......
...@@ -925,6 +925,29 @@ static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo) ...@@ -925,6 +925,29 @@ static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo)
} }
#ifdef CONFIG_BLK_DEV_INTEGRITY #ifdef CONFIG_BLK_DEV_INTEGRITY
/*
 * Refresh the namespace metadata fields (ms, ext, pi_type) from the Identify
 * Namespace data in @id, and drop an already registered integrity profile
 * when it no longer matches.
 *
 * @disk: gendisk whose private_data is the struct nvme_ns being updated
 * @id:   Identify Namespace data describing the current LBA format
 * @bs:   logical block size the caller is about to apply to the queue
 *
 * The existing profile is unregistered if the PI type, the metadata size or
 * the logical block size changed, or if metadata is present in extended form.
 */
static void nvme_prep_integrity(struct gendisk *disk, struct nvme_id_ns *id,
		u16 bs)
{
	struct nvme_ns *ns = disk->private_data;
	const u8 fmt_idx = id->flbas & NVME_NS_FLBAS_LBA_MASK;
	const u16 prev_ms = ns->ms;	/* sampled before ns->ms is overwritten */
	u8 new_pi_type;

	ns->ms = le16_to_cpu(id->lbaf[fmt_idx].ms);
	ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT);

	/* PI implementation requires metadata equal t10 pi tuple size */
	if (ns->ms == sizeof(struct t10_pi_tuple))
		new_pi_type = id->dps & NVME_NS_DPS_PI_MASK;
	else
		new_pi_type = 0;

	if (blk_get_integrity(disk)) {
		/* ns->pi_type still holds the previous value at this point. */
		bool stale = ns->pi_type != new_pi_type ||
			     ns->ms != prev_ms ||
			     bs != queue_logical_block_size(disk->queue) ||
			     (ns->ms && ns->ext);

		if (stale)
			blk_integrity_unregister(disk);
	}

	ns->pi_type = new_pi_type;
}
static void nvme_init_integrity(struct nvme_ns *ns) static void nvme_init_integrity(struct nvme_ns *ns)
{ {
struct blk_integrity integrity; struct blk_integrity integrity;
...@@ -951,6 +974,10 @@ static void nvme_init_integrity(struct nvme_ns *ns) ...@@ -951,6 +974,10 @@ static void nvme_init_integrity(struct nvme_ns *ns)
blk_queue_max_integrity_segments(ns->queue, 1); blk_queue_max_integrity_segments(ns->queue, 1);
} }
#else #else
/*
 * Stub for builds without CONFIG_BLK_DEV_INTEGRITY: intentionally empty so
 * callers can invoke nvme_prep_integrity() without their own #ifdef.
 */
static void nvme_prep_integrity(struct gendisk *disk, struct nvme_id_ns *id,
u16 bs)
{
}
static void nvme_init_integrity(struct nvme_ns *ns) static void nvme_init_integrity(struct nvme_ns *ns)
{ {
} }
...@@ -997,37 +1024,22 @@ static int nvme_revalidate_ns(struct nvme_ns *ns, struct nvme_id_ns **id) ...@@ -997,37 +1024,22 @@ static int nvme_revalidate_ns(struct nvme_ns *ns, struct nvme_id_ns **id)
static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
{ {
struct nvme_ns *ns = disk->private_data; struct nvme_ns *ns = disk->private_data;
u8 lbaf, pi_type; u16 bs;
u16 old_ms;
unsigned short bs;
old_ms = ns->ms;
lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK;
ns->lba_shift = id->lbaf[lbaf].ds;
ns->ms = le16_to_cpu(id->lbaf[lbaf].ms);
ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT);
/* /*
* If identify namespace failed, use default 512 byte block size so * If identify namespace failed, use default 512 byte block size so
* block layer can use before failing read/write for 0 capacity. * block layer can use before failing read/write for 0 capacity.
*/ */
ns->lba_shift = id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ds;
if (ns->lba_shift == 0) if (ns->lba_shift == 0)
ns->lba_shift = 9; ns->lba_shift = 9;
bs = 1 << ns->lba_shift; bs = 1 << ns->lba_shift;
/* XXX: PI implementation requires metadata equal t10 pi tuple size */
pi_type = ns->ms == sizeof(struct t10_pi_tuple) ?
id->dps & NVME_NS_DPS_PI_MASK : 0;
blk_mq_freeze_queue(disk->queue); blk_mq_freeze_queue(disk->queue);
if (blk_get_integrity(disk) && (ns->pi_type != pi_type ||
ns->ms != old_ms ||
bs != queue_logical_block_size(disk->queue) ||
(ns->ms && ns->ext)))
blk_integrity_unregister(disk);
ns->pi_type = pi_type; if (ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)
nvme_prep_integrity(disk, id, bs);
blk_queue_logical_block_size(ns->queue, bs); blk_queue_logical_block_size(ns->queue, bs);
if (ns->ms && !blk_get_integrity(disk) && !ns->ext) if (ns->ms && !blk_get_integrity(disk) && !ns->ext)
nvme_init_integrity(ns); nvme_init_integrity(ns);
if (ns->ms && !(ns->ms == 8 && ns->pi_type) && !blk_get_integrity(disk)) if (ns->ms && !(ns->ms == 8 && ns->pi_type) && !blk_get_integrity(disk))
...@@ -1605,7 +1617,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) ...@@ -1605,7 +1617,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
} }
memcpy(ctrl->psd, id->psd, sizeof(ctrl->psd)); memcpy(ctrl->psd, id->psd, sizeof(ctrl->psd));
if (ctrl->ops->is_fabrics) { if (ctrl->ops->flags & NVME_F_FABRICS) {
ctrl->icdoff = le16_to_cpu(id->icdoff); ctrl->icdoff = le16_to_cpu(id->icdoff);
ctrl->ioccsz = le32_to_cpu(id->ioccsz); ctrl->ioccsz = le32_to_cpu(id->ioccsz);
ctrl->iorcsz = le32_to_cpu(id->iorcsz); ctrl->iorcsz = le32_to_cpu(id->iorcsz);
...@@ -2098,7 +2110,6 @@ static void nvme_ns_remove(struct nvme_ns *ns) ...@@ -2098,7 +2110,6 @@ static void nvme_ns_remove(struct nvme_ns *ns)
if (ns->ndev) if (ns->ndev)
nvme_nvm_unregister_sysfs(ns); nvme_nvm_unregister_sysfs(ns);
del_gendisk(ns->disk); del_gendisk(ns->disk);
blk_mq_abort_requeue_list(ns->queue);
blk_cleanup_queue(ns->queue); blk_cleanup_queue(ns->queue);
} }
...@@ -2436,8 +2447,16 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl) ...@@ -2436,8 +2447,16 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
continue; continue;
revalidate_disk(ns->disk); revalidate_disk(ns->disk);
blk_set_queue_dying(ns->queue); blk_set_queue_dying(ns->queue);
blk_mq_abort_requeue_list(ns->queue);
blk_mq_start_stopped_hw_queues(ns->queue, true); /*
* Forcibly start all queues to avoid having stuck requests.
* Note that we must ensure the queues are not stopped
* when the final removal happens.
*/
blk_mq_start_hw_queues(ns->queue);
/* draining requests in requeue list */
blk_mq_kick_requeue_list(ns->queue);
} }
mutex_unlock(&ctrl->namespaces_mutex); mutex_unlock(&ctrl->namespaces_mutex);
} }
......
...@@ -45,8 +45,6 @@ enum nvme_fc_queue_flags { ...@@ -45,8 +45,6 @@ enum nvme_fc_queue_flags {
#define NVMEFC_QUEUE_DELAY 3 /* ms units */ #define NVMEFC_QUEUE_DELAY 3 /* ms units */
#define NVME_FC_MAX_CONNECT_ATTEMPTS 1
struct nvme_fc_queue { struct nvme_fc_queue {
struct nvme_fc_ctrl *ctrl; struct nvme_fc_ctrl *ctrl;
struct device *dev; struct device *dev;
...@@ -165,8 +163,6 @@ struct nvme_fc_ctrl { ...@@ -165,8 +163,6 @@ struct nvme_fc_ctrl {
struct work_struct delete_work; struct work_struct delete_work;
struct work_struct reset_work; struct work_struct reset_work;
struct delayed_work connect_work; struct delayed_work connect_work;
int reconnect_delay;
int connect_attempts;
struct kref ref; struct kref ref;
u32 flags; u32 flags;
...@@ -1376,9 +1372,9 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req) ...@@ -1376,9 +1372,9 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
complete_rq = __nvme_fc_fcpop_chk_teardowns(ctrl, op); complete_rq = __nvme_fc_fcpop_chk_teardowns(ctrl, op);
if (!complete_rq) { if (!complete_rq) {
if (unlikely(op->flags & FCOP_FLAGS_TERMIO)) { if (unlikely(op->flags & FCOP_FLAGS_TERMIO)) {
status = cpu_to_le16(NVME_SC_ABORT_REQ); status = cpu_to_le16(NVME_SC_ABORT_REQ << 1);
if (blk_queue_dying(rq->q)) if (blk_queue_dying(rq->q))
status |= cpu_to_le16(NVME_SC_DNR); status |= cpu_to_le16(NVME_SC_DNR << 1);
} }
nvme_end_request(rq, status, result); nvme_end_request(rq, status, result);
} else } else
...@@ -1751,7 +1747,7 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg) ...@@ -1751,7 +1747,7 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg)
dev_warn(ctrl->ctrl.device, dev_warn(ctrl->ctrl.device,
"NVME-FC{%d}: transport association error detected: %s\n", "NVME-FC{%d}: transport association error detected: %s\n",
ctrl->cnum, errmsg); ctrl->cnum, errmsg);
dev_info(ctrl->ctrl.device, dev_warn(ctrl->ctrl.device,
"NVME-FC{%d}: resetting controller\n", ctrl->cnum); "NVME-FC{%d}: resetting controller\n", ctrl->cnum);
/* stop the queues on error, cleanup is in reset thread */ /* stop the queues on error, cleanup is in reset thread */
...@@ -2195,9 +2191,6 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl) ...@@ -2195,9 +2191,6 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl)
if (!opts->nr_io_queues) if (!opts->nr_io_queues)
return 0; return 0;
dev_info(ctrl->ctrl.device, "creating %d I/O queues.\n",
opts->nr_io_queues);
nvme_fc_init_io_queues(ctrl); nvme_fc_init_io_queues(ctrl);
memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set)); memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set));
...@@ -2268,9 +2261,6 @@ nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl) ...@@ -2268,9 +2261,6 @@ nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl)
if (ctrl->queue_count == 1) if (ctrl->queue_count == 1)
return 0; return 0;
dev_info(ctrl->ctrl.device, "Recreating %d I/O queues.\n",
opts->nr_io_queues);
nvme_fc_init_io_queues(ctrl); nvme_fc_init_io_queues(ctrl);
ret = blk_mq_reinit_tagset(&ctrl->tag_set); ret = blk_mq_reinit_tagset(&ctrl->tag_set);
...@@ -2306,7 +2296,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) ...@@ -2306,7 +2296,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
int ret; int ret;
bool changed; bool changed;
ctrl->connect_attempts++; ++ctrl->ctrl.opts->nr_reconnects;
/* /*
* Create the admin queue * Create the admin queue
...@@ -2403,9 +2393,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) ...@@ -2403,9 +2393,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
WARN_ON_ONCE(!changed); WARN_ON_ONCE(!changed);
ctrl->connect_attempts = 0; ctrl->ctrl.opts->nr_reconnects = 0;
kref_get(&ctrl->ctrl.kref);
if (ctrl->queue_count > 1) { if (ctrl->queue_count > 1) {
nvme_start_queues(&ctrl->ctrl); nvme_start_queues(&ctrl->ctrl);
...@@ -2536,26 +2524,32 @@ nvme_fc_delete_ctrl_work(struct work_struct *work) ...@@ -2536,26 +2524,32 @@ nvme_fc_delete_ctrl_work(struct work_struct *work)
/* /*
* tear down the controller * tear down the controller
* This will result in the last reference on the nvme ctrl to * After the last reference on the nvme ctrl is removed,
* expire, calling the transport nvme_fc_nvme_ctrl_freed() callback. * the transport nvme_fc_nvme_ctrl_freed() callback will be
* From there, the transport will tear down it's logical queues and * invoked. From there, the transport will tear down it's
* association. * logical queues and association.
*/ */
nvme_uninit_ctrl(&ctrl->ctrl); nvme_uninit_ctrl(&ctrl->ctrl);
nvme_put_ctrl(&ctrl->ctrl); nvme_put_ctrl(&ctrl->ctrl);
} }
static int static bool
__nvme_fc_del_ctrl(struct nvme_fc_ctrl *ctrl) __nvme_fc_schedule_delete_work(struct nvme_fc_ctrl *ctrl)
{ {
if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING)) if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING))
return -EBUSY; return true;
if (!queue_work(nvme_fc_wq, &ctrl->delete_work)) if (!queue_work(nvme_fc_wq, &ctrl->delete_work))
return -EBUSY; return true;
return 0; return false;
}
static int
__nvme_fc_del_ctrl(struct nvme_fc_ctrl *ctrl)
{
return __nvme_fc_schedule_delete_work(ctrl) ? -EBUSY : 0;
} }
/* /*
...@@ -2580,6 +2574,35 @@ nvme_fc_del_nvme_ctrl(struct nvme_ctrl *nctrl) ...@@ -2580,6 +2574,35 @@ nvme_fc_del_nvme_ctrl(struct nvme_ctrl *nctrl)
return ret; return ret;
} }
static void
nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status)
{
/* If we are resetting/deleting then do nothing */
if (ctrl->ctrl.state != NVME_CTRL_RECONNECTING) {
WARN_ON_ONCE(ctrl->ctrl.state == NVME_CTRL_NEW ||
ctrl->ctrl.state == NVME_CTRL_LIVE);
return;
}
dev_info(ctrl->ctrl.device,
"NVME-FC{%d}: reset: Reconnect attempt failed (%d)\n",
ctrl->cnum, status);
if (nvmf_should_reconnect(&ctrl->ctrl)) {
dev_info(ctrl->ctrl.device,
"NVME-FC{%d}: Reconnect attempt in %d seconds.\n",
ctrl->cnum, ctrl->ctrl.opts->reconnect_delay);
queue_delayed_work(nvme_fc_wq, &ctrl->connect_work,
ctrl->ctrl.opts->reconnect_delay * HZ);
} else {
dev_warn(ctrl->ctrl.device,
"NVME-FC{%d}: Max reconnect attempts (%d) "
"reached. Removing controller\n",
ctrl->cnum, ctrl->ctrl.opts->nr_reconnects);
WARN_ON(__nvme_fc_schedule_delete_work(ctrl));
}
}
static void static void
nvme_fc_reset_ctrl_work(struct work_struct *work) nvme_fc_reset_ctrl_work(struct work_struct *work)
{ {
...@@ -2591,34 +2614,9 @@ nvme_fc_reset_ctrl_work(struct work_struct *work) ...@@ -2591,34 +2614,9 @@ nvme_fc_reset_ctrl_work(struct work_struct *work)
nvme_fc_delete_association(ctrl); nvme_fc_delete_association(ctrl);
ret = nvme_fc_create_association(ctrl); ret = nvme_fc_create_association(ctrl);
if (ret) { if (ret)
dev_warn(ctrl->ctrl.device, nvme_fc_reconnect_or_delete(ctrl, ret);
"NVME-FC{%d}: reset: Reconnect attempt failed (%d)\n", else
ctrl->cnum, ret);
if (ctrl->connect_attempts >= NVME_FC_MAX_CONNECT_ATTEMPTS) {
dev_warn(ctrl->ctrl.device,
"NVME-FC{%d}: Max reconnect attempts (%d) "
"reached. Removing controller\n",
ctrl->cnum, ctrl->connect_attempts);
if (!nvme_change_ctrl_state(&ctrl->ctrl,
NVME_CTRL_DELETING)) {
dev_err(ctrl->ctrl.device,
"NVME-FC{%d}: failed to change state "
"to DELETING\n", ctrl->cnum);
return;
}
WARN_ON(!queue_work(nvme_fc_wq, &ctrl->delete_work));
return;
}
dev_warn(ctrl->ctrl.device,
"NVME-FC{%d}: Reconnect attempt in %d seconds.\n",
ctrl->cnum, ctrl->reconnect_delay);
queue_delayed_work(nvme_fc_wq, &ctrl->connect_work,
ctrl->reconnect_delay * HZ);
} else
dev_info(ctrl->ctrl.device, dev_info(ctrl->ctrl.device,
"NVME-FC{%d}: controller reset complete\n", ctrl->cnum); "NVME-FC{%d}: controller reset complete\n", ctrl->cnum);
} }
...@@ -2632,7 +2630,7 @@ nvme_fc_reset_nvme_ctrl(struct nvme_ctrl *nctrl) ...@@ -2632,7 +2630,7 @@ nvme_fc_reset_nvme_ctrl(struct nvme_ctrl *nctrl)
{ {
struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
dev_warn(ctrl->ctrl.device, dev_info(ctrl->ctrl.device,
"NVME-FC{%d}: admin requested controller reset\n", ctrl->cnum); "NVME-FC{%d}: admin requested controller reset\n", ctrl->cnum);
if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING)) if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING))
...@@ -2649,7 +2647,7 @@ nvme_fc_reset_nvme_ctrl(struct nvme_ctrl *nctrl) ...@@ -2649,7 +2647,7 @@ nvme_fc_reset_nvme_ctrl(struct nvme_ctrl *nctrl)
static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = { static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = {
.name = "fc", .name = "fc",
.module = THIS_MODULE, .module = THIS_MODULE,
.is_fabrics = true, .flags = NVME_F_FABRICS,
.reg_read32 = nvmf_reg_read32, .reg_read32 = nvmf_reg_read32,
.reg_read64 = nvmf_reg_read64, .reg_read64 = nvmf_reg_read64,
.reg_write32 = nvmf_reg_write32, .reg_write32 = nvmf_reg_write32,
...@@ -2671,34 +2669,9 @@ nvme_fc_connect_ctrl_work(struct work_struct *work) ...@@ -2671,34 +2669,9 @@ nvme_fc_connect_ctrl_work(struct work_struct *work)
struct nvme_fc_ctrl, connect_work); struct nvme_fc_ctrl, connect_work);
ret = nvme_fc_create_association(ctrl); ret = nvme_fc_create_association(ctrl);
if (ret) { if (ret)
dev_warn(ctrl->ctrl.device, nvme_fc_reconnect_or_delete(ctrl, ret);
"NVME-FC{%d}: Reconnect attempt failed (%d)\n", else
ctrl->cnum, ret);
if (ctrl->connect_attempts >= NVME_FC_MAX_CONNECT_ATTEMPTS) {
dev_warn(ctrl->ctrl.device,
"NVME-FC{%d}: Max reconnect attempts (%d) "
"reached. Removing controller\n",
ctrl->cnum, ctrl->connect_attempts);
if (!nvme_change_ctrl_state(&ctrl->ctrl,
NVME_CTRL_DELETING)) {
dev_err(ctrl->ctrl.device,
"NVME-FC{%d}: failed to change state "
"to DELETING\n", ctrl->cnum);
return;
}
WARN_ON(!queue_work(nvme_fc_wq, &ctrl->delete_work));
return;
}
dev_warn(ctrl->ctrl.device,
"NVME-FC{%d}: Reconnect attempt in %d seconds.\n",
ctrl->cnum, ctrl->reconnect_delay);
queue_delayed_work(nvme_fc_wq, &ctrl->connect_work,
ctrl->reconnect_delay * HZ);
} else
dev_info(ctrl->ctrl.device, dev_info(ctrl->ctrl.device,
"NVME-FC{%d}: controller reconnect complete\n", "NVME-FC{%d}: controller reconnect complete\n",
ctrl->cnum); ctrl->cnum);
...@@ -2755,7 +2728,6 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, ...@@ -2755,7 +2728,6 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
INIT_WORK(&ctrl->delete_work, nvme_fc_delete_ctrl_work); INIT_WORK(&ctrl->delete_work, nvme_fc_delete_ctrl_work);
INIT_WORK(&ctrl->reset_work, nvme_fc_reset_ctrl_work); INIT_WORK(&ctrl->reset_work, nvme_fc_reset_ctrl_work);
INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work); INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work);
ctrl->reconnect_delay = opts->reconnect_delay;
spin_lock_init(&ctrl->lock); spin_lock_init(&ctrl->lock);
/* io queue count */ /* io queue count */
...@@ -2819,7 +2791,6 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, ...@@ -2819,7 +2791,6 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
ctrl->ctrl.opts = NULL; ctrl->ctrl.opts = NULL;
/* initiate nvme ctrl ref counting teardown */ /* initiate nvme ctrl ref counting teardown */
nvme_uninit_ctrl(&ctrl->ctrl); nvme_uninit_ctrl(&ctrl->ctrl);
nvme_put_ctrl(&ctrl->ctrl);
/* as we're past the point where we transition to the ref /* as we're past the point where we transition to the ref
* counting teardown path, if we return a bad pointer here, * counting teardown path, if we return a bad pointer here,
...@@ -2835,6 +2806,8 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, ...@@ -2835,6 +2806,8 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
return ERR_PTR(ret); return ERR_PTR(ret);
} }
kref_get(&ctrl->ctrl.kref);
dev_info(ctrl->ctrl.device, dev_info(ctrl->ctrl.device,
"NVME-FC{%d}: new ctrl: NQN \"%s\"\n", "NVME-FC{%d}: new ctrl: NQN \"%s\"\n",
ctrl->cnum, ctrl->ctrl.opts->subsysnqn); ctrl->cnum, ctrl->ctrl.opts->subsysnqn);
...@@ -2971,7 +2944,7 @@ nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts) ...@@ -2971,7 +2944,7 @@ nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts)
static struct nvmf_transport_ops nvme_fc_transport = { static struct nvmf_transport_ops nvme_fc_transport = {
.name = "fc", .name = "fc",
.required_opts = NVMF_OPT_TRADDR | NVMF_OPT_HOST_TRADDR, .required_opts = NVMF_OPT_TRADDR | NVMF_OPT_HOST_TRADDR,
.allowed_opts = NVMF_OPT_RECONNECT_DELAY, .allowed_opts = NVMF_OPT_RECONNECT_DELAY | NVMF_OPT_CTRL_LOSS_TMO,
.create_ctrl = nvme_fc_create_ctrl, .create_ctrl = nvme_fc_create_ctrl,
}; };
......
...@@ -208,7 +208,9 @@ struct nvme_ns { ...@@ -208,7 +208,9 @@ struct nvme_ns {
struct nvme_ctrl_ops { struct nvme_ctrl_ops {
const char *name; const char *name;
struct module *module; struct module *module;
bool is_fabrics; unsigned int flags;
#define NVME_F_FABRICS (1 << 0)
#define NVME_F_METADATA_SUPPORTED (1 << 1)
int (*reg_read32)(struct nvme_ctrl *ctrl, u32 off, u32 *val); int (*reg_read32)(struct nvme_ctrl *ctrl, u32 off, u32 *val);
int (*reg_write32)(struct nvme_ctrl *ctrl, u32 off, u32 val); int (*reg_write32)(struct nvme_ctrl *ctrl, u32 off, u32 val);
int (*reg_read64)(struct nvme_ctrl *ctrl, u32 off, u64 *val); int (*reg_read64)(struct nvme_ctrl *ctrl, u32 off, u64 *val);
......
...@@ -263,7 +263,7 @@ static void nvme_dbbuf_set(struct nvme_dev *dev) ...@@ -263,7 +263,7 @@ static void nvme_dbbuf_set(struct nvme_dev *dev)
c.dbbuf.prp2 = cpu_to_le64(dev->dbbuf_eis_dma_addr); c.dbbuf.prp2 = cpu_to_le64(dev->dbbuf_eis_dma_addr);
if (nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0)) { if (nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0)) {
dev_warn(dev->dev, "unable to set dbbuf\n"); dev_warn(dev->ctrl.device, "unable to set dbbuf\n");
/* Free memory and continue on */ /* Free memory and continue on */
nvme_dbbuf_dma_free(dev); nvme_dbbuf_dma_free(dev);
} }
...@@ -1394,11 +1394,11 @@ static void nvme_warn_reset(struct nvme_dev *dev, u32 csts) ...@@ -1394,11 +1394,11 @@ static void nvme_warn_reset(struct nvme_dev *dev, u32 csts)
result = pci_read_config_word(to_pci_dev(dev->dev), PCI_STATUS, result = pci_read_config_word(to_pci_dev(dev->dev), PCI_STATUS,
&pci_status); &pci_status);
if (result == PCIBIOS_SUCCESSFUL) if (result == PCIBIOS_SUCCESSFUL)
dev_warn(dev->dev, dev_warn(dev->ctrl.device,
"controller is down; will reset: CSTS=0x%x, PCI_STATUS=0x%hx\n", "controller is down; will reset: CSTS=0x%x, PCI_STATUS=0x%hx\n",
csts, pci_status); csts, pci_status);
else else
dev_warn(dev->dev, dev_warn(dev->ctrl.device,
"controller is down; will reset: CSTS=0x%x, PCI_STATUS read failed (%d)\n", "controller is down; will reset: CSTS=0x%x, PCI_STATUS read failed (%d)\n",
csts, result); csts, result);
} }
...@@ -1740,8 +1740,8 @@ static int nvme_pci_enable(struct nvme_dev *dev) ...@@ -1740,8 +1740,8 @@ static int nvme_pci_enable(struct nvme_dev *dev)
*/ */
if (pdev->vendor == PCI_VENDOR_ID_APPLE && pdev->device == 0x2001) { if (pdev->vendor == PCI_VENDOR_ID_APPLE && pdev->device == 0x2001) {
dev->q_depth = 2; dev->q_depth = 2;
dev_warn(dev->dev, "detected Apple NVMe controller, set " dev_warn(dev->ctrl.device, "detected Apple NVMe controller, "
"queue depth=%u to work around controller resets\n", "set queue depth=%u to work around controller resets\n",
dev->q_depth); dev->q_depth);
} }
...@@ -1759,7 +1759,7 @@ static int nvme_pci_enable(struct nvme_dev *dev) ...@@ -1759,7 +1759,7 @@ static int nvme_pci_enable(struct nvme_dev *dev)
if (dev->cmbsz) { if (dev->cmbsz) {
if (sysfs_add_file_to_group(&dev->ctrl.device->kobj, if (sysfs_add_file_to_group(&dev->ctrl.device->kobj,
&dev_attr_cmb.attr, NULL)) &dev_attr_cmb.attr, NULL))
dev_warn(dev->dev, dev_warn(dev->ctrl.device,
"failed to add sysfs attribute for CMB\n"); "failed to add sysfs attribute for CMB\n");
} }
} }
...@@ -2047,6 +2047,7 @@ static int nvme_pci_reset_ctrl(struct nvme_ctrl *ctrl) ...@@ -2047,6 +2047,7 @@ static int nvme_pci_reset_ctrl(struct nvme_ctrl *ctrl)
static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = { static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
.name = "pcie", .name = "pcie",
.module = THIS_MODULE, .module = THIS_MODULE,
.flags = NVME_F_METADATA_SUPPORTED,
.reg_read32 = nvme_pci_reg_read32, .reg_read32 = nvme_pci_reg_read32,
.reg_write32 = nvme_pci_reg_write32, .reg_write32 = nvme_pci_reg_write32,
.reg_read64 = nvme_pci_reg_read64, .reg_read64 = nvme_pci_reg_read64,
...@@ -2293,6 +2294,8 @@ static const struct pci_device_id nvme_id_table[] = { ...@@ -2293,6 +2294,8 @@ static const struct pci_device_id nvme_id_table[] = {
{ PCI_VDEVICE(INTEL, 0x0a54), { PCI_VDEVICE(INTEL, 0x0a54),
.driver_data = NVME_QUIRK_STRIPE_SIZE | .driver_data = NVME_QUIRK_STRIPE_SIZE |
NVME_QUIRK_DEALLOCATE_ZEROES, }, NVME_QUIRK_DEALLOCATE_ZEROES, },
{ PCI_VDEVICE(INTEL, 0xf1a5), /* Intel 600P/P3100 */
.driver_data = NVME_QUIRK_NO_DEEPEST_PS },
{ PCI_VDEVICE(INTEL, 0x5845), /* Qemu emulated controller */ { PCI_VDEVICE(INTEL, 0x5845), /* Qemu emulated controller */
.driver_data = NVME_QUIRK_IDENTIFY_CNS, }, .driver_data = NVME_QUIRK_IDENTIFY_CNS, },
{ PCI_DEVICE(0x1c58, 0x0003), /* HGST adapter */ { PCI_DEVICE(0x1c58, 0x0003), /* HGST adapter */
......
...@@ -1038,6 +1038,19 @@ static void nvme_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc) ...@@ -1038,6 +1038,19 @@ static void nvme_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc)
nvme_rdma_wr_error(cq, wc, "SEND"); nvme_rdma_wr_error(cq, wc, "SEND");
} }
/*
 * Advance the per-queue send counter and report whether this send should be
 * posted as a signaled work request.
 *
 * Returns 1 when the updated counter is a multiple of the signaling interval,
 * 0 otherwise.
 */
static inline int nvme_rdma_queue_sig_limit(struct nvme_rdma_queue *queue)
{
	/*
	 * The interval is half the queue depth, clamped to at least 1 so that
	 * a queue of depth 1 signals every single message.
	 */
	int interval = max(queue->queue_size / 2, 1);

	queue->sig_count++;
	return (queue->sig_count % interval) == 0;
}
static int nvme_rdma_post_send(struct nvme_rdma_queue *queue, static int nvme_rdma_post_send(struct nvme_rdma_queue *queue,
struct nvme_rdma_qe *qe, struct ib_sge *sge, u32 num_sge, struct nvme_rdma_qe *qe, struct ib_sge *sge, u32 num_sge,
struct ib_send_wr *first, bool flush) struct ib_send_wr *first, bool flush)
...@@ -1065,9 +1078,6 @@ static int nvme_rdma_post_send(struct nvme_rdma_queue *queue, ...@@ -1065,9 +1078,6 @@ static int nvme_rdma_post_send(struct nvme_rdma_queue *queue,
* Would have been way to obvious to handle this in hardware or * Would have been way to obvious to handle this in hardware or
* at least the RDMA stack.. * at least the RDMA stack..
* *
* This messy and racy code sniplet is copy and pasted from the iSER
* initiator, and the magic '32' comes from there as well.
*
* Always signal the flushes. The magic request used for the flush * Always signal the flushes. The magic request used for the flush
* sequencer is not allocated in our driver's tagset and it's * sequencer is not allocated in our driver's tagset and it's
* triggered to be freed by blk_cleanup_queue(). So we need to * triggered to be freed by blk_cleanup_queue(). So we need to
...@@ -1075,7 +1085,7 @@ static int nvme_rdma_post_send(struct nvme_rdma_queue *queue, ...@@ -1075,7 +1085,7 @@ static int nvme_rdma_post_send(struct nvme_rdma_queue *queue,
* embedded in request's payload, is not freed when __ib_process_cq() * embedded in request's payload, is not freed when __ib_process_cq()
* calls wr_cqe->done(). * calls wr_cqe->done().
*/ */
if ((++queue->sig_count % 32) == 0 || flush) if (nvme_rdma_queue_sig_limit(queue) || flush)
wr.send_flags |= IB_SEND_SIGNALED; wr.send_flags |= IB_SEND_SIGNALED;
if (first) if (first)
...@@ -1782,7 +1792,7 @@ static int nvme_rdma_reset_ctrl(struct nvme_ctrl *nctrl) ...@@ -1782,7 +1792,7 @@ static int nvme_rdma_reset_ctrl(struct nvme_ctrl *nctrl)
static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = { static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
.name = "rdma", .name = "rdma",
.module = THIS_MODULE, .module = THIS_MODULE,
.is_fabrics = true, .flags = NVME_F_FABRICS,
.reg_read32 = nvmf_reg_read32, .reg_read32 = nvmf_reg_read32,
.reg_read64 = nvmf_reg_read64, .reg_read64 = nvmf_reg_read64,
.reg_write32 = nvmf_reg_write32, .reg_write32 = nvmf_reg_write32,
......
...@@ -558,7 +558,7 @@ static int nvme_loop_reset_ctrl(struct nvme_ctrl *nctrl) ...@@ -558,7 +558,7 @@ static int nvme_loop_reset_ctrl(struct nvme_ctrl *nctrl)
static const struct nvme_ctrl_ops nvme_loop_ctrl_ops = { static const struct nvme_ctrl_ops nvme_loop_ctrl_ops = {
.name = "loop", .name = "loop",
.module = THIS_MODULE, .module = THIS_MODULE,