Commit 2873d32f authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge branch 'for-linus' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:
 "A collection of fixes from this series.  The most important here is a
  regression fix for an issue that some folks would hit in blk-merge.c,
  and the NVMe queue depth limit for the screwed up Apple "nvme"
  controller.

  In more detail, this pull request contains:

   - a set of fixes for null_blk, including a fix for a few corner cases
     where we could hang the device.  From Arianna and Paolo.

   - lightnvm:
        - A build improvement from Keith.
        - Update the qemu pci id detection from Matias.
        - Error handling fixes for leaks and other little fixes from
          Sudip and Wenwei.

   - fix from Eric where BLKRRPART would not return EBUSY for whole
     device mounts, only when partitions were mounted.

   - fix from Jan Kara, where EOF O_DIRECT reads would return
     negatively.

   - remove check for rq_mergeable() when checking limits for cloned
     requests.  The check doesn't make any sense.  It's assuming that
     since NOMERGE is set on the request that we don't have to
     recalculate limits since the request didn't change, but that's not
     true if the request has been redirected.  From Hannes.

   - correctly get the bio front segment value set for single segment
     bio's, fixing a BUG() in blk-merge.  From Ming"

* 'for-linus' of git://git.kernel.dk/linux-block:
  nvme: temporary fix for Apple controller reset
  null_blk: change type of completion_nsec to unsigned long
  null_blk: guarantee device restart in all irq modes
  null_blk: set a separate timer for each command
  blk-merge: fix computing bio->bi_seg_front_size in case of single segment
  direct-io: Fix negative return from dio read beyond eof
  block: Always check queue limits for cloned requests
  lightnvm: missing nvm_lock acquire
  lightnvm: unconverted ppa returned in get_bb_tbl
  lightnvm: refactor and change vendor id for qemu
  lightnvm: do device max sectors boundary check first
  lightnvm: fix ioctl memory leaks
  lightnvm: free memory when gennvm register fails
  lightnvm: Simplify config when disabled
  Return EBUSY from BLKRRPART for mounted whole-dev fs
parents c041f087 1f390c1f
......@@ -2114,7 +2114,8 @@ blk_qc_t submit_bio(int rw, struct bio *bio)
EXPORT_SYMBOL(submit_bio);
/**
* blk_rq_check_limits - Helper function to check a request for the queue limit
* blk_cloned_rq_check_limits - Helper function to check a cloned request
* for new the queue limits
* @q: the queue
* @rq: the request being checked
*
......@@ -2125,20 +2126,13 @@ EXPORT_SYMBOL(submit_bio);
* after it is inserted to @q, it should be checked against @q before
* the insertion using this generic function.
*
* This function should also be useful for request stacking drivers
* in some cases below, so export this function.
* Request stacking drivers like request-based dm may change the queue
* limits while requests are in the queue (e.g. dm's table swapping).
* Such request stacking drivers should check those requests against
* the new queue limits again when they dispatch those requests,
* although such checkings are also done against the old queue limits
* when submitting requests.
* limits when retrying requests on other queues. Those requests need
* to be checked against the new queue limits again during dispatch.
*/
int blk_rq_check_limits(struct request_queue *q, struct request *rq)
static int blk_cloned_rq_check_limits(struct request_queue *q,
struct request *rq)
{
if (!rq_mergeable(rq))
return 0;
if (blk_rq_sectors(rq) > blk_queue_get_max_sectors(q, rq->cmd_flags)) {
printk(KERN_ERR "%s: over max size limit.\n", __func__);
return -EIO;
......@@ -2158,7 +2152,6 @@ int blk_rq_check_limits(struct request_queue *q, struct request *rq)
return 0;
}
EXPORT_SYMBOL_GPL(blk_rq_check_limits);
/**
* blk_insert_cloned_request - Helper for stacking drivers to submit a request
......@@ -2170,7 +2163,7 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
unsigned long flags;
int where = ELEVATOR_INSERT_BACK;
if (blk_rq_check_limits(q, rq))
if (blk_cloned_rq_check_limits(q, rq))
return -EIO;
if (rq->rq_disk &&
......
......@@ -103,6 +103,9 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
bvprv = bv;
bvprvp = &bvprv;
sectors += bv.bv_len >> 9;
if (nsegs == 1 && seg_size > front_seg_size)
front_seg_size = seg_size;
continue;
}
new_segment:
......
......@@ -397,7 +397,7 @@ static int drop_partitions(struct gendisk *disk, struct block_device *bdev)
struct hd_struct *part;
int res;
if (bdev->bd_part_count)
if (bdev->bd_part_count || bdev->bd_super)
return -EBUSY;
res = invalidate_partition(disk, 0);
if (res)
......
......@@ -18,6 +18,7 @@ struct nullb_cmd {
struct bio *bio;
unsigned int tag;
struct nullb_queue *nq;
struct hrtimer timer;
};
struct nullb_queue {
......@@ -49,17 +50,6 @@ static int null_major;
static int nullb_indexes;
static struct kmem_cache *ppa_cache;
struct completion_queue {
struct llist_head list;
struct hrtimer timer;
};
/*
* These are per-cpu for now, they will need to be configured by the
* complete_queues parameter and appropriately mapped.
*/
static DEFINE_PER_CPU(struct completion_queue, completion_queues);
enum {
NULL_IRQ_NONE = 0,
NULL_IRQ_SOFTIRQ = 1,
......@@ -142,8 +132,8 @@ static const struct kernel_param_ops null_irqmode_param_ops = {
device_param_cb(irqmode, &null_irqmode_param_ops, &irqmode, S_IRUGO);
MODULE_PARM_DESC(irqmode, "IRQ completion handler. 0-none, 1-softirq, 2-timer");
static int completion_nsec = 10000;
module_param(completion_nsec, int, S_IRUGO);
static unsigned long completion_nsec = 10000;
module_param(completion_nsec, ulong, S_IRUGO);
MODULE_PARM_DESC(completion_nsec, "Time in ns to complete a request in hardware. Default: 10,000ns");
static int hw_queue_depth = 64;
......@@ -180,6 +170,8 @@ static void free_cmd(struct nullb_cmd *cmd)
put_tag(cmd->nq, cmd->tag);
}
static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer);
static struct nullb_cmd *__alloc_cmd(struct nullb_queue *nq)
{
struct nullb_cmd *cmd;
......@@ -190,6 +182,11 @@ static struct nullb_cmd *__alloc_cmd(struct nullb_queue *nq)
cmd = &nq->cmds[tag];
cmd->tag = tag;
cmd->nq = nq;
if (irqmode == NULL_IRQ_TIMER) {
hrtimer_init(&cmd->timer, CLOCK_MONOTONIC,
HRTIMER_MODE_REL);
cmd->timer.function = null_cmd_timer_expired;
}
return cmd;
}
......@@ -220,6 +217,8 @@ static struct nullb_cmd *alloc_cmd(struct nullb_queue *nq, int can_wait)
static void end_cmd(struct nullb_cmd *cmd)
{
struct request_queue *q = NULL;
switch (queue_mode) {
case NULL_Q_MQ:
blk_mq_end_request(cmd->rq, 0);
......@@ -230,55 +229,37 @@ static void end_cmd(struct nullb_cmd *cmd)
break;
case NULL_Q_BIO:
bio_endio(cmd->bio);
break;
goto free_cmd;
}
if (cmd->rq)
q = cmd->rq->q;
/* Restart queue if needed, as we are freeing a tag */
if (q && !q->mq_ops && blk_queue_stopped(q)) {
unsigned long flags;
spin_lock_irqsave(q->queue_lock, flags);
if (blk_queue_stopped(q))
blk_start_queue(q);
spin_unlock_irqrestore(q->queue_lock, flags);
}
free_cmd:
free_cmd(cmd);
}
static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer)
{
struct completion_queue *cq;
struct llist_node *entry;
struct nullb_cmd *cmd;
cq = &per_cpu(completion_queues, smp_processor_id());
while ((entry = llist_del_all(&cq->list)) != NULL) {
entry = llist_reverse_order(entry);
do {
struct request_queue *q = NULL;
cmd = container_of(entry, struct nullb_cmd, ll_list);
entry = entry->next;
if (cmd->rq)
q = cmd->rq->q;
end_cmd(cmd);
if (q && !q->mq_ops && blk_queue_stopped(q)) {
spin_lock(q->queue_lock);
if (blk_queue_stopped(q))
blk_start_queue(q);
spin_unlock(q->queue_lock);
}
} while (entry);
}
end_cmd(container_of(timer, struct nullb_cmd, timer));
return HRTIMER_NORESTART;
}
static void null_cmd_end_timer(struct nullb_cmd *cmd)
{
struct completion_queue *cq = &per_cpu(completion_queues, get_cpu());
cmd->ll_list.next = NULL;
if (llist_add(&cmd->ll_list, &cq->list)) {
ktime_t kt = ktime_set(0, completion_nsec);
hrtimer_start(&cq->timer, kt, HRTIMER_MODE_REL_PINNED);
}
ktime_t kt = ktime_set(0, completion_nsec);
put_cpu();
hrtimer_start(&cmd->timer, kt, HRTIMER_MODE_REL);
}
static void null_softirq_done_fn(struct request *rq)
......@@ -376,6 +357,10 @@ static int null_queue_rq(struct blk_mq_hw_ctx *hctx,
{
struct nullb_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
if (irqmode == NULL_IRQ_TIMER) {
hrtimer_init(&cmd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
cmd->timer.function = null_cmd_timer_expired;
}
cmd->rq = bd->rq;
cmd->nq = hctx->driver_data;
......@@ -813,19 +798,6 @@ static int __init null_init(void)
mutex_init(&lock);
/* Initialize a separate list for each CPU for issuing softirqs */
for_each_possible_cpu(i) {
struct completion_queue *cq = &per_cpu(completion_queues, i);
init_llist_head(&cq->list);
if (irqmode != NULL_IRQ_TIMER)
continue;
hrtimer_init(&cq->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
cq->timer.function = null_cmd_timer_expired;
}
null_major = register_blkdev(0, "nullb");
if (null_major < 0)
return null_major;
......
......@@ -123,6 +123,26 @@ void nvm_unregister_mgr(struct nvmm_type *mt)
}
EXPORT_SYMBOL(nvm_unregister_mgr);
/* register with device with a supported manager */
static int register_mgr(struct nvm_dev *dev)
{
struct nvmm_type *mt;
int ret = 0;
list_for_each_entry(mt, &nvm_mgrs, list) {
ret = mt->register_mgr(dev);
if (ret > 0) {
dev->mt = mt;
break; /* successfully initialized */
}
}
if (!ret)
pr_info("nvm: no compatible nvm manager found.\n");
return ret;
}
static struct nvm_dev *nvm_find_nvm_dev(const char *name)
{
struct nvm_dev *dev;
......@@ -221,7 +241,6 @@ static void nvm_free(struct nvm_dev *dev)
static int nvm_init(struct nvm_dev *dev)
{
struct nvmm_type *mt;
int ret = -EINVAL;
if (!dev->q || !dev->ops)
......@@ -252,21 +271,13 @@ static int nvm_init(struct nvm_dev *dev)
goto err;
}
/* register with device with a supported manager */
list_for_each_entry(mt, &nvm_mgrs, list) {
ret = mt->register_mgr(dev);
if (ret < 0)
goto err; /* initialization failed */
if (ret > 0) {
dev->mt = mt;
break; /* successfully initialized */
}
}
if (!ret) {
pr_info("nvm: no compatible manager found.\n");
down_write(&nvm_lock);
ret = register_mgr(dev);
up_write(&nvm_lock);
if (ret < 0)
goto err;
if (!ret)
return 0;
}
pr_info("nvm: registered %s [%u/%u/%u/%u/%u/%u]\n",
dev->name, dev->sec_per_pg, dev->nr_planes,
......@@ -308,6 +319,12 @@ int nvm_register(struct request_queue *q, char *disk_name,
if (ret)
goto err_init;
if (dev->ops->max_phys_sect > 256) {
pr_info("nvm: max sectors supported is 256.\n");
ret = -EINVAL;
goto err_init;
}
if (dev->ops->max_phys_sect > 1) {
dev->ppalist_pool = dev->ops->create_dma_pool(dev->q,
"ppalist");
......@@ -316,10 +333,6 @@ int nvm_register(struct request_queue *q, char *disk_name,
ret = -ENOMEM;
goto err_init;
}
} else if (dev->ops->max_phys_sect > 256) {
pr_info("nvm: max sectors supported is 256.\n");
ret = -EINVAL;
goto err_init;
}
down_write(&nvm_lock);
......@@ -335,15 +348,17 @@ EXPORT_SYMBOL(nvm_register);
void nvm_unregister(char *disk_name)
{
struct nvm_dev *dev = nvm_find_nvm_dev(disk_name);
struct nvm_dev *dev;
down_write(&nvm_lock);
dev = nvm_find_nvm_dev(disk_name);
if (!dev) {
pr_err("nvm: could not find device %s to unregister\n",
disk_name);
up_write(&nvm_lock);
return;
}
down_write(&nvm_lock);
list_del(&dev->devices);
up_write(&nvm_lock);
......@@ -361,38 +376,30 @@ static int nvm_create_target(struct nvm_dev *dev,
{
struct nvm_ioctl_create_simple *s = &create->conf.s;
struct request_queue *tqueue;
struct nvmm_type *mt;
struct gendisk *tdisk;
struct nvm_tgt_type *tt;
struct nvm_target *t;
void *targetdata;
int ret = 0;
down_write(&nvm_lock);
if (!dev->mt) {
/* register with device with a supported NVM manager */
list_for_each_entry(mt, &nvm_mgrs, list) {
ret = mt->register_mgr(dev);
if (ret < 0)
return ret; /* initialization failed */
if (ret > 0) {
dev->mt = mt;
break; /* successfully initialized */
}
}
if (!ret) {
pr_info("nvm: no compatible nvm manager found.\n");
return -ENODEV;
ret = register_mgr(dev);
if (!ret)
ret = -ENODEV;
if (ret < 0) {
up_write(&nvm_lock);
return ret;
}
}
tt = nvm_find_target_type(create->tgttype);
if (!tt) {
pr_err("nvm: target type %s not found\n", create->tgttype);
up_write(&nvm_lock);
return -EINVAL;
}
down_write(&nvm_lock);
list_for_each_entry(t, &dev->online_targets, list) {
if (!strcmp(create->tgtname, t->disk->disk_name)) {
pr_err("nvm: target name already exists.\n");
......@@ -476,7 +483,9 @@ static int __nvm_configure_create(struct nvm_ioctl_create *create)
struct nvm_dev *dev;
struct nvm_ioctl_create_simple *s;
down_write(&nvm_lock);
dev = nvm_find_nvm_dev(create->dev);
up_write(&nvm_lock);
if (!dev) {
pr_err("nvm: device not found\n");
return -EINVAL;
......@@ -535,7 +544,9 @@ static int nvm_configure_show(const char *val)
return -EINVAL;
}
down_write(&nvm_lock);
dev = nvm_find_nvm_dev(devname);
up_write(&nvm_lock);
if (!dev) {
pr_err("nvm: device not found\n");
return -EINVAL;
......@@ -680,8 +691,10 @@ static long nvm_ioctl_info(struct file *file, void __user *arg)
info->tgtsize = tgt_iter;
up_write(&nvm_lock);
if (copy_to_user(arg, info, sizeof(struct nvm_ioctl_info)))
if (copy_to_user(arg, info, sizeof(struct nvm_ioctl_info))) {
kfree(info);
return -EFAULT;
}
kfree(info);
return 0;
......@@ -724,8 +737,11 @@ static long nvm_ioctl_get_devices(struct file *file, void __user *arg)
devices->nr_devices = i;
if (copy_to_user(arg, devices, sizeof(struct nvm_ioctl_get_devices)))
if (copy_to_user(arg, devices,
sizeof(struct nvm_ioctl_get_devices))) {
kfree(devices);
return -EFAULT;
}
kfree(devices);
return 0;
......
......@@ -75,7 +75,6 @@ static int gennvm_block_bb(struct ppa_addr ppa, int nr_blocks, u8 *blks,
struct nvm_block *blk;
int i;
ppa = dev_to_generic_addr(gn->dev, ppa);
lun = &gn->luns[(dev->nr_luns * ppa.g.ch) + ppa.g.lun];
for (i = 0; i < nr_blocks; i++) {
......@@ -187,7 +186,7 @@ static int gennvm_blocks_init(struct nvm_dev *dev, struct gen_nvm *gn)
ppa.g.lun = lun->vlun.id;
ppa = generic_to_dev_addr(dev, ppa);
ret = dev->ops->get_bb_tbl(dev->q, ppa,
ret = dev->ops->get_bb_tbl(dev, ppa,
dev->blks_per_lun,
gennvm_block_bb, gn);
if (ret)
......@@ -207,6 +206,14 @@ static int gennvm_blocks_init(struct nvm_dev *dev, struct gen_nvm *gn)
return 0;
}
static void gennvm_free(struct nvm_dev *dev)
{
gennvm_blocks_free(dev);
gennvm_luns_free(dev);
kfree(dev->mp);
dev->mp = NULL;
}
static int gennvm_register(struct nvm_dev *dev)
{
struct gen_nvm *gn;
......@@ -234,16 +241,13 @@ static int gennvm_register(struct nvm_dev *dev)
return 1;
err:
kfree(gn);
gennvm_free(dev);
return ret;
}
static void gennvm_unregister(struct nvm_dev *dev)
{
gennvm_blocks_free(dev);
gennvm_luns_free(dev);
kfree(dev->mp);
dev->mp = NULL;
gennvm_free(dev);
}
static struct nvm_block *gennvm_get_blk(struct nvm_dev *dev,
......
obj-$(CONFIG_BLK_DEV_NVME) += nvme.o
nvme-y += pci.o scsi.o lightnvm.o
lightnvm-$(CONFIG_NVM) := lightnvm.o
nvme-y += pci.o scsi.o $(lightnvm-y)
......@@ -22,8 +22,6 @@
#include "nvme.h"
#ifdef CONFIG_NVM
#include <linux/nvme.h>
#include <linux/bitops.h>
#include <linux/lightnvm.h>
......@@ -357,10 +355,11 @@ static int nvme_nvm_get_l2p_tbl(struct request_queue *q, u64 slba, u32 nlb,
return ret;
}
static int nvme_nvm_get_bb_tbl(struct request_queue *q, struct ppa_addr ppa,
static int nvme_nvm_get_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr ppa,
int nr_blocks, nvm_bb_update_fn *update_bbtbl,
void *priv)
{
struct request_queue *q = nvmdev->q;
struct nvme_ns *ns = q->queuedata;
struct nvme_dev *dev = ns->dev;
struct nvme_nvm_command c = {};
......@@ -404,6 +403,7 @@ static int nvme_nvm_get_bb_tbl(struct request_queue *q, struct ppa_addr ppa,
goto out;
}
ppa = dev_to_generic_addr(nvmdev, ppa);
ret = update_bbtbl(ppa, nr_blocks, bb_tbl->blk, priv);
if (ret) {
ret = -EINTR;
......@@ -571,31 +571,27 @@ void nvme_nvm_unregister(struct request_queue *q, char *disk_name)
nvm_unregister(disk_name);
}
/* move to shared place when used in multiple places. */
#define PCI_VENDOR_ID_CNEX 0x1d1d
#define PCI_DEVICE_ID_CNEX_WL 0x2807
#define PCI_DEVICE_ID_CNEX_QEMU 0x1f1f
int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id)
{
struct nvme_dev *dev = ns->dev;
struct pci_dev *pdev = to_pci_dev(dev->dev);
/* QEMU NVMe simulator - PCI ID + Vendor specific bit */
if (pdev->vendor == PCI_VENDOR_ID_INTEL && pdev->device == 0x5845 &&
if (pdev->vendor == PCI_VENDOR_ID_CNEX &&
pdev->device == PCI_DEVICE_ID_CNEX_QEMU &&
id->vs[0] == 0x1)
return 1;
/* CNEX Labs - PCI ID + Vendor specific bit */
if (pdev->vendor == 0x1d1d && pdev->device == 0x2807 &&
if (pdev->vendor == PCI_VENDOR_ID_CNEX &&
pdev->device == PCI_DEVICE_ID_CNEX_WL &&
id->vs[0] == 0x1)
return 1;
return 0;
}
#else
int nvme_nvm_register(struct request_queue *q, char *disk_name)
{
return 0;
}
void nvme_nvm_unregister(struct request_queue *q, char *disk_name) {};
int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id)
{
return 0;
}
#endif /* CONFIG_NVM */
......@@ -136,8 +136,22 @@ int nvme_sg_io(struct nvme_ns *ns, struct sg_io_hdr __user *u_hdr);
int nvme_sg_io32(struct nvme_ns *ns, unsigned long arg);
int nvme_sg_get_version_num(int __user *ip);
#ifdef CONFIG_NVM
int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id);
int nvme_nvm_register(struct request_queue *q, char *disk_name);
void nvme_nvm_unregister(struct request_queue *q, char *disk_name);
#else
static inline int nvme_nvm_register(struct request_queue *q, char *disk_name)
{
return 0;
}
static inline void nvme_nvm_unregister(struct request_queue *q, char *disk_name) {};
static inline int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id)
{
return 0;
}
#endif /* CONFIG_NVM */
#endif /* _NVME_H */
......@@ -2708,6 +2708,18 @@ static int nvme_dev_map(struct nvme_dev *dev)
dev->q_depth = min_t(int, NVME_CAP_MQES(cap) + 1, NVME_Q_DEPTH);
dev->db_stride = 1 << NVME_CAP_STRIDE(cap);
dev->dbs = ((void __iomem *)dev->bar) + 4096;
/*
* Temporary fix for the Apple controller found in the MacBook8,1 and
* some MacBook7,1 to avoid controller resets and data loss.
*/
if (pdev->vendor == PCI_VENDOR_ID_APPLE && pdev->device == 0x2001) {
dev->q_depth = 2;
dev_warn(dev->dev, "detected Apple NVMe controller, set "
"queue depth=%u to work around controller resets\n",
dev->q_depth);
}
if (readl(&dev->bar->vs) >= NVME_VS(1, 2))
dev->cmb = nvme_map_cmb(dev);
......
......@@ -1169,6 +1169,15 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
}
}
/* Once we sampled i_size check for reads beyond EOF */
dio->i_size = i_size_read(inode);
if (iov_iter_rw(iter) == READ && offset >= dio->i_size) {
if (dio->flags & DIO_LOCKING)
mutex_unlock(&inode->i_mutex);
kmem_cache_free(dio_cache, dio);
goto out;
}
/*
* For file extending writes updating i_size before data writeouts
* complete can expose uninitialized blocks in dumb filesystems.
......@@ -1222,7 +1231,6 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
sdio.next_block_for_io = -1;