Commit b9f84ac0 authored by Anthony Liguori's avatar Anthony Liguori
Browse files

Merge remote-tracking branch 'stefanha/block' into staging



* stefanha/block:
  block: Fix how mirror_run() frees its buffer
  win32-aio: Fix how win32_aio_process_completion() frees buffer
  scsi-disk: qemu_vfree(NULL) is fine, simplify
  w32: Make qemu_vfree() accept NULL like the POSIX implementation
  sheepdog: clean up sd_aio_setup()
  sheepdog: multiplex the rw FD to flush cache
  block: clear dirty bitmap when discarding
  ide: issue discard asynchronously but serialize the pieces
  ide: fix TRIM with empty range entry
  block: make discard asynchronous
  raw: support discard on block devices
  raw-posix: remember whether discard failed
  raw-posix: support discard on more filesystems
  block: fix initialization in bdrv_io_limits_enable()
  qcow2: Fix segfault on zero-length write
Signed-off-by: default avatarAnthony Liguori <aliguori@us.ibm.com>
parents c94bf1c1 7191bf31
......@@ -155,10 +155,6 @@ void bdrv_io_limits_enable(BlockDriverState *bs)
{
qemu_co_queue_init(&bs->throttled_reqs);
bs->block_timer = qemu_new_timer_ns(vm_clock, bdrv_block_timer, bs);
bs->slice_time = 5 * BLOCK_IO_SLICE_TIME;
bs->slice_start = qemu_get_clock_ns(vm_clock);
bs->slice_end = bs->slice_start + bs->slice_time;
memset(&bs->io_base, 0, sizeof(bs->io_base));
bs->io_limits_enabled = true;
}
......@@ -4174,7 +4170,13 @@ int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
return -EIO;
} else if (bs->read_only) {
return -EROFS;
} else if (bs->drv->bdrv_co_discard) {
}
if (bs->dirty_bitmap) {
set_dirty_bitmap(bs, sector_num, nb_sectors, 0);
}
if (bs->drv->bdrv_co_discard) {
return bs->drv->bdrv_co_discard(bs, sector_num, nb_sectors);
} else if (bs->drv->bdrv_aio_discard) {
BlockDriverAIOCB *acb;
......
......@@ -225,7 +225,7 @@ static void coroutine_fn mirror_run(void *opaque)
}
immediate_exit:
g_free(s->buf);
qemu_vfree(s->buf);
bdrv_set_dirty_tracking(bs, false);
bdrv_iostatus_disable(s->target);
if (s->should_complete && ret == 0) {
......
......@@ -759,7 +759,7 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
QEMUIOVector hd_qiov;
uint64_t bytes_done = 0;
uint8_t *cluster_data = NULL;
QCowL2Meta *l2meta;
QCowL2Meta *l2meta = NULL;
trace_qcow2_writev_start_req(qemu_coroutine_self(), sector_num,
remaining_sectors);
......
......@@ -20,11 +20,14 @@
#define QEMU_AIO_WRITE 0x0002
#define QEMU_AIO_IOCTL 0x0004
#define QEMU_AIO_FLUSH 0x0008
#define QEMU_AIO_DISCARD 0x0010
#define QEMU_AIO_TYPE_MASK \
(QEMU_AIO_READ|QEMU_AIO_WRITE|QEMU_AIO_IOCTL|QEMU_AIO_FLUSH)
(QEMU_AIO_READ|QEMU_AIO_WRITE|QEMU_AIO_IOCTL|QEMU_AIO_FLUSH| \
QEMU_AIO_DISCARD)
/* AIO flags */
#define QEMU_AIO_MISALIGNED 0x1000
#define QEMU_AIO_BLKDEV 0x2000
/* linux-aio.c - Linux native implementation */
......
......@@ -59,6 +59,9 @@
#ifdef CONFIG_FIEMAP
#include <linux/fiemap.h>
#endif
#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
#include <linux/falloc.h>
#endif
#if defined (__FreeBSD__) || defined(__FreeBSD_kernel__)
#include <sys/disk.h>
#include <sys/cdio.h>
......@@ -138,6 +141,7 @@ typedef struct BDRVRawState {
#ifdef CONFIG_XFS
bool is_xfs : 1;
#endif
bool has_discard : 1;
} BDRVRawState;
typedef struct BDRVRawReopenState {
......@@ -159,7 +163,7 @@ typedef struct RawPosixAIOData {
void *aio_ioctl_buf;
};
int aio_niov;
size_t aio_nbytes;
uint64_t aio_nbytes;
#define aio_ioctl_cmd aio_nbytes /* for QEMU_AIO_IOCTL */
off_t aio_offset;
int aio_type;
......@@ -289,6 +293,7 @@ static int raw_open_common(BlockDriverState *bs, const char *filename,
}
#endif
s->has_discard = 1;
#ifdef CONFIG_XFS
if (platform_test_xfs_fd(s->fd)) {
s->is_xfs = 1;
......@@ -618,6 +623,72 @@ static ssize_t handle_aiocb_rw(RawPosixAIOData *aiocb)
return nbytes;
}
#ifdef CONFIG_XFS
static int xfs_discard(BDRVRawState *s, int64_t offset, uint64_t bytes)
{
struct xfs_flock64 fl;
memset(&fl, 0, sizeof(fl));
fl.l_whence = SEEK_SET;
fl.l_start = offset;
fl.l_len = bytes;
if (xfsctl(NULL, s->fd, XFS_IOC_UNRESVSP64, &fl) < 0) {
DEBUG_BLOCK_PRINT("cannot punch hole (%s)\n", strerror(errno));
return -errno;
}
return 0;
}
#endif
static ssize_t handle_aiocb_discard(RawPosixAIOData *aiocb)
{
int ret = -EOPNOTSUPP;
BDRVRawState *s = aiocb->bs->opaque;
if (s->has_discard == 0) {
return 0;
}
if (aiocb->aio_type & QEMU_AIO_BLKDEV) {
#ifdef BLKDISCARD
do {
uint64_t range[2] = { aiocb->aio_offset, aiocb->aio_nbytes };
if (ioctl(aiocb->aio_fildes, BLKDISCARD, range) == 0) {
return 0;
}
} while (errno == EINTR);
ret = -errno;
#endif
} else {
#ifdef CONFIG_XFS
if (s->is_xfs) {
return xfs_discard(s, aiocb->aio_offset, aiocb->aio_nbytes);
}
#endif
#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
do {
if (fallocate(s->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
aiocb->aio_offset, aiocb->aio_nbytes) == 0) {
return 0;
}
} while (errno == EINTR);
ret = -errno;
#endif
}
if (ret == -ENODEV || ret == -ENOSYS || ret == -EOPNOTSUPP ||
ret == -ENOTTY) {
s->has_discard = 0;
ret = 0;
}
return ret;
}
static int aio_worker(void *arg)
{
RawPosixAIOData *aiocb = arg;
......@@ -652,6 +723,9 @@ static int aio_worker(void *arg)
case QEMU_AIO_IOCTL:
ret = handle_aiocb_ioctl(aiocb);
break;
case QEMU_AIO_DISCARD:
ret = handle_aiocb_discard(aiocb);
break;
default:
fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type);
ret = -EINVAL;
......@@ -1052,37 +1126,14 @@ static int coroutine_fn raw_co_is_allocated(BlockDriverState *bs,
}
}
#ifdef CONFIG_XFS
static int xfs_discard(BDRVRawState *s, int64_t sector_num, int nb_sectors)
static coroutine_fn BlockDriverAIOCB *raw_aio_discard(BlockDriverState *bs,
int64_t sector_num, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque)
{
struct xfs_flock64 fl;
memset(&fl, 0, sizeof(fl));
fl.l_whence = SEEK_SET;
fl.l_start = sector_num << 9;
fl.l_len = (int64_t)nb_sectors << 9;
if (xfsctl(NULL, s->fd, XFS_IOC_UNRESVSP64, &fl) < 0) {
DEBUG_BLOCK_PRINT("cannot punch hole (%s)\n", strerror(errno));
return -errno;
}
return 0;
}
#endif
static coroutine_fn int raw_co_discard(BlockDriverState *bs,
int64_t sector_num, int nb_sectors)
{
#ifdef CONFIG_XFS
BDRVRawState *s = bs->opaque;
if (s->is_xfs) {
return xfs_discard(s, sector_num, nb_sectors);
}
#endif
return 0;
return paio_submit(bs, s->fd, sector_num, NULL, nb_sectors,
cb, opaque, QEMU_AIO_DISCARD);
}
static QEMUOptionParameter raw_create_options[] = {
......@@ -1105,12 +1156,12 @@ static BlockDriver bdrv_file = {
.bdrv_reopen_abort = raw_reopen_abort,
.bdrv_close = raw_close,
.bdrv_create = raw_create,
.bdrv_co_discard = raw_co_discard,
.bdrv_co_is_allocated = raw_co_is_allocated,
.bdrv_aio_readv = raw_aio_readv,
.bdrv_aio_writev = raw_aio_writev,
.bdrv_aio_flush = raw_aio_flush,
.bdrv_aio_discard = raw_aio_discard,
.bdrv_truncate = raw_truncate,
.bdrv_getlength = raw_getlength,
......@@ -1320,6 +1371,19 @@ static BlockDriverAIOCB *hdev_aio_ioctl(BlockDriverState *bs,
return thread_pool_submit_aio(aio_worker, acb, cb, opaque);
}
static coroutine_fn BlockDriverAIOCB *hdev_aio_discard(BlockDriverState *bs,
int64_t sector_num, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque)
{
BDRVRawState *s = bs->opaque;
if (fd_open(bs) < 0) {
return NULL;
}
return paio_submit(bs, s->fd, sector_num, NULL, nb_sectors,
cb, opaque, QEMU_AIO_DISCARD|QEMU_AIO_BLKDEV);
}
#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
static int fd_open(BlockDriverState *bs)
{
......@@ -1391,6 +1455,7 @@ static BlockDriver bdrv_host_device = {
.bdrv_aio_readv = raw_aio_readv,
.bdrv_aio_writev = raw_aio_writev,
.bdrv_aio_flush = raw_aio_flush,
.bdrv_aio_discard = hdev_aio_discard,
.bdrv_truncate = raw_truncate,
.bdrv_getlength = raw_getlength,
......
......@@ -266,6 +266,7 @@ typedef struct AIOReq {
enum AIOCBState {
AIOCB_WRITE_UDATA,
AIOCB_READ_UDATA,
AIOCB_FLUSH_CACHE,
};
struct SheepdogAIOCB {
......@@ -299,7 +300,6 @@ typedef struct BDRVSheepdogState {
char *addr;
char *port;
int fd;
int flush_fd;
CoMutex lock;
Coroutine *co_send;
......@@ -427,12 +427,11 @@ static const AIOCBInfo sd_aiocb_info = {
};
static SheepdogAIOCB *sd_aio_setup(BlockDriverState *bs, QEMUIOVector *qiov,
int64_t sector_num, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque)
int64_t sector_num, int nb_sectors)
{
SheepdogAIOCB *acb;
acb = qemu_aio_get(&sd_aiocb_info, bs, cb, opaque);
acb = qemu_aio_get(&sd_aiocb_info, bs, NULL, NULL);
acb->qiov = qiov;
......@@ -736,6 +735,13 @@ static void coroutine_fn aio_read_response(void *opaque)
goto out;
}
break;
case AIOCB_FLUSH_CACHE:
if (rsp.result == SD_RES_INVALID_PARMS) {
dprintf("disable cache since the server doesn't support it\n");
s->cache_flags = SD_FLAG_CMD_DIRECT;
rsp.result = SD_RES_SUCCESS;
}
break;
}
if (rsp.result != SD_RES_SUCCESS) {
......@@ -950,7 +956,7 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
{
int nr_copies = s->inode.nr_copies;
SheepdogObjReq hdr;
unsigned int wlen;
unsigned int wlen = 0;
int ret;
uint64_t oid = aio_req->oid;
unsigned int datalen = aio_req->data_len;
......@@ -964,18 +970,23 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
memset(&hdr, 0, sizeof(hdr));
if (aiocb_type == AIOCB_READ_UDATA) {
wlen = 0;
switch (aiocb_type) {
case AIOCB_FLUSH_CACHE:
hdr.opcode = SD_OP_FLUSH_VDI;
break;
case AIOCB_READ_UDATA:
hdr.opcode = SD_OP_READ_OBJ;
hdr.flags = flags;
} else if (create) {
wlen = datalen;
hdr.opcode = SD_OP_CREATE_AND_WRITE_OBJ;
hdr.flags = SD_FLAG_CMD_WRITE | flags;
} else {
break;
case AIOCB_WRITE_UDATA:
if (create) {
hdr.opcode = SD_OP_CREATE_AND_WRITE_OBJ;
} else {
hdr.opcode = SD_OP_WRITE_OBJ;
}
wlen = datalen;
hdr.opcode = SD_OP_WRITE_OBJ;
hdr.flags = SD_FLAG_CMD_WRITE | flags;
break;
}
if (s->cache_flags) {
......@@ -1127,15 +1138,6 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags)
s->cache_flags = SD_FLAG_CMD_DIRECT;
}
if (s->cache_flags == SD_FLAG_CMD_CACHE) {
s->flush_fd = connect_to_sdog(s->addr, s->port);
if (s->flush_fd < 0) {
error_report("failed to connect");
ret = s->flush_fd;
goto out;
}
}
if (snapid || tag[0] != '\0') {
dprintf("%" PRIx32 " snapshot inode was open.\n", vid);
s->is_snapshot = true;
......@@ -1397,9 +1399,6 @@ static void sd_close(BlockDriverState *bs)
qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL, NULL);
closesocket(s->fd);
if (s->cache_flags) {
closesocket(s->flush_fd);
}
g_free(s->addr);
}
......@@ -1672,7 +1671,7 @@ static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num,
bs->total_sectors = sector_num + nb_sectors;
}
acb = sd_aio_setup(bs, qiov, sector_num, nb_sectors, NULL, NULL);
acb = sd_aio_setup(bs, qiov, sector_num, nb_sectors);
acb->aio_done_func = sd_write_done;
acb->aiocb_type = AIOCB_WRITE_UDATA;
......@@ -1693,7 +1692,7 @@ static coroutine_fn int sd_co_readv(BlockDriverState *bs, int64_t sector_num,
SheepdogAIOCB *acb;
int ret;
acb = sd_aio_setup(bs, qiov, sector_num, nb_sectors, NULL, NULL);
acb = sd_aio_setup(bs, qiov, sector_num, nb_sectors);
acb->aiocb_type = AIOCB_READ_UDATA;
acb->aio_done_func = sd_finish_aiocb;
......@@ -1711,39 +1710,31 @@ static coroutine_fn int sd_co_readv(BlockDriverState *bs, int64_t sector_num,
static int coroutine_fn sd_co_flush_to_disk(BlockDriverState *bs)
{
BDRVSheepdogState *s = bs->opaque;
SheepdogObjReq hdr = { 0 };
SheepdogObjRsp *rsp = (SheepdogObjRsp *)&hdr;
SheepdogInode *inode = &s->inode;
SheepdogAIOCB *acb;
AIOReq *aio_req;
int ret;
unsigned int wlen = 0, rlen = 0;
if (s->cache_flags != SD_FLAG_CMD_CACHE) {
return 0;
}
hdr.opcode = SD_OP_FLUSH_VDI;
hdr.oid = vid_to_vdi_oid(inode->vdi_id);
acb = sd_aio_setup(bs, NULL, 0, 0);
acb->aiocb_type = AIOCB_FLUSH_CACHE;
acb->aio_done_func = sd_finish_aiocb;
ret = do_req(s->flush_fd, (SheepdogReq *)&hdr, NULL, &wlen, &rlen);
if (ret) {
error_report("failed to send a request to the sheep");
aio_req = alloc_aio_req(s, acb, vid_to_vdi_oid(s->inode.vdi_id),
0, 0, 0, 0, 0);
QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
ret = add_aio_request(s, aio_req, NULL, 0, false, acb->aiocb_type);
if (ret < 0) {
error_report("add_aio_request is failed");
free_aio_req(s, aio_req);
qemu_aio_release(acb);
return ret;
}
if (rsp->result == SD_RES_INVALID_PARMS) {
dprintf("disable write cache since the server doesn't support it\n");
s->cache_flags = SD_FLAG_CMD_DIRECT;
closesocket(s->flush_fd);
return 0;
}
if (rsp->result != SD_RES_SUCCESS) {
error_report("%s", sd_strerror(rsp->result));
return -EIO;
}
return 0;
qemu_coroutine_yield();
return acb->ret;
}
static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
......
......@@ -87,7 +87,7 @@ static void win32_aio_process_completion(QEMUWin32AIOState *s,
memcpy(p, qiov->iov[i].iov_base, qiov->iov[i].iov_len);
p += qiov->iov[i].iov_len;
}
g_free(waiocb->buf);
qemu_vfree(waiocb->buf);
}
}
......
......@@ -2581,6 +2581,22 @@ if compile_prog "" "" ; then
fallocate=yes
fi
# check for fallocate hole punching
fallocate_punch_hole=no
cat > $TMPC << EOF
#include <fcntl.h>
#include <linux/falloc.h>
int main(void)
{
fallocate(0, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, 0);
return 0;
}
EOF
if compile_prog "" "" ; then
fallocate_punch_hole=yes
fi
# check for sync_file_range
sync_file_range=no
cat > $TMPC << EOF
......@@ -3490,6 +3506,9 @@ fi
if test "$fallocate" = "yes" ; then
echo "CONFIG_FALLOCATE=y" >> $config_host_mak
fi
if test "$fallocate_punch_hole" = "yes" ; then
echo "CONFIG_FALLOCATE_PUNCH_HOLE=y" >> $config_host_mak
fi
if test "$sync_file_range" = "yes" ; then
echo "CONFIG_SYNC_FILE_RANGE=y" >> $config_host_mak
fi
......
......@@ -325,14 +325,26 @@ typedef struct TrimAIOCB {
BlockDriverAIOCB common;
QEMUBH *bh;
int ret;
QEMUIOVector *qiov;
BlockDriverAIOCB *aiocb;
int i, j;
} TrimAIOCB;
static void trim_aio_cancel(BlockDriverAIOCB *acb)
{
TrimAIOCB *iocb = container_of(acb, TrimAIOCB, common);
/* Exit the loop in case bdrv_aio_cancel calls ide_issue_trim_cb again. */
iocb->j = iocb->qiov->niov - 1;
iocb->i = (iocb->qiov->iov[iocb->j].iov_len / 8) - 1;
/* Tell ide_issue_trim_cb not to trigger the completion, too. */
qemu_bh_delete(iocb->bh);
iocb->bh = NULL;
if (iocb->aiocb) {
bdrv_aio_cancel(iocb->aiocb);
}
qemu_aio_release(iocb);
}
......@@ -349,43 +361,60 @@ static void ide_trim_bh_cb(void *opaque)
qemu_bh_delete(iocb->bh);
iocb->bh = NULL;
qemu_aio_release(iocb);
}
static void ide_issue_trim_cb(void *opaque, int ret)
{
TrimAIOCB *iocb = opaque;
if (ret >= 0) {
while (iocb->j < iocb->qiov->niov) {
int j = iocb->j;
while (++iocb->i < iocb->qiov->iov[j].iov_len / 8) {
int i = iocb->i;
uint64_t *buffer = iocb->qiov->iov[j].iov_base;
/* 6-byte LBA + 2-byte range per entry */
uint64_t entry = le64_to_cpu(buffer[i]);
uint64_t sector = entry & 0x0000ffffffffffffULL;
uint16_t count = entry >> 48;
if (count == 0) {
continue;
}
/* Got an entry! Submit and exit. */
iocb->aiocb = bdrv_aio_discard(iocb->common.bs, sector, count,
ide_issue_trim_cb, opaque);
return;
}
iocb->j++;
iocb->i = -1;
}
} else {
iocb->ret = ret;
}
iocb->aiocb = NULL;
if (iocb->bh) {
qemu_bh_schedule(iocb->bh);
}
}
BlockDriverAIOCB *ide_issue_trim(BlockDriverState *bs,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque)
{
TrimAIOCB *iocb;
int i, j, ret;
iocb = qemu_aio_get(&trim_aiocb_info, bs, cb, opaque);
iocb->bh = qemu_bh_new(ide_trim_bh_cb, iocb);
iocb->ret = 0;
for (j = 0; j < qiov->niov; j++) {
uint64_t *buffer = qiov->iov[j].iov_base;
for (i = 0; i < qiov->iov[j].iov_len / 8; i++) {
/* 6-byte LBA + 2-byte range per entry */
uint64_t entry = le64_to_cpu(buffer[i]);
uint64_t sector = entry & 0x0000ffffffffffffULL;
uint16_t count = entry >> 48;
if (count == 0) {
break;
}
ret = bdrv_discard(bs, sector, count);
if (!iocb->ret) {
iocb->ret = ret;
}
}
}
qemu_bh_schedule(iocb->bh);
iocb->qiov = qiov;
iocb->i = -1;
iocb->j = 0;
ide_issue_trim_cb(iocb, 0);
return &iocb->common;
}
......
......@@ -85,9 +85,7 @@ static void scsi_free_request(SCSIRequest *req)
{
SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
if (r->iov.iov_base) {
qemu_vfree(r->iov.iov_base);
}
qemu_vfree(r->iov.iov_base);
}
/* Helper function for command completion with sense. */
......
......@@ -71,7 +71,9 @@ void *qemu_vmalloc(size_t size)
void qemu_vfree(void *ptr)
{
trace_qemu_vfree(ptr);
VirtualFree(ptr, 0, MEM_RELEASE);
if (ptr) {
VirtualFree(ptr, 0, MEM_RELEASE);
}
}
/* FIXME: add proper locking */
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment