block.c 137 KB
Newer Older
bellard's avatar
bellard committed
1 2
/*
 * QEMU System Emulator block driver
3
 *
bellard's avatar
bellard committed
4
 * Copyright (c) 2003 Fabrice Bellard
5
 *
bellard's avatar
bellard committed
6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
24
#include "config-host.h"
pbrook's avatar
pbrook committed
25
#include "qemu-common.h"
26
#include "trace.h"
27
#include "monitor/monitor.h"
28 29
#include "block/block_int.h"
#include "block/blockjob.h"
30
#include "qemu/module.h"
31
#include "qapi/qmp/qjson.h"
32
#include "sysemu/sysemu.h"
33
#include "qemu/notify.h"
34
#include "block/coroutine.h"
35
#include "qmp-commands.h"
36
#include "qemu/timer.h"
bellard's avatar
bellard committed
37

38
#ifdef CONFIG_BSD
bellard's avatar
bellard committed
39 40 41
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
42
#include <sys/queue.h>
43
#ifndef __DragonFly__
bellard's avatar
bellard committed
44 45
#include <sys/disk.h>
#endif
46
#endif
bellard's avatar
bellard committed
47

48 49 50 51
#ifdef _WIN32
#include <windows.h>
#endif

52 53 54 55 56
/* One dirty bitmap, kept as an element of a QLIST. */
struct BdrvDirtyBitmap {
    /* the bitmap itself */
    HBitmap *bitmap;
    /* list linkage; presumably for the per-BlockDriverState dirty_bitmaps
     * list that bdrv_new() initializes -- TODO confirm */
    QLIST_ENTRY(BdrvDirtyBitmap) list;
};

57 58
#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */

59
static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
60 61
static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
62
        BlockDriverCompletionFunc *cb, void *opaque);
63 64
static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
65
        BlockDriverCompletionFunc *cb, void *opaque);
66 67 68 69 70 71
static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov);
static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov);
72
static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
73 74
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
    BdrvRequestFlags flags);
75
static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
76 77
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
    BdrvRequestFlags flags);
78 79 80 81
static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
                                               int64_t sector_num,
                                               QEMUIOVector *qiov,
                                               int nb_sectors,
82
                                               BdrvRequestFlags flags,
83 84
                                               BlockDriverCompletionFunc *cb,
                                               void *opaque,
85
                                               bool is_write);
86
static void coroutine_fn bdrv_co_do_rw(void *opaque);
Kevin Wolf's avatar
Kevin Wolf committed
87
static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
88
    int64_t sector_num, int nb_sectors, BdrvRequestFlags flags);
bellard's avatar
bellard committed
89

90 91
static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
    QTAILQ_HEAD_INITIALIZER(bdrv_states);
92

93 94
static QLIST_HEAD(, BlockDriver) bdrv_drivers =
    QLIST_HEAD_INITIALIZER(bdrv_drivers);
bellard's avatar
bellard committed
95

96 97 98
/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;

99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118
#ifdef _WIN32
/*
 * Return 1 if filename starts with an ASCII letter immediately followed by
 * ':' (e.g. "c:" or "D:\dir"), 0 otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    const int is_lower = (letter >= 'a' && letter <= 'z');
    const int is_upper = (letter >= 'A' && letter <= 'Z');

    /* bail out before looking at filename[1]; filename may be "" */
    if (!is_lower && !is_upper) {
        return 0;
    }
    return filename[1] == ':';
}

/*
 * Return 1 if filename is exactly a drive specification ("X:") or starts
 * with the device prefix "\\.\" or "//./"; return 0 otherwise.
 */
int is_windows_drive(const char *filename)
{
    if (is_windows_drive_prefix(filename) && filename[2] == '\0') {
        return 1;
    }
    return strstart(filename, "\\\\.\\", NULL) ||
           strstart(filename, "//./", NULL);
}
#endif

119
/* throttling disk I/O limits */
120 121
/*
 * Apply a new throttling configuration to bs, then re-enter the next queued
 * request of each queue (index 0: reads, index 1: writes).
 */
void bdrv_set_io_limits(BlockDriverState *bs,
                        ThrottleConfig *cfg)
{
    int queue = 0;

    throttle_config(&bs->throttle_state, cfg);

    while (queue < 2) {
        qemu_co_enter_next(&bs->throttled_reqs[queue]);
        queue++;
    }
}

/*
 * Drain all throttled I/O requests of bs.
 *
 * io_limits_enabled is cleared while the queues are emptied and restored to
 * its previous value afterwards.
 *
 * Returns true if at least one queued request was re-entered.
 */
static bool bdrv_start_throttled_reqs(BlockDriverState *bs)
{
    const bool was_enabled = bs->io_limits_enabled;
    bool restarted_any = false;
    int queue;

    bs->io_limits_enabled = false;

    for (queue = 0; queue < 2; queue++) {
        while (qemu_co_enter_next(&bs->throttled_reqs[queue])) {
            restarted_any = true;
        }
    }

    bs->io_limits_enabled = was_enabled;

    return restarted_any;
}

152
/*
 * Turn I/O throttling off for bs: clear the enabled flag, restart every
 * request still waiting in the throttle queues, then destroy the throttle
 * state.
 */
void bdrv_io_limits_disable(BlockDriverState *bs)
{
    bs->io_limits_enabled = false;

    /* whether anything was actually queued is irrelevant here */
    (void)bdrv_start_throttled_reqs(bs);

    throttle_destroy(&bs->throttle_state);
}

161
static void bdrv_throttle_read_timer_cb(void *opaque)
162
{
163 164
    BlockDriverState *bs = opaque;
    qemu_co_enter_next(&bs->throttled_reqs[0]);
165 166
}

167
static void bdrv_throttle_write_timer_cb(void *opaque)
168
{
169 170
    BlockDriverState *bs = opaque;
    qemu_co_enter_next(&bs->throttled_reqs[1]);
171 172
}

173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189
/*
 * Enable I/O throttling on bs.
 *
 * Must be called before bdrv_set_io_limits() if a limit is set. Throttling
 * must not already be enabled (asserted). The throttle state is initialized
 * on QEMU_CLOCK_VIRTUAL with bs as the opaque value of both timer callbacks.
 */
void bdrv_io_limits_enable(BlockDriverState *bs)
{
    assert(!bs->io_limits_enabled);

    throttle_init(&bs->throttle_state, QEMU_CLOCK_VIRTUAL,
                  bdrv_throttle_read_timer_cb, bdrv_throttle_write_timer_cb,
                  bs);

    bs->io_limits_enabled = true;
}

/* This function makes an IO wait if needed
 *
 * @nb_sectors: the number of sectors of the IO
 * @is_write:   is the IO a write
 */
190
static void bdrv_io_limits_intercept(BlockDriverState *bs,
191 192
                                     int nb_sectors,
                                     bool is_write)
193
{
194 195
    /* does this io must wait */
    bool must_wait = throttle_schedule_timer(&bs->throttle_state, is_write);
196

197 198 199 200
    /* if must wait or any request of this type throttled queue the IO */
    if (must_wait ||
        !qemu_co_queue_empty(&bs->throttled_reqs[is_write])) {
        qemu_co_queue_wait(&bs->throttled_reqs[is_write]);
201 202
    }

203 204 205 206
    /* the IO will be executed, do the accounting */
    throttle_account(&bs->throttle_state,
                     is_write,
                     nb_sectors * BDRV_SECTOR_SIZE);
207

208 209 210
    /* if the next request must wait -> do nothing */
    if (throttle_schedule_timer(&bs->throttle_state, is_write)) {
        return;
211 212
    }

213 214
    /* else queue next request for execution */
    qemu_co_queue_next(&bs->throttled_reqs[is_write]);
215 216
}

217 218 219
/*
 * Return 1 if path starts with "<protocol>:", i.e. a ':' appears before any
 * path separator; return 0 otherwise. On Windows, drive specifications are
 * never treated as protocols and '\' also counts as a separator.
 */
static int path_has_protocol(const char *path)
{
#ifdef _WIN32
    static const char stop_set[] = ":/\\";

    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 0;
    }
#else
    static const char stop_set[] = ":/";
#endif

    /* the first stop character decides: ':' means protocol prefix */
    return path[strcspn(path, stop_set)] == ':';
}

bellard's avatar
bellard committed
235
/*
 * Return 1 if path is absolute, 0 otherwise. A path is absolute when it
 * starts with '/'; on Windows a leading '\' and drive/device names such as
 * "c:" or "\\.\d:" count as well.
 */
int path_is_absolute(const char *path)
{
#ifdef _WIN32
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    if (path[0] == '\\') {
        return 1;
    }
#endif
    return path[0] == '/';
}

bellard's avatar
bellard committed
248 249 250 251 252 253
/*
 * Build in dest (at most dest_size bytes including the terminator) the path
 * of filename relative to base_path.
 *
 * If filename is absolute it is copied unchanged. Otherwise the directory
 * part of base_path -- everything up to and including the last path
 * separator, or up to and including the first ':' if that comes later (so
 * URLs are supported) -- is copied and filename is appended to it.
 * Nothing is written when dest_size <= 0.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_colon, *after_sep, *dir_end;
    int dir_len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* position just past the first ':' (protocol prefix), if any */
    after_colon = strchr(base_path, ':');
    after_colon = after_colon ? after_colon + 1 : base_path;

    /* position just past the last path separator, if any */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');
        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    /* the directory part ends at whichever of the two lies further right */
    dir_end = after_sep > after_colon ? after_sep : after_colon;

    dir_len = dir_end - base_path;
    if (dir_len > dest_size - 1) {
        dir_len = dest_size - 1;
    }
    memcpy(dest, base_path, dir_len);
    dest[dir_len] = '\0';
    pstrcat(dest, dest_size, filename);
}

292 293 294 295 296 297 298 299 300
/*
 * Write the full backing file name of bs into dest (sz bytes).
 *
 * An empty backing file name or one carrying a protocol prefix is copied
 * verbatim; anything else is combined with bs->filename via path_combine().
 */
void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz)
{
    bool needs_combining = bs->backing_file[0] != '\0' &&
                           !path_has_protocol(bs->backing_file);

    if (needs_combining) {
        path_combine(dest, sz, bs->filename, bs->backing_file);
    } else {
        pstrcpy(dest, sz, bs->backing_file);
    }
}

301
/*
 * Register a block driver: fill in emulated I/O callbacks where the driver
 * provides none, then add it to the global driver list.
 */
void bdrv_register(BlockDriver *bdrv)
{
    const bool lacks_coroutine_io = !bdrv->bdrv_co_readv;

    /* Block drivers without coroutine functions need emulation */
    if (lacks_coroutine_io) {
        /* bdrv_co_readv_em()/bdrv_co_writev_em() work in terms of aio, so if
         * the block driver lacks aio we need to emulate that too.
         */
        if (!bdrv->bdrv_aio_readv) {
            bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
            bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
        }

        bdrv->bdrv_co_readv = bdrv_co_readv_em;
        bdrv->bdrv_co_writev = bdrv_co_writev_em;
    }

    QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
}
bellard's avatar
bellard committed
320 321 322 323

/* create a new block device (by default it is empty) */
BlockDriverState *bdrv_new(const char *device_name)
{
324
    BlockDriverState *bs;
bellard's avatar
bellard committed
325

326
    bs = g_malloc0(sizeof(BlockDriverState));
327
    QLIST_INIT(&bs->dirty_bitmaps);
bellard's avatar
bellard committed
328
    pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
bellard's avatar
bellard committed
329
    if (device_name[0] != '\0') {
330
        QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
bellard's avatar
bellard committed
331
    }
332
    bdrv_iostatus_disable(bs);
333
    notifier_list_init(&bs->close_notifiers);
334
    notifier_with_return_list_init(&bs->before_write_notifiers);
335 336
    qemu_co_queue_init(&bs->throttled_reqs[0]);
    qemu_co_queue_init(&bs->throttled_reqs[1]);
337
    bs->refcnt = 1;
338

bellard's avatar
bellard committed
339 340 341
    return bs;
}

342 343 344 345 346
/* Add notify to the close_notifiers list of bs. */
void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
{
    notifier_list_add(&bs->close_notifiers, notify);
}

bellard's avatar
bellard committed
347 348 349
BlockDriver *bdrv_find_format(const char *format_name)
{
    BlockDriver *drv1;
350 351
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (!strcmp(drv1->format_name, format_name)) {
bellard's avatar
bellard committed
352
            return drv1;
353
        }
bellard's avatar
bellard committed
354 355 356 357
    }
    return NULL;
}

358
/*
 * Check drv against the compile-time whitelists.
 *
 * Returns 1 if both whitelists are empty, if drv is on the read-write
 * whitelist, or if read_only is set and drv is on the read-only whitelist.
 * Returns 0 otherwise.
 */
static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
{
    static const char *whitelist_rw[] = {
        CONFIG_BDRV_RW_WHITELIST
    };
    static const char *whitelist_ro[] = {
        CONFIG_BDRV_RO_WHITELIST
    };
    const char **entry;

    if (!whitelist_rw[0] && !whitelist_ro[0]) {
        return 1;               /* no whitelist, anything goes */
    }

    for (entry = whitelist_rw; *entry; entry++) {
        if (strcmp(drv->format_name, *entry) == 0) {
            return 1;
        }
    }

    /* the read-only list only matters for read-only use */
    if (!read_only) {
        return 0;
    }

    for (entry = whitelist_ro; *entry; entry++) {
        if (strcmp(drv->format_name, *entry) == 0) {
            return 1;
        }
    }

    return 0;
}

387 388
/*
 * Like bdrv_find_format(), but returns NULL as well when the driver is not
 * whitelisted for the requested access mode.
 */
BlockDriver *bdrv_find_whitelisted_format(const char *format_name,
                                          bool read_only)
{
    BlockDriver *drv = bdrv_find_format(format_name);

    if (!drv || !bdrv_is_whitelisted(drv, read_only)) {
        return NULL;
    }
    return drv;
}

394 395 396 397 398
typedef struct CreateCo {
    BlockDriver *drv;
    char *filename;
    QEMUOptionParameter *options;
    int ret;
399
    Error *err;
400 401 402 403
} CreateCo;

/*
 * Coroutine entry point for bdrv_create(): call the driver's bdrv_create
 * callback and store its return value and error in the CreateCo passed as
 * opaque.
 */
static void coroutine_fn bdrv_create_co_entry(void *opaque)
{
    CreateCo *cco = opaque;
    Error *local_err = NULL;
    int ret;

    assert(cco->drv);

    ret = cco->drv->bdrv_create(cco->filename, cco->options, &local_err);
    if (error_is_set(&local_err)) {
        error_propagate(&cco->err, local_err);
    }

    /* written last: bdrv_create() polls this field for completion */
    cco->ret = ret;
}

417
int bdrv_create(BlockDriver *drv, const char* filename,
418
    QEMUOptionParameter *options, Error **errp)
bellard's avatar
bellard committed
419
{
420 421 422 423 424 425 426 427
    int ret;

    Coroutine *co;
    CreateCo cco = {
        .drv = drv,
        .filename = g_strdup(filename),
        .options = options,
        .ret = NOT_DONE,
428
        .err = NULL,
429 430 431
    };

    if (!drv->bdrv_create) {
432
        error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
433 434
        ret = -ENOTSUP;
        goto out;
435 436 437 438 439 440 441 442 443 444 445 446 447 448
    }

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_create_co_entry(&cco);
    } else {
        co = qemu_coroutine_create(bdrv_create_co_entry);
        qemu_coroutine_enter(co, &cco);
        while (cco.ret == NOT_DONE) {
            qemu_aio_wait();
        }
    }

    ret = cco.ret;
449 450 451 452 453 454 455
    if (ret < 0) {
        if (error_is_set(&cco.err)) {
            error_propagate(errp, cco.err);
        } else {
            error_setg_errno(errp, -ret, "Could not create image");
        }
    }
456

457 458
out:
    g_free(cco.filename);
459
    return ret;
bellard's avatar
bellard committed
460 461
}

462 463
/*
 * Create filename using the protocol driver selected by
 * bdrv_find_protocol() (protocol prefixes allowed).
 *
 * Returns -ENOENT if no protocol driver is found, otherwise the result of
 * bdrv_create(). Errors are reported through errp.
 */
int bdrv_create_file(const char* filename, QEMUOptionParameter *options,
                     Error **errp)
{
    BlockDriver *proto_drv = bdrv_find_protocol(filename, true);
    Error *local_err = NULL;
    int ret;

    if (!proto_drv) {
        error_setg(errp, "Could not find protocol for file '%s'", filename);
        return -ENOENT;
    }

    ret = bdrv_create(proto_drv, filename, options, &local_err);
    if (error_is_set(&local_err)) {
        error_propagate(errp, local_err);
    }

    return ret;
}

482 483 484 485 486
/*
 * Create a uniquely-named empty temporary file.
 *
 * @filename: output buffer receiving the name of the created file
 * @size:     size of @filename in bytes (must be >= MAX_PATH on Windows)
 *
 * On POSIX hosts the file is created in $TMPDIR, or in /tmp if TMPDIR is
 * not set.
 *
 * Return 0 upon success, otherwise a negative errno value (-EOVERFLOW if
 * the name does not fit into @filename).
 * NOTE(review): the Windows branch returns a negated GetLastError() code,
 * which is not an errno value -- confirm callers cope with that.
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater.  */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    int saved_errno;
    const char *tmpdir;

    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/tmp";
    }
    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
        return -EOVERFLOW;
    }
    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        /* unlink() may overwrite errno, so remember why close() failed */
        saved_errno = errno;
        unlink(filename);
        return -saved_errno;
    }
    return 0;
#endif
}
bellard's avatar
bellard committed
516

517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538
/*
 * Detect host devices. By convention, /dev/cdrom[N] is always
 * recognized as a host CDROM.
 *
 * Every registered driver with a bdrv_probe_device callback is asked to
 * score filename. The driver with the highest strictly positive score is
 * returned, or NULL if no driver scores above zero.
 */
static BlockDriver *find_hdev_driver(const char *filename)
{
    BlockDriver *best = NULL;
    BlockDriver *candidate;
    int best_score = 0;

    QLIST_FOREACH(candidate, &bdrv_drivers, list) {
        int score;

        if (!candidate->bdrv_probe_device) {
            continue;
        }
        score = candidate->bdrv_probe_device(filename);
        if (score > best_score) {
            best_score = score;
            best = candidate;
        }
    }

    return best;
}

539 540
/*
 * Find the protocol driver to use for filename.
 *
 * Host device detection comes first. Otherwise the "file" driver is used
 * unless filename has a "<protocol>:" prefix and allow_protocol_prefix is
 * set, in which case the driver whose protocol_name equals that prefix is
 * returned (NULL if there is none). Prefixes longer than 127 characters
 * are truncated before the comparison.
 */
BlockDriver *bdrv_find_protocol(const char *filename,
                                bool allow_protocol_prefix)
{
    BlockDriver *drv;
    char protocol[128];
    size_t prefix_len;
    const char *colon;

    /* TODO Drivers without bdrv_file_open must be specified explicitly */

    /*
     * XXX(hch): we really should not let host device detection
     * override an explicit protocol specification, but moving this
     * later breaks access to device names with colons in them.
     * Thanks to the brain-dead persistent naming schemes on udev-
     * based Linux systems those actually are quite common.
     */
    drv = find_hdev_driver(filename);
    if (drv) {
        return drv;
    }

    if (!path_has_protocol(filename) || !allow_protocol_prefix) {
        return bdrv_find_format("file");
    }

    /* path_has_protocol() succeeded, so there must be a colon */
    colon = strchr(filename, ':');
    assert(colon != NULL);

    prefix_len = colon - filename;
    if (prefix_len >= sizeof(protocol)) {
        prefix_len = sizeof(protocol) - 1;
    }
    memcpy(protocol, filename, prefix_len);
    protocol[prefix_len] = '\0';

    QLIST_FOREACH(drv, &bdrv_drivers, list) {
        if (drv->protocol_name && strcmp(drv->protocol_name, protocol) == 0) {
            return drv;
        }
    }

    return NULL;
}

581
static int find_image_format(BlockDriverState *bs, const char *filename,
582
                             BlockDriver **pdrv, Error **errp)
583
{
584
    int score, score_max;
585 586
    BlockDriver *drv1, *drv;
    uint8_t buf[2048];
587
    int ret = 0;
588

589
    /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
590
    if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
591 592
        drv = bdrv_find_format("raw");
        if (!drv) {
593
            error_setg(errp, "Could not find raw image format");
594 595 596 597
            ret = -ENOENT;
        }
        *pdrv = drv;
        return ret;
598
    }
599

bellard's avatar
bellard committed
600 601
    ret = bdrv_pread(bs, 0, buf, sizeof(buf));
    if (ret < 0) {
602 603
        error_setg_errno(errp, -ret, "Could not read image for determining its "
                         "format");
604 605
        *pdrv = NULL;
        return ret;
bellard's avatar
bellard committed
606 607
    }

bellard's avatar
bellard committed
608
    score_max = 0;
609
    drv = NULL;
610
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
bellard's avatar
bellard committed
611 612 613 614 615 616
        if (drv1->bdrv_probe) {
            score = drv1->bdrv_probe(buf, ret, filename);
            if (score > score_max) {
                score_max = score;
                drv = drv1;
            }
bellard's avatar
bellard committed
617
        }
bellard's avatar
bellard committed
618
    }
619
    if (!drv) {
620 621
        error_setg(errp, "Could not determine image format: No compatible "
                   "driver found");
622 623 624 625
        ret = -ENOENT;
    }
    *pdrv = drv;
    return ret;
bellard's avatar
bellard committed
626 627
}

628 629 630 631 632 633 634
/**
 * Set the current 'total_sectors' value
 *
 * If the driver implements bdrv_getlength, the reported length in bytes is
 * rounded up to whole sectors; otherwise @hint is stored as is. Nothing is
 * changed for scsi-generic devices.
 *
 * Returns 0 on success or the negative value reported by bdrv_getlength.
 */
static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
{
    /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
    if (bs->sg) {
        return 0;
    }

    /* query actual device if possible, otherwise just trust the hint */
    if (bs->drv->bdrv_getlength) {
        int64_t bytes = bs->drv->bdrv_getlength(bs);

        if (bytes < 0) {
            return bytes;
        }
        hint = DIV_ROUND_UP(bytes, BDRV_SECTOR_SIZE);
    }

    bs->total_sectors = hint;
    return 0;
}

652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671
/**
 * Set open flags for a given discard mode
 *
 * "off"/"ignore" clear BDRV_O_UNMAP, "on"/"unmap" set it. The flag is
 * cleared even when the mode turns out to be invalid.
 *
 * Return 0 on success, -1 if the discard mode was invalid.
 */
int bdrv_parse_discard_flags(const char *mode, int *flags)
{
    *flags &= ~BDRV_O_UNMAP;

    if (strcmp(mode, "on") == 0 || strcmp(mode, "unmap") == 0) {
        *flags |= BDRV_O_UNMAP;
        return 0;
    }
    if (strcmp(mode, "off") == 0 || strcmp(mode, "ignore") == 0) {
        return 0;
    }

    return -1;
}

672 673 674 675 676 677 678 679 680 681 682
/**
 * Set open flags for a given cache mode
 *
 * The bits in BDRV_O_CACHE_MASK are cleared first (also for an invalid
 * mode), then the bits belonging to the named mode are set.
 *
 * Return 0 on success, -1 if the cache mode was invalid.
 */
int bdrv_parse_cache_flags(const char *mode, int *flags)
{
    static const struct {
        const char *name;
        int bits;
    } cache_modes[] = {
        { "off",          BDRV_O_NOCACHE | BDRV_O_CACHE_WB },
        { "none",         BDRV_O_NOCACHE | BDRV_O_CACHE_WB },
        { "directsync",   BDRV_O_NOCACHE },
        { "writeback",    BDRV_O_CACHE_WB },
        { "unsafe",       BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH },
        { "writethrough", 0 },      /* this is the default */
    };
    size_t i;

    *flags &= ~BDRV_O_CACHE_MASK;

    for (i = 0; i < sizeof(cache_modes) / sizeof(cache_modes[0]); i++) {
        if (strcmp(mode, cache_modes[i].name) == 0) {
            *flags |= cache_modes[i].bits;
            return 0;
        }
    }

    return -1;
}

699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714
/**
 * The copy-on-read flag is actually a reference count so multiple users may
 * use the feature without worrying about clobbering its previous state.
 * Copy-on-read stays enabled until all users have called to disable it.
 */
void bdrv_enable_copy_on_read(BlockDriverState *bs)
{
    /* take one reference on the feature */
    bs->copy_on_read++;
}

/**
 * Drop one copy-on-read reference taken with bdrv_enable_copy_on_read().
 * Calling this without a matching enable trips the assertion.
 */
void bdrv_disable_copy_on_read(BlockDriverState *bs)
{
    assert(bs->copy_on_read > 0);
    bs->copy_on_read--;
}

715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734
/*
 * Derive the flags handed to the driver's open callback from the flags the
 * caller requested: BDRV_O_CACHE_WB is always set, the block-layer internal
 * flags are stripped, and temporary images are forced writable.
 */
static int bdrv_open_flags(BlockDriverState *bs, int flags)
{
    /* flags that are internal to the block layer */
    const int internal_only = BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING;
    int open_flags = (flags | BDRV_O_CACHE_WB) & ~internal_only;

    /* Snapshots should be writable. */
    if (bs->is_temporary) {
        open_flags |= BDRV_O_RDWR;
    }

    return open_flags;
}

735 736
/*
 * Common part for opening disk images and files
737 738
 *
 * Removes all processed options from *options.
739
 */
740
static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
741
    QDict *options, int flags, BlockDriver *drv, Error **errp)
742 743
{
    int ret, open_flags;
744
    const char *filename;
745
    Error *local_err = NULL;
746 747

    assert(drv != NULL);
748
    assert(bs->file == NULL);
749
    assert(options != NULL && bs->options != options);
750

751 752 753 754 755 756 757
    if (file != NULL) {
        filename = file->filename;
    } else {
        filename = qdict_get_try_str(options, "filename");
    }

    trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);
758

759 760 761 762 763 764 765 766
    /* bdrv_open() with directly using a protocol as drv. This layer is already
     * opened, so assign it to bs (while file becomes a closed BlockDriverState)
     * and return immediately. */
    if (file != NULL && drv->bdrv_file_open) {
        bdrv_swap(file, bs);
        return 0;
    }

767 768
    bs->open_flags = flags;
    bs->buffer_alignment = 512;
769
    bs->zero_beyond_eof = true;
770 771 772 773
    open_flags = bdrv_open_flags(bs, flags);
    bs->read_only = !(open_flags & BDRV_O_RDWR);

    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
774 775 776 777 778
        error_setg(errp,
                   !bs->read_only && bdrv_is_whitelisted(drv, true)
                        ? "Driver '%s' can only be used for read-only devices"
                        : "Driver '%s' is not whitelisted",
                   drv->format_name);
779 780
        return -ENOTSUP;
    }
781

782
    assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
783 784 785 786 787 788 789
    if (flags & BDRV_O_COPY_ON_READ) {
        if (!bs->read_only) {
            bdrv_enable_copy_on_read(bs);
        } else {
            error_setg(errp, "Can't use copy-on-read on read-only device");
            return -EINVAL;
        }
790 791
    }

792 793 794 795 796
    if (filename != NULL) {
        pstrcpy(bs->filename, sizeof(bs->filename), filename);
    } else {
        bs->filename[0] = '\0';
    }
797 798

    bs->drv = drv;
799
    bs->opaque = g_malloc0(drv->instance_size);
800

801
    bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
802

803 804
    /* Open the image, either directly or using a protocol */
    if (drv->bdrv_file_open) {
805
        assert(file == NULL);
806
        assert(!drv->bdrv_needs_filename || filename != NULL);
807
        ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
808
    } else {
809
        if (file == NULL) {
810 811
            error_setg(errp, "Can't use '%s' as a block driver for the "
                       "protocol level", drv->format_name);
812 813 814
            ret = -EINVAL;
            goto free_and_fail;
        }
815
        bs->file = file;
816
        ret = drv->bdrv_open(bs, options, open_flags, &local_err);
817 818
    }

819
    if (ret < 0) {
820 821
        if (error_is_set(&local_err)) {
            error_propagate(errp, local_err);
822 823
        } else if (bs->filename[0]) {
            error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
824 825 826
        } else {
            error_setg_errno(errp, -ret, "Could not open image");
        }
827 828 829
        goto free_and_fail;
    }

830 831
    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
832
        error_setg_errno(errp, -ret, "Could not refresh total sector count");
833
        goto free_and_fail;
834
    }
835

836 837
#ifndef _WIN32
    if (bs->is_temporary) {
838 839
        assert(bs->filename[0] != '\0');
        unlink(bs->filename);
840 841 842 843 844
    }
#endif
    return 0;

free_and_fail:
845
    bs->file = NULL;
846
    g_free(bs->opaque);
847 848 849 850 851
    bs->opaque = NULL;
    bs->drv = NULL;
    return ret;
}

Kevin Wolf's avatar
Kevin Wolf committed
852 853
/*
 * Opens a file using a protocol (file, host_device, nbd, ...)
854 855 856 857 858
 *
 * options is a QDict of options to pass to the block drivers, or NULL for an
 * empty set of options. The reference to the QDict belongs to the block layer
 * after the call (even on failure), so if the caller intends to reuse the
 * dictionary, it needs to use QINCREF() before calling bdrv_file_open.
Kevin Wolf's avatar
Kevin Wolf committed
859
 */
860
int bdrv_file_open(BlockDriverState **pbs, const char *filename,
861 862
                   const char *reference, QDict *options, int flags,
                   Error **errp)
bellard's avatar
bellard committed
863
{
864
    BlockDriverState *bs = NULL;
865
    BlockDriver *drv;
866
    const char *drvname;
867
    bool allow_protocol_prefix = false;
868
    Error *local_err = NULL;
bellard's avatar
bellard committed
869 870
    int ret;

871 872 873 874 875
    /* NULL means an empty set of options */
    if (options == NULL) {
        options = qdict_new();
    }

876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893
    if (reference) {
        if (filename || qdict_size(options)) {
            error_setg(errp, "Cannot reference an existing block device with "
                       "additional options or a new filename");
            return -EINVAL;
        }
        QDECREF(options);

        bs = bdrv_find(reference);
        if (!bs) {
            error_setg(errp, "Cannot find block device '%s'", reference);
            return -ENODEV;
        }
        bdrv_ref(bs);
        *pbs = bs;
        return 0;
    }

bellard's avatar
bellard committed
894
    bs = bdrv_new("");
895 896 897
    bs->options = options;
    options = qdict_clone_shallow(options);

898 899 900 901 902
    /* Fetch the file name from the options QDict if necessary */
    if (!filename) {
        filename = qdict_get_try_str(options, "filename");
    } else if (filename && !qdict_haskey(options, "filename")) {
        qdict_put(options, "filename", qstring_from_str(filename));
903
        allow_protocol_prefix = true;
904
    } else {
905 906
        error_setg(errp, "Can't specify 'file' and 'filename' options at the "
                   "same time");
907 908 909 910
        ret = -EINVAL;
        goto fail;
    }

911 912 913
    /* Find the right block driver */
    drvname = qdict_get_try_str(options, "driver");
    if (drvname) {
914
        drv = bdrv_find_format(drvname);
915 916 917
        if (!drv) {
            error_setg(errp, "Unknown driver '%s'", drvname);
        }
918 919
        qdict_del(options, "driver");
    } else if (filename) {
920 921
        drv = bdrv_find_protocol(filename, allow_protocol_prefix);
        if (!drv) {
922
            error_setg(errp, "Unknown protocol");
923
        }
924
    } else {
925
        error_setg(errp, "Must specify either driver or file");
926 927 928 929
        drv = NULL;
    }

    if (!drv) {
930
        /* errp has been set already */
931 932 933 934 935 936
        ret = -ENOENT;
        goto fail;
    }

    /* Parse the filename and open it */
    if (drv->bdrv_parse_filename && filename) {
937 938
        drv->bdrv_parse_filename(filename, options, &local_err);
        if (error_is_set(&local_err)) {
939
            error_propagate(errp, local_err);
940 941 942
            ret = -EINVAL;
            goto fail;
        }
943
        qdict_del(options, "filename");
944
    } else if (drv->bdrv_needs_filename && !filename) {
945 946
        error_setg(errp, "The '%s' block driver requires a file name",
                   drv->format_name);
947 948
        ret = -EINVAL;
        goto fail;
949 950
    }

951
    ret = bdrv_open_common(bs, NULL, options, flags, drv, &local_err);
bellard's avatar
bellard committed
952
    if (ret < 0) {
953
        error_propagate(errp, local_err);
954 955 956 957 958 959
        goto fail;
    }

    /* Check if any unknown options were used */
    if (qdict_size(options) != 0) {
        const QDictEntry *entry = qdict_first(options);
960 961
        error_setg(errp, "Block protocol '%s' doesn't support the option '%s'",
                   drv->format_name, entry->key);
962 963
        ret = -EINVAL;
        goto fail;
964
    }
965 966
    QDECREF(options);

967
    bs->growable = 1;
bellard's avatar
bellard committed
968 969
    *pbs = bs;
    return 0;
970 971 972 973 974 975

fail:
    QDECREF(options);
    if (!bs->drv) {
        QDECREF(bs->options);
    }
976
    bdrv_unref(bs);
977
    return ret;
bellard's avatar
bellard committed
978 979
}

980 981 982 983 984 985 986 987
/*
 * Opens the backing file for a BlockDriverState if not yet open
 *
 * options is a QDict of options to pass to the block drivers, or NULL for an
 * empty set of options. The reference to the QDict is transferred to this
 * function (even on failure), so if the caller intends to reuse the dictionary,
 * it needs to use QINCREF() before calling bdrv_file_open.
 */
988
int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
989 990 991 992
{
    char backing_filename[PATH_MAX];
    int back_flags, ret;
    BlockDriver *back_drv = NULL;
993
    Error *local_err = NULL;
994 995

    if (bs->backing_hd != NULL) {
996
        QDECREF(options);
997 998 999
        return 0;
    }

1000 1001 1002 1003 1004
    /* NULL means an empty set of options */
    if (options == NULL) {
        options = qdict_new();
    }

1005
    bs->open_flags &= ~BDRV_O_NO_BACKING;
1006 1007 1008
    if (qdict_haskey(options, "file.filename")) {
        backing_filename[0] = '\0';
    } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
1009
        QDECREF(options);
1010
        return 0;
1011 1012 1013
    } else {
        bdrv_get_full_backing_filename(bs, backing_filename,
                                       sizeof(backing_filename));
1014 1015 1016 1017 1018 1019 1020 1021 1022
    }

    bs->backing_hd = bdrv_new("");

    if (bs->backing_format[0] != '\0') {
        back_drv = bdrv_find_format(bs->backing_format);
    }

    /* backing files always opened read-only */
1023 1024
    back_flags = bs->open_flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT |
                                    BDRV_O_COPY_ON_READ);
1025

1026 1027
    ret = bdrv_open(bs->backing_hd,
                    *backing_filename ? backing_filename : NULL, options,
1028
                    back_flags, back_drv, &local_err);
1029
    if (ret < 0) {
1030
        bdrv_unref(bs->backing_hd);
1031 1032
        bs->backing_hd = NULL;
        bs->open_flags |= BDRV_O_NO_BACKING;
1033 1034 1035
        error_setg(errp, "Could not open backing file: %s",
                   error_get_pretty(local_err));
        error_free(local_err);
1036 1037
        return ret;
    }
1038 1039
    pstrcpy(bs->backing_file, sizeof(bs->backing_file),
            bs->backing_hd->file->filename);
1040 1041 1042
    return 0;
}

Kevin Wolf's avatar
Kevin Wolf committed
1043 1044
/*
 * Opens a disk image (raw, qcow2, vmdk, ...)
1045 1046 1047 1048 1049
 *
 * options is a QDict of options to pass to the block drivers, or NULL for an
 * empty set of options. The reference to the QDict belongs to the block layer
 * after the call (even on failure), so if the caller intends to reuse the
 * dictionary, it needs to use QINCREF() before calling bdrv_open.
Kevin Wolf's avatar
Kevin Wolf committed
1050
 */
1051
int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options,
1052
              int flags, BlockDriver *drv, Error **errp)
bellard's avatar
bellard committed
1053
{
Kevin Wolf's avatar
Kevin Wolf committed
1054
    int ret;
1055 1056
    /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
    char tmp_filename[PATH_MAX + 1];
1057
    BlockDriverState *file = NULL;
1058
    QDict *file_options = NULL;
1059
    const char *file_reference;
1060
    const char *drvname;
1061
    Error *local_err = NULL;
bellard's avatar
bellard committed
1062

1063 1064 1065 1066 1067 1068
    /* NULL means an empty set of options */
    if (options == NULL) {
        options = qdict_new();
    }

    bs->options = options;
1069
    options = qdict_clone_shallow(options);
1070 1071

    /* For snapshot=on, create a temporary qcow2 overlay */
bellard's avatar
bellard committed
1072
    if (flags & BDRV_O_SNAPSHOT) {
bellard's avatar
bellard committed
1073 1074
        BlockDriverState *bs1;
        int64_t total_size;
Kevin Wolf's avatar
Kevin Wolf committed
1075
        BlockDriver *bdrv_qcow2;
1076
        QEMUOptionParameter *create_options;
1077
        QDict *snapshot_options;
1078

bellard's avatar
bellard committed
1079 1080
        /* if snapshot, we create a temporary backing file and open it
           instead of opening 'filename' directly */
1081

1082
        /* Get the required size from the image */
bellard's avatar
bellard committed
1083
        bs1 = bdrv_new("");
1084
        QINCREF(options);
1085 1086
        ret = bdrv_open(bs1, filename, options, BDRV_O_NO_BACKING,
                        drv, &local_err);
1087
        if (ret < 0) {
1088
            bdrv_unref(bs1);
1089
            goto fail;
bellard's avatar
bellard committed
1090
        }
1091
        total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
1092

1093
        bdrv_unref(bs1);
1094

1095
        /* Create the temporary image */
1096 1097
        ret = get_tmp_filename(tmp_filename, sizeof(tmp_filename));
        if (ret < 0) {
1098
            error_setg_errno(errp, -ret, "Could not get temporary filename");
1099
            goto fail;
1100
        }
1101

Kevin Wolf's avatar
Kevin Wolf committed
1102
        bdrv_qcow2 = bdrv_find_format("qcow2");
1103 1104
        create_options = parse_option_parameters("", bdrv_qcow2->create_options,
                                                 NULL);
Kevin Wolf's avatar
Kevin Wolf committed
1105

1106
        set_option_parameter_int(create_options, BLOCK_OPT_SIZE, total_size);
Kevin Wolf's avatar
Kevin Wolf committed
1107

1108
        ret = bdrv_create(bdrv_qcow2, tmp_filename, create_options, &local_err);
1109
        free_option_parameters(create_options);
1110
        if (ret < 0) {
1111
            error_setg_errno(errp, -ret, "Could not create temporary overlay "
1112 1113 1114 1115
                             "'%s': %s", tmp_filename,
                             error_get_pretty(local_err));
            error_free(local_err);
            local_err = NULL;
1116
            goto fail;
bellard's avatar
bellard committed
1117
        }
Kevin Wolf's avatar
Kevin Wolf committed
1118

1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134
        /* Prepare a new options QDict for the temporary file, where user
         * options refer to the backing file */
        if (filename) {
            qdict_put(options, "file.filename", qstring_from_str(filename));
        }
        if (drv) {
            qdict_put(options, "driver", qstring_from_str(drv->format_name));
        }

        snapshot_options = qdict_new();
        qdict_put(snapshot_options, "backing", options);
        qdict_flatten(snapshot_options);

        bs->options = snapshot_options;
        options = qdict_clone_shallow(bs->options);

bellard's avatar
bellard committed
1135
        filename = tmp_filename;
Kevin Wolf's avatar
Kevin Wolf committed
1136
        drv = bdrv_qcow2;
bellard's avatar
bellard committed
1137 1138
        bs->is_temporary = 1;
    }
bellard's avatar
bellard committed
1139

1140 1141 1142 1143 1144
    /* Open image file without format layer */
    if (flags & BDRV_O_RDWR) {
        flags |= BDRV_O_ALLOW_RDWR;
    }

1145
    qdict_extract_subqdict(options, &file_options, "file.");
1146
    file_reference = qdict_get_try_str(options, "file");
1147

1148 1149 1150 1151 1152 1153 1154 1155
    if (filename || file_reference || qdict_size(file_options)) {
        ret = bdrv_file_open(&file, filename, file_reference, file_options,
                             bdrv_open_flags(bs, flags | BDRV_O_UNMAP),
                             &local_err);
        qdict_del(options, "file");
        if (ret < 0) {
            goto fail;
        }
1156 1157
    }

Kevin Wolf's avatar
Kevin Wolf committed
1158
    /* Find the right image format driver */
1159 1160
    drvname = qdict_get_try_str(options, "driver");
    if (drvname) {
1161
        drv = bdrv_find_format(drvname);
1162
        qdict_del(options, "driver");
1163 1164 1165 1166 1167
        if (!drv) {
            error_setg(errp, "Invalid driver: '%s'", drvname);
            ret = -EINVAL;
            goto unlink_and_fail;
        }
1168 1169
    }

1170
    if (!drv) {
1171 1172 1173 1174 1175 1176 1177
        if (file) {
            ret = find_image_format(file, filename, &drv, &local_err);
        } else {
            error_setg(errp, "Must specify either driver or file");
            ret = -EINVAL;
            goto unlink_and_fail;
        }
1178
    }
1179

1180 1181
    if (!drv) {
        goto unlink_and_fail;
bellard's avatar
bellard committed
1182
    }
Kevin Wolf's avatar
Kevin Wolf committed
1183 1184

    /* Open the image */
1185
    ret = bdrv_open_common(bs, file, options, flags, drv, &local_err);
Kevin Wolf's avatar
Kevin Wolf committed
1186
    if (ret < 0) {
1187 1188 1189
        goto unlink_and_fail;
    }

1190
    if (file && (bs->file != file)) {
1191
        bdrv_unref(file);
1192 1193 1194
        file = NULL;
    }

Kevin Wolf's avatar
Kevin Wolf committed
1195
    /* If there is a backing file, use it */
1196
    if ((flags & BDRV_O_NO_BACKING) == 0) {
1197 1198
        QDict *backing_options;

1199
        qdict_extract_subqdict(options, &backing_options, "backing.");
1200
        ret = bdrv_open_backing_file(bs, backing_options, &local_err);
Kevin Wolf's avatar
Kevin Wolf committed
1201
        if (ret < 0) {
1202
            goto close_and_fail;
Kevin Wolf's avatar
Kevin Wolf committed
1203 1204 1205
        }
    }

1206 1207 1208
    /* Check if any unknown options were used */
    if (qdict_size(options) != 0) {
        const QDictEntry *entry = qdict_first(options);
1209 1210 1211
        error_setg(errp, "Block format '%s' used by device '%s' doesn't "
                   "support the option '%s'", drv->format_name, bs->device_name,
                   entry->key);
1212 1213 1214 1215 1216 1217

        ret = -EINVAL;
        goto close_and_fail;
    }
    QDECREF(options);

Kevin Wolf's avatar
Kevin Wolf committed
1218
    if (!bdrv_key_required(bs)) {
1219
        bdrv_dev_change_media_cb(bs, true);
Kevin Wolf's avatar
Kevin Wolf committed
1220 1221 1222 1223 1224
    }

    return 0;

unlink_and_fail:
1225
    if (file != NULL) {
1226
        bdrv_unref(file);
1227
    }
Kevin Wolf's avatar
Kevin Wolf committed
1228 1229 1230
    if (bs->is_temporary) {
        unlink(filename);
    }
1231 1232
fail:
    QDECREF(bs->options);
1233
    QDECREF(options);
1234
    bs->options = NULL;
1235 1236 1237
    if (error_is_set(&local_err)) {
        error_propagate(errp, local_err);
    }
1238
    return ret;
1239

1240 1241 1242
close_and_fail:
    bdrv_close(bs);
    QDECREF(options);
1243 1244 1245
    if (error_is_set(&local_err)) {
        error_propagate(errp, local_err);
    }
Kevin Wolf's avatar
Kevin Wolf committed
1246 1247 1248
    return ret;
}

1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 <