/*
 * QEMU System Emulator block driver
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
24
#include "config-host.h"
pbrook's avatar
pbrook committed
25
#include "qemu-common.h"
26
#include "trace.h"
27
#include "monitor/monitor.h"
28 29
#include "block/block_int.h"
#include "block/blockjob.h"
30
#include "qemu/module.h"
31
#include "qapi/qmp/qjson.h"
32
#include "sysemu/sysemu.h"
33
#include "qemu/notify.h"
34
#include "block/coroutine.h"
Luiz Capitulino's avatar
Luiz Capitulino committed
35
#include "qmp-commands.h"
36
#include "qemu/timer.h"
bellard's avatar
bellard committed
37

Juan Quintela's avatar
Juan Quintela committed
38
#ifdef CONFIG_BSD
bellard's avatar
bellard committed
39 40 41
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
Blue Swirl's avatar
Blue Swirl committed
42
#include <sys/queue.h>
43
#ifndef __DragonFly__
bellard's avatar
bellard committed
44 45
#include <sys/disk.h>
#endif
46
#endif
bellard's avatar
bellard committed
47

48 49 50 51
#ifdef _WIN32
#include <windows.h>
#endif

52 53
#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */

54 55
/*
 * Bit flags accepted by the 'flags' parameter of bdrv_co_do_readv() and
 * bdrv_co_do_writev().  Each flag occupies its own bit so they can be OR-ed.
 */
typedef enum {
    BDRV_REQ_COPY_ON_READ = 0x1,
    BDRV_REQ_ZERO_WRITE   = 0x2,
} BdrvRequestFlags;

59
static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
60 61
static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
62
        BlockDriverCompletionFunc *cb, void *opaque);
63 64
static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
65
        BlockDriverCompletionFunc *cb, void *opaque);
66 67 68 69 70 71
static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov);
static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov);
72
static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
73 74
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
    BdrvRequestFlags flags);
75
static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
76 77
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
    BdrvRequestFlags flags);
78 79 80 81 82 83
static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
                                               int64_t sector_num,
                                               QEMUIOVector *qiov,
                                               int nb_sectors,
                                               BlockDriverCompletionFunc *cb,
                                               void *opaque,
84
                                               bool is_write);
85
static void coroutine_fn bdrv_co_do_rw(void *opaque);
Kevin Wolf's avatar
Kevin Wolf committed
86 87
static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors);
bellard's avatar
bellard committed
88

89 90 91 92 93 94 95
static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
        bool is_write, double elapsed_time, uint64_t *wait);
static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
        double elapsed_time, uint64_t *wait);
static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
        bool is_write, int64_t *wait);

96 97
/* Queue of block device states; bdrv_new() appends every BlockDriverState
 * that is created with a non-empty device name. */
static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
    QTAILQ_HEAD_INITIALIZER(bdrv_states);
98

99 100
/* List of registered block drivers; bdrv_register() inserts new drivers at
 * the head, and the lookup/probe helpers in this file iterate over it. */
static QLIST_HEAD(, BlockDriver) bdrv_drivers =
    QLIST_HEAD_INITIALIZER(bdrv_drivers);
bellard's avatar
bellard committed
101

102 103 104
/* If non-zero, use only whitelisted block drivers: bdrv_open_common() then
 * returns -ENOTSUP for any driver that bdrv_is_whitelisted() rejects. */
static int use_bdrv_whitelist;

105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124
#ifdef _WIN32
/*
 * Return 1 if filename starts with an ASCII letter immediately followed by
 * ':' (a drive prefix such as "c:"), 0 otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    const int is_lower = (letter >= 'a' && letter <= 'z');
    const int is_upper = (letter >= 'A' && letter <= 'Z');

    /* Do not look at filename[1] unless the first character is a letter */
    if (!is_lower && !is_upper) {
        return 0;
    }
    return filename[1] == ':';
}

/*
 * Return 1 if filename is either a bare drive specification ("d:" with
 * nothing after it) or starts with one of the device prefixes "\\.\" or
 * "//./"; return 0 otherwise.
 */
int is_windows_drive(const char *filename)
{
    /* Exactly "<letter>:" */
    if (is_windows_drive_prefix(filename) && filename[2] == '\0') {
        return 1;
    }

    /* Device namespace prefix, written with either kind of slash */
    if (strstart(filename, "\\\\.\\", NULL)) {
        return 1;
    }
    if (strstart(filename, "//./", NULL)) {
        return 1;
    }

    return 0;
}
#endif

125
/* throttling disk I/O limits */
126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141
/*
 * Turn I/O throttling off for bs: clear the enabled flag, keep calling
 * qemu_co_queue_next() on the throttled request queue until it reports
 * nothing more to do, release the throttling timer if one exists, and reset
 * the accounting slice boundaries.
 */
void bdrv_io_limits_disable(BlockDriverState *bs)
{
    bs->io_limits_enabled = false;

    /* Drain the queue; the call itself does the work, the body is empty */
    while (qemu_co_queue_next(&bs->throttled_reqs)) {
        continue;
    }

    if (bs->block_timer != NULL) {
        qemu_del_timer(bs->block_timer);
        qemu_free_timer(bs->block_timer);
        bs->block_timer = NULL;
    }

    bs->slice_start = 0;
    bs->slice_end   = 0;
}

142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166
/*
 * Callback of the throttling timer created in bdrv_io_limits_enable().
 * opaque is the BlockDriverState the timer was created for; the callback
 * hands the throttled request queue one qemu_co_queue_next() call.
 */
static void bdrv_block_timer(void *opaque)
{
    BlockDriverState *throttled_bs = opaque;

    qemu_co_queue_next(&throttled_bs->throttled_reqs);
}

/*
 * Turn I/O throttling on for bs: initialise the queue of throttled requests,
 * create the vm_clock timer whose callback is bdrv_block_timer(), and set
 * the enabled flag.
 */
void bdrv_io_limits_enable(BlockDriverState *bs)
{
    qemu_co_queue_init(&bs->throttled_reqs);
    bs->block_timer = qemu_new_timer_ns(vm_clock, bdrv_block_timer, bs);
    bs->io_limits_enabled = true;
}

/*
 * Return true if any of the configured limits of bs (bytes per second or
 * I/O operations per second, for reads, writes or the total) is non-zero.
 */
bool bdrv_io_limits_enabled(BlockDriverState *bs)
{
    BlockIOLimit *limits = &bs->io_limits;

    /* Any bandwidth limit set? */
    if (limits->bps[BLOCK_IO_LIMIT_READ]
        || limits->bps[BLOCK_IO_LIMIT_WRITE]
        || limits->bps[BLOCK_IO_LIMIT_TOTAL]) {
        return true;
    }

    /* Otherwise the answer depends on the operation-count limits */
    if (limits->iops[BLOCK_IO_LIMIT_READ]
        || limits->iops[BLOCK_IO_LIMIT_WRITE]
        || limits->iops[BLOCK_IO_LIMIT_TOTAL]) {
        return true;
    }

    return false;
}

167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191
/*
 * Throttle the calling request against the I/O limits of bs.
 *
 * bs:         device the request is issued on
 * is_write:   true for a write request, false for a read
 * nb_sectors: size of the request in sectors
 *
 * The function waits on bs->throttled_reqs, so it is expected to run in
 * coroutine context.
 */
static void bdrv_io_limits_intercept(BlockDriverState *bs,
                                     bool is_write, int nb_sectors)
{
    int64_t wait_time = -1;

    /* Other requests are already queued: line up behind them first */
    if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
        qemu_co_queue_wait(&bs->throttled_reqs);
    }

    /* In fact, we hope to keep each request's timing, in FIFO mode. The next
     * throttled requests will not be dequeued until the current request is
     * allowed to be serviced. So if the current request still exceeds the
     * limits, it will be inserted to the head. All requests followed it will
     * be still in throttled_reqs queue.
     */

    while (bdrv_exceed_io_limits(bs, nb_sectors, is_write, &wait_time)) {
        /* Arm the timer for the computed delay, then wait at the queue head */
        qemu_mod_timer(bs->block_timer,
                       wait_time + qemu_get_clock_ns(vm_clock));
        qemu_co_queue_wait_insert_head(&bs->throttled_reqs);
    }

    /* This request may proceed; give the next queued request its turn */
    qemu_co_queue_next(&bs->throttled_reqs);
}

192 193 194
/*
 * Check if the path starts with "<protocol>:".
 *
 * The path has a protocol when the first ':' appears before any path
 * separator ('/', and additionally '\\' on Windows).  On Windows, drive
 * specifications such as "c:" or "\\.\d:" are never treated as protocols.
 *
 * Returns 1 if a protocol prefix is present, 0 otherwise.
 */
static int path_has_protocol(const char *path)
{
    const char *p;

#ifdef _WIN32
    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
    p = path + strcspn(path, ":/\\");
#else
    p = path + strcspn(path, ":/");
#endif

    /* p points at the first ':' or separator, or at the terminating NUL */
    return *p == ':';
}

bellard's avatar
bellard committed
210
/*
 * Return 1 if path is absolute, 0 otherwise.
 *
 * On POSIX hosts a path is absolute when it starts with '/'.  On Windows a
 * leading '/' or '\\' also counts, as does anything recognised by
 * is_windows_drive() or is_windows_drive_prefix().
 */
int path_is_absolute(const char *path)
{
#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    return (*path == '/' || *path == '\\');
#else
    return (*path == '/');
#endif
}

bellard's avatar
bellard committed
223 224 225 226 227 228
/*
 * If filename is absolute, just copy it to dest.  Otherwise, build a path to
 * it by considering it relative to base_path.  URLs are supported: the part
 * of base_path up to and including its first ':' is kept even when no
 * directory separator follows it.
 *
 * dest:      output buffer; always NUL-terminated when dest_size > 0
 * dest_size: size of dest in bytes; nothing is written when it is <= 0
 * base_path: path whose directory part (everything up to and including the
 *            last separator) is prepended to a relative filename
 * filename:  absolute or relative file name
 *
 * The result is silently truncated if it does not fit into dest.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *p, *p1;
    int len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
    } else {
        /* p: just past the protocol prefix ("proto:"), if there is one */
        p = strchr(base_path, ':');
        if (p) {
            p++;
        } else {
            p = base_path;
        }
        /* p1: just past the last directory separator, if there is one */
        p1 = strrchr(base_path, '/');
#ifdef _WIN32
        {
            const char *p2;
            p2 = strrchr(base_path, '\\');
            if (!p1 || p2 > p1) {
                p1 = p2;
            }
        }
#endif
        if (p1) {
            p1++;
        } else {
            p1 = base_path;
        }
        /* Keep whichever prefix of base_path is longer */
        if (p1 > p) {
            p = p1;
        }
        len = p - base_path;
        if (len > dest_size - 1) {
            len = dest_size - 1;
        }
        memcpy(dest, base_path, len);
        dest[len] = '\0';
        pstrcat(dest, dest_size, filename);
    }
}

267 268 269 270 271 272 273 274 275
/*
 * Write the backing file name of bs into dest (at most sz bytes).
 *
 * A backing file name that is empty or carries a "<protocol>:" prefix is
 * copied unchanged; any other name is combined with bs->filename through
 * path_combine().
 */
void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz)
{
    bool needs_combining = bs->backing_file[0] != '\0' &&
                           !path_has_protocol(bs->backing_file);

    if (needs_combining) {
        path_combine(dest, sz, bs->filename, bs->backing_file);
        return;
    }

    pstrcpy(dest, sz, bs->backing_file);
}

276
/*
 * Register a block driver by inserting it at the head of bdrv_drivers.
 *
 * Before insertion, missing I/O callbacks are filled in with emulation
 * wrappers: a driver without bdrv_co_readv gets the coroutine emulation, and
 * if it also lacks bdrv_aio_readv it gets the AIO emulation underneath.
 */
void bdrv_register(BlockDriver *bdrv)
{
    /* Block drivers without coroutine functions need emulation */
    if (!bdrv->bdrv_co_readv) {
        bdrv->bdrv_co_readv = bdrv_co_readv_em;
        bdrv->bdrv_co_writev = bdrv_co_writev_em;

        /* bdrv_co_readv_em()/bdrv_co_writev_em() work in terms of aio, so if
         * the block driver lacks aio we need to emulate that too.
         */
        if (!bdrv->bdrv_aio_readv) {
            /* add AIO emulation layer */
            bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
            bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
        }
    }

    QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
}
bellard's avatar
bellard committed
295 296 297 298

/* create a new block device (by default it is empty) */
BlockDriverState *bdrv_new(const char *device_name)
{
299
    BlockDriverState *bs;
bellard's avatar
bellard committed
300

301
    bs = g_malloc0(sizeof(BlockDriverState));
bellard's avatar
bellard committed
302
    pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
bellard's avatar
bellard committed
303
    if (device_name[0] != '\0') {
304
        QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
bellard's avatar
bellard committed
305
    }
306
    bdrv_iostatus_disable(bs);
Paolo Bonzini's avatar
Paolo Bonzini committed
307 308
    notifier_list_init(&bs->close_notifiers);

bellard's avatar
bellard committed
309 310 311
    return bs;
}

Paolo Bonzini's avatar
Paolo Bonzini committed
312 313 314 315 316
/* Add notify to the close_notifiers list of bs (the list that bdrv_new()
 * initialises). */
void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
{
    notifier_list_add(&bs->close_notifiers, notify);
}

bellard's avatar
bellard committed
317 318 319
BlockDriver *bdrv_find_format(const char *format_name)
{
    BlockDriver *drv1;
320 321
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (!strcmp(drv1->format_name, format_name)) {
bellard's avatar
bellard committed
322
            return drv1;
323
        }
bellard's avatar
bellard committed
324 325 326 327
    }
    return NULL;
}

328
/*
 * Check a driver against the build-time whitelists.
 *
 * Returns 1 if both whitelists are empty, if the driver's format name is in
 * the read-write whitelist, or if read_only is set and the name is in the
 * read-only whitelist.  Returns 0 otherwise.
 */
static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
{
    /* Both arrays are walked until their first NULL entry */
    static const char *whitelist_rw[] = {
        CONFIG_BDRV_RW_WHITELIST
    };
    static const char *whitelist_ro[] = {
        CONFIG_BDRV_RO_WHITELIST
    };
    const char **p;

    if (!whitelist_rw[0] && !whitelist_ro[0]) {
        return 1;               /* no whitelist, anything goes */
    }

    for (p = whitelist_rw; *p; p++) {
        if (!strcmp(drv->format_name, *p)) {
            return 1;
        }
    }
    if (read_only) {
        for (p = whitelist_ro; *p; p++) {
            if (!strcmp(drv->format_name, *p)) {
                return 1;
            }
        }
    }
    return 0;
}

357 358
/*
 * Like bdrv_find_format(), but return the driver only if
 * bdrv_is_whitelisted() accepts it for the requested access mode.
 * Returns NULL if the format is unknown or not whitelisted.
 */
BlockDriver *bdrv_find_whitelisted_format(const char *format_name,
                                          bool read_only)
{
    BlockDriver *drv = bdrv_find_format(format_name);
    return drv && bdrv_is_whitelisted(drv, read_only) ? drv : NULL;
}

364 365 366 367 368 369 370 371 372 373 374 375 376 377 378
/* Arguments and result slot handed from bdrv_create() to
 * bdrv_create_co_entry(). */
typedef struct CreateCo {
    BlockDriver *drv;               /* driver whose bdrv_create is called */
    char *filename;                 /* copy made and freed by bdrv_create() */
    QEMUOptionParameter *options;   /* passed through to drv->bdrv_create */
    int ret;                        /* NOT_DONE until the entry stores a result */
} CreateCo;

/*
 * Coroutine entry point used by bdrv_create(): call the driver's
 * bdrv_create callback with the arguments stored in the CreateCo that
 * opaque points to, and store the result in its 'ret' field.
 */
static void coroutine_fn bdrv_create_co_entry(void *opaque)
{
    CreateCo *request = opaque;
    BlockDriver *driver = request->drv;

    assert(driver);

    request->ret = driver->bdrv_create(request->filename, request->options);
}

379 380
/*
 * Create an image using the given driver.
 *
 * drv:      driver to use; the call fails with -ENOTSUP if it provides no
 *           bdrv_create callback
 * filename: name of the image to create (a private copy is passed on)
 * options:  creation options handed to the driver unchanged
 *
 * The driver callback is run through bdrv_create_co_entry(): directly when
 * the caller is already in coroutine context, otherwise in a new coroutine
 * while this function calls qemu_aio_wait() until a result is stored.
 *
 * Returns the value produced by the driver callback, or -ENOTSUP.
 */
int bdrv_create(BlockDriver *drv, const char* filename,
    QEMUOptionParameter *options)
{
    int ret;

    Coroutine *co;
    CreateCo cco = {
        .drv = drv,
        .filename = g_strdup(filename),
        .options = options,
        .ret = NOT_DONE,
    };

    if (!drv->bdrv_create) {
        ret = -ENOTSUP;
        goto out;
    }

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_create_co_entry(&cco);
    } else {
        co = qemu_coroutine_create(bdrv_create_co_entry);
        qemu_coroutine_enter(co, &cco);
        /* NOT_DONE is replaced once the entry function has finished */
        while (cco.ret == NOT_DONE) {
            qemu_aio_wait();
        }
    }

    ret = cco.ret;

out:
    g_free(cco.filename);
    return ret;
}

415 416 417 418
/*
 * Create a file through the protocol driver that bdrv_find_protocol()
 * selects for filename.
 *
 * Returns -ENOENT if no protocol driver is found, otherwise the result of
 * bdrv_create().
 */
int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
{
    BlockDriver *drv;

    drv = bdrv_find_protocol(filename);
    if (drv == NULL) {
        return -ENOENT;
    }

    return bdrv_create(drv, filename, options);
}

427 428 429 430 431
/*
 * Create a uniquely-named empty temporary file.
 *
 * filename: output buffer that receives the name of the created file
 * size:     size of the buffer in bytes (on Windows it must be at least
 *           MAX_PATH)
 *
 * On POSIX hosts the file is created as "vl.XXXXXX" in $TMPDIR, or in /tmp
 * if TMPDIR is not set.
 *
 * Return 0 upon success, otherwise a negative errno value (-EOVERFLOW if the
 * name does not fit into the buffer).
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater.  */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    const char *tmpdir;
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/tmp";
    }
    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
        return -EOVERFLOW;
    }
    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        /* Save errno first: unlink() may overwrite the value set by close() */
        int close_errno = errno;
        unlink(filename);
        return -close_errno;
    }
    return 0;
#endif
}
bellard's avatar
bellard committed
461

462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483
/*
 * Detect host devices. By convention, /dev/cdrom[N] is always
 * recognized as a host CDROM.
 *
 * Every registered driver that has a bdrv_probe_device callback is asked to
 * score filename; the driver with the highest strictly positive score wins,
 * and on a tie the one probed first is kept.  Returns NULL if no driver
 * scores above zero.
 */
static BlockDriver *find_hdev_driver(const char *filename)
{
    BlockDriver *best = NULL;
    BlockDriver *candidate;
    int best_score = 0;

    QLIST_FOREACH(candidate, &bdrv_drivers, list) {
        int candidate_score;

        if (!candidate->bdrv_probe_device) {
            continue;
        }

        candidate_score = candidate->bdrv_probe_device(filename);
        if (candidate_score > best_score) {
            best_score = candidate_score;
            best = candidate;
        }
    }

    return best;
}

484
BlockDriver *bdrv_find_protocol(const char *filename)
bellard's avatar
bellard committed
485 486 487
{
    BlockDriver *drv1;
    char protocol[128];
488
    int len;
bellard's avatar
bellard committed
489
    const char *p;
bellard's avatar
bellard committed
490

491 492
    /* TODO Drivers without bdrv_file_open must be specified explicitly */

493 494 495 496 497 498 499 500 501 502 503 504
    /*
     * XXX(hch): we really should not let host device detection
     * override an explicit protocol specification, but moving this
     * later breaks access to device names with colons in them.
     * Thanks to the brain-dead persistent naming schemes on udev-
     * based Linux systems those actually are quite common.
     */
    drv1 = find_hdev_driver(filename);
    if (drv1) {
        return drv1;
    }

505
    if (!path_has_protocol(filename)) {
506
        return bdrv_find_format("file");
507
    }
508 509
    p = strchr(filename, ':');
    assert(p != NULL);
510 511 512 513 514
    len = p - filename;
    if (len > sizeof(protocol) - 1)
        len = sizeof(protocol) - 1;
    memcpy(protocol, filename, len);
    protocol[len] = '\0';
515
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
516
        if (drv1->protocol_name &&
517
            !strcmp(drv1->protocol_name, protocol)) {
bellard's avatar
bellard committed
518
            return drv1;
519
        }
bellard's avatar
bellard committed
520 521 522 523
    }
    return NULL;
}

524 525
/*
 * Probe the image format of an opened BlockDriverState.
 *
 * bs:       state to read the probe data from
 * filename: name handed to each driver's bdrv_probe callback
 * pdrv:     receives the selected driver, or NULL on failure
 *
 * scsi-generic devices, drives without medium and zero-length images get the
 * "raw" driver without probing.  Otherwise the first 2048 bytes are read and
 * the driver with the highest strictly positive probe score is selected.
 *
 * Returns a negative errno value on failure: the read error, or -ENOENT if
 * no driver matched.  On success after probing, the return value is the
 * non-negative result of bdrv_pread(); it is 0 on the "raw" shortcut.
 */
static int find_image_format(BlockDriverState *bs, const char *filename,
                             BlockDriver **pdrv)
{
    int score, score_max;
    BlockDriver *drv1, *drv;
    uint8_t buf[2048];
    int ret = 0;

    /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
    if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
        drv = bdrv_find_format("raw");
        if (!drv) {
            ret = -ENOENT;
        }
        *pdrv = drv;
        return ret;
    }

    ret = bdrv_pread(bs, 0, buf, sizeof(buf));
    if (ret < 0) {
        *pdrv = NULL;
        return ret;
    }

    score_max = 0;
    drv = NULL;
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (drv1->bdrv_probe) {
            /* ret is the value bdrv_pread() returned for the probe buffer */
            score = drv1->bdrv_probe(buf, ret, filename);
            if (score > score_max) {
                score_max = score;
                drv = drv1;
            }
        }
    }
    if (!drv) {
        ret = -ENOENT;
    }
    *pdrv = drv;
    return ret;
}

566 567 568 569 570 571 572
/**
 * Set the current 'total_sectors' value
 *
 * @bs:   state to update; bs->drv must be set
 * @hint: sector count to use when the driver has no bdrv_getlength callback
 *
 * Returns 0 on success (also for scsi-generic devices, which are left
 * untouched), or the negative value returned by the driver's bdrv_getlength.
 */
static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
{
    BlockDriver *drv = bs->drv;

    /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
    if (bs->sg) {
        return 0;
    }

    /* query actual device if possible, otherwise just trust the hint */
    if (drv->bdrv_getlength) {
        int64_t length = drv->bdrv_getlength(bs);
        if (length < 0) {
            return length;
        }
        /* the driver's length is shifted down to a sector count */
        hint = length >> BDRV_SECTOR_BITS;
    }

    bs->total_sectors = hint;
    return 0;
}

Paolo Bonzini's avatar
Paolo Bonzini committed
590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609
/**
 * Translate a discard mode string into open flags.
 *
 * BDRV_O_UNMAP is always cleared from *flags first (even when the mode turns
 * out to be invalid); it is set again for "on"/"unmap" and left cleared for
 * "off"/"ignore".
 *
 * Return 0 on success, -1 if the discard mode was invalid.
 */
int bdrv_parse_discard_flags(const char *mode, int *flags)
{
    bool want_unmap;

    *flags &= ~BDRV_O_UNMAP;

    if (strcmp(mode, "off") == 0 || strcmp(mode, "ignore") == 0) {
        want_unmap = false;
    } else if (strcmp(mode, "on") == 0 || strcmp(mode, "unmap") == 0) {
        want_unmap = true;
    } else {
        return -1;
    }

    if (want_unmap) {
        *flags |= BDRV_O_UNMAP;
    }
    return 0;
}

610 611 612 613 614 615 616 617 618 619 620
/**
 * Set open flags for a given cache mode
 *
 * All bits in BDRV_O_CACHE_MASK are cleared from *flags first (even when the
 * mode turns out to be invalid), then the bits for the mode are set:
 *
 *   "off" / "none"   BDRV_O_NOCACHE | BDRV_O_CACHE_WB
 *   "directsync"     BDRV_O_NOCACHE
 *   "writeback"      BDRV_O_CACHE_WB
 *   "unsafe"         BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH
 *   "writethrough"   no bits
 *
 * Return 0 on success, -1 if the cache mode was invalid.
 */
int bdrv_parse_cache_flags(const char *mode, int *flags)
{
    *flags &= ~BDRV_O_CACHE_MASK;

    if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
        *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
    } else if (!strcmp(mode, "directsync")) {
        *flags |= BDRV_O_NOCACHE;
    } else if (!strcmp(mode, "writeback")) {
        *flags |= BDRV_O_CACHE_WB;
    } else if (!strcmp(mode, "unsafe")) {
        *flags |= BDRV_O_CACHE_WB;
        *flags |= BDRV_O_NO_FLUSH;
    } else if (!strcmp(mode, "writethrough")) {
        /* this is the default */
    } else {
        return -1;
    }

    return 0;
}

637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652
/**
 * The copy-on-read flag is actually a reference count so multiple users may
 * use the feature without worrying about clobbering its previous state.
 * Copy-on-read stays enabled until all users have called to disable it.
 *
 * This function takes one reference by incrementing bs->copy_on_read.
 */
void bdrv_enable_copy_on_read(BlockDriverState *bs)
{
    bs->copy_on_read++;
}

/* Drop one copy-on-read reference taken with bdrv_enable_copy_on_read().
 * Asserts that the counter is positive, i.e. calls must be balanced. */
void bdrv_disable_copy_on_read(BlockDriverState *bs)
{
    assert(bs->copy_on_read > 0);
    bs->copy_on_read--;
}

Kevin Wolf's avatar
Kevin Wolf committed
653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672
/*
 * Derive the flags handed to a driver's open callback from the flags the
 * caller requested:
 *  - BDRV_O_CACHE_WB is always set;
 *  - BDRV_O_SNAPSHOT and BDRV_O_NO_BACKING are internal to the block layer
 *    and are stripped;
 *  - temporary images (snapshots) are always opened read-write.
 */
static int bdrv_open_flags(BlockDriverState *bs, int flags)
{
    const int internal_only = BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING;
    int result = (flags | BDRV_O_CACHE_WB) & ~internal_only;

    if (bs->is_temporary) {
        /* Snapshots should be writable. */
        result |= BDRV_O_RDWR;
    }

    return result;
}

673 674
/*
 * Common part for opening disk images and files
675 676
 *
 * Removes all processed options from *options.
677
 */
678
static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
Kevin Wolf's avatar
Kevin Wolf committed
679
    QDict *options, int flags, BlockDriver *drv)
680 681
{
    int ret, open_flags;
Kevin Wolf's avatar
Kevin Wolf committed
682
    const char *filename;
683 684

    assert(drv != NULL);
685
    assert(bs->file == NULL);
686
    assert(options != NULL && bs->options != options);
687

688 689 690 691 692 693 694
    if (file != NULL) {
        filename = file->filename;
    } else {
        filename = qdict_get_try_str(options, "filename");
    }

    trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);
695

696 697 698 699 700 701 702 703
    /* bdrv_open() with directly using a protocol as drv. This layer is already
     * opened, so assign it to bs (while file becomes a closed BlockDriverState)
     * and return immediately. */
    if (file != NULL && drv->bdrv_file_open) {
        bdrv_swap(file, bs);
        return 0;
    }

704 705
    bs->open_flags = flags;
    bs->buffer_alignment = 512;
706 707 708 709 710 711
    open_flags = bdrv_open_flags(bs, flags);
    bs->read_only = !(open_flags & BDRV_O_RDWR);

    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
        return -ENOTSUP;
    }
712

713
    assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
714
    if (!bs->read_only && (flags & BDRV_O_COPY_ON_READ)) {
715 716 717
        bdrv_enable_copy_on_read(bs);
    }

718 719 720 721 722
    if (filename != NULL) {
        pstrcpy(bs->filename, sizeof(bs->filename), filename);
    } else {
        bs->filename[0] = '\0';
    }
723 724

    bs->drv = drv;
725
    bs->opaque = g_malloc0(drv->instance_size);
726

727
    bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
728

729 730
    /* Open the image, either directly or using a protocol */
    if (drv->bdrv_file_open) {
731 732
        assert(file == NULL);
        assert(drv->bdrv_parse_filename || filename != NULL);
733
        ret = drv->bdrv_file_open(bs, options, open_flags);
734
    } else {
735 736 737 738 739 740 741
        if (file == NULL) {
            qerror_report(ERROR_CLASS_GENERIC_ERROR, "Can't use '%s' as a "
                          "block driver for the protocol level",
                          drv->format_name);
            ret = -EINVAL;
            goto free_and_fail;
        }
742 743
        assert(file != NULL);
        bs->file = file;
744
        ret = drv->bdrv_open(bs, options, open_flags);
745 746
    }

747 748 749 750
    if (ret < 0) {
        goto free_and_fail;
    }

751 752 753
    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        goto free_and_fail;
754
    }
755

756 757
#ifndef _WIN32
    if (bs->is_temporary) {
758
        assert(filename != NULL);
759 760 761 762 763 764
        unlink(filename);
    }
#endif
    return 0;

free_and_fail:
765
    bs->file = NULL;
766
    g_free(bs->opaque);
767 768 769 770 771
    bs->opaque = NULL;
    bs->drv = NULL;
    return ret;
}

Kevin Wolf's avatar
Kevin Wolf committed
772 773
/*
 * Opens a file using a protocol (file, host_device, nbd, ...)
774 775 776 777 778
 *
 * options is a QDict of options to pass to the block drivers, or NULL for an
 * empty set of options. The reference to the QDict belongs to the block layer
 * after the call (even on failure), so if the caller intends to reuse the
 * dictionary, it needs to use QINCREF() before calling bdrv_file_open.
Kevin Wolf's avatar
Kevin Wolf committed
779
 */
780 781
int bdrv_file_open(BlockDriverState **pbs, const char *filename,
                   QDict *options, int flags)
bellard's avatar
bellard committed
782
{
bellard's avatar
bellard committed
783
    BlockDriverState *bs;
784
    BlockDriver *drv;
785
    const char *drvname;
bellard's avatar
bellard committed
786 787
    int ret;

788 789 790 791 792
    /* NULL means an empty set of options */
    if (options == NULL) {
        options = qdict_new();
    }

bellard's avatar
bellard committed
793
    bs = bdrv_new("");
794 795 796
    bs->options = options;
    options = qdict_clone_shallow(options);

Kevin Wolf's avatar
Kevin Wolf committed
797 798 799 800 801 802 803 804 805 806 807 808
    /* Fetch the file name from the options QDict if necessary */
    if (!filename) {
        filename = qdict_get_try_str(options, "filename");
    } else if (filename && !qdict_haskey(options, "filename")) {
        qdict_put(options, "filename", qstring_from_str(filename));
    } else {
        qerror_report(ERROR_CLASS_GENERIC_ERROR, "Can't specify 'file' and "
                      "'filename' options at the same time");
        ret = -EINVAL;
        goto fail;
    }

809 810 811
    /* Find the right block driver */
    drvname = qdict_get_try_str(options, "driver");
    if (drvname) {
812
        drv = bdrv_find_whitelisted_format(drvname, !(flags & BDRV_O_RDWR));
813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828
        qdict_del(options, "driver");
    } else if (filename) {
        drv = bdrv_find_protocol(filename);
    } else {
        qerror_report(ERROR_CLASS_GENERIC_ERROR,
                      "Must specify either driver or file");
        drv = NULL;
    }

    if (!drv) {
        ret = -ENOENT;
        goto fail;
    }

    /* Parse the filename and open it */
    if (drv->bdrv_parse_filename && filename) {
829 830 831 832 833 834 835 836
        Error *local_err = NULL;
        drv->bdrv_parse_filename(filename, options, &local_err);
        if (error_is_set(&local_err)) {
            qerror_report_err(local_err);
            error_free(local_err);
            ret = -EINVAL;
            goto fail;
        }
837
        qdict_del(options, "filename");
838 839 840 841 842 843
    } else if (!drv->bdrv_parse_filename && !filename) {
        qerror_report(ERROR_CLASS_GENERIC_ERROR,
                      "The '%s' block driver requires a file name",
                      drv->format_name);
        ret = -EINVAL;
        goto fail;
844 845
    }

Kevin Wolf's avatar
Kevin Wolf committed
846
    ret = bdrv_open_common(bs, NULL, options, flags, drv);
bellard's avatar
bellard committed
847
    if (ret < 0) {
848 849 850 851 852 853 854 855 856 857 858
        goto fail;
    }

    /* Check if any unknown options were used */
    if (qdict_size(options) != 0) {
        const QDictEntry *entry = qdict_first(options);
        qerror_report(ERROR_CLASS_GENERIC_ERROR, "Block protocol '%s' doesn't "
                      "support the option '%s'",
                      drv->format_name, entry->key);
        ret = -EINVAL;
        goto fail;
859
    }
860 861
    QDECREF(options);

862
    bs->growable = 1;
bellard's avatar
bellard committed
863 864
    *pbs = bs;
    return 0;
865 866 867 868 869 870 871 872

fail:
    QDECREF(options);
    if (!bs->drv) {
        QDECREF(bs->options);
    }
    bdrv_delete(bs);
    return ret;
bellard's avatar
bellard committed
873 874
}

875 876 877 878 879 880 881 882 883
/*
 * Opens the backing file for a BlockDriverState if not yet open
 *
 * options is a QDict of options to pass to the block drivers, or NULL for an
 * empty set of options. The reference to the QDict is transferred to this
 * function (even on failure), so if the caller intends to reuse the dictionary,
 * it needs to use QINCREF() before calling bdrv_open_backing_file.
 *
 * If options contains "file.filename", that option names the backing file
 * and the name recorded in bs is not used.  If bs records no backing file
 * and no options are given, there is nothing to open.
 *
 * Returns 0 on success (including the nothing-to-do cases) or the negative
 * errno value from bdrv_open(); on failure BDRV_O_NO_BACKING is set in
 * bs->open_flags and bs->backing_hd is NULL.
 */
int bdrv_open_backing_file(BlockDriverState *bs, QDict *options)
{
    char backing_filename[PATH_MAX];
    int back_flags, ret;
    BlockDriver *back_drv = NULL;

    if (bs->backing_hd != NULL) {
        QDECREF(options);
        return 0;
    }

    /* NULL means an empty set of options */
    if (options == NULL) {
        options = qdict_new();
    }

    bs->open_flags &= ~BDRV_O_NO_BACKING;
    if (qdict_haskey(options, "file.filename")) {
        /* The option overrides the recorded name: pass no filename below */
        backing_filename[0] = '\0';
    } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
        QDECREF(options);
        return 0;
    } else {
        /* Only compute the recorded name when it has not been overridden;
         * doing it unconditionally would undo the override above. */
        bdrv_get_full_backing_filename(bs, backing_filename,
                                       sizeof(backing_filename));
    }

    bs->backing_hd = bdrv_new("");

    if (bs->backing_format[0] != '\0') {
        back_drv = bdrv_find_format(bs->backing_format);
    }

    /* backing files always opened read-only */
    back_flags = bs->open_flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT);

    ret = bdrv_open(bs->backing_hd,
                    *backing_filename ? backing_filename : NULL, options,
                    back_flags, back_drv);
    if (ret < 0) {
        bdrv_delete(bs->backing_hd);
        bs->backing_hd = NULL;
        bs->open_flags |= BDRV_O_NO_BACKING;
        return ret;
    }
    return 0;
}

930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948
/*
 * Creates a new QDict in *dst and moves into it every entry of src whose key
 * begins with the prefix start. In *dst the entry is stored under the part of
 * the key that follows the prefix; the entry is then deleted from src.
 */
static void extract_subqdict(QDict *src, QDict **dst, const char *start)
{
    const QDictEntry *cur, *following;
    const char *suffix;

    *dst = qdict_new();

    for (cur = qdict_first(src); cur != NULL; cur = following) {
        /* Look up the successor first, cur may be deleted from src below */
        following = qdict_next(src, cur);

        if (!strstart(cur->key, start, &suffix)) {
            continue;
        }

        /* Take an extra reference on the value before adding it to *dst */
        qobject_incref(cur->value);
        qdict_put_obj(*dst, suffix, cur->value);
        qdict_del(src, cur->key);
    }
}

Kevin Wolf's avatar
Kevin Wolf committed
949 950
/*
 * Opens a disk image (raw, qcow2, vmdk, ...)
951 952 953 954 955
 *
 * options is a QDict of options to pass to the block drivers, or NULL for an
 * empty set of options. The reference to the QDict belongs to the block layer
 * after the call (even on failure), so if the caller intends to reuse the
 * dictionary, it needs to use QINCREF() before calling bdrv_open.
Kevin Wolf's avatar
Kevin Wolf committed
956
 */
957 958
int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options,
              int flags, BlockDriver *drv)
bellard's avatar
bellard committed
959
{
Kevin Wolf's avatar
Kevin Wolf committed
960
    int ret;
961 962
    /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
    char tmp_filename[PATH_MAX + 1];
963
    BlockDriverState *file = NULL;
964
    QDict *file_options = NULL;
bellard's avatar
bellard committed
965

966 967 968 969 970 971
    /* NULL means an empty set of options */
    if (options == NULL) {
        options = qdict_new();
    }

    bs->options = options;
972
    options = qdict_clone_shallow(options);
973 974

    /* For snapshot=on, create a temporary qcow2 overlay */
bellard's avatar
bellard committed
975
    if (flags & BDRV_O_SNAPSHOT) {
bellard's avatar
bellard committed
976 977
        BlockDriverState *bs1;
        int64_t total_size;
Kevin Wolf's avatar
Kevin Wolf committed
978
        BlockDriver *bdrv_qcow2;
979
        QEMUOptionParameter *create_options;
Kevin Wolf's avatar
Kevin Wolf committed
980
        char backing_filename[PATH_MAX];
981

982 983 984 985 986 987 988
        if (qdict_size(options) != 0) {
            error_report("Can't use snapshot=on with driver-specific options");
            ret = -EINVAL;
            goto fail;
        }
        assert(filename != NULL);

bellard's avatar
bellard committed
989 990
        /* if snapshot, we create a temporary backing file and open it
           instead of opening 'filename' directly */
991

bellard's avatar
bellard committed
992 993
        /* if there is a backing file, use it */
        bs1 = bdrv_new("");
994
        ret = bdrv_open(bs1, filename, NULL, 0, drv);
995
        if (ret < 0) {
bellard's avatar
bellard committed
996
            bdrv_delete(bs1);
997
            goto fail;
bellard's avatar
bellard committed
998
        }
999
        total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
aliguori's avatar
aliguori committed
1000

bellard's avatar
bellard committed
1001
        bdrv_delete(bs1);
1002

1003 1004
        ret = get_tmp_filename(tmp_filename, sizeof(tmp_filename));
        if (ret < 0) {
1005
            goto fail;
1006
        }
aliguori's avatar
aliguori committed
1007 1008

        /* Real path is meaningless for protocols */
1009
        if (path_has_protocol(filename)) {
aliguori's avatar
aliguori committed
1010 1011
            snprintf(backing_filename, sizeof(backing_filename),
                     "%s", filename);
1012 1013 1014 1015
        } else if (!realpath(filename, backing_filename)) {
            ret = -errno;
            goto fail;
        }
aliguori's avatar
aliguori committed
1016

Kevin Wolf's avatar
Kevin Wolf committed
1017
        bdrv_qcow2 = bdrv_find_format("qcow2");
1018 1019
        create_options = parse_option_parameters("", bdrv_qcow2->create_options,
                                                 NULL);
Kevin Wolf's avatar
Kevin Wolf committed
1020

1021 1022 1023
        set_option_parameter_int(create_options, BLOCK_OPT_SIZE, total_size);
        set_option_parameter(create_options, BLOCK_OPT_BACKING_FILE,
                             backing_filename);
Kevin Wolf's avatar
Kevin Wolf committed
1024
        if (drv) {
1025
            set_option_parameter(create_options, BLOCK_OPT_BACKING_FMT,
Kevin Wolf's avatar
Kevin Wolf committed
1026 1027 1028
                drv->format_name);
        }

1029 1030
        ret = bdrv_create(bdrv_qcow2, tmp_filename, create_options);
        free_option_parameters(create_options);
1031
        if (ret < 0) {
1032
            goto fail;
bellard's avatar
bellard committed
1033
        }
Kevin Wolf's avatar
Kevin Wolf committed
1034

bellard's avatar
bellard committed
1035
        filename = tmp_filename;
Kevin Wolf's avatar
Kevin Wolf committed
1036
        drv = bdrv_qcow2;
bellard's avatar
bellard committed
1037 1038
        bs->is_temporary = 1;
    }
bellard's avatar
bellard committed
1039

1040 1041 1042