block.c 137 KB
Newer Older
bellard's avatar
bellard committed
1 2
/*
 * QEMU System Emulator block driver
3
 *
bellard's avatar
bellard committed
4
 * Copyright (c) 2003 Fabrice Bellard
5
 *
bellard's avatar
bellard committed
6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
24
#include "config-host.h"
pbrook's avatar
pbrook committed
25
#include "qemu-common.h"
26
#include "trace.h"
27
#include "monitor/monitor.h"
28 29
#include "block/block_int.h"
#include "block/blockjob.h"
30
#include "qemu/module.h"
31
#include "qapi/qmp/qjson.h"
32
#include "sysemu/sysemu.h"
33
#include "qemu/notify.h"
34
#include "block/coroutine.h"
Luiz Capitulino's avatar
Luiz Capitulino committed
35
#include "qmp-commands.h"
36
#include "qemu/timer.h"
bellard's avatar
bellard committed
37

38
#ifdef CONFIG_BSD
bellard's avatar
bellard committed
39 40 41
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
42
#include <sys/queue.h>
43
#ifndef __DragonFly__
bellard's avatar
bellard committed
44 45
#include <sys/disk.h>
#endif
46
#endif
bellard's avatar
bellard committed
47

48 49 50 51
#ifdef _WIN32
#include <windows.h>
#endif

Fam Zheng's avatar
Fam Zheng committed
52 53 54 55 56
/* One dirty-tracking bitmap that can be linked into a QLIST. */
struct BdrvDirtyBitmap {
    /* the bitmap storage itself */
    HBitmap *bitmap;
    /* list linkage; presumably chains into bs->dirty_bitmaps -- TODO confirm */
    QLIST_ENTRY(BdrvDirtyBitmap) list;
};

57 58
#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */

59
static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
60 61
static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
62
        BlockDriverCompletionFunc *cb, void *opaque);
63 64
static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
65
        BlockDriverCompletionFunc *cb, void *opaque);
66 67 68 69 70 71
static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov);
static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov);
72
static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
73 74
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
    BdrvRequestFlags flags);
75
static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
76 77
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
    BdrvRequestFlags flags);
78 79 80 81
static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
                                               int64_t sector_num,
                                               QEMUIOVector *qiov,
                                               int nb_sectors,
82
                                               BdrvRequestFlags flags,
83 84
                                               BlockDriverCompletionFunc *cb,
                                               void *opaque,
85
                                               bool is_write);
86
static void coroutine_fn bdrv_co_do_rw(void *opaque);
Kevin Wolf's avatar
Kevin Wolf committed
87
static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
88
    int64_t sector_num, int nb_sectors, BdrvRequestFlags flags);
bellard's avatar
bellard committed
89

90 91
static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
    QTAILQ_HEAD_INITIALIZER(bdrv_states);
92

93 94
static QLIST_HEAD(, BlockDriver) bdrv_drivers =
    QLIST_HEAD_INITIALIZER(bdrv_drivers);
bellard's avatar
bellard committed
95

96 97 98
/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;

99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118
#ifdef _WIN32
/* Return 1 if filename starts with "<letter>:" (e.g. "c:"), 0 otherwise. */
static int is_windows_drive_prefix(const char *filename)
{
    char drive = filename[0];
    int is_letter = (drive >= 'a' && drive <= 'z') ||
                    (drive >= 'A' && drive <= 'Z');

    /* filename[1] is only inspected once filename[0] is known to be a letter */
    return is_letter && filename[1] == ':';
}

/*
 * Return 1 if filename names a Windows drive: either a bare drive
 * specification ("d:") or a path starting with "\\.\" or "//./".
 * Return 0 otherwise.
 */
int is_windows_drive(const char *filename)
{
    if (is_windows_drive_prefix(filename) && filename[2] == '\0') {
        return 1;
    }

    if (strstart(filename, "\\\\.\\", NULL)) {
        return 1;
    }
    if (strstart(filename, "//./", NULL)) {
        return 1;
    }

    return 0;
}
#endif

119
/* throttling disk I/O limits */
120 121
/*
 * Apply a new throttling configuration to bs and then give the first
 * queued request of each throttled queue a chance to run.
 */
void bdrv_set_io_limits(BlockDriverState *bs,
                        ThrottleConfig *cfg)
{
    throttle_config(&bs->throttle_state, cfg);

    /* queue 0 is the one kicked by the read timer, queue 1 by the write timer */
    qemu_co_enter_next(&bs->throttled_reqs[0]);
    qemu_co_enter_next(&bs->throttled_reqs[1]);
}

/*
 * Drain all throttled I/O requests of bs.
 *
 * Throttling is switched off while the queues are emptied and the previous
 * io_limits_enabled value is restored afterwards.
 *
 * Returns true if at least one queued request was restarted.
 */
static bool bdrv_start_throttled_reqs(BlockDriverState *bs)
{
    const bool was_enabled = bs->io_limits_enabled;
    bool any_restarted = false;
    int queue;

    bs->io_limits_enabled = false;

    for (queue = 0; queue < 2; queue++) {
        for (;;) {
            if (!qemu_co_enter_next(&bs->throttled_reqs[queue])) {
                break;
            }
            any_restarted = true;
        }
    }

    bs->io_limits_enabled = was_enabled;

    return any_restarted;
}

152
/*
 * Turn I/O throttling off for bs: clear the enabled flag, restart every
 * request still waiting in the throttled queues, then destroy the
 * throttle state.
 */
void bdrv_io_limits_disable(BlockDriverState *bs)
{
    bs->io_limits_enabled = false;

    /* the "anything drained?" result is not needed here */
    (void)bdrv_start_throttled_reqs(bs);

    throttle_destroy(&bs->throttle_state);
}

161
static void bdrv_throttle_read_timer_cb(void *opaque)
162
{
163 164
    BlockDriverState *bs = opaque;
    qemu_co_enter_next(&bs->throttled_reqs[0]);
165 166
}

167
static void bdrv_throttle_write_timer_cb(void *opaque)
168
{
169 170
    BlockDriverState *bs = opaque;
    qemu_co_enter_next(&bs->throttled_reqs[1]);
171 172
}

173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189
/*
 * Enable I/O throttling on bs.
 *
 * Should be called before bdrv_set_io_limits if a limit is set.
 * Throttling must not already be enabled (asserted).
 */
void bdrv_io_limits_enable(BlockDriverState *bs)
{
    assert(!bs->io_limits_enabled);

    throttle_init(&bs->throttle_state, QEMU_CLOCK_VIRTUAL,
                  bdrv_throttle_read_timer_cb, bdrv_throttle_write_timer_cb,
                  bs);

    bs->io_limits_enabled = true;
}

/* This function makes an IO wait if needed
 *
 * @nb_sectors: the number of sectors of the IO
 * @is_write:   is the IO a write
 */
190
static void bdrv_io_limits_intercept(BlockDriverState *bs,
191 192
                                     int nb_sectors,
                                     bool is_write)
193
{
194 195
    /* does this io must wait */
    bool must_wait = throttle_schedule_timer(&bs->throttle_state, is_write);
196

197 198 199 200
    /* if must wait or any request of this type throttled queue the IO */
    if (must_wait ||
        !qemu_co_queue_empty(&bs->throttled_reqs[is_write])) {
        qemu_co_queue_wait(&bs->throttled_reqs[is_write]);
201 202
    }

203 204 205 206
    /* the IO will be executed, do the accounting */
    throttle_account(&bs->throttle_state,
                     is_write,
                     nb_sectors * BDRV_SECTOR_SIZE);
207

208 209 210
    /* if the next request must wait -> do nothing */
    if (throttle_schedule_timer(&bs->throttle_state, is_write)) {
        return;
211 212
    }

213 214
    /* else queue next request for execution */
    qemu_co_queue_next(&bs->throttled_reqs[is_write]);
215 216
}

217 218 219
/*
 * Check if the path starts with "<protocol>:".
 *
 * Returns 1 when the first ':' comes before any directory separator,
 * 0 otherwise.  On Windows, drive specifications are never treated as
 * a protocol.
 */
static int path_has_protocol(const char *path)
{
#ifdef _WIN32
    static const char stop_chars[] = ":/\\";

    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
#else
    static const char stop_chars[] = ":/";
#endif

    /* index of the first ':' or separator, or of the terminating NUL */
    return path[strcspn(path, stop_chars)] == ':';
}

bellard's avatar
bellard committed
235
/*
 * Return 1 if path is absolute, 0 otherwise.
 *
 * A path is absolute when it starts with '/'.  On Windows a leading '\\',
 * a drive prefix, or a Windows drive name also count.
 */
int path_is_absolute(const char *path)
{
    const char first = path[0];

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    if (first == '\\') {
        return 1;
    }
#endif
    return first == '/';
}

bellard's avatar
bellard committed
248 249 250 251 252 253
/*
 * If filename is absolute, just copy it to dest.  Otherwise, build a path
 * to it by considering it relative to base_path.  URLs are supported: a
 * "<protocol>:" prefix of base_path is kept in front of the result.
 *
 * @dest:      output buffer; left untouched when dest_size <= 0
 * @dest_size: size of @dest in bytes
 * @base_path: path whose leading part (up to and including the last
 *             directory separator, or the first ':' if that comes later)
 *             is prepended
 * @filename:  absolute or relative file name
 *
 * The copied prefix is clamped to dest_size - 1 bytes; truncation of the
 * appended file name is presumably handled by pstrcpy()/pstrcat() --
 * TODO confirm.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *prefix_end, *dir_end;
    int len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* position just after the first ':' of base_path, if there is one */
    prefix_end = strchr(base_path, ':');
    prefix_end = prefix_end ? prefix_end + 1 : base_path;

    /* position just after the last directory separator, if there is one */
    dir_end = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *bslash = strrchr(base_path, '\\');

        /*
         * Only order the two pointers when both are valid: a relational
         * comparison involving a null pointer is undefined.
         */
        if (bslash && (!dir_end || bslash > dir_end)) {
            dir_end = bslash;
        }
    }
#endif
    dir_end = dir_end ? dir_end + 1 : base_path;

    /* keep whichever of the two prefixes is longer */
    if (dir_end > prefix_end) {
        prefix_end = dir_end;
    }

    len = prefix_end - base_path;
    if (len > dest_size - 1) {
        len = dest_size - 1;
    }
    memcpy(dest, base_path, len);
    dest[len] = '\0';
    pstrcat(dest, dest_size, filename);
}

292 293 294 295 296 297 298 299 300
/*
 * Write the backing file name of bs into dest (sz bytes).
 *
 * An empty name, or one that starts with "<protocol>:", is copied
 * unchanged; anything else is combined with bs->filename via
 * path_combine().
 */
void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz)
{
    const char *backing = bs->backing_file;

    if (backing[0] != '\0' && !path_has_protocol(backing)) {
        path_combine(dest, sz, bs->filename, backing);
        return;
    }

    pstrcpy(dest, sz, backing);
}

301
/*
 * Register a block driver by inserting it at the head of the global
 * driver list, installing emulation callbacks for the interfaces the
 * driver does not implement itself.
 */
void bdrv_register(BlockDriver *bdrv)
{
    /* Block drivers without coroutine functions need emulation */
    const int lacks_coroutine_io = !bdrv->bdrv_co_readv;

    if (lacks_coroutine_io) {
        /* bdrv_co_readv_em()/bdrv_co_writev_em() work in terms of aio, so if
         * the block driver lacks aio we need to emulate that too.
         */
        if (!bdrv->bdrv_aio_readv) {
            /* add AIO emulation layer */
            bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
            bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
        }

        bdrv->bdrv_co_readv = bdrv_co_readv_em;
        bdrv->bdrv_co_writev = bdrv_co_writev_em;
    }

    QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
}
bellard's avatar
bellard committed
320 321 322 323

/* create a new block device (by default it is empty) */
BlockDriverState *bdrv_new(const char *device_name)
{
324
    BlockDriverState *bs;
bellard's avatar
bellard committed
325

326
    bs = g_malloc0(sizeof(BlockDriverState));
Fam Zheng's avatar
Fam Zheng committed
327
    QLIST_INIT(&bs->dirty_bitmaps);
bellard's avatar
bellard committed
328
    pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
bellard's avatar
bellard committed
329
    if (device_name[0] != '\0') {
330
        QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
bellard's avatar
bellard committed
331
    }
332
    bdrv_iostatus_disable(bs);
Paolo Bonzini's avatar
Paolo Bonzini committed
333
    notifier_list_init(&bs->close_notifiers);
334
    notifier_with_return_list_init(&bs->before_write_notifiers);
335 336
    qemu_co_queue_init(&bs->throttled_reqs[0]);
    qemu_co_queue_init(&bs->throttled_reqs[1]);
337
    bs->refcnt = 1;
Paolo Bonzini's avatar
Paolo Bonzini committed
338

bellard's avatar
bellard committed
339 340 341
    return bs;
}

Paolo Bonzini's avatar
Paolo Bonzini committed
342 343 344 345 346
/* Add notify to the close_notifiers list of bs. */
void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
{
    notifier_list_add(&bs->close_notifiers, notify);
}

bellard's avatar
bellard committed
347 348 349
BlockDriver *bdrv_find_format(const char *format_name)
{
    BlockDriver *drv1;
350 351
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (!strcmp(drv1->format_name, format_name)) {
bellard's avatar
bellard committed
352
            return drv1;
353
        }
bellard's avatar
bellard committed
354 355 356 357
    }
    return NULL;
}

358
/*
 * Check a driver against the compiled-in whitelists.
 *
 * Returns 1 if both whitelists are empty, if the driver is on the
 * read-write whitelist, or if read_only is set and the driver is on the
 * read-only whitelist.  Returns 0 otherwise.
 */
static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
{
    static const char *whitelist_rw[] = {
        CONFIG_BDRV_RW_WHITELIST
    };
    static const char *whitelist_ro[] = {
        CONFIG_BDRV_RO_WHITELIST
    };
    const char **lists[] = { whitelist_rw, whitelist_ro };
    /* the read-only whitelist is consulted only for read-only use */
    const int nb_lists = read_only ? 2 : 1;
    const char **entry;
    int i;

    if (!whitelist_rw[0] && !whitelist_ro[0]) {
        return 1;               /* no whitelist, anything goes */
    }

    for (i = 0; i < nb_lists; i++) {
        for (entry = lists[i]; *entry; entry++) {
            if (strcmp(drv->format_name, *entry) == 0) {
                return 1;
            }
        }
    }

    return 0;
}

387 388
/*
 * Like bdrv_find_format(), but returns NULL as well when the driver
 * found is not whitelisted for the requested access mode.
 */
BlockDriver *bdrv_find_whitelisted_format(const char *format_name,
                                          bool read_only)
{
    BlockDriver *drv = bdrv_find_format(format_name);

    if (!drv || !bdrv_is_whitelisted(drv, read_only)) {
        return NULL;
    }

    return drv;
}

394 395 396 397 398
typedef struct CreateCo {
    BlockDriver *drv;
    char *filename;
    QEMUOptionParameter *options;
    int ret;
399
    Error *err;
400 401 402 403
} CreateCo;

static void coroutine_fn bdrv_create_co_entry(void *opaque)
{
404 405 406
    Error *local_err = NULL;
    int ret;

407 408 409
    CreateCo *cco = opaque;
    assert(cco->drv);

410 411 412 413 414
    ret = cco->drv->bdrv_create(cco->filename, cco->options, &local_err);
    if (error_is_set(&local_err)) {
        error_propagate(&cco->err, local_err);
    }
    cco->ret = ret;
415 416
}

417
int bdrv_create(BlockDriver *drv, const char* filename,
418
    QEMUOptionParameter *options, Error **errp)
bellard's avatar
bellard committed
419
{
420 421 422 423 424 425 426 427
    int ret;

    Coroutine *co;
    CreateCo cco = {
        .drv = drv,
        .filename = g_strdup(filename),
        .options = options,
        .ret = NOT_DONE,
428
        .err = NULL,
429 430 431
    };

    if (!drv->bdrv_create) {
432
        error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
433 434
        ret = -ENOTSUP;
        goto out;
435 436 437 438 439 440 441 442 443 444 445 446 447 448
    }

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_create_co_entry(&cco);
    } else {
        co = qemu_coroutine_create(bdrv_create_co_entry);
        qemu_coroutine_enter(co, &cco);
        while (cco.ret == NOT_DONE) {
            qemu_aio_wait();
        }
    }

    ret = cco.ret;
449 450 451 452 453 454 455
    if (ret < 0) {
        if (error_is_set(&cco.err)) {
            error_propagate(errp, cco.err);
        } else {
            error_setg_errno(errp, -ret, "Could not create image");
        }
    }
456

457 458
out:
    g_free(cco.filename);
459
    return ret;
bellard's avatar
bellard committed
460 461
}

462 463
/*
 * Create an image file using the protocol driver found for filename.
 *
 * Returns -ENOENT (and sets *errp) if no protocol driver is found,
 * otherwise the result of bdrv_create().
 */
int bdrv_create_file(const char* filename, QEMUOptionParameter *options,
                     Error **errp)
{
    Error *local_err = NULL;
    BlockDriver *drv = bdrv_find_protocol(filename, true);
    int ret;

    if (!drv) {
        error_setg(errp, "Could not find protocol for file '%s'", filename);
        return -ENOENT;
    }

    ret = bdrv_create(drv, filename, options, &local_err);
    if (error_is_set(&local_err)) {
        error_propagate(errp, local_err);
    }

    return ret;
}

482 483 484 485 486
/*
 * Create a uniquely-named empty temporary file.
 * Return 0 upon success, otherwise a negative errno value.
 *
 * @filename: buffer that receives the path of the new file
 * @size:     size of @filename in bytes
 *
 * On POSIX hosts the file is created in $TMPDIR, or in /tmp when TMPDIR is
 * unset.  -EOVERFLOW is returned when the path does not fit in @filename.
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater.  */
    assert(size >= MAX_PATH);
    /* NOTE(review): the failure value is a negated GetLastError() code,
       which is not an errno value -- confirm callers cope with that. */
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    int saved_errno;
    const char *tmpdir;

    /* a non-positive size would turn into a huge size_t inside snprintf() */
    if (size <= 0) {
        return -EOVERFLOW;
    }

    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/tmp";
    }
    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
        return -EOVERFLOW;
    }

    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        /* unlink() may overwrite errno; report the close() failure */
        saved_errno = errno;
        unlink(filename);
        return -saved_errno;
    }
    return 0;
#endif
}
bellard's avatar
bellard committed
516

517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538
/*
 * Detect host devices. By convention, /dev/cdrom[N] is always
 * recognized as a host CDROM.
 *
 * Every registered driver with a bdrv_probe_device callback is asked to
 * score filename.  The driver with the highest positive score is
 * returned (the first one seen on a tie), or NULL if none scored above 0.
 */
static BlockDriver *find_hdev_driver(const char *filename)
{
    BlockDriver *best = NULL;
    BlockDriver *candidate;
    int best_score = 0;

    QLIST_FOREACH(candidate, &bdrv_drivers, list) {
        int score;

        if (!candidate->bdrv_probe_device) {
            continue;
        }
        score = candidate->bdrv_probe_device(filename);
        if (score > best_score) {
            best_score = score;
            best = candidate;
        }
    }

    return best;
}

539 540
BlockDriver *bdrv_find_protocol(const char *filename,
                                bool allow_protocol_prefix)
bellard's avatar
bellard committed
541 542 543
{
    BlockDriver *drv1;
    char protocol[128];
544
    int len;
bellard's avatar
bellard committed
545
    const char *p;
bellard's avatar
bellard committed
546

547 548
    /* TODO Drivers without bdrv_file_open must be specified explicitly */

549 550 551 552 553 554 555 556 557 558 559 560
    /*
     * XXX(hch): we really should not let host device detection
     * override an explicit protocol specification, but moving this
     * later breaks access to device names with colons in them.
     * Thanks to the brain-dead persistent naming schemes on udev-
     * based Linux systems those actually are quite common.
     */
    drv1 = find_hdev_driver(filename);
    if (drv1) {
        return drv1;
    }

561
    if (!path_has_protocol(filename) || !allow_protocol_prefix) {
562
        return bdrv_find_format("file");
563
    }
564

565 566
    p = strchr(filename, ':');
    assert(p != NULL);
567 568 569 570 571
    len = p - filename;
    if (len > sizeof(protocol) - 1)
        len = sizeof(protocol) - 1;
    memcpy(protocol, filename, len);
    protocol[len] = '\0';
572
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
573
        if (drv1->protocol_name &&
574
            !strcmp(drv1->protocol_name, protocol)) {
bellard's avatar
bellard committed
575
            return drv1;
576
        }
bellard's avatar
bellard committed
577 578 579 580
    }
    return NULL;
}

581
static int find_image_format(BlockDriverState *bs, const char *filename,
582
                             BlockDriver **pdrv, Error **errp)
583
{
584
    int score, score_max;
585 586
    BlockDriver *drv1, *drv;
    uint8_t buf[2048];
587
    int ret = 0;
588

589
    /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
590
    if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
591 592
        drv = bdrv_find_format("raw");
        if (!drv) {
593
            error_setg(errp, "Could not find raw image format");
594 595 596 597
            ret = -ENOENT;
        }
        *pdrv = drv;
        return ret;
598
    }
599

bellard's avatar
bellard committed
600 601
    ret = bdrv_pread(bs, 0, buf, sizeof(buf));
    if (ret < 0) {
602 603
        error_setg_errno(errp, -ret, "Could not read image for determining its "
                         "format");
604 605
        *pdrv = NULL;
        return ret;
bellard's avatar
bellard committed
606 607
    }

bellard's avatar
bellard committed
608
    score_max = 0;
609
    drv = NULL;
610
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
bellard's avatar
bellard committed
611 612 613 614 615 616
        if (drv1->bdrv_probe) {
            score = drv1->bdrv_probe(buf, ret, filename);
            if (score > score_max) {
                score_max = score;
                drv = drv1;
            }
bellard's avatar
bellard committed
617
        }
bellard's avatar
bellard committed
618
    }
619
    if (!drv) {
620 621
        error_setg(errp, "Could not determine image format: No compatible "
                   "driver found");
622 623 624 625
        ret = -ENOENT;
    }
    *pdrv = drv;
    return ret;
bellard's avatar
bellard committed
626 627
}

628 629 630 631 632 633 634
/**
 * Set the current 'total_sectors' value
 *
 * @hint is used as the sector count when the driver has no bdrv_getlength
 * callback; otherwise the driver's length, rounded up to whole sectors,
 * is used.  scsi-generic devices are left untouched.
 *
 * Returns 0 on success or the negative value reported by the driver.
 */
static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
{
    int64_t sectors = hint;

    /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
    if (bs->sg) {
        return 0;
    }

    /* query actual device if possible, otherwise just trust the hint */
    if (bs->drv->bdrv_getlength) {
        int64_t length = bs->drv->bdrv_getlength(bs);

        if (length < 0) {
            return length;
        }
        sectors = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
    }

    bs->total_sectors = sectors;
    return 0;
}

652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671
/**
 * Set open flags for a given discard mode
 *
 * BDRV_O_UNMAP is always cleared first, even for an invalid mode.
 * "off"/"ignore" leave it cleared, "on"/"unmap" set it.
 *
 * Return 0 on success, -1 if the discard mode was invalid.
 */
int bdrv_parse_discard_flags(const char *mode, int *flags)
{
    *flags &= ~BDRV_O_UNMAP;

    if (strcmp(mode, "off") == 0 || strcmp(mode, "ignore") == 0) {
        return 0;
    }

    if (strcmp(mode, "on") == 0 || strcmp(mode, "unmap") == 0) {
        *flags |= BDRV_O_UNMAP;
        return 0;
    }

    return -1;
}

672 673 674 675 676 677 678 679 680 681 682
/**
 * Set open flags for a given cache mode
 *
 * The bits in BDRV_O_CACHE_MASK are always cleared first, even for an
 * invalid mode; the bits of the selected mode are then set.
 *
 * Return 0 on success, -1 if the cache mode was invalid.
 */
int bdrv_parse_cache_flags(const char *mode, int *flags)
{
    const struct {
        const char *name;
        int bits;
    } cache_modes[] = {
        { "off",          BDRV_O_NOCACHE | BDRV_O_CACHE_WB },
        { "none",         BDRV_O_NOCACHE | BDRV_O_CACHE_WB },
        { "directsync",   BDRV_O_NOCACHE },
        { "writeback",    BDRV_O_CACHE_WB },
        { "unsafe",       BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH },
        { "writethrough", 0 },      /* this is the default */
    };
    unsigned int i;

    *flags &= ~BDRV_O_CACHE_MASK;

    for (i = 0; i < sizeof(cache_modes) / sizeof(cache_modes[0]); i++) {
        if (strcmp(mode, cache_modes[i].name) == 0) {
            *flags |= cache_modes[i].bits;
            return 0;
        }
    }

    return -1;
}

699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714
/**
 * The copy-on-read flag is actually a reference count so multiple users may
 * use the feature without worrying about clobbering its previous state.
 * Copy-on-read stays enabled until all users have called to disable it.
 *
 * This takes one reference.
 */
void bdrv_enable_copy_on_read(BlockDriverState *bs)
{
    bs->copy_on_read += 1;
}

/*
 * Drop one copy-on-read reference of bs.  The count must be positive
 * (asserted).
 */
void bdrv_disable_copy_on_read(BlockDriverState *bs)
{
    assert(bs->copy_on_read > 0);
    bs->copy_on_read -= 1;
}

715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734
/*
 * Compute the flags passed to the driver's open callback from the flags
 * the caller requested: BDRV_O_CACHE_WB is always set, the flags that are
 * internal to the block layer are cleared, and temporary images are
 * opened read-write.
 */
static int bdrv_open_flags(BlockDriverState *bs, int flags)
{
    /* flags that are internal to the block layer */
    const int internal_only = BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING;
    int open_flags = (flags | BDRV_O_CACHE_WB) & ~internal_only;

    /* Snapshots should be writable. */
    return bs->is_temporary ? (open_flags | BDRV_O_RDWR) : open_flags;
}

735 736
/*
 * Common part for opening disk images and files
737 738
 *
 * Removes all processed options from *options.
739
 */
740
static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
741
    QDict *options, int flags, BlockDriver *drv, Error **errp)
742 743
{
    int ret, open_flags;
Kevin Wolf's avatar
Kevin Wolf committed
744
    const char *filename;
745
    Error *local_err = NULL;
746 747

    assert(drv != NULL);
748
    assert(bs->file == NULL);
749
    assert(options != NULL && bs->options != options);
750

751 752 753 754 755 756 757
    if (file != NULL) {
        filename = file->filename;
    } else {
        filename = qdict_get_try_str(options, "filename");
    }

    trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);
758

759 760 761 762 763 764 765 766
    /* bdrv_open() with directly using a protocol as drv. This layer is already
     * opened, so assign it to bs (while file becomes a closed BlockDriverState)
     * and return immediately. */
    if (file != NULL && drv->bdrv_file_open) {
        bdrv_swap(file, bs);
        return 0;
    }

767 768
    bs->open_flags = flags;
    bs->buffer_alignment = 512;
769
    bs->zero_beyond_eof = true;
770 771 772 773
    open_flags = bdrv_open_flags(bs, flags);
    bs->read_only = !(open_flags & BDRV_O_RDWR);

    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
774 775 776 777 778
        error_setg(errp,
                   !bs->read_only && bdrv_is_whitelisted(drv, true)
                        ? "Driver '%s' can only be used for read-only devices"
                        : "Driver '%s' is not whitelisted",
                   drv->format_name);
779 780
        return -ENOTSUP;
    }
781

782
    assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
783 784 785 786 787 788 789
    if (flags & BDRV_O_COPY_ON_READ) {
        if (!bs->read_only) {
            bdrv_enable_copy_on_read(bs);
        } else {
            error_setg(errp, "Can't use copy-on-read on read-only device");
            return -EINVAL;
        }
790 791
    }

792 793 794 795 796
    if (filename != NULL) {
        pstrcpy(bs->filename, sizeof(bs->filename), filename);
    } else {
        bs->filename[0] = '\0';
    }
797 798

    bs->drv = drv;
799
    bs->opaque = g_malloc0(drv->instance_size);
800

801
    bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
802

803 804
    /* Open the image, either directly or using a protocol */
    if (drv->bdrv_file_open) {
805
        assert(file == NULL);
806
        assert(!drv->bdrv_needs_filename || filename != NULL);
807
        ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
808
    } else {
809
        if (file == NULL) {
810 811
            error_setg(errp, "Can't use '%s' as a block driver for the "
                       "protocol level", drv->format_name);
812 813 814
            ret = -EINVAL;
            goto free_and_fail;
        }
815
        bs->file = file;
816
        ret = drv->bdrv_open(bs, options, open_flags, &local_err);
817 818
    }

819
    if (ret < 0) {
820 821
        if (error_is_set(&local_err)) {
            error_propagate(errp, local_err);
822 823
        } else if (bs->filename[0]) {
            error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
824 825 826
        } else {
            error_setg_errno(errp, -ret, "Could not open image");
        }
827 828 829
        goto free_and_fail;
    }

830 831
    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
832
        error_setg_errno(errp, -ret, "Could not refresh total sector count");
833
        goto free_and_fail;
834
    }
835

836 837
#ifndef _WIN32
    if (bs->is_temporary) {
Dunrong Huang's avatar
Dunrong Huang committed
838 839
        assert(bs->filename[0] != '\0');
        unlink(bs->filename);
840 841 842 843 844
    }
#endif
    return 0;

free_and_fail:
845
    bs->file = NULL;
846
    g_free(bs->opaque);
847 848 849 850 851
    bs->opaque = NULL;
    bs->drv = NULL;
    return ret;
}

Kevin Wolf's avatar
Kevin Wolf committed
852 853
/*
 * Opens a file using a protocol (file, host_device, nbd, ...)
854 855 856 857 858
 *
 * options is a QDict of options to pass to the block drivers, or NULL for an
 * empty set of options. The reference to the QDict belongs to the block layer
 * after the call (even on failure), so if the caller intends to reuse the
 * dictionary, it needs to use QINCREF() before calling bdrv_file_open.
Kevin Wolf's avatar
Kevin Wolf committed
859
 */
860
int bdrv_file_open(BlockDriverState **pbs, const char *filename,
861 862
                   const char *reference, QDict *options, int flags,
                   Error **errp)
bellard's avatar
bellard committed
863
{
864
    BlockDriverState *bs = NULL;
865
    BlockDriver *drv;
866
    const char *drvname;
867
    bool allow_protocol_prefix = false;
868
    Error *local_err = NULL;
bellard's avatar
bellard committed
869 870
    int ret;

871 872 873 874 875
    /* NULL means an empty set of options */
    if (options == NULL) {
        options = qdict_new();
    }

876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893
    if (reference) {
        if (filename || qdict_size(options)) {
            error_setg(errp, "Cannot reference an existing block device with "
                       "additional options or a new filename");
            return -EINVAL;
        }
        QDECREF(options);

        bs = bdrv_find(reference);
        if (!bs) {
            error_setg(errp, "Cannot find block device '%s'", reference);
            return -ENODEV;
        }
        bdrv_ref(bs);
        *pbs = bs;
        return 0;
    }

bellard's avatar
bellard committed
894
    bs = bdrv_new("");
895 896 897
    bs->options = options;
    options = qdict_clone_shallow(options);

Kevin Wolf's avatar
Kevin Wolf committed
898 899 900 901 902
    /* Fetch the file name from the options QDict if necessary */
    if (!filename) {
        filename = qdict_get_try_str(options, "filename");
    } else if (filename && !qdict_haskey(options, "filename")) {
        qdict_put(options, "filename", qstring_from_str(filename));
903
        allow_protocol_prefix = true;
Kevin Wolf's avatar
Kevin Wolf committed
904
    } else {
905 906
        error_setg(errp, "Can't specify 'file' and 'filename' options at the "
                   "same time");
Kevin Wolf's avatar
Kevin Wolf committed
907 908 909 910
        ret = -EINVAL;
        goto fail;
    }

911 912 913
    /* Find the right block driver */
    drvname = qdict_get_try_str(options, "driver");
    if (drvname) {
914
        drv = bdrv_find_format(drvname);
915 916 917
        if (!drv) {
            error_setg(errp, "Unknown driver '%s'", drvname);
        }
918 919
        qdict_del(options, "driver");
    } else if (filename) {
920 921
        drv = bdrv_find_protocol(filename, allow_protocol_prefix);
        if (!drv) {
922
            error_setg(errp, "Unknown protocol");
923
        }
924
    } else {
925
        error_setg(errp, "Must specify either driver or file");
926 927 928 929
        drv = NULL;
    }

    if (!drv) {
930
        /* errp has been set already */
931 932 933 934 935 936
        ret = -ENOENT;
        goto fail;
    }

    /* Parse the filename and open it */
    if (drv->bdrv_parse_filename && filename) {
937 938
        drv->bdrv_parse_filename(filename, options, &local_err);
        if (error_is_set(&local_err)) {
939
            error_propagate(errp, local_err);
940 941 942
            ret = -EINVAL;
            goto fail;
        }
943
        qdict_del(options, "filename");
944
    } else if (drv->bdrv_needs_filename && !filename) {
945 946
        error_setg(errp, "The '%s' block driver requires a file name",
                   drv->format_name);
947 948
        ret = -EINVAL;
        goto fail;
949 950
    }

951
    ret = bdrv_open_common(bs, NULL, options, flags, drv, &local_err);
bellard's avatar
bellard committed
952
    if (ret < 0) {
953
        error_propagate(errp, local_err);
954 955 956 957 958 959
        goto fail;
    }

    /* Check if any unknown options were used */
    if (qdict_size(options) != 0) {
        const QDictEntry *entry = qdict_first(options);
960 961
        error_setg(errp, "Block protocol '%s' doesn't support the option '%s'",
                   drv->format_name, entry->key);
962 963
        ret = -EINVAL;
        goto fail;
964
    }
965 966
    QDECREF(options);

967
    bs->growable = 1;
bellard's avatar
bellard committed
968 969
    *pbs = bs;
    return 0;
970 971 972 973 974 975

fail:
    QDECREF(options);
    if (!bs->drv) {
        QDECREF(bs->options);
    }
Fam Zheng's avatar
Fam Zheng committed
976
    bdrv_unref(bs);
977
    return ret;
bellard's avatar
bellard committed
978 979
}

980 981 982 983 984 985 986 987
/*
 * Opens the backing file for a BlockDriverState if not yet open
 *
 * options is a QDict of options to pass to the block drivers, or NULL for an
 * empty set of options. The reference to the QDict is transferred to this
 * function (even on failure), so if the caller intends to reuse the dictionary,
 * it needs to use QINCREF() before calling bdrv_file_open.
 */
988
int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
989 990 991 992
{
    char backing_filename[PATH_MAX];
    int back_flags, ret;
    BlockDriver *back_drv = NULL;
993
    Error *local_err = NULL;
994 995

    if (bs->backing_hd != NULL) {
996
        QDECREF(options);
997 998 999
        return 0;
    }

1000 1001 1002 1003 1004
    /* NULL means an empty set of options */
    if (options == NULL) {
        options = qdict_new();
    }

1005
    bs->open_flags &= ~BDRV_O_NO_BACKING;
1006 1007 1008
    if (qdict_haskey(options, "file.filename")) {
        backing_filename[0] = '\0';
    } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
1009
        QDECREF(options);
1010
        return 0;
1011 1012 1013
    } else {
        bdrv_get_full_backing_filename(bs, backing_filename,
                                       sizeof(backing_filename));
1014 1015 1016 1017 1018 1019 1020 1021 1022
    }

    bs->backing_hd = bdrv_new("");

    if (bs->backing_format[0] != '\0') {
        back_drv = bdrv_find_format(bs->backing_format);
    }

    /* backing files always opened read-only */
1023 1024
    back_flags = bs->open_flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT |
                                    BDRV_O_COPY_ON_READ);
1025

1026 1027
    ret = bdrv_open(bs->backing_hd,
                    *backing_filename ? backing_filename : NULL, options,
1028
                    back_flags, back_drv, &local_err);
1029
    if (ret < 0) {
Fam Zheng's avatar
Fam Zheng committed
1030
        bdrv_unref(bs->backing_hd);
1031 1032
        bs->backing_hd = NULL;
        bs->open_flags |= BDRV_O_NO_BACKING;
1033 1034 1035
        error_setg(errp, "Could not open backing file: %s",
                   error_get_pretty(local_err));
        error_free(local_err);
1036 1037
        return ret;
    }
1038 1039
    pstrcpy(bs->backing_file, sizeof(bs->backing_file),
            bs->backing_hd->file->filename);
1040 1041 1042
    return 0;
}

Kevin Wolf's avatar
Kevin Wolf committed
1043 1044
/*
 * Opens a disk image (raw, qcow2, vmdk, ...)
1045 1046 1047 1048 1049
 *
 * options is a QDict of options to pass to the block drivers, or NULL for an
 * empty set of options. The reference to the QDict belongs to the block layer
 * after the call (even on failure), so if the caller intends to reuse the
 * dictionary, it needs to use QINCREF() before calling bdrv_open.
Kevin Wolf's avatar
Kevin Wolf committed
1050
 */
1051
int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options,
1052
              int flags, BlockDriver *drv, Error **errp)
bellard's avatar
bellard committed
1053
{
Kevin Wolf's avatar
Kevin Wolf committed
1054
    int ret;
1055 1056
    /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
    char tmp_filename[PATH_MAX + 1];
1057
    BlockDriverState *file = NULL;
1058
    QDict *file_options = NULL;
1059
    const char *file_reference;
1060
    const char *drvname;
1061
    Error *local_err = NULL;
bellard's avatar
bellard committed
1062

1063 1064 1065 1066 1067 1068
    /* NULL means an empty set of options */
    if (options == NULL) {
        options = qdict_new();
    }

    bs->options = options;
1069
    options = qdict_clone_shallow(options);
1070 1071

    /* For snapshot=on, create a temporary qcow2 overlay */
bellard's avatar
bellard committed
1072
    if (flags & BDRV_O_SNAPSHOT) {
bellard's avatar
bellard committed
1073 1074
        BlockDriverState *bs1;
        int64_t total_size;
Kevin Wolf's avatar
Kevin Wolf committed
1075
        BlockDriver *bdrv_qcow2;
1076
        QEMUOptionParameter *create_options;
1077
        QDict *snapshot_options;
1078

bellard's avatar
bellard committed
1079 1080
        /* if snapshot, we create a temporary backing file and open it
           instead of opening 'filename' directly */
1081

1082
        /* Get the required size from the image */
bellard's avatar
bellard committed
1083
        bs1 = bdrv_new("");
1084
        QINCREF(options);
1085 1086
        ret = bdrv_open(bs1, filename, options, BDRV_O_NO_BACKING,
                        drv, &local_err);
1087
        if (ret < 0) {
Fam Zheng's avatar
Fam Zheng committed
1088
            bdrv_unref(bs1);
1089
            goto fail;
bellard's avatar
bellard committed
1090
        }
1091
        total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
aliguori's avatar
aliguori committed
1092

Fam Zheng's avatar
Fam Zheng committed
1093
        bdrv_unref(bs1);
1094

1095
        /* Create the temporary image */
1096 1097
        ret = get_tmp_filename(tmp_filename, sizeof(tmp_filename));
        if (ret < 0) {
1098
            error_setg_errno(errp, -ret, "Could not get temporary filename");
1099
            goto fail;
1100
        }
aliguori's avatar
aliguori committed
1101

Kevin Wolf's avatar
Kevin Wolf committed
1102
        bdrv_qcow2 = bdrv_find_format("qcow2");
1103 1104
        create_options = parse_option_parameters("", bdrv_qcow2->create_options,
                                                 NULL);
Kevin Wolf's avatar
Kevin Wolf committed
1105

1106
        set_option_parameter_int(create_options, BLOCK_OPT_SIZE, total_size);
Kevin Wolf's avatar
Kevin Wolf committed
1107

1108
        ret = bdrv_create(bdrv_qcow2, tmp_filename, create_options, &local_err);