Commit 26c5eaa1 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'ceph-for-4.12-rc1' of git://github.com/ceph/ceph-client

Pull ceph updates from Ilya Dryomov:
 "The two main items are support for disabling automatic rbd exclusive
  lock transfers from myself and the long awaited -ENOSPC handling
  series from Jeff.

  The former will allow rbd users to take advantage of exclusive lock's
  built-in blacklist/break-lock functionality while staying in control
  of who owns the lock. With the latter in place, we will abort
  filesystem writes on -ENOSPC instead of having them block
  indefinitely.

  Beyond that we've got the usual pile of filesystem fixes from Zheng,
  some refcount_t conversion patches from Elena and a patch for an
  ancient open() flags handling bug from Alexander"

* tag 'ceph-for-4.12-rc1' of git://github.com/ceph/ceph-client: (31 commits)
  ceph: fix memory leak in __ceph_setxattr()
  ceph: fix file open flags on ppc64
  ceph: choose readdir frag based on previous readdir reply
  rbd: exclusive map option
  rbd: return ResponseMessage result from rbd_handle_request_lock()
  rbd: kill rbd_is_lock_supported()
  rbd: support updating the lock cookie without releasing the lock
  rbd: store lock cookie
  rbd: ignore unlock errors
  rbd: fix error handling around rbd_init_disk()
  rbd: move rbd_unregister_watch() call into rbd_dev_image_release()
  rbd: move rbd_dev_destroy() call out of rbd_dev_image_release()
  ceph: when seeing write errors on an inode, switch to sync writes
  Revert "ceph: SetPageError() for writeback pages if writepages fails"
  ceph: handle epoch barriers in cap messages
  libceph: add an epoch_barrier field to struct ceph_osd_client
  libceph: abort already submitted but abortable requests when map or pool goes full
  libceph: allow requests to return immediately on full conditions if caller wishes
  libceph: remove req->r_replay_version
  ceph: make seeky readdir more efficient
  ...
parents 1176032c eeca958d
......@@ -387,6 +387,7 @@ struct rbd_device {
struct rw_semaphore lock_rwsem;
enum rbd_lock_state lock_state;
char lock_cookie[32];
struct rbd_client_id owner_cid;
struct work_struct acquired_lock_work;
struct work_struct released_lock_work;
......@@ -477,13 +478,6 @@ static int minor_to_rbd_dev_id(int minor)
return minor >> RBD_SINGLE_MAJOR_PART_SHIFT;
}
static bool rbd_is_lock_supported(struct rbd_device *rbd_dev)
{
return (rbd_dev->header.features & RBD_FEATURE_EXCLUSIVE_LOCK) &&
rbd_dev->spec->snap_id == CEPH_NOSNAP &&
!rbd_dev->mapping.read_only;
}
static bool __rbd_is_lock_owner(struct rbd_device *rbd_dev)
{
return rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED ||
......@@ -731,7 +725,7 @@ static struct rbd_client *rbd_client_create(struct ceph_options *ceph_opts)
kref_init(&rbdc->kref);
INIT_LIST_HEAD(&rbdc->node);
rbdc->client = ceph_create_client(ceph_opts, rbdc, 0, 0);
rbdc->client = ceph_create_client(ceph_opts, rbdc);
if (IS_ERR(rbdc->client))
goto out_rbdc;
ceph_opts = NULL; /* Now rbdc->client is responsible for ceph_opts */
......@@ -804,6 +798,7 @@ enum {
Opt_read_only,
Opt_read_write,
Opt_lock_on_read,
Opt_exclusive,
Opt_err
};
......@@ -816,6 +811,7 @@ static match_table_t rbd_opts_tokens = {
{Opt_read_write, "read_write"},
{Opt_read_write, "rw"}, /* Alternate spelling */
{Opt_lock_on_read, "lock_on_read"},
{Opt_exclusive, "exclusive"},
{Opt_err, NULL}
};
......@@ -823,11 +819,13 @@ struct rbd_options {
int queue_depth;
bool read_only;
bool lock_on_read;
bool exclusive;
};
#define RBD_QUEUE_DEPTH_DEFAULT BLKDEV_MAX_RQ
#define RBD_READ_ONLY_DEFAULT false
#define RBD_LOCK_ON_READ_DEFAULT false
#define RBD_EXCLUSIVE_DEFAULT false
static int parse_rbd_opts_token(char *c, void *private)
{
......@@ -866,6 +864,9 @@ static int parse_rbd_opts_token(char *c, void *private)
case Opt_lock_on_read:
rbd_opts->lock_on_read = true;
break;
case Opt_exclusive:
rbd_opts->exclusive = true;
break;
default:
/* libceph prints "bad option" msg */
return -EINVAL;
......@@ -3079,7 +3080,8 @@ static int rbd_lock(struct rbd_device *rbd_dev)
char cookie[32];
int ret;
WARN_ON(__rbd_is_lock_owner(rbd_dev));
WARN_ON(__rbd_is_lock_owner(rbd_dev) ||
rbd_dev->lock_cookie[0] != '\0');
format_lock_cookie(rbd_dev, cookie);
ret = ceph_cls_lock(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc,
......@@ -3089,6 +3091,7 @@ static int rbd_lock(struct rbd_device *rbd_dev)
return ret;
rbd_dev->lock_state = RBD_LOCK_STATE_LOCKED;
strcpy(rbd_dev->lock_cookie, cookie);
rbd_set_owner_cid(rbd_dev, &cid);
queue_work(rbd_dev->task_wq, &rbd_dev->acquired_lock_work);
return 0;
......@@ -3097,27 +3100,24 @@ static int rbd_lock(struct rbd_device *rbd_dev)
/*
* lock_rwsem must be held for write
*/
static int rbd_unlock(struct rbd_device *rbd_dev)
static void rbd_unlock(struct rbd_device *rbd_dev)
{
struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
char cookie[32];
int ret;
WARN_ON(!__rbd_is_lock_owner(rbd_dev));
rbd_dev->lock_state = RBD_LOCK_STATE_UNLOCKED;
WARN_ON(!__rbd_is_lock_owner(rbd_dev) ||
rbd_dev->lock_cookie[0] == '\0');
format_lock_cookie(rbd_dev, cookie);
ret = ceph_cls_unlock(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc,
RBD_LOCK_NAME, cookie);
if (ret && ret != -ENOENT) {
rbd_warn(rbd_dev, "cls_unlock failed: %d", ret);
return ret;
}
RBD_LOCK_NAME, rbd_dev->lock_cookie);
if (ret && ret != -ENOENT)
rbd_warn(rbd_dev, "failed to unlock: %d", ret);
/* treat errors as the image is unlocked */
rbd_dev->lock_state = RBD_LOCK_STATE_UNLOCKED;
rbd_dev->lock_cookie[0] = '\0';
rbd_set_owner_cid(rbd_dev, &rbd_empty_cid);
queue_work(rbd_dev->task_wq, &rbd_dev->released_lock_work);
return 0;
}
static int __rbd_notify_op_lock(struct rbd_device *rbd_dev,
......@@ -3447,6 +3447,18 @@ static void rbd_acquire_lock(struct work_struct *work)
ret = rbd_request_lock(rbd_dev);
if (ret == -ETIMEDOUT) {
goto again; /* treat this as a dead client */
} else if (ret == -EROFS) {
rbd_warn(rbd_dev, "peer will not release lock");
/*
* If this is rbd_add_acquire_lock(), we want to fail
* immediately -- reuse BLACKLISTED flag. Otherwise we
* want to block.
*/
if (!(rbd_dev->disk->flags & GENHD_FL_UP)) {
set_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags);
/* wake "rbd map --exclusive" process */
wake_requests(rbd_dev, false);
}
} else if (ret < 0) {
rbd_warn(rbd_dev, "error requesting lock: %d", ret);
mod_delayed_work(rbd_dev->task_wq, &rbd_dev->lock_dwork,
......@@ -3490,16 +3502,15 @@ static bool rbd_release_lock(struct rbd_device *rbd_dev)
if (rbd_dev->lock_state != RBD_LOCK_STATE_RELEASING)
return false;
if (!rbd_unlock(rbd_dev))
/*
* Give others a chance to grab the lock - we would re-acquire
* almost immediately if we got new IO during ceph_osdc_sync()
* otherwise. We need to ack our own notifications, so this
* lock_dwork will be requeued from rbd_wait_state_locked()
* after wake_requests() in rbd_handle_released_lock().
*/
cancel_delayed_work(&rbd_dev->lock_dwork);
rbd_unlock(rbd_dev);
/*
* Give others a chance to grab the lock - we would re-acquire
* almost immediately if we got new IO during ceph_osdc_sync()
* otherwise. We need to ack our own notifications, so this
* lock_dwork will be requeued from rbd_wait_state_locked()
* after wake_requests() in rbd_handle_released_lock().
*/
cancel_delayed_work(&rbd_dev->lock_dwork);
return true;
}
......@@ -3580,12 +3591,16 @@ static void rbd_handle_released_lock(struct rbd_device *rbd_dev, u8 struct_v,
up_read(&rbd_dev->lock_rwsem);
}
static bool rbd_handle_request_lock(struct rbd_device *rbd_dev, u8 struct_v,
void **p)
/*
* Returns result for ResponseMessage to be encoded (<= 0), or 1 if no
* ResponseMessage is needed.
*/
static int rbd_handle_request_lock(struct rbd_device *rbd_dev, u8 struct_v,
void **p)
{
struct rbd_client_id my_cid = rbd_get_cid(rbd_dev);
struct rbd_client_id cid = { 0 };
bool need_to_send;
int result = 1;
if (struct_v >= 2) {
cid.gid = ceph_decode_64(p);
......@@ -3595,19 +3610,36 @@ static bool rbd_handle_request_lock(struct rbd_device *rbd_dev, u8 struct_v,
dout("%s rbd_dev %p cid %llu-%llu\n", __func__, rbd_dev, cid.gid,
cid.handle);
if (rbd_cid_equal(&cid, &my_cid))
return false;
return result;
down_read(&rbd_dev->lock_rwsem);
need_to_send = __rbd_is_lock_owner(rbd_dev);
if (rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED) {
if (!rbd_cid_equal(&rbd_dev->owner_cid, &rbd_empty_cid)) {
dout("%s rbd_dev %p queueing unlock_work\n", __func__,
rbd_dev);
queue_work(rbd_dev->task_wq, &rbd_dev->unlock_work);
if (__rbd_is_lock_owner(rbd_dev)) {
if (rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED &&
rbd_cid_equal(&rbd_dev->owner_cid, &rbd_empty_cid))
goto out_unlock;
/*
* encode ResponseMessage(0) so the peer can detect
* a missing owner
*/
result = 0;
if (rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED) {
if (!rbd_dev->opts->exclusive) {
dout("%s rbd_dev %p queueing unlock_work\n",
__func__, rbd_dev);
queue_work(rbd_dev->task_wq,
&rbd_dev->unlock_work);
} else {
/* refuse to release the lock */
result = -EROFS;
}
}
}
out_unlock:
up_read(&rbd_dev->lock_rwsem);
return need_to_send;
return result;
}
static void __rbd_acknowledge_notify(struct rbd_device *rbd_dev,
......@@ -3690,13 +3722,10 @@ static void rbd_watch_cb(void *arg, u64 notify_id, u64 cookie,
rbd_acknowledge_notify(rbd_dev, notify_id, cookie);
break;
case RBD_NOTIFY_OP_REQUEST_LOCK:
if (rbd_handle_request_lock(rbd_dev, struct_v, &p))
/*
* send ResponseMessage(0) back so the client
* can detect a missing owner
*/
ret = rbd_handle_request_lock(rbd_dev, struct_v, &p);
if (ret <= 0)
rbd_acknowledge_notify_result(rbd_dev, notify_id,
cookie, 0);
cookie, ret);
else
rbd_acknowledge_notify(rbd_dev, notify_id, cookie);
break;
......@@ -3821,24 +3850,51 @@ static void rbd_unregister_watch(struct rbd_device *rbd_dev)
ceph_osdc_flush_notifies(&rbd_dev->rbd_client->client->osdc);
}
/*
* lock_rwsem must be held for write
*/
static void rbd_reacquire_lock(struct rbd_device *rbd_dev)
{
struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
char cookie[32];
int ret;
WARN_ON(rbd_dev->lock_state != RBD_LOCK_STATE_LOCKED);
format_lock_cookie(rbd_dev, cookie);
ret = ceph_cls_set_cookie(osdc, &rbd_dev->header_oid,
&rbd_dev->header_oloc, RBD_LOCK_NAME,
CEPH_CLS_LOCK_EXCLUSIVE, rbd_dev->lock_cookie,
RBD_LOCK_TAG, cookie);
if (ret) {
if (ret != -EOPNOTSUPP)
rbd_warn(rbd_dev, "failed to update lock cookie: %d",
ret);
/*
* Lock cookie cannot be updated on older OSDs, so do
* a manual release and queue an acquire.
*/
if (rbd_release_lock(rbd_dev))
queue_delayed_work(rbd_dev->task_wq,
&rbd_dev->lock_dwork, 0);
} else {
strcpy(rbd_dev->lock_cookie, cookie);
}
}
static void rbd_reregister_watch(struct work_struct *work)
{
struct rbd_device *rbd_dev = container_of(to_delayed_work(work),
struct rbd_device, watch_dwork);
bool was_lock_owner = false;
bool need_to_wake = false;
int ret;
dout("%s rbd_dev %p\n", __func__, rbd_dev);
down_write(&rbd_dev->lock_rwsem);
if (rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED)
was_lock_owner = rbd_release_lock(rbd_dev);
mutex_lock(&rbd_dev->watch_mutex);
if (rbd_dev->watch_state != RBD_WATCH_STATE_ERROR) {
mutex_unlock(&rbd_dev->watch_mutex);
goto out;
return;
}
ret = __rbd_register_watch(rbd_dev);
......@@ -3846,36 +3902,28 @@ static void rbd_reregister_watch(struct work_struct *work)
rbd_warn(rbd_dev, "failed to reregister watch: %d", ret);
if (ret == -EBLACKLISTED || ret == -ENOENT) {
set_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags);
need_to_wake = true;
wake_requests(rbd_dev, true);
} else {
queue_delayed_work(rbd_dev->task_wq,
&rbd_dev->watch_dwork,
RBD_RETRY_DELAY);
}
mutex_unlock(&rbd_dev->watch_mutex);
goto out;
return;
}
need_to_wake = true;
rbd_dev->watch_state = RBD_WATCH_STATE_REGISTERED;
rbd_dev->watch_cookie = rbd_dev->watch_handle->linger_id;
mutex_unlock(&rbd_dev->watch_mutex);
down_write(&rbd_dev->lock_rwsem);
if (rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED)
rbd_reacquire_lock(rbd_dev);
up_write(&rbd_dev->lock_rwsem);
ret = rbd_dev_refresh(rbd_dev);
if (ret)
rbd_warn(rbd_dev, "reregisteration refresh failed: %d", ret);
if (was_lock_owner) {
ret = rbd_try_lock(rbd_dev);
if (ret)
rbd_warn(rbd_dev, "reregisteration lock failed: %d",
ret);
}
out:
up_write(&rbd_dev->lock_rwsem);
if (need_to_wake)
wake_requests(rbd_dev, true);
}
/*
......@@ -4034,10 +4082,6 @@ static void rbd_queue_workfn(struct work_struct *work)
if (op_type != OBJ_OP_READ) {
snapc = rbd_dev->header.snapc;
ceph_get_snap_context(snapc);
must_be_locked = rbd_is_lock_supported(rbd_dev);
} else {
must_be_locked = rbd_dev->opts->lock_on_read &&
rbd_is_lock_supported(rbd_dev);
}
up_read(&rbd_dev->header_rwsem);
......@@ -4048,14 +4092,20 @@ static void rbd_queue_workfn(struct work_struct *work)
goto err_rq;
}
must_be_locked =
(rbd_dev->header.features & RBD_FEATURE_EXCLUSIVE_LOCK) &&
(op_type != OBJ_OP_READ || rbd_dev->opts->lock_on_read);
if (must_be_locked) {
down_read(&rbd_dev->lock_rwsem);
if (rbd_dev->lock_state != RBD_LOCK_STATE_LOCKED &&
!test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags))
!test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags)) {
if (rbd_dev->opts->exclusive) {
rbd_warn(rbd_dev, "exclusive lock required");
result = -EROFS;
goto err_unlock;
}
rbd_wait_state_locked(rbd_dev);
WARN_ON((rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED) ^
!test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags));
}
if (test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags)) {
result = -EBLACKLISTED;
goto err_unlock;
......@@ -4114,19 +4164,10 @@ static int rbd_queue_rq(struct blk_mq_hw_ctx *hctx,
static void rbd_free_disk(struct rbd_device *rbd_dev)
{
struct gendisk *disk = rbd_dev->disk;
if (!disk)
return;
blk_cleanup_queue(rbd_dev->disk->queue);
blk_mq_free_tag_set(&rbd_dev->tag_set);
put_disk(rbd_dev->disk);
rbd_dev->disk = NULL;
if (disk->flags & GENHD_FL_UP) {
del_gendisk(disk);
if (disk->queue)
blk_cleanup_queue(disk->queue);
blk_mq_free_tag_set(&rbd_dev->tag_set);
}
put_disk(disk);
}
static int rbd_obj_read_sync(struct rbd_device *rbd_dev,
......@@ -4383,8 +4424,12 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
if (!ceph_test_opt(rbd_dev->rbd_client->client, NOCRC))
q->backing_dev_info->capabilities |= BDI_CAP_STABLE_WRITES;
/*
* disk_release() expects a queue ref from add_disk() and will
* put it. Hold an extra ref until add_disk() is called.
*/
WARN_ON(!blk_get_queue(q));
disk->queue = q;
q->queuedata = rbd_dev;
rbd_dev->disk = disk;
......@@ -5624,6 +5669,7 @@ static int rbd_add_parse_args(const char *buf,
rbd_opts->read_only = RBD_READ_ONLY_DEFAULT;
rbd_opts->queue_depth = RBD_QUEUE_DEPTH_DEFAULT;
rbd_opts->lock_on_read = RBD_LOCK_ON_READ_DEFAULT;
rbd_opts->exclusive = RBD_EXCLUSIVE_DEFAULT;
copts = ceph_parse_options(options, mon_addrs,
mon_addrs + mon_addrs_size - 1,
......@@ -5682,6 +5728,33 @@ static int rbd_add_get_pool_id(struct rbd_client *rbdc, const char *pool_name)
return ret;
}
static void rbd_dev_image_unlock(struct rbd_device *rbd_dev)
{
down_write(&rbd_dev->lock_rwsem);
if (__rbd_is_lock_owner(rbd_dev))
rbd_unlock(rbd_dev);
up_write(&rbd_dev->lock_rwsem);
}
static int rbd_add_acquire_lock(struct rbd_device *rbd_dev)
{
if (!(rbd_dev->header.features & RBD_FEATURE_EXCLUSIVE_LOCK)) {
rbd_warn(rbd_dev, "exclusive-lock feature is not enabled");
return -EINVAL;
}
/* FIXME: "rbd map --exclusive" should be in interruptible */
down_read(&rbd_dev->lock_rwsem);
rbd_wait_state_locked(rbd_dev);
up_read(&rbd_dev->lock_rwsem);
if (test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags)) {
rbd_warn(rbd_dev, "failed to acquire exclusive lock");
return -EROFS;
}
return 0;
}
/*
* An rbd format 2 image has a unique identifier, distinct from the
* name given to it by the user. Internally, that identifier is
......@@ -5873,6 +5946,15 @@ static int rbd_dev_probe_parent(struct rbd_device *rbd_dev, int depth)
return ret;
}
static void rbd_dev_device_release(struct rbd_device *rbd_dev)
{
clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags);
rbd_dev_mapping_clear(rbd_dev);
rbd_free_disk(rbd_dev);
if (!single_major)
unregister_blkdev(rbd_dev->major, rbd_dev->name);
}
/*
* rbd_dev->header_rwsem must be locked for write and will be unlocked
* upon return.
......@@ -5908,26 +5990,13 @@ static int rbd_dev_device_setup(struct rbd_device *rbd_dev)
set_capacity(rbd_dev->disk, rbd_dev->mapping.size / SECTOR_SIZE);
set_disk_ro(rbd_dev->disk, rbd_dev->mapping.read_only);
dev_set_name(&rbd_dev->dev, "%d", rbd_dev->dev_id);
ret = device_add(&rbd_dev->dev);
ret = dev_set_name(&rbd_dev->dev, "%d", rbd_dev->dev_id);
if (ret)
goto err_out_mapping;
/* Everything's ready. Announce the disk to the world. */
set_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags);
up_write(&rbd_dev->header_rwsem);
spin_lock(&rbd_dev_list_lock);
list_add_tail(&rbd_dev->node, &rbd_dev_list);
spin_unlock(&rbd_dev_list_lock);
add_disk(rbd_dev->disk);
pr_info("%s: capacity %llu features 0x%llx\n", rbd_dev->disk->disk_name,
(unsigned long long)get_capacity(rbd_dev->disk) << SECTOR_SHIFT,
rbd_dev->header.features);
return ret;
return 0;
err_out_mapping:
rbd_dev_mapping_clear(rbd_dev);
......@@ -5962,11 +6031,11 @@ static int rbd_dev_header_name(struct rbd_device *rbd_dev)
static void rbd_dev_image_release(struct rbd_device *rbd_dev)
{
rbd_dev_unprobe(rbd_dev);
if (rbd_dev->opts)
rbd_unregister_watch(rbd_dev);
rbd_dev->image_format = 0;
kfree(rbd_dev->spec->image_id);
rbd_dev->spec->image_id = NULL;
rbd_dev_destroy(rbd_dev);
}
/*
......@@ -6126,22 +6195,43 @@ static ssize_t do_rbd_add(struct bus_type *bus,
rbd_dev->mapping.read_only = read_only;
rc = rbd_dev_device_setup(rbd_dev);
if (rc) {
/*
* rbd_unregister_watch() can't be moved into
* rbd_dev_image_release() without refactoring, see
* commit 1f3ef78861ac.
*/
rbd_unregister_watch(rbd_dev);
rbd_dev_image_release(rbd_dev);
goto out;
if (rc)
goto err_out_image_probe;
if (rbd_dev->opts->exclusive) {
rc = rbd_add_acquire_lock(rbd_dev);
if (rc)
goto err_out_device_setup;
}
/* Everything's ready. Announce the disk to the world. */
rc = device_add(&rbd_dev->dev);
if (rc)
goto err_out_image_lock;
add_disk(rbd_dev->disk);
/* see rbd_init_disk() */
blk_put_queue(rbd_dev->disk->queue);
spin_lock(&rbd_dev_list_lock);
list_add_tail(&rbd_dev->node, &rbd_dev_list);
spin_unlock(&rbd_dev_list_lock);
pr_info("%s: capacity %llu features 0x%llx\n", rbd_dev->disk->disk_name,
(unsigned long long)get_capacity(rbd_dev->disk) << SECTOR_SHIFT,
rbd_dev->header.features);
rc = count;
out:
module_put(THIS_MODULE);
return rc;
err_out_image_lock:
rbd_dev_image_unlock(rbd_dev);
err_out_device_setup:
rbd_dev_device_release(rbd_dev);
err_out_image_probe:
rbd_dev_image_release(rbd_dev);
err_out_rbd_dev:
rbd_dev_destroy(rbd_dev);
err_out_client:
......@@ -6169,21 +6259,6 @@ static ssize_t rbd_add_single_major(struct bus_type *bus,
return do_rbd_add(bus, buf, count);
}
static void rbd_dev_device_release(struct rbd_device *rbd_dev)
{
rbd_free_disk(rbd_dev);
spin_lock(&rbd_dev_list_lock);
list_del_init(&rbd_dev->node);
spin_unlock(&rbd_dev_list_lock);
clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags);
device_del(&rbd_dev->dev);
rbd_dev_mapping_clear(rbd_dev);
if (!single_major)
unregister_blkdev(rbd_dev->major, rbd_dev->name);
}
static void rbd_dev_remove_parent(struct rbd_device *rbd_dev)
{
while (rbd_dev->parent) {
......@@ -6201,6 +6276,7 @@ static void rbd_dev_remove_parent(struct rbd_device *rbd_dev)
}
rbd_assert(second);
rbd_dev_image_release(second);
rbd_dev_destroy(second);
first->parent = NULL;
first->parent_overlap = 0;
......@@ -6269,21 +6345,16 @@ static ssize_t do_rbd_remove(struct bus_type *bus,
blk_set_queue_dying(rbd_dev->disk->queue);
}
down_write(&rbd_dev->lock_rwsem);
if (__rbd_is_lock_owner(rbd_dev))
rbd_unlock(rbd_dev);
up_write(&rbd_dev->lock_rwsem);
rbd_unregister_watch(rbd_dev);
del_gendisk(rbd_dev->disk);
spin_lock(&rbd_dev_list_lock);
list_del_init(&rbd_dev->node);
spin_unlock(&rbd_dev_list_lock);
device_del(&rbd_dev->dev);
/*
* Don't free anything from rbd_dev->disk until after all
* notifies are completely processed. Otherwise
* rbd_bus_del_dev() will race with rbd_watch_cb(), resulting
* in a potential use after free of rbd_dev->disk or rbd_dev.
*/
rbd_dev_image_unlock(rbd_dev);
rbd_dev_device_release(rbd_dev);
rbd_dev_image_release(rbd_dev);
rbd_dev_destroy(rbd_dev);
return count;
}
......
......@@ -670,8 +670,12 @@ static void writepages_finish(struct ceph_osd_request *req)
bool remove_page;
dout("writepages_finish %p rc %d\n", inode, rc);
if (rc < 0)
if (rc < 0) {
mapping_set_error(mapping, rc);
ceph_set_error_write(ci);
} else {
ceph_clear_error_write(ci);
}
/*
* We lost the cache cap, need to truncate the page before
......@@ -703,9 +707,6 @@ static void writepages_finish(struct ceph_osd_request *req)
clear_bdi_congested(inode_to_bdi(inode),
BLK_RW_ASYNC);
if (rc < 0)
SetPageError(page);
ceph_put_snap_context(page_snap_context(page));
page->private = 0;
ClearPagePrivate(page);
......@@ -1892,6 +1893,7 @@ static int __ceph_pool_perm_get(struct ceph_inode_info *ci,
err = ceph_osdc_start_request(&fsc->client->osdc, rd_req, false);
wr_req->r_mtime = ci->vfs_inode.i_mtime;
wr_req->r_abort_on_full = true;
err2 = ceph_osdc_start_request(&fsc->client->osdc, wr_req, false);
if (!err)
......
......@@ -1015,6 +1015,7 @@ static int send_cap_msg(struct cap_msg_args *arg)
void *p;
size_t extra_len;
struct timespec zerotime = {0};
struct ceph_osd_client *osdc = &arg->session->s_mdsc->fsc->client->osdc;
dout("send_cap_msg %s %llx %llx caps %s wanted %s dirty %s"
" seq %u/%u tid %llu/%llu mseq %u follows %lld size %llu/%llu"
......@@ -1076,8 +1077,12 @@ static int send_cap_msg(struct cap_msg_args *arg)
ceph_encode_64(&p, arg->inline_data ? 0 : CEPH_INLINE_NONE);
/* inline data size */
ceph_encode_32(&p, 0);
/* osd_epoch_barrier (version 5) */
ceph_encode_32(&p, 0);
/*
* osd_epoch_barrier (version 5)
* The epoch_barrier is protected osdc->lock, so READ_ONCE here in
* case it was recently changed
*/
ceph_encode_32(&p, READ_ONCE(osdc->epoch_barrier));
/* oldest_flush_tid (version 6) */
ceph_encode_64(&p, arg->oldest_flush_tid);
......@@ -1389,7 +1394,7 @@ static void __ceph_flush_snaps(struct ceph_inode_info *ci,
first_tid = cf->tid + 1;
capsnap = container_of(cf, struct ceph_cap_snap, cap_flush);
atomic_inc(&capsnap->nref);
refcount_inc(&capsnap->nref);
spin_unlock(&ci->i_ceph_lock);
dout("__flush_snaps %p capsnap %p tid %llu %s\n",
......@@ -2202,7 +2207,7 @@ static void __kick_flushing_caps(struct ceph_mds_client *mdsc,
inode, capsnap, cf->tid,
ceph_cap_string(capsnap->dirty));
atomic_inc(&capsnap->nref);
refcount_inc(&capsnap->nref);
spin_unlock(&ci->i_ceph_lock);
ret = __send_flush_snap(inode, session, capsnap, cap->mseq,
......@@ -3633,13 +3638,19 @@ void ceph_handle_caps(struct ceph_mds_session *session,
p += inline_len;
}
if (le16_to_cpu(msg->hdr.version) >= 5) {
struct ceph_osd_client *osdc = &mdsc->fsc->client->osdc;
u32 epoch_barrier;
ceph_decode_32_safe(&p, end, epoch_barrier, bad);
ceph_osdc_update_epoch_barrier(osdc, epoch_barrier);
}
if (le16_to_cpu(msg->hdr.version) >= 8) {
u64 flush_tid;
u32 caller_uid, caller_gid;
u32 osd_epoch_barrier;
u32 pool_ns_len;
/* version >= 5 */
ceph_decode_32_safe(&p, end, osd_epoch_barrier, bad);
/* version >= 6 */
ceph_decode_64_safe(&p, end, flush_tid, bad);
/* version >= 7 */
......
......@@ -22,20 +22,19 @@ static int mdsmap_show(struct seq_file *s, void *p)
{
int i;
struct ceph_fs_client *fsc = s->private;
struct ceph_mdsmap *mdsmap;
if (fsc->mdsc == NULL || fsc->mdsc->mdsmap == NULL)
return 0;
seq_printf(s, "epoch %d\n", fsc->mdsc->mdsmap->m_epoch);
seq_printf(s, "root %d\n", fsc->mdsc->mdsmap->m_root);
<