Commit b695188d authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs

Pull btrfs update from Chris Mason:
 "The biggest feature in the pull is the new (and still experimental)
  raid56 code that David Woodhouse started long ago.  I'm still working
  on the parity logging setup that will avoid inconsistent parity after
  a crash, so this is only for testing right now.  But, I'd really like
  to get it out to a broader audience to hammer out any performance
  issues or other problems.

  scrub does not yet correct errors on raid5/6 either.

  Josef has another pass at fsync performance.  The big change here is
  to combine waiting for metadata with waiting for data, which is a big
  latency win.  It is also step one toward using atomics from the
  hardware during a commit.

  Mark Fasheh has a new way to use btrfs send/receive to send only the
  metadata changes.  SUSE is using this to make snapper more efficient
  at finding changes between snapshosts.

  Snapshot-aware defrag is also included.

  Otherwise we have a large number of fixes and cleanups.  Eric Sandeen
  wins the award for removing the most lines, and I'm hoping we steal
  this idea from XFS over and over again."

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (118 commits)
  btrfs: fixup/remove module.h usage as required
  Btrfs: delete inline extents when we find them during logging
  btrfs: try harder to allocate raid56 stripe cache
  Btrfs: cleanup to make the function btrfs_delalloc_reserve_metadata more logic
  Btrfs: don't call btrfs_qgroup_free if just btrfs_qgroup_reserve fails
  Btrfs: remove reduplicate check about root in the function btrfs_clean_quota_tree
  Btrfs: return ENOMEM rather than use BUG_ON when btrfs_alloc_path fails
  Btrfs: fix missing deleted items in btrfs_clean_quota_tree
  btrfs: use only inline_pages from extent buffer
  Btrfs: fix wrong reserved space when deleting a snapshot/subvolume
  Btrfs: fix wrong reserved space in qgroup during snap/subv creation
  Btrfs: remove unnecessary dget_parent/dput when creating the pending snapshot
  btrfs: remove a printk from scan_one_device
  Btrfs: fix NULL pointer after aborting a transaction
  Btrfs: fix memory leak of log roots
  Btrfs: copy everything if we've created an inline extent
  btrfs: cleanup for open-coded alignment
  Btrfs: do not change inode flags in rename
  Btrfs: use reserved space for creating a snapshot
  clear chunk_alloc flag on retryable failure
  ...
parents 48476df9 180e001c
......@@ -5,6 +5,9 @@ config BTRFS_FS
select ZLIB_DEFLATE
select LZO_COMPRESS
select LZO_DECOMPRESS
select RAID6_PQ
select XOR_BLOCKS
help
Btrfs is a new filesystem with extents, writable snapshotting,
support for multiple devices and many more features.
......
......@@ -8,7 +8,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
export.o tree-log.o free-space-cache.o zlib.o lzo.o \
compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
reada.o backref.o ulist.o qgroup.o send.o dev-replace.o
reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o
btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
......@@ -352,11 +352,8 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
err = __resolve_indirect_ref(fs_info, search_commit_root,
time_seq, ref, parents,
extent_item_pos);
if (err) {
if (ret == 0)
ret = err;
if (err)
continue;
}
/* we put the first parent into the ref at hand */
ULIST_ITER_INIT(&uiter);
......
......@@ -19,7 +19,7 @@
#ifndef __BTRFS_BACKREF__
#define __BTRFS_BACKREF__
#include "ioctl.h"
#include <linux/btrfs.h>
#include "ulist.h"
#include "extent_io.h"
......
......@@ -40,6 +40,8 @@
#define BTRFS_INODE_HAS_ASYNC_EXTENT 6
#define BTRFS_INODE_NEEDS_FULL_SYNC 7
#define BTRFS_INODE_COPY_EVERYTHING 8
#define BTRFS_INODE_IN_DELALLOC_LIST 9
#define BTRFS_INODE_READDIO_NEED_LOCK 10
/* in memory btrfs inode */
struct btrfs_inode {
......@@ -216,4 +218,22 @@ static inline int btrfs_inode_in_log(struct inode *inode, u64 generation)
return 0;
}
/*
* Disable DIO read nolock optimization, so new dio readers will be forced
* to grab i_mutex. It is used to avoid the endless truncate due to
* nonlocked dio read.
*/
static inline void btrfs_inode_block_unlocked_dio(struct inode *inode)
{
set_bit(BTRFS_INODE_READDIO_NEED_LOCK, &BTRFS_I(inode)->runtime_flags);
smp_mb();
}
static inline void btrfs_inode_resume_unlocked_dio(struct inode *inode)
{
smp_mb__before_clear_bit();
clear_bit(BTRFS_INODE_READDIO_NEED_LOCK,
&BTRFS_I(inode)->runtime_flags);
}
#endif
......@@ -813,8 +813,7 @@ static int btrfsic_process_superblock_dev_mirror(
(bh->b_data + (dev_bytenr & 4095));
if (btrfs_super_bytenr(super_tmp) != dev_bytenr ||
strncmp((char *)(&(super_tmp->magic)), BTRFS_MAGIC,
sizeof(super_tmp->magic)) ||
super_tmp->magic != cpu_to_le64(BTRFS_MAGIC) ||
memcmp(device->uuid, super_tmp->dev_item.uuid, BTRFS_UUID_SIZE) ||
btrfs_super_nodesize(super_tmp) != state->metablock_size ||
btrfs_super_leafsize(super_tmp) != state->metablock_size ||
......
......@@ -372,7 +372,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
page = compressed_pages[pg_index];
page->mapping = inode->i_mapping;
if (bio->bi_size)
ret = io_tree->ops->merge_bio_hook(page, 0,
ret = io_tree->ops->merge_bio_hook(WRITE, page, 0,
PAGE_CACHE_SIZE,
bio, 0);
else
......@@ -655,7 +655,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
page->index = em_start >> PAGE_CACHE_SHIFT;
if (comp_bio->bi_size)
ret = tree->ops->merge_bio_hook(page, 0,
ret = tree->ops->merge_bio_hook(READ, page, 0,
PAGE_CACHE_SIZE,
comp_bio, 0);
else
......
......@@ -1138,6 +1138,7 @@ __tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq,
switch (tm->op) {
case MOD_LOG_KEY_REMOVE_WHILE_FREEING:
BUG_ON(tm->slot < n);
/* Fallthrough */
case MOD_LOG_KEY_REMOVE_WHILE_MOVING:
case MOD_LOG_KEY_REMOVE:
btrfs_set_node_key(eb, &tm->key, tm->slot);
......@@ -1222,7 +1223,7 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
__tree_mod_log_rewind(eb_rewin, time_seq, tm);
WARN_ON(btrfs_header_nritems(eb_rewin) >
BTRFS_NODEPTRS_PER_BLOCK(fs_info->fs_root));
BTRFS_NODEPTRS_PER_BLOCK(fs_info->tree_root));
return eb_rewin;
}
......@@ -1441,7 +1442,7 @@ int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2)
*/
int btrfs_realloc_node(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct extent_buffer *parent,
int start_slot, int cache_only, u64 *last_ret,
int start_slot, u64 *last_ret,
struct btrfs_key *progress)
{
struct extent_buffer *cur;
......@@ -1461,8 +1462,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
struct btrfs_disk_key disk_key;
parent_level = btrfs_header_level(parent);
if (cache_only && parent_level != 1)
return 0;
WARN_ON(trans->transaction != root->fs_info->running_transaction);
WARN_ON(trans->transid != root->fs_info->generation);
......@@ -1508,10 +1507,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
else
uptodate = 0;
if (!cur || !uptodate) {
if (cache_only) {
free_extent_buffer(cur);
continue;
}
if (!cur) {
cur = read_tree_block(root, blocknr,
blocksize, gen);
......@@ -4825,8 +4820,8 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
/*
* A helper function to walk down the tree starting at min_key, and looking
* for nodes or leaves that are either in cache or have a minimum
* transaction id. This is used by the btree defrag code, and tree logging
* for nodes or leaves that are have a minimum transaction id.
* This is used by the btree defrag code, and tree logging
*
* This does not cow, but it does stuff the starting key it finds back
* into min_key, so you can call btrfs_search_slot with cow=1 on the
......@@ -4847,7 +4842,7 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
*/
int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
struct btrfs_key *max_key,
struct btrfs_path *path, int cache_only,
struct btrfs_path *path,
u64 min_trans)
{
struct extent_buffer *cur;
......@@ -4887,15 +4882,12 @@ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
if (sret && slot > 0)
slot--;
/*
* check this node pointer against the cache_only and
* min_trans parameters. If it isn't in cache or is too
* old, skip to the next one.
* check this node pointer against the min_trans parameters.
* If it is too old, old, skip to the next one.
*/
while (slot < nritems) {
u64 blockptr;
u64 gen;
struct extent_buffer *tmp;
struct btrfs_disk_key disk_key;
blockptr = btrfs_node_blockptr(cur, slot);
gen = btrfs_node_ptr_generation(cur, slot);
......@@ -4903,27 +4895,7 @@ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
slot++;
continue;
}
if (!cache_only)
break;
if (max_key) {
btrfs_node_key(cur, &disk_key, slot);
if (comp_keys(&disk_key, max_key) >= 0) {
ret = 1;
goto out;
}
}
tmp = btrfs_find_tree_block(root, blockptr,
btrfs_level_size(root, level - 1));
if (tmp && btrfs_buffer_uptodate(tmp, gen, 1) > 0) {
free_extent_buffer(tmp);
break;
}
if (tmp)
free_extent_buffer(tmp);
slot++;
break;
}
find_next_key:
/*
......@@ -4934,7 +4906,7 @@ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
path->slots[level] = slot;
btrfs_set_path_blocking(path);
sret = btrfs_find_next_key(root, path, min_key, level,
cache_only, min_trans);
min_trans);
if (sret == 0) {
btrfs_release_path(path);
goto again;
......@@ -5399,8 +5371,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
/*
* this is similar to btrfs_next_leaf, but does not try to preserve
* and fixup the path. It looks for and returns the next key in the
* tree based on the current path and the cache_only and min_trans
* parameters.
* tree based on the current path and the min_trans parameters.
*
* 0 is returned if another key is found, < 0 if there are any errors
* and 1 is returned if there are no higher keys in the tree
......@@ -5409,8 +5380,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
* calling this function.
*/
int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
struct btrfs_key *key, int level,
int cache_only, u64 min_trans)
struct btrfs_key *key, int level, u64 min_trans)
{
int slot;
struct extent_buffer *c;
......@@ -5461,22 +5431,8 @@ int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
if (level == 0)
btrfs_item_key_to_cpu(c, key, slot);
else {
u64 blockptr = btrfs_node_blockptr(c, slot);
u64 gen = btrfs_node_ptr_generation(c, slot);
if (cache_only) {
struct extent_buffer *cur;
cur = btrfs_find_tree_block(root, blockptr,
btrfs_level_size(root, level - 1));
if (!cur ||
btrfs_buffer_uptodate(cur, gen, 1) <= 0) {
slot++;
if (cur)
free_extent_buffer(cur);
goto next;
}
free_extent_buffer(cur);
}
if (gen < min_trans) {
slot++;
goto next;
......
This diff is collapsed.
......@@ -875,7 +875,6 @@ static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
struct btrfs_delayed_item *delayed_item)
{
struct extent_buffer *leaf;
struct btrfs_item *item;
char *ptr;
int ret;
......@@ -886,7 +885,6 @@ static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
leaf = path->nodes[0];
item = btrfs_item_nr(leaf, path->slots[0]);
ptr = btrfs_item_ptr(leaf, path->slots[0], char);
write_extent_buffer(leaf, delayed_item->data, (unsigned long)ptr,
......@@ -1065,32 +1063,25 @@ static void btrfs_release_delayed_inode(struct btrfs_delayed_node *delayed_node)
}
}
static int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
struct btrfs_delayed_node *node)
static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
struct btrfs_delayed_node *node)
{
struct btrfs_key key;
struct btrfs_inode_item *inode_item;
struct extent_buffer *leaf;
int ret;
mutex_lock(&node->mutex);
if (!node->inode_dirty) {
mutex_unlock(&node->mutex);
return 0;
}
key.objectid = node->inode_id;
btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
key.offset = 0;
ret = btrfs_lookup_inode(trans, root, path, &key, 1);
if (ret > 0) {
btrfs_release_path(path);
mutex_unlock(&node->mutex);
return -ENOENT;
} else if (ret < 0) {
mutex_unlock(&node->mutex);
return ret;
}
......@@ -1105,11 +1096,47 @@ static int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
btrfs_delayed_inode_release_metadata(root, node);
btrfs_release_delayed_inode(node);
mutex_unlock(&node->mutex);
return 0;
}
static inline int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
struct btrfs_delayed_node *node)
{
int ret;
mutex_lock(&node->mutex);
if (!node->inode_dirty) {
mutex_unlock(&node->mutex);
return 0;
}
ret = __btrfs_update_delayed_inode(trans, root, path, node);
mutex_unlock(&node->mutex);
return ret;
}
static inline int
__btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
struct btrfs_path *path,
struct btrfs_delayed_node *node)
{
int ret;
ret = btrfs_insert_delayed_items(trans, path, node->root, node);
if (ret)
return ret;
ret = btrfs_delete_delayed_items(trans, path, node->root, node);
if (ret)
return ret;
ret = btrfs_update_delayed_inode(trans, node->root, path, node);
return ret;
}
/*
* Called when committing the transaction.
* Returns 0 on success.
......@@ -1119,7 +1146,6 @@ static int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
struct btrfs_root *root, int nr)
{
struct btrfs_root *curr_root = root;
struct btrfs_delayed_root *delayed_root;
struct btrfs_delayed_node *curr_node, *prev_node;
struct btrfs_path *path;
......@@ -1142,15 +1168,8 @@ static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
curr_node = btrfs_first_delayed_node(delayed_root);
while (curr_node && (!count || (count && nr--))) {
curr_root = curr_node->root;
ret = btrfs_insert_delayed_items(trans, path, curr_root,
curr_node);
if (!ret)
ret = btrfs_delete_delayed_items(trans, path,
curr_root, curr_node);
if (!ret)
ret = btrfs_update_delayed_inode(trans, curr_root,
path, curr_node);
ret = __btrfs_commit_inode_delayed_items(trans, path,
curr_node);
if (ret) {
btrfs_release_delayed_node(curr_node);
curr_node = NULL;
......@@ -1183,51 +1202,93 @@ int btrfs_run_delayed_items_nr(struct btrfs_trans_handle *trans,
return __btrfs_run_delayed_items(trans, root, nr);
}
static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
struct btrfs_delayed_node *node)
int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
struct inode *inode)
{
struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
struct btrfs_path *path;
struct btrfs_block_rsv *block_rsv;
int ret;
if (!delayed_node)
return 0;
mutex_lock(&delayed_node->mutex);
if (!delayed_node->count) {
mutex_unlock(&delayed_node->mutex);
btrfs_release_delayed_node(delayed_node);
return 0;
}
mutex_unlock(&delayed_node->mutex);
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
path->leave_spinning = 1;
block_rsv = trans->block_rsv;
trans->block_rsv = &node->root->fs_info->delayed_block_rsv;
trans->block_rsv = &delayed_node->root->fs_info->delayed_block_rsv;
ret = btrfs_insert_delayed_items(trans, path, node->root, node);
if (!ret)
ret = btrfs_delete_delayed_items(trans, path, node->root, node);
if (!ret)
ret = btrfs_update_delayed_inode(trans, node->root, path, node);
btrfs_free_path(path);
ret = __btrfs_commit_inode_delayed_items(trans, path, delayed_node);
btrfs_release_delayed_node(delayed_node);
btrfs_free_path(path);
trans->block_rsv = block_rsv;
return ret;
}
int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
struct inode *inode)
int btrfs_commit_inode_delayed_inode(struct inode *inode)
{
struct btrfs_trans_handle *trans;
struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
struct btrfs_path *path;
struct btrfs_block_rsv *block_rsv;
int ret;
if (!delayed_node)
return 0;
mutex_lock(&delayed_node->mutex);
if (!delayed_node->count) {
if (!delayed_node->inode_dirty) {
mutex_unlock(&delayed_node->mutex);
btrfs_release_delayed_node(delayed_node);
return 0;
}
mutex_unlock(&delayed_node->mutex);
ret = __btrfs_commit_inode_delayed_items(trans, delayed_node);
trans = btrfs_join_transaction(delayed_node->root);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
goto out;
}
path = btrfs_alloc_path();
if (!path) {
ret = -ENOMEM;
goto trans_out;
}
path->leave_spinning = 1;
block_rsv = trans->block_rsv;
trans->block_rsv = &delayed_node->root->fs_info->delayed_block_rsv;
mutex_lock(&delayed_node->mutex);
if (delayed_node->inode_dirty)
ret = __btrfs_update_delayed_inode(trans, delayed_node->root,
path, delayed_node);
else
ret = 0;
mutex_unlock(&delayed_node->mutex);
btrfs_free_path(path);
trans->block_rsv = block_rsv;
trans_out:
btrfs_end_transaction(trans, delayed_node->root);
btrfs_btree_balance_dirty(delayed_node->root);
out:
btrfs_release_delayed_node(delayed_node);
return ret;
}
......@@ -1258,7 +1319,6 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
struct btrfs_root *root;
struct btrfs_block_rsv *block_rsv;
int need_requeue = 0;
int ret;
async_node = container_of(work, struct btrfs_async_delayed_node, work);
......@@ -1277,14 +1337,7 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
block_rsv = trans->block_rsv;
trans->block_rsv = &root->fs_info->delayed_block_rsv;
ret = btrfs_insert_delayed_items(trans, path, root, delayed_node);
if (!ret)
ret = btrfs_delete_delayed_items(trans, path, root,
delayed_node);
if (!ret)
btrfs_update_delayed_inode(trans, root, path, delayed_node);
__btrfs_commit_inode_delayed_items(trans, path, delayed_node);
/*
* Maybe new delayed items have been inserted, so we need requeue
* the work. Besides that, we must dequeue the empty delayed nodes
......
......@@ -117,6 +117,7 @@ int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
/* Used for evicting the inode. */
void btrfs_remove_delayed_node(struct inode *inode);
void btrfs_kill_delayed_inode_items(struct inode *inode);
int btrfs_commit_inode_delayed_inode(struct inode *inode);
int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
......
......@@ -23,6 +23,10 @@
#include "delayed-ref.h"
#include "transaction.h"
struct kmem_cache *btrfs_delayed_ref_head_cachep;
struct kmem_cache *btrfs_delayed_tree_ref_cachep;
struct kmem_cache *btrfs_delayed_data_ref_cachep;
struct kmem_cache *btrfs_delayed_extent_op_cachep;
/*
* delayed back reference update tracking. For subvolume trees
* we queue up extent allocations and backref maintenance for
......@@ -422,6 +426,14 @@ int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
return 1;
}
void btrfs_release_ref_cluster(struct list_head *cluster)
{
struct list_head *pos, *q;
list_for_each_safe(pos, q, cluster)
list_del_init(pos);
}
/*
* helper function to update an extent delayed ref in the
* rbtree. existing and update must both have the same
......@@ -511,7 +523,7 @@ update_existing_head_ref(struct btrfs_delayed_ref_node *existing,
ref->extent_op->flags_to_set;
existing_ref->extent_op->update_flags = 1;
}
kfree(ref->extent_op);
btrfs_free_delayed_extent_op(ref->extent_op);
}
}
/*
......@@ -592,7 +604,7 @@ static noinline void add_delayed_ref_head(struct btrfs_fs_info *fs_info,
* we've updated the existing ref, free the newly
* allocated ref
*/
kfree(head_ref);
kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
} else {
delayed_refs->num_heads++;
delayed_refs->num_heads_ready++;
......@@ -653,7 +665,7 @@ static noinline void add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
* we've updated the existing ref, free the newly
* allocated ref
*/
kfree(full_ref);
kmem_cache_free(btrfs_delayed_tree_ref_cachep, full_ref);
} else {
delayed_refs->num_entries++;
trans->delayed_ref_updates++;
......@@ -714,7 +726,7 @@ static noinline void add_delayed_data_ref(struct btrfs_fs_info *fs_info,
* we've updated the existing ref, free the newly
* allocated ref
*/
kfree(full_ref);
kmem_cache_free(btrfs_delayed_data_ref_cachep, full_ref);
} else {
delayed_refs->num_entries++;
trans->delayed_ref_updates++;
......@@ -738,13 +750,13 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_root *delayed_refs;
BUG_ON(extent_op && extent_op->is_data);
ref = kmalloc(sizeof(*ref), GFP_NOFS);
ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS);
if (!ref)
return -ENOMEM;
head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS);
head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
if (!head_ref) {
kfree(ref);
kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
return -ENOMEM;
}
......@@ -786,13 +798,13 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_root *delayed_refs;
BUG_ON(extent_op && !extent_op->is_data);
ref = kmalloc(sizeof(*ref), GFP_NOFS);
ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS);
if (!ref)
return -ENOMEM;
head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS);
head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
if (!head_ref) {
kfree(ref);
kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
return -ENOMEM;
}
......@@ -826,7 +838,7 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_head *head_ref;
struct btrfs_delayed_ref_root *delayed_refs;
head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS);
head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
if (!head_ref)
return -ENOMEM;
......@@ -860,3 +872,51 @@ btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr)
return btrfs_delayed_node_to_head(ref);
return NULL;
}
void btrfs_delayed_ref_exit(void)
{
if (btrfs_delayed_ref_head_cachep)
kmem_cache_destroy(btrfs_delayed_ref_head_cachep);
if (btrfs_delayed_tree_ref_cachep)
kmem_cache_destroy(btrfs_delayed_tree_ref_cachep);
if (btrfs_delayed_data_ref_cachep)
kmem_cache_destroy(btrfs_delayed_data_ref_cachep);
if (btrfs_delayed_extent_op_cachep)
kmem_cache_destroy(btrfs_delayed_extent_op_cachep);
}
int btrfs_delayed_ref_init(void)
{
btrfs_delayed_ref_head_cachep = kmem_cache_create(
"btrfs_delayed_ref_head",
sizeof(struct btrfs_delayed_ref_head), 0,
SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
if (!btrfs_delayed_ref_head_cachep)
goto fail;
btrfs_delayed_tree_ref_cachep = kmem_cache_create(
"btrfs_delayed_tree_ref",
sizeof(struct btrfs_delayed_tree_ref), 0,
SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
if (!btrfs_delayed_tree_ref_cachep)
goto fail;
btrfs_delayed_data_ref_cachep = kmem_cache_create(
"btrfs_delayed_data_ref",
sizeof(struct btrfs_delayed_data_ref), 0,
SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
if (!btrfs_delayed_data_ref_cachep)
goto fail;
btrfs_delayed_extent_op_cachep = kmem_cache_create(
"btrfs_delayed_extent_op",
sizeof(struct btrfs_delayed_extent_op), 0,
SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
if (!btrfs_delayed_extent_op_cachep)
goto fail;
return 0;
fail:
btrfs_delayed_ref_exit();
return -ENOMEM;
}
......@@ -131,6 +131,15 @@ struct btrfs_delayed_ref_root {
/* total number of head nodes ready for processing */
unsigned long num_heads_ready;
/*
* bumped when someone is making progress on the delayed
* refs, so that other procs know they are just adding to
* contention intead of helping
*/
atomic_t procs_running_refs;
atomic_t ref_seq;
wait_queue_head_t wait;
/*
* set when the tree is flushing before a transaction commit,
* used by the throttling code to decide if new updates need
......@@ -141,12 +150,47 @@ struct btrfs_delayed_ref_root {
u64 run_delayed_start;
};
extern struct kmem_cache *btrfs_delayed_ref_head_cachep;
extern struct kmem_cache *btrfs_delayed_tree_ref_cachep;
extern struct kmem_cache *btrfs_delayed_data_ref_cachep;
extern struct kmem_cache *btrfs_delayed_extent_op_cachep;
int btrfs_delayed_ref_init(void);