Commit c2661b80 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 updates from Ted Ts'o:
 "A large number of cleanups and bug fixes, with some (minor) journal
  optimizations"

[ This got sent to me before -rc1, but was stuck in my spam folder.   - Linus ]

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (67 commits)
  ext4: check s_chksum_driver when looking for bg csum presence
  ext4: move error report out of atomic context in ext4_init_block_bitmap()
  ext4: Replace open coded mdata csum feature to helper function
  ext4: delete useless comments about ext4_move_extents
  ext4: fix reservation overflow in ext4_da_write_begin
  ext4: add ext4_iget_normal() which is to be used for dir tree lookups
  ext4: don't orphan or truncate the boot loader inode
  ext4: grab missed write_count for EXT4_IOC_SWAP_BOOT
  ext4: optimize block allocation on grow indepth
  ext4: get rid of code duplication
  ext4: fix over-defensive complaint after journal abort
  ext4: fix return value of ext4_do_update_inode
  ext4: fix mmap data corruption when blocksize < pagesize
  vfs: fix data corruption when blocksize < pagesize for mmaped data
  ext4: fold ext4_nojournal_sops into ext4_sops
  ext4: support freezing ext2 (nojournal) file systems
  ext4: fold ext4_sync_fs_nojournal() into ext4_sync_fs()
  ext4: don't check quota format when there are no quota files
  jbd2: simplify calling convention around __jbd2_journal_clean_checkpoint_list
  jbd2: avoid pointless scanning of checkpoint lists
  ...
parents f114040e 813d32f9
......@@ -993,7 +993,7 @@ init_page_buffers(struct page *page, struct block_device *bdev,
*/
static int
grow_dev_page(struct block_device *bdev, sector_t block,
pgoff_t index, int size, int sizebits)
pgoff_t index, int size, int sizebits, gfp_t gfp)
{
struct inode *inode = bdev->bd_inode;
struct page *page;
......@@ -1002,8 +1002,8 @@ grow_dev_page(struct block_device *bdev, sector_t block,
int ret = 0; /* Will call free_more_memory() */
gfp_t gfp_mask;
gfp_mask = mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS;
gfp_mask |= __GFP_MOVABLE;
gfp_mask = (mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS) | gfp;
/*
* XXX: __getblk_slow() can not really deal with failure and
* will endlessly loop on improvised global reclaim. Prefer
......@@ -1060,7 +1060,7 @@ grow_dev_page(struct block_device *bdev, sector_t block,
* that page was dirty, the buffers are set dirty also.
*/
static int
grow_buffers(struct block_device *bdev, sector_t block, int size)
grow_buffers(struct block_device *bdev, sector_t block, int size, gfp_t gfp)
{
pgoff_t index;
int sizebits;
......@@ -1087,11 +1087,12 @@ grow_buffers(struct block_device *bdev, sector_t block, int size)
}
/* Create a page with the proper size buffers.. */
return grow_dev_page(bdev, block, index, size, sizebits);
return grow_dev_page(bdev, block, index, size, sizebits, gfp);
}
static struct buffer_head *
__getblk_slow(struct block_device *bdev, sector_t block, int size)
struct buffer_head *
__getblk_slow(struct block_device *bdev, sector_t block,
unsigned size, gfp_t gfp)
{
/* Size must be multiple of hard sectorsize */
if (unlikely(size & (bdev_logical_block_size(bdev)-1) ||
......@@ -1113,13 +1114,14 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size)
if (bh)
return bh;
ret = grow_buffers(bdev, block, size);
ret = grow_buffers(bdev, block, size, gfp);
if (ret < 0)
return NULL;
if (ret == 0)
free_more_memory();
}
}
EXPORT_SYMBOL(__getblk_slow);
/*
* The relationship between dirty buffers and dirty pages:
......@@ -1373,24 +1375,25 @@ __find_get_block(struct block_device *bdev, sector_t block, unsigned size)
EXPORT_SYMBOL(__find_get_block);
/*
* __getblk will locate (and, if necessary, create) the buffer_head
* __getblk_gfp() will locate (and, if necessary, create) the buffer_head
* which corresponds to the passed block_device, block and size. The
* returned buffer has its reference count incremented.
*
* __getblk() will lock up the machine if grow_dev_page's try_to_free_buffers()
* attempt is failing. FIXME, perhaps?
* __getblk_gfp() will lock up the machine if grow_dev_page's
* try_to_free_buffers() attempt is failing. FIXME, perhaps?
*/
struct buffer_head *
__getblk(struct block_device *bdev, sector_t block, unsigned size)
__getblk_gfp(struct block_device *bdev, sector_t block,
unsigned size, gfp_t gfp)
{
struct buffer_head *bh = __find_get_block(bdev, block, size);
might_sleep();
if (bh == NULL)
bh = __getblk_slow(bdev, block, size);
bh = __getblk_slow(bdev, block, size, gfp);
return bh;
}
EXPORT_SYMBOL(__getblk);
EXPORT_SYMBOL(__getblk_gfp);
/*
* Do async read-ahead on a buffer..
......@@ -1406,24 +1409,28 @@ void __breadahead(struct block_device *bdev, sector_t block, unsigned size)
EXPORT_SYMBOL(__breadahead);
/**
* __bread() - reads a specified block and returns the bh
* __bread_gfp() - reads a specified block and returns the bh
* @bdev: the block_device to read from
* @block: number of block
* @size: size (in bytes) to read
* @gfp: page allocation flag
*
* Reads a specified block, and returns buffer head that contains it.
* The page cache can be allocated from non-movable area
* not to prevent page migration if you set gfp to zero.
* It returns NULL if the block was unreadable.
*/
struct buffer_head *
__bread(struct block_device *bdev, sector_t block, unsigned size)
__bread_gfp(struct block_device *bdev, sector_t block,
unsigned size, gfp_t gfp)
{
struct buffer_head *bh = __getblk(bdev, block, size);
struct buffer_head *bh = __getblk_gfp(bdev, block, size, gfp);
if (likely(bh) && !buffer_uptodate(bh))
bh = __bread_slow(bh);
return bh;
}
EXPORT_SYMBOL(__bread);
EXPORT_SYMBOL(__bread_gfp);
/*
* invalidate_bh_lrus() is called rarely - but not only at unmount.
......@@ -2082,6 +2089,7 @@ int generic_write_end(struct file *file, struct address_space *mapping,
struct page *page, void *fsdata)
{
struct inode *inode = mapping->host;
loff_t old_size = inode->i_size;
int i_size_changed = 0;
copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
......@@ -2101,6 +2109,8 @@ int generic_write_end(struct file *file, struct address_space *mapping,
unlock_page(page);
page_cache_release(page);
if (old_size < pos)
pagecache_isize_extended(inode, old_size, pos);
/*
* Don't mark the inode dirty under page lock. First, it unnecessarily
* makes the holding time of page lock longer. Second, it forces lock
......
......@@ -176,7 +176,7 @@ static unsigned int num_clusters_in_group(struct super_block *sb,
}
/* Initializes an uninitialized block bitmap */
static void ext4_init_block_bitmap(struct super_block *sb,
static int ext4_init_block_bitmap(struct super_block *sb,
struct buffer_head *bh,
ext4_group_t block_group,
struct ext4_group_desc *gdp)
......@@ -192,7 +192,6 @@ static void ext4_init_block_bitmap(struct super_block *sb,
/* If checksum is bad mark all blocks used to prevent allocation
* essentially implementing a per-group read-only flag. */
if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) {
ext4_error(sb, "Checksum bad for group %u", block_group);
grp = ext4_get_group_info(sb, block_group);
if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
percpu_counter_sub(&sbi->s_freeclusters_counter,
......@@ -205,7 +204,7 @@ static void ext4_init_block_bitmap(struct super_block *sb,
count);
}
set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
return;
return -EIO;
}
memset(bh->b_data, 0, sb->s_blocksize);
......@@ -243,6 +242,7 @@ static void ext4_init_block_bitmap(struct super_block *sb,
sb->s_blocksize * 8, bh->b_data);
ext4_block_bitmap_csum_set(sb, block_group, gdp, bh);
ext4_group_desc_csum_set(sb, block_group, gdp);
return 0;
}
/* Return the number of free blocks in a block group. It is used when
......@@ -438,11 +438,15 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group)
}
ext4_lock_group(sb, block_group);
if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
ext4_init_block_bitmap(sb, bh, block_group, desc);
int err;
err = ext4_init_block_bitmap(sb, bh, block_group, desc);
set_bitmap_uptodate(bh);
set_buffer_uptodate(bh);
ext4_unlock_group(sb, block_group);
unlock_buffer(bh);
if (err)
ext4_error(sb, "Checksum bad for grp %u", block_group);
return bh;
}
ext4_unlock_group(sb, block_group);
......@@ -636,8 +640,7 @@ ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
* Account for the allocated meta blocks. We will never
* fail EDQUOT for metdata, but we do account for it.
*/
if (!(*errp) &&
ext4_test_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED)) {
if (!(*errp) && (flags & EXT4_MB_DELALLOC_RESERVED)) {
spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
dquot_alloc_block_nofail(inode,
......
......@@ -24,8 +24,7 @@ int ext4_inode_bitmap_csum_verify(struct super_block *sb, ext4_group_t group,
__u32 provided, calculated;
struct ext4_sb_info *sbi = EXT4_SB(sb);
if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
if (!ext4_has_metadata_csum(sb))
return 1;
provided = le16_to_cpu(gdp->bg_inode_bitmap_csum_lo);
......@@ -46,8 +45,7 @@ void ext4_inode_bitmap_csum_set(struct super_block *sb, ext4_group_t group,
__u32 csum;
struct ext4_sb_info *sbi = EXT4_SB(sb);
if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
if (!ext4_has_metadata_csum(sb))
return;
csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz);
......@@ -65,8 +63,7 @@ int ext4_block_bitmap_csum_verify(struct super_block *sb, ext4_group_t group,
struct ext4_sb_info *sbi = EXT4_SB(sb);
int sz = EXT4_CLUSTERS_PER_GROUP(sb) / 8;
if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
if (!ext4_has_metadata_csum(sb))
return 1;
provided = le16_to_cpu(gdp->bg_block_bitmap_csum_lo);
......@@ -91,8 +88,7 @@ void ext4_block_bitmap_csum_set(struct super_block *sb, ext4_group_t group,
__u32 csum;
struct ext4_sb_info *sbi = EXT4_SB(sb);
if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
if (!ext4_has_metadata_csum(sb))
return;
csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz);
......
......@@ -151,13 +151,11 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
&file->f_ra, file,
index, 1);
file->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT;
bh = ext4_bread(NULL, inode, map.m_lblk, 0, &err);
bh = ext4_bread(NULL, inode, map.m_lblk, 0);
if (IS_ERR(bh))
return PTR_ERR(bh);
}
/*
* We ignore I/O errors on directories so users have a chance
* of recovering data when there's a bad sector
*/
if (!bh) {
if (!dir_has_error) {
EXT4_ERROR_FILE(file, 0,
......
......@@ -572,15 +572,15 @@ enum {
/*
* The bit position of these flags must not overlap with any of the
* EXT4_GET_BLOCKS_*. They are used by ext4_ext_find_extent(),
* EXT4_GET_BLOCKS_*. They are used by ext4_find_extent(),
* read_extent_tree_block(), ext4_split_extent_at(),
* ext4_ext_insert_extent(), and ext4_ext_create_new_leaf().
* EXT4_EX_NOCACHE is used to indicate that the we shouldn't be
* caching the extents when reading from the extent tree while a
* truncate or punch hole operation is in progress.
*/
#define EXT4_EX_NOCACHE 0x0400
#define EXT4_EX_FORCE_CACHE 0x0800
#define EXT4_EX_NOCACHE 0x40000000
#define EXT4_EX_FORCE_CACHE 0x20000000
/*
* Flags used by ext4_free_blocks
......@@ -890,6 +890,7 @@ struct ext4_inode_info {
struct ext4_es_tree i_es_tree;
rwlock_t i_es_lock;
struct list_head i_es_lru;
unsigned int i_es_all_nr; /* protected by i_es_lock */
unsigned int i_es_lru_nr; /* protected by i_es_lock */
unsigned long i_touch_when; /* jiffies of last accessing */
......@@ -1174,6 +1175,9 @@ struct ext4_super_block {
#define EXT4_MF_MNTDIR_SAMPLED 0x0001
#define EXT4_MF_FS_ABORTED 0x0002 /* Fatal error detected */
/* Number of quota types we support */
#define EXT4_MAXQUOTAS 2
/*
* fourth extended-fs super-block data in memory
*/
......@@ -1237,7 +1241,7 @@ struct ext4_sb_info {
u32 s_min_batch_time;
struct block_device *journal_bdev;
#ifdef CONFIG_QUOTA
char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */
char *s_qf_names[EXT4_MAXQUOTAS]; /* Names of quota files with journalled quota */
int s_jquota_fmt; /* Format of quota to use */
#endif
unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */
......@@ -1330,8 +1334,7 @@ struct ext4_sb_info {
/* Reclaim extents from extent status tree */
struct shrinker s_es_shrinker;
struct list_head s_es_lru;
unsigned long s_es_last_sorted;
struct percpu_counter s_extent_cache_cnt;
struct ext4_es_stats s_es_stats;
struct mb_cache *s_mb_cache;
spinlock_t s_es_lru_lock ____cacheline_aligned_in_smp;
......@@ -1399,7 +1402,6 @@ enum {
EXT4_STATE_EXT_MIGRATE, /* Inode is migrating */
EXT4_STATE_DIO_UNWRITTEN, /* need convert on dio done*/
EXT4_STATE_NEWENTRY, /* File just added to dir */
EXT4_STATE_DELALLOC_RESERVED, /* blks already reserved for delalloc */
EXT4_STATE_DIOREAD_LOCK, /* Disable support for dio read
nolocking */
EXT4_STATE_MAY_INLINE_DATA, /* may have in-inode data */
......@@ -2086,10 +2088,8 @@ extern int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
extern int ext4_trim_fs(struct super_block *, struct fstrim_range *);
/* inode.c */
struct buffer_head *ext4_getblk(handle_t *, struct inode *,
ext4_lblk_t, int, int *);
struct buffer_head *ext4_bread(handle_t *, struct inode *,
ext4_lblk_t, int, int *);
struct buffer_head *ext4_getblk(handle_t *, struct inode *, ext4_lblk_t, int);
struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int);
int ext4_get_block_write(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create);
int ext4_get_block(struct inode *inode, sector_t iblock,
......@@ -2109,6 +2109,7 @@ int do_journal_get_write_access(handle_t *handle,
#define CONVERT_INLINE_DATA 2
extern struct inode *ext4_iget(struct super_block *, unsigned long);
extern struct inode *ext4_iget_normal(struct super_block *, unsigned long);
extern int ext4_write_inode(struct inode *, struct writeback_control *);
extern int ext4_setattr(struct dentry *, struct iattr *);
extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
......@@ -2332,10 +2333,18 @@ extern int ext4_register_li_request(struct super_block *sb,
static inline int ext4_has_group_desc_csum(struct super_block *sb)
{
return EXT4_HAS_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_GDT_CSUM |
EXT4_FEATURE_RO_COMPAT_METADATA_CSUM);
EXT4_FEATURE_RO_COMPAT_GDT_CSUM) ||
(EXT4_SB(sb)->s_chksum_driver != NULL);
}
static inline int ext4_has_metadata_csum(struct super_block *sb)
{
WARN_ON_ONCE(EXT4_HAS_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) &&
!EXT4_SB(sb)->s_chksum_driver);
return (EXT4_SB(sb)->s_chksum_driver != NULL);
}
static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es)
{
return ((ext4_fsblk_t)le32_to_cpu(es->s_blocks_count_hi) << 32) |
......@@ -2731,10 +2740,10 @@ extern int ext4_can_extents_be_merged(struct inode *inode,
struct ext4_extent *ex1,
struct ext4_extent *ex2);
extern int ext4_ext_insert_extent(handle_t *, struct inode *,
struct ext4_ext_path *,
struct ext4_ext_path **,
struct ext4_extent *, int);
extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t,
struct ext4_ext_path *,
extern struct ext4_ext_path *ext4_find_extent(struct inode *, ext4_lblk_t,
struct ext4_ext_path **,
int flags);
extern void ext4_ext_drop_refs(struct ext4_ext_path *);
extern int ext4_ext_check_inode(struct inode *inode);
......@@ -2742,10 +2751,15 @@ extern int ext4_find_delalloc_range(struct inode *inode,
ext4_lblk_t lblk_start,
ext4_lblk_t lblk_end);
extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk);
extern ext4_lblk_t ext4_ext_next_allocated_block(struct ext4_ext_path *path);
extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len);
extern int ext4_ext_precache(struct inode *inode);
extern int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len);
extern int ext4_swap_extents(handle_t *handle, struct inode *inode1,
struct inode *inode2, ext4_lblk_t lblk1,
ext4_lblk_t lblk2, ext4_lblk_t count,
int mark_unwritten,int *err);
/* move_extent.c */
extern void ext4_double_down_write_data_sem(struct inode *first,
......@@ -2755,8 +2769,6 @@ extern void ext4_double_up_write_data_sem(struct inode *orig_inode,
extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
__u64 start_orig, __u64 start_donor,
__u64 len, __u64 *moved_len);
extern int mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
struct ext4_extent **extent);
/* page-io.c */
extern int __init ext4_init_pageio(void);
......
......@@ -123,6 +123,7 @@ find_ext4_extent_tail(struct ext4_extent_header *eh)
struct ext4_ext_path {
ext4_fsblk_t p_block;
__u16 p_depth;
__u16 p_maxdepth;
struct ext4_extent *p_ext;
struct ext4_extent_idx *p_idx;
struct ext4_extent_header *p_hdr;
......
......@@ -256,8 +256,8 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
set_buffer_prio(bh);
if (ext4_handle_valid(handle)) {
err = jbd2_journal_dirty_metadata(handle, bh);
/* Errors can only happen if there is a bug */
if (WARN_ON_ONCE(err)) {
/* Errors can only happen due to aborted journal or a nasty bug */
if (!is_handle_aborted(handle) && WARN_ON_ONCE(err)) {
ext4_journal_abort_handle(where, line, __func__, bh,
handle, err);
if (inode == NULL) {
......
......@@ -102,9 +102,9 @@
#define EXT4_QUOTA_INIT_BLOCKS(sb) 0
#define EXT4_QUOTA_DEL_BLOCKS(sb) 0
#endif
#define EXT4_MAXQUOTAS_TRANS_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_TRANS_BLOCKS(sb))
#define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb))
#define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb))
#define EXT4_MAXQUOTAS_TRANS_BLOCKS(sb) (EXT4_MAXQUOTAS*EXT4_QUOTA_TRANS_BLOCKS(sb))
#define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (EXT4_MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb))
#define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (EXT4_MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb))
static inline int ext4_jbd2_credits_xattr(struct inode *inode)
{
......
This diff is collapsed.
......@@ -11,6 +11,8 @@
*/
#include <linux/rbtree.h>
#include <linux/list_sort.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include "ext4.h"
#include "extents_status.h"
......@@ -313,19 +315,27 @@ ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len,
*/
if (!ext4_es_is_delayed(es)) {
EXT4_I(inode)->i_es_lru_nr++;
percpu_counter_inc(&EXT4_SB(inode->i_sb)->s_extent_cache_cnt);
percpu_counter_inc(&EXT4_SB(inode->i_sb)->
s_es_stats.es_stats_lru_cnt);
}
EXT4_I(inode)->i_es_all_nr++;
percpu_counter_inc(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt);
return es;
}
static void ext4_es_free_extent(struct inode *inode, struct extent_status *es)
{
EXT4_I(inode)->i_es_all_nr--;
percpu_counter_dec(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt);
/* Decrease the lru counter when this es is not delayed */
if (!ext4_es_is_delayed(es)) {
BUG_ON(EXT4_I(inode)->i_es_lru_nr == 0);
EXT4_I(inode)->i_es_lru_nr--;
percpu_counter_dec(&EXT4_SB(inode->i_sb)->s_extent_cache_cnt);
percpu_counter_dec(&EXT4_SB(inode->i_sb)->
s_es_stats.es_stats_lru_cnt);
}
kmem_cache_free(ext4_es_cachep, es);
......@@ -426,7 +436,7 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode,
unsigned short ee_len;
int depth, ee_status, es_status;
path = ext4_ext_find_extent(inode, es->es_lblk, NULL, EXT4_EX_NOCACHE);
path = ext4_find_extent(inode, es->es_lblk, NULL, EXT4_EX_NOCACHE);
if (IS_ERR(path))
return;
......@@ -499,10 +509,8 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode,
}
}
out:
if (path) {
ext4_ext_drop_refs(path);
kfree(path);
}
}
static void ext4_es_insert_extent_ind_check(struct inode *inode,
......@@ -731,6 +739,7 @@ int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
struct extent_status *es)
{
struct ext4_es_tree *tree;
struct ext4_es_stats *stats;
struct extent_status *es1 = NULL;
struct rb_node *node;
int found = 0;
......@@ -767,11 +776,15 @@ int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
}
out:
stats = &EXT4_SB(inode->i_sb)->s_es_stats;
if (found) {
BUG_ON(!es1);
es->es_lblk = es1->es_lblk;
es->es_len = es1->es_len;
es->es_pblk = es1->es_pblk;
stats->es_stats_cache_hits++;
} else {
stats->es_stats_cache_misses++;
}
read_unlock(&EXT4_I(inode)->i_es_lock);
......@@ -933,11 +946,16 @@ static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
struct ext4_inode_info *locked_ei)
{
struct ext4_inode_info *ei;
struct ext4_es_stats *es_stats;
struct list_head *cur, *tmp;
LIST_HEAD(skipped);
ktime_t start_time;
u64 scan_time;
int nr_shrunk = 0;
int retried = 0, skip_precached = 1, nr_skipped = 0;
es_stats = &sbi->s_es_stats;
start_time = ktime_get();
spin_lock(&sbi->s_es_lru_lock);
retry:
......@@ -948,7 +966,8 @@ static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
* If we have already reclaimed all extents from extent
* status tree, just stop the loop immediately.
*/
if (percpu_counter_read_positive(&sbi->s_extent_cache_cnt) == 0)
if (percpu_counter_read_positive(
&es_stats->es_stats_lru_cnt) == 0)
break;
ei = list_entry(cur, struct ext4_inode_info, i_es_lru);
......@@ -958,7 +977,7 @@ static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
* time. Normally we try hard to avoid shrinking
* precached inodes, but we will as a last resort.
*/
if ((sbi->s_es_last_sorted < ei->i_touch_when) ||
if ((es_stats->es_stats_last_sorted < ei->i_touch_when) ||
(skip_precached && ext4_test_inode_state(&ei->vfs_inode,
EXT4_STATE_EXT_PRECACHED))) {
nr_skipped++;
......@@ -992,7 +1011,7 @@ static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
if ((nr_shrunk == 0) && nr_skipped && !retried) {
retried++;
list_sort(NULL, &sbi->s_es_lru, ext4_inode_touch_time_cmp);
sbi->s_es_last_sorted = jiffies;
es_stats->es_stats_last_sorted = jiffies;
ei = list_first_entry(&sbi->s_es_lru, struct ext4_inode_info,
i_es_lru);
/*
......@@ -1010,6 +1029,22 @@ static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
if (locked_ei && nr_shrunk == 0)
nr_shrunk = __es_try_to_reclaim_extents(locked_ei, nr_to_scan);
scan_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
if (likely(es_stats->es_stats_scan_time))
es_stats->es_stats_scan_time = (scan_time +
es_stats->es_stats_scan_time*3) / 4;
else
es_stats->es_stats_scan_time = scan_time;
if (scan_time > es_stats->es_stats_max_scan_time)
es_stats->es_stats_max_scan_time = scan_time;
if (likely(es_stats->es_stats_shrunk))
es_stats->es_stats_shrunk = (nr_shrunk +
es_stats->es_stats_shrunk*3) / 4;
else
es_stats->es_stats_shrunk = nr_shrunk;
trace_ext4_es_shrink(sbi->s_sb, nr_shrunk, scan_time, skip_precached,
nr_skipped, retried);
return nr_shrunk;
}
......@@ -1020,8 +1055,8 @@ static unsigned long ext4_es_count(struct shrinker *shrink,
struct ext4_sb_info *sbi;
sbi = container_of(shrink, struct ext4_sb_info, s_es_shrinker);
nr = percpu_counter_read_positive(&sbi->s_extent_cache_cnt);
trace_ext4_es_shrink_enter(sbi->s_sb, sc->nr_to_scan, nr);
nr = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_lru_cnt);
trace_ext4_es_shrink_count(sbi->s_sb, sc->nr_to_scan, nr);
return nr;
}
......@@ -1033,31 +1068,160 @@ static unsigned long ext4_es_scan(struct shrinker *shrink,
int nr_to_scan = sc->nr_to_scan;
int ret, nr_shrunk;
ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt);
trace_ext4_es_shrink_enter(sbi->s_sb, nr_to_scan, ret);
ret = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_lru_cnt);
trace_ext4_es_shrink_scan_enter(sbi->s_sb, nr_to_scan, ret);
if (!nr_to_scan)
return ret;
nr_shrunk = __ext4_es_shrink(sbi, nr_to_scan, NULL);
trace_ext4_es_shrink_exit(sbi->s_sb, nr_shrunk, ret);
trace_ext4_es_shrink_scan_exit(sbi->s_sb, nr_shrunk, ret);
return nr_shrunk;
}
void ext4_es_register_shrinker(struct ext4_sb_info *sbi)
static void *ext4_es_seq_shrinker_info_start(struct seq_file *seq, loff_t *pos)
{
return *pos ? NULL : SEQ_START_TOKEN;
}
static void *
ext4_es_seq_shrinker_info_next(struct seq_file *seq, void *v, loff_t *pos)