Commit b93b0163, authored by Matthew Wilcox, committed by Linus Torvalds

page cache: use xa_lock

Remove the address_space ->tree_lock and use the xa_lock newly added to
the radix_tree_root.  Rename the address_space ->page_tree to ->i_pages,
since we don't really care that it's a tree.

[willy@infradead.org: fix nds32, fs/dax.c]
  Link: http://lkml.kernel.org/r/20180406145415.GB20605@bombadil.infradead.org
Link: http://lkml.kernel.org/r/20180313132639.17387-9-willy@infradead.org
Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
Acked-by: Jeff Layton <jlayton@redhat.com>
Cc: Darrick J. Wong <darrick.wong@oracle.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent f6bb2a2c
......@@ -262,7 +262,7 @@ When oom event notifier is registered, event will be delivered.
2.6 Locking
lock_page_cgroup()/unlock_page_cgroup() should not be called under
mapping->tree_lock.
the i_pages lock.
Other lock order is following:
PG_locked.
......
......@@ -90,7 +90,7 @@ Steps:
1. Lock the page to be migrated
2. Insure that writeback is complete.
2. Ensure that writeback is complete.
3. Lock the new page that we want to move to. It is locked so that accesses to
this (not yet uptodate) page immediately lock while the move is in progress.
......@@ -100,8 +100,8 @@ Steps:
mapcount is not zero then we do not migrate the page. All user space
processes that attempt to access the page will now wait on the page lock.
5. The radix tree lock is taken. This will cause all processes trying
to access the page via the mapping to block on the radix tree spinlock.
5. The i_pages lock is taken. This will cause all processes trying
to access the page via the mapping to block on the spinlock.
6. The refcount of the page is examined and we back out if references remain
otherwise we know that we are the only one referencing this page.
......@@ -114,12 +114,12 @@ Steps:
9. The radix tree is changed to point to the new page.
10. The reference count of the old page is dropped because the radix tree
10. The reference count of the old page is dropped because the address space
reference is gone. A reference to the new page is established because
the new page is referenced to by the radix tree.
the new page is referenced by the address space.
11. The radix tree lock is dropped. With that lookups in the mapping
become possible again. Processes will move from spinning on the tree_lock
11. The i_pages lock is dropped. With that lookups in the mapping
become possible again. Processes will move from spinning on the lock
to sleeping on the locked new page.
12. The page contents are copied to the new page.
......
......@@ -318,10 +318,8 @@ static inline void flush_anon_page(struct vm_area_struct *vma,
#define ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE
extern void flush_kernel_dcache_page(struct page *);
#define flush_dcache_mmap_lock(mapping) \
spin_lock_irq(&(mapping)->tree_lock)
#define flush_dcache_mmap_unlock(mapping) \
spin_unlock_irq(&(mapping)->tree_lock)
#define flush_dcache_mmap_lock(mapping) xa_lock_irq(&mapping->i_pages)
#define flush_dcache_mmap_unlock(mapping) xa_unlock_irq(&mapping->i_pages)
#define flush_icache_user_range(vma,page,addr,len) \
flush_dcache_page(page)
......
......@@ -34,8 +34,8 @@ void flush_anon_page(struct vm_area_struct *vma,
void flush_kernel_dcache_page(struct page *page);
void flush_icache_range(unsigned long start, unsigned long end);
void flush_icache_page(struct vm_area_struct *vma, struct page *page);
#define flush_dcache_mmap_lock(mapping) spin_lock_irq(&(mapping)->tree_lock)
#define flush_dcache_mmap_unlock(mapping) spin_unlock_irq(&(mapping)->tree_lock)
#define flush_dcache_mmap_lock(mapping) xa_lock_irq(&(mapping)->i_pages)
#define flush_dcache_mmap_unlock(mapping) xa_unlock_irq(&(mapping)->i_pages)
#else
#include <asm-generic/cacheflush.h>
......
......@@ -46,9 +46,7 @@ extern void copy_from_user_page(struct vm_area_struct *vma, struct page *page,
extern void flush_dcache_range(unsigned long start, unsigned long end);
extern void invalidate_dcache_range(unsigned long start, unsigned long end);
#define flush_dcache_mmap_lock(mapping) \
spin_lock_irq(&(mapping)->tree_lock)
#define flush_dcache_mmap_unlock(mapping) \
spin_unlock_irq(&(mapping)->tree_lock)
#define flush_dcache_mmap_lock(mapping) xa_lock_irq(&mapping->i_pages)
#define flush_dcache_mmap_unlock(mapping) xa_unlock_irq(&mapping->i_pages)
#endif /* _ASM_NIOS2_CACHEFLUSH_H */
......@@ -55,10 +55,8 @@ void invalidate_kernel_vmap_range(void *vaddr, int size);
#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
extern void flush_dcache_page(struct page *page);
#define flush_dcache_mmap_lock(mapping) \
spin_lock_irq(&(mapping)->tree_lock)
#define flush_dcache_mmap_unlock(mapping) \
spin_unlock_irq(&(mapping)->tree_lock)
#define flush_dcache_mmap_lock(mapping) xa_lock_irq(&mapping->i_pages)
#define flush_dcache_mmap_unlock(mapping) xa_unlock_irq(&mapping->i_pages)
#define flush_icache_page(vma,page) do { \
flush_kernel_dcache_page(page); \
......
......@@ -69,7 +69,7 @@ blkcnt_t dirty_cnt(struct inode *inode)
void *results[1];
if (inode->i_mapping)
cnt += radix_tree_gang_lookup_tag(&inode->i_mapping->page_tree,
cnt += radix_tree_gang_lookup_tag(&inode->i_mapping->i_pages,
results, 0, 1,
PAGECACHE_TAG_DIRTY);
if (cnt == 0 && atomic_read(&vob->vob_mmap_cnt) > 0)
......
......@@ -934,14 +934,14 @@ static struct page *mdc_page_locate(struct address_space *mapping, __u64 *hash,
struct page *page;
int found;
spin_lock_irq(&mapping->tree_lock);
found = radix_tree_gang_lookup(&mapping->page_tree,
xa_lock_irq(&mapping->i_pages);
found = radix_tree_gang_lookup(&mapping->i_pages,
(void **)&page, offset, 1);
if (found > 0 && !radix_tree_exceptional_entry(page)) {
struct lu_dirpage *dp;
get_page(page);
spin_unlock_irq(&mapping->tree_lock);
xa_unlock_irq(&mapping->i_pages);
/*
* In contrast to find_lock_page() we are sure that directory
* page cannot be truncated (while DLM lock is held) and,
......@@ -989,7 +989,7 @@ static struct page *mdc_page_locate(struct address_space *mapping, __u64 *hash,
page = ERR_PTR(-EIO);
}
} else {
spin_unlock_irq(&mapping->tree_lock);
xa_unlock_irq(&mapping->i_pages);
page = NULL;
}
return page;
......
......@@ -570,10 +570,11 @@ static int afs_writepages_region(struct address_space *mapping,
_debug("wback %lx", page->index);
/* at this point we hold neither mapping->tree_lock nor lock on
* the page itself: the page may be truncated or invalidated
* (changing page->mapping to NULL), or even swizzled back from
* swapper_space to tmpfs file mapping
/*
* at this point we hold neither the i_pages lock nor the
* page lock: the page may be truncated or invalidated
* (changing page->mapping to NULL), or even swizzled
* back from swapper_space to tmpfs file mapping
*/
ret = lock_page_killable(page);
if (ret < 0) {
......
......@@ -458,7 +458,7 @@ static noinline int add_ra_bio_pages(struct inode *inode,
break;
rcu_read_lock();
page = radix_tree_lookup(&mapping->page_tree, pg_index);
page = radix_tree_lookup(&mapping->i_pages, pg_index);
rcu_read_unlock();
if (page && !radix_tree_exceptional_entry(page)) {
misses++;
......
......@@ -3963,11 +3963,11 @@ static int extent_write_cache_pages(struct address_space *mapping,
done_index = page->index;
/*
* At this point we hold neither mapping->tree_lock nor
* lock on the page itself: the page may be truncated or
* invalidated (changing page->mapping to NULL), or even
* swizzled back from swapper_space to tmpfs file
* mapping
* At this point we hold neither the i_pages lock nor
* the page lock: the page may be truncated or
* invalidated (changing page->mapping to NULL),
* or even swizzled back from swapper_space to
* tmpfs file mapping
*/
if (!trylock_page(page)) {
flush_write_bio(epd);
......@@ -5174,13 +5174,13 @@ void clear_extent_buffer_dirty(struct extent_buffer *eb)
WARN_ON(!PagePrivate(page));
clear_page_dirty_for_io(page);
spin_lock_irq(&page->mapping->tree_lock);
xa_lock_irq(&page->mapping->i_pages);
if (!PageDirty(page)) {
radix_tree_tag_clear(&page->mapping->page_tree,
radix_tree_tag_clear(&page->mapping->i_pages,
page_index(page),
PAGECACHE_TAG_DIRTY);
}
spin_unlock_irq(&page->mapping->tree_lock);
xa_unlock_irq(&page->mapping->i_pages);
ClearPageError(page);
unlock_page(page);
}
......
......@@ -185,10 +185,9 @@ EXPORT_SYMBOL(end_buffer_write_sync);
* we get exclusion from try_to_free_buffers with the blockdev mapping's
* private_lock.
*
* Hack idea: for the blockdev mapping, i_bufferlist_lock contention
* Hack idea: for the blockdev mapping, private_lock contention
* may be quite high. This code could TryLock the page, and if that
* succeeds, there is no need to take private_lock. (But if
* private_lock is contended then so is mapping->tree_lock).
* succeeds, there is no need to take private_lock.
*/
static struct buffer_head *
__find_get_block_slow(struct block_device *bdev, sector_t block)
......@@ -599,14 +598,14 @@ void __set_page_dirty(struct page *page, struct address_space *mapping,
{
unsigned long flags;
spin_lock_irqsave(&mapping->tree_lock, flags);
xa_lock_irqsave(&mapping->i_pages, flags);
if (page->mapping) { /* Race with truncate? */
WARN_ON_ONCE(warn && !PageUptodate(page));
account_page_dirtied(page, mapping);
radix_tree_tag_set(&mapping->page_tree,
radix_tree_tag_set(&mapping->i_pages,
page_index(page), PAGECACHE_TAG_DIRTY);
}
spin_unlock_irqrestore(&mapping->tree_lock, flags);
xa_unlock_irqrestore(&mapping->i_pages, flags);
}
EXPORT_SYMBOL_GPL(__set_page_dirty);
......@@ -1096,7 +1095,7 @@ __getblk_slow(struct block_device *bdev, sector_t block,
* inode list.
*
* mark_buffer_dirty() is atomic. It takes bh->b_page->mapping->private_lock,
* mapping->tree_lock and mapping->host->i_lock.
* i_pages lock and mapping->host->i_lock.
*/
void mark_buffer_dirty(struct buffer_head *bh)
{
......
......@@ -1987,11 +1987,10 @@ wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
for (i = 0; i < found_pages; i++) {
page = wdata->pages[i];
/*
* At this point we hold neither mapping->tree_lock nor
* lock on the page itself: the page may be truncated or
* invalidated (changing page->mapping to NULL), or even
* swizzled back from swapper_space to tmpfs file
* mapping
* At this point we hold neither the i_pages lock nor the
* page lock: the page may be truncated or invalidated
* (changing page->mapping to NULL), or even swizzled
* back from swapper_space to tmpfs file mapping
*/
if (nr_pages == 0)
......
This diff is collapsed.
......@@ -2424,12 +2424,12 @@ void f2fs_set_page_dirty_nobuffers(struct page *page)
SetPageDirty(page);
spin_unlock(&mapping->private_lock);
spin_lock_irqsave(&mapping->tree_lock, flags);
xa_lock_irqsave(&mapping->i_pages, flags);
WARN_ON_ONCE(!PageUptodate(page));
account_page_dirtied(page, mapping);
radix_tree_tag_set(&mapping->page_tree,
radix_tree_tag_set(&mapping->i_pages,
page_index(page), PAGECACHE_TAG_DIRTY);
spin_unlock_irqrestore(&mapping->tree_lock, flags);
xa_unlock_irqrestore(&mapping->i_pages, flags);
unlock_page_memcg(page);
__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
......
......@@ -732,10 +732,10 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
if (bit_pos == NR_DENTRY_IN_BLOCK &&
!truncate_hole(dir, page->index, page->index + 1)) {
spin_lock_irqsave(&mapping->tree_lock, flags);
radix_tree_tag_clear(&mapping->page_tree, page_index(page),
xa_lock_irqsave(&mapping->i_pages, flags);
radix_tree_tag_clear(&mapping->i_pages, page_index(page),
PAGECACHE_TAG_DIRTY);
spin_unlock_irqrestore(&mapping->tree_lock, flags);
xa_unlock_irqrestore(&mapping->i_pages, flags);
clear_page_dirty_for_io(page);
ClearPagePrivate(page);
......
......@@ -226,10 +226,10 @@ int f2fs_write_inline_data(struct inode *inode, struct page *page)
kunmap_atomic(src_addr);
set_page_dirty(dn.inode_page);
spin_lock_irqsave(&mapping->tree_lock, flags);
radix_tree_tag_clear(&mapping->page_tree, page_index(page),
xa_lock_irqsave(&mapping->i_pages, flags);
radix_tree_tag_clear(&mapping->i_pages, page_index(page),
PAGECACHE_TAG_DIRTY);
spin_unlock_irqrestore(&mapping->tree_lock, flags);
xa_unlock_irqrestore(&mapping->i_pages, flags);
set_inode_flag(inode, FI_APPEND_WRITE);
set_inode_flag(inode, FI_DATA_EXIST);
......
......@@ -91,11 +91,11 @@ static void clear_node_page_dirty(struct page *page)
unsigned int long flags;
if (PageDirty(page)) {
spin_lock_irqsave(&mapping->tree_lock, flags);
radix_tree_tag_clear(&mapping->page_tree,
xa_lock_irqsave(&mapping->i_pages, flags);
radix_tree_tag_clear(&mapping->i_pages,
page_index(page),
PAGECACHE_TAG_DIRTY);
spin_unlock_irqrestore(&mapping->tree_lock, flags);
xa_unlock_irqrestore(&mapping->i_pages, flags);
clear_page_dirty_for_io(page);
dec_page_count(F2FS_M_SB(mapping), F2FS_DIRTY_NODES);
......@@ -1161,7 +1161,7 @@ void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid)
f2fs_bug_on(sbi, check_nid_range(sbi, nid));
rcu_read_lock();
apage = radix_tree_lookup(&NODE_MAPPING(sbi)->page_tree, nid);
apage = radix_tree_lookup(&NODE_MAPPING(sbi)->i_pages, nid);
rcu_read_unlock();
if (apage)
return;
......
......@@ -347,9 +347,9 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
* By the time control reaches here, RCU grace period has passed
* since I_WB_SWITCH assertion and all wb stat update transactions
* between unlocked_inode_to_wb_begin/end() are guaranteed to be
* synchronizing against mapping->tree_lock.
* synchronizing against the i_pages lock.
*
* Grabbing old_wb->list_lock, inode->i_lock and mapping->tree_lock
* Grabbing old_wb->list_lock, inode->i_lock and the i_pages lock
* gives us exclusion against all wb related operations on @inode
* including IO list manipulations and stat updates.
*/
......@@ -361,7 +361,7 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
spin_lock_nested(&old_wb->list_lock, SINGLE_DEPTH_NESTING);
}
spin_lock(&inode->i_lock);
spin_lock_irq(&mapping->tree_lock);
xa_lock_irq(&mapping->i_pages);
/*
* Once I_FREEING is visible under i_lock, the eviction path owns
......@@ -373,22 +373,22 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
/*
* Count and transfer stats. Note that PAGECACHE_TAG_DIRTY points
* to possibly dirty pages while PAGECACHE_TAG_WRITEBACK points to
* pages actually under underwriteback.
* pages actually under writeback.
*/
radix_tree_for_each_tagged(slot, &mapping->page_tree, &iter, 0,
radix_tree_for_each_tagged(slot, &mapping->i_pages, &iter, 0,
PAGECACHE_TAG_DIRTY) {
struct page *page = radix_tree_deref_slot_protected(slot,
&mapping->tree_lock);
&mapping->i_pages.xa_lock);
if (likely(page) && PageDirty(page)) {
dec_wb_stat(old_wb, WB_RECLAIMABLE);
inc_wb_stat(new_wb, WB_RECLAIMABLE);
}
}
radix_tree_for_each_tagged(slot, &mapping->page_tree, &iter, 0,
radix_tree_for_each_tagged(slot, &mapping->i_pages, &iter, 0,
PAGECACHE_TAG_WRITEBACK) {
struct page *page = radix_tree_deref_slot_protected(slot,
&mapping->tree_lock);
&mapping->i_pages.xa_lock);
if (likely(page)) {
WARN_ON_ONCE(!PageWriteback(page));
dec_wb_stat(old_wb, WB_WRITEBACK);
......@@ -430,7 +430,7 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
*/
smp_store_release(&inode->i_state, inode->i_state & ~I_WB_SWITCH);
spin_unlock_irq(&mapping->tree_lock);
xa_unlock_irq(&mapping->i_pages);
spin_unlock(&inode->i_lock);
spin_unlock(&new_wb->list_lock);
spin_unlock(&old_wb->list_lock);
......@@ -506,8 +506,8 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
/*
* In addition to synchronizing among switchers, I_WB_SWITCH tells
* the RCU protected stat update paths to grab the mapping's
* tree_lock so that stat transfer can synchronize against them.
* the RCU protected stat update paths to grab the i_pages
* lock so that stat transfer can synchronize against them.
* Let's continue after I_WB_SWITCH is guaranteed to be visible.
*/
call_rcu(&isw->rcu_head, inode_switch_wbs_rcu_fn);
......
......@@ -348,8 +348,7 @@ EXPORT_SYMBOL(inc_nlink);
static void __address_space_init_once(struct address_space *mapping)
{
INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC | __GFP_ACCOUNT);
spin_lock_init(&mapping->tree_lock);
INIT_RADIX_TREE(&mapping->i_pages, GFP_ATOMIC | __GFP_ACCOUNT);
init_rwsem(&mapping->i_mmap_rwsem);
INIT_LIST_HEAD(&mapping->private_list);
spin_lock_init(&mapping->private_lock);
......@@ -504,14 +503,14 @@ EXPORT_SYMBOL(__remove_inode_hash);
void clear_inode(struct inode *inode)
{
/*
* We have to cycle tree_lock here because reclaim can be still in the
* We have to cycle the i_pages lock here because reclaim can be in the
* process of removing the last page (in __delete_from_page_cache())
* and we must not free mapping under it.
* and we must not free the mapping under it.
*/
spin_lock_irq(&inode->i_data.tree_lock);
xa_lock_irq(&inode->i_data.i_pages);
BUG_ON(inode->i_data.nrpages);
BUG_ON(inode->i_data.nrexceptional);
spin_unlock_irq(&inode->i_data.tree_lock);
xa_unlock_irq(&inode->i_data.i_pages);
BUG_ON(!list_empty(&inode->i_data.private_list));
BUG_ON(!(inode->i_state & I_FREEING));
BUG_ON(inode->i_state & I_CLEAR);
......
......@@ -193,9 +193,9 @@ int nilfs_btnode_prepare_change_key(struct address_space *btnc,
(unsigned long long)oldkey,
(unsigned long long)newkey);
spin_lock_irq(&btnc->tree_lock);
err = radix_tree_insert(&btnc->page_tree, newkey, obh->b_page);
spin_unlock_irq(&btnc->tree_lock);
xa_lock_irq(&btnc->i_pages);
err = radix_tree_insert(&btnc->i_pages, newkey, obh->b_page);
xa_unlock_irq(&btnc->i_pages);
/*
* Note: page->index will not change to newkey until
* nilfs_btnode_commit_change_key() will be called.
......@@ -251,11 +251,11 @@ void nilfs_btnode_commit_change_key(struct address_space *btnc,
(unsigned long long)newkey);
mark_buffer_dirty(obh);
spin_lock_irq(&btnc->tree_lock);
radix_tree_delete(&btnc->page_tree, oldkey);
radix_tree_tag_set(&btnc->page_tree, newkey,
xa_lock_irq(&btnc->i_pages);
radix_tree_delete(&btnc->i_pages, oldkey);
radix_tree_tag_set(&btnc->i_pages, newkey,
PAGECACHE_TAG_DIRTY);
spin_unlock_irq(&btnc->tree_lock);
xa_unlock_irq(&btnc->i_pages);
opage->index = obh->b_blocknr = newkey;
unlock_page(opage);
......@@ -283,9 +283,9 @@ void nilfs_btnode_abort_change_key(struct address_space *btnc,
return;
if (nbh == NULL) { /* blocksize == pagesize */
spin_lock_irq(&btnc->tree_lock);
radix_tree_delete(&btnc->page_tree, newkey);
spin_unlock_irq(&btnc->tree_lock);
xa_lock_irq(&btnc->i_pages);
radix_tree_delete(&btnc->i_pages, newkey);
xa_unlock_irq(&btnc->i_pages);
unlock_page(ctxt->bh->b_page);
} else
brelse(nbh);
......
......@@ -331,15 +331,15 @@ void nilfs_copy_back_pages(struct address_space *dmap,
struct page *page2;
/* move the page to the destination cache */
spin_lock_irq(&smap->tree_lock);
page2 = radix_tree_delete(&smap->page_tree, offset);
xa_lock_irq(&smap->i_pages);
page2 = radix_tree_delete(&smap->i_pages, offset);
WARN_ON(page2 != page);
smap->nrpages--;
spin_unlock_irq(&smap->tree_lock);
xa_unlock_irq(&smap->i_pages);
spin_lock_irq(&dmap->tree_lock);
err = radix_tree_insert(&dmap->page_tree, offset, page);
xa_lock_irq(&dmap->i_pages);
err = radix_tree_insert(&dmap->i_pages, offset, page);
if (unlikely(err < 0)) {
WARN_ON(err == -EEXIST);
page->mapping = NULL;
......@@ -348,11 +348,11 @@ void nilfs_copy_back_pages(struct address_space *dmap,
page->mapping = dmap;
dmap->nrpages++;
if (PageDirty(page))
radix_tree_tag_set(&dmap->page_tree,
radix_tree_tag_set(&dmap->i_pages,
offset,
PAGECACHE_TAG_DIRTY);
}
spin_unlock_irq(&dmap->tree_lock);
xa_unlock_irq(&dmap->i_pages);
}
unlock_page(page);
}
......@@ -474,15 +474,15 @@ int __nilfs_clear_page_dirty(struct page *page)
struct address_space *mapping = page->mapping;
if (mapping) {
spin_lock_irq(&mapping->tree_lock);
xa_lock_irq(&mapping->i_pages);
if (test_bit(PG_dirty, &page->flags)) {
radix_tree_tag_clear(&mapping->page_tree,
radix_tree_tag_clear(&mapping->i_pages,
page_index(page),
PAGECACHE_TAG_DIRTY);
spin_unlock_irq(&mapping->tree_lock);
xa_unlock_irq(&mapping->i_pages);
return clear_page_dirty_for_io(page);
}
spin_unlock_irq(&mapping->tree_lock);
xa_unlock_irq(&mapping->i_pages);
return 0;
}
return TestClearPageDirty(page);
......
......@@ -329,7 +329,7 @@ static inline bool inode_to_wb_is_valid(struct inode *inode)
* @inode: inode of interest
*
* Returns the wb @inode is currently associated with. The caller must be
* holding either @inode->i_lock, @inode->i_mapping->tree_lock, or the
* holding either @inode->i_lock, the i_pages lock, or the
* associated wb's list_lock.
*/
static inline struct bdi_writeback *inode_to_wb(const struct inode *inode)
......@@ -337,7 +337,7 @@ static inline struct bdi_writeback *inode_to_wb(const struct inode *inode)
#ifdef CONFIG_LOCKDEP
WARN_ON_ONCE(debug_locks &&
(!lockdep_is_held(&inode->i_lock) &&
!lockdep_is_held(&inode->i_mapping->tree_lock) &&
!lockdep_is_held(&inode->i_mapping->i_pages.xa_lock) &&
!lockdep_is_held(&inode->i_wb->list_lock)));
#endif
return inode->i_wb;
......@@ -349,7 +349,7 @@ static inline struct bdi_writeback *inode_to_wb(const struct inode *inode)
* @lockedp: temp bool output param, to be passed to the end function
*
* The caller wants to access the wb associated with @inode but isn't
* holding inode->i_lock, mapping->tree_lock or wb->list_lock. This
* holding inode->i_lock, the i_pages lock or wb->list_lock. This
* function determines the wb associated with @inode and ensures that the
* association doesn't change until the transaction is finished with
* unlocked_inode_to_wb_end().
......@@ -370,11 +370,11 @@ unlocked_inode_to_wb_begin(struct inode *inode, bool *lockedp)
*lockedp = smp_load_acquire(&inode->i_state) & I_WB_SWITCH;
if (unlikely(*lockedp))
spin_lock_irq(&inode->i_mapping->tree_lock);
xa_lock_irq(&inode->i_mapping->i_pages);
/*
* Protected by either !I_WB_SWITCH + rcu_read_lock() or tree_lock.
* inode_to_wb() will bark. Deref directly.
* Protected by either !I_WB_SWITCH + rcu_read_lock() or the i_pages
* lock. inode_to_wb() will bark. Deref directly.
*/
return inode->i_wb;
}
......@@ -387,7 +387,7 @@ unlocked_inode_to_wb_begin(struct inode *inode, bool *lockedp)
static inline void unlocked_inode_to_wb_end(struct inode *inode, bool locked)
{
if (unlikely(locked))
spin_unlock_irq(&inode->i_mapping->tree_lock);
xa_unlock_irq(&inode->i_mapping->i_pages);
rcu_read_unlock();
}
......
......@@ -13,6 +13,7 @@
#include <linux/list_lru.h>
#include <linux/llist.h>
#include <linux/radix-tree.h>
#include <linux/xarray.h>
#include <linux/rbtree.h>
#include <linux/init.h>
#include <linux/pid.h>
......@@ -390,12 +391,11 @@ int pagecache_write_end(struct file *, struct address_space *mapping,
struct address_space {
struct inode *host; /* owner: inode, block_device */
struct radix_tree_root page_tree; /* radix tree of all pages */
spinlock_t tree_lock; /* and lock protecting it */
struct radix_tree_root i_pages; /* cached pages */
atomic_t i_mmap_writable;/* count VM_SHARED mappings */
struct rb_root_cached i_mmap; /* tree of private and shared mappings */
struct rw_semaphore i_mmap_rwsem; /* protect tree, count, list */
/* Protected by tree_lock together with the radix tree */
/* Protected by the i_pages lock */
unsigned long nrpages; /* number of total pages */
/* number of shadow or DAX exceptional entries */
unsigned long nrexceptional;
......@@ -1989,7 +1989,7 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
*
* I_WB_SWITCH Cgroup bdi_writeback switching in progress. Used to
* synchronize competing switching instances and to tell
* wb stat updates to grab mapping->tree_lock. See
* wb stat updates to grab the i_pages lock. See
* inode_switch_wbs_work_fn() for details.
*
* I_OVL_INUSE Used by overlayfs to get exclusive ownership on upper
......
......@@ -747,7 +747,7 @@ int finish_mkwrite_fault(struct vm_fault *vmf);
* refcount. The each user mapping also has a reference to the page.
*
* The pagecache pages are stored in a per-mapping radix tree, which is
* rooted at mapping->page_tree, and indexed by offset.
* rooted at mapping->i_pages, and indexed by offset.
* Where 2.4 and early 2.6 kernels kept dirty/clean pages in per-address_space
* lists, we instead now tag pages as dirty/writeback in the radix tree.
*
......
......@@ -144,7 +144,7 @@ void release_pages(struct page **pages, int nr);
* 3. check the page is still in pagecache (if no, goto 1)
*
* Remove-side that cares about stability of _refcount (eg. reclaim) has the
* following (with tree_lock held for write):
* following (with the i_pages lock held):
* A. atomically check refcount is correct and set it to 0 (atomic_cmpxchg)
* B. remove page from pagecache
* C. free the page
......@@ -157,7 +157,7 @@ void release_pages(struct page **pages, int nr);
*
* It is possible that between 1 and 2, the page is removed then the exact same
* page is inserted into the same position in pagecache. That's OK: the
* old find_get_page using tree_lock could equally have run before or after
* old find_get_page using a lock could equally have run before or after
* such a re-insertion, depending on order that locks are granted.
*
* Lookups racing against pagecache insertion isn't a big problem: either 1
......
This diff is collapsed.
......@@ -2450,7 +2450,7 @@ static void __split_huge_page(struct page *page, struct list_head *list,
} else {
/* Additional pin to radix tree */
page_ref_add(head, 2);
spin_unlock(&head->mapping->tree_lock);
xa_unlock(&head->mapping->i_pages);
}
spin_unlock_irqrestore(zone_lru_lock(page_zone(head)), flags);
......@@ -2658,15 +2658,15 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
if (mapping) {
void **pslot;
spin_lock(&mapping->tree_lock);
pslot = radix_tree_lookup_slot(&mapping->page_tree,
xa_lock(&mapping->i_pages);
pslot = radix_tree_lookup_slot(&mapping->i_pages,
page_index(head));
/*
* Check if the head page is present in radix tree.
* We assume all tail are present too, if head is there.
*/
if (radix_tree_deref_slot_protected(pslot,
&mapping->tree_lock) != head)
&mapping->i_pages.xa_lock) != head)
goto fail;
}
......@@ -2700,7 +2700,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
}
spin_unlock(&pgdata->split_queue_lock);
fail: if (mapping)
spin_unlock(&mapping->tree_lock);
xa_unlock(&mapping->i_pages);
spin_unlock_irqrestore(zone_lru_lock(page_zone(head)), flags);
unfreeze_page(head);
ret = -EBUSY;
......
......@@ -1344,8 +1344,8 @@ static void collapse_shmem(struct mm_struct *mm,
*/
index = start;
spin_lock_irq(&mapping->tree_lock);
radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
xa_lock_irq(&mapping->i_pages);
radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) {
int n = min(iter.index, end) - index;
/*
......@@ -1358,7 +1358,7 @@ static void collapse_shmem(struct mm_struct *mm,
}
nr_none += n;
for (; index < min(iter.index, end); index++) {
radix_tree_insert(&mapping->page_tree, index,
radix_tree_insert(&mapping->i_pages, index,
new_page + (index % HPAGE_PMD_NR));
}
......@@ -1367,16 +1367,16 @@ static void collapse_shmem(struct mm_struct *mm,
break;
page = radix_tree_deref_slot_protected(slot,
&mapping->tree_lock);
&mapping->i_pages.xa_lock);
if (radix_tree_exceptional_entry(page) || !PageUptodate(page)) {
spin_unlock_irq(&mapping->tree_lock);
xa_unlock_irq(&mapping->i_pages);
/* swap in or instantiate fallocated page */
if (shmem_getpage(mapping->host, index, &page,
SGP_NOHUGE)) {
result = SCAN_FAIL;
goto tree_unlocked;
}
spin_lock_irq(&mapping->tree_lock);
xa_lock_irq(&mapping->i_pages);
} else if (trylock_page(page)) {
get_page(page);
} else {
......@@ -1385,7 +1385,7 @@ static void collapse_shmem(struct mm_struct *mm,
}
/*
* The page must be locked, so we can drop the tree_lock
* The page must be locked, so we can drop the i_pages lock
* without racing with truncate.
*/
VM_BUG_ON_PAGE(!PageLocked(page), page);
......@@ -1396,7 +1396,7 @@ static void collapse_shmem(struct mm_struct *mm,
result = SCAN_TRUNCATED;
goto out_unlock;
}