Newer
Older
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2010 Red Hat, Inc.
* Copyright (C) 2016-2019 Christoph Hellwig.
*/
#include <linux/module.h>
#include <linux/compiler.h>
#include <linux/fs.h>
#include <linux/iomap.h>
#include <linux/pagemap.h>
#include <linux/uio.h>
#include <linux/buffer_head.h>
#include <linux/dax.h>
#include <linux/writeback.h>
#include <linux/list_sort.h>
#include <linux/swap.h>
#include <linux/bio.h>
#include <linux/sched/signal.h>
#include <linux/migrate.h>
#include "../internal.h"
* Structure allocated for each page or THP when block size < page size
* to track sub-page uptodate status and I/O completions.
*/
struct iomap_page {
atomic_t read_bytes_pending;
atomic_t write_bytes_pending;
unsigned long uptodate[];
};
static inline struct iomap_page *to_iomap_page(struct page *page)
{
/*
* per-block data is stored in the head page. Callers should
* not be dealing with tail pages, and if they are, they can
* call thp_head() first.
*/
VM_BUG_ON_PGFLAGS(PageTail(page), page);
if (page_has_private(page))
return (struct iomap_page *)page_private(page);
return NULL;
}
static struct bio_set iomap_ioend_bioset;
static struct iomap_page *
iomap_page_create(struct inode *inode, struct page *page)
{
struct iomap_page *iop = to_iomap_page(page);
unsigned int nr_blocks = i_blocks_per_page(inode, page);
if (iop || nr_blocks <= 1)
iop = kzalloc(struct_size(iop, uptodate, BITS_TO_LONGS(nr_blocks)),
GFP_NOFS | __GFP_NOFAIL);
spin_lock_init(&iop->uptodate_lock);
if (PageUptodate(page))
bitmap_fill(iop->uptodate, nr_blocks);
return iop;
}
static void
iomap_page_release(struct page *page)
{
struct iomap_page *iop = detach_page_private(page);
unsigned int nr_blocks = i_blocks_per_page(page->mapping->host, page);
if (!iop)
return;
WARN_ON_ONCE(atomic_read(&iop->read_bytes_pending));
WARN_ON_ONCE(atomic_read(&iop->write_bytes_pending));
WARN_ON_ONCE(bitmap_full(iop->uptodate, nr_blocks) !=
PageUptodate(page));
kfree(iop);
}
/*
* Calculate the range inside the page that we actually need to read.
*/
static void
iomap_adjust_read_range(struct inode *inode, struct iomap_page *iop,
loff_t *pos, loff_t length, unsigned *offp, unsigned *lenp)
{
loff_t orig_pos = *pos;
loff_t isize = i_size_read(inode);
unsigned block_bits = inode->i_blkbits;
unsigned block_size = (1 << block_bits);
unsigned poff = offset_in_page(*pos);
unsigned plen = min_t(loff_t, PAGE_SIZE - poff, length);
unsigned first = poff >> block_bits;
unsigned last = (poff + plen - 1) >> block_bits;
/*
* If the block size is smaller than the page size, we need to check the
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
* per-block uptodate status and adjust the offset and length if needed
* to avoid reading in already uptodate ranges.
*/
if (iop) {
unsigned int i;
/* move forward for each leading block marked uptodate */
for (i = first; i <= last; i++) {
if (!test_bit(i, iop->uptodate))
break;
*pos += block_size;
poff += block_size;
plen -= block_size;
first++;
}
/* truncate len if we find any trailing uptodate block(s) */
for ( ; i <= last; i++) {
if (test_bit(i, iop->uptodate)) {
plen -= (last - i + 1) * block_size;
last = i - 1;
break;
}
}
}
/*
* If the extent spans the block that contains the i_size, we need to
* handle both halves separately so that we properly zero data in the
* page cache for blocks that are entirely outside of i_size.
*/
if (orig_pos <= isize && orig_pos + length > isize) {
unsigned end = offset_in_page(isize - 1) >> block_bits;
if (first <= end && last > end)
plen -= (last - end) * block_size;
}
*offp = poff;
*lenp = plen;
}
static void
iomap_iop_set_range_uptodate(struct page *page, unsigned off, unsigned len)
{
struct iomap_page *iop = to_iomap_page(page);
struct inode *inode = page->mapping->host;
unsigned first = off >> inode->i_blkbits;
unsigned last = (off + len - 1) >> inode->i_blkbits;
spin_lock_irqsave(&iop->uptodate_lock, flags);
bitmap_set(iop->uptodate, first, last - first + 1);
if (bitmap_full(iop->uptodate, i_blocks_per_page(inode, page)))
SetPageUptodate(page);
spin_unlock_irqrestore(&iop->uptodate_lock, flags);
}
static void
iomap_set_range_uptodate(struct page *page, unsigned off, unsigned len)
{
if (PageError(page))
return;
if (page_has_private(page))
iomap_iop_set_range_uptodate(page, off, len);
else
SetPageUptodate(page);
}
static void
iomap_read_page_end_io(struct bio_vec *bvec, int error)
{
struct page *page = bvec->bv_page;
struct iomap_page *iop = to_iomap_page(page);
if (unlikely(error)) {
ClearPageUptodate(page);
SetPageError(page);
} else {
iomap_set_range_uptodate(page, bvec->bv_offset, bvec->bv_len);
}
if (!iop || atomic_sub_and_test(bvec->bv_len, &iop->read_bytes_pending))
unlock_page(page);
}
static void
iomap_read_end_io(struct bio *bio)
{
int error = blk_status_to_errno(bio->bi_status);
struct bio_vec *bvec;
struct bvec_iter_all iter_all;
bio_for_each_segment_all(bvec, bio, iter_all)
iomap_read_page_end_io(bvec, error);
bio_put(bio);
}
struct iomap_readpage_ctx {
struct page *cur_page;
bool cur_page_in_bio;
struct bio *bio;
struct readahead_control *rac;
/**
* iomap_read_inline_data - copy inline data into the page cache
* @iter: iteration structure
* @page: page to copy to
*
* Copy the inline data in @iter into @page and zero out the rest of the page.
* Only a single IOMAP_INLINE extent is allowed at the end of each file.
* Returns zero for success to complete the read, or the usual negative errno.
*/
static int iomap_read_inline_data(const struct iomap_iter *iter,
struct page *page)
const struct iomap *iomap = iomap_iter_srcmap(iter);
size_t size = i_size_read(iter->inode) - iomap->offset;
size_t poff = offset_in_page(iomap->offset);
void *addr;
if (PageUptodate(page))
if (WARN_ON_ONCE(size > PAGE_SIZE - poff))
return -EIO;
if (WARN_ON_ONCE(size > PAGE_SIZE -
offset_in_page(iomap->inline_data)))
return -EIO;
if (WARN_ON_ONCE(size > iomap->length))
return -EIO;
if (poff > 0)
iomap_page_create(iter->inode, page);
addr = kmap_local_page(page) + poff;
memcpy(addr, iomap->inline_data, size);
memset(addr + size, 0, PAGE_SIZE - poff - size);
kunmap_local(addr);
iomap_set_range_uptodate(page, poff, PAGE_SIZE - poff);
static inline bool iomap_block_needs_zeroing(const struct iomap_iter *iter,
loff_t pos)
const struct iomap *srcmap = iomap_iter_srcmap(iter);
return srcmap->type != IOMAP_MAPPED ||
(srcmap->flags & IOMAP_F_NEW) ||
pos >= i_size_read(iter->inode);
static loff_t iomap_readpage_iter(const struct iomap_iter *iter,
struct iomap_readpage_ctx *ctx, loff_t offset)
const struct iomap *iomap = &iter->iomap;
loff_t pos = iter->pos + offset;
loff_t length = iomap_length(iter) - offset;
struct page *page = ctx->cur_page;
struct iomap_page *iop;
loff_t orig_pos = pos;
unsigned poff, plen;
sector_t sector;
if (iomap->type == IOMAP_INLINE)
return iomap_read_inline_data(iter, page);
/* zero post-eof blocks as the page may be mapped */
iop = iomap_page_create(iter->inode, page);
iomap_adjust_read_range(iter->inode, iop, &pos, length, &poff, &plen);
if (plen == 0)
goto done;
if (iomap_block_needs_zeroing(iter, pos)) {
zero_user(page, poff, plen);
iomap_set_range_uptodate(page, poff, plen);
goto done;
}
ctx->cur_page_in_bio = true;
if (iop)
atomic_add(plen, &iop->read_bytes_pending);
sector = iomap_sector(iomap, pos);
if (!ctx->bio ||
bio_end_sector(ctx->bio) != sector ||
bio_add_page(ctx->bio, page, plen, poff) != plen) {
gfp_t gfp = mapping_gfp_constraint(page->mapping, GFP_KERNEL);
gfp_t orig_gfp = gfp;
unsigned int nr_vecs = DIV_ROUND_UP(length, PAGE_SIZE);
if (ctx->bio)
submit_bio(ctx->bio);
if (ctx->rac) /* same as readahead_gfp_mask */
gfp |= __GFP_NORETRY | __GFP_NOWARN;
ctx->bio = bio_alloc(gfp, bio_max_segs(nr_vecs));
/*
* If the bio_alloc fails, try it again for a single page to
* avoid having to deal with partial page reads. This emulates
* what do_mpage_readpage does.
*/
if (!ctx->bio)
ctx->bio = bio_alloc(orig_gfp, 1);
ctx->bio->bi_opf = REQ_OP_READ;
ctx->bio->bi_opf |= REQ_RAHEAD;
ctx->bio->bi_iter.bi_sector = sector;
bio_set_dev(ctx->bio, iomap->bdev);
ctx->bio->bi_end_io = iomap_read_end_io;
__bio_add_page(ctx->bio, page, plen, poff);
}
done:
/*
* Move the caller beyond our range so that it keeps making progress.
* For that, we have to include any leading non-uptodate ranges, but
* we can skip trailing ones as they will be handled in the next
* iteration.
*/
return pos - orig_pos + plen;
}
int
iomap_readpage(struct page *page, const struct iomap_ops *ops)
{
struct iomap_iter iter = {
.inode = page->mapping->host,
.pos = page_offset(page),
.len = PAGE_SIZE,
};
struct iomap_readpage_ctx ctx = {
.cur_page = page,
};
int ret;
trace_iomap_readpage(page->mapping->host, 1);
while ((ret = iomap_iter(&iter, ops)) > 0)
iter.processed = iomap_readpage_iter(&iter, &ctx, 0);
if (ret < 0)
SetPageError(page);
if (ctx.bio) {
submit_bio(ctx.bio);
WARN_ON_ONCE(!ctx.cur_page_in_bio);
} else {
WARN_ON_ONCE(ctx.cur_page_in_bio);
unlock_page(page);
}
/*
* Just like mpage_readahead and block_read_full_page, we always
* return 0 and just mark the page as PageError on errors. This
* should be cleaned up throughout the stack eventually.
*/
return 0;
}
EXPORT_SYMBOL_GPL(iomap_readpage);
static loff_t iomap_readahead_iter(const struct iomap_iter *iter,
struct iomap_readpage_ctx *ctx)
loff_t length = iomap_length(iter);
loff_t done, ret;
for (done = 0; done < length; done += ret) {
if (ctx->cur_page && offset_in_page(iter->pos + done) == 0) {
if (!ctx->cur_page_in_bio)
unlock_page(ctx->cur_page);
put_page(ctx->cur_page);
ctx->cur_page = NULL;
}
if (!ctx->cur_page) {
ctx->cur_page = readahead_page(ctx->rac);
ctx->cur_page_in_bio = false;
}
ret = iomap_readpage_iter(iter, ctx, done);
if (ret <= 0)
return ret;
}
return done;
}
/**
* iomap_readahead - Attempt to read pages from a file.
* @rac: Describes the pages to be read.
* @ops: The operations vector for the filesystem.
*
* This function is for filesystems to call to implement their readahead
* address_space operation.
*
* Context: The @ops callbacks may submit I/O (eg to read the addresses of
* blocks from disc), and may wait for it. The caller may be trying to
* access a different page, and so sleeping excessively should be avoided.
* It may allocate memory, but should avoid costly allocations. This
* function is called with memalloc_nofs set, so allocations will not cause
* the filesystem to be reentered.
*/
void iomap_readahead(struct readahead_control *rac, const struct iomap_ops *ops)
struct iomap_iter iter = {
.inode = rac->mapping->host,
.pos = readahead_pos(rac),
.len = readahead_length(rac),
};
struct iomap_readpage_ctx ctx = {
trace_iomap_readahead(rac->mapping->host, readahead_count(rac));
while (iomap_iter(&iter, ops) > 0)
iter.processed = iomap_readahead_iter(&iter, &ctx);
if (ctx.bio)
submit_bio(ctx.bio);
if (ctx.cur_page) {
if (!ctx.cur_page_in_bio)
unlock_page(ctx.cur_page);
put_page(ctx.cur_page);
}
}
EXPORT_SYMBOL_GPL(iomap_readahead);
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
/*
* iomap_is_partially_uptodate checks whether blocks within a page are
* uptodate or not.
*
* Returns true if all blocks which correspond to a file portion
* we want to read within the page are uptodate.
*/
int
iomap_is_partially_uptodate(struct page *page, unsigned long from,
unsigned long count)
{
struct iomap_page *iop = to_iomap_page(page);
struct inode *inode = page->mapping->host;
unsigned len, first, last;
unsigned i;
/* Limit range to one page */
len = min_t(unsigned, PAGE_SIZE - from, count);
/* First and last blocks in range within page */
first = from >> inode->i_blkbits;
last = (from + len - 1) >> inode->i_blkbits;
if (iop) {
for (i = first; i <= last; i++)
if (!test_bit(i, iop->uptodate))
return 0;
return 1;
}
return 0;
}
EXPORT_SYMBOL_GPL(iomap_is_partially_uptodate);
int
iomap_releasepage(struct page *page, gfp_t gfp_mask)
{
trace_iomap_releasepage(page->mapping->host, page_offset(page),
PAGE_SIZE);
/*
* mm accommodates an old ext3 case where clean pages might not have had
* the dirty bit cleared. Thus, it can send actual dirty pages to
* ->releasepage() via shrink_active_list(); skip those here.
*/
if (PageDirty(page) || PageWriteback(page))
return 0;
iomap_page_release(page);
return 1;
}
EXPORT_SYMBOL_GPL(iomap_releasepage);
void
iomap_invalidatepage(struct page *page, unsigned int offset, unsigned int len)
{
trace_iomap_invalidatepage(page->mapping->host, offset, len);
* If we're invalidating the entire page, clear the dirty state from it
* and release it to avoid unnecessary buildup of the LRU.
*/
if (offset == 0 && len == PAGE_SIZE) {
WARN_ON_ONCE(PageWriteback(page));
cancel_dirty_page(page);
iomap_page_release(page);
}
}
EXPORT_SYMBOL_GPL(iomap_invalidatepage);
#ifdef CONFIG_MIGRATION
int
iomap_migrate_page(struct address_space *mapping, struct page *newpage,
struct page *page, enum migrate_mode mode)
{
int ret;
Linus Torvalds
committed
ret = migrate_page_move_mapping(mapping, newpage, page, 0);
if (ret != MIGRATEPAGE_SUCCESS)
return ret;
if (page_has_private(page))
attach_page_private(newpage, detach_page_private(page));
if (mode != MIGRATE_SYNC_NO_COPY)
migrate_page_copy(newpage, page);
else
migrate_page_states(newpage, page);
return MIGRATEPAGE_SUCCESS;
}
EXPORT_SYMBOL_GPL(iomap_migrate_page);
#endif /* CONFIG_MIGRATION */
static void
iomap_write_failed(struct inode *inode, loff_t pos, unsigned len)
{
loff_t i_size = i_size_read(inode);
/*
* Only truncate newly allocated pages beyoned EOF, even if the
* write started inside the existing inode size.
*/
if (pos + len > i_size)
truncate_pagecache_range(inode, max(pos, i_size), pos + len);
}
static int
iomap_read_page_sync(loff_t block_start, struct page *page, unsigned poff,
unsigned plen, const struct iomap *iomap)
{
struct bio_vec bvec;
struct bio bio;
bio_init(&bio, &bvec, 1);
bio.bi_opf = REQ_OP_READ;
bio.bi_iter.bi_sector = iomap_sector(iomap, block_start);
bio_set_dev(&bio, iomap->bdev);
__bio_add_page(&bio, page, plen, poff);
return submit_bio_wait(&bio);
}
static int __iomap_write_begin(const struct iomap_iter *iter, loff_t pos,
const struct iomap *srcmap = iomap_iter_srcmap(iter);
struct iomap_page *iop = iomap_page_create(iter->inode, page);
loff_t block_size = i_blocksize(iter->inode);
loff_t block_start = round_down(pos, block_size);
loff_t block_end = round_up(pos + len, block_size);
unsigned from = offset_in_page(pos), to = from + len, poff, plen;
if (PageUptodate(page))
return 0;
ClearPageError(page);
iomap_adjust_read_range(iter->inode, iop, &block_start,
block_end - block_start, &poff, &plen);
if (plen == 0)
break;
(from <= poff || from >= poff + plen) &&
(to <= poff || to >= poff + plen))
continue;
if (iomap_block_needs_zeroing(iter, block_start)) {
if (WARN_ON_ONCE(iter->flags & IOMAP_UNSHARE))
zero_user_segments(page, poff, from, to, poff + plen);
} else {
int status = iomap_read_page_sync(block_start, page,
poff, plen, srcmap);
if (status)
return status;
iomap_set_range_uptodate(page, poff, plen);
} while ((block_start += plen) < block_end);
static int iomap_write_begin_inline(const struct iomap_iter *iter,
struct page *page)
{
/* needs more work for the tailpacking case; disable for now */
if (WARN_ON_ONCE(iomap_iter_srcmap(iter)->offset != 0))
return iomap_read_inline_data(iter, page);
static int iomap_write_begin(const struct iomap_iter *iter, loff_t pos,
unsigned len, struct page **pagep)
const struct iomap_page_ops *page_ops = iter->iomap.page_ops;
const struct iomap *srcmap = iomap_iter_srcmap(iter);
struct page *page;
int status = 0;
BUG_ON(pos + len > iter->iomap.offset + iter->iomap.length);
if (srcmap != &iter->iomap)
BUG_ON(pos + len > srcmap->offset + srcmap->length);
if (fatal_signal_pending(current))
return -EINTR;
if (page_ops && page_ops->page_prepare) {
status = page_ops->page_prepare(iter->inode, pos, len);
if (status)
return status;
}
page = grab_cache_page_write_begin(iter->inode->i_mapping,
pos >> PAGE_SHIFT, AOP_FLAG_NOFS);
if (!page) {
status = -ENOMEM;
goto out_no_page;
}
if (srcmap->type == IOMAP_INLINE)
status = iomap_write_begin_inline(iter, page);
else if (srcmap->flags & IOMAP_F_BUFFER_HEAD)
status = __block_write_begin_int(page, pos, len, NULL, srcmap);
status = __iomap_write_begin(iter, pos, len, page);
if (unlikely(status))
goto out_unlock;
*pagep = page;
return 0;
out_unlock:
unlock_page(page);
put_page(page);
iomap_write_failed(iter->inode, pos, len);
out_no_page:
if (page_ops && page_ops->page_done)
page_ops->page_done(iter->inode, pos, 0, NULL);
return status;
}
static size_t __iomap_write_end(struct inode *inode, loff_t pos, size_t len,
size_t copied, struct page *page)
{
flush_dcache_page(page);
/*
* The blocks that were entirely written will now be uptodate, so we
* don't have to worry about a readpage reading them and overwriting a
* partial write. However, if we've encountered a short write and only
* partially written into a block, it will not be marked uptodate, so a
* readpage might come in and destroy our partial write.
*
* Do the simplest thing and just treat any short write to a
* non-uptodate page as a zero-length write, and force the caller to
* redo the whole thing.
*/
if (unlikely(copied < len && !PageUptodate(page)))
return 0;
iomap_set_range_uptodate(page, offset_in_page(pos), len);
__set_page_dirty_nobuffers(page);
return copied;
}
static size_t iomap_write_end_inline(const struct iomap_iter *iter,
struct page *page, loff_t pos, size_t copied)
const struct iomap *iomap = &iter->iomap;
void *addr;
WARN_ON_ONCE(!PageUptodate(page));
BUG_ON(!iomap_inline_data_valid(iomap));
addr = kmap_local_page(page) + pos;
memcpy(iomap_inline_data(iomap, pos), addr, copied);
kunmap_local(addr);
mark_inode_dirty(iter->inode);
return copied;
}
/* Returns the number of bytes copied. May be 0. Cannot be an errno. */
static size_t iomap_write_end(struct iomap_iter *iter, loff_t pos, size_t len,
size_t copied, struct page *page)
const struct iomap_page_ops *page_ops = iter->iomap.page_ops;
const struct iomap *srcmap = iomap_iter_srcmap(iter);
loff_t old_size = iter->inode->i_size;
if (srcmap->type == IOMAP_INLINE) {
ret = iomap_write_end_inline(iter, page, pos, copied);
} else if (srcmap->flags & IOMAP_F_BUFFER_HEAD) {
ret = block_write_end(NULL, iter->inode->i_mapping, pos, len,
copied, page, NULL);
ret = __iomap_write_end(iter->inode, pos, len, copied, page);
}
/*
* Update the in-memory inode size after copying the data into the page
* cache. It's up to the file system to write the updated size to disk,
* preferably after I/O completion so that no stale data is exposed.
*/
if (pos + ret > old_size) {
i_size_write(iter->inode, pos + ret);
iter->iomap.flags |= IOMAP_F_SIZE_CHANGED;
}
unlock_page(page);
if (old_size < pos)
pagecache_isize_extended(iter->inode, old_size, pos);
if (page_ops && page_ops->page_done)
page_ops->page_done(iter->inode, pos, ret, page);
put_page(page);
if (ret < len)
iomap_write_failed(iter->inode, pos, len);
return ret;
}
static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
loff_t length = iomap_length(iter);
loff_t pos = iter->pos;
ssize_t written = 0;
long status = 0;
do {
struct page *page;
unsigned long offset; /* Offset into pagecache page */
unsigned long bytes; /* Bytes to write to page */
size_t copied; /* Bytes copied from user */
offset = offset_in_page(pos);
bytes = min_t(unsigned long, PAGE_SIZE - offset,
iov_iter_count(i));
again:
if (bytes > length)
bytes = length;
/*
* Bring in the user page that we'll copy from _first_.
* Otherwise there's a nasty deadlock on copying from the
* same page as we're writing to, without it being marked
* up-to-date.
*/
Andreas Gruenbacher
committed
if (unlikely(fault_in_iov_iter_readable(i, bytes))) {
status = -EFAULT;
break;
}
status = iomap_write_begin(iter, pos, bytes, &page);
if (unlikely(status))
break;
if (mapping_writably_mapped(iter->inode->i_mapping))
flush_dcache_page(page);
Al Viro
committed
copied = copy_page_from_iter_atomic(page, offset, bytes, i);
status = iomap_write_end(iter, pos, bytes, copied, page);
Al Viro
committed
if (unlikely(copied != status))
iov_iter_revert(i, copied - status);
Al Viro
committed
cond_resched();
if (unlikely(status == 0)) {
* A short copy made iomap_write_end() reject the
* thing entirely. Might be memory poisoning
* halfway through, might be a race with munmap,
* might be severe memory pressure.
if (copied)
bytes = copied;
goto again;
}
Al Viro
committed
pos += status;
written += status;
length -= status;
balance_dirty_pages_ratelimited(iter->inode->i_mapping);
} while (iov_iter_count(i) && length);
return written ? written : status;
}
ssize_t
iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *i,
const struct iomap_ops *ops)
{
struct iomap_iter iter = {
.inode = iocb->ki_filp->f_mapping->host,
.pos = iocb->ki_pos,
.len = iov_iter_count(i),
.flags = IOMAP_WRITE,
};
int ret;
while ((ret = iomap_iter(&iter, ops)) > 0)
iter.processed = iomap_write_iter(&iter, i);
if (iter.pos == iocb->ki_pos)
return ret;
return iter.pos - iocb->ki_pos;
}
EXPORT_SYMBOL_GPL(iomap_file_buffered_write);
static loff_t iomap_unshare_iter(struct iomap_iter *iter)
struct iomap *iomap = &iter->iomap;
const struct iomap *srcmap = iomap_iter_srcmap(iter);
loff_t pos = iter->pos;
loff_t length = iomap_length(iter);
long status = 0;
loff_t written = 0;
/* don't bother with blocks that are not shared to start with */
if (!(iomap->flags & IOMAP_F_SHARED))
return length;
/* don't bother with holes or unwritten extents */
if (srcmap->type == IOMAP_HOLE || srcmap->type == IOMAP_UNWRITTEN)
return length;
unsigned long offset = offset_in_page(pos);
unsigned long bytes = min_t(loff_t, PAGE_SIZE - offset, length);
struct page *page;
status = iomap_write_begin(iter, pos, bytes, &page);
if (unlikely(status))
return status;
status = iomap_write_end(iter, pos, bytes, bytes, page);
if (WARN_ON_ONCE(status == 0))
return -EIO;
cond_resched();
pos += status;
written += status;
length -= status;
balance_dirty_pages_ratelimited(iter->inode->i_mapping);
} while (length);
return written;
}
int
iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len,
const struct iomap_ops *ops)
{
struct iomap_iter iter = {
.inode = inode,
.pos = pos,
.len = len,
while ((ret = iomap_iter(&iter, ops)) > 0)
iter.processed = iomap_unshare_iter(&iter);
return ret;
EXPORT_SYMBOL_GPL(iomap_file_unshare);
static s64 __iomap_zero_iter(struct iomap_iter *iter, loff_t pos, u64 length)
{
struct page *page;
int status;
unsigned offset = offset_in_page(pos);
unsigned bytes = min_t(u64, PAGE_SIZE - offset, length);
status = iomap_write_begin(iter, pos, bytes, &page);
if (status)
return status;
zero_user(page, offset, bytes);
mark_page_accessed(page);
return iomap_write_end(iter, pos, bytes, bytes, page);
static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero)
struct iomap *iomap = &iter->iomap;
const struct iomap *srcmap = iomap_iter_srcmap(iter);
loff_t pos = iter->pos;
loff_t length = iomap_length(iter);
loff_t written = 0;
/* already zeroed? we're done. */
if (srcmap->type == IOMAP_HOLE || srcmap->type == IOMAP_UNWRITTEN)
if (IS_DAX(iter->inode))
bytes = dax_iomap_zero(pos, length, iomap);
bytes = __iomap_zero_iter(iter, pos, length);
if (bytes < 0)
return bytes;
pos += bytes;
written += bytes;
if (did_zero)
*did_zero = true;
} while (length > 0);
return written;
}
int
iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
const struct iomap_ops *ops)
{
struct iomap_iter iter = {
.inode = inode,
.pos = pos,
.len = len,
.flags = IOMAP_ZERO,
};
int ret;
while ((ret = iomap_iter(&iter, ops)) > 0)
iter.processed = iomap_zero_iter(&iter, did_zero);
return ret;
}
EXPORT_SYMBOL_GPL(iomap_zero_range);
int
iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
const struct iomap_ops *ops)
{
unsigned int blocksize = i_blocksize(inode);
unsigned int off = pos & (blocksize - 1);
/* Block boundary? Nothing to do */
if (!off)
return 0;
return iomap_zero_range(inode, pos, blocksize - off, did_zero, ops);
}
EXPORT_SYMBOL_GPL(iomap_truncate_page);
static loff_t iomap_page_mkwrite_iter(struct iomap_iter *iter,
struct page *page)
loff_t length = iomap_length(iter);
if (iter->iomap.flags & IOMAP_F_BUFFER_HEAD) {
ret = __block_write_begin_int(page, iter->pos, length, NULL,
&iter->iomap);
if (ret)
return ret;
block_commit_write(page, 0, length);
} else {
WARN_ON_ONCE(!PageUptodate(page));
set_page_dirty(page);
}
return length;
}
vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops)
{
struct iomap_iter iter = {
.inode = file_inode(vmf->vma->vm_file),
.flags = IOMAP_WRITE | IOMAP_FAULT,
};
struct page *page = vmf->page;
ssize_t ret;
lock_page(page);
ret = page_mkwrite_check_truncate(page, iter.inode);
goto out_unlock;
iter.pos = page_offset(page);
iter.len = ret;
while ((ret = iomap_iter(&iter, ops)) > 0)
iter.processed = iomap_page_mkwrite_iter(&iter, page);
if (ret < 0)
goto out_unlock;
wait_for_stable_page(page);