/*
 * fs/f2fs/data.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/buffer_head.h>
#include <linux/mpage.h>
#include <linux/aio.h>
#include <linux/writeback.h>
#include <linux/backing-dev.h>
#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/prefetch.h>

#include "f2fs.h"
#include "node.h"
#include "segment.h"
#include <trace/events/f2fs.h>

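/*
 * Read completion handler: mark each page in the bio uptodate on success,
 * or clear uptodate and set the error flag on failure, then unlock the
 * page for any waiting reader.
 */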
static void f2fs_read_end_io(struct bio *bio, int err)
{
	struct bio_vec *bvec;
	int i;

	bio_for_each_segment_all(bvec, bio, i) {
		struct page *page = bvec->bv_page;

		if (!err) {
			SetPageUptodate(page);
		} else {
			ClearPageUptodate(page);
			SetPageError(page);
		}
		unlock_page(page);
	}
	bio_put(bio);
}

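/*
 * Write completion handler: on error, flag the page and its mapping with
 * EIO and stop checkpointing, since the filesystem can no longer be
 * committed consistently. Completes a pending META_FLUSH waiter (see
 * __submit_merged_bio) and wakes up the checkpoint path once all
 * writeback pages have drained.
 */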
static void f2fs_write_end_io(struct bio *bio, int err)
{
	struct f2fs_sb_info *sbi = bio->bi_private;
	struct bio_vec *bvec;
	int i;

	bio_for_each_segment_all(bvec, bio, i) {
		struct page *page = bvec->bv_page;

		if (unlikely(err)) {
			SetPageError(page);
			set_bit(AS_EIO, &page->mapping->flags);
			f2fs_stop_checkpoint(sbi);
		}
		end_page_writeback(page);
		dec_page_count(sbi, F2FS_WRITEBACK);
	}

	if (sbi->wait_io) {
		complete(sbi->wait_io);
		sbi->wait_io = NULL;
	}

	if (!get_pages(sbi, F2FS_WRITEBACK) &&
			!list_empty(&sbi->cp_wait.task_list))
		wake_up(&sbi->cp_wait);

	bio_put(bio);
}

/*
 * Low-level block read/write IO operations.
 */
static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
				int npages, bool is_read)
{
	struct bio *bio;

	/* No failure on bio allocation */
	bio = bio_alloc(GFP_NOIO, npages);

	bio->bi_bdev = sbi->sb->s_bdev;
	bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr);
	bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io;
	bio->bi_private = sbi;

	return bio;
}

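/*
 * Submit the bio merged so far in @io, if any. META_FLUSH writes come
 * only from the checkpoint procedure and are submitted synchronously:
 * the submitter sleeps until f2fs_write_end_io completes sbi->wait_io.
 */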
static void __submit_merged_bio(struct f2fs_bio_info *io)
{
	struct f2fs_io_info *fio = &io->fio;
	int rw;

	if (!io->bio)
		return;

	rw = fio->rw;

	if (is_read_io(rw)) {
		trace_f2fs_submit_read_bio(io->sbi->sb, rw,
						fio->type, io->bio);
		submit_bio(rw, io->bio);
	} else {
		trace_f2fs_submit_write_bio(io->sbi->sb, rw,
						fio->type, io->bio);
		/*
		 * META_FLUSH is only from the checkpoint procedure, and we
		 * should wait on this metadata bio for FS consistency.
		 */
		if (fio->type == META_FLUSH) {
			DECLARE_COMPLETION_ONSTACK(wait);
			io->sbi->wait_io = &wait;
			submit_bio(rw, io->bio);
			wait_for_completion(&wait);
		} else {
			submit_bio(rw, io->bio);
		}
	}

	io->bio = NULL;
}

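/*
 * Flush the pending merged bio for the given page type. In the checkpoint
 * procedure, META is upgraded to META_FLUSH so the bio is issued with
 * flush/FUA semantics.
 */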
void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
				enum page_type type, int rw)
{
	enum page_type btype = PAGE_TYPE_OF_BIO(type);
	struct f2fs_bio_info *io;

	io = is_read_io(rw) ? &sbi->read_io : &sbi->write_io[btype];

	down_write(&io->io_rwsem);

	/* change META to META_FLUSH in the checkpoint procedure */
	if (type >= META_FLUSH) {
		io->fio.type = META_FLUSH;
		io->fio.rw = WRITE_FLUSH_FUA | REQ_META | REQ_PRIO;
	}
	__submit_merged_bio(io);
	up_write(&io->io_rwsem);
}

/*
 * Fill the locked page with data located in the block address.
 * Return unlocked page.
 */
int f2fs_submit_page_bio(struct f2fs_sb_info *sbi, struct page *page,
					block_t blk_addr, int rw)
{
	struct bio *bio;

	trace_f2fs_submit_page_bio(page, blk_addr, rw);

	/* Allocate a new bio */
	bio = __bio_alloc(sbi, blk_addr, 1, is_read_io(rw));

	if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) {
		bio_put(bio);
		f2fs_put_page(page, 1);
		return -EFAULT;
	}

	submit_bio(rw, bio);
	return 0;
}

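/*
 * Add one page to the merged bio of its page type. The pending bio is
 * submitted first whenever the new block is not adjacent to the last
 * merged block, the request flags differ, or the bio is already full.
 */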
void f2fs_submit_page_mbio(struct f2fs_sb_info *sbi, struct page *page,
			block_t blk_addr, struct f2fs_io_info *fio)
{
	enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
	struct f2fs_bio_info *io;
	bool is_read = is_read_io(fio->rw);

	io = is_read ? &sbi->read_io : &sbi->write_io[btype];

	verify_block_addr(sbi, blk_addr);

	down_write(&io->io_rwsem);

	if (!is_read)
		inc_page_count(sbi, F2FS_WRITEBACK);

	if (io->bio && (io->last_block_in_bio != blk_addr - 1 ||
						io->fio.rw != fio->rw))
		__submit_merged_bio(io);
alloc_new:
	if (io->bio == NULL) {
		int bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi));

		io->bio = __bio_alloc(sbi, blk_addr, bio_blocks, is_read);
		io->fio = *fio;
	}

	if (bio_add_page(io->bio, page, PAGE_CACHE_SIZE, 0) <
							PAGE_CACHE_SIZE) {
		__submit_merged_bio(io);
		goto alloc_new;
	}

	io->last_block_in_bio = blk_addr;

	up_write(&io->io_rwsem);
	trace_f2fs_submit_page_mbio(page, fio->rw, fio->type, blk_addr);
}

/*
 * Lock ordering for the change of data block address:
 * ->data_page
 *  ->node_page
 *    update block addresses in the node page
 */
static void __set_data_blkaddr(struct dnode_of_data *dn, block_t new_addr)
{
	struct f2fs_node *rn;
	__le32 *addr_array;
	struct page *node_page = dn->node_page;
	unsigned int ofs_in_node = dn->ofs_in_node;

	f2fs_wait_on_page_writeback(node_page, NODE);

	rn = F2FS_NODE(node_page);

	/* Get physical address of data block */
	addr_array = blkaddr_in_node(rn);
	addr_array[ofs_in_node] = cpu_to_le32(new_addr);
	set_page_dirty(node_page);
}

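/*
 * Reserve one block for @dn: charge the inode's block count and record
 * NEW_ADDR in the node page; an on-disk block address is assigned later,
 * when the data is actually written out.
 */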
int reserve_new_block(struct dnode_of_data *dn)
{
	struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);

	if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
		return -EPERM;
	if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1)))
		return -ENOSPC;

	trace_f2fs_reserve_new_block(dn->inode, dn->nid, dn->ofs_in_node);

	__set_data_blkaddr(dn, NEW_ADDR);
	dn->data_blkaddr = NEW_ADDR;
	mark_inode_dirty(dn->inode);
	sync_inode_page(dn);
	return 0;
}

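/*
 * Look up the dnode covering @index and reserve a new block there if no
 * block is allocated yet. The dnode is released on error or when the
 * caller did not pass in its own inode page.
 */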
int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
{
	bool need_put = dn->inode_page ? false : true;
	int err;

	/* if inode_page exists, index should be zero */
	f2fs_bug_on(!need_put && index);

	err = get_dnode_of_data(dn, index, ALLOC_NODE);
	if (err)
		return err;

	if (dn->data_blkaddr == NULL_ADDR)
		err = reserve_new_block(dn);
	if (err || need_put)
		f2fs_put_dnode(dn);
	return err;
}

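/*
 * Try to serve the block lookup at @pgofs from the inode's single-extent
 * cache. Returns 1 and maps @bh_result on a hit, 0 on a miss.
 */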
static int check_extent_cache(struct inode *inode, pgoff_t pgofs,
					struct buffer_head *bh_result)
{
	struct f2fs_inode_info *fi = F2FS_I(inode);
	pgoff_t start_fofs, end_fofs;
	block_t start_blkaddr;

	if (is_inode_flag_set(fi, FI_NO_EXTENT))
		return 0;

	read_lock(&fi->ext.ext_lock);
	if (fi->ext.len == 0) {
		read_unlock(&fi->ext.ext_lock);
		return 0;
	}

	stat_inc_total_hit(inode->i_sb);

	start_fofs = fi->ext.fofs;
	end_fofs = fi->ext.fofs + fi->ext.len - 1;
	start_blkaddr = fi->ext.blk_addr;

	if (pgofs >= start_fofs && pgofs <= end_fofs) {
		unsigned int blkbits = inode->i_sb->s_blocksize_bits;
		size_t count;

		clear_buffer_new(bh_result);
		map_bh(bh_result, inode->i_sb,
				start_blkaddr + pgofs - start_fofs);
		count = end_fofs - pgofs + 1;
		if (count < (UINT_MAX >> blkbits))
			bh_result->b_size = (count << blkbits);
		else
			bh_result->b_size = UINT_MAX;

		stat_inc_read_hit(inode->i_sb);
		read_unlock(&fi->ext.ext_lock);
		return 1;
	}
	read_unlock(&fi->ext.ext_lock);
	return 0;
}

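/*
 * Write the new block address into the node page, then keep the
 * single-extent cache coherent: merge at the front or back, split the
 * extent, or drop the cache entirely once it gets too fragmented.
 */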
void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn)
{
	struct f2fs_inode_info *fi = F2FS_I(dn->inode);
	pgoff_t fofs, start_fofs, end_fofs;
	block_t start_blkaddr, end_blkaddr;
	int need_update = true;

	f2fs_bug_on(blk_addr == NEW_ADDR);
	fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
							dn->ofs_in_node;

	/* Update the page address in the parent node */
	__set_data_blkaddr(dn, blk_addr);

	if (is_inode_flag_set(fi, FI_NO_EXTENT))
		return;

	write_lock(&fi->ext.ext_lock);

	start_fofs = fi->ext.fofs;
	end_fofs = fi->ext.fofs + fi->ext.len - 1;
	start_blkaddr = fi->ext.blk_addr;
	end_blkaddr = fi->ext.blk_addr + fi->ext.len - 1;

	/* Drop and initialize the matched extent */
	if (fi->ext.len == 1 && fofs == start_fofs)
		fi->ext.len = 0;

	/* Initial extent */
	if (fi->ext.len == 0) {
		if (blk_addr != NULL_ADDR) {
			fi->ext.fofs = fofs;
			fi->ext.blk_addr = blk_addr;
			fi->ext.len = 1;
		}
		goto end_update;
	}

	/* Front merge */
	if (fofs == start_fofs - 1 && blk_addr == start_blkaddr - 1) {
		fi->ext.fofs--;
		fi->ext.blk_addr--;
		fi->ext.len++;
		goto end_update;
	}

	/* Back merge */
	if (fofs == end_fofs + 1 && blk_addr == end_blkaddr + 1) {
		fi->ext.len++;
		goto end_update;
	}

	/* Split the existing extent */
	if (fi->ext.len > 1 &&
		fofs >= start_fofs && fofs <= end_fofs) {
		if ((end_fofs - fofs) < (fi->ext.len >> 1)) {
			fi->ext.len = fofs - start_fofs;
		} else {
			fi->ext.fofs = fofs + 1;
			fi->ext.blk_addr = start_blkaddr +
					fofs - start_fofs + 1;
			fi->ext.len -= fofs - start_fofs + 1;
		}
	} else {
		need_update = false;
	}

	/* Finally, if the extent is very fragmented, let's drop the cache. */
	if (fi->ext.len < F2FS_MIN_EXTENT_LEN) {
		fi->ext.len = 0;
		set_inode_flag(fi, FI_NO_EXTENT);
		need_update = true;
	}
end_update:
	write_unlock(&fi->ext.ext_lock);
	if (need_update)
		sync_inode_page(dn);
	return;
}

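/*
 * Find the data page at @index, reading it from disk when it is not
 * cached and uptodate. With @sync the read is waited on and an uptodate
 * page (or an error) is returned; otherwise the read is issued as
 * readahead and the page is returned without waiting for I/O.
 */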
struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync)
{
	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
	struct address_space *mapping = inode->i_mapping;
	struct dnode_of_data dn;
	struct page *page;
	int err;

	page = find_get_page(mapping, index);
	if (page && PageUptodate(page))
		return page;
	f2fs_put_page(page, 0);

	set_new_dnode(&dn, inode, NULL, NULL, 0);
	err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
	if (err)
		return ERR_PTR(err);
	f2fs_put_dnode(&dn);

	if (dn.data_blkaddr == NULL_ADDR)
		return ERR_PTR(-ENOENT);

	/* By fallocate(), there is no cached page, but with NEW_ADDR */
	if (unlikely(dn.data_blkaddr == NEW_ADDR))
		return ERR_PTR(-EINVAL);

	page = grab_cache_page(mapping, index);
	if (!page)
		return ERR_PTR(-ENOMEM);

	if (PageUptodate(page)) {
		unlock_page(page);
		return page;
	}

	err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr,
					sync ? READ_SYNC : READA);
	if (err)
		return ERR_PTR(err);

	if (sync) {
		wait_on_page_locked(page);
		if (unlikely(!PageUptodate(page))) {
			f2fs_put_page(page, 0);
			return ERR_PTR(-EIO);
		}
	}
	return page;
}

/*
 * If it tries to access a hole, return an error, because the callers
 * (functions in dir.c and GC) should be able to know whether this page
 * exists or not.
 */
struct page *get_lock_data_page(struct inode *inode, pgoff_t index)
{
	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
	struct address_space *mapping = inode->i_mapping;
	struct dnode_of_data dn;
	struct page *page;
	int err;

repeat:
	page = grab_cache_page(mapping, index);
	if (!page)
		return ERR_PTR(-ENOMEM);

	set_new_dnode(&dn, inode, NULL, NULL, 0);
	err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
	if (err) {
		f2fs_put_page(page, 1);
		return ERR_PTR(err);
	}
	f2fs_put_dnode(&dn);

	if (unlikely(dn.data_blkaddr == NULL_ADDR)) {
		f2fs_put_page(page, 1);
		return ERR_PTR(-ENOENT);
	}

	if (PageUptodate(page))
		return page;

	/*
	 * A new dentry page is allocated but not able to be written, since its
	 * new inode page couldn't be allocated due to -ENOSPC.
	 * In such a case, its blkaddr may remain NEW_ADDR.
	 * see, f2fs_add_link -> get_new_data_page -> init_inode_metadata.
	 */
	if (dn.data_blkaddr == NEW_ADDR) {
		zero_user_segment(page, 0, PAGE_CACHE_SIZE);
		SetPageUptodate(page);
		return page;
	}

	err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, READ_SYNC);
	if (err)
		return ERR_PTR(err);

	lock_page(page);
	if (unlikely(!PageUptodate(page))) {
		f2fs_put_page(page, 1);
		return ERR_PTR(-EIO);
	}
	if (unlikely(page->mapping != mapping)) {
		f2fs_put_page(page, 1);
		goto repeat;
	}
	return page;
}

/*
 * Caller ensures that this data page is never allocated.
 * A new zero-filled data page is allocated in the page cache.
 *
 * Also, the caller should grab and release a rwsem by calling f2fs_lock_op()
 * and f2fs_unlock_op().
 * Note that ipage is set only by make_empty_dir.
 */
struct page *get_new_data_page(struct inode *inode,
		struct page *ipage, pgoff_t index, bool new_i_size)
{
	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
	struct address_space *mapping = inode->i_mapping;
	struct page *page;
	struct dnode_of_data dn;
	int err;

	set_new_dnode(&dn, inode, ipage, NULL, 0);
	err = f2fs_reserve_block(&dn, index);
	if (err)
		return ERR_PTR(err);
repeat:
	page = grab_cache_page(mapping, index);
	if (!page) {
		err = -ENOMEM;
		goto put_err;
	}

	if (PageUptodate(page))
		return page;

	if (dn.data_blkaddr == NEW_ADDR) {
		zero_user_segment(page, 0, PAGE_CACHE_SIZE);
		SetPageUptodate(page);
	} else {
		err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr,
								READ_SYNC);
		if (err)
			goto put_err;

		lock_page(page);
		if (unlikely(!PageUptodate(page))) {
			f2fs_put_page(page, 1);
			err = -EIO;
			goto put_err;
		}
		if (unlikely(page->mapping != mapping)) {
			f2fs_put_page(page, 1);
			goto repeat;
		}
	}

	if (new_i_size &&
		i_size_read(inode) < ((index + 1) << PAGE_CACHE_SHIFT)) {
		i_size_write(inode, ((index + 1) << PAGE_CACHE_SHIFT));
		/* Only the directory inode sets new_i_size */
		set_inode_flag(F2FS_I(inode), FI_UPDATE_DIR);
	}
	return page;

put_err:
	f2fs_put_dnode(&dn);
	return ERR_PTR(err);
}

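/*
 * Allocate an on-disk block for @dn immediately, as needed by the direct
 * I/O path, which bypasses the extent cache for performance.
 */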
static int __allocate_data_block(struct dnode_of_data *dn)
{
	struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
	struct f2fs_summary sum;
	block_t new_blkaddr;
	struct node_info ni;
	int type;

	if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
		return -EPERM;
	if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1)))
		return -ENOSPC;

	__set_data_blkaddr(dn, NEW_ADDR);
	dn->data_blkaddr = NEW_ADDR;

	get_node_info(sbi, dn->nid, &ni);
	set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);

	type = CURSEG_WARM_DATA;

	allocate_data_block(sbi, NULL, NULL_ADDR, &new_blkaddr, &sum, type);

	/* direct IO doesn't use extent cache to maximize the performance */
	set_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT);
	update_extent_cache(new_blkaddr, dn);
	clear_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT);

	dn->data_blkaddr = new_blkaddr;
	return 0;
}

/*
 * get_data_block() now supports readahead/bmap/rw direct_IO with a mapped bh.
 * If original data blocks are allocated, then give them to blockdev.
 * Otherwise,
 *     a. preallocate requested block addresses
 *     b. do not use extent cache for better performance
 *     c. give the block addresses to blockdev
 */
static int __get_data_block(struct inode *inode, sector_t iblock,
			struct buffer_head *bh_result, int create, bool fiemap)
{
	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
	unsigned int blkbits = inode->i_sb->s_blocksize_bits;
	unsigned maxblocks = bh_result->b_size >> blkbits;
	struct dnode_of_data dn;
	int mode = create ? ALLOC_NODE : LOOKUP_NODE_RA;
	pgoff_t pgofs, end_offset;
	int err = 0, ofs = 1;
	bool allocated = false;

	/* Get the page offset from the block offset(iblock) */
	pgofs =	(pgoff_t)(iblock >> (PAGE_CACHE_SHIFT - blkbits));

	if (check_extent_cache(inode, pgofs, bh_result))
		goto out;

	if (create)
		f2fs_lock_op(sbi);

	/* When reading holes, we need its node page */
	set_new_dnode(&dn, inode, NULL, NULL, 0);
	err = get_dnode_of_data(&dn, pgofs, mode);
	if (err) {
		if (err == -ENOENT)
			err = 0;
		goto unlock_out;
	}
	if (dn.data_blkaddr == NEW_ADDR && !fiemap)
		goto put_out;

	if (dn.data_blkaddr != NULL_ADDR) {
		map_bh(bh_result, inode->i_sb, dn.data_blkaddr);
	} else if (create) {
		err = __allocate_data_block(&dn);
		if (err)
			goto put_out;
		allocated = true;
		map_bh(bh_result, inode->i_sb, dn.data_blkaddr);
	} else {
		goto put_out;
	}

	end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
	bh_result->b_size = (((size_t)1) << blkbits);
	dn.ofs_in_node++;
	pgofs++;

get_next:
	if (dn.ofs_in_node >= end_offset) {
		if (allocated)
			sync_inode_page(&dn);
		allocated = false;
		f2fs_put_dnode(&dn);

		set_new_dnode(&dn, inode, NULL, NULL, 0);
		err = get_dnode_of_data(&dn, pgofs, mode);
		if (err) {
			if (err == -ENOENT)
				err = 0;
			goto unlock_out;
		}
		if (dn.data_blkaddr == NEW_ADDR && !fiemap)
			goto put_out;

		end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
	}

	if (maxblocks > (bh_result->b_size >> blkbits)) {
		block_t blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
		if (blkaddr == NULL_ADDR && create) {
			err = __allocate_data_block(&dn);
			if (err)
				goto sync_out;
			allocated = true;
			blkaddr = dn.data_blkaddr;
		}
		/* Give more consecutive addresses for the read ahead */
		if (blkaddr == (bh_result->b_blocknr + ofs)) {
			ofs++;
			dn.ofs_in_node++;
			pgofs++;
			bh_result->b_size += (((size_t)1) << blkbits);
			goto get_next;
		}
	}
sync_out:
	if (allocated)
		sync_inode_page(&dn);
put_out:
	f2fs_put_dnode(&dn);
unlock_out:
	if (create)
		f2fs_unlock_op(sbi);
out:
	trace_f2fs_get_data_block(inode, iblock, bh_result, err);
	return err;
}

static int get_data_block(struct inode *inode, sector_t iblock,
			struct buffer_head *bh_result, int create)
{
	return __get_data_block(inode, iblock, bh_result, create, false);
}

static int get_data_block_fiemap(struct inode *inode, sector_t iblock,
			struct buffer_head *bh_result, int create)
{
	return __get_data_block(inode, iblock, bh_result, create, true);
}

int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		u64 start, u64 len)
{
	return generic_block_fiemap(inode, fieinfo,
				start, len, get_data_block_fiemap);
}

static int f2fs_read_data_page(struct file *file, struct page *page)
{
	struct inode *inode = page->mapping->host;
	int ret;

	trace_f2fs_readpage(page, DATA);

	/* If the file has inline data, try to read it directly */
	if (f2fs_has_inline_data(inode))
		ret = f2fs_read_inline_data(inode, page);
	else
		ret = mpage_readpage(page, get_data_block);

	return ret;
}

static int f2fs_read_data_pages(struct file *file,
			struct address_space *mapping,
			struct list_head *pages, unsigned nr_pages)
{
	struct inode *inode = file->f_mapping->host;

	/* If the file has inline data, skip readpages */
	if (f2fs_has_inline_data(inode))
		return 0;

	return mpage_readpages(mapping, pages, nr_pages, get_data_block);
}

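/*
 * Write one data page: rewrite it in place when SSR allocation prefers
 * in-place update, otherwise write to a newly allocated block address and
 * update the extent cache to match.
 */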
int do_write_data_page(struct page *page, struct f2fs_io_info *fio)
{
	struct inode *inode = page->mapping->host;
	block_t old_blkaddr, new_blkaddr;
	struct dnode_of_data dn;
	int err = 0;

	set_new_dnode(&dn, inode, NULL, NULL, 0);
	err = get_dnode_of_data(&dn, page->index, LOOKUP_NODE);
	if (err)
		return err;

	old_blkaddr = dn.data_blkaddr;

	/* This page is already truncated */
	if (old_blkaddr == NULL_ADDR)
		goto out_writepage;

	set_page_writeback(page);

	/*
	 * If current allocation needs SSR,
	 * it is better to do in-place writes for updated data.
	 */
	if (unlikely(old_blkaddr != NEW_ADDR &&
			!is_cold_data(page) &&
			need_inplace_update(inode))) {
		rewrite_data_page(page, old_blkaddr, fio);
	} else {
		write_data_page(page, &dn, &new_blkaddr, fio);
		update_extent_cache(new_blkaddr, &dn);
	}
out_writepage:
	f2fs_put_dnode(&dn);
	return err;
}

static int f2fs_write_data_page(struct page *page,
					struct writeback_control *wbc)
{
	struct inode *inode = page->mapping->host;
	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
	loff_t i_size = i_size_read(inode);
	const pgoff_t end_index = ((unsigned long long) i_size)
							>> PAGE_CACHE_SHIFT;
	unsigned offset = 0;
	bool need_balance_fs = false;
	int err = 0;
	struct f2fs_io_info fio = {
		.type = DATA,
		.rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE,
	};

	trace_f2fs_writepage(page, DATA);

	if (page->index < end_index)
		goto write;

	/*
	 * If the offset is out-of-range of file size,
	 * this page does not have to be written to disk.
	 */
	offset = i_size & (PAGE_CACHE_SIZE - 1);
	if ((page->index >= end_index + 1) || !offset)
		goto out;

	zero_user_segment(page, offset, PAGE_CACHE_SIZE);
write:
	if (unlikely(sbi->por_doing))
		goto redirty_out;

	/* Dentry blocks are controlled by checkpoint */
	if (S_ISDIR(inode->i_mode)) {
		err = do_write_data_page(page, &fio);
		goto done;
	}

	if (!wbc->for_reclaim)
		need_balance_fs = true;
	else if (has_not_enough_free_secs(sbi, 0))
		goto redirty_out;

	f2fs_lock_op(sbi);
	if (f2fs_has_inline_data(inode) || f2fs_may_inline(inode))
		err = f2fs_write_inline_data(inode, page, offset);
	else
		err = do_write_data_page(page, &fio);
	f2fs_unlock_op(sbi);
done:
	if (err && err != -ENOENT)
		goto redirty_out;

	clear_cold_data(page);
out:
	inode_dec_dirty_dents(inode);
	unlock_page(page);
	if (need_balance_fs)
		f2fs_balance_fs(sbi);
	if (wbc->for_reclaim)
		f2fs_submit_merged_bio(sbi, DATA, WRITE);
	return 0;

redirty_out:
	redirty_page_for_writepage(wbc, page);
	return AOP_WRITEPAGE_ACTIVATE;
}

static int __f2fs_writepage(struct page *page, struct writeback_control *wbc,
			void *data)
{
	struct address_space *mapping = data;
	int ret = mapping->a_ops->writepage(page, wbc);
	mapping_set_error(mapping, ret);
	return ret;
}

static int f2fs_write_data_pages(struct address_space *mapping,
			    struct writeback_control *wbc)
{
	struct inode *inode = mapping->host;
	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
	bool locked = false;
	int ret;
	long diff;

	trace_f2fs_writepages(mapping->host, wbc, DATA);

	/* deal with chardevs and other special files */
	if (!mapping->a_ops->writepage)
		return 0;

	if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE &&
			get_dirty_dents(inode) < nr_pages_to_skip(sbi, DATA) &&
			available_free_memory(sbi, DIRTY_DENTS))
		goto skip_write;

	diff = nr_pages_to_write(sbi, DATA, wbc);

	if (!S_ISDIR(inode->i_mode)) {
		mutex_lock(&sbi->writepages);
		locked = true;
	}
	ret = write_cache_pages(mapping, wbc, __f2fs_writepage, mapping);
	if (locked)
		mutex_unlock(&sbi->writepages);

	f2fs_submit_merged_bio(sbi, DATA, WRITE);

	remove_dirty_dir_inode(inode);

	wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff);
	return ret;

skip_write:
	wbc->pages_skipped += get_dirty_dents(inode);
	return 0;
}

static int f2fs_write_begin(struct file *file, struct address_space *mapping,
		loff_t pos, unsigned len, unsigned flags,
		struct page **pagep, void **fsdata)
{
	struct inode *inode = mapping->host;
	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
	struct page *page;
	pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT;
	struct dnode_of_data dn;
	int err = 0;

	trace_f2fs_write_begin(inode, pos, len, flags);

	f2fs_balance_fs(sbi);
repeat:
	err = f2fs_convert_inline_data(inode, pos + len);
	if (err)
		return err;

	page = grab_cache_page_write_begin(mapping, index, flags);
	if (!page)
		return -ENOMEM;

	/* to avoid latency during memory pressure */
	unlock_page(page);

	*pagep = page;

	if (f2fs_has_inline_data(inode) && (pos + len) <= MAX_INLINE_DATA)
		goto inline_data;

	f2fs_lock_op(sbi);
	set_new_dnode(&dn, inode, NULL, NULL, 0);
	err = f2fs_reserve_block(&dn, index);
	f2fs_unlock_op(sbi);

	if (err) {
		f2fs_put_page(page, 0);
		return err;
	}
inline_data:
	lock_page(page);
	if (unlikely(page->mapping != mapping)) {
		f2fs_put_page(page, 1);
		goto repeat;
	}

	f2fs_wait_on_page_writeback(page, DATA);

	if ((len == PAGE_CACHE_SIZE) || PageUptodate(page))
		return 0;

	if ((pos & PAGE_CACHE_MASK) >= i_size_read(inode)) {
		unsigned start = pos & (PAGE_CACHE_SIZE - 1);
		unsigned end = start + len;

		/* Reading beyond i_size is simple: memset to zero */
		zero_user_segments(page, 0, start, end, PAGE_CACHE_SIZE);
		goto out;
	}

	if (dn.data_blkaddr == NEW_ADDR) {
		zero_user_segment(page, 0, PAGE_CACHE_SIZE);
	} else {
		if (f2fs_has_inline_data(inode)) {
			err = f2fs_read_inline_data(inode, page);
			if (err) {
				page_cache_release(page);
				return err;
			}
		} else {
			err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr,
							READ_SYNC);
			if (err)
				return err;
		}

		lock_page(page);
		if (unlikely(!PageUptodate(page))) {
			f2fs_put_page(page, 1);
			return -EIO;
		}
		if (unlikely(page->mapping != mapping)) {
			f2fs_put_page(page, 1);
			goto repeat;
		}
	}
out:
	SetPageUptodate(page);
	clear_cold_data(page);
	return 0;
}

static int f2fs_write_end(struct file *file,
			struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	struct inode *inode = page->mapping->host;

	trace_f2fs_write_end(inode, pos, len, copied);

	set_page_dirty(page);

	if (pos + copied > i_size_read(inode)) {
		i_size_write(inode, pos + copied);
		mark_inode_dirty(inode);
		update_inode_page(inode);
	}

	f2fs_put_page(page, 1);
	return copied;
}

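/*
 * Direct writes must be block-aligned in both file offset and memory.
 * A nonzero return makes f2fs_direct_IO() return 0, so misaligned
 * requests effectively fall back to buffered I/O. Reads are unrestricted.
 */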
static int check_direct_IO(struct inode *inode, int rw,
		struct iov_iter *iter, loff_t offset)
{
	unsigned blocksize_mask = inode->i_sb->s_blocksize - 1;

	if (rw == READ)
		return 0;

	if (offset & blocksize_mask)
		return -EINVAL;

	if (iov_iter_alignment(iter) & blocksize_mask)
		return -EINVAL;

	return 0;
}

static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
		struct iov_iter *iter, loff_t offset)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;

	/* Let buffer I/O handle the inline data case. */
	if (f2fs_has_inline_data(inode))
		return 0;

	if (check_direct_IO(inode, rw, iter, offset))
		return 0;

	/* clear fsync mark to recover these blocks */
	fsync_mark_clear(F2FS_SB(inode->i_sb), inode->i_ino);

	return blockdev_direct_IO(rw, iocb, inode, iter, offset,
				  get_data_block);
}

static void f2fs_invalidate_data_page(struct page *page, unsigned int offset,
				      unsigned int length)
{
	struct inode *inode = page->mapping->host;
	if (PageDirty(page))
		inode_dec_dirty_dents(inode);
	ClearPagePrivate(page);
}

static int f2fs_release_data_page(struct page *page, gfp_t wait)
{
	ClearPagePrivate(page);
	return 1;
}

static int f2fs_set_data_page_dirty(struct page *page)
{
	struct address_space *mapping = page->mapping;
	struct inode *inode = mapping->host;

	trace_f2fs_set_page_dirty(page, DATA);

	SetPageUptodate(page);
	mark_inode_dirty(inode);

	if (!PageDirty(page)) {
		__set_page_dirty_nobuffers(page);
		set_dirty_dir_page(inode, page);
		return 1;
	}
	return 0;
}

static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
{
	struct inode *inode = mapping->host;

	if (f2fs_has_inline_data(inode))
		return 0;

	return generic_block_bmap(mapping, block, get_data_block);
}

const struct address_space_operations f2fs_dblock_aops = {
	.readpage	= f2fs_read_data_page,
	.readpages	= f2fs_read_data_pages,
	.writepage	= f2fs_write_data_page,
	.writepages	= f2fs_write_data_pages,
	.write_begin	= f2fs_write_begin,
	.write_end	= f2fs_write_end,
	.set_page_dirty	= f2fs_set_data_page_dirty,
	.invalidatepage	= f2fs_invalidate_data_page,
	.releasepage	= f2fs_release_data_page,
	.direct_IO	= f2fs_direct_IO,
	.bmap		= f2fs_bmap,
};