/*
 * fs/f2fs/recovery.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include "f2fs.h"
#include "node.h"
#include "segment.h"

/*
 * Roll forward recovery scenarios.
 *
 * [Term] F: fsync_mark, D: dentry_mark
 *
 * 1. inode(x) | CP | inode(x) | dnode(F)
 * -> Update the latest inode(x).
 *
 * 2. inode(x) | CP | inode(F) | dnode(F)
 * -> No problem.
 *
 * 3. inode(x) | CP | dnode(F) | inode(x)
 * -> Recover to the latest dnode(F), and drop the last inode(x)
 *
 * 4. inode(x) | CP | dnode(F) | inode(F)
 * -> No problem.
 *
 * 5. CP | inode(x) | dnode(F)
 * -> The inode(DF) was missing. Should drop this dnode(F).
 *
 * 6. CP | inode(DF) | dnode(F)
 * -> No problem.
 *
 * 7. CP | dnode(F) | inode(DF)
 * -> If f2fs_iget fails, then goto next to find inode(DF).
 *
 * 8. CP | dnode(F) | inode(x)
 * -> If f2fs_iget fails, then goto next to find inode(DF).
 *    But it will fail due to no inode(DF).
 */

/*
 * Slab cache for struct fsync_inode_entry objects; it is created and
 * destroyed inside recover_fsync_data().
 */
static struct kmem_cache *fsync_entry_slab;

/*
 * Returns false once last_valid_block_count + alloc_valid_block_count
 * exceeds user_block_count, and true otherwise.
 */
bool space_for_roll_forward(struct f2fs_sb_info *sbi)
{
	return sbi->last_valid_block_count + sbi->alloc_valid_block_count <=
						sbi->user_block_count;
}

/*
 * Search @head for the entry whose inode number equals @ino.
 *
 * Returns the matching entry, or NULL when the list holds no such inode.
 */
static struct fsync_inode_entry *get_fsync_inode(struct list_head *head,
								nid_t ino)
{
	struct fsync_inode_entry *entry;

	list_for_each_entry(entry, head, list) {
		if (entry->inode->i_ino == ino)
			return entry;
	}

	return NULL;
}

70
static int recover_dentry(struct inode *inode, struct page *ipage)
71
{
72
	struct f2fs_inode *raw_inode = F2FS_INODE(ipage);
73
	nid_t pino = le32_to_cpu(raw_inode->i_pino);
74
	struct f2fs_dir_entry *de;
75
	struct qstr name;
76
	struct page *page;
77
	struct inode *dir, *einode;
78 79
	int err = 0;

80 81 82 83 84 85
	dir = f2fs_iget(inode->i_sb, pino);
	if (IS_ERR(dir)) {
		err = PTR_ERR(dir);
		goto out;
	}

86 87 88 89 90
	if (file_enc_name(inode)) {
		iput(dir);
		return 0;
	}

91 92
	name.len = le32_to_cpu(raw_inode->i_namelen);
	name.name = raw_inode->i_name;
93 94 95 96

	if (unlikely(name.len > F2FS_NAME_LEN)) {
		WARN_ON(1);
		err = -ENAMETOOLONG;
97
		goto out_err;
98
	}
99 100
retry:
	de = f2fs_find_entry(dir, &name, &page);
101
	if (de && inode->i_ino == le32_to_cpu(de->ino))
102
		goto out_unmap_put;
103

104 105 106 107
	if (de) {
		einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino));
		if (IS_ERR(einode)) {
			WARN_ON(1);
108 109
			err = PTR_ERR(einode);
			if (err == -ENOENT)
110
				err = -EEXIST;
111 112
			goto out_unmap_put;
		}
113
		err = acquire_orphan_inode(F2FS_I_SB(inode));
114 115 116
		if (err) {
			iput(einode);
			goto out_unmap_put;
117
		}
118
		f2fs_delete_entry(de, page, dir, einode);
119 120
		iput(einode);
		goto retry;
121
	}
122
	err = __f2fs_add_link(dir, &name, inode, inode->i_ino, inode->i_mode);
123 124 125 126 127 128 129 130 131 132
	if (err)
		goto out_err;

	if (is_inode_flag_set(F2FS_I(dir), FI_DELAY_IPUT)) {
		iput(dir);
	} else {
		add_dirty_dir_inode(dir);
		set_inode_flag(F2FS_I(dir), FI_DELAY_IPUT);
	}

133 134 135
	goto out;

out_unmap_put:
136
	f2fs_dentry_kunmap(dir, page);
137
	f2fs_put_page(page, 0);
138 139
out_err:
	iput(dir);
140
out:
Chris Fries's avatar
Chris Fries committed
141 142 143
	f2fs_msg(inode->i_sb, KERN_NOTICE,
			"%s: ino = %x, name = %s, dir = %lx, err = %d",
			__func__, ino_of_node(ipage), raw_inode->i_name,
Dan Carpenter's avatar
Dan Carpenter committed
144
			IS_ERR(dir) ? 0 : dir->i_ino, err);
145 146 147
	return err;
}

148
static void recover_inode(struct inode *inode, struct page *page)
149
{
150
	struct f2fs_inode *raw = F2FS_INODE(page);
151
	char *name;
152 153 154 155 156 157 158 159 160

	inode->i_mode = le16_to_cpu(raw->i_mode);
	i_size_write(inode, le64_to_cpu(raw->i_size));
	inode->i_atime.tv_sec = le64_to_cpu(raw->i_mtime);
	inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime);
	inode->i_mtime.tv_sec = le64_to_cpu(raw->i_mtime);
	inode->i_atime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);
	inode->i_ctime.tv_nsec = le32_to_cpu(raw->i_ctime_nsec);
	inode->i_mtime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);
161

162 163 164 165 166
	if (file_enc_name(inode))
		name = "<encrypted>";
	else
		name = F2FS_INODE(page)->i_name;

167
	f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode: ino = %x, name = %s",
168
			ino_of_node(page), name);
169 170
}

171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196
static bool is_same_inode(struct inode *inode, struct page *ipage)
{
	struct f2fs_inode *ri = F2FS_INODE(ipage);
	struct timespec disk;

	if (!IS_INODE(ipage))
		return true;

	disk.tv_sec = le64_to_cpu(ri->i_ctime);
	disk.tv_nsec = le32_to_cpu(ri->i_ctime_nsec);
	if (timespec_compare(&inode->i_ctime, &disk) > 0)
		return false;

	disk.tv_sec = le64_to_cpu(ri->i_atime);
	disk.tv_nsec = le32_to_cpu(ri->i_atime_nsec);
	if (timespec_compare(&inode->i_atime, &disk) > 0)
		return false;

	disk.tv_sec = le64_to_cpu(ri->i_mtime);
	disk.tv_nsec = le32_to_cpu(ri->i_mtime_nsec);
	if (timespec_compare(&inode->i_mtime, &disk) > 0)
		return false;

	return true;
}

197 198
static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
{
199
	unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi));
200
	struct curseg_info *curseg;
201
	struct page *page = NULL;
202 203 204 205 206
	block_t blkaddr;
	int err = 0;

	/* get node pages in the current segment */
	curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
207
	blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
208

209
	ra_meta_pages(sbi, blkaddr, 1, META_POR, true);
210

211 212 213
	while (1) {
		struct fsync_inode_entry *entry;

214
		if (!is_valid_blkaddr(sbi, blkaddr, META_POR))
215
			return 0;
216

217
		page = get_tmp_page(sbi, blkaddr);
218

219
		if (cp_ver != cpver_of_node(page))
220
			break;
221 222 223 224 225

		if (!is_fsync_dnode(page))
			goto next;

		entry = get_fsync_inode(head, ino_of_node(page));
226 227 228 229
		if (entry) {
			if (!is_same_inode(entry->inode, page))
				goto next;
		} else {
230
			if (IS_INODE(page) && is_dent_dnode(page)) {
231 232
				err = recover_inode_page(sbi, page);
				if (err)
233
					break;
234 235 236
			}

			/* add this fsync inode to the list */
237
			entry = kmem_cache_alloc(fsync_entry_slab, GFP_F2FS_ZERO);
238 239
			if (!entry) {
				err = -ENOMEM;
240
				break;
241
			}
242 243 244 245
			/*
			 * CP | dnode(F) | inode(DF)
			 * For this case, we should not give up now.
			 */
246 247 248
			entry->inode = f2fs_iget(sbi->sb, ino_of_node(page));
			if (IS_ERR(entry->inode)) {
				err = PTR_ERR(entry->inode);
249
				kmem_cache_free(fsync_entry_slab, entry);
250 251
				if (err == -ENOENT) {
					err = 0;
252
					goto next;
253
				}
254
				break;
255
			}
256
			list_add_tail(&entry->list, head);
257
		}
258 259
		entry->blkaddr = blkaddr;

260 261 262 263 264
		if (IS_INODE(page)) {
			entry->last_inode = blkaddr;
			if (is_dent_dnode(page))
				entry->last_dentry = blkaddr;
		}
265 266 267
next:
		/* check next segment */
		blkaddr = next_blkaddr_of_node(page);
268
		f2fs_put_page(page, 1);
269 270

		ra_meta_pages_cond(sbi, blkaddr);
271
	}
272
	f2fs_put_page(page, 1);
273 274 275
	return err;
}

276
static void destroy_fsync_dnodes(struct list_head *head)
277
{
278 279 280
	struct fsync_inode_entry *entry, *tmp;

	list_for_each_entry_safe(entry, tmp, head, list) {
281 282 283 284 285 286
		iput(entry->inode);
		list_del(&entry->list);
		kmem_cache_free(fsync_entry_slab, entry);
	}
}

287
static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
288
			block_t blkaddr, struct dnode_of_data *dn)
289 290 291
{
	struct seg_entry *sentry;
	unsigned int segno = GET_SEGNO(sbi, blkaddr);
Jaegeuk Kim's avatar
Jaegeuk Kim committed
292
	unsigned short blkoff = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
Jaegeuk Kim's avatar
Jaegeuk Kim committed
293
	struct f2fs_summary_block *sum_node;
294
	struct f2fs_summary sum;
Jaegeuk Kim's avatar
Jaegeuk Kim committed
295
	struct page *sum_page, *node_page;
296
	struct dnode_of_data tdn = *dn;
297
	nid_t ino, nid;
298
	struct inode *inode;
299
	unsigned int offset;
300 301 302 303 304
	block_t bidx;
	int i;

	sentry = get_seg_entry(sbi, segno);
	if (!f2fs_test_bit(blkoff, sentry->cur_valid_map))
305
		return 0;
306 307 308 309 310 311

	/* Get the previous summary */
	for (i = CURSEG_WARM_DATA; i <= CURSEG_COLD_DATA; i++) {
		struct curseg_info *curseg = CURSEG_I(sbi, i);
		if (curseg->segno == segno) {
			sum = curseg->sum_blk->entries[blkoff];
Jaegeuk Kim's avatar
Jaegeuk Kim committed
312
			goto got_it;
313 314 315
		}
	}

Jaegeuk Kim's avatar
Jaegeuk Kim committed
316 317 318 319 320
	sum_page = get_sum_page(sbi, segno);
	sum_node = (struct f2fs_summary_block *)page_address(sum_page);
	sum = sum_node->entries[blkoff];
	f2fs_put_page(sum_page, 1);
got_it:
321 322 323 324
	/* Use the locked dnode page and inode */
	nid = le32_to_cpu(sum.nid);
	if (dn->inode->i_ino == nid) {
		tdn.nid = nid;
325 326
		if (!dn->inode_page_locked)
			lock_page(dn->inode_page);
327
		tdn.node_page = dn->inode_page;
328
		tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
329
		goto truncate_out;
330
	} else if (dn->nid == nid) {
331
		tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
332
		goto truncate_out;
333 334
	}

335
	/* Get the node page */
336
	node_page = get_node_page(sbi, nid);
337 338
	if (IS_ERR(node_page))
		return PTR_ERR(node_page);
339 340

	offset = ofs_of_node(node_page);
341 342 343
	ino = ino_of_node(node_page);
	f2fs_put_page(node_page, 1);

344 345 346 347 348 349 350 351
	if (ino != dn->inode->i_ino) {
		/* Deallocate previous index in the node page */
		inode = f2fs_iget(sbi->sb, ino);
		if (IS_ERR(inode))
			return PTR_ERR(inode);
	} else {
		inode = dn->inode;
	}
352

353
	bidx = start_bidx_of_node(offset, F2FS_I(inode)) +
354
			le16_to_cpu(sum.ofs_in_node);
355

356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372
	/*
	 * if inode page is locked, unlock temporarily, but its reference
	 * count keeps alive.
	 */
	if (ino == dn->inode->i_ino && dn->inode_page_locked)
		unlock_page(dn->inode_page);

	set_new_dnode(&tdn, inode, NULL, NULL, 0);
	if (get_dnode_of_data(&tdn, bidx, LOOKUP_NODE))
		goto out;

	if (tdn.data_blkaddr == blkaddr)
		truncate_data_blocks_range(&tdn, 1);

	f2fs_put_dnode(&tdn);
out:
	if (ino != dn->inode->i_ino)
373
		iput(inode);
374 375 376 377 378 379 380 381 382
	else if (dn->inode_page_locked)
		lock_page(dn->inode_page);
	return 0;

truncate_out:
	if (datablock_addr(tdn.node_page, tdn.ofs_in_node) == blkaddr)
		truncate_data_blocks_range(&tdn, 1);
	if (dn->inode->i_ino == nid && !dn->inode_page_locked)
		unlock_page(dn->inode_page);
383
	return 0;
384 385
}

386
static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
387 388
					struct page *page, block_t blkaddr)
{
389
	struct f2fs_inode_info *fi = F2FS_I(inode);
390 391 392
	unsigned int start, end;
	struct dnode_of_data dn;
	struct node_info ni;
393
	int err = 0, recovered = 0;
394

395 396 397 398
	/* step 1: recover xattr */
	if (IS_INODE(page)) {
		recover_inline_xattr(inode, page);
	} else if (f2fs_has_xattr_block(ofs_of_node(page))) {
399 400 401 402
		/*
		 * Deprecated; xattr blocks should be found from cold log.
		 * But, we should remain this for backward compatibility.
		 */
403
		recover_xattr_data(inode, page, blkaddr);
404
		goto out;
405
	}
406

407 408
	/* step 2: recover inline data */
	if (recover_inline_data(inode, page))
409 410
		goto out;

411
	/* step 3: recover data indices */
412
	start = start_bidx_of_node(ofs_of_node(page), fi);
413
	end = start + ADDRS_PER_PAGE(page, fi);
414 415

	set_new_dnode(&dn, inode, NULL, NULL, 0);
416

417
	err = get_dnode_of_data(&dn, start, ALLOC_NODE);
418
	if (err)
419
		goto out;
420

421
	f2fs_wait_on_page_writeback(dn.node_page, NODE);
422 423

	get_node_info(sbi, dn.nid, &ni);
424 425
	f2fs_bug_on(sbi, ni.ino != ino_of_node(page));
	f2fs_bug_on(sbi, ofs_of_node(dn.node_page) != ofs_of_node(page));
426

427
	for (; start < end; start++, dn.ofs_in_node++) {
428 429 430 431 432
		block_t src, dest;

		src = datablock_addr(dn.node_page, dn.ofs_in_node);
		dest = datablock_addr(page, dn.ofs_in_node);

433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455
		/* skip recovering if dest is the same as src */
		if (src == dest)
			continue;

		/* dest is invalid, just invalidate src block */
		if (dest == NULL_ADDR) {
			truncate_data_blocks_range(&dn, 1);
			continue;
		}

		/*
		 * dest is reserved block, invalidate src block
		 * and then reserve one new block in dnode page.
		 */
		if (dest == NEW_ADDR) {
			truncate_data_blocks_range(&dn, 1);
			err = reserve_new_block(&dn);
			f2fs_bug_on(sbi, err);
			continue;
		}

		/* dest is valid block, try to recover from src to dest */
		if (is_valid_blkaddr(sbi, dest, META_POR)) {
456

457
			if (src == NULL_ADDR) {
458
				err = reserve_new_block(&dn);
459
				/* We should not get -ENOSPC */
460
				f2fs_bug_on(sbi, err);
461 462 463
			}

			/* Check the previous node page having this index */
464 465 466
			err = check_index_in_prev_nodes(sbi, dest, &dn);
			if (err)
				goto err;
467 468

			/* write dummy data page */
469 470
			f2fs_replace_block(sbi, &dn, src, dest,
							ni.version, false);
471
			recovered++;
472 473 474 475 476 477 478 479 480 481
		}
	}

	if (IS_INODE(dn.node_page))
		sync_inode_page(&dn);

	copy_node_footer(dn.node_page, page);
	fill_node_footer(dn.node_page, dn.nid, ni.ino,
					ofs_of_node(page), false);
	set_page_dirty(dn.node_page);
482
err:
483
	f2fs_put_dnode(&dn);
484
out:
Chris Fries's avatar
Chris Fries committed
485 486 487
	f2fs_msg(sbi->sb, KERN_NOTICE,
		"recover_data: ino = %lx, recovered = %d blocks, err = %d",
		inode->i_ino, recovered, err);
488
	return err;
489 490
}

491
static int recover_data(struct f2fs_sb_info *sbi,
492 493
				struct list_head *head, int type)
{
494
	unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi));
495
	struct curseg_info *curseg;
496
	struct page *page = NULL;
497
	int err = 0;
498 499 500 501 502 503 504 505 506
	block_t blkaddr;

	/* get node pages in the current segment */
	curseg = CURSEG_I(sbi, type);
	blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);

	while (1) {
		struct fsync_inode_entry *entry;

507
		if (!is_valid_blkaddr(sbi, blkaddr, META_POR))
508
			break;
509

510 511
		ra_meta_pages_cond(sbi, blkaddr);

512
		page = get_tmp_page(sbi, blkaddr);
513

514 515
		if (cp_ver != cpver_of_node(page)) {
			f2fs_put_page(page, 1);
516
			break;
517
		}
518 519 520 521

		entry = get_fsync_inode(head, ino_of_node(page));
		if (!entry)
			goto next;
522 523 524
		/*
		 * inode(x) | CP | inode(x) | dnode(F)
		 * In this case, we can lose the latest inode(x).
525
		 * So, call recover_inode for the inode update.
526
		 */
527 528 529 530 531 532 533 534 535
		if (entry->last_inode == blkaddr)
			recover_inode(entry->inode, page);
		if (entry->last_dentry == blkaddr) {
			err = recover_dentry(entry->inode, page);
			if (err) {
				f2fs_put_page(page, 1);
				break;
			}
		}
536
		err = do_recover_data(sbi, entry->inode, page, blkaddr);
537 538
		if (err) {
			f2fs_put_page(page, 1);
539
			break;
540
		}
541 542 543 544 545 546 547 548 549

		if (entry->blkaddr == blkaddr) {
			iput(entry->inode);
			list_del(&entry->list);
			kmem_cache_free(fsync_entry_slab, entry);
		}
next:
		/* check next segment */
		blkaddr = next_blkaddr_of_node(page);
550
		f2fs_put_page(page, 1);
551
	}
552 553 554
	if (!err)
		allocate_new_segments(sbi);
	return err;
555 556
}

557
int recover_fsync_data(struct f2fs_sb_info *sbi)
558
{
559
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
560
	struct list_head inode_list;
561
	block_t blkaddr;
562
	int err;
Haicheng Li's avatar
Haicheng Li committed
563
	bool need_writecp = false;
564 565

	fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry",
566
			sizeof(struct fsync_inode_entry));
567
	if (!fsync_entry_slab)
568
		return -ENOMEM;
569 570 571

	INIT_LIST_HEAD(&inode_list);

572 573 574
	/* prevent checkpoint */
	mutex_lock(&sbi->cp_mutex);

575 576
	blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);

577
	/* step #1: find fsynced inode numbers */
578 579
	err = find_fsync_dnodes(sbi, &inode_list);
	if (err)
580 581 582 583 584
		goto out;

	if (list_empty(&inode_list))
		goto out;

Haicheng Li's avatar
Haicheng Li committed
585
	need_writecp = true;
586

587
	/* step #2: recover data */
588
	err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE);
589
	if (!err)
590
		f2fs_bug_on(sbi, !list_empty(&inode_list));
591
out:
592
	destroy_fsync_dnodes(&inode_list);
593
	kmem_cache_destroy(fsync_entry_slab);
594

595 596
	/* truncate meta pages to be used by the recovery */
	truncate_inode_pages_range(META_MAPPING(sbi),
597
			(loff_t)MAIN_BLKADDR(sbi) << PAGE_CACHE_SHIFT, -1);
598

599 600 601 602 603
	if (err) {
		truncate_inode_pages_final(NODE_MAPPING(sbi));
		truncate_inode_pages_final(META_MAPPING(sbi));
	}

604
	clear_sbi_flag(sbi, SBI_POR_DOING);
605
	if (err) {
Chao Yu's avatar
Chao Yu committed
606 607 608 609
		bool invalidate = false;

		if (discard_next_dnode(sbi, blkaddr))
			invalidate = true;
610 611 612 613

		/* Flush all the NAT/SIT pages */
		while (get_pages(sbi, F2FS_DIRTY_META))
			sync_meta_pages(sbi, META, LONG_MAX);
Chao Yu's avatar
Chao Yu committed
614 615 616 617 618 619

		/* invalidate temporary meta page */
		if (invalidate)
			invalidate_mapping_pages(META_MAPPING(sbi),
							blkaddr, blkaddr);

620 621
		set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
		mutex_unlock(&sbi->cp_mutex);
622
	} else if (need_writecp) {
623
		struct cp_control cpc = {
624
			.reason = CP_RECOVERY,
625
		};
626
		mutex_unlock(&sbi->cp_mutex);
627
		write_checkpoint(sbi, &cpc);
628 629
	} else {
		mutex_unlock(&sbi->cp_mutex);
630
	}
631
	return err;
632
}