/*
 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
 */

#include <linux/time.h>
#include <linux/fs.h>
#include "reiserfs.h"
#include "acl.h"
#include "xattr.h"
#include <linux/exportfs.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <asm/unaligned.h>
#include <linux/buffer_head.h>
#include <linux/mpage.h>
#include <linux/writeback.h>
#include <linux/quotaops.h>
#include <linux/swap.h>
#include <linux/uio.h>

int reiserfs_commit_write(struct file *f, struct page *page,
			  unsigned from, unsigned to);

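/*
 * Called when the VFS evicts the inode from the inode cache.  If the
 * link count is zero, the file body and stat data are deleted inside a
 * transaction, the quota charge is released and the "save" link is
 * removed; otherwise we only drop the page cache and clear the inode.
 */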
void reiserfs_evict_inode(struct inode *inode)
{
	/*
	 * We need blocks for transaction + (user+group) quota
	 * update (possibly delete)
	 */
	int jbegin_count =
	    JOURNAL_PER_BALANCE_CNT * 2 +
	    2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb);
	struct reiserfs_transaction_handle th;
	int err;

	if (!inode->i_nlink && !is_bad_inode(inode))
		dquot_initialize(inode);

	truncate_inode_pages_final(&inode->i_data);
	if (inode->i_nlink)
		goto no_delete;

	/*
	 * The = 0 happens when we abort creating a new inode
	 * for some reason like lack of space..
	 * also handles bad_inode case
	 */
	if (!(inode->i_state & I_NEW) && INODE_PKEY(inode)->k_objectid != 0) {

		reiserfs_delete_xattrs(inode);

		reiserfs_write_lock(inode->i_sb);

		if (journal_begin(&th, inode->i_sb, jbegin_count))
			goto out;
		reiserfs_update_inode_transaction(inode);

		reiserfs_discard_prealloc(&th, inode);

		err = reiserfs_delete_object(&th, inode);

		/*
		 * Do quota update inside a transaction for journaled quotas.
		 * We must do that after delete_object so that quota updates
		 * go into the same transaction as stat data deletion
		 */
		if (!err) {
			int depth = reiserfs_write_unlock_nested(inode->i_sb);
			dquot_free_inode(inode);
			reiserfs_write_lock_nested(inode->i_sb, depth);
		}

		if (journal_end(&th))
			goto out;

		/*
		 * check return value from reiserfs_delete_object after
		 * ending the transaction
		 */
		if (err)
			goto out;

		/*
		 * all items of file are deleted, so we can remove
		 * "save" link
		 * we can't do anything about an error here
		 */
		remove_save_link(inode, 0 /* not truncate */);
out:
		reiserfs_write_unlock(inode->i_sb);
	} else {
		/* no object items are in the tree */
		;
	}

	/* note this must go after the journal_end to prevent deadlock */
	clear_inode(inode);

	dquot_drop(inode);
	inode->i_blocks = 0;
	return;

no_delete:
	clear_inode(inode);
	dquot_drop(inode);
}

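/* assemble an in-core key from explicitly supplied fields */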
static void _make_cpu_key(struct cpu_key *key, int version, __u32 dirid,
			  __u32 objectid, loff_t offset, int type, int length)
{
	key->version = version;

	key->on_disk_key.k_dir_id = dirid;
	key->on_disk_key.k_objectid = objectid;
	set_cpu_key_k_offset(key, offset);
	set_cpu_key_k_type(key, type);
	key->key_length = length;
}

/*
 * take base of inode_key (it comes from inode always) (dirid, objectid)
 * and version from an inode, set offset and type of key
 */
void make_cpu_key(struct cpu_key *key, struct inode *inode, loff_t offset,
		  int type, int length)
{
	_make_cpu_key(key, get_inode_item_key_version(inode),
		      le32_to_cpu(INODE_PKEY(inode)->k_dir_id),
		      le32_to_cpu(INODE_PKEY(inode)->k_objectid), offset, type,
		      length);
}

/* when key is 0, do not set version and short key */
inline void make_le_item_head(struct item_head *ih, const struct cpu_key *key,
			      int version,
			      loff_t offset, int type, int length,
			      int entry_count /*or ih_free_space */ )
{
	if (key) {
		ih->ih_key.k_dir_id = cpu_to_le32(key->on_disk_key.k_dir_id);
		ih->ih_key.k_objectid =
		    cpu_to_le32(key->on_disk_key.k_objectid);
	}
	put_ih_version(ih, version);
	set_le_ih_k_offset(ih, offset);
	set_le_ih_k_type(ih, type);
	put_ih_item_len(ih, length);
	/*    set_ih_free_space (ih, 0); */
	/*
	 * for directory items it is entry count, for directs and stat
	 * datas - 0xffff, for indirects - 0
	 */
	put_ih_entry_count(ih, entry_count);
}

/*
 * FIXME: we might cache recently accessed indirect item
 * Ugh.  Not too eager for that....
 * I cut the code until such time as I see a convincing argument (benchmark).
 * I don't want a bloated inode struct..., and I don't like code complexity....
 */

/*
 * cutting the code is fine, since it really isn't in use yet and is easy
 * to add back in.  But, Vladimir has a really good idea here.  Think
 * about what happens for reading a file.  For each page,
 * The VFS layer calls reiserfs_readpage, who searches the tree to find
 * an indirect item.  This indirect item has X number of pointers, where
 * X is a big number if we've done the block allocation right.  But,
 * we only use one or two of these pointers during each call to readpage,
 * needlessly researching again later on.
 *
 * The size of the cache could be dynamic based on the size of the file.
 *
 * I'd also like to see us cache the location of the stat data item, since
 * we are needlessly researching for that frequently.
 *
 * --chris
 */

/*
 * If this page has a file tail in it, and
 * it was read in by get_block_create_0, the page data is valid,
 * but tail is still sitting in a direct item, and we can't write to
 * it.  So, look through this page, and check all the mapped buffers
 * to make sure they have valid block numbers.  Any that don't need
 * to be unmapped, so that __block_write_begin will correctly call
 * reiserfs_get_block to convert the tail into an unformatted node
 */
static inline void fix_tail_page_for_writing(struct page *page)
{
	struct buffer_head *head, *next, *bh;

	if (page && page_has_buffers(page)) {
		head = page_buffers(page);
		bh = head;
		do {
			next = bh->b_this_page;
			if (buffer_mapped(bh) && bh->b_blocknr == 0) {
				reiserfs_unmap_buffer(bh);
			}
			bh = next;
		} while (bh != head);
	}
}

/*
 * reiserfs_get_block needs to allocate a block unless one has already
 * been allocated or a non-hole position has been found in the indirect
 * item
 */
static inline int allocation_needed(int retval, b_blocknr_t allocated,
				    struct item_head *ih,
				    __le32 * item, int pos_in_item)
{
	if (allocated)
		return 0;
	if (retval == POSITION_FOUND && is_indirect_le_ih(ih) &&
	    get_block_num(item, pos_in_item))
		return 0;
	return 1;
}

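/* true when the search landed on an existing indirect item */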
static inline int indirect_item_found(int retval, struct item_head *ih)
{
	return (retval == POSITION_FOUND) && is_indirect_le_ih(ih);
}

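/* map bh to the given block on this inode's device */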
static inline void set_block_dev_mapped(struct buffer_head *bh,
					b_blocknr_t block, struct inode *inode)
{
	map_bh(bh, inode->i_sb, block);
}

/*
 * files which were created in the earlier version cannot be longer
 * than 2 GB
 */
static int file_capable(struct inode *inode, sector_t block)
{
	/* it is new file. */
	if (get_inode_item_key_version(inode) != KEY_FORMAT_3_5 ||
	    /* old file, but 'block' is inside of 2gb */
	    block < (1 << (31 - inode->i_sb->s_blocksize_bits)))
		return 1;

	return 0;
}

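/*
 * Release the caller's tree path, then end the running transaction and
 * begin a fresh one so the journal can make progress (e.g. free some
 * blocks).  When we are nested inside another transaction, only the
 * path is released.
 */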
static int restart_transaction(struct reiserfs_transaction_handle *th,
			       struct inode *inode, struct treepath *path)
{
	struct super_block *s = th->t_super;
	int err;

	BUG_ON(!th->t_trans_id);
	BUG_ON(!th->t_refcount);

	pathrelse(path);

	/* we cannot restart while nested */
	if (th->t_refcount > 1) {
		return 0;
	}
	reiserfs_update_sd(th, inode);
	err = journal_end(th);
	if (!err) {
		err = journal_begin(th, s, JOURNAL_PER_BALANCE_CNT * 6);
		if (!err)
			reiserfs_update_inode_transaction(inode);
	}
	return err;
}

/*
 * This is called by reiserfs_get_block when create == 0.  It maps the
 * 'block'-th logical block of the file into bh_result.  When the block
 * lives in a direct item (a file tail), it either bails out without
 * mapping anything (the bmap case) or reads the direct item contents
 * into the piece of page behind bh_result.
 */
static int _get_block_create_0(struct inode *inode, sector_t block,
			       struct buffer_head *bh_result, int args)
{
	INITIALIZE_PATH(path);
	struct cpu_key key;
	struct buffer_head *bh;
	struct item_head *ih, tmp_ih;
	b_blocknr_t blocknr;
	char *p = NULL;
	int chars;
	int ret;
	int result;
	int done = 0;
	unsigned long offset;

	/* prepare the key to look for the 'block'-th block of file */
	make_cpu_key(&key, inode,
		     (loff_t) block * inode->i_sb->s_blocksize + 1, TYPE_ANY,
		     3);

	result = search_for_position_by_key(inode->i_sb, &key, &path);
	if (result != POSITION_FOUND) {
		pathrelse(&path);
		if (p)
			kunmap(bh_result->b_page);
		if (result == IO_ERROR)
			return -EIO;
		/*
		 * We do not return -ENOENT if there is a hole but page is
		 * uptodate, because it means that there is some MMAPED data
		 * associated with it that is yet to be written to disk.
		 */
		if ((args & GET_BLOCK_NO_HOLE)
		    && !PageUptodate(bh_result->b_page)) {
			return -ENOENT;
		}
		return 0;
	}

	bh = get_last_bh(&path);
	ih = tp_item_head(&path);
	if (is_indirect_le_ih(ih)) {
		__le32 *ind_item = (__le32 *) ih_item_body(bh, ih);

		/*
		 * FIXME: here we could cache indirect item or part of it in
		 * the inode to avoid search_by_key in case of subsequent
		 * access to file
		 */
		blocknr = get_block_num(ind_item, path.pos_in_item);
		ret = 0;
		if (blocknr) {
			map_bh(bh_result, inode->i_sb, blocknr);
			if (path.pos_in_item ==
			    ((ih_item_len(ih) / UNFM_P_SIZE) - 1)) {
				set_buffer_boundary(bh_result);
			}
		} else
			/*
			 * We do not return -ENOENT if there is a hole but
			 * page is uptodate, because it means that there is
			 * some MMAPED data associated with it that is
			 * yet to be written to disk.
			 */
		if ((args & GET_BLOCK_NO_HOLE)
			    && !PageUptodate(bh_result->b_page)) {
			ret = -ENOENT;
		}

		pathrelse(&path);
		if (p)
			kunmap(bh_result->b_page);
		return ret;
	}
	/* requested data are in direct item(s) */
	if (!(args & GET_BLOCK_READ_DIRECT)) {
		/*
		 * we are called by bmap. FIXME: we can not map block of file
		 * when it is stored in direct item(s)
		 */
		pathrelse(&path);
		if (p)
			kunmap(bh_result->b_page);
		return -ENOENT;
	}

	/*
	 * if we've got a direct item, and the buffer or page was uptodate,
	 * we don't want to pull data off disk again.  skip to the
	 * end, where we map the buffer and return
	 */
	if (buffer_uptodate(bh_result)) {
		goto finished;
	} else
		/*
		 * grab_tail_page can trigger calls to reiserfs_get_block on
		 * up to date pages without any buffers.  If the page is up
		 * to date, we don't want to read old data off disk.  Set the up
		 * to date bit on the buffer instead and jump to the end
		 */
	if (!bh_result->b_page || PageUptodate(bh_result->b_page)) {
		set_buffer_uptodate(bh_result);
		goto finished;
	}
	/* read file tail into part of page */
	offset = (cpu_key_k_offset(&key) - 1) & (PAGE_SIZE - 1);
	copy_item_head(&tmp_ih, ih);

	/*
	 * we only want to kmap if we are reading the tail into the page.
	 * this is not the common case, so we don't kmap until we are
	 * sure we need to.  But, this means the item might move if
	 * kmap schedules
	 */
	if (!p)
		p = (char *)kmap(bh_result->b_page);

	p += offset;
	memset(p, 0, inode->i_sb->s_blocksize);
	do {
		if (!is_direct_le_ih(ih)) {
			BUG();
		}
		/*
		 * make sure we don't read more bytes than actually exist in
		 * the file.  This can happen in odd cases where i_size isn't
		 * correct, and when direct item padding results in a few
		 * extra bytes at the end of the direct item
		 */
		if ((le_ih_k_offset(ih) + path.pos_in_item) > inode->i_size)
			break;
		if ((le_ih_k_offset(ih) - 1 + ih_item_len(ih)) > inode->i_size) {
			chars =
			    inode->i_size - (le_ih_k_offset(ih) - 1) -
			    path.pos_in_item;
			done = 1;
		} else {
			chars = ih_item_len(ih) - path.pos_in_item;
		}
		memcpy(p, ih_item_body(bh, ih) + path.pos_in_item, chars);

		if (done)
			break;

		p += chars;

		/*
		 * we are done if the direct item we read is not the last item
		 * of the node.  FIXME: we could try to check right delimiting key
		 * to see whether direct item continues in the right
		 * neighbor or rely on i_size
		 */
		if (PATH_LAST_POSITION(&path) != (B_NR_ITEMS(bh) - 1))
			break;

		/* update key to look for the next piece */
		set_cpu_key_k_offset(&key, cpu_key_k_offset(&key) + chars);
		result = search_for_position_by_key(inode->i_sb, &key, &path);
		if (result != POSITION_FOUND)
			/* i/o error most likely */
			break;
		bh = get_last_bh(&path);
		ih = tp_item_head(&path);
	} while (1);

	flush_dcache_page(bh_result->b_page);
	kunmap(bh_result->b_page);

finished:
	pathrelse(&path);

	if (result == IO_ERROR)
		return -EIO;

	/*
	 * this buffer has valid data, but isn't valid for io.  mapping it to
	 * block #0 tells the rest of reiserfs it just has a tail in it
	 */
	map_bh(bh_result, inode->i_sb, 0);
	set_buffer_uptodate(bh_result);
	return 0;
}

/*
 * this is called to create file map. So, _get_block_create_0 will not
 * read direct item
 */
static int reiserfs_bmap(struct inode *inode, sector_t block,
			 struct buffer_head *bh_result, int create)
{
	if (!file_capable(inode, block))
		return -EFBIG;

	reiserfs_write_lock(inode->i_sb);
	/* do not read the direct item */
	_get_block_create_0(inode, block, bh_result, 0);
	reiserfs_write_unlock(inode->i_sb);
	return 0;
}

/*
 * special version of get_block that is only used by grab_tail_page right
 * now.  It is sent to __block_write_begin, and when you try to get a
 * block past the end of the file (or a block from a hole) it returns
 * -ENOENT instead of a valid buffer.  __block_write_begin expects to
 * be able to do i/o on the buffers returned, unless an error value
 * is also returned.
 *
 * So, this allows __block_write_begin to be used for reading a single block
 * in a page, where it does not produce a valid page for holes or past the
 * end of the file.  This turns out to be exactly what we need for reading
 * tails for conversion.
 *
 * The point of the wrapper is forcing a certain value for create, even
 * though the VFS layer is calling this function with create==1.  If you
 * don't want to send create == GET_BLOCK_NO_HOLE to reiserfs_get_block,
 * don't use this function.
*/
static int reiserfs_get_block_create_0(struct inode *inode, sector_t block,
				       struct buffer_head *bh_result,
				       int create)
{
	return reiserfs_get_block(inode, block, bh_result, GET_BLOCK_NO_HOLE);
}

/*
 * This is special helper for reiserfs_get_block in case we are executing
 * direct_IO request.
 */
static int reiserfs_get_blocks_direct_io(struct inode *inode,
					 sector_t iblock,
					 struct buffer_head *bh_result,
					 int create)
{
	int ret;

	bh_result->b_page = NULL;

	/*
	 * We set the b_size before reiserfs_get_block call since it is
	 * referenced in convert_tail_for_hole() that may be called from
	 * reiserfs_get_block()
	 */
	bh_result->b_size = (1 << inode->i_blkbits);

	ret = reiserfs_get_block(inode, iblock, bh_result,
				 create | GET_BLOCK_NO_DANGLE);
	if (ret)
		goto out;

	/* don't allow direct io onto tail pages */
	if (buffer_mapped(bh_result) && bh_result->b_blocknr == 0) {
		/*
		 * make sure future calls to the direct io funcs for this
		 * offset in the file fail by unmapping the buffer
		 */
		clear_buffer_mapped(bh_result);
		ret = -EINVAL;
	}

	/*
	 * Possible unpacked tail. Flush the data before pages have
	 * disappeared
	 */
	if (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) {
		int err;

		reiserfs_write_lock(inode->i_sb);

		err = reiserfs_commit_for_inode(inode);
		REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;

		reiserfs_write_unlock(inode->i_sb);

		if (err < 0)
			ret = err;
	}
out:
	return ret;
}

/*
 * helper function for when reiserfs_get_block is called for a hole
 * but the file tail is still in a direct item
 * bh_result is the buffer head for the hole
 * tail_offset is the offset of the start of the tail in the file
 *
 * This calls prepare_write, which will start a new transaction;
 * you should not be in a transaction, or have any paths held when you
 * call this.
 */
static int convert_tail_for_hole(struct inode *inode,
				 struct buffer_head *bh_result,
				 loff_t tail_offset)
{
	unsigned long index;
	unsigned long tail_end;
	unsigned long tail_start;
	struct page *tail_page;
	struct page *hole_page = bh_result->b_page;
	int retval = 0;

	if ((tail_offset & (bh_result->b_size - 1)) != 1)
		return -EIO;

	/* always try to read until the end of the block */
	tail_start = tail_offset & (PAGE_SIZE - 1);
	tail_end = (tail_start | (bh_result->b_size - 1)) + 1;

	index = tail_offset >> PAGE_SHIFT;
	/*
	 * hole_page can be zero in case of direct_io; we are sure
	 * that we cannot get here if we write with O_DIRECT into tail page
	 */
	if (!hole_page || index != hole_page->index) {
		tail_page = grab_cache_page(inode->i_mapping, index);
		retval = -ENOMEM;
		if (!tail_page) {
			goto out;
		}
	} else {
		tail_page = hole_page;
	}

	/*
	 * we don't have to make sure the conversion did not happen while
	 * we were locking the page because anyone that could convert
	 * must first take i_mutex.
	 *
	 * We must fix the tail page for writing because it might have buffers
	 * that are mapped, but have a block number of 0.  This indicates tail
	 * data that has been read directly into the page, and
	 * __block_write_begin won't trigger a get_block in this case.
	 */
	fix_tail_page_for_writing(tail_page);
	retval = __reiserfs_write_begin(tail_page, tail_start,
				      tail_end - tail_start);
	if (retval)
		goto unlock;

	/* tail conversion might change the data in the page */
	flush_dcache_page(tail_page);

	retval = reiserfs_commit_write(NULL, tail_page, tail_start, tail_end);

unlock:
	if (tail_page != hole_page) {
		unlock_page(tail_page);
		put_page(tail_page);
	}
out:
	return retval;
}

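/*
 * Allocate one unformatted node for 'block', using the preallocating
 * allocator unless preallocation is disabled for this call or compiled
 * out.
 */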
static inline int _allocate_block(struct reiserfs_transaction_handle *th,
				  sector_t block,
				  struct inode *inode,
				  b_blocknr_t * allocated_block_nr,
				  struct treepath *path, int flags)
{
	BUG_ON(!th->t_trans_id);

#ifdef REISERFS_PREALLOCATE
	if (!(flags & GET_BLOCK_NO_IMUX)) {
		return reiserfs_new_unf_blocknrs2(th, inode, allocated_block_nr,
						  path, block);
	}
#endif
	return reiserfs_new_unf_blocknrs(th, inode, allocated_block_nr, path,
					 block);
}

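/*
 * Map (and, for GET_BLOCK_CREATE, allocate) the 'block'-th logical block
 * of the file into bh_result.  This handles plugging holes in indirect
 * items, converting a tail stored in direct items into an unformatted
 * node, and appending indirect items with holes up to the target block.
 */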
int reiserfs_get_block(struct inode *inode, sector_t block,
		       struct buffer_head *bh_result, int create)
{
	int repeat, retval = 0;
	/* b_blocknr_t is (unsigned) 32 bit int*/
	b_blocknr_t allocated_block_nr = 0;
	INITIALIZE_PATH(path);
	int pos_in_item;
	struct cpu_key key;
	struct buffer_head *bh, *unbh = NULL;
	struct item_head *ih, tmp_ih;
	__le32 *item;
	int done;
	int fs_gen;
	struct reiserfs_transaction_handle *th = NULL;
	/*
	 * space reserved in transaction batch:
	 * . 3 balancings in direct->indirect conversion
	 * . 1 block involved into reiserfs_update_sd()
	 * XXX in practically impossible worst case direct2indirect()
	 * can incur (much) more than 3 balancings.
	 * quota update for user, group
	 */
	int jbegin_count =
	    JOURNAL_PER_BALANCE_CNT * 3 + 1 +
	    2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb);
	int version;
	int dangle = 1;
	loff_t new_offset =
	    (((loff_t) block) << inode->i_sb->s_blocksize_bits) + 1;

	reiserfs_write_lock(inode->i_sb);
	version = get_inode_item_key_version(inode);

	if (!file_capable(inode, block)) {
		reiserfs_write_unlock(inode->i_sb);
		return -EFBIG;
	}

	/*
	 * if !create, we aren't changing the FS, so we don't need to
	 * log anything, so we don't need to start a transaction
	 */
	if (!(create & GET_BLOCK_CREATE)) {
		int ret;
		/* find number of block-th logical block of the file */
		ret = _get_block_create_0(inode, block, bh_result,
					  create | GET_BLOCK_READ_DIRECT);
		reiserfs_write_unlock(inode->i_sb);
		return ret;
	}

	/*
	 * if we're already in a transaction, make sure to close
	 * any new transactions we start in this func
	 */
	if ((create & GET_BLOCK_NO_DANGLE) ||
	    reiserfs_transaction_running(inode->i_sb))
		dangle = 0;

	/*
	 * If the file is of such a size that it might have a tail and
	 * tails are enabled, we should mark it as possibly needing
	 * tail packing on close
	 */
	if ((have_large_tails(inode->i_sb)
	     && inode->i_size < i_block_size(inode) * 4)
	    || (have_small_tails(inode->i_sb)
		&& inode->i_size < i_block_size(inode)))
		REISERFS_I(inode)->i_flags |= i_pack_on_close_mask;

	/* set the key of the first byte in the 'block'-th block of file */
	make_cpu_key(&key, inode, new_offset, TYPE_ANY, 3 /*key length */ );
	if ((new_offset + inode->i_sb->s_blocksize - 1) > inode->i_size) {
start_trans:
		th = reiserfs_persistent_transaction(inode->i_sb, jbegin_count);
		if (!th) {
			retval = -ENOMEM;
			goto failure;
		}
		reiserfs_update_inode_transaction(inode);
	}
research:

	retval = search_for_position_by_key(inode->i_sb, &key, &path);
	if (retval == IO_ERROR) {
		retval = -EIO;
		goto failure;
	}

	bh = get_last_bh(&path);
	ih = tp_item_head(&path);
	item = tp_item_body(&path);
	pos_in_item = path.pos_in_item;

	fs_gen = get_generation(inode->i_sb);
	copy_item_head(&tmp_ih, ih);

	if (allocation_needed
	    (retval, allocated_block_nr, ih, item, pos_in_item)) {
		/* we have to allocate block for the unformatted node */
		if (!th) {
			pathrelse(&path);
			goto start_trans;
		}

		repeat =
		    _allocate_block(th, block, inode, &allocated_block_nr,
				    &path, create);

		/*
		 * restart the transaction to give the journal a chance to free
		 * some blocks.  releases the path, so we have to go back to
		 * research if we succeed on the second try
		 */
		if (repeat == NO_DISK_SPACE || repeat == QUOTA_EXCEEDED) {
			SB_JOURNAL(inode->i_sb)->j_next_async_flush = 1;
			retval = restart_transaction(th, inode, &path);
			if (retval)
				goto failure;
			repeat =
			    _allocate_block(th, block, inode,
					    &allocated_block_nr, NULL, create);

			if (repeat != NO_DISK_SPACE && repeat != QUOTA_EXCEEDED) {
				goto research;
			}
			if (repeat == QUOTA_EXCEEDED)
				retval = -EDQUOT;
			else
				retval = -ENOSPC;
			goto failure;
		}

		if (fs_changed(fs_gen, inode->i_sb)
		    && item_moved(&tmp_ih, &path)) {
			goto research;
		}
	}

	if (indirect_item_found(retval, ih)) {
		b_blocknr_t unfm_ptr;
		/*
		 * 'block'-th block is in the file already (there is
		 * corresponding cell in some indirect item). But it may be
		 * zero unformatted node pointer (hole)
		 */
		unfm_ptr = get_block_num(item, pos_in_item);
		if (unfm_ptr == 0) {
			/* use allocated block to plug the hole */
			reiserfs_prepare_for_journal(inode->i_sb, bh, 1);
			if (fs_changed(fs_gen, inode->i_sb)
			    && item_moved(&tmp_ih, &path)) {
				reiserfs_restore_prepared_buffer(inode->i_sb,
								 bh);
				goto research;
			}
			set_buffer_new(bh_result);
			if (buffer_dirty(bh_result)
			    && reiserfs_data_ordered(inode->i_sb))
				reiserfs_add_ordered_list(inode, bh_result);
			put_block_num(item, pos_in_item, allocated_block_nr);
			unfm_ptr = allocated_block_nr;
			journal_mark_dirty(th, bh);
			reiserfs_update_sd(th, inode);
		}
		set_block_dev_mapped(bh_result, unfm_ptr, inode);
		pathrelse(&path);
		retval = 0;
		if (!dangle && th)
			retval = reiserfs_end_persistent_transaction(th);

		reiserfs_write_unlock(inode->i_sb);

		/*
		 * the item was found, so new blocks were not added to the file;
		 * there is no need to make sure the inode is updated with this
		 * transaction
		 */
		return retval;
	}

	if (!th) {
		pathrelse(&path);
		goto start_trans;
	}

	/*
	 * desired position is not found or is in the direct item. We have
	 * to append file with holes up to 'block'-th block converting
	 * direct items to indirect one if necessary
	 */
	done = 0;
	do {
		if (is_statdata_le_ih(ih)) {
			__le32 unp = 0;
			struct cpu_key tmp_key;

			/* indirect item has to be inserted */
			make_le_item_head(&tmp_ih, &key, version, 1,
					  TYPE_INDIRECT, UNFM_P_SIZE,
					  0 /* free_space */ );

			/*
			 * we are going to add 'block'-th block to the file.
			 * Use allocated block for that
			 */
			if (cpu_key_k_offset(&key) == 1) {
				unp = cpu_to_le32(allocated_block_nr);
				set_block_dev_mapped(bh_result,
						     allocated_block_nr, inode);
				set_buffer_new(bh_result);
				done = 1;
			}
			tmp_key = key;	/* ;) */
			set_cpu_key_k_offset(&tmp_key, 1);
			PATH_LAST_POSITION(&path)++;

			retval =
			    reiserfs_insert_item(th, &path, &tmp_key, &tmp_ih,
						 inode, (char *)&unp);
			if (retval) {
				reiserfs_free_block(th, inode,
						    allocated_block_nr, 1);
				/*
				 * retval == -ENOSPC, -EDQUOT or -EIO
				 * or -EEXIST
				 */
				goto failure;
			}
		} else if (is_direct_le_ih(ih)) {
			/* direct item has to be converted */
			loff_t tail_offset;

			tail_offset =
			    ((le_ih_k_offset(ih) -
			      1) & ~(inode->i_sb->s_blocksize - 1)) + 1;

			/*
			 * direct item we just found fits into block we have
			 * to map. Convert it into unformatted node: use
			 * bh_result for the conversion
			 */
			if (tail_offset == cpu_key_k_offset(&key)) {
				set_block_dev_mapped(bh_result,
						     allocated_block_nr, inode);
				unbh = bh_result;
				done = 1;
			} else {
				/*
				 * we have to pad file tail stored in direct
				 * item(s) up to block size and convert it
				 * to unformatted node. FIXME: this should
				 * also get into page cache
				 */

				pathrelse(&path);
				/*
				 * ugly, but we can only end the transaction if
				 * we aren't nested
				 */
				BUG_ON(!th->t_refcount);
				if (th->t_refcount == 1) {
					retval =
					    reiserfs_end_persistent_transaction
					    (th);
					th = NULL;
					if (retval)
						goto failure;
				}

				retval =
				    convert_tail_for_hole(inode, bh_result,
							  tail_offset);
				if (retval) {
					if (retval != -ENOSPC)
						reiserfs_error(inode->i_sb,
							"clm-6004",
							"convert tail failed "
							"inode %lu, error %d",
							inode->i_ino,
							retval);
					if (allocated_block_nr) {
						/*
						 * the bitmap, the super,
						 * and the stat data == 3
						 */
						if (!th)
							th = reiserfs_persistent_transaction(inode->i_sb, 3);
						if (th)
							reiserfs_free_block(th,
									    inode,
									    allocated_block_nr,
									    1);
					}
					goto failure;
				}
				goto research;
			}
			retval =
			    direct2indirect(th, inode, &path, unbh,
					    tail_offset);
			if (retval) {
				reiserfs_unmap_buffer(unbh);
				reiserfs_free_block(th, inode,
						    allocated_block_nr, 1);
				goto failure;
			}
			/*
			 * it is important that set_buffer_uptodate is done
			 * after the direct2indirect.  The buffer might
			 * contain valid data newer than the data on disk
			 * (read by readpage, changed, and then sent here by
			 * writepage).  direct2indirect needs to know if unbh
			 * was already up to date, so it can decide if the
			 * data in unbh needs to be replaced with data from
			 * the disk
			 */
			set_buffer_uptodate(unbh);

			/*
			 * unbh->b_page == NULL in case of DIRECT_IO request,
			 * this means buffer will disappear shortly, so it
			 * should not be added to the tail list
			 */
			if (unbh->b_page) {
				/*
				 * we've converted the tail, so we must
				 * flush unbh before the transaction commits
				 */
				reiserfs_add_tail_list(inode, unbh);

				/*
				 * mark it dirty now to prevent commit_write
				 * from adding this buffer to the inode's
				 * dirty buffer list
				 */
				/*
				 * AKPM: changed __mark_buffer_dirty to
				 * mark_buffer_dirty().  It's still atomic,
				 * but it sets the page dirty too, which makes
				 * it eligible for writeback at any time by the
				 * VM (which was also the case with
				 * __mark_buffer_dirty())
				 */
				mark_buffer_dirty(unbh);
			}
		} else {
			/*
			 * append indirect item with holes if needed, when
			 * appending pointer to 'block'-th block use block,
			 * which is already allocated
			 */
			struct cpu_key tmp_key;
			/*
			 * We use this in case we need to allocate
			 * only one block which is a fastpath
			 */
			unp_t unf_single = 0;
			unp_t *un;
			__u64 max_to_insert =
			    MAX_ITEM_LEN(inode->i_sb->s_blocksize) /
			    UNFM_P_SIZE;
			__u64 blocks_needed;

			RFALSE(pos_in_item != ih_item_len(ih) / UNFM_P_SIZE,
			       "vs-804: invalid position for append");
			/*
			 * indirect item has to be appended,
			 * set up key of that position
			 * (key type is unimportant)
			 */
			make_cpu_key(&tmp_key, inode,
				     le_key_k_offset(version,
						     &ih->ih_key) +
				     op_bytes_number(ih,
						     inode->i_sb->s_blocksize),
				     TYPE_INDIRECT, 3);

			RFALSE(cpu_key_k_offset(&tmp_key) > cpu_key_k_offset(&key),
			       "green-805: invalid offset");
			blocks_needed =
			    1 +
			    ((cpu_key_k_offset(&key) -
			      cpu_key_k_offset(&tmp_key)) >> inode->i_sb->
			     s_blocksize_bits);

			if (blocks_needed == 1) {
				un = &unf_single;
			} else {
				un = kzalloc(min(blocks_needed, max_to_insert) * UNFM_P_SIZE, GFP_NOFS);
				if (!un) {
					un = &unf_single;
					blocks_needed = 1;
					max_to_insert = 0;
				}
			}
			if (blocks_needed <= max_to_insert) {
				/*
				 * we are going to add target block to
				 * the file. Use allocated block for that
				 */
				un[blocks_needed - 1] =
				    cpu_to_le32(allocated_block_nr);
				set_block_dev_mapped(bh_result,
						     allocated_block_nr, inode);
				set_buffer_new(bh_result);
				done = 1;
			} else {
				/* paste hole to the indirect item */
				/*
				 * If kmalloc failed, max_to_insert becomes
				 * zero and it means we only have space for
				 * one block
				 */
				blocks_needed =
				    max_to_insert ? max_to_insert : 1;
			}
			retval =
			    reiserfs_paste_into_item(th, &path, &tmp_key, inode,
						     (char *)un,
						     UNFM_P_SIZE *
						     blocks_needed);

			if (blocks_needed != 1)
				kfree(un);

			if (retval) {
				reiserfs_free_block(th, inode,
						    allocated_block_nr, 1);
				goto failure;
			}
			if (!done) {
				/*
				 * We need to mark new file size in case
				 * this function will be interrupted/aborted
				 * later on. And we may do this only for
				 * holes.
				 */
				inode->i_size +=
				    inode->i_sb->s_blocksize * blocks_needed;
			}
		}

		if (done == 1)
			break;

		/*
		 * this loop could log more blocks than we had originally
		 * asked for.  So, we have to allow the transaction to end
		 * if it is too big or too full.  Update the inode so things
		 * are consistent if we crash before the function returns, and
		 * release the path so that anybody waiting on the path before
		 * ending their transaction will be able to continue.
		 */
		if (journal_transaction_should_end(th, th->t_blocks_allocated)) {
			retval = restart_transaction(th, inode, &path);
			if (retval)
				goto failure;
		}
		/*
		 * inserting indirect pointers for a hole can take a
		 * long time.  reschedule if needed and also release the write
		 * lock for others.
		 */
		reiserfs_cond_resched(inode->i_sb);

		retval = search_for_position_by_key(inode->i_sb, &key, &path);
		if (retval == IO_ERROR) {
			retval = -EIO;
			goto failure;
		}
		if (retval == POSITION_FOUND) {
			reiserfs_warning(inode->i_sb, "vs-825",
					 "%K should not be found", &key);
			retval = -EEXIST;
			if (allocated_block_nr)
				reiserfs_free_block(th, inode,
						    allocated_block_nr, 1);
			pathrelse(&path);
			goto failure;
		}
		bh = get_last_bh(&path);
		ih = tp_item_head(&path);
		item = tp_item_body(&path);
		pos_in_item = path.pos_in_item;
	} while (1);

	retval = 0;

failure:
	if (th && (!dangle || (retval && !th->t_trans_id))) {
		int err;
		if (th->t_trans_id)
			reiserfs_update_sd(th, inode);
		err = reiserfs_end_persistent_transaction(th);
		if (err)
			retval = err;
	}

	reiserfs_write_unlock(inode->i_sb);
	reiserfs_check_path(&path);
	return retval;
}

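/* readpages address_space operation: batch reads through the mpage helpers */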
static int
reiserfs_readpages(struct file *file, struct address_space *mapping,
		   struct list_head *pages, unsigned nr_pages)
{
	return mpage_readpages(mapping, pages, nr_pages, reiserfs_get_block);
}

/*
 * Compute real number of used bytes by file
 * The following three functions can go away when we have enough space
 * in the stat item
 */
static int real_space_diff(struct inode *inode, int sd_size)
{
	int bytes;
	loff_t blocksize = inode->i_sb->s_blocksize;

	if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode))
		return sd_size;

	/*
	 * End of file is also in full block with indirect reference, so round
	 * up to the next block.
	 *
	 * there is just no way to know if the tail is actually packed
	 * on the file, so we have to assume it isn't.  When we pack the
	 * tail, we add 4 bytes to pretend there really is an unformatted
	 * node pointer
	 */
	bytes =
	    ((inode->i_size +
	      (blocksize - 1)) >> inode->i_sb->s_blocksize_bits) * UNFM_P_SIZE +
	    sd_size;
	return bytes;
}

static inline loff_t to_real_used_space(struct inode *inode, ulong blocks,
					int sd_size)
{
	if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) {
		return inode->i_size +
		    (loff_t) (real_space_diff(inode, sd_size));
	}
	return ((loff_t) real_space_diff(inode, sd_size)) +
	    (((loff_t) blocks) << 9);
}

/* Compute number of blocks used by file in ReiserFS counting */
static inline ulong to_fake_used_blocks(struct inode *inode, int sd_size)
{
	loff_t bytes = inode_get_bytes(inode);
	loff_t real_space = real_space_diff(inode, sd_size);

	/* keeps fsck and non-quota versions of reiserfs happy */
	if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) {
		bytes += (loff_t) 511;
	}

	/*
	 * files from before the quota patch might have i_blocks such that
	 * bytes < real_space.  Deal with that here to prevent it from
	 * going negative.
	 */
	if (bytes < real_space)
		return 0;
	return (bytes - real_space) >> 9;
}

/*
 * BAD: new directories have stat data of new type and all other items
 * of old type. The version stored in the inode describes the body items,
 * so in update_stat_data we cannot rely on the inode but have to check
 * the item version directly
 */

/* called by read_locked_inode */
static void init_inode(struct inode *inode, struct treepath *path)
{
	struct buffer_head *bh;
	struct item_head *ih;
	__u32 rdev;

	bh = PATH_PLAST_BUFFER(path);
	ih = tp_item_head(path);

	copy_key(INODE_PKEY(inode), &ih->ih_key);

	INIT_LIST_HEAD(&REISERFS_I(inode)->i_prealloc_list);
	REISERFS_I(inode)->i_flags = 0;
	REISERFS_I(inode)->i_prealloc_block = 0;
	REISERFS_I(inode)->i_prealloc_count = 0;
	REISERFS_I(inode)->i_trans_id = 0;
	REISERFS_I(inode)->i_jl = NULL;
	reiserfs_init_xattr_rwsem(inode);

	if (stat_data_v1(ih)) {
		struct stat_data_v1 *sd =
		    (struct stat_data_v1 *)ih_item_body(bh, ih);
		unsigned long blocks;

		set_inode_item_key_version(inode, KEY_FORMAT_3_5);
		set_inode_sd_version(inode, STAT_DATA_V1);
		inode->i_mode = sd_v1_mode(sd);
		set_nlink(inode, sd_v1_nlink(sd));
		i_uid_write(inode, sd_v1_uid(sd));
		i_gid_write(inode, sd_v1_gid(sd));
		inode->i_size = sd_v1_size(sd);
		inode->i_atime.tv_sec = sd_v1_atime(sd);
		inode->i_mtime.tv_sec = sd_v1_mtime(sd);
		inode->i_ctime.tv_sec = sd_v1_ctime(sd);
		inode->i_atime.tv_nsec = 0;
		inode->i_ctime.tv_nsec = 0;
		inode->i_mtime.tv_nsec = 0;

		inode->i_blocks = sd_v1_blocks(sd);
		inode->i_generation = le32_to_cpu(INODE_PKEY(inode)->k_dir_id);
		blocks = (inode->i_size + 511) >> 9;
		blocks = _ROUND_UP(blocks, inode->i_sb->s_blocksize >> 9);

		/*
		 * there was a bug in <=3.5.23 when i_blocks could take
		 * negative values. Starting from 3.5.17 this value could
		 * even be stored in stat data. For such files we set
		 * i_blocks based on file size. Just 2 notes: this can be
		 * wrong for sparse files. On-disk value will be only
		 * updated if file's inode will ever change
		 */
		if (inode->i_blocks > blocks) {
			inode->i_blocks = blocks;
		}

		rdev = sd_v1_rdev(sd);
		REISERFS_I(inode)->i_first_direct_byte =
		    sd_v1_first_direct_byte(sd);

		/*
		 * an early bug in the quota code can give us an odd
		 * number for the block count.  This is incorrect, fix it here.
		 */
		if (inode->i_blocks & 1) {
			inode->i_blocks++;
		}
		inode_set_bytes(inode,
				to_real_used_space(inode, inode->i_blocks,
						   SD_V1_SIZE));
		/*
		 * nopack is initially zero for v1 objects. For v2 objects,
		 * nopack is initialised from sd_attrs
		 */
		REISERFS_I(inode)->i_flags &= ~i_nopack_mask;
	} else {
		/*
		 * new stat data found, but object may have old items
		 * (directories and symlinks)
		 */
		struct stat_data *sd = (struct stat_data *)ih_item_body(bh, ih);

		inode->i_mode = sd_v2_mode(sd);
		set_nlink(inode, sd_v2_nlink(sd));
		i_uid_write(inode, sd_v2_uid(sd));
		inode->i_size = sd_v2_size(sd);
		i_gid_write(inode, sd_v2_gid(sd));
		inode->i_mtime.tv_sec = sd_v2_mtime(sd);
		inode->i_atime.tv_sec = sd_v2_atime(sd);
		inode->i_ctime.tv_sec = sd_v2_ctime(sd);
		inode->i_ctime.tv_nsec = 0;
		inode->i_mtime.tv_nsec = 0;
		inode->i_atime.tv_nsec = 0;
		inode->i_blocks = sd_v2_blocks(sd);
		rdev = sd_v2_rdev(sd);
		if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
			inode->i_generation =
			    le32_to_cpu(INODE_PKEY(inode)->k_dir_id);
		else
			inode->i_generation = sd_v2_generation(sd);

		if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
			set_inode_item_key_version(inode, KEY_FORMAT_3_5);
		else
			set_inode_item_key_version(inode, KEY_FORMAT_3_6);
		REISERFS_I(inode)->i_first_direct_byte = 0;
		set_inode_sd_version(inode, STAT_DATA_V2);
		inode_set_bytes(inode,
				to_real_used_space(inode, inode->i_blocks,
						   SD_V2_SIZE));
		/*
		 * read persistent inode attributes from sd and initialise
		 * generic inode flags from them
		 */
		REISERFS_I(inode)->i_attrs = sd_v2_attrs(sd);
		sd_attrs_to_i_attrs(sd_v2_attrs(sd), inode);
	}

	pathrelse(path);
	if (S_ISREG(inode->i_mode)) {
		inode->i_op = &reiserfs_file_inode_operations;
		inode->i_fop = &reiserfs_file_operations;
		inode->i_mapping->a_ops = &reiserfs_address_space_operations;
	} else if (S_ISDIR(inode->i_mode)) {
		inode->i_op = &reiserfs_dir_inode_operations;
		inode->i_fop = &reiserfs_dir_operations;
	} else if (S_ISLNK(inode->i_mode)) {
		inode->i_op = &reiserfs_symlink_inode_operations;
		inode_nohighmem(inode);
		inode->i_mapping->a_ops = &reiserfs_address_space_operations;
	} else {
		inode->i_blocks = 0;
		inode->i_op = &reiserfs_special_inode_operations;
		init_special_inode(inode, inode->i_mode, new_decode_dev(rdev));
	}
}

/* update new stat data with inode fields */
static void inode2sd(void *sd, struct inode *inode, loff_t size)
{
	struct stat_data *sd_v2 = (struct stat_data *)sd;
	__u16 flags;

	set_sd_v2_mode(sd_v2, inode->i_mode);
	set_sd_v2_nlink(sd_v2, inode->i_nlink);
	set_sd_v2_uid(sd_v2, i_uid_read(inode));
	set_sd_v2_size(sd_v2, size);
	set_sd_v2_gid(sd_v2, i_gid_read(inode));
	set_sd_v2_mtime(sd_v2, inode->i_mtime.tv_sec);
	set_sd_v2_atime(sd_v2, inode->i_atime.tv_sec);
	set_sd_v2_ctime(sd_v2, inode->i_ctime.tv_sec);
	set_sd_v2_blocks(sd_v2, to_fake_used_blocks(inode, SD_V2_SIZE));
	if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
		set_sd_v2_rdev(sd_v2, new_encode_dev(inode->i_rdev));
	else
		set_sd_v2_generation(sd_v2, inode->i_generation);
	flags = REISERFS_I(inode)->i_attrs;
	i_attrs_to_sd_attrs(inode, &flags);
	set_sd_v2_attrs(sd_v2, flags);
}

/* used to copy inode's fields to old stat data */
static void inode2sd_v1(void *sd, struct inode *inode, loff_t size)
{
	struct stat_data_v1 *sd_v1 = (struct stat_data_v1 *)sd;

	set_sd_v1_mode(sd_v1, inode->i_mode);
	set_sd_v1_uid(sd_v1, i_uid_read(inode));
	set_sd_v1_gid(sd_v1, i_gid_read(inode));
	set_sd_v1_nlink(sd_v1, inode->i_nlink);
	set_sd_v1_size(sd_v1, size);
	set_sd_v1_atime(sd_v1, inode->i_atime.tv_sec);
	set_sd_v1_ctime(sd_v1, inode->i_ctime.tv_sec);
	set_sd_v1_mtime(sd_v1, inode->i_mtime.tv_sec);

	if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
		set_sd_v1_rdev(sd_v1, new_encode_dev(inode->i_rdev));
	else
		set_sd_v1_blocks(sd_v1, to_fake_used_blocks(inode, SD_V1_SIZE));

	/* Sigh. i_first_direct_byte is back */
	set_sd_v1_first_direct_byte(sd_v1,
				    REISERFS_I(inode)->i_first_direct_byte);
}

/*
 * NOTE, you must prepare the buffer head before sending it here,
 * and then log it after the call
 */
static void update_stat_data(struct treepath *path, struct inode *inode,
			     loff_t size)
{
	struct buffer_head *bh;
	struct item_head *ih;

	bh = PATH_PLAST_BUFFER(path);
	ih = tp_item_head(path);

	if (!is_statdata_le_ih(ih))
		reiserfs_panic(inode->i_sb, "vs-13065", "key %k, found item %h",
			       INODE_PKEY(inode), ih);

	/* path points to old stat data */
	if (stat_data_v1(ih)) {
		inode2sd_v1(ih_item_body(bh, ih), inode, size);
	} else {
		inode2sd(ih_item_body(bh, ih), inode, size);
	}

	return;
}

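/*
 * Find the inode's stat data item in the tree and copy the in-core
 * fields (using the given size) into it, logging the change in the
 * caller's transaction.  The search is retried if the item moves while
 * the buffer is being prepared for the journal.
 */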
void reiserfs_update_sd_size(struct reiserfs_transaction_handle *th,
			     struct inode *inode, loff_t size)
{
	struct cpu_key key;
	INITIALIZE_PATH(path);
	struct buffer_head *bh;
	int fs_gen;
	struct item_head *ih, tmp_ih;
	int retval;

	BUG_ON(!th->t_trans_id);

	/* key type is unimportant */
	make_cpu_key(&key, inode, SD_OFFSET, TYPE_STAT_DATA, 3);

	for (;;) {
		int pos;
		/* look for the object's stat data */
		retval = search_item(inode->i_sb, &key, &path);
		if (retval == IO_ERROR) {
			reiserfs_error(inode->i_sb, "vs-13050",
				       "i/o failure occurred trying to "
				       "update %K stat data", &key);
			return;
		}
		if (retval == ITEM_NOT_FOUND) {
			pos = PATH_LAST_POSITION(&path);
			pathrelse(&path);
			if (inode->i_nlink == 0) {
				/*reiserfs_warning (inode->i_sb, "vs-13050: reiserfs_update_sd: i_nlink == 0, stat data not found"); */
				return;
			}
			reiserfs_warning(inode->i_sb, "vs-13060",
					 "stat data of object %k (nlink == %d) "
					 "not found (pos %d)",
					 INODE_PKEY(inode), inode->i_nlink,
					 pos);
			reiserfs_check_path(&path);
			return;
		}

		/*
		 * sigh, prepare_for_journal might schedule.  When it
		 * schedules the FS might change.  We have to detect that,
		 * and loop back to the search if the stat data item has moved
		 */
		bh = get_last_bh(&path);
		ih = tp_item_head(&path);
		copy_item_head(&tmp_ih, ih);
		fs_gen = get_generation(inode->i_sb);
		reiserfs_prepare_for_journal(inode->i_sb, bh, 1);

		/* Stat_data item has been moved after scheduling. */
		if (fs_changed(fs_gen, inode->i_sb)
		    && item_moved(&tmp_ih, &path)) {
			reiserfs_restore_prepared_buffer(inode->i_sb, bh);
			continue;
		}
		break;
	}
	update_stat_data(&path, inode, size);
	journal_mark_dirty(th, bh);
	pathrelse(&path);
	return;
}

/*
 * reiserfs_read_locked_inode is called to read the inode off disk, and it
 * does a make_bad_inode when things go wrong.  But, we need to make sure
 * and clear the key in the private portion of the inode, otherwise a
 * corresponding iput might try to delete whatever object the inode last
 * represented.
 */
static void reiserfs_make_bad_inode(struct inode *inode)
{
	memset(INODE_PKEY(inode), 0, KEY_SIZE);
	make_bad_inode(inode);
}

/*
 * initially this function was derived from minix or ext2's analog and
 * evolved as the prototype did
 */
int reiserfs_init_locked_inode(struct inode *inode, void *p)
{
	struct reiserfs_iget_args *args = (struct reiserfs_iget_args *)p;
	inode->i_ino = args->objectid;
	INODE_PKEY(inode)->k_dir_id = cpu_to_le32(args->dirid);
	return 0;
}

/*
 * looks for stat data in the tree, and fills in the stat data fields
 * of the in-core inode
 */
void reiserfs_read_locked_inode(struct inode *inode,
				struct reiserfs_iget_args *args)
{
	INITIALIZE_PATH(path_to_sd);
	struct cpu_key key;
	unsigned long dirino;
	int retval;

	dirino = args->dirid;

	/*
	 * set version 1, version 2 could be used too, because stat data
	 * key is the same in both versions
	 */