namei.c 84.4 KB
Newer Older
Linus Torvalds's avatar
Linus Torvalds committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
/*
 *  linux/fs/namei.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

/*
 * Some corrections by tytso.
 */

/* [Feb 1997 T. Schoebel-Theuer] Complete rewrite of the pathname
 * lookup logic.
 */
/* [Feb-Apr 2000, AV] Rewrite to the new namespace architecture.
 */

#include <linux/init.h>
18
#include <linux/export.h>
19
#include <linux/kernel.h>
Linus Torvalds's avatar
Linus Torvalds committed
20 21 22 23
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/namei.h>
#include <linux/pagemap.h>
Robert Love's avatar
Robert Love committed
24
#include <linux/fsnotify.h>
Linus Torvalds's avatar
Linus Torvalds committed
25 26
#include <linux/personality.h>
#include <linux/security.h>
Mimi Zohar's avatar
Mimi Zohar committed
27
#include <linux/ima.h>
Linus Torvalds's avatar
Linus Torvalds committed
28 29 30
#include <linux/syscalls.h>
#include <linux/mount.h>
#include <linux/audit.h>
31
#include <linux/capability.h>
32
#include <linux/file.h>
33
#include <linux/fcntl.h>
34
#include <linux/device_cgroup.h>
35
#include <linux/fs_struct.h>
36
#include <linux/posix_acl.h>
Linus Torvalds's avatar
Linus Torvalds committed
37 38
#include <asm/uaccess.h>

39
#include "internal.h"
40
#include "mount.h"
41

Linus Torvalds's avatar
Linus Torvalds committed
42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
/* [Feb-1997 T. Schoebel-Theuer]
 * Fundamental changes in the pathname lookup mechanisms (namei)
 * were necessary because of omirr.  The reason is that omirr needs
 * to know the _real_ pathname, not the user-supplied one, in case
 * of symlinks (and also when transname replacements occur).
 *
 * The new code replaces the old recursive symlink resolution with
 * an iterative one (in case of non-nested symlink chains).  It does
 * this with calls to <fs>_follow_link().
 * As a side effect, dir_namei(), _namei() and follow_link() are now 
 * replaced with a single function lookup_dentry() that can handle all 
 * the special cases of the former code.
 *
 * With the new dcache, the pathname is stored at each inode, at least as
 * long as the refcount of the inode is positive.  As a side effect, the
 * size of the dcache depends on the inode cache and thus is dynamic.
 *
 * [29-Apr-1998 C. Scott Ananian] Updated above description of symlink
 * resolution to correspond with current state of the code.
 *
 * Note that the symlink resolution is not *completely* iterative.
 * There is still a significant amount of tail- and mid- recursion in
 * the algorithm.  Also, note that <fs>_readlink() is not used in
 * lookup_dentry(): lookup_dentry() on the result of <fs>_readlink()
 * may return different results than <fs>_follow_link().  Many virtual
 * filesystems (including /proc) exhibit this behavior.
 */

/* [24-Feb-97 T. Schoebel-Theuer] Side effects caused by new implementation:
 * New symlink semantics: when open() is called with flags O_CREAT | O_EXCL
 * and the name already exists in form of a symlink, try to create the new
 * name indicated by the symlink. The old code always complained that the
 * name already exists, due to not following the symlink even if its target
 * is nonexistent.  The new semantics affects also mknod() and link() when
 * the name is a symlink pointing to a non-existent name.
 *
 * I don't know which semantics is the right one, since I have no access
 * to standards. But I found by trial that HP-UX 9.0 has the full "new"
 * semantics implemented, while SunOS 4.1.1 and Solaris (SunOS 5.4) have the
 * "old" one. Personally, I think the new semantics is much more logical.
 * Note that "ln old new" where "new" is a symlink pointing to a non-existing
 * file does succeed in both HP-UX and SunOs, but not in Solaris
 * and in the old Linux semantics.
 */

/* [16-Dec-97 Kevin Buhr] For security reasons, we change some symlink
 * semantics.  See the comments in "open_namei" and "do_link" below.
 *
 * [10-Sep-98 Alan Modra] Another symlink change.
 */

/* [Feb-Apr 2000 AV] Complete rewrite. Rules for symlinks:
 *	inside the path - always follow.
 *	in the last component in creation/removal/renaming - never follow.
 *	if LOOKUP_FOLLOW passed - follow.
 *	if the pathname has trailing slashes - follow.
 *	otherwise - don't follow.
 * (applied in that order).
 *
 * [Jun 2000 AV] Inconsistent behaviour of open() in case if flags==O_CREAT
 * restored for 2.4. This is the last surviving part of old 4.2BSD bug.
 * During the 2.4 we need to fix the userland stuff depending on it -
 * hopefully we will be able to get rid of that wart in 2.5. So far only
 * XEmacs seems to be relying on it...
 */
/*
 * [Sep 2001 AV] Single-semaphore locking scheme (kudos to David Holland)
 * implemented.  Let's see if raised priority of ->s_vfs_rename_mutex gives
 * any extra contention...
 */

/* In order to reduce some races, while at the same time doing additional
 * checking and hopefully speeding things up, we copy filenames to the
 * kernel data space before using them..
 *
 * POSIX.1 2.4: an empty pathname is invalid (ENOENT).
 * PATH_MAX includes the nul terminator --RR.
 */
120
static char *getname_flags(const char __user *filename, int flags, int *empty)
Linus Torvalds's avatar
Linus Torvalds committed
121
{
122 123
	char *result = __getname(), *err;
	int len;
124

125
	if (unlikely(!result))
126 127
		return ERR_PTR(-ENOMEM);

128 129 130 131 132 133 134 135
	len = strncpy_from_user(result, filename, PATH_MAX);
	err = ERR_PTR(len);
	if (unlikely(len < 0))
		goto error;

	/* The empty path is special. */
	if (unlikely(!len)) {
		if (empty)
136
			*empty = 1;
137 138 139
		err = ERR_PTR(-ENOENT);
		if (!(flags & LOOKUP_EMPTY))
			goto error;
Linus Torvalds's avatar
Linus Torvalds committed
140
	}
141 142 143 144 145 146 147 148 149 150

	err = ERR_PTR(-ENAMETOOLONG);
	if (likely(len < PATH_MAX)) {
		audit_getname(result);
		return result;
	}

error:
	__putname(result);
	return err;
Linus Torvalds's avatar
Linus Torvalds committed
151 152
}

Al Viro's avatar
Al Viro committed
153 154
/*
 * getname - copy a user-supplied pathname into kernel space
 *
 * Calls getname_flags() with no flags and no "empty" out-parameter and
 * returns its result unchanged.
 */
char *getname(const char __user * filename)
{
	return getname_flags(filename, 0, NULL);
}

Linus Torvalds's avatar
Linus Torvalds committed
158 159 160
#ifdef CONFIG_AUDITSYSCALL
/*
 * putname - release a pathname buffer
 *
 * Hands @name to audit_putname() unless audit_dummy_context() reports
 * true, in which case the buffer is released directly with __putname().
 */
void putname(const char *name)
{
	if (unlikely(!audit_dummy_context()))
		audit_putname(name);
	else
		__putname(name);
}
EXPORT_SYMBOL(putname);
#endif

169 170
static int check_acl(struct inode *inode, int mask)
{
171
#ifdef CONFIG_FS_POSIX_ACL
172 173 174
	struct posix_acl *acl;

	if (mask & MAY_NOT_BLOCK) {
175 176
		acl = get_cached_acl_rcu(inode, ACL_TYPE_ACCESS);
	        if (!acl)
177
	                return -EAGAIN;
178 179 180
		/* no ->get_acl() calls in RCU mode... */
		if (acl == ACL_NOT_CACHED)
			return -ECHILD;
181
	        return posix_acl_permission(inode, acl, mask & ~MAY_NOT_BLOCK);
182 183 184 185 186
	}

	acl = get_cached_acl(inode, ACL_TYPE_ACCESS);

	/*
187 188 189
	 * A filesystem can force a ACL callback by just never filling the
	 * ACL cache. But normally you'd fill the cache either at inode
	 * instantiation time, or on the first ->get_acl call.
190
	 *
191 192
	 * If the filesystem doesn't have a get_acl() function at all, we'll
	 * just create the negative cache entry.
193 194
	 */
	if (acl == ACL_NOT_CACHED) {
195 196 197 198 199 200 201 202
	        if (inode->i_op->get_acl) {
			acl = inode->i_op->get_acl(inode, ACL_TYPE_ACCESS);
			if (IS_ERR(acl))
				return PTR_ERR(acl);
		} else {
		        set_cached_acl(inode, ACL_TYPE_ACCESS, NULL);
		        return -EAGAIN;
		}
203 204 205 206 207 208 209
	}

	if (acl) {
	        int error = posix_acl_permission(inode, acl, mask);
	        posix_acl_release(acl);
	        return error;
	}
210
#endif
211 212 213 214

	return -EAGAIN;
}

215
/*
216
 * This does the basic permission checking
Linus Torvalds's avatar
Linus Torvalds committed
217
 */
218
static int acl_permission_check(struct inode *inode, int mask)
Linus Torvalds's avatar
Linus Torvalds committed
219
{
220
	unsigned int mode = inode->i_mode;
Linus Torvalds's avatar
Linus Torvalds committed
221

222
	if (likely(uid_eq(current_fsuid(), inode->i_uid)))
Linus Torvalds's avatar
Linus Torvalds committed
223 224
		mode >>= 6;
	else {
225
		if (IS_POSIXACL(inode) && (mode & S_IRWXG)) {
226
			int error = check_acl(inode, mask);
227 228
			if (error != -EAGAIN)
				return error;
Linus Torvalds's avatar
Linus Torvalds committed
229 230 231 232 233 234 235 236 237
		}

		if (in_group_p(inode->i_gid))
			mode >>= 3;
	}

	/*
	 * If the DACs are ok we don't need any capability check.
	 */
238
	if ((mask & ~mode & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0)
Linus Torvalds's avatar
Linus Torvalds committed
239
		return 0;
240 241 242 243
	return -EACCES;
}

/**
244
 * generic_permission -  check for access rights on a Posix-like filesystem
245
 * @inode:	inode to check access rights for
246
 * @mask:	right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC, ...)
247 248 249 250
 *
 * Used to check for read/write/execute permissions on a file.
 * We use "fsuid" for this, letting us set arbitrary permissions
 * for filesystem access without changing the "normal" uids which
251 252 253 254 255
 * are used for other things.
 *
 * generic_permission is rcu-walk aware. It returns -ECHILD in case an rcu-walk
 * request cannot be satisfied (eg. requires blocking or too much complexity).
 * It would then be called again in ref-walk mode.
256
 */
257
int generic_permission(struct inode *inode, int mask)
258 259 260 261
{
	int ret;

	/*
262
	 * Do the basic permission checks.
263
	 */
264
	ret = acl_permission_check(inode, mask);
265 266
	if (ret != -EACCES)
		return ret;
Linus Torvalds's avatar
Linus Torvalds committed
267

268 269
	if (S_ISDIR(inode->i_mode)) {
		/* DACs are overridable for directories */
270
		if (inode_capable(inode, CAP_DAC_OVERRIDE))
271 272
			return 0;
		if (!(mask & MAY_WRITE))
273
			if (inode_capable(inode, CAP_DAC_READ_SEARCH))
274 275 276
				return 0;
		return -EACCES;
	}
Linus Torvalds's avatar
Linus Torvalds committed
277 278
	/*
	 * Read/write DACs are always overridable.
279 280
	 * Executable DACs are overridable when there is
	 * at least one exec bit set.
Linus Torvalds's avatar
Linus Torvalds committed
281
	 */
282
	if (!(mask & MAY_EXEC) || (inode->i_mode & S_IXUGO))
283
		if (inode_capable(inode, CAP_DAC_OVERRIDE))
Linus Torvalds's avatar
Linus Torvalds committed
284 285 286 287 288
			return 0;

	/*
	 * Searching includes executable on directories, else just read.
	 */
289
	mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
290
	if (mask == MAY_READ)
291
		if (inode_capable(inode, CAP_DAC_READ_SEARCH))
Linus Torvalds's avatar
Linus Torvalds committed
292 293 294 295 296
			return 0;

	return -EACCES;
}

297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316
/*
 * The common case should go straight to generic_permission() without
 * touching inode->i_op at all.  To make that possible, inode->i_opflags
 * carries IOP_FASTPERM, meaning "this inode has no special permission
 * function, take the fast path".
 *
 * When the flag is not yet set and ->permission exists, that method is
 * called instead.  When the flag is not set and there is no ->permission,
 * the flag is set under inode->i_lock and the generic check is used.
 */
static inline int do_inode_permission(struct inode *inode, int mask)
{
	/* Fast path: already known to have no ->permission method. */
	if (likely(inode->i_opflags & IOP_FASTPERM))
		return generic_permission(inode, mask);

	if (likely(inode->i_op->permission))
		return inode->i_op->permission(inode, mask);

	/* This gets set once for the inode lifetime */
	spin_lock(&inode->i_lock);
	inode->i_opflags |= IOP_FASTPERM;
	spin_unlock(&inode->i_lock);

	return generic_permission(inode, mask);
}

Christoph Hellwig's avatar
Christoph Hellwig committed
317 318 319
/**
 * inode_permission  -  check for access rights to a given inode
 * @inode:	inode to check permission on
320
 * @mask:	right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC, ...)
Christoph Hellwig's avatar
Christoph Hellwig committed
321 322 323 324 325
 *
 * Used to check for read/write/execute permissions on an inode.
 * We use "fsuid" for this, letting us set arbitrary permissions
 * for filesystem access without changing the "normal" uids which
 * are used for other things.
326 327
 *
 * When checking for MAY_APPEND, MAY_WRITE must also be set in @mask.
Christoph Hellwig's avatar
Christoph Hellwig committed
328
 */
329
int inode_permission(struct inode *inode, int mask)
Linus Torvalds's avatar
Linus Torvalds committed
330
{
331
	int retval;
Linus Torvalds's avatar
Linus Torvalds committed
332

333
	if (unlikely(mask & MAY_WRITE)) {
334
		umode_t mode = inode->i_mode;
Linus Torvalds's avatar
Linus Torvalds committed
335 336 337 338 339 340 341 342 343 344 345 346 347 348 349

		/*
		 * Nobody gets write access to a read-only fs.
		 */
		if (IS_RDONLY(inode) &&
		    (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
			return -EROFS;

		/*
		 * Nobody gets write access to an immutable file.
		 */
		if (IS_IMMUTABLE(inode))
			return -EACCES;
	}

350
	retval = do_inode_permission(inode, mask);
Linus Torvalds's avatar
Linus Torvalds committed
351 352 353
	if (retval)
		return retval;

354 355 356 357
	retval = devcgroup_inode_permission(inode, mask);
	if (retval)
		return retval;

358
	return security_inode_permission(inode, mask);
Linus Torvalds's avatar
Linus Torvalds committed
359 360
}

Jan Blunck's avatar
Jan Blunck committed
361 362 363 364 365 366 367 368 369 370 371 372 373
/**
 * path_get - take a reference on both halves of a path
 * @path: path whose vfsmount and dentry are to be referenced
 *
 * Calls mntget() on the path's vfsmount and then dget() on its dentry.
 */
void path_get(struct path *path)
{
	struct vfsmount *mnt = path->mnt;
	struct dentry *dentry = path->dentry;

	mntget(mnt);
	dget(dentry);
}
EXPORT_SYMBOL(path_get);

Jan Blunck's avatar
Jan Blunck committed
374 375 376 377 378 379 380
/**
 * path_put - put a reference to a path
 * @path: path to put the reference to
 *
 * Given a path decrement the reference count to the dentry and the vfsmount.
 */
void path_put(struct path *path)
Linus Torvalds's avatar
Linus Torvalds committed
381
{
Jan Blunck's avatar
Jan Blunck committed
382 383
	dput(path->dentry);
	mntput(path->mnt);
Linus Torvalds's avatar
Linus Torvalds committed
384
}
Jan Blunck's avatar
Jan Blunck committed
385
EXPORT_SYMBOL(path_put);
Linus Torvalds's avatar
Linus Torvalds committed
386

387
/*
 * Path walking has 2 modes, rcu-walk and ref-walk (see
 * Documentation/filesystems/path-lookup.txt).  In situations when we can't
 * continue in RCU mode, we attempt to drop out of rcu-walk mode and grab
 * normal reference counts on dentries and vfsmounts to transition to ref-walk
 * mode.  Refcounts are grabbed at the last known good point before rcu-walk
 * got stuck, so ref-walk may continue from there. If this is not successful
 * (eg. a seqcount has changed), then failure is returned and it's up to caller
 * to restart the path walk from the beginning in ref-walk mode.
 */

/**
399 400 401
 * unlazy_walk - try to switch to ref-walk mode.
 * @nd: nameidata pathwalk data
 * @dentry: child of nd->path.dentry or NULL
402
 * Returns: 0 on success, -ECHILD on failure
Nick Piggin's avatar
Nick Piggin committed
403
 *
404 405 406
 * unlazy_walk attempts to legitimize the current nd->path, nd->root and dentry
 * for ref-walk mode.  @dentry must be a path found by a do_lookup call on
 * @nd or NULL.  Must be called from rcu-walk context.
Nick Piggin's avatar
Nick Piggin committed
407
 */
408
static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
Nick Piggin's avatar
Nick Piggin committed
409 410 411
{
	struct fs_struct *fs = current->fs;
	struct dentry *parent = nd->path.dentry;
412
	int want_root = 0;
Nick Piggin's avatar
Nick Piggin committed
413 414

	BUG_ON(!(nd->flags & LOOKUP_RCU));
415 416
	if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
		want_root = 1;
Nick Piggin's avatar
Nick Piggin committed
417 418 419 420 421 422
		spin_lock(&fs->lock);
		if (nd->root.mnt != fs->root.mnt ||
				nd->root.dentry != fs->root.dentry)
			goto err_root;
	}
	spin_lock(&parent->d_lock);
423 424 425 426 427
	if (!dentry) {
		if (!__d_rcu_to_refcount(parent, nd->seq))
			goto err_parent;
		BUG_ON(nd->inode != parent->d_inode);
	} else {
428 429
		if (dentry->d_parent != parent)
			goto err_parent;
430 431 432 433 434 435 436 437 438 439 440 441 442 443
		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
		if (!__d_rcu_to_refcount(dentry, nd->seq))
			goto err_child;
		/*
		 * If the sequence check on the child dentry passed, then
		 * the child has not been removed from its parent. This
		 * means the parent dentry must be valid and able to take
		 * a reference at this point.
		 */
		BUG_ON(!IS_ROOT(dentry) && dentry->d_parent != parent);
		BUG_ON(!parent->d_count);
		parent->d_count++;
		spin_unlock(&dentry->d_lock);
	}
Nick Piggin's avatar
Nick Piggin committed
444
	spin_unlock(&parent->d_lock);
445
	if (want_root) {
Nick Piggin's avatar
Nick Piggin committed
446 447 448 449 450 451 452 453 454
		path_get(&nd->root);
		spin_unlock(&fs->lock);
	}
	mntget(nd->path.mnt);

	rcu_read_unlock();
	br_read_unlock(vfsmount_lock);
	nd->flags &= ~LOOKUP_RCU;
	return 0;
455 456

err_child:
Nick Piggin's avatar
Nick Piggin committed
457
	spin_unlock(&dentry->d_lock);
458
err_parent:
Nick Piggin's avatar
Nick Piggin committed
459 460
	spin_unlock(&parent->d_lock);
err_root:
461
	if (want_root)
Nick Piggin's avatar
Nick Piggin committed
462 463 464 465
		spin_unlock(&fs->lock);
	return -ECHILD;
}

466 467 468 469 470 471
/**
 * release_open_intent - free up open intent resources
 * @nd: pointer to nameidata
 */
void release_open_intent(struct nameidata *nd)
{
472 473 474 475 476 477 478 479
	struct file *file = nd->intent.open.file;

	if (file && !IS_ERR(file)) {
		if (file->f_path.dentry == NULL)
			put_filp(file);
		else
			fput(file);
	}
480 481
}

Al Viro's avatar
Al Viro committed
482
static inline int d_revalidate(struct dentry *dentry, struct nameidata *nd)
483
{
Al Viro's avatar
Al Viro committed
484
	return dentry->d_op->d_revalidate(dentry, nd);
485 486
}

487 488 489
/**
 * complete_walk - successful completion of path walk
 * @nd:  pointer nameidata
490
 *
491 492 493 494 495
 * If we had been in RCU mode, drop out of it and legitimize nd->path.
 * Revalidate the final result, unless we'd already done that during
 * the path walk or the filesystem doesn't ask for it.  Return 0 on
 * success, -error on failure.  In case of failure caller does not
 * need to drop nd->path.
496
 */
497
static int complete_walk(struct nameidata *nd)
498
{
Al Viro's avatar
Al Viro committed
499
	struct dentry *dentry = nd->path.dentry;
500 501
	int status;

502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519
	if (nd->flags & LOOKUP_RCU) {
		nd->flags &= ~LOOKUP_RCU;
		if (!(nd->flags & LOOKUP_ROOT))
			nd->root.mnt = NULL;
		spin_lock(&dentry->d_lock);
		if (unlikely(!__d_rcu_to_refcount(dentry, nd->seq))) {
			spin_unlock(&dentry->d_lock);
			rcu_read_unlock();
			br_read_unlock(vfsmount_lock);
			return -ECHILD;
		}
		BUG_ON(nd->inode != dentry->d_inode);
		spin_unlock(&dentry->d_lock);
		mntget(nd->path.mnt);
		rcu_read_unlock();
		br_read_unlock(vfsmount_lock);
	}

Al Viro's avatar
Al Viro committed
520 521 522 523
	if (likely(!(nd->flags & LOOKUP_JUMPED)))
		return 0;

	if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE)))
524 525
		return 0;

Al Viro's avatar
Al Viro committed
526 527 528 529
	if (likely(!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)))
		return 0;

	/* Note: we do not d_invalidate() */
530
	status = d_revalidate(dentry, nd);
531 532 533
	if (status > 0)
		return 0;

Al Viro's avatar
Al Viro committed
534
	if (!status)
535
		status = -ESTALE;
Al Viro's avatar
Al Viro committed
536

537
	path_put(&nd->path);
538 539 540
	return status;
}

Al Viro's avatar
Al Viro committed
541 542
static __always_inline void set_root(struct nameidata *nd)
{
543 544
	if (!nd->root.mnt)
		get_fs_root(current->fs, &nd->root);
Al Viro's avatar
Al Viro committed
545 546
}

547 548
static int link_path_walk(const char *, struct nameidata *);

Nick Piggin's avatar
Nick Piggin committed
549 550 551 552
/*
 * rcu-walk counterpart of set_root(): if nd->root.mnt is not set, copy
 * fs->root into nd->root without calling path_get(), and sample the root
 * dentry's d_seq into nd->seq.  The copy is retried for as long as
 * fs->seq reports a concurrent change.
 */
static __always_inline void set_root_rcu(struct nameidata *nd)
{
	if (!nd->root.mnt) {
		struct fs_struct *fs = current->fs;
		unsigned seq;

		do {
			seq = read_seqcount_begin(&fs->seq);
			nd->root = fs->root;
			nd->seq = __read_seqcount_begin(&nd->root.dentry->d_seq);
		} while (read_seqcount_retry(&fs->seq, seq));
	}
}

563
static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link)
Linus Torvalds's avatar
Linus Torvalds committed
564
{
Nick Piggin's avatar
Nick Piggin committed
565 566
	int ret;

Linus Torvalds's avatar
Linus Torvalds committed
567 568 569 570
	if (IS_ERR(link))
		goto fail;

	if (*link == '/') {
Al Viro's avatar
Al Viro committed
571
		set_root(nd);
Jan Blunck's avatar
Jan Blunck committed
572
		path_put(&nd->path);
Al Viro's avatar
Al Viro committed
573 574
		nd->path = nd->root;
		path_get(&nd->root);
Al Viro's avatar
Al Viro committed
575
		nd->flags |= LOOKUP_JUMPED;
Linus Torvalds's avatar
Linus Torvalds committed
576
	}
Nick Piggin's avatar
Nick Piggin committed
577
	nd->inode = nd->path.dentry->d_inode;
Christoph Hellwig's avatar
Christoph Hellwig committed
578

Nick Piggin's avatar
Nick Piggin committed
579 580
	ret = link_path_walk(link, nd);
	return ret;
Linus Torvalds's avatar
Linus Torvalds committed
581
fail:
Jan Blunck's avatar
Jan Blunck committed
582
	path_put(&nd->path);
Linus Torvalds's avatar
Linus Torvalds committed
583 584 585
	return PTR_ERR(link);
}

Jan Blunck's avatar
Jan Blunck committed
586
static void path_put_conditional(struct path *path, struct nameidata *nd)
587 588
{
	dput(path->dentry);
589
	if (path->mnt != nd->path.mnt)
590 591 592
		mntput(path->mnt);
}

593 594
static inline void path_to_nameidata(const struct path *path,
					struct nameidata *nd)
595
{
Nick Piggin's avatar
Nick Piggin committed
596 597 598 599
	if (!(nd->flags & LOOKUP_RCU)) {
		dput(nd->path.dentry);
		if (nd->path.mnt != path->mnt)
			mntput(nd->path.mnt);
600
	}
Nick Piggin's avatar
Nick Piggin committed
601
	nd->path.mnt = path->mnt;
602
	nd->path.dentry = path->dentry;
603 604
}

605 606 607 608 609 610 611 612
static inline void put_link(struct nameidata *nd, struct path *link, void *cookie)
{
	struct inode *inode = link->dentry->d_inode;
	if (!IS_ERR(cookie) && inode->i_op->put_link)
		inode->i_op->put_link(link->dentry, nd, cookie);
	path_put(link);
}

Al Viro's avatar
Al Viro committed
613
static __always_inline int
614
follow_link(struct path *link, struct nameidata *nd, void **p)
Linus Torvalds's avatar
Linus Torvalds committed
615 616
{
	int error;
617
	struct dentry *dentry = link->dentry;
Linus Torvalds's avatar
Linus Torvalds committed
618

619 620
	BUG_ON(nd->flags & LOOKUP_RCU);

Al Viro's avatar
Al Viro committed
621 622 623
	if (link->mnt == nd->path.mnt)
		mntget(link->mnt);

624 625 626 627 628 629 630 631
	if (unlikely(current->total_link_count >= 40)) {
		*p = ERR_PTR(-ELOOP); /* no ->put_link(), please */
		path_put(&nd->path);
		return -ELOOP;
	}
	cond_resched();
	current->total_link_count++;

Al Viro's avatar
Al Viro committed
632
	touch_atime(link);
Linus Torvalds's avatar
Linus Torvalds committed
633
	nd_set_link(nd, NULL);
Al Viro's avatar
Al Viro committed
634

635 636 637 638 639 640 641
	error = security_inode_follow_link(link->dentry, nd);
	if (error) {
		*p = ERR_PTR(error); /* no ->put_link(), please */
		path_put(&nd->path);
		return error;
	}

642
	nd->last_type = LAST_BIND;
Al Viro's avatar
Al Viro committed
643 644 645
	*p = dentry->d_inode->i_op->follow_link(dentry, nd);
	error = PTR_ERR(*p);
	if (!IS_ERR(*p)) {
Linus Torvalds's avatar
Linus Torvalds committed
646
		char *s = nd_get_link(nd);
647
		error = 0;
Linus Torvalds's avatar
Linus Torvalds committed
648 649
		if (s)
			error = __vfs_follow_link(nd, s);
Al Viro's avatar
Al Viro committed
650
		else if (nd->last_type == LAST_BIND) {
Al Viro's avatar
Al Viro committed
651
			nd->flags |= LOOKUP_JUMPED;
652 653
			nd->inode = nd->path.dentry->d_inode;
			if (nd->inode->i_op->follow_link) {
Al Viro's avatar
Al Viro committed
654 655 656 657 658
				/* stepped on a _really_ weird one */
				path_put(&nd->path);
				error = -ELOOP;
			}
		}
Linus Torvalds's avatar
Linus Torvalds committed
659 660 661 662
	}
	return error;
}

Nick Piggin's avatar
Nick Piggin committed
663 664
/*
 * Move @path from a mount to its mountpoint in the parent mount.  No
 * references are taken or dropped and no locks are acquired in here.
 *
 * Returns 0 (leaving @path untouched) when the mount is its own parent,
 * 1 after @path has been updated.
 */
static int follow_up_rcu(struct path *path)
{
	struct mount *mnt = real_mount(path->mnt);
	struct mount *parent;
	struct dentry *mountpoint;

	parent = mnt->mnt_parent;
	if (&parent->mnt == path->mnt)
		return 0;
	mountpoint = mnt->mnt_mountpoint;
	path->dentry = mountpoint;
	path->mnt = &parent->mnt;
	return 1;
}

Al Viro's avatar
Al Viro committed
678
int follow_up(struct path *path)
Linus Torvalds's avatar
Linus Torvalds committed
679
{
680 681
	struct mount *mnt = real_mount(path->mnt);
	struct mount *parent;
Linus Torvalds's avatar
Linus Torvalds committed
682
	struct dentry *mountpoint;
Nick Piggin's avatar
Nick Piggin committed
683 684

	br_read_lock(vfsmount_lock);
685 686
	parent = mnt->mnt_parent;
	if (&parent->mnt == path->mnt) {
Nick Piggin's avatar
Nick Piggin committed
687
		br_read_unlock(vfsmount_lock);
Linus Torvalds's avatar
Linus Torvalds committed
688 689
		return 0;
	}
690
	mntget(&parent->mnt);
691
	mountpoint = dget(mnt->mnt_mountpoint);
Nick Piggin's avatar
Nick Piggin committed
692
	br_read_unlock(vfsmount_lock);
Al Viro's avatar
Al Viro committed
693 694 695
	dput(path->dentry);
	path->dentry = mountpoint;
	mntput(path->mnt);
696
	path->mnt = &parent->mnt;
Linus Torvalds's avatar
Linus Torvalds committed
697 698 699
	return 1;
}

Nick Piggin's avatar
Nick Piggin committed
700
/*
701 702 703
 * Perform an automount
 * - return -EISDIR to tell follow_managed() to stop and return the path we
 *   were called with.
Linus Torvalds's avatar
Linus Torvalds committed
704
 */
705 706
static int follow_automount(struct path *path, unsigned flags,
			    bool *need_mntput)
Nick Piggin's avatar
Nick Piggin committed
707
{
708
	struct vfsmount *mnt;
709
	int err;
710 711 712 713

	if (!path->dentry->d_op || !path->dentry->d_op->d_automount)
		return -EREMOTE;

714 715 716 717 718 719 720 721 722 723
	/* We don't want to mount if someone's just doing a stat -
	 * unless they're stat'ing a directory and appended a '/' to
	 * the name.
	 *
	 * We do, however, want to mount if someone wants to open or
	 * create a file of any type under the mountpoint, wants to
	 * traverse through the mountpoint or wants to open the
	 * mounted directory.  Also, autofs may mark negative dentries
	 * as being automount points.  These will need the attentions
	 * of the daemon to instantiate them before they can be used.
724
	 */
725
	if (!(flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY |
726
		     LOOKUP_OPEN | LOOKUP_CREATE | LOOKUP_AUTOMOUNT)) &&
727 728 729
	    path->dentry->d_inode)
		return -EISDIR;

730 731 732 733 734 735 736 737 738 739 740 741 742 743 744
	current->total_link_count++;
	if (current->total_link_count >= 40)
		return -ELOOP;

	mnt = path->dentry->d_op->d_automount(path);
	if (IS_ERR(mnt)) {
		/*
		 * The filesystem is allowed to return -EISDIR here to indicate
		 * it doesn't want to automount.  For instance, autofs would do
		 * this so that its userspace daemon can mount on this dentry.
		 *
		 * However, we can only permit this if it's a terminal point in
		 * the path being looked up; if it wasn't then the remainder of
		 * the path is inaccessible and we should say so.
		 */
Al Viro's avatar
Al Viro committed
745
		if (PTR_ERR(mnt) == -EISDIR && (flags & LOOKUP_PARENT))
746 747
			return -EREMOTE;
		return PTR_ERR(mnt);
Nick Piggin's avatar
Nick Piggin committed
748
	}
749

750 751
	if (!mnt) /* mount collision */
		return 0;
Nick Piggin's avatar
Nick Piggin committed
752

753 754 755 756 757
	if (!*need_mntput) {
		/* lock_mount() may release path->mnt on error */
		mntget(path->mnt);
		*need_mntput = true;
	}
758
	err = finish_automount(mnt, path);
759

760 761 762
	switch (err) {
	case -EBUSY:
		/* Someone else made a mount here whilst we were busy */
763
		return 0;
764
	case 0:
765
		path_put(path);
766 767 768
		path->mnt = mnt;
		path->dentry = dget(mnt->mnt_root);
		return 0;
769 770
	default:
		return err;
771
	}
772

Al Viro's avatar
Al Viro committed
773 774
}

775 776
/*
 * Handle a dentry that is managed in some way.
777
 * - Flagged for transit management (autofs)
778 779 780 781 782 783 784 785
 * - Flagged as mountpoint
 * - Flagged as automount point
 *
 * This may only be called in refwalk mode.
 *
 * Serialization is taken care of in namespace.c
 */
static int follow_managed(struct path *path, unsigned flags)
Linus Torvalds's avatar
Linus Torvalds committed
786
{
787
	struct vfsmount *mnt = path->mnt; /* held by caller, must be left alone */
788 789
	unsigned managed;
	bool need_mntput = false;
790
	int ret = 0;
791 792 793 794 795 796 797

	/* Given that we're not holding a lock here, we retain the value in a
	 * local variable for each dentry as we look at it so that we don't see
	 * the components of that value change under us */
	while (managed = ACCESS_ONCE(path->dentry->d_flags),
	       managed &= DCACHE_MANAGED_DENTRY,
	       unlikely(managed != 0)) {
798 799 800 801 802
		/* Allow the filesystem to manage the transit without i_mutex
		 * being held. */
		if (managed & DCACHE_MANAGE_TRANSIT) {
			BUG_ON(!path->dentry->d_op);
			BUG_ON(!path->dentry->d_op->d_manage);
803
			ret = path->dentry->d_op->d_manage(path->dentry, false);
804
			if (ret < 0)
805
				break;
806 807
		}

808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830
		/* Transit to a mounted filesystem. */
		if (managed & DCACHE_MOUNTED) {
			struct vfsmount *mounted = lookup_mnt(path);
			if (mounted) {
				dput(path->dentry);
				if (need_mntput)
					mntput(path->mnt);
				path->mnt = mounted;
				path->dentry = dget(mounted->mnt_root);
				need_mntput = true;
				continue;
			}

			/* Something is mounted on this dentry in another
			 * namespace and/or whatever was mounted there in this
			 * namespace got unmounted before we managed to get the
			 * vfsmount_lock */
		}

		/* Handle an automount point */
		if (managed & DCACHE_NEED_AUTOMOUNT) {
			ret = follow_automount(path, flags, &need_mntput);
			if (ret < 0)
831
				break;
832 833 834 835 836
			continue;
		}

		/* We didn't change the current path point */
		break;
Linus Torvalds's avatar
Linus Torvalds committed
837
	}
838 839 840 841 842

	if (need_mntput && path->mnt == mnt)
		mntput(path->mnt);
	if (ret == -EISDIR)
		ret = 0;
843
	return ret < 0 ? ret : need_mntput;
Linus Torvalds's avatar
Linus Torvalds committed
844 845
}

846
int follow_down_one(struct path *path)
Linus Torvalds's avatar
Linus Torvalds committed
847 848 849
{
	struct vfsmount *mounted;

Al Viro's avatar
Al Viro committed
850
	mounted = lookup_mnt(path);
Linus Torvalds's avatar
Linus Torvalds committed
851
	if (mounted) {
Al Viro's avatar
Al Viro committed
852 853 854 855
		dput(path->dentry);
		mntput(path->mnt);
		path->mnt = mounted;
		path->dentry = dget(mounted->mnt_root);
Linus Torvalds's avatar
Linus Torvalds committed
856 857 858 859 860
		return 1;
	}
	return 0;
}

861 862 863 864 865 866
static inline bool managed_dentry_might_block(struct dentry *dentry)
{
	return (dentry->d_flags & DCACHE_MANAGE_TRANSIT &&
		dentry->d_op->d_manage(dentry, true) < 0);
}

867
/*
 * Try to skip to top of mountpoint pile in rcuwalk mode.  Fail if
 * we meet a managed dentry that would need blocking.
 */
static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
872
			       struct inode **inode)
873
{
874
	for (;;) {
875
		struct mount *mounted;
876 877 878 879
		/*
		 * Don't forget we might have a non-mountpoint managed dentry
		 * that wants to block transit.
		 */
880
		if (unlikely(managed_dentry_might_block(path->dentry)))
881
			return false;
882 883 884 885

		if (!d_mountpoint(path->dentry))
			break;

886 887 888
		mounted = __lookup_mnt(path->mnt, path->dentry, 1);
		if (!mounted)
			break;
889 890
		path->mnt = &mounted->mnt;
		path->dentry = mounted->mnt.mnt_root;
891
		nd->flags |= LOOKUP_JUMPED;
892
		nd->seq = read_seqcount_begin(&path->dentry->d_seq);
893 894 895 896 897 898
		/*
		 * Update the inode too. We don't need to re-check the
		 * dentry sequence number here after this d_inode read,
		 * because a mount-point is always pinned.
		 */
		*inode = path->dentry->d_inode;
899 900 901 902
	}
	return true;
}

903
static void follow_mount_rcu(struct nameidata *nd)
904
{
905
	while (d_mountpoint(nd->path.dentry)) {
906
		struct mount *mounted;
907
		mounted = __lookup_mnt(nd->path.mnt, nd->path.dentry, 1);
908 909
		if (!mounted)
			break;
910 911
		nd->path.mnt = &mounted->mnt;
		nd->path.dentry = mounted->mnt.mnt_root;
912
		nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
913 914 915
	}
}

Nick Piggin's avatar
Nick Piggin committed
916 917 918 919
static int follow_dotdot_rcu(struct nameidata *nd)
{
	set_root_rcu(nd);

920
	while (1) {
Nick Piggin's avatar
Nick Piggin committed
921 922 923 924 925 926 927 928 929 930 931
		if (nd->path.dentry == nd->root.dentry &&
		    nd->path.mnt == nd->root.mnt) {
			break;
		}
		if (nd->path.dentry != nd->path.mnt->mnt_root) {
			struct dentry *old = nd->path.dentry;
			struct dentry *parent = old->d_parent;
			unsigned seq;

			seq = read_seqcount_begin(&parent->d_seq);
			if (read_seqcount_retry(&old->d_seq, nd->seq))
932
				goto failed;
Nick Piggin's avatar
Nick Piggin committed
933 934 935 936 937 938 939 940
			nd->path.dentry = parent;
			nd->seq = seq;
			break;
		}
		if (!follow_up_rcu(&nd->path))
			break;
		nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
	}
941 942
	follow_mount_rcu(nd);
	nd->inode = nd->path.dentry->d_inode;
Nick Piggin's avatar
Nick Piggin committed
943
	return 0;
944 945 946

failed:
	nd->flags &= ~LOOKUP_RCU;
947 948
	if (!(nd->flags & LOOKUP_ROOT))
		nd->root.mnt = NULL;
949 950 951
	rcu_read_unlock();
	br_read_unlock(vfsmount_lock);
	return -ECHILD;
Nick Piggin's avatar
Nick Piggin committed
952 953
}

954 955 956 957 958
/*
 * Follow down to the covering mount currently visible to userspace.  At each
 * point, the filesystem owning that dentry may be queried as to whether the
 * caller is permitted to proceed or not.
 */
int follow_down(struct path *path)
960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978
{
	unsigned managed;
	int ret;

	while (managed = ACCESS_ONCE(path->dentry->d_flags),
	       unlikely(managed & DCACHE_MANAGED_DENTRY)) {
		/* Allow the filesystem to manage the transit without i_mutex
		 * being held.
		 *
		 * We indicate to the filesystem if someone is trying to mount
		 * something here.  This gives autofs the chance to deny anyone
		 * other than its daemon the right to mount on its
		 * superstructure.
		 *
		 * The filesystem may sleep at this point.
		 */
		if (managed & DCACHE_MANAGE_TRANSIT) {
			BUG_ON(!path->dentry->d_op);
			BUG_ON(!path->dentry->d_op->d_manage);
979
			ret = path->dentry->d_op->d_manage(
980
				path->dentry, false);
981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002
			if (ret < 0)
				return ret == -EISDIR ? 0 : ret;
		}

		/* Transit to a mounted filesystem. */
		if (managed & DCACHE_MOUNTED) {
			struct vfsmount *mounted = lookup_mnt(path);
			if (!mounted)
				break;
			dput(path->dentry);
			mntput(path->mnt);
			path->mnt = mounted;
			path->dentry = dget(mounted->mnt_root);
			continue;
		}

		/* Don't handle automount points here */
		break;
	}
	return 0;
}

1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018
/*
 * Skip to top of mountpoint pile in refwalk mode for follow_dotdot()
 */
static void follow_mount(struct path *path)
{
	for (;;) {
		struct vfsmount *mounted;

		if (!d_mountpoint(path->dentry))
			return;

		mounted = lookup_mnt(path);
		if (!mounted)
			return;

		/* Swap @path over to the root of the mount just found. */
		dput(path->dentry);
		mntput(path->mnt);
		path->mnt = mounted;
		path->dentry = dget(mounted->mnt_root);
	}
}

Nick Piggin's avatar
Nick Piggin committed
1019
static void follow_dotdot(struct nameidata *nd)
Linus Torvalds's avatar
Linus Torvalds committed
1020
{
Al Viro's avatar
Al Viro committed
1021
	set_root(nd);
1022

Linus Torvalds's avatar
Linus Torvalds committed
1023
	while(1) {
1024
		struct dentry *old = nd->path.dentry;
Linus Torvalds's avatar
Linus Torvalds committed
1025

Al Viro's avatar
Al Viro committed
1026 1027
		if (nd->path.dentry == nd->root.dentry &&
		    nd->path.mnt == nd->root.mnt) {
Linus Torvalds's avatar
Linus Torvalds committed
1028 1029
			break;
		}
1030
		if (nd->path.dentry != nd->path.mnt->mnt_root) {
Al Viro's avatar
Al Viro committed
1031 1032
			/* rare case of legitimate dget_parent()... */
			nd->path.dentry = dget_parent(nd->path.dentry);
Linus Torvalds's avatar
Linus Torvalds committed
1033 1034 1035
			dput(old);
			break;
		}
Al Viro's avatar
Al Viro committed
1036
		if (!follow_up(&nd->path))
Linus Torvalds's avatar
Linus Torvalds committed
1037 1038
			break;
	}
Al Viro's avatar
Al Viro committed
1039
	follow_mount(&nd->path);
Nick Piggin's avatar
Nick Piggin committed
1040
	nd->inode = nd->path.dentry->d_inode;
Linus Torvalds's avatar
Linus Torvalds committed
1041 1042
}

1043
/*
 * This looks up the name in dcache, possibly revalidates the old dentry and
 * allocates a new one if not found or not valid.  In the need_lookup argument
 * returns whether i_op->lookup is necessary.
 *
 * dir->d_inode->i_mutex must be held
 */
static struct dentry *lookup_dcache(struct qstr *name, struct dentry *dir,
				    struct nameidata *nd, bool *need_lookup)
{
	struct dentry *dentry;
	int status;

	*need_lookup = false;

	dentry = d_lookup(dir, name);
	if (!dentry)
		goto allocate;

	/* Cached, but flagged as still needing a real lookup. */
	if (d_need_lookup(dentry)) {
		*need_lookup = true;
		return dentry;
	}

	/* No revalidate op on this dentry: use it as it is. */
	if (!(dentry->d_flags & DCACHE_OP_REVALIDATE))
		return dentry;

	status = d_revalidate(dentry, nd);
	if (unlikely(status < 0)) {
		dput(dentry);
		return ERR_PTR(status);
	}

	/*
	 * A positive result keeps the dentry.  On zero, d_invalidate() is
	 * tried; if it returns non-zero the dentry is still handed back.
	 */
	if (status > 0 || d_invalidate(dentry))
		return dentry;

	dput(dentry);

allocate:
	dentry = d_alloc(dir, name);
	if (unlikely(!dentry))
		return ERR_PTR(-ENOMEM);

	*need_lookup = true;
	return dentry;
}

1085
/*
 * Call i_op->lookup on the dentry.  The dentry must be negative but may be
 * hashed if it was populated with DCACHE_NEED_LOOKUP.
 *
 * dir->d_inode->i_mutex must be held
 */
static struct dentry *lookup_real(struct inode *dir, struct dentry *dentry,
				  struct nameidata *nd)
1093 1094 1095 1096
{
	struct dentry *old;

	/* Don't create child dentry for a dead directory. */
Miklos Szeredi's avatar
Miklos Szeredi committed
1097
	if (unlikely(IS_DEADDIR(dir))) {
1098
		dput(dentry);
1099
		return ERR_PTR(-ENOENT);
1100
	}
1101

Miklos Szeredi's avatar
Miklos Szeredi committed
1102
	old = dir->i_op->lookup(dir, dentry, nd);
1103 1104 1105 1106 1107 1108 1109
	if (unlikely(old)) {
		dput(dentry);
		dentry = old;
	}
	return dentry;
}

1110 1111 1112
static struct dentry *__lookup_hash(struct qstr *name,
		struct dentry *base, struct nameidata *nd)
{
Miklos Szeredi's avatar
Miklos Szeredi committed
1113
	bool need_lookup;
1114 1115
	struct dentry *dentry;

Miklos Szeredi's avatar
Miklos Szeredi committed
1116 1117 1118
	dentry = lookup_dcache(name, base, nd, &need_lookup);
	if (!need_lookup)
		return dentry;
1119

Miklos Szeredi's avatar
Miklos Szeredi committed
1120
	return lookup_real(base->d_inode, dentry, nd);
1121 1122
}

Linus Torvalds's avatar
Linus Torvalds committed
1123 1124 1125 1126 1127 1128
/*
 *  It's more convoluted than I'd like it to be, but... it's still fairly
 *  small and for now I'd prefer to have fast path as straight as possible.
 *  It _is_ time-critical.
 */
static int do_lookup(struct nameidata *nd, struct qstr *name,
Nick Piggin's avatar
Nick Piggin committed
1129
			struct path *path, struct inode **inode)
Linus Torvalds's avatar
Linus Torvalds committed
1130
{
1131
	struct vfsmount *mnt = nd->path.mnt;
Nick Piggin's avatar
Nick Piggin committed
1132
	struct dentry *dentry, *parent = nd->path.dentry;
Al Viro's avatar
Al Viro committed
1133 1134
	int need_reval = 1;
	int status = 1;
1135 1136
	int err;

1137 1138 1139 1140 1141
	/*
	 * Rename seqlock is not required here because in the off chance
	 * of a false negative due to a concurrent rename, we're going to
	 * do the non-racy lookup, below.
	 */
Nick Piggin's avatar
Nick Piggin committed
1142 1143
	if (nd->flags & LOOKUP_RCU) {
		unsigned seq;
1144
		dentry = __d_lookup_rcu(parent, name, &seq, nd->inode);
Al Viro's avatar
Al Viro committed
1145 1146 1147
		if (!dentry)
			goto unlazy;

1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162
		/*
		 * This sequence count validates that the inode matches
		 * the dentry name information from lookup.
		 */
		*inode = dentry->d_inode;
		if (read_seqcount_retry(&dentry->d_seq, seq))
			return -ECHILD;

		/*
		 * This sequence count validates that the parent had no
		 * changes while we did the lookup of the dentry above.
		 *
		 * The memory barrier in read_seqcount_begin of child is
		 *  enough, we can use __read_seqcount_retry here.
		 */
Nick Piggin's avatar
Nick Piggin committed
1163 1164 1165
		if (__read_seqcount_retry(&parent->d_seq, nd->seq))
			return -ECHILD;
		nd->seq = seq;
Al Viro's avatar
Al Viro committed
1166

1167 1168
		if (unlikely(d_need_lookup(dentry)))
			goto unlazy;
1169
		if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) {
Al Viro's avatar
Al Viro committed
1170 1171 1172 1173 1174 1175
			status = d_revalidate(dentry, nd);
			if (unlikely(status <= 0)) {
				if (status != -ECHILD)
					need_reval = 0;
				goto unlazy;
			}
1176
		}
Nick Piggin's avatar
Nick Piggin committed
1177 1178
		path->mnt = mnt;
		path->dentry = dentry;
1179 1180 1181 1182 1183
		if (unlikely(!__follow_mount_rcu(nd, path, inode)))
			goto unlazy;
		if (unlikely(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT))
			goto unlazy;
		return 0;
Al Viro's avatar
Al Viro committed
1184
unlazy:
1185 1186
		if (unlazy_walk(nd, dentry))
			return -ECHILD;
Al Viro's avatar
Al Viro committed
1187 1188
	} else {
		dentry = __d_lookup(parent, name);
1189
	}
Al Viro's avatar
Al Viro committed
1190

1191 1192 1193 1194
	if (unlikely(!dentry))
		goto need_lookup;

	if (unlikely(d_need_lookup(dentry))) {
1195
		dput(dentry);
1196
		goto need_lookup;
Al Viro's avatar
Al Viro committed
1197
	}
1198

Al Viro's avatar
Al Viro committed
1199 1200 1201 1202 1203 1204 1205 1206 1207
	if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE) && need_reval)
		status = d_revalidate(dentry, nd);
	if (unlikely(status <= 0)) {
		if (status < 0) {
			dput(dentry);
			return status;
		}
		if (!d_invalidate(dentry)) {
			dput(dentry);
1208
			goto need_lookup;
Al Viro's avatar
Al Viro committed
1209
		}
1210
	}
1211
done:
1212 1213 1214
	path->mnt = mnt;
	path->dentry = dentry;
	err = follow_managed(path, nd->flags);
1215 1216
	if (unlikely(err < 0)) {
		path_put_conditional(path, nd);
1217
		return err;
1218
	}
1219 1220
	if (err)
		nd->flags |= LOOKUP_JUMPED;
1221
	*inode = path->dentry->d_inode;
Linus Torvalds's avatar
Linus Torvalds committed
1222
	return 0;
1223 1224 1225 1226 1227 1228 1229 1230 1231 1232