// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 */

#include <linux/kernel.h>
#include <linux/bio.h>
#include <linux/buffer_head.h>
#include <linux/file.h>
#include <linux/fs.h>
11
#include <linux/fsnotify.h>
Christoph Hellwig's avatar
Christoph Hellwig committed
12 13 14 15 16 17
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/time.h>
#include <linux/init.h>
#include <linux/string.h>
#include <linux/backing-dev.h>
18
#include <linux/mount.h>
Christoph Hellwig's avatar
Christoph Hellwig committed
19
#include <linux/mpage.h>
20
#include <linux/namei.h>
Christoph Hellwig's avatar
Christoph Hellwig committed
21 22 23 24
#include <linux/swap.h>
#include <linux/writeback.h>
#include <linux/compat.h>
#include <linux/bit_spinlock.h>
25
#include <linux/security.h>
Christoph Hellwig's avatar
Christoph Hellwig committed
26
#include <linux/xattr.h>
27
#include <linux/mm.h>
28
#include <linux/slab.h>
29
#include <linux/blkdev.h>
30
#include <linux/uuid.h>
31
#include <linux/btrfs.h>
Mark Fasheh's avatar
Mark Fasheh committed
32
#include <linux/uaccess.h>
33
#include <linux/iversion.h>
Christoph Hellwig's avatar
Christoph Hellwig committed
34 35 36 37 38 39
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "btrfs_inode.h"
#include "print-tree.h"
#include "volumes.h"
40
#include "locking.h"
41
#include "inode-map.h"
42
#include "backref.h"
43
#include "rcu-string.h"
44
#include "send.h"
45
#include "dev-replace.h"
46
#include "props.h"
47
#include "sysfs.h"
Josef Bacik's avatar
Josef Bacik committed
48
#include "qgroup.h"
49
#include "tree-log.h"
50
#include "compression.h"
Christoph Hellwig's avatar
Christoph Hellwig committed
51

52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76
#ifdef CONFIG_64BIT
/* If we have a 32-bit userspace and 64-bit kernel, then the UAPI
 * structures are incorrect, as the timespec structure from userspace
 * is 4 bytes too small. We define these alternatives here to teach
 * the kernel about the 32-bit struct packing.
 */
/*
 * Packed timespec layout: a 64-bit seconds field immediately followed by a
 * 32-bit nanoseconds field, with no padding added by the compiler.
 */
struct btrfs_ioctl_timespec_32 {
	__u64 sec;
	__u32 nsec;
} __attribute__ ((__packed__));

/*
 * Packed variant of the received-subvol ioctl argument block. Both timestamps
 * use the packed btrfs_ioctl_timespec_32 layout; the in/out direction of each
 * field is noted inline.
 */
struct btrfs_ioctl_received_subvol_args_32 {
	char	uuid[BTRFS_UUID_SIZE];	/* in */
	__u64	stransid;		/* in */
	__u64	rtransid;		/* out */
	struct btrfs_ioctl_timespec_32 stime; /* in */
	struct btrfs_ioctl_timespec_32 rtime; /* out */
	__u64	flags;			/* in */
	__u64	reserved[16];		/* in */
} __attribute__ ((__packed__));

/* Read/write ioctl, number 37, sized for the packed argument struct above. */
#define BTRFS_IOC_SET_RECEIVED_SUBVOL_32 _IOWR(BTRFS_IOCTL_MAGIC, 37, \
				struct btrfs_ioctl_received_subvol_args_32)
#endif

77 78 79 80 81 82 83 84 85 86 87 88 89
#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
/*
 * Packed variant of the send ioctl argument block in which the clone_sources
 * pointer is carried as a compat_uptr_t instead of a native pointer.
 */
struct btrfs_ioctl_send_args_32 {
	__s64 send_fd;			/* in */
	__u64 clone_sources_count;	/* in */
	compat_uptr_t clone_sources;	/* in */
	__u64 parent_root;		/* in */
	__u64 flags;			/* in */
	__u64 reserved[4];		/* in */
} __attribute__ ((__packed__));

/* Write-direction ioctl, number 38, sized for the packed struct above. */
#define BTRFS_IOC_SEND_32 _IOW(BTRFS_IOCTL_MAGIC, 38, \
			       struct btrfs_ioctl_send_args_32)
#endif
90

Mark Fasheh's avatar
Mark Fasheh committed
91
/*
 * Forward declaration of a file-local helper whose definition is not part of
 * this section of the file.
 */
static int btrfs_clone(struct inode *src, struct inode *inode,
		       u64 off, u64 olen, u64 olen_aligned, u64 destoff,
		       int no_time_update);
Mark Fasheh's avatar
Mark Fasheh committed
94

95
/* Mask out flags that are inappropriate for the given type of inode. */
96 97
static unsigned int btrfs_mask_fsflags_for_type(struct inode *inode,
		unsigned int flags)
98
{
99
	if (S_ISDIR(inode->i_mode))
100
		return flags;
101
	else if (S_ISREG(inode->i_mode))
102 103 104 105 106 107
		return flags & ~FS_DIRSYNC_FL;
	else
		return flags & (FS_NODUMP_FL | FS_NOATIME_FL);
}

/*
108 109
 * Export internal inode flags to the format expected by the FS_IOC_GETFLAGS
 * ioctl.
110
 */
111
static unsigned int btrfs_inode_flags_to_fsflags(unsigned int flags)
112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
{
	unsigned int iflags = 0;

	if (flags & BTRFS_INODE_SYNC)
		iflags |= FS_SYNC_FL;
	if (flags & BTRFS_INODE_IMMUTABLE)
		iflags |= FS_IMMUTABLE_FL;
	if (flags & BTRFS_INODE_APPEND)
		iflags |= FS_APPEND_FL;
	if (flags & BTRFS_INODE_NODUMP)
		iflags |= FS_NODUMP_FL;
	if (flags & BTRFS_INODE_NOATIME)
		iflags |= FS_NOATIME_FL;
	if (flags & BTRFS_INODE_DIRSYNC)
		iflags |= FS_DIRSYNC_FL;
Li Zefan's avatar
Li Zefan committed
127 128 129
	if (flags & BTRFS_INODE_NODATACOW)
		iflags |= FS_NOCOW_FL;

130
	if (flags & BTRFS_INODE_NOCOMPRESS)
Li Zefan's avatar
Li Zefan committed
131
		iflags |= FS_NOCOMP_FL;
132 133
	else if (flags & BTRFS_INODE_COMPRESS)
		iflags |= FS_COMPR_FL;
134 135 136 137 138 139 140

	return iflags;
}

/*
 * Update inode->i_flags based on the btrfs internal flags.
 */
141
void btrfs_sync_inode_flags_to_i_flags(struct inode *inode)
142
{
143
	struct btrfs_inode *binode = BTRFS_I(inode);
144
	unsigned int new_fl = 0;
145

146
	if (binode->flags & BTRFS_INODE_SYNC)
147
		new_fl |= S_SYNC;
148
	if (binode->flags & BTRFS_INODE_IMMUTABLE)
149
		new_fl |= S_IMMUTABLE;
150
	if (binode->flags & BTRFS_INODE_APPEND)
151
		new_fl |= S_APPEND;
152
	if (binode->flags & BTRFS_INODE_NOATIME)
153
		new_fl |= S_NOATIME;
154
	if (binode->flags & BTRFS_INODE_DIRSYNC)
155 156 157 158 159
		new_fl |= S_DIRSYNC;

	set_mask_bits(&inode->i_flags,
		      S_SYNC | S_APPEND | S_IMMUTABLE | S_NOATIME | S_DIRSYNC,
		      new_fl);
160 161 162 163
}

/*
 * Copy the FS_*_FL view of the file's btrfs inode flags to user space.
 *
 * @file: open file whose inode is queried
 * @arg:  user pointer receiving an unsigned int flag word
 *
 * Returns 0 on success or -EFAULT if the copy to user space fails.
 */
static int btrfs_ioctl_getflags(struct file *file, void __user *arg)
{
	struct btrfs_inode *binode = BTRFS_I(file_inode(file));
	unsigned int flags = btrfs_inode_flags_to_fsflags(binode->flags);

	if (copy_to_user(arg, &flags, sizeof(flags)))
		return -EFAULT;
	return 0;
}

172 173
/*
 * Check if @flags are a supported and valid set of FS_*_FL flags.
 *
 * Returns -EOPNOTSUPP if any bit outside the supported set is present,
 * -EINVAL if FS_NOCOMP_FL and FS_COMPR_FL are requested together, and 0
 * otherwise.
 */
static int check_fsflags(unsigned int flags)
{
	if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL |
		      FS_NOATIME_FL | FS_NODUMP_FL |
		      FS_SYNC_FL | FS_DIRSYNC_FL |
		      FS_NOCOMP_FL | FS_COMPR_FL |
		      FS_NOCOW_FL))
		return -EOPNOTSUPP;

	/* The two compression requests are mutually exclusive. */
	if ((flags & FS_NOCOMP_FL) && (flags & FS_COMPR_FL))
		return -EINVAL;

	return 0;
}

188 189
/*
 * Apply a user-supplied FS_*_FL flag word to the file's inode.
 *
 * @file: open file whose inode is modified
 * @arg:  user pointer to an unsigned int holding the requested flags
 *
 * The caller must own the inode (or be capable), the subvolume must not be
 * read-only, and the flag word must pass check_fsflags(). Changing the
 * append-only or immutable state additionally requires CAP_LINUX_IMMUTABLE.
 * The internal flags are updated under the inode lock, the compression
 * property is set or cleared to match, and the inode item is written back in
 * a transaction. If any step after the flags were modified fails, both
 * binode->flags and inode->i_flags are restored to their previous values.
 *
 * Returns 0 on success or a negative errno.
 */
static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
{
	struct inode *inode = file_inode(file);
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_inode *binode = BTRFS_I(inode);
	struct btrfs_root *root = binode->root;
	struct btrfs_trans_handle *trans;
	unsigned int fsflags, old_fsflags;
	int ret;
	u64 old_flags;
	unsigned int old_i_flags;
	umode_t mode;

	if (!inode_owner_or_capable(inode))
		return -EPERM;

	if (btrfs_root_readonly(root))
		return -EROFS;

	if (copy_from_user(&fsflags, arg, sizeof(fsflags)))
		return -EFAULT;

	ret = check_fsflags(fsflags);
	if (ret)
		return ret;

	ret = mnt_want_write_file(file);
	if (ret)
		return ret;

	inode_lock(inode);

	/* Snapshot the current state so it can be restored on failure. */
	old_flags = binode->flags;
	old_i_flags = inode->i_flags;
	mode = inode->i_mode;

	fsflags = btrfs_mask_fsflags_for_type(inode, fsflags);
	old_fsflags = btrfs_inode_flags_to_fsflags(binode->flags);
	/* Only a change of APPEND/IMMUTABLE needs the extra capability. */
	if ((fsflags ^ old_fsflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
		if (!capable(CAP_LINUX_IMMUTABLE)) {
			ret = -EPERM;
			goto out_unlock;
		}
	}

	if (fsflags & FS_SYNC_FL)
		binode->flags |= BTRFS_INODE_SYNC;
	else
		binode->flags &= ~BTRFS_INODE_SYNC;
	if (fsflags & FS_IMMUTABLE_FL)
		binode->flags |= BTRFS_INODE_IMMUTABLE;
	else
		binode->flags &= ~BTRFS_INODE_IMMUTABLE;
	if (fsflags & FS_APPEND_FL)
		binode->flags |= BTRFS_INODE_APPEND;
	else
		binode->flags &= ~BTRFS_INODE_APPEND;
	if (fsflags & FS_NODUMP_FL)
		binode->flags |= BTRFS_INODE_NODUMP;
	else
		binode->flags &= ~BTRFS_INODE_NODUMP;
	if (fsflags & FS_NOATIME_FL)
		binode->flags |= BTRFS_INODE_NOATIME;
	else
		binode->flags &= ~BTRFS_INODE_NOATIME;
	if (fsflags & FS_DIRSYNC_FL)
		binode->flags |= BTRFS_INODE_DIRSYNC;
	else
		binode->flags &= ~BTRFS_INODE_DIRSYNC;
	if (fsflags & FS_NOCOW_FL) {
		if (S_ISREG(mode)) {
			/*
			 * It's safe to turn csums off here, no extents exist.
			 * Otherwise we want the flag to reflect the real COW
			 * status of the file and will not set it.
			 */
			if (inode->i_size == 0)
				binode->flags |= BTRFS_INODE_NODATACOW
					      | BTRFS_INODE_NODATASUM;
		} else {
			binode->flags |= BTRFS_INODE_NODATACOW;
		}
	} else {
		/*
		 * Revert back under same assumptions as above
		 */
		if (S_ISREG(mode)) {
			if (inode->i_size == 0)
				binode->flags &= ~(BTRFS_INODE_NODATACOW
					     | BTRFS_INODE_NODATASUM);
		} else {
			binode->flags &= ~BTRFS_INODE_NODATACOW;
		}
	}

	/*
	 * The COMPRESS flag can only be changed by users, while the NOCOMPRESS
	 * flag may be changed automatically if compression code won't make
	 * things smaller.
	 */
	if (fsflags & FS_NOCOMP_FL) {
		binode->flags &= ~BTRFS_INODE_COMPRESS;
		binode->flags |= BTRFS_INODE_NOCOMPRESS;

		/* Clearing a property that is not set (-ENODATA) is fine. */
		ret = btrfs_set_prop(inode, "btrfs.compression", NULL, 0, 0);
		if (ret && ret != -ENODATA)
			goto out_drop;
	} else if (fsflags & FS_COMPR_FL) {
		const char *comp;

		binode->flags |= BTRFS_INODE_COMPRESS;
		binode->flags &= ~BTRFS_INODE_NOCOMPRESS;

		/* Fall back to zlib when no compression type name is available. */
		comp = btrfs_compress_type2str(fs_info->compress_type);
		if (!comp || comp[0] == 0)
			comp = btrfs_compress_type2str(BTRFS_COMPRESS_ZLIB);

		ret = btrfs_set_prop(inode, "btrfs.compression",
				     comp, strlen(comp), 0);
		if (ret)
			goto out_drop;

	} else {
		ret = btrfs_set_prop(inode, "btrfs.compression", NULL, 0, 0);
		if (ret && ret != -ENODATA)
			goto out_drop;
		binode->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS);
	}

	trans = btrfs_start_transaction(root, 1);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto out_drop;
	}

	btrfs_sync_inode_flags_to_i_flags(inode);
	inode_inc_iversion(inode);
	inode->i_ctime = current_time(inode);
	ret = btrfs_update_inode(trans, root, inode);

	btrfs_end_transaction(trans);
out_drop:
	if (ret) {
		/* Undo the in-memory flag changes made above. */
		binode->flags = old_flags;
		inode->i_flags = old_i_flags;
	}

out_unlock:
	inode_unlock(inode);
	mnt_drop_write_file(file);
	return ret;
}

341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372
/*
 * Convert btrfs internal inode flags into the FS_XFLAG_* bits reported by the
 * FS_IOC_FSGETXATTR ioctl. Only APPEND, IMMUTABLE, NOATIME, NODUMP and SYNC
 * have a translation; every other internal flag is silently dropped.
 */
static unsigned int btrfs_inode_flags_to_xflags(unsigned int flags)
{
	unsigned int result = 0;

	result |= (flags & BTRFS_INODE_APPEND) ? FS_XFLAG_APPEND : 0;
	result |= (flags & BTRFS_INODE_IMMUTABLE) ? FS_XFLAG_IMMUTABLE : 0;
	result |= (flags & BTRFS_INODE_NOATIME) ? FS_XFLAG_NOATIME : 0;
	result |= (flags & BTRFS_INODE_NODUMP) ? FS_XFLAG_NODUMP : 0;
	result |= (flags & BTRFS_INODE_SYNC) ? FS_XFLAG_SYNC : 0;

	return result;
}

/*
 * Validate a set of FS_XFLAG_* flags: returns 0 when every bit in @flags is
 * one of the supported xflags, -EOPNOTSUPP when any other bit is present.
 */
static int check_xflags(unsigned int flags)
{
	const unsigned int supported = FS_XFLAG_APPEND | FS_XFLAG_IMMUTABLE |
				       FS_XFLAG_NOATIME | FS_XFLAG_NODUMP |
				       FS_XFLAG_SYNC;

	return (flags & ~supported) ? -EOPNOTSUPP : 0;
}

373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390
/*
 * Report the inode's xflags to user space through a struct fsxattr. Only
 * fsx_xflags is filled in from the internal inode flags; the rest of the
 * structure is zeroed before it is copied out.
 *
 * Returns 0 on success or -EFAULT if the copy to user space fails.
 */
static int btrfs_ioctl_fsgetxattr(struct file *file, void __user *arg)
{
	struct btrfs_inode *bi = BTRFS_I(file_inode(file));
	struct fsxattr attr;

	/* Zero the whole structure so no stale stack bytes are copied out. */
	memset(&attr, 0, sizeof(attr));
	attr.fsx_xflags = btrfs_inode_flags_to_xflags(bi->flags);

	return copy_to_user(arg, &attr, sizeof(attr)) ? -EFAULT : 0;
}

391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482
/*
 * Apply the xflags from a user-supplied struct fsxattr to the file's inode.
 *
 * @file: open file whose inode is modified
 * @arg:  user pointer to a struct fsxattr
 *
 * The caller must own the inode (or be capable) and the subvolume must not be
 * read-only. Only the flags accepted by check_xflags() are supported, and
 * fsx_extsize, fsx_projid and fsx_cowextsize must all be zero. If the inode
 * currently has, or the request asks for, the append-only or immutable flag,
 * CAP_LINUX_IMMUTABLE is required. On failure after the flags were modified,
 * binode->flags and inode->i_flags are restored to their previous values.
 *
 * Returns 0 on success or a negative errno.
 */
static int btrfs_ioctl_fssetxattr(struct file *file, void __user *arg)
{
	struct inode *inode = file_inode(file);
	struct btrfs_inode *binode = BTRFS_I(inode);
	struct btrfs_root *root = binode->root;
	struct btrfs_trans_handle *trans;
	struct fsxattr fa;
	/*
	 * Saved copy of binode->flags for the error path. Kept as u64, the
	 * same width btrfs_ioctl_setflags() uses for its saved copy, so that
	 * restoring it cannot drop high bits.
	 */
	u64 old_flags;
	unsigned int old_i_flags;
	int ret = 0;

	if (!inode_owner_or_capable(inode))
		return -EPERM;

	if (btrfs_root_readonly(root))
		return -EROFS;

	memset(&fa, 0, sizeof(fa));
	if (copy_from_user(&fa, arg, sizeof(fa)))
		return -EFAULT;

	ret = check_xflags(fa.fsx_xflags);
	if (ret)
		return ret;

	if (fa.fsx_extsize != 0 || fa.fsx_projid != 0 || fa.fsx_cowextsize != 0)
		return -EOPNOTSUPP;

	ret = mnt_want_write_file(file);
	if (ret)
		return ret;

	inode_lock(inode);

	old_flags = binode->flags;
	old_i_flags = inode->i_flags;

	/* We need the capabilities to change append-only or immutable inode */
	if (((old_flags & (BTRFS_INODE_APPEND | BTRFS_INODE_IMMUTABLE)) ||
	     (fa.fsx_xflags & (FS_XFLAG_APPEND | FS_XFLAG_IMMUTABLE))) &&
	    !capable(CAP_LINUX_IMMUTABLE)) {
		ret = -EPERM;
		goto out_unlock;
	}

	if (fa.fsx_xflags & FS_XFLAG_SYNC)
		binode->flags |= BTRFS_INODE_SYNC;
	else
		binode->flags &= ~BTRFS_INODE_SYNC;
	if (fa.fsx_xflags & FS_XFLAG_IMMUTABLE)
		binode->flags |= BTRFS_INODE_IMMUTABLE;
	else
		binode->flags &= ~BTRFS_INODE_IMMUTABLE;
	if (fa.fsx_xflags & FS_XFLAG_APPEND)
		binode->flags |= BTRFS_INODE_APPEND;
	else
		binode->flags &= ~BTRFS_INODE_APPEND;
	if (fa.fsx_xflags & FS_XFLAG_NODUMP)
		binode->flags |= BTRFS_INODE_NODUMP;
	else
		binode->flags &= ~BTRFS_INODE_NODUMP;
	if (fa.fsx_xflags & FS_XFLAG_NOATIME)
		binode->flags |= BTRFS_INODE_NOATIME;
	else
		binode->flags &= ~BTRFS_INODE_NOATIME;

	/* 1 item for the inode */
	trans = btrfs_start_transaction(root, 1);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto out_unlock;
	}

	btrfs_sync_inode_flags_to_i_flags(inode);
	inode_inc_iversion(inode);
	inode->i_ctime = current_time(inode);
	ret = btrfs_update_inode(trans, root, inode);

	btrfs_end_transaction(trans);

out_unlock:
	if (ret) {
		/* Undo the in-memory flag changes made above. */
		binode->flags = old_flags;
		inode->i_flags = old_i_flags;
	}

	inode_unlock(inode);
	mnt_drop_write_file(file);

	return ret;
}

483 484
/*
 * Store the inode's i_generation value at the user address @arg.
 *
 * Returns the result of put_user().
 */
static int btrfs_ioctl_getversion(struct file *file, int __user *arg)
{
	struct inode *inode = file_inode(file);

	return put_user(inode->i_generation, arg);
}
Christoph Hellwig's avatar
Christoph Hellwig committed
489

490 491
/*
 * Handle a trim request described by a user-supplied struct fstrim_range.
 *
 * @file: open file on the filesystem to trim
 * @arg:  user pointer to a struct fstrim_range, read and then written back
 *
 * Requires CAP_SYS_ADMIN. The device list is scanned for devices whose queue
 * supports discard; the smallest discard granularity among them becomes the
 * lower bound for range.minlen. The requested range is clamped to the
 * filesystem's total bytes before btrfs_trim_fs() is called, and the updated
 * range is copied back to user space on success.
 *
 * Returns 0 on success, -EPERM without the capability, -EOPNOTSUPP when no
 * device supports discard, -EFAULT on a failed user copy, -EINVAL for a range
 * that starts past the end of the filesystem or is shorter than one block, or
 * the negative error returned by btrfs_trim_fs().
 */
static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg)
{
	struct inode *inode = file_inode(file);
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_device *device;
	struct request_queue *q;
	struct fstrim_range range;
	u64 minlen = ULLONG_MAX;
	u64 num_devices = 0;
	u64 total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
	int ret;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	rcu_read_lock();
	list_for_each_entry_rcu(device, &fs_info->fs_devices->devices,
				dev_list) {
		/* Devices without a block device cannot be queried. */
		if (!device->bdev)
			continue;
		q = bdev_get_queue(device->bdev);
		if (blk_queue_discard(q)) {
			num_devices++;
			minlen = min_t(u64, q->limits.discard_granularity,
				     minlen);
		}
	}
	rcu_read_unlock();

	if (!num_devices)
		return -EOPNOTSUPP;
	if (copy_from_user(&range, arg, sizeof(range)))
		return -EFAULT;
	if (range.start > total_bytes ||
	    range.len < fs_info->sb->s_blocksize)
		return -EINVAL;

	/* Cannot underflow: range.start <= total_bytes was checked above. */
	range.len = min(range.len, total_bytes - range.start);
	range.minlen = max(range.minlen, minlen);
	ret = btrfs_trim_fs(fs_info, &range);
	if (ret < 0)
		return ret;

	if (copy_to_user(arg, &range, sizeof(range)))
		return -EFAULT;

	return 0;
}

539 540
/*
 * Report whether a UUID consists solely of zero bytes.
 *
 * @uuid: buffer of at least BTRFS_UUID_SIZE bytes
 *
 * Returns 1 when all BTRFS_UUID_SIZE bytes are zero, 0 otherwise.
 */
int btrfs_is_empty_uuid(u8 *uuid)
{
	int i;

	for (i = 0; i < BTRFS_UUID_SIZE; i++) {
		if (uuid[i])
			return 0;
	}
	return 1;
}

550
static noinline int create_subvol(struct inode *dir,
551
				  struct dentry *dentry,
552
				  const char *name, int namelen,
Arne Jansen's avatar
Arne Jansen committed
553
				  u64 *async_transid,
554
				  struct btrfs_qgroup_inherit *inherit)
Christoph Hellwig's avatar
Christoph Hellwig committed
555
{
556
	struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
Christoph Hellwig's avatar
Christoph Hellwig committed
557 558
	struct btrfs_trans_handle *trans;
	struct btrfs_key key;
559
	struct btrfs_root_item *root_item;
Christoph Hellwig's avatar
Christoph Hellwig committed
560 561
	struct btrfs_inode_item *inode_item;
	struct extent_buffer *leaf;
562
	struct btrfs_root *root = BTRFS_I(dir)->root;
563
	struct btrfs_root *new_root;
564
	struct btrfs_block_rsv block_rsv;
565
	struct timespec64 cur_time = current_time(dir);
566
	struct inode *inode;
Christoph Hellwig's avatar
Christoph Hellwig committed
567 568 569 570
	int ret;
	int err;
	u64 objectid;
	u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
571
	u64 index = 0;
572
	uuid_le new_uuid;
Christoph Hellwig's avatar
Christoph Hellwig committed
573

574 575 576 577
	root_item = kzalloc(sizeof(*root_item), GFP_KERNEL);
	if (!root_item)
		return -ENOMEM;

578
	ret = btrfs_find_free_objectid(fs_info->tree_root, &objectid);
579
	if (ret)
580
		goto fail_free;
581

582 583
	/*
	 * Don't create subvolume whose level is not zero. Or qgroup will be
584
	 * screwed up since it assumes subvolume qgroup's level to be 0.
585
	 */
586 587 588 589
	if (btrfs_qgroup_level(objectid)) {
		ret = -ENOSPC;
		goto fail_free;
	}
590

591
	btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP);
Josef Bacik's avatar
Josef Bacik committed
592
	/*
593 594
	 * The same as the snapshot creation, please see the comment
	 * of create_snapshot().
Josef Bacik's avatar
Josef Bacik committed
595
	 */
596
	ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, 8, false);
597
	if (ret)
598
		goto fail_free;
599 600 601 602

	trans = btrfs_start_transaction(root, 0);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
603
		btrfs_subvolume_release_metadata(fs_info, &block_rsv);
604
		goto fail_free;
605 606 607
	}
	trans->block_rsv = &block_rsv;
	trans->bytes_reserved = block_rsv.size;
Christoph Hellwig's avatar
Christoph Hellwig committed
608

609
	ret = btrfs_qgroup_inherit(trans, fs_info, 0, objectid, inherit);
Arne Jansen's avatar
Arne Jansen committed
610 611 612
	if (ret)
		goto fail;

613
	leaf = btrfs_alloc_tree_block(trans, root, 0, objectid, NULL, 0, 0, 0);
614 615 616 617
	if (IS_ERR(leaf)) {
		ret = PTR_ERR(leaf);
		goto fail;
	}
Christoph Hellwig's avatar
Christoph Hellwig committed
618

619
	memzero_extent_buffer(leaf, 0, sizeof(struct btrfs_header));
Christoph Hellwig's avatar
Christoph Hellwig committed
620 621
	btrfs_set_header_bytenr(leaf, leaf->start);
	btrfs_set_header_generation(leaf, trans->transid);
622
	btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV);
Christoph Hellwig's avatar
Christoph Hellwig committed
623 624
	btrfs_set_header_owner(leaf, objectid);

625 626
	write_extent_buffer_fsid(leaf, fs_info->fsid);
	write_extent_buffer_chunk_tree_uuid(leaf, fs_info->chunk_tree_uuid);
Christoph Hellwig's avatar
Christoph Hellwig committed
627 628
	btrfs_mark_buffer_dirty(leaf);

629
	inode_item = &root_item->inode;
630 631 632
	btrfs_set_stack_inode_generation(inode_item, 1);
	btrfs_set_stack_inode_size(inode_item, 3);
	btrfs_set_stack_inode_nlink(inode_item, 1);
633
	btrfs_set_stack_inode_nbytes(inode_item,
634
				     fs_info->nodesize);
635
	btrfs_set_stack_inode_mode(inode_item, S_IFDIR | 0755);
Christoph Hellwig's avatar
Christoph Hellwig committed
636

637 638
	btrfs_set_root_flags(root_item, 0);
	btrfs_set_root_limit(root_item, 0);
639
	btrfs_set_stack_inode_flags(inode_item, BTRFS_INODE_ROOT_ITEM_INIT);
640

641 642 643 644 645 646
	btrfs_set_root_bytenr(root_item, leaf->start);
	btrfs_set_root_generation(root_item, trans->transid);
	btrfs_set_root_level(root_item, 0);
	btrfs_set_root_refs(root_item, 1);
	btrfs_set_root_used(root_item, leaf->len);
	btrfs_set_root_last_snapshot(root_item, 0);
Christoph Hellwig's avatar
Christoph Hellwig committed
647

648 649
	btrfs_set_root_generation_v2(root_item,
			btrfs_root_generation(root_item));
650
	uuid_le_gen(&new_uuid);
651 652 653 654 655 656
	memcpy(root_item->uuid, new_uuid.b, BTRFS_UUID_SIZE);
	btrfs_set_stack_timespec_sec(&root_item->otime, cur_time.tv_sec);
	btrfs_set_stack_timespec_nsec(&root_item->otime, cur_time.tv_nsec);
	root_item->ctime = root_item->otime;
	btrfs_set_root_ctransid(root_item, trans->transid);
	btrfs_set_root_otransid(root_item, trans->transid);
Christoph Hellwig's avatar
Christoph Hellwig committed
657

658
	btrfs_tree_unlock(leaf);
Christoph Hellwig's avatar
Christoph Hellwig committed
659 660 661
	free_extent_buffer(leaf);
	leaf = NULL;

662
	btrfs_set_root_dirid(root_item, new_dirid);
Christoph Hellwig's avatar
Christoph Hellwig committed
663 664

	key.objectid = objectid;
665
	key.offset = 0;
666
	key.type = BTRFS_ROOT_ITEM_KEY;
667
	ret = btrfs_insert_root(trans, fs_info->tree_root, &key,
668
				root_item);
Christoph Hellwig's avatar
Christoph Hellwig committed
669 670 671
	if (ret)
		goto fail;

672
	key.offset = (u64)-1;
673
	new_root = btrfs_read_fs_root_no_name(fs_info, &key);
674 675
	if (IS_ERR(new_root)) {
		ret = PTR_ERR(new_root);
676
		btrfs_abort_transaction(trans, ret);
677 678
		goto fail;
	}
679 680 681

	btrfs_record_root_in_trans(trans, new_root);

682
	ret = btrfs_create_subvol_root(trans, new_root, root, new_dirid);
683 684
	if (ret) {
		/* We potentially lose an unused inode item here */
685
		btrfs_abort_transaction(trans, ret);
686 687 688
		goto fail;
	}

689 690 691 692
	mutex_lock(&new_root->objectid_mutex);
	new_root->highest_objectid = new_dirid;
	mutex_unlock(&new_root->objectid_mutex);

Christoph Hellwig's avatar
Christoph Hellwig committed
693 694 695
	/*
	 * insert the directory item
	 */
696
	ret = btrfs_set_inode_index(BTRFS_I(dir), &index);
697
	if (ret) {
698
		btrfs_abort_transaction(trans, ret);
699 700
		goto fail;
	}
701 702

	ret = btrfs_insert_dir_item(trans, root,
703
				    name, namelen, BTRFS_I(dir), &key,
704
				    BTRFS_FT_DIR, index);
705
	if (ret) {
706
		btrfs_abort_transaction(trans, ret);
Christoph Hellwig's avatar
Christoph Hellwig committed
707
		goto fail;
708
	}
709

710
	btrfs_i_size_write(BTRFS_I(dir), dir->i_size + namelen * 2);
711 712 713
	ret = btrfs_update_inode(trans, root, dir);
	BUG_ON(ret);

714
	ret = btrfs_add_root_ref(trans, fs_info,
715
				 objectid, root->root_key.objectid,
716
				 btrfs_ino(BTRFS_I(dir)), index, name, namelen);
717
	BUG_ON(ret);
Christoph Hellwig's avatar
Christoph Hellwig committed
718

719
	ret = btrfs_uuid_tree_add(trans, root_item->uuid,
720
				  BTRFS_UUID_KEY_SUBVOL, objectid);
721
	if (ret)
722
		btrfs_abort_transaction(trans, ret);
723

Christoph Hellwig's avatar
Christoph Hellwig committed
724
fail:
725
	kfree(root_item);
726 727
	trans->block_rsv = NULL;
	trans->bytes_reserved = 0;
728
	btrfs_subvolume_release_metadata(fs_info, &block_rsv);
729

Sage Weil's avatar
Sage Weil committed
730 731
	if (async_transid) {
		*async_transid = trans->transid;
732
		err = btrfs_commit_transaction_async(trans, 1);
733
		if (err)
734
			err = btrfs_commit_transaction(trans);
Sage Weil's avatar
Sage Weil committed
735
	} else {
736
		err = btrfs_commit_transaction(trans);
Sage Weil's avatar
Sage Weil committed
737
	}
Christoph Hellwig's avatar
Christoph Hellwig committed
738 739
	if (err && !ret)
		ret = err;
740

741 742
	if (!ret) {
		inode = btrfs_lookup_dentry(dir, dentry);
743 744
		if (IS_ERR(inode))
			return PTR_ERR(inode);
745 746
		d_instantiate(dentry, inode);
	}
Christoph Hellwig's avatar
Christoph Hellwig committed
747
	return ret;
748 749 750 751

fail_free:
	kfree(root_item);
	return ret;
Christoph Hellwig's avatar
Christoph Hellwig committed
752 753
}

754
static int create_snapshot(struct btrfs_root *root, struct inode *dir,
755
			   struct dentry *dentry,
756 757
			   u64 *async_transid, bool readonly,
			   struct btrfs_qgroup_inherit *inherit)
Christoph Hellwig's avatar
Christoph Hellwig committed
758
{
759
	struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
760
	struct inode *inode;
Christoph Hellwig's avatar
Christoph Hellwig committed
761 762
	struct btrfs_pending_snapshot *pending_snapshot;
	struct btrfs_trans_handle *trans;
763
	int ret;
Christoph Hellwig's avatar
Christoph Hellwig committed
764

765
	if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))
Christoph Hellwig's avatar
Christoph Hellwig committed
766 767
		return -EINVAL;

768
	pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_KERNEL);
769 770 771
	if (!pending_snapshot)
		return -ENOMEM;

772
	pending_snapshot->root_item = kzalloc(sizeof(struct btrfs_root_item),
773
			GFP_KERNEL);
774 775
	pending_snapshot->path = btrfs_alloc_path();
	if (!pending_snapshot->root_item || !pending_snapshot->path) {
776 777 778 779
		ret = -ENOMEM;
		goto free_pending;
	}

780
	atomic_inc(&root->will_be_snapshotted);
781
	smp_mb__after_atomic();
782 783 784
	/* wait for no snapshot writes */
	wait_event(root->subv_writers->wait,
		   percpu_counter_sum(&root->subv_writers->counter) == 0);
785

786
	ret = btrfs_start_delalloc_inodes(root);
787
	if (ret)
788
		goto dec_and_free;
789

790
	btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1);
791

792 793
	btrfs_init_block_rsv(&pending_snapshot->block_rsv,
			     BTRFS_BLOCK_RSV_TEMP);
794 795 796 797 798 799
	/*
	 * 1 - parent dir inode
	 * 2 - dir entries
	 * 1 - root item
	 * 2 - root ref/backref
	 * 1 - root of snapshot
800
	 * 1 - UUID item
801 802
	 */
	ret = btrfs_subvolume_reserve_metadata(BTRFS_I(dir)->root,
803
					&pending_snapshot->block_rsv, 8,
804
					false);
805
	if (ret)
806
		goto dec_and_free;
807

808
	pending_snapshot->dentry = dentry;
Christoph Hellwig's avatar
Christoph Hellwig committed
809
	pending_snapshot->root = root;
810
	pending_snapshot->readonly = readonly;
811
	pending_snapshot->dir = dir;
812
	pending_snapshot->inherit = inherit;
813

814
	trans = btrfs_start_transaction(root, 0);
815 816 817 818 819
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto fail;
	}

820
	spin_lock(&fs_info->trans_lock);
Christoph Hellwig's avatar
Christoph Hellwig committed
821 822
	list_add(&pending_snapshot->list,
		 &trans->transaction->pending_snapshots);
823
	spin_unlock(&fs_info->trans_lock);
Sage Weil's avatar
Sage Weil committed
824 825
	if (async_transid) {
		*async_transid = trans->transid;
826
		ret = btrfs_commit_transaction_async(trans, 1);
827
		if (ret)
828
			ret = btrfs_commit_transaction(trans);
Sage Weil's avatar
Sage Weil committed
829
	} else {
830
		ret = btrfs_commit_transaction(trans);
Sage Weil's avatar
Sage Weil committed
831
	}
832
	if (ret)
833
		goto fail;
834 835 836 837 838

	ret = pending_snapshot->error;
	if (ret)
		goto fail;

839 840 841 842
	ret = btrfs_orphan_cleanup(pending_snapshot->snap);
	if (ret)
		goto fail;

843
	inode = btrfs_lookup_dentry(d_inode(dentry->d_parent), dentry);
844 845 846 847
	if (IS_ERR(inode)) {
		ret = PTR_ERR(inode);
		goto fail;
	}
848

849 850 851
	d_instantiate(dentry, inode);
	ret = 0;
fail:
852
	btrfs_subvolume_release_metadata(fs_info, &pending_snapshot->block_rsv);
853
dec_and_free:
854
	if (atomic_dec_and_test(&root->will_be_snapshotted))
855
		wake_up_var(&root->will_be_snapshotted);
856 857
free_pending:
	kfree(pending_snapshot->root_item);
858
	btrfs_free_path(pending_snapshot->path);
859 860
	kfree(pending_snapshot);

Christoph Hellwig's avatar
Christoph Hellwig committed
861 862 863
	return ret;
}