Blame - marvell/linux/fs/overlayfs/super.c - T108

blob: 3e18b8a0a6a3ddec4382bb7dc34e71d47abb4ae9 [file] [log] [blame]

b.liu	e958203	2025-04-17 19:18:16 +0800	[diff] [blame^]	1	// SPDX-License-Identifier: GPL-2.0-only
				2	/*
				3	*
				4	* Copyright (C) 2011 Novell Inc.
				5	*/
				6
				7	#include <uapi/linux/magic.h>
				8	#include <linux/fs.h>
				9	#include <linux/namei.h>
				10	#include <linux/xattr.h>
				11	#include <linux/mount.h>
				12	#include <linux/parser.h>
				13	#include <linux/module.h>
				14	#include <linux/statfs.h>
				15	#include <linux/seq_file.h>
				16	#include <linux/posix_acl_xattr.h>
				17	#include <linux/exportfs.h>
				18	#include "overlayfs.h"
				19
				20	MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
				21	MODULE_DESCRIPTION("Overlay filesystem");
				22	MODULE_LICENSE("GPL");
				23
				24
				25	struct ovl_dir_cache;
				26
				27	#define OVL_MAX_STACK 500
				28
				29	static bool ovl_redirect_dir_def = IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_DIR);
				30	module_param_named(redirect_dir, ovl_redirect_dir_def, bool, 0644);
				31	MODULE_PARM_DESC(redirect_dir,
				32	"Default to on or off for the redirect_dir feature");
				33
				34	static bool ovl_redirect_always_follow =
				35	IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW);
				36	module_param_named(redirect_always_follow, ovl_redirect_always_follow,
				37	bool, 0644);
				38	MODULE_PARM_DESC(redirect_always_follow,
				39	"Follow redirects even if redirect_dir feature is turned off");
				40
				41	static bool ovl_index_def = IS_ENABLED(CONFIG_OVERLAY_FS_INDEX);
				42	module_param_named(index, ovl_index_def, bool, 0644);
				43	MODULE_PARM_DESC(index,
				44	"Default to on or off for the inodes index feature");
				45
				46	static bool ovl_nfs_export_def = IS_ENABLED(CONFIG_OVERLAY_FS_NFS_EXPORT);
				47	module_param_named(nfs_export, ovl_nfs_export_def, bool, 0644);
				48	MODULE_PARM_DESC(nfs_export,
				49	"Default to on or off for the NFS export feature");
				50
				51	static bool ovl_xino_auto_def = IS_ENABLED(CONFIG_OVERLAY_FS_XINO_AUTO);
				52	module_param_named(xino_auto, ovl_xino_auto_def, bool, 0644);
				53	MODULE_PARM_DESC(xino_auto,
				54	"Auto enable xino feature");
				55
				56	static bool __read_mostly ovl_override_creds_def = true;
				57	module_param_named(override_creds, ovl_override_creds_def, bool, 0644);
				58	MODULE_PARM_DESC(ovl_override_creds_def,
				59	"Use mounter's credentials for accesses");
				60
				61	static void ovl_entry_stack_free(struct ovl_entry *oe)
				62	{
				63	unsigned int i;
				64
				65	for (i = 0; i < oe->numlower; i++)
				66	dput(oe->lowerstack[i].dentry);
				67	}
				68
				69	static bool ovl_metacopy_def = IS_ENABLED(CONFIG_OVERLAY_FS_METACOPY);
				70	module_param_named(metacopy, ovl_metacopy_def, bool, 0644);
				71	MODULE_PARM_DESC(metacopy,
				72	"Default to on or off for the metadata only copy up feature");
				73
				74	static void ovl_dentry_release(struct dentry *dentry)
				75	{
				76	struct ovl_entry *oe = dentry->d_fsdata;
				77
				78	if (oe) {
				79	ovl_entry_stack_free(oe);
				80	kfree_rcu(oe, rcu);
				81	}
				82	}
				83
				84	static struct dentry ovl_d_real(struct dentry dentry,
				85	const struct inode *inode)
				86	{
				87	struct dentry real = NULL, lower;
				88
				89	/* It's an overlay file */
				90	if (inode && d_inode(dentry) == inode)
				91	return dentry;
				92
				93	if (!d_is_reg(dentry)) {
				94	if (!inode \|\| inode == d_inode(dentry))
				95	return dentry;
				96	goto bug;
				97	}
				98
				99	real = ovl_dentry_upper(dentry);
				100	if (real && (inode == d_inode(real)))
				101	return real;
				102
				103	if (real && !inode && ovl_has_upperdata(d_inode(dentry)))
				104	return real;
				105
				106	lower = ovl_dentry_lowerdata(dentry);
				107	if (!lower)
				108	goto bug;
				109	real = lower;
				110
				111	/* Handle recursion */
				112	real = d_real(real, inode);
				113
				114	if (!inode \|\| inode == d_inode(real))
				115	return real;
				116	bug:
				117	WARN(1, "%s(%pd4, %s:%lu): real dentry (%p/%lu) not found\n",
				118	__func__, dentry, inode ? inode->i_sb->s_id : "NULL",
				119	inode ? inode->i_ino : 0, real,
				120	real && d_inode(real) ? d_inode(real)->i_ino : 0);
				121	return dentry;
				122	}
				123
				124	static int ovl_dentry_revalidate(struct dentry *dentry, unsigned int flags)
				125	{
				126	struct ovl_entry *oe = dentry->d_fsdata;
				127	unsigned int i;
				128	int ret = 1;
				129
				130	for (i = 0; i < oe->numlower; i++) {
				131	struct dentry *d = oe->lowerstack[i].dentry;
				132
				133	if (d->d_flags & DCACHE_OP_REVALIDATE) {
				134	ret = d->d_op->d_revalidate(d, flags);
				135	if (ret < 0)
				136	return ret;
				137	if (!ret) {
				138	if (!(flags & LOOKUP_RCU))
				139	d_invalidate(d);
				140	return -ESTALE;
				141	}
				142	}
				143	}
				144	return 1;
				145	}
				146
				147	static int ovl_dentry_weak_revalidate(struct dentry *dentry, unsigned int flags)
				148	{
				149	struct ovl_entry *oe = dentry->d_fsdata;
				150	unsigned int i;
				151	int ret = 1;
				152
				153	for (i = 0; i < oe->numlower; i++) {
				154	struct dentry *d = oe->lowerstack[i].dentry;
				155
				156	if (d->d_flags & DCACHE_OP_WEAK_REVALIDATE) {
				157	ret = d->d_op->d_weak_revalidate(d, flags);
				158	if (ret <= 0)
				159	break;
				160	}
				161	}
				162	return ret;
				163	}
				164
				165	static const struct dentry_operations ovl_dentry_operations = {
				166	.d_release = ovl_dentry_release,
				167	.d_real = ovl_d_real,
				168	};
				169
				170	static const struct dentry_operations ovl_reval_dentry_operations = {
				171	.d_release = ovl_dentry_release,
				172	.d_real = ovl_d_real,
				173	.d_revalidate = ovl_dentry_revalidate,
				174	.d_weak_revalidate = ovl_dentry_weak_revalidate,
				175	};
				176
				177	static struct kmem_cache *ovl_inode_cachep;
				178
				179	static struct inode ovl_alloc_inode(struct super_block sb)
				180	{
				181	struct ovl_inode *oi = kmem_cache_alloc(ovl_inode_cachep, GFP_KERNEL);
				182
				183	if (!oi)
				184	return NULL;
				185
				186	oi->cache = NULL;
				187	oi->redirect = NULL;
				188	oi->version = 0;
				189	oi->flags = 0;
				190	oi->__upperdentry = NULL;
				191	oi->lower = NULL;
				192	oi->lowerdata = NULL;
				193	mutex_init(&oi->lock);
				194
				195	return &oi->vfs_inode;
				196	}
				197
				198	static void ovl_free_inode(struct inode *inode)
				199	{
				200	struct ovl_inode *oi = OVL_I(inode);
				201
				202	kfree(oi->redirect);
				203	mutex_destroy(&oi->lock);
				204	kmem_cache_free(ovl_inode_cachep, oi);
				205	}
				206
				207	static void ovl_destroy_inode(struct inode *inode)
				208	{
				209	struct ovl_inode *oi = OVL_I(inode);
				210
				211	dput(oi->__upperdentry);
				212	iput(oi->lower);
				213	if (S_ISDIR(inode->i_mode))
				214	ovl_dir_cache_free(inode);
				215	else
				216	iput(oi->lowerdata);
				217	}
				218
				219	static void ovl_free_fs(struct ovl_fs *ofs)
				220	{
				221	unsigned i;
				222
				223	iput(ofs->workbasedir_trap);
				224	iput(ofs->indexdir_trap);
				225	iput(ofs->workdir_trap);
				226	iput(ofs->upperdir_trap);
				227	dput(ofs->indexdir);
				228	dput(ofs->workdir);
				229	if (ofs->workdir_locked)
				230	ovl_inuse_unlock(ofs->workbasedir);
				231	dput(ofs->workbasedir);
				232	if (ofs->upperdir_locked)
				233	ovl_inuse_unlock(ofs->upper_mnt->mnt_root);
				234	mntput(ofs->upper_mnt);
				235	for (i = 0; i < ofs->numlower; i++) {
				236	iput(ofs->lower_layers[i].trap);
				237	mntput(ofs->lower_layers[i].mnt);
				238	}
				239	for (i = 0; i < ofs->numlowerfs; i++)
				240	free_anon_bdev(ofs->lower_fs[i].pseudo_dev);
				241	kfree(ofs->lower_layers);
				242	kfree(ofs->lower_fs);
				243
				244	kfree(ofs->config.lowerdir);
				245	kfree(ofs->config.upperdir);
				246	kfree(ofs->config.workdir);
				247	kfree(ofs->config.redirect_mode);
				248	if (ofs->creator_cred)
				249	put_cred(ofs->creator_cred);
				250	kfree(ofs);
				251	}
				252
				253	static void ovl_put_super(struct super_block *sb)
				254	{
				255	struct ovl_fs *ofs = sb->s_fs_info;
				256
				257	ovl_free_fs(ofs);
				258	}
				259
				260	/* Sync real dirty inodes in upper filesystem (if it exists) */
				261	static int ovl_sync_fs(struct super_block *sb, int wait)
				262	{
				263	struct ovl_fs *ofs = sb->s_fs_info;
				264	struct super_block *upper_sb;
				265	int ret;
				266
				267	if (!ofs->upper_mnt)
				268	return 0;
				269
				270	/*
				271	* Not called for sync(2) call or an emergency sync (SB_I_SKIP_SYNC).
				272	* All the super blocks will be iterated, including upper_sb.
				273	*
				274	* If this is a syncfs(2) call, then we do need to call
				275	* sync_filesystem() on upper_sb, but enough if we do it when being
				276	* called with wait == 1.
				277	*/
				278	if (!wait)
				279	return 0;
				280
				281	upper_sb = ofs->upper_mnt->mnt_sb;
				282
				283	down_read(&upper_sb->s_umount);
				284	ret = sync_filesystem(upper_sb);
				285	up_read(&upper_sb->s_umount);
				286
				287	return ret;
				288	}
				289
				290	/**
				291	* ovl_statfs
				292	* @sb: The overlayfs super block
				293	* @buf: The struct kstatfs to fill in with stats
				294	*
				295	* Get the filesystem statistics. As writes always target the upper layer
				296	* filesystem pass the statfs to the upper filesystem (if it exists)
				297	*/
				298	static int ovl_statfs(struct dentry dentry, struct kstatfs buf)
				299	{
				300	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
				301	struct dentry *root_dentry = dentry->d_sb->s_root;
				302	struct path path;
				303	int err;
				304
				305	ovl_path_real(root_dentry, &path);
				306
				307	err = vfs_statfs(&path, buf);
				308	if (!err) {
				309	buf->f_namelen = ofs->namelen;
				310	buf->f_type = OVERLAYFS_SUPER_MAGIC;
				311	}
				312
				313	return err;
				314	}
				315
				316	/* Will this overlay be forced to mount/remount ro? */
				317	static bool ovl_force_readonly(struct ovl_fs *ofs)
				318	{
				319	return (!ofs->upper_mnt \|\| !ofs->workdir);
				320	}
				321
				322	static const char *ovl_redirect_mode_def(void)
				323	{
				324	return ovl_redirect_dir_def ? "on" : "off";
				325	}
				326
				327	enum {
				328	OVL_XINO_OFF,
				329	OVL_XINO_AUTO,
				330	OVL_XINO_ON,
				331	};
				332
				333	static const char * const ovl_xino_str[] = {
				334	"off",
				335	"auto",
				336	"on",
				337	};
				338
				339	static inline int ovl_xino_def(void)
				340	{
				341	return ovl_xino_auto_def ? OVL_XINO_AUTO : OVL_XINO_OFF;
				342	}
				343
				344	static const char * const ovl_fsync_mode_str[] = {
				345	"strict",
				346	"ordered",
				347	"volatile",
				348	};
				349
				350	/**
				351	* ovl_show_options
				352	*
				353	* Prints the mount options for a given superblock.
				354	* Returns zero; does not fail.
				355	*/
				356	static int ovl_show_options(struct seq_file m, struct dentry dentry)
				357	{
				358	struct super_block *sb = dentry->d_sb;
				359	struct ovl_fs *ofs = sb->s_fs_info;
				360
				361	seq_show_option(m, "lowerdir", ofs->config.lowerdir);
				362	if (ofs->config.upperdir) {
				363	seq_show_option(m, "upperdir", ofs->config.upperdir);
				364	seq_show_option(m, "workdir", ofs->config.workdir);
				365	}
				366	if (ofs->config.default_permissions)
				367	seq_puts(m, ",default_permissions");
				368	if (strcmp(ofs->config.redirect_mode, ovl_redirect_mode_def()) != 0)
				369	seq_printf(m, ",redirect_dir=%s", ofs->config.redirect_mode);
				370	if (ofs->config.index != ovl_index_def)
				371	seq_printf(m, ",index=%s", ofs->config.index ? "on" : "off");
				372	if (ofs->config.nfs_export != ovl_nfs_export_def)
				373	seq_printf(m, ",nfs_export=%s", ofs->config.nfs_export ?
				374	"on" : "off");
				375	if (ofs->config.xino != ovl_xino_def())
				376	seq_printf(m, ",xino=%s", ovl_xino_str[ofs->config.xino]);
				377	if (ofs->config.metacopy != ovl_metacopy_def)
				378	seq_printf(m, ",metacopy=%s",
				379	ofs->config.metacopy ? "on" : "off");
				380	if (ofs->config.override_creds != ovl_override_creds_def)
				381	seq_show_option(m, "override_creds",
				382	ofs->config.override_creds ? "on" : "off");
				383	if (ofs->config.fsync_mode)
				384	seq_printf(m, ",fsync=%s",
				385	ovl_fsync_mode_str[ofs->config.fsync_mode]);
				386	return 0;
				387	}
				388
				389	static int ovl_remount(struct super_block sb, int flags, char *data)
				390	{
				391	struct ovl_fs *ofs = sb->s_fs_info;
				392
				393	if (!(*flags & SB_RDONLY) && ovl_force_readonly(ofs))
				394	return -EROFS;
				395
				396	return 0;
				397	}
				398
				399	static const struct super_operations ovl_super_operations = {
				400	.alloc_inode = ovl_alloc_inode,
				401	.free_inode = ovl_free_inode,
				402	.destroy_inode = ovl_destroy_inode,
				403	.drop_inode = generic_delete_inode,
				404	.put_super = ovl_put_super,
				405	.sync_fs = ovl_sync_fs,
				406	.statfs = ovl_statfs,
				407	.show_options = ovl_show_options,
				408	.remount_fs = ovl_remount,
				409	};
				410
				411	enum {
				412	OPT_LOWERDIR,
				413	OPT_UPPERDIR,
				414	OPT_WORKDIR,
				415	OPT_DEFAULT_PERMISSIONS,
				416	OPT_REDIRECT_DIR,
				417	OPT_INDEX_ON,
				418	OPT_INDEX_OFF,
				419	OPT_NFS_EXPORT_ON,
				420	OPT_NFS_EXPORT_OFF,
				421	OPT_XINO_ON,
				422	OPT_XINO_OFF,
				423	OPT_XINO_AUTO,
				424	OPT_METACOPY_ON,
				425	OPT_METACOPY_OFF,
				426	OPT_OVERRIDE_CREDS_ON,
				427	OPT_OVERRIDE_CREDS_OFF,
				428	OPT_FSYNC_STRICT,
				429	OPT_FSYNC_ORDERED,
				430	OPT_FSYNC_VOLATILE,
				431	OPT_ERR,
				432	};
				433
				434	static const match_table_t ovl_tokens = {
				435	{OPT_LOWERDIR, "lowerdir=%s"},
				436	{OPT_UPPERDIR, "upperdir=%s"},
				437	{OPT_WORKDIR, "workdir=%s"},
				438	{OPT_DEFAULT_PERMISSIONS, "default_permissions"},
				439	{OPT_REDIRECT_DIR, "redirect_dir=%s"},
				440	{OPT_INDEX_ON, "index=on"},
				441	{OPT_INDEX_OFF, "index=off"},
				442	{OPT_NFS_EXPORT_ON, "nfs_export=on"},
				443	{OPT_NFS_EXPORT_OFF, "nfs_export=off"},
				444	{OPT_XINO_ON, "xino=on"},
				445	{OPT_XINO_OFF, "xino=off"},
				446	{OPT_XINO_AUTO, "xino=auto"},
				447	{OPT_METACOPY_ON, "metacopy=on"},
				448	{OPT_METACOPY_OFF, "metacopy=off"},
				449	{OPT_OVERRIDE_CREDS_ON, "override_creds=on"},
				450	{OPT_OVERRIDE_CREDS_OFF, "override_creds=off"},
				451	{OPT_FSYNC_STRICT, "fsync=strict"},
				452	{OPT_FSYNC_ORDERED, "fsync=ordered"},
				453	{OPT_FSYNC_VOLATILE, "fsync=volatile"},
				454	{OPT_ERR, NULL}
				455	};
				456
				457	static char ovl_next_opt(char *s)
				458	{
				459	char sbegin = s;
				460	char *p;
				461
				462	if (sbegin == NULL)
				463	return NULL;
				464
				465	for (p = sbegin; *p; p++) {
				466	if (*p == '\\') {
				467	p++;
				468	if (!*p)
				469	break;
				470	} else if (*p == ',') {
				471	*p = '\0';
				472	*s = p + 1;
				473	return sbegin;
				474	}
				475	}
				476	*s = NULL;
				477	return sbegin;
				478	}
				479
				480	static int ovl_parse_redirect_mode(struct ovl_config config, const char mode)
				481	{
				482	if (strcmp(mode, "on") == 0) {
				483	config->redirect_dir = true;
				484	/*
				485	* Does not make sense to have redirect creation without
				486	* redirect following.
				487	*/
				488	config->redirect_follow = true;
				489	} else if (strcmp(mode, "follow") == 0) {
				490	config->redirect_follow = true;
				491	} else if (strcmp(mode, "off") == 0) {
				492	if (ovl_redirect_always_follow)
				493	config->redirect_follow = true;
				494	} else if (strcmp(mode, "nofollow") != 0) {
				495	pr_err("overlayfs: bad mount option \"redirect_dir=%s\"\n",
				496	mode);
				497	return -EINVAL;
				498	}
				499
				500	return 0;
				501	}
				502
				503	static int ovl_parse_opt(char opt, struct ovl_config config)
				504	{
				505	char *p;
				506	int err;
				507	bool metacopy_opt = false, redirect_opt = false;
				508
				509	config->redirect_mode = kstrdup(ovl_redirect_mode_def(), GFP_KERNEL);
				510	if (!config->redirect_mode)
				511	return -ENOMEM;
				512	config->override_creds = ovl_override_creds_def;
				513
				514	while ((p = ovl_next_opt(&opt)) != NULL) {
				515	int token;
				516	substring_t args[MAX_OPT_ARGS];
				517
				518	if (!*p)
				519	continue;
				520
				521	token = match_token(p, ovl_tokens, args);
				522	switch (token) {
				523	case OPT_UPPERDIR:
				524	kfree(config->upperdir);
				525	config->upperdir = match_strdup(&args[0]);
				526	if (!config->upperdir)
				527	return -ENOMEM;
				528	break;
				529
				530	case OPT_LOWERDIR:
				531	kfree(config->lowerdir);
				532	config->lowerdir = match_strdup(&args[0]);
				533	if (!config->lowerdir)
				534	return -ENOMEM;
				535	break;
				536
				537	case OPT_WORKDIR:
				538	kfree(config->workdir);
				539	config->workdir = match_strdup(&args[0]);
				540	if (!config->workdir)
				541	return -ENOMEM;
				542	break;
				543
				544	case OPT_DEFAULT_PERMISSIONS:
				545	config->default_permissions = true;
				546	break;
				547
				548	case OPT_REDIRECT_DIR:
				549	kfree(config->redirect_mode);
				550	config->redirect_mode = match_strdup(&args[0]);
				551	if (!config->redirect_mode)
				552	return -ENOMEM;
				553	redirect_opt = true;
				554	break;
				555
				556	case OPT_INDEX_ON:
				557	config->index = true;
				558	break;
				559
				560	case OPT_INDEX_OFF:
				561	config->index = false;
				562	break;
				563
				564	case OPT_NFS_EXPORT_ON:
				565	config->nfs_export = true;
				566	break;
				567
				568	case OPT_NFS_EXPORT_OFF:
				569	config->nfs_export = false;
				570	break;
				571
				572	case OPT_XINO_ON:
				573	config->xino = OVL_XINO_ON;
				574	break;
				575
				576	case OPT_XINO_OFF:
				577	config->xino = OVL_XINO_OFF;
				578	break;
				579
				580	case OPT_XINO_AUTO:
				581	config->xino = OVL_XINO_AUTO;
				582	break;
				583
				584	case OPT_METACOPY_ON:
				585	config->metacopy = true;
				586	metacopy_opt = true;
				587	break;
				588
				589	case OPT_METACOPY_OFF:
				590	config->metacopy = false;
				591	break;
				592
				593	case OPT_OVERRIDE_CREDS_ON:
				594	config->override_creds = true;
				595	break;
				596
				597	case OPT_OVERRIDE_CREDS_OFF:
				598	config->override_creds = false;
				599	break;
				600
				601	case OPT_FSYNC_STRICT:
				602	config->fsync_mode = OVL_FSYNC_STRICT;
				603	break;
				604
				605	case OPT_FSYNC_ORDERED:
				606	config->fsync_mode = OVL_FSYNC_ORDERED;
				607	break;
				608
				609	case OPT_FSYNC_VOLATILE:
				610	config->fsync_mode = OVL_FSYNC_VOLATILE;
				611	break;
				612
				613	default:
				614	pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p);
				615	return -EINVAL;
				616	}
				617	}
				618
				619	/* Workdir is useless in non-upper mount */
				620	if (!config->upperdir && config->workdir) {
				621	pr_info("overlayfs: option \"workdir=%s\" is useless in a non-upper mount, ignore\n",
				622	config->workdir);
				623	kfree(config->workdir);
				624	config->workdir = NULL;
				625	}
				626
				627	err = ovl_parse_redirect_mode(config, config->redirect_mode);
				628	if (err)
				629	return err;
				630
				631	/*
				632	* This is to make the logic below simpler. It doesn't make any other
				633	* difference, since config->redirect_dir is only used for upper.
				634	*/
				635	if (!config->upperdir && config->redirect_follow)
				636	config->redirect_dir = true;
				637
				638	/* Resolve metacopy -> redirect_dir dependency */
				639	if (config->metacopy && !config->redirect_dir) {
				640	if (metacopy_opt && redirect_opt) {
				641	pr_err("overlayfs: conflicting options: metacopy=on,redirect_dir=%s\n",
				642	config->redirect_mode);
				643	return -EINVAL;
				644	}
				645	if (redirect_opt) {
				646	/*
				647	* There was an explicit redirect_dir=... that resulted
				648	* in this conflict.
				649	*/
				650	pr_info("overlayfs: disabling metacopy due to redirect_dir=%s\n",
				651	config->redirect_mode);
				652	config->metacopy = false;
				653	} else {
				654	/* Automatically enable redirect otherwise. */
				655	config->redirect_follow = config->redirect_dir = true;
				656	}
				657	}
				658
				659	return 0;
				660	}
				661
				662	#define OVL_WORKDIR_NAME "work"
				663	#define OVL_INDEXDIR_NAME "index"
				664
				665	static struct dentry ovl_workdir_create(struct ovl_fs ofs,
				666	const char *name, bool persist)
				667	{
				668	struct inode *dir = ofs->workbasedir->d_inode;
				669	struct vfsmount *mnt = ofs->upper_mnt;
				670	struct dentry *work;
				671	int err;
				672	bool retried = false;
				673	bool locked = false;
				674
				675	inode_lock_nested(dir, I_MUTEX_PARENT);
				676	locked = true;
				677
				678	retry:
				679	work = lookup_one_len(name, ofs->workbasedir, strlen(name));
				680
				681	if (!IS_ERR(work)) {
				682	struct iattr attr = {
				683	.ia_valid = ATTR_MODE,
				684	.ia_mode = S_IFDIR \| 0,
				685	};
				686
				687	if (work->d_inode) {
				688	err = -EEXIST;
				689	if (retried)
				690	goto out_dput;
				691
				692	if (persist)
				693	goto out_unlock;
				694
				695	retried = true;
				696	ovl_workdir_cleanup(dir, mnt, work, 0);
				697	dput(work);
				698	goto retry;
				699	}
				700
				701	err = ovl_mkdir_real(dir, &work, attr.ia_mode);
				702	if (err)
				703	goto out_dput;
				704
				705	/* Weird filesystem returning with hashed negative (kernfs)? */
				706	err = -EINVAL;
				707	if (d_really_is_negative(work))
				708	goto out_dput;
				709
				710	/*
				711	* Try to remove POSIX ACL xattrs from workdir. We are good if:
				712	*
				713	* a) success (there was a POSIX ACL xattr and was removed)
				714	* b) -ENODATA (there was no POSIX ACL xattr)
				715	* c) -EOPNOTSUPP (POSIX ACL xattrs are not supported)
				716	*
				717	* There are various other error values that could effectively
				718	* mean that the xattr doesn't exist (e.g. -ERANGE is returned
				719	* if the xattr name is too long), but the set of filesystems
				720	* allowed as upper are limited to "normal" ones, where checking
				721	* for the above two errors is sufficient.
				722	*/
				723	err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_DEFAULT);
				724	if (err && err != -ENODATA && err != -EOPNOTSUPP)
				725	goto out_dput;
				726
				727	err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_ACCESS);
				728	if (err && err != -ENODATA && err != -EOPNOTSUPP)
				729	goto out_dput;
				730
				731	/* Clear any inherited mode bits */
				732	inode_lock(work->d_inode);
				733	err = notify_change(work, &attr, NULL);
				734	inode_unlock(work->d_inode);
				735	if (err)
				736	goto out_dput;
				737	} else {
				738	err = PTR_ERR(work);
				739	goto out_err;
				740	}
				741	out_unlock:
				742	if (locked)
				743	inode_unlock(dir);
				744
				745	return work;
				746
				747	out_dput:
				748	dput(work);
				749	out_err:
				750	pr_warn("overlayfs: failed to create directory %s/%s (errno: %i); mounting read-only\n",
				751	ofs->config.workdir, name, -err);
				752	work = NULL;
				753	goto out_unlock;
				754	}
				755
				756	static void ovl_unescape(char *s)
				757	{
				758	char *d = s;
				759
				760	for (;; s++, d++) {
				761	if (*s == '\\')
				762	s++;
				763	d = s;
				764	if (!*s)
				765	break;
				766	}
				767	}
				768
				769	static int ovl_mount_dir_noesc(const char name, struct path path)
				770	{
				771	int err = -EINVAL;
				772
				773	if (!*name) {
				774	pr_err("overlayfs: empty lowerdir\n");
				775	goto out;
				776	}
				777	err = kern_path(name, LOOKUP_FOLLOW, path);
				778	if (err) {
				779	pr_err("overlayfs: failed to resolve '%s': %i\n", name, err);
				780	goto out;
				781	}
				782	err = -EINVAL;
				783	if (ovl_dentry_weird(path->dentry)) {
				784	pr_err("overlayfs: filesystem on '%s' not supported\n", name);
				785	goto out_put;
				786	}
				787	if (!d_is_dir(path->dentry)) {
				788	pr_err("overlayfs: '%s' not a directory\n", name);
				789	goto out_put;
				790	}
				791	return 0;
				792
				793	out_put:
				794	path_put_init(path);
				795	out:
				796	return err;
				797	}
				798
				799	static int ovl_mount_dir(const char name, struct path path)
				800	{
				801	int err = -ENOMEM;
				802	char *tmp = kstrdup(name, GFP_KERNEL);
				803
				804	if (tmp) {
				805	ovl_unescape(tmp);
				806	err = ovl_mount_dir_noesc(tmp, path);
				807
				808	if (!err)
				809	if (ovl_dentry_remote(path->dentry)) {
				810	pr_err("overlayfs: filesystem on '%s' not supported as upperdir\n",
				811	tmp);
				812	path_put_init(path);
				813	err = -EINVAL;
				814	}
				815	kfree(tmp);
				816	}
				817	return err;
				818	}
				819
				820	static int ovl_check_namelen(struct path path, struct ovl_fs ofs,
				821	const char *name)
				822	{
				823	struct kstatfs statfs;
				824	int err = vfs_statfs(path, &statfs);
				825
				826	if (err)
				827	pr_err("overlayfs: statfs failed on '%s'\n", name);
				828	else
				829	ofs->namelen = max(ofs->namelen, statfs.f_namelen);
				830
				831	return err;
				832	}
				833
				834	static int ovl_lower_dir(const char name, struct path path,
				835	struct ovl_fs ofs, int stack_depth, bool *remote)
				836	{
				837	int fh_type;
				838	int err;
				839
				840	err = ovl_mount_dir_noesc(name, path);
				841	if (err)
				842	goto out;
				843
				844	err = ovl_check_namelen(path, ofs, name);
				845	if (err)
				846	goto out_put;
				847
				848	stack_depth = max(stack_depth, path->mnt->mnt_sb->s_stack_depth);
				849
				850	if (ovl_dentry_remote(path->dentry))
				851	*remote = true;
				852
				853	/*
				854	* The inodes index feature and NFS export need to encode and decode
				855	* file handles, so they require that all layers support them.
				856	*/
				857	fh_type = ovl_can_decode_fh(path->dentry->d_sb);
				858	if ((ofs->config.nfs_export \|\|
				859	(ofs->config.index && ofs->config.upperdir)) && !fh_type) {
				860	ofs->config.index = false;
				861	ofs->config.nfs_export = false;
				862	pr_warn("overlayfs: fs on '%s' does not support file handles, falling back to index=off,nfs_export=off.\n",
				863	name);
				864	}
				865
				866	/* Check if lower fs has 32bit inode numbers */
				867	if (fh_type != FILEID_INO32_GEN)
				868	ofs->xino_bits = 0;
				869
				870	return 0;
				871
				872	out_put:
				873	path_put_init(path);
				874	out:
				875	return err;
				876	}
				877
				878	/* Workdir should not be subdir of upperdir and vice versa */
				879	static bool ovl_workdir_ok(struct dentry workdir, struct dentry upperdir)
				880	{
				881	bool ok = false;
				882
				883	if (workdir != upperdir) {
				884	ok = (lock_rename(workdir, upperdir) == NULL);
				885	unlock_rename(workdir, upperdir);
				886	}
				887	return ok;
				888	}
				889
				890	static unsigned int ovl_split_lowerdirs(char *str)
				891	{
				892	unsigned int ctr = 1;
				893	char s, d;
				894
				895	for (s = d = str;; s++, d++) {
				896	if (*s == '\\') {
				897	s++;
				898	} else if (*s == ':') {
				899	*d = '\0';
				900	ctr++;
				901	continue;
				902	}
				903	d = s;
				904	if (!*s)
				905	break;
				906	}
				907	return ctr;
				908	}
				909
				910	static int __maybe_unused
				911	ovl_posix_acl_xattr_get(const struct xattr_handler *handler,
				912	struct dentry dentry, struct inode inode,
				913	const char name, void buffer, size_t size, int flags)
				914	{
				915	return ovl_xattr_get(dentry, inode, handler->name, buffer, size, flags);
				916	}
				917
				918	static int __maybe_unused
				919	ovl_posix_acl_xattr_set(const struct xattr_handler *handler,
				920	struct dentry dentry, struct inode inode,
				921	const char name, const void value,
				922	size_t size, int flags)
				923	{
				924	struct dentry *workdir = ovl_workdir(dentry);
				925	struct inode *realinode = ovl_inode_real(inode);
				926	struct posix_acl *acl = NULL;
				927	int err;
				928
				929	/* Check that everything is OK before copy-up */
				930	if (value) {
				931	acl = posix_acl_from_xattr(&init_user_ns, value, size);
				932	if (IS_ERR(acl))
				933	return PTR_ERR(acl);
				934	}
				935	err = -EOPNOTSUPP;
				936	if (!IS_POSIXACL(d_inode(workdir)))
				937	goto out_acl_release;
				938	if (!realinode->i_op->set_acl)
				939	goto out_acl_release;
				940	if (handler->flags == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) {
				941	err = acl ? -EACCES : 0;
				942	goto out_acl_release;
				943	}
				944	err = -EPERM;
				945	if (!inode_owner_or_capable(inode))
				946	goto out_acl_release;
				947
				948	posix_acl_release(acl);
				949
				950	/*
				951	* Check if sgid bit needs to be cleared (actual setacl operation will
				952	* be done with mounter's capabilities and so that won't do it for us).
				953	*/
				954	if (unlikely(inode->i_mode & S_ISGID) &&
				955	handler->flags == ACL_TYPE_ACCESS &&
				956	!in_group_p(inode->i_gid) &&
				957	!capable_wrt_inode_uidgid(inode, CAP_FSETID)) {
				958	struct iattr iattr = { .ia_valid = ATTR_KILL_SGID };
				959
				960	err = ovl_setattr(dentry, &iattr);
				961	if (err)
				962	return err;
				963	}
				964
				965	err = ovl_xattr_set(dentry, inode, handler->name, value, size, flags);
				966	if (!err)
				967	ovl_copyattr(ovl_inode_real(inode), inode);
				968
				969	return err;
				970
				971	out_acl_release:
				972	posix_acl_release(acl);
				973	return err;
				974	}
				975
				976	static int ovl_own_xattr_get(const struct xattr_handler *handler,
				977	struct dentry dentry, struct inode inode,
				978	const char name, void buffer, size_t size,
				979	int flags)
				980	{
				981	return -EOPNOTSUPP;
				982	}
				983
				984	static int ovl_own_xattr_set(const struct xattr_handler *handler,
				985	struct dentry dentry, struct inode inode,
				986	const char name, const void value,
				987	size_t size, int flags)
				988	{
				989	return -EOPNOTSUPP;
				990	}
				991
				992	static int ovl_other_xattr_get(const struct xattr_handler *handler,
				993	struct dentry dentry, struct inode inode,
				994	const char name, void buffer, size_t size,
				995	int flags)
				996	{
				997	return ovl_xattr_get(dentry, inode, name, buffer, size, flags);
				998	}
				999
				1000	static int ovl_other_xattr_set(const struct xattr_handler *handler,
				1001	struct dentry dentry, struct inode inode,
				1002	const char name, const void value,
				1003	size_t size, int flags)
				1004	{
				1005	return ovl_xattr_set(dentry, inode, name, value, size, flags);
				1006	}
				1007
				1008	static const struct xattr_handler __maybe_unused
				1009	ovl_posix_acl_access_xattr_handler = {
				1010	.name = XATTR_NAME_POSIX_ACL_ACCESS,
				1011	.flags = ACL_TYPE_ACCESS,
				1012	.get = ovl_posix_acl_xattr_get,
				1013	.set = ovl_posix_acl_xattr_set,
				1014	};
				1015
				1016	static const struct xattr_handler __maybe_unused
				1017	ovl_posix_acl_default_xattr_handler = {
				1018	.name = XATTR_NAME_POSIX_ACL_DEFAULT,
				1019	.flags = ACL_TYPE_DEFAULT,
				1020	.get = ovl_posix_acl_xattr_get,
				1021	.set = ovl_posix_acl_xattr_set,
				1022	};
				1023
				1024	static const struct xattr_handler ovl_own_xattr_handler = {
				1025	.prefix = OVL_XATTR_PREFIX,
				1026	.get = ovl_own_xattr_get,
				1027	.set = ovl_own_xattr_set,
				1028	};
				1029
				1030	static const struct xattr_handler ovl_other_xattr_handler = {
				1031	.prefix = "", /* catch all */
				1032	.get = ovl_other_xattr_get,
				1033	.set = ovl_other_xattr_set,
				1034	};
				1035
				1036	static const struct xattr_handler *ovl_xattr_handlers[] = {
				1037	#ifdef CONFIG_FS_POSIX_ACL
				1038	&ovl_posix_acl_access_xattr_handler,
				1039	&ovl_posix_acl_default_xattr_handler,
				1040	#endif
				1041	&ovl_own_xattr_handler,
				1042	&ovl_other_xattr_handler,
				1043	NULL
				1044	};
				1045
				1046	static int ovl_setup_trap(struct super_block sb, struct dentry dir,
				1047	struct inode *ptrap, const char name)
				1048	{
				1049	struct inode *trap;
				1050	int err;
				1051
				1052	trap = ovl_get_trap_inode(sb, dir);
				1053	err = PTR_ERR_OR_ZERO(trap);
				1054	if (err) {
				1055	if (err == -ELOOP)
				1056	pr_err("overlayfs: conflicting %s path\n", name);
				1057	return err;
				1058	}
				1059
				1060	*ptrap = trap;
				1061	return 0;
				1062	}
				1063
				1064	/*
				1065	* Determine how we treat concurrent use of upperdir/workdir based on the
				1066	* index feature. This is papering over mount leaks of container runtimes,
				1067	* for example, an old overlay mount is leaked and now its upperdir is
				1068	* attempted to be used as a lower layer in a new overlay mount.
				1069	*/
				1070	static int ovl_report_in_use(struct ovl_fs ofs, const char name)
				1071	{
				1072	if (ofs->config.index) {
				1073	pr_err("overlayfs: %s is in-use as upperdir/workdir of another mount, mount with '-o index=off' to override exclusive upperdir protection.\n",
				1074	name);
				1075	return -EBUSY;
				1076	} else {
				1077	pr_warn("overlayfs: %s is in-use as upperdir/workdir of another mount, accessing files from both mounts will result in undefined behavior.\n",
				1078	name);
				1079	return 0;
				1080	}
				1081	}
				1082
				1083	static int ovl_get_upper(struct super_block sb, struct ovl_fs ofs,
				1084	struct path *upperpath)
				1085	{
				1086	struct vfsmount *upper_mnt;
				1087	int err;
				1088
				1089	err = ovl_mount_dir(ofs->config.upperdir, upperpath);
				1090	if (err)
				1091	goto out;
				1092
				1093	/* Upper fs should not be r/o */
				1094	if (sb_rdonly(upperpath->mnt->mnt_sb)) {
				1095	pr_err("overlayfs: upper fs is r/o, try multi-lower layers mount\n");
				1096	err = -EINVAL;
				1097	goto out;
				1098	}
				1099
				1100	err = ovl_check_namelen(upperpath, ofs, ofs->config.upperdir);
				1101	if (err)
				1102	goto out;
				1103
				1104	err = ovl_setup_trap(sb, upperpath->dentry, &ofs->upperdir_trap,
				1105	"upperdir");
				1106	if (err)
				1107	goto out;
				1108
				1109	upper_mnt = clone_private_mount(upperpath);
				1110	err = PTR_ERR(upper_mnt);
				1111	if (IS_ERR(upper_mnt)) {
				1112	pr_err("overlayfs: failed to clone upperpath\n");
				1113	goto out;
				1114	}
				1115
				1116	/* Don't inherit atime flags */
				1117	upper_mnt->mnt_flags &= ~(MNT_NOATIME \| MNT_NODIRATIME \| MNT_RELATIME);
				1118	ofs->upper_mnt = upper_mnt;
				1119
				1120	if (ovl_inuse_trylock(ofs->upper_mnt->mnt_root)) {
				1121	ofs->upperdir_locked = true;
				1122	} else {
				1123	err = ovl_report_in_use(ofs, "upperdir");
				1124	if (err)
				1125	goto out;
				1126	}
				1127
				1128	err = 0;
				1129	out:
				1130	return err;
				1131	}
				1132
				1133	static int ovl_make_workdir(struct super_block sb, struct ovl_fs ofs,
				1134	struct path *workpath)
				1135	{
				1136	struct vfsmount *mnt = ofs->upper_mnt;
				1137	struct dentry *temp;
				1138	int fh_type;
				1139	int err;
				1140
				1141	err = mnt_want_write(mnt);
				1142	if (err)
				1143	return err;
				1144
				1145	ofs->workdir = ovl_workdir_create(ofs, OVL_WORKDIR_NAME, false);
				1146	if (!ofs->workdir)
				1147	goto out;
				1148
				1149	err = ovl_setup_trap(sb, ofs->workdir, &ofs->workdir_trap, "workdir");
				1150	if (err)
				1151	goto out;
				1152
				1153	/*
				1154	* Upper should support d_type, else whiteouts are visible. Given
				1155	* workdir and upper are on same fs, we can do iterate_dir() on
				1156	* workdir. This check requires successful creation of workdir in
				1157	* previous step.
				1158	*/
				1159	err = ovl_check_d_type_supported(workpath);
				1160	if (err < 0)
				1161	goto out;
				1162
				1163	/*
				1164	* We allowed this configuration and don't want to break users over
				1165	* kernel upgrade. So warn instead of erroring out.
				1166	*/
				1167	if (!err)
				1168	pr_warn("overlayfs: upper fs needs to support d_type.\n");
				1169
				1170	/* Check if upper/work fs supports O_TMPFILE */
				1171	temp = ovl_do_tmpfile(ofs->workdir, S_IFREG \| 0);
				1172	ofs->tmpfile = !IS_ERR(temp);
				1173	if (ofs->tmpfile)
				1174	dput(temp);
				1175	else
				1176	pr_warn("overlayfs: upper fs does not support tmpfile.\n");
				1177
				1178	/*
				1179	* Check if upper/work fs supports trusted.overlay.* xattr
				1180	*/
				1181	err = ovl_do_setxattr(ofs->workdir, OVL_XATTR_OPAQUE, "0", 1, 0);
				1182	if (err) {
				1183	ofs->noxattr = true;
				1184	ofs->config.index = false;
				1185	ofs->config.metacopy = false;
				1186	pr_warn("overlayfs: upper fs does not support xattr, falling back to index=off and metacopy=off.\n");
				1187	err = 0;
				1188	} else {
				1189	vfs_removexattr(ofs->workdir, OVL_XATTR_OPAQUE);
				1190	}
				1191
				1192	/* Check if upper/work fs supports file handles */
				1193	fh_type = ovl_can_decode_fh(ofs->workdir->d_sb);
				1194	if (ofs->config.index && !fh_type) {
				1195	ofs->config.index = false;
				1196	pr_warn("overlayfs: upper fs does not support file handles, falling back to index=off.\n");
				1197	}
				1198
				1199	/* Check if upper fs has 32bit inode numbers */
				1200	if (fh_type != FILEID_INO32_GEN)
				1201	ofs->xino_bits = 0;
				1202
				1203	/* NFS export of r/w mount depends on index */
				1204	if (ofs->config.nfs_export && !ofs->config.index) {
				1205	pr_warn("overlayfs: NFS export requires \"index=on\", falling back to nfs_export=off.\n");
				1206	ofs->config.nfs_export = false;
				1207	}
				1208	out:
				1209	mnt_drop_write(mnt);
				1210	return err;
				1211	}
				1212
				1213	static int ovl_get_workdir(struct super_block sb, struct ovl_fs ofs,
				1214	struct path *upperpath)
				1215	{
				1216	int err;
				1217	struct path workpath = { };
				1218
				1219	err = ovl_mount_dir(ofs->config.workdir, &workpath);
				1220	if (err)
				1221	goto out;
				1222
				1223	err = -EINVAL;
				1224	if (upperpath->mnt != workpath.mnt) {
				1225	pr_err("overlayfs: workdir and upperdir must reside under the same mount\n");
				1226	goto out;
				1227	}
				1228	if (!ovl_workdir_ok(workpath.dentry, upperpath->dentry)) {
				1229	pr_err("overlayfs: workdir and upperdir must be separate subtrees\n");
				1230	goto out;
				1231	}
				1232
				1233	ofs->workbasedir = dget(workpath.dentry);
				1234
				1235	if (ovl_inuse_trylock(ofs->workbasedir)) {
				1236	ofs->workdir_locked = true;
				1237	} else {
				1238	err = ovl_report_in_use(ofs, "workdir");
				1239	if (err)
				1240	goto out;
				1241	}
				1242
				1243	err = ovl_setup_trap(sb, ofs->workbasedir, &ofs->workbasedir_trap,
				1244	"workdir");
				1245	if (err)
				1246	goto out;
				1247
				1248	err = ovl_make_workdir(sb, ofs, &workpath);
				1249
				1250	out:
				1251	path_put(&workpath);
				1252
				1253	return err;
				1254	}
				1255
				1256	static int ovl_get_indexdir(struct super_block sb, struct ovl_fs ofs,
				1257	struct ovl_entry oe, struct path upperpath)
				1258	{
				1259	struct vfsmount *mnt = ofs->upper_mnt;
				1260	int err;
				1261
				1262	err = mnt_want_write(mnt);
				1263	if (err)
				1264	return err;
				1265
				1266	/* Verify lower root is upper root origin */
				1267	err = ovl_verify_origin(upperpath->dentry, oe->lowerstack[0].dentry,
				1268	true);
				1269	if (err) {
				1270	pr_err("overlayfs: failed to verify upper root origin\n");
				1271	goto out;
				1272	}
				1273
				1274	ofs->indexdir = ovl_workdir_create(ofs, OVL_INDEXDIR_NAME, true);
				1275	if (ofs->indexdir) {
				1276	err = ovl_setup_trap(sb, ofs->indexdir, &ofs->indexdir_trap,
				1277	"indexdir");
				1278	if (err)
				1279	goto out;
				1280
				1281	/*
				1282	* Verify upper root is exclusively associated with index dir.
				1283	* Older kernels stored upper fh in "trusted.overlay.origin"
				1284	* xattr. If that xattr exists, verify that it is a match to
				1285	* upper dir file handle. In any case, verify or set xattr
				1286	* "trusted.overlay.upper" to indicate that index may have
				1287	* directory entries.
				1288	*/
				1289	if (ovl_check_origin_xattr(ofs->indexdir)) {
				1290	err = ovl_verify_set_fh(ofs->indexdir, OVL_XATTR_ORIGIN,
				1291	upperpath->dentry, true, false);
				1292	if (err)
				1293	pr_err("overlayfs: failed to verify index dir 'origin' xattr\n");
				1294	}
				1295	err = ovl_verify_upper(ofs->indexdir, upperpath->dentry, true);
				1296	if (err)
				1297	pr_err("overlayfs: failed to verify index dir 'upper' xattr\n");
				1298
				1299	/* Cleanup bad/stale/orphan index entries */
				1300	if (!err)
				1301	err = ovl_indexdir_cleanup(ofs);
				1302	}
				1303	if (err \|\| !ofs->indexdir)
				1304	pr_warn("overlayfs: try deleting index dir or mounting with '-o index=off' to disable inodes index.\n");
				1305
				1306	out:
				1307	mnt_drop_write(mnt);
				1308	return err;
				1309	}
				1310
				1311	static bool ovl_lower_uuid_ok(struct ovl_fs ofs, const uuid_t uuid)
				1312	{
				1313	unsigned int i;
				1314
				1315	if (!ofs->config.nfs_export && !ofs->upper_mnt)
				1316	return true;
				1317
				1318	/*
				1319	* We allow using single lower with null uuid for index and nfs_export
				1320	* for example to support those features with single lower squashfs.
				1321	* To avoid regressions in setups of overlay with re-formatted lower
				1322	* squashfs, do not allow decoding origin with lower null uuid unless
				1323	* user opted-in to one of the new features that require following the
				1324	* lower inode of non-dir upper.
				1325	*/
				1326	if (!ofs->config.index && !ofs->config.metacopy && !ofs->config.xino &&
				1327	uuid_is_null(uuid))
				1328	return false;
				1329
				1330	for (i = 0; i < ofs->numlowerfs; i++) {
				1331	/*
				1332	* We use uuid to associate an overlay lower file handle with a
				1333	* lower layer, so we can accept lower fs with null uuid as long
				1334	* as all lower layers with null uuid are on the same fs.
				1335	* if we detect multiple lower fs with the same uuid, we
				1336	* disable lower file handle decoding on all of them.
				1337	*/
				1338	if (uuid_equal(&ofs->lower_fs[i].sb->s_uuid, uuid)) {
				1339	ofs->lower_fs[i].bad_uuid = true;
				1340	return false;
				1341	}
				1342	}
				1343	return true;
				1344	}
				1345
				1346	/* Get a unique fsid for the layer */
				1347	static int ovl_get_fsid(struct ovl_fs ofs, const struct path path)
				1348	{
				1349	struct super_block *sb = path->mnt->mnt_sb;
				1350	unsigned int i;
				1351	dev_t dev;
				1352	int err;
				1353	bool bad_uuid = false;
				1354
				1355	/* fsid 0 is reserved for upper fs even with non upper overlay */
				1356	if (ofs->upper_mnt && ofs->upper_mnt->mnt_sb == sb)
				1357	return 0;
				1358
				1359	for (i = 0; i < ofs->numlowerfs; i++) {
				1360	if (ofs->lower_fs[i].sb == sb)
				1361	return i + 1;
				1362	}
				1363
				1364	if (!ovl_lower_uuid_ok(ofs, &sb->s_uuid)) {
				1365	bad_uuid = true;
				1366	if (ofs->config.index \|\| ofs->config.nfs_export) {
				1367	ofs->config.index = false;
				1368	ofs->config.nfs_export = false;
				1369	pr_warn("overlayfs: %s uuid detected in lower fs '%pd2', falling back to index=off,nfs_export=off.\n",
				1370	uuid_is_null(&sb->s_uuid) ? "null" :
				1371	"conflicting",
				1372	path->dentry);
				1373	}
				1374	}
				1375
				1376	err = get_anon_bdev(&dev);
				1377	if (err) {
				1378	pr_err("overlayfs: failed to get anonymous bdev for lowerpath\n");
				1379	return err;
				1380	}
				1381
				1382	ofs->lower_fs[ofs->numlowerfs].sb = sb;
				1383	ofs->lower_fs[ofs->numlowerfs].pseudo_dev = dev;
				1384	ofs->lower_fs[ofs->numlowerfs].bad_uuid = bad_uuid;
				1385	ofs->numlowerfs++;
				1386
				1387	return ofs->numlowerfs;
				1388	}
				1389
				1390	static int ovl_get_lower_layers(struct super_block sb, struct ovl_fs ofs,
				1391	struct path *stack, unsigned int numlower)
				1392	{
				1393	int err;
				1394	unsigned int i;
				1395
				1396	err = -ENOMEM;
				1397	ofs->lower_layers = kcalloc(numlower, sizeof(struct ovl_layer),
				1398	GFP_KERNEL);
				1399	if (ofs->lower_layers == NULL)
				1400	goto out;
				1401
				1402	ofs->lower_fs = kcalloc(numlower, sizeof(struct ovl_sb),
				1403	GFP_KERNEL);
				1404	if (ofs->lower_fs == NULL)
				1405	goto out;
				1406
				1407	for (i = 0; i < numlower; i++) {
				1408	struct vfsmount *mnt;
				1409	struct inode *trap;
				1410	int fsid;
				1411
				1412	err = fsid = ovl_get_fsid(ofs, &stack[i]);
				1413	if (err < 0)
				1414	goto out;
				1415
				1416	/*
				1417	* Check if lower root conflicts with this overlay layers before
				1418	* checking if it is in-use as upperdir/workdir of "another"
				1419	* mount, because we do not bother to check in ovl_is_inuse() if
				1420	* the upperdir/workdir is in fact in-use by our
				1421	* upperdir/workdir.
				1422	*/
				1423	err = ovl_setup_trap(sb, stack[i].dentry, &trap, "lowerdir");
				1424	if (err)
				1425	goto out;
				1426
				1427	if (ovl_is_inuse(stack[i].dentry)) {
				1428	err = ovl_report_in_use(ofs, "lowerdir");
				1429	if (err) {
				1430	iput(trap);
				1431	goto out;
				1432	}
				1433	}
				1434
				1435	mnt = clone_private_mount(&stack[i]);
				1436	err = PTR_ERR(mnt);
				1437	if (IS_ERR(mnt)) {
				1438	pr_err("overlayfs: failed to clone lowerpath\n");
				1439	iput(trap);
				1440	goto out;
				1441	}
				1442
				1443	/*
				1444	* Make lower layers R/O. That way fchmod/fchown on lower file
				1445	* will fail instead of modifying lower fs.
				1446	*/
				1447	mnt->mnt_flags \|= MNT_READONLY \| MNT_NOATIME;
				1448
				1449	ofs->lower_layers[ofs->numlower].trap = trap;
				1450	ofs->lower_layers[ofs->numlower].mnt = mnt;
				1451	ofs->lower_layers[ofs->numlower].idx = i + 1;
				1452	ofs->lower_layers[ofs->numlower].fsid = fsid;
				1453	if (fsid) {
				1454	ofs->lower_layers[ofs->numlower].fs =
				1455	&ofs->lower_fs[fsid - 1];
				1456	}
				1457	ofs->numlower++;
				1458	}
				1459
				1460	/*
				1461	* When all layers on same fs, overlay can use real inode numbers.
				1462	* With mount option "xino=on", mounter declares that there are enough
				1463	* free high bits in underlying fs to hold the unique fsid.
				1464	* If overlayfs does encounter underlying inodes using the high xino
				1465	* bits reserved for fsid, it emits a warning and uses the original
				1466	* inode number.
				1467	*/
				1468	if (!ofs->numlowerfs \|\| (ofs->numlowerfs == 1 && !ofs->upper_mnt)) {
				1469	ofs->xino_bits = 0;
				1470	ofs->config.xino = OVL_XINO_OFF;
				1471	} else if (ofs->config.xino == OVL_XINO_ON && !ofs->xino_bits) {
				1472	/*
				1473	* This is a roundup of number of bits needed for numlowerfs+1
				1474	* (i.e. ilog2(numlowerfs+1 - 1) + 1). fsid 0 is reserved for
				1475	* upper fs even with non upper overlay.
				1476	*/
				1477	BUILD_BUG_ON(ilog2(OVL_MAX_STACK) > 31);
				1478	ofs->xino_bits = ilog2(ofs->numlowerfs) + 1;
				1479	}
				1480
				1481	if (ofs->xino_bits) {
				1482	pr_info("overlayfs: \"xino\" feature enabled using %d upper inode bits.\n",
				1483	ofs->xino_bits);
				1484	}
				1485
				1486	err = 0;
				1487	out:
				1488	return err;
				1489	}
				1490
				1491	static struct ovl_entry ovl_get_lowerstack(struct super_block sb,
				1492	struct ovl_fs *ofs)
				1493	{
				1494	int err;
				1495	char lowertmp, lower;
				1496	struct path *stack = NULL;
				1497	unsigned int stacklen, numlower = 0, i;
				1498	bool remote = false;
				1499	struct ovl_entry *oe;
				1500
				1501	err = -ENOMEM;
				1502	lowertmp = kstrdup(ofs->config.lowerdir, GFP_KERNEL);
				1503	if (!lowertmp)
				1504	goto out_err;
				1505
				1506	err = -EINVAL;
				1507	stacklen = ovl_split_lowerdirs(lowertmp);
				1508	if (stacklen > OVL_MAX_STACK) {
				1509	pr_err("overlayfs: too many lower directories, limit is %d\n",
				1510	OVL_MAX_STACK);
				1511	goto out_err;
				1512	} else if (!ofs->config.upperdir && stacklen == 1) {
				1513	pr_err("overlayfs: at least 2 lowerdir are needed while upperdir nonexistent\n");
				1514	goto out_err;
				1515	} else if (!ofs->config.upperdir && ofs->config.nfs_export &&
				1516	ofs->config.redirect_follow) {
				1517	pr_warn("overlayfs: NFS export requires \"redirect_dir=nofollow\" on non-upper mount, falling back to nfs_export=off.\n");
				1518	ofs->config.nfs_export = false;
				1519	}
				1520
				1521	err = -ENOMEM;
				1522	stack = kcalloc(stacklen, sizeof(struct path), GFP_KERNEL);
				1523	if (!stack)
				1524	goto out_err;
				1525
				1526	err = -EINVAL;
				1527	lower = lowertmp;
				1528	for (numlower = 0; numlower < stacklen; numlower++) {
				1529	err = ovl_lower_dir(lower, &stack[numlower], ofs,
				1530	&sb->s_stack_depth, &remote);
				1531	if (err)
				1532	goto out_err;
				1533
				1534	lower = strchr(lower, '\0') + 1;
				1535	}
				1536
				1537	err = -EINVAL;
				1538	sb->s_stack_depth++;
				1539	if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
				1540	pr_err("overlayfs: maximum fs stacking depth exceeded\n");
				1541	goto out_err;
				1542	}
				1543
				1544	err = ovl_get_lower_layers(sb, ofs, stack, numlower);
				1545	if (err)
				1546	goto out_err;
				1547
				1548	err = -ENOMEM;
				1549	oe = ovl_alloc_entry(numlower);
				1550	if (!oe)
				1551	goto out_err;
				1552
				1553	for (i = 0; i < numlower; i++) {
				1554	oe->lowerstack[i].dentry = dget(stack[i].dentry);
				1555	oe->lowerstack[i].layer = &ofs->lower_layers[i];
				1556	}
				1557
				1558	if (remote)
				1559	sb->s_d_op = &ovl_reval_dentry_operations;
				1560	else
				1561	sb->s_d_op = &ovl_dentry_operations;
				1562
				1563	out:
				1564	for (i = 0; i < numlower; i++)
				1565	path_put(&stack[i]);
				1566	kfree(stack);
				1567	kfree(lowertmp);
				1568
				1569	return oe;
				1570
				1571	out_err:
				1572	oe = ERR_PTR(err);
				1573	goto out;
				1574	}
				1575
				1576	/*
				1577	* Check if this layer root is a descendant of:
				1578	* - another layer of this overlayfs instance
				1579	* - upper/work dir of any overlayfs instance
				1580	*/
				1581	static int ovl_check_layer(struct super_block sb, struct ovl_fs ofs,
				1582	struct dentry dentry, const char name,
				1583	bool is_lower)
				1584	{
				1585	struct dentry next = dentry, parent;
				1586	int err = 0;
				1587
				1588	if (!dentry)
				1589	return 0;
				1590
				1591	parent = dget_parent(next);
				1592
				1593	/* Walk back ancestors to root (inclusive) looking for traps */
				1594	while (!err && parent != next) {
				1595	if (is_lower && ovl_lookup_trap_inode(sb, parent)) {
				1596	err = -ELOOP;
				1597	pr_err("overlayfs: overlapping %s path\n", name);
				1598	} else if (ovl_is_inuse(parent)) {
				1599	err = ovl_report_in_use(ofs, name);
				1600	}
				1601	next = parent;
				1602	parent = dget_parent(next);
				1603	dput(next);
				1604	}
				1605
				1606	dput(parent);
				1607
				1608	return err;
				1609	}
				1610
				1611	/*
				1612	* Check if any of the layers or work dirs overlap.
				1613	*/
				1614	static int ovl_check_overlapping_layers(struct super_block *sb,
				1615	struct ovl_fs *ofs)
				1616	{
				1617	int i, err;
				1618
				1619	if (ofs->upper_mnt) {
				1620	err = ovl_check_layer(sb, ofs, ofs->upper_mnt->mnt_root,
				1621	"upperdir", false);
				1622	if (err)
				1623	return err;
				1624
				1625	/*
				1626	* Checking workbasedir avoids hitting ovl_is_inuse(parent) of
				1627	* this instance and covers overlapping work and index dirs,
				1628	* unless work or index dir have been moved since created inside
				1629	* workbasedir. In that case, we already have their traps in
				1630	* inode cache and we will catch that case on lookup.
				1631	*/
				1632	err = ovl_check_layer(sb, ofs, ofs->workbasedir, "workdir",
				1633	false);
				1634	if (err)
				1635	return err;
				1636	}
				1637
				1638	for (i = 0; i < ofs->numlower; i++) {
				1639	err = ovl_check_layer(sb, ofs,
				1640	ofs->lower_layers[i].mnt->mnt_root,
				1641	"lowerdir", true);
				1642	if (err)
				1643	return err;
				1644	}
				1645
				1646	return 0;
				1647	}
				1648
				1649	static int ovl_fill_super(struct super_block sb, void data, int silent)
				1650	{
				1651	struct path upperpath = { };
				1652	struct dentry *root_dentry;
				1653	struct ovl_entry *oe;
				1654	struct ovl_fs *ofs;
				1655	struct cred *cred;
				1656	int err;
				1657
				1658	err = -ENOMEM;
				1659	ofs = kzalloc(sizeof(struct ovl_fs), GFP_KERNEL);
				1660	if (!ofs)
				1661	goto out;
				1662
				1663	ofs->creator_cred = cred = prepare_creds();
				1664	if (!cred)
				1665	goto out_err;
				1666
				1667	ofs->config.index = ovl_index_def;
				1668	ofs->config.nfs_export = ovl_nfs_export_def;
				1669	ofs->config.xino = ovl_xino_def();
				1670	ofs->config.fsync_mode = OVL_FSYNC_STRICT;
				1671	ofs->config.metacopy = ovl_metacopy_def;
				1672	err = ovl_parse_opt((char *) data, &ofs->config);
				1673	if (err)
				1674	goto out_err;
				1675
				1676	err = -EINVAL;
				1677	if (!ofs->config.lowerdir) {
				1678	if (!silent)
				1679	pr_err("overlayfs: missing 'lowerdir'\n");
				1680	goto out_err;
				1681	}
				1682
				1683	sb->s_stack_depth = 0;
				1684	sb->s_maxbytes = MAX_LFS_FILESIZE;
				1685	/* Assume underlaying fs uses 32bit inodes unless proven otherwise */
				1686	if (ofs->config.xino != OVL_XINO_OFF)
				1687	ofs->xino_bits = BITS_PER_LONG - 32;
				1688
				1689	/* alloc/destroy_inode needed for setting up traps in inode cache */
				1690	sb->s_op = &ovl_super_operations;
				1691
				1692	if (ofs->config.upperdir) {
				1693	if (!ofs->config.workdir) {
				1694	pr_err("overlayfs: missing 'workdir'\n");
				1695	goto out_err;
				1696	}
				1697
				1698	err = ovl_get_upper(sb, ofs, &upperpath);
				1699	if (err)
				1700	goto out_err;
				1701
				1702	err = ovl_get_workdir(sb, ofs, &upperpath);
				1703	if (err)
				1704	goto out_err;
				1705
				1706	if (!ofs->workdir)
				1707	sb->s_flags \|= SB_RDONLY;
				1708
				1709	sb->s_stack_depth = ofs->upper_mnt->mnt_sb->s_stack_depth;
				1710	sb->s_time_gran = ofs->upper_mnt->mnt_sb->s_time_gran;
				1711
				1712	}
				1713	oe = ovl_get_lowerstack(sb, ofs);
				1714	err = PTR_ERR(oe);
				1715	if (IS_ERR(oe))
				1716	goto out_err;
				1717
				1718	/* If the upper fs is nonexistent, we mark overlayfs r/o too */
				1719	if (!ofs->upper_mnt)
				1720	sb->s_flags \|= SB_RDONLY;
				1721
				1722	if (!(ovl_force_readonly(ofs)) && ofs->config.index) {
				1723	err = ovl_get_indexdir(sb, ofs, oe, &upperpath);
				1724	if (err)
				1725	goto out_free_oe;
				1726
				1727	/* Force r/o mount with no index dir */
				1728	if (!ofs->indexdir) {
				1729	dput(ofs->workdir);
				1730	ofs->workdir = NULL;
				1731	sb->s_flags \|= SB_RDONLY;
				1732	}
				1733
				1734	}
				1735
				1736	err = ovl_check_overlapping_layers(sb, ofs);
				1737	if (err)
				1738	goto out_free_oe;
				1739
				1740	/* Show index=off in /proc/mounts for forced r/o mount */
				1741	if (!ofs->indexdir) {
				1742	ofs->config.index = false;
				1743	if (ofs->upper_mnt && ofs->config.nfs_export) {
				1744	pr_warn("overlayfs: NFS export requires an index dir, falling back to nfs_export=off.\n");
				1745	ofs->config.nfs_export = false;
				1746	}
				1747	}
				1748
				1749	if (ofs->config.metacopy && ofs->config.nfs_export) {
				1750	pr_warn("overlayfs: NFS export is not supported with metadata only copy up, falling back to nfs_export=off.\n");
				1751	ofs->config.nfs_export = false;
				1752	}
				1753
				1754	if (ofs->config.nfs_export)
				1755	sb->s_export_op = &ovl_export_operations;
				1756
				1757	/* Never override disk quota limits or use reserved space */
				1758	cap_lower(cred->cap_effective, CAP_SYS_RESOURCE);
				1759
				1760	sb->s_magic = OVERLAYFS_SUPER_MAGIC;
				1761	sb->s_xattr = ovl_xattr_handlers;
				1762	sb->s_fs_info = ofs;
				1763	sb->s_flags \|= SB_POSIXACL;
				1764	sb->s_iflags \|= SB_I_SKIP_SYNC;
				1765
				1766	err = -ENOMEM;
				1767	root_dentry = d_make_root(ovl_new_inode(sb, S_IFDIR, 0));
				1768	if (!root_dentry)
				1769	goto out_free_oe;
				1770
				1771	root_dentry->d_fsdata = oe;
				1772
				1773	mntput(upperpath.mnt);
				1774	if (upperpath.dentry) {
				1775	ovl_dentry_set_upper_alias(root_dentry);
				1776	if (ovl_is_impuredir(upperpath.dentry))
				1777	ovl_set_flag(OVL_IMPURE, d_inode(root_dentry));
				1778	}
				1779
				1780	/* Root is always merge -> can have whiteouts */
				1781	ovl_set_flag(OVL_WHITEOUTS, d_inode(root_dentry));
				1782	ovl_dentry_set_flag(OVL_E_CONNECTED, root_dentry);
				1783	ovl_set_upperdata(d_inode(root_dentry));
				1784	ovl_inode_init(d_inode(root_dentry), upperpath.dentry,
				1785	ovl_dentry_lower(root_dentry), NULL);
				1786
				1787	sb->s_root = root_dentry;
				1788	return 0;
				1789
				1790	out_free_oe:
				1791	ovl_entry_stack_free(oe);
				1792	kfree(oe);
				1793	out_err:
				1794	path_put(&upperpath);
				1795	ovl_free_fs(ofs);
				1796	out:
				1797	return err;
				1798	}
				1799
				1800	static struct dentry ovl_mount(struct file_system_type fs_type, int flags,
				1801	const char dev_name, void raw_data)
				1802	{
				1803	return mount_nodev(fs_type, flags, raw_data, ovl_fill_super);
				1804	}
				1805
				1806	static struct file_system_type ovl_fs_type = {
				1807	.owner = THIS_MODULE,
				1808	.name = "overlay",
				1809	.mount = ovl_mount,
				1810	.kill_sb = kill_anon_super,
				1811	};
				1812	MODULE_ALIAS_FS("overlay");
				1813
				1814	static void ovl_inode_init_once(void *foo)
				1815	{
				1816	struct ovl_inode *oi = foo;
				1817
				1818	inode_init_once(&oi->vfs_inode);
				1819	}
				1820
				1821	static int __init ovl_init(void)
				1822	{
				1823	int err;
				1824
				1825	ovl_inode_cachep = kmem_cache_create("ovl_inode",
				1826	sizeof(struct ovl_inode), 0,
				1827	(SLAB_RECLAIM_ACCOUNT\|
				1828	SLAB_MEM_SPREAD\|SLAB_ACCOUNT),
				1829	ovl_inode_init_once);
				1830	if (ovl_inode_cachep == NULL)
				1831	return -ENOMEM;
				1832
				1833	err = register_filesystem(&ovl_fs_type);
				1834	if (err)
				1835	kmem_cache_destroy(ovl_inode_cachep);
				1836
				1837	return err;
				1838	}
				1839
				1840	static void __exit ovl_exit(void)
				1841	{
				1842	unregister_filesystem(&ovl_fs_type);
				1843
				1844	/*
				1845	* Make sure all delayed rcu free inodes are flushed before we
				1846	* destroy cache.
				1847	*/
				1848	rcu_barrier();
				1849	kmem_cache_destroy(ovl_inode_cachep);
				1850
				1851	}
				1852
				1853	module_init(ovl_init);
				1854	module_exit(ovl_exit);