Blame - src/kernel/linux/v4.19/fs/ceph/super.c - T800

blob: 2bd0b1ed9708e862e0cab012043c0a33051d41a7 [file] [log] [blame]

xj	b04a402	2021-11-25 15:01:52 +0800	[diff] [blame]	1
				2	#include <linux/ceph/ceph_debug.h>
				3
				4	#include <linux/backing-dev.h>
				5	#include <linux/ctype.h>
				6	#include <linux/fs.h>
				7	#include <linux/inet.h>
				8	#include <linux/in6.h>
				9	#include <linux/module.h>
				10	#include <linux/mount.h>
				11	#include <linux/parser.h>
				12	#include <linux/sched.h>
				13	#include <linux/seq_file.h>
				14	#include <linux/slab.h>
				15	#include <linux/statfs.h>
				16	#include <linux/string.h>
				17
				18	#include "super.h"
				19	#include "mds_client.h"
				20	#include "cache.h"
				21
				22	#include <linux/ceph/ceph_features.h>
				23	#include <linux/ceph/decode.h>
				24	#include <linux/ceph/mon_client.h>
				25	#include <linux/ceph/auth.h>
				26	#include <linux/ceph/debugfs.h>
				27
				28	/*
				29	* Ceph superblock operations
				30	*
				31	* Handle the basics of mounting, unmounting.
				32	*/
				33
				34	/*
				35	* super ops
				36	*/
				37	static void ceph_put_super(struct super_block *s)
				38	{
				39	struct ceph_fs_client *fsc = ceph_sb_to_client(s);
				40
				41	dout("put_super\n");
				42	ceph_mdsc_close_sessions(fsc->mdsc);
				43	}
				44
				45	static int ceph_statfs(struct dentry dentry, struct kstatfs buf)
				46	{
				47	struct ceph_fs_client *fsc = ceph_inode_to_client(d_inode(dentry));
				48	struct ceph_mon_client *monc = &fsc->client->monc;
				49	struct ceph_statfs st;
				50	u64 fsid;
				51	int err;
				52	u64 data_pool;
				53
				54	if (fsc->mdsc->mdsmap->m_num_data_pg_pools == 1) {
				55	data_pool = fsc->mdsc->mdsmap->m_data_pg_pools[0];
				56	} else {
				57	data_pool = CEPH_NOPOOL;
				58	}
				59
				60	dout("statfs\n");
				61	err = ceph_monc_do_statfs(monc, data_pool, &st);
				62	if (err < 0)
				63	return err;
				64
				65	/* fill in kstatfs */
				66	buf->f_type = CEPH_SUPER_MAGIC; /* ?? */
				67
				68	/*
				69	* express utilization in terms of large blocks to avoid
				70	* overflow on 32-bit machines.
				71	*
				72	* NOTE: for the time being, we make bsize == frsize to humor
				73	* not-yet-ancient versions of glibc that are broken.
				74	* Someday, we will probably want to report a real block
				75	* size... whatever that may mean for a network file system!
				76	*/
				77	buf->f_bsize = 1 << CEPH_BLOCK_SHIFT;
				78	buf->f_frsize = 1 << CEPH_BLOCK_SHIFT;
				79
				80	/*
				81	* By default use root quota for stats; fallback to overall filesystem
				82	* usage if using 'noquotadf' mount option or if the root dir doesn't
				83	* have max_bytes quota set.
				84	*/
				85	if (ceph_test_mount_opt(fsc, NOQUOTADF) \|\|
				86	!ceph_quota_update_statfs(fsc, buf)) {
				87	buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10);
				88	buf->f_bfree = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10);
				89	buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10);
				90	}
				91
				92	buf->f_files = le64_to_cpu(st.num_objects);
				93	buf->f_ffree = -1;
				94	buf->f_namelen = NAME_MAX;
				95
				96	/* Must convert the fsid, for consistent values across arches */
				97	mutex_lock(&monc->mutex);
				98	fsid = le64_to_cpu((__le64 )(&monc->monmap->fsid)) ^
				99	le64_to_cpu(((__le64 )&monc->monmap->fsid + 1));
				100	mutex_unlock(&monc->mutex);
				101
				102	buf->f_fsid.val[0] = fsid & 0xffffffff;
				103	buf->f_fsid.val[1] = fsid >> 32;
				104
				105	return 0;
				106	}
				107
				108
				109	static int ceph_sync_fs(struct super_block *sb, int wait)
				110	{
				111	struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
				112
				113	if (!wait) {
				114	dout("sync_fs (non-blocking)\n");
				115	ceph_flush_dirty_caps(fsc->mdsc);
				116	dout("sync_fs (non-blocking) done\n");
				117	return 0;
				118	}
				119
				120	dout("sync_fs (blocking)\n");
				121	ceph_osdc_sync(&fsc->client->osdc);
				122	ceph_mdsc_sync(fsc->mdsc);
				123	dout("sync_fs (blocking) done\n");
				124	return 0;
				125	}
				126
				127	/*
				128	* mount options
				129	*/
				/*
				 * Mount option tokens.  The order matters: parse_fsopt_token() treats
				 * every token below Opt_last_int as taking an integer argument and
				 * every token between Opt_last_int and Opt_last_string as taking a
				 * string argument.
				 */
				enum {
					/* options with an integer argument */
					Opt_wsize,
					Opt_rsize,
					Opt_rasize,
					Opt_caps_wanted_delay_min,
					Opt_caps_wanted_delay_max,
					Opt_readdir_max_entries,
					Opt_readdir_max_bytes,
					Opt_congestion_kb,
					Opt_last_int,		/* marker: int args above */

					/* options with a string argument */
					Opt_snapdirname,
					Opt_mds_namespace,
					Opt_fscache_uniq,
					Opt_last_string,	/* marker: string args above */

					/* plain flags, no argument */
					Opt_dirstat,
					Opt_nodirstat,
					Opt_rbytes,
					Opt_norbytes,
					Opt_asyncreaddir,
					Opt_noasyncreaddir,
					Opt_dcache,
					Opt_nodcache,
					Opt_ino32,
					Opt_noino32,
					Opt_fscache,
					Opt_nofscache,
					Opt_poolperm,
					Opt_nopoolperm,
					Opt_require_active_mds,
					Opt_norequire_active_mds,
				#ifdef CONFIG_CEPH_FS_POSIX_ACL
					Opt_acl,
				#endif
					Opt_noacl,
					Opt_quotadf,
					Opt_noquotadf,
				};
				169
				170	static match_table_t fsopt_tokens = {
				171	{Opt_wsize, "wsize=%d"},
				172	{Opt_rsize, "rsize=%d"},
				173	{Opt_rasize, "rasize=%d"},
				174	{Opt_caps_wanted_delay_min, "caps_wanted_delay_min=%d"},
				175	{Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"},
				176	{Opt_readdir_max_entries, "readdir_max_entries=%d"},
				177	{Opt_readdir_max_bytes, "readdir_max_bytes=%d"},
				178	{Opt_congestion_kb, "write_congestion_kb=%d"},
				179	/* int args above */
				180	{Opt_snapdirname, "snapdirname=%s"},
				181	{Opt_mds_namespace, "mds_namespace=%s"},
				182	{Opt_fscache_uniq, "fsc=%s"},
				183	/* string args above */
				184	{Opt_dirstat, "dirstat"},
				185	{Opt_nodirstat, "nodirstat"},
				186	{Opt_rbytes, "rbytes"},
				187	{Opt_norbytes, "norbytes"},
				188	{Opt_asyncreaddir, "asyncreaddir"},
				189	{Opt_noasyncreaddir, "noasyncreaddir"},
				190	{Opt_dcache, "dcache"},
				191	{Opt_nodcache, "nodcache"},
				192	{Opt_ino32, "ino32"},
				193	{Opt_noino32, "noino32"},
				194	{Opt_fscache, "fsc"},
				195	{Opt_nofscache, "nofsc"},
				196	{Opt_poolperm, "poolperm"},
				197	{Opt_nopoolperm, "nopoolperm"},
				198	{Opt_require_active_mds, "require_active_mds"},
				199	{Opt_norequire_active_mds, "norequire_active_mds"},
				200	#ifdef CONFIG_CEPH_FS_POSIX_ACL
				201	{Opt_acl, "acl"},
				202	#endif
				203	{Opt_noacl, "noacl"},
				204	{Opt_quotadf, "quotadf"},
				205	{Opt_noquotadf, "noquotadf"},
				206	{-1, NULL}
				207	};
				208
				209	static int parse_fsopt_token(char c, void private)
				210	{
				211	struct ceph_mount_options *fsopt = private;
				212	substring_t argstr[MAX_OPT_ARGS];
				213	int token, intval, ret;
				214
				215	token = match_token((char *)c, fsopt_tokens, argstr);
				216	if (token < 0)
				217	return -EINVAL;
				218
				219	if (token < Opt_last_int) {
				220	ret = match_int(&argstr[0], &intval);
				221	if (ret < 0) {
				222	pr_err("bad option arg (not int) at '%s'\n", c);
				223	return ret;
				224	}
				225	dout("got int token %d val %d\n", token, intval);
				226	} else if (token > Opt_last_int && token < Opt_last_string) {
				227	dout("got string token %d val %s\n", token,
				228	argstr[0].from);
				229	} else {
				230	dout("got token %d\n", token);
				231	}
				232
				233	switch (token) {
				234	case Opt_snapdirname:
				235	kfree(fsopt->snapdir_name);
				236	fsopt->snapdir_name = kstrndup(argstr[0].from,
				237	argstr[0].to-argstr[0].from,
				238	GFP_KERNEL);
				239	if (!fsopt->snapdir_name)
				240	return -ENOMEM;
				241	break;
				242	case Opt_mds_namespace:
				243	kfree(fsopt->mds_namespace);
				244	fsopt->mds_namespace = kstrndup(argstr[0].from,
				245	argstr[0].to-argstr[0].from,
				246	GFP_KERNEL);
				247	if (!fsopt->mds_namespace)
				248	return -ENOMEM;
				249	break;
				250	case Opt_fscache_uniq:
				251	#ifdef CONFIG_CEPH_FSCACHE
				252	kfree(fsopt->fscache_uniq);
				253	fsopt->fscache_uniq = kstrndup(argstr[0].from,
				254	argstr[0].to-argstr[0].from,
				255	GFP_KERNEL);
				256	if (!fsopt->fscache_uniq)
				257	return -ENOMEM;
				258	fsopt->flags \|= CEPH_MOUNT_OPT_FSCACHE;
				259	break;
				260	#else
				261	pr_err("fscache support is disabled\n");
				262	return -EINVAL;
				263	#endif
				264	case Opt_wsize:
				265	if (intval < (int)PAGE_SIZE \|\| intval > CEPH_MAX_WRITE_SIZE)
				266	return -EINVAL;
				267	fsopt->wsize = ALIGN(intval, PAGE_SIZE);
				268	break;
				269	case Opt_rsize:
				270	if (intval < (int)PAGE_SIZE \|\| intval > CEPH_MAX_READ_SIZE)
				271	return -EINVAL;
				272	fsopt->rsize = ALIGN(intval, PAGE_SIZE);
				273	break;
				274	case Opt_rasize:
				275	if (intval < 0)
				276	return -EINVAL;
				277	fsopt->rasize = ALIGN(intval, PAGE_SIZE);
				278	break;
				279	case Opt_caps_wanted_delay_min:
				280	if (intval < 1)
				281	return -EINVAL;
				282	fsopt->caps_wanted_delay_min = intval;
				283	break;
				284	case Opt_caps_wanted_delay_max:
				285	if (intval < 1)
				286	return -EINVAL;
				287	fsopt->caps_wanted_delay_max = intval;
				288	break;
				289	case Opt_readdir_max_entries:
				290	if (intval < 1)
				291	return -EINVAL;
				292	fsopt->max_readdir = intval;
				293	break;
				294	case Opt_readdir_max_bytes:
				295	if (intval < (int)PAGE_SIZE && intval != 0)
				296	return -EINVAL;
				297	fsopt->max_readdir_bytes = intval;
				298	break;
				299	case Opt_congestion_kb:
				300	if (intval < 1024) /* at least 1M */
				301	return -EINVAL;
				302	fsopt->congestion_kb = intval;
				303	break;
				304	case Opt_dirstat:
				305	fsopt->flags \|= CEPH_MOUNT_OPT_DIRSTAT;
				306	break;
				307	case Opt_nodirstat:
				308	fsopt->flags &= ~CEPH_MOUNT_OPT_DIRSTAT;
				309	break;
				310	case Opt_rbytes:
				311	fsopt->flags \|= CEPH_MOUNT_OPT_RBYTES;
				312	break;
				313	case Opt_norbytes:
				314	fsopt->flags &= ~CEPH_MOUNT_OPT_RBYTES;
				315	break;
				316	case Opt_asyncreaddir:
				317	fsopt->flags &= ~CEPH_MOUNT_OPT_NOASYNCREADDIR;
				318	break;
				319	case Opt_noasyncreaddir:
				320	fsopt->flags \|= CEPH_MOUNT_OPT_NOASYNCREADDIR;
				321	break;
				322	case Opt_dcache:
				323	fsopt->flags \|= CEPH_MOUNT_OPT_DCACHE;
				324	break;
				325	case Opt_nodcache:
				326	fsopt->flags &= ~CEPH_MOUNT_OPT_DCACHE;
				327	break;
				328	case Opt_ino32:
				329	fsopt->flags \|= CEPH_MOUNT_OPT_INO32;
				330	break;
				331	case Opt_noino32:
				332	fsopt->flags &= ~CEPH_MOUNT_OPT_INO32;
				333	break;
				334	case Opt_fscache:
				335	#ifdef CONFIG_CEPH_FSCACHE
				336	fsopt->flags \|= CEPH_MOUNT_OPT_FSCACHE;
				337	kfree(fsopt->fscache_uniq);
				338	fsopt->fscache_uniq = NULL;
				339	break;
				340	#else
				341	pr_err("fscache support is disabled\n");
				342	return -EINVAL;
				343	#endif
				344	case Opt_nofscache:
				345	fsopt->flags &= ~CEPH_MOUNT_OPT_FSCACHE;
				346	kfree(fsopt->fscache_uniq);
				347	fsopt->fscache_uniq = NULL;
				348	break;
				349	case Opt_poolperm:
				350	fsopt->flags &= ~CEPH_MOUNT_OPT_NOPOOLPERM;
				351	break;
				352	case Opt_nopoolperm:
				353	fsopt->flags \|= CEPH_MOUNT_OPT_NOPOOLPERM;
				354	break;
				355	case Opt_require_active_mds:
				356	fsopt->flags &= ~CEPH_MOUNT_OPT_MOUNTWAIT;
				357	break;
				358	case Opt_norequire_active_mds:
				359	fsopt->flags \|= CEPH_MOUNT_OPT_MOUNTWAIT;
				360	break;
				361	case Opt_quotadf:
				362	fsopt->flags &= ~CEPH_MOUNT_OPT_NOQUOTADF;
				363	break;
				364	case Opt_noquotadf:
				365	fsopt->flags \|= CEPH_MOUNT_OPT_NOQUOTADF;
				366	break;
				367	#ifdef CONFIG_CEPH_FS_POSIX_ACL
				368	case Opt_acl:
				369	fsopt->sb_flags \|= SB_POSIXACL;
				370	break;
				371	#endif
				372	case Opt_noacl:
				373	fsopt->sb_flags &= ~SB_POSIXACL;
				374	break;
				375	default:
				376	BUG_ON(token);
				377	}
				378	return 0;
				379	}
				380
				381	static void destroy_mount_options(struct ceph_mount_options *args)
				382	{
				383	dout("destroy_mount_options %p\n", args);
				384	kfree(args->snapdir_name);
				385	kfree(args->mds_namespace);
				386	kfree(args->server_path);
				387	kfree(args->fscache_uniq);
				388	kfree(args);
				389	}
				390
				/*
				 * strcmp() that tolerates NULL arguments.
				 *
				 * Returns 0 when both strings are NULL, -1 when only @s2 is NULL,
				 * 1 when only @s1 is NULL, and strcmp(s1, s2) when neither is NULL.
				 */
				static int strcmp_null(const char *s1, const char *s2)
				{
					if (!s1 && !s2)
						return 0;
					if (s1 && !s2)
						return -1;
					if (!s1 && s2)
						return 1;
					return strcmp(s1, s2);
				}
				401
				402	static int compare_mount_options(struct ceph_mount_options *new_fsopt,
				403	struct ceph_options *new_opt,
				404	struct ceph_fs_client *fsc)
				405	{
				406	struct ceph_mount_options *fsopt1 = new_fsopt;
				407	struct ceph_mount_options *fsopt2 = fsc->mount_options;
				408	int ofs = offsetof(struct ceph_mount_options, snapdir_name);
				409	int ret;
				410
				411	ret = memcmp(fsopt1, fsopt2, ofs);
				412	if (ret)
				413	return ret;
				414
				415	ret = strcmp_null(fsopt1->snapdir_name, fsopt2->snapdir_name);
				416	if (ret)
				417	return ret;
				418	ret = strcmp_null(fsopt1->mds_namespace, fsopt2->mds_namespace);
				419	if (ret)
				420	return ret;
				421	ret = strcmp_null(fsopt1->server_path, fsopt2->server_path);
				422	if (ret)
				423	return ret;
				424	ret = strcmp_null(fsopt1->fscache_uniq, fsopt2->fscache_uniq);
				425	if (ret)
				426	return ret;
				427
				428	return ceph_compare_options(new_opt, fsc->client);
				429	}
				430
				431	static int parse_mount_options(struct ceph_mount_options **pfsopt,
				432	struct ceph_options **popt,
				433	int flags, char *options,
				434	const char *dev_name)
				435	{
				436	struct ceph_mount_options *fsopt;
				437	const char *dev_name_end;
				438	int err;
				439
				440	if (!dev_name \|\| !*dev_name)
				441	return -EINVAL;
				442
				443	fsopt = kzalloc(sizeof(*fsopt), GFP_KERNEL);
				444	if (!fsopt)
				445	return -ENOMEM;
				446
				447	dout("parse_mount_options %p, dev_name '%s'\n", fsopt, dev_name);
				448
				449	fsopt->sb_flags = flags;
				450	fsopt->flags = CEPH_MOUNT_OPT_DEFAULT;
				451
				452	fsopt->wsize = CEPH_MAX_WRITE_SIZE;
				453	fsopt->rsize = CEPH_MAX_READ_SIZE;
				454	fsopt->rasize = CEPH_RASIZE_DEFAULT;
				455	fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL);
				456	if (!fsopt->snapdir_name) {
				457	err = -ENOMEM;
				458	goto out;
				459	}
				460
				461	fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT;
				462	fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT;
				463	fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT;
				464	fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT;
				465	fsopt->congestion_kb = default_congestion_kb();
				466
				467	/*
				468	* Distinguish the server list from the path in "dev_name".
				469	* Internally we do not include the leading '/' in the path.
				470	*
				471	* "dev_name" will look like:
				472	* <server_spec>[,<server_spec>...]:[<path>]
				473	* where
				474	* <server_spec> is <ip>[:<port>]
				475	* <path> is optional, but if present must begin with '/'
				476	*/
				477	dev_name_end = strchr(dev_name, '/');
				478	if (dev_name_end) {
				479	if (strlen(dev_name_end) > 1) {
				480	fsopt->server_path = kstrdup(dev_name_end, GFP_KERNEL);
				481	if (!fsopt->server_path) {
				482	err = -ENOMEM;
				483	goto out;
				484	}
				485	}
				486	} else {
				487	dev_name_end = dev_name + strlen(dev_name);
				488	}
				489	err = -EINVAL;
				490	dev_name_end--; /* back up to ':' separator */
				491	if (dev_name_end < dev_name \|\| *dev_name_end != ':') {
				492	pr_err("device name is missing path (no : separator in %s)\n",
				493	dev_name);
				494	goto out;
				495	}
				496	dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name);
				497	if (fsopt->server_path)
				498	dout("server path '%s'\n", fsopt->server_path);
				499
				500	*popt = ceph_parse_options(options, dev_name, dev_name_end,
				501	parse_fsopt_token, (void *)fsopt);
				502	if (IS_ERR(*popt)) {
				503	err = PTR_ERR(*popt);
				504	goto out;
				505	}
				506
				507	/* success */
				508	*pfsopt = fsopt;
				509	return 0;
				510
				511	out:
				512	destroy_mount_options(fsopt);
				513	return err;
				514	}
				515
				516	/**
				517	* ceph_show_options - Show mount options in /proc/mounts
				518	* @m: seq_file to write to
				519	* @root: root of that (sub)tree
				520	*/
				521	static int ceph_show_options(struct seq_file m, struct dentry root)
				522	{
				523	struct ceph_fs_client *fsc = ceph_sb_to_client(root->d_sb);
				524	struct ceph_mount_options *fsopt = fsc->mount_options;
				525	size_t pos;
				526	int ret;
				527
				528	/* a comma between MNT/MS and client options */
				529	seq_putc(m, ',');
				530	pos = m->count;
				531
				532	ret = ceph_print_client_options(m, fsc->client);
				533	if (ret)
				534	return ret;
				535
				536	/* retract our comma if no client options */
				537	if (m->count == pos)
				538	m->count--;
				539
				540	if (fsopt->flags & CEPH_MOUNT_OPT_DIRSTAT)
				541	seq_puts(m, ",dirstat");
				542	if ((fsopt->flags & CEPH_MOUNT_OPT_RBYTES))
				543	seq_puts(m, ",rbytes");
				544	if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR)
				545	seq_puts(m, ",noasyncreaddir");
				546	if ((fsopt->flags & CEPH_MOUNT_OPT_DCACHE) == 0)
				547	seq_puts(m, ",nodcache");
				548	if (fsopt->flags & CEPH_MOUNT_OPT_INO32)
				549	seq_puts(m, ",ino32");
				550	if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) {
				551	seq_show_option(m, "fsc", fsopt->fscache_uniq);
				552	}
				553	if (fsopt->flags & CEPH_MOUNT_OPT_NOPOOLPERM)
				554	seq_puts(m, ",nopoolperm");
				555	if (fsopt->flags & CEPH_MOUNT_OPT_NOQUOTADF)
				556	seq_puts(m, ",noquotadf");
				557
				558	#ifdef CONFIG_CEPH_FS_POSIX_ACL
				559	if (fsopt->sb_flags & SB_POSIXACL)
				560	seq_puts(m, ",acl");
				561	else
				562	seq_puts(m, ",noacl");
				563	#endif
				564
				565	if (fsopt->mds_namespace)
				566	seq_show_option(m, "mds_namespace", fsopt->mds_namespace);
				567	if (fsopt->wsize != CEPH_MAX_WRITE_SIZE)
				568	seq_printf(m, ",wsize=%d", fsopt->wsize);
				569	if (fsopt->rsize != CEPH_MAX_READ_SIZE)
				570	seq_printf(m, ",rsize=%d", fsopt->rsize);
				571	if (fsopt->rasize != CEPH_RASIZE_DEFAULT)
				572	seq_printf(m, ",rasize=%d", fsopt->rasize);
				573	if (fsopt->congestion_kb != default_congestion_kb())
				574	seq_printf(m, ",write_congestion_kb=%d", fsopt->congestion_kb);
				575	if (fsopt->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT)
				576	seq_printf(m, ",caps_wanted_delay_min=%d",
				577	fsopt->caps_wanted_delay_min);
				578	if (fsopt->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT)
				579	seq_printf(m, ",caps_wanted_delay_max=%d",
				580	fsopt->caps_wanted_delay_max);
				581	if (fsopt->max_readdir != CEPH_MAX_READDIR_DEFAULT)
				582	seq_printf(m, ",readdir_max_entries=%d", fsopt->max_readdir);
				583	if (fsopt->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT)
				584	seq_printf(m, ",readdir_max_bytes=%d", fsopt->max_readdir_bytes);
				585	if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT))
				586	seq_show_option(m, "snapdirname", fsopt->snapdir_name);
				587
				588	return 0;
				589	}
				590
				591	/*
				592	* handle any mon messages the standard library doesn't understand.
				593	* return error if we don't either.
				594	*/
				595	static int extra_mon_dispatch(struct ceph_client client, struct ceph_msg msg)
				596	{
				597	struct ceph_fs_client *fsc = client->private;
				598	int type = le16_to_cpu(msg->hdr.type);
				599
				600	switch (type) {
				601	case CEPH_MSG_MDS_MAP:
				602	ceph_mdsc_handle_mdsmap(fsc->mdsc, msg);
				603	return 0;
				604	case CEPH_MSG_FS_MAP_USER:
				605	ceph_mdsc_handle_fsmap(fsc->mdsc, msg);
				606	return 0;
				607	default:
				608	return -1;
				609	}
				610	}
				611
				612	/*
				613	* create a new fs client
				614	*
				615	* Success or not, this function consumes @fsopt and @opt.
				616	*/
				617	static struct ceph_fs_client create_fs_client(struct ceph_mount_options fsopt,
				618	struct ceph_options *opt)
				619	{
				620	struct ceph_fs_client *fsc;
				621	int page_count;
				622	size_t size;
				623	int err;
				624
				625	fsc = kzalloc(sizeof(*fsc), GFP_KERNEL);
				626	if (!fsc) {
				627	err = -ENOMEM;
				628	goto fail;
				629	}
				630
				631	fsc->client = ceph_create_client(opt, fsc);
				632	if (IS_ERR(fsc->client)) {
				633	err = PTR_ERR(fsc->client);
				634	goto fail;
				635	}
				636	opt = NULL; /* fsc->client now owns this */
				637
				638	fsc->client->extra_mon_dispatch = extra_mon_dispatch;
				639	fsc->client->osdc.abort_on_full = true;
				640
				641	if (!fsopt->mds_namespace) {
				642	ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_MDSMAP,
				643	0, true);
				644	} else {
				645	ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_FSMAP,
				646	0, false);
				647	}
				648
				649	fsc->mount_options = fsopt;
				650
				651	fsc->sb = NULL;
				652	fsc->mount_state = CEPH_MOUNT_MOUNTING;
				653
				654	atomic_long_set(&fsc->writeback_count, 0);
				655
				656	err = -ENOMEM;
				657	/*
				658	* The number of concurrent works can be high but they don't need
				659	* to be processed in parallel, limit concurrency.
				660	*/
				661	fsc->wb_wq = alloc_workqueue("ceph-writeback", 0, 1);
				662	if (!fsc->wb_wq)
				663	goto fail_client;
				664	fsc->pg_inv_wq = alloc_workqueue("ceph-pg-invalid", 0, 1);
				665	if (!fsc->pg_inv_wq)
				666	goto fail_wb_wq;
				667	fsc->trunc_wq = alloc_workqueue("ceph-trunc", 0, 1);
				668	if (!fsc->trunc_wq)
				669	goto fail_pg_inv_wq;
				670
				671	/* set up mempools */
				672	err = -ENOMEM;
				673	page_count = fsc->mount_options->wsize >> PAGE_SHIFT;
				674	size = sizeof (struct page ) (page_count ? page_count : 1);
				675	fsc->wb_pagevec_pool = mempool_create_kmalloc_pool(10, size);
				676	if (!fsc->wb_pagevec_pool)
				677	goto fail_trunc_wq;
				678
				679	/* caps */
				680	fsc->min_caps = fsopt->max_readdir;
				681
				682	return fsc;
				683
				684	fail_trunc_wq:
				685	destroy_workqueue(fsc->trunc_wq);
				686	fail_pg_inv_wq:
				687	destroy_workqueue(fsc->pg_inv_wq);
				688	fail_wb_wq:
				689	destroy_workqueue(fsc->wb_wq);
				690	fail_client:
				691	ceph_destroy_client(fsc->client);
				692	fail:
				693	kfree(fsc);
				694	if (opt)
				695	ceph_destroy_options(opt);
				696	destroy_mount_options(fsopt);
				697	return ERR_PTR(err);
				698	}
				699
				700	static void flush_fs_workqueues(struct ceph_fs_client *fsc)
				701	{
				702	flush_workqueue(fsc->wb_wq);
				703	flush_workqueue(fsc->pg_inv_wq);
				704	flush_workqueue(fsc->trunc_wq);
				705	}
				706
				707	static void destroy_fs_client(struct ceph_fs_client *fsc)
				708	{
				709	dout("destroy_fs_client %p\n", fsc);
				710
				711	destroy_workqueue(fsc->wb_wq);
				712	destroy_workqueue(fsc->pg_inv_wq);
				713	destroy_workqueue(fsc->trunc_wq);
				714
				715	mempool_destroy(fsc->wb_pagevec_pool);
				716
				717	destroy_mount_options(fsc->mount_options);
				718
				719	ceph_destroy_client(fsc->client);
				720
				721	kfree(fsc);
				722	dout("destroy_fs_client %p done\n", fsc);
				723	}
				724
				725	/*
				726	* caches
				727	*/
				/* slab caches; created in init_caches(), torn down in destroy_caches() */
				struct kmem_cache *ceph_inode_cachep;		/* struct ceph_inode_info */
				struct kmem_cache *ceph_cap_cachep;		/* struct ceph_cap */
				struct kmem_cache *ceph_cap_flush_cachep;	/* struct ceph_cap_flush */
				struct kmem_cache *ceph_dentry_cachep;		/* struct ceph_dentry_info */
				struct kmem_cache *ceph_file_cachep;		/* struct ceph_file_info */
				struct kmem_cache *ceph_dir_file_cachep;	/* struct ceph_dir_file_info */
				734
				735	static void ceph_inode_init_once(void *foo)
				736	{
				737	struct ceph_inode_info *ci = foo;
				738	inode_init_once(&ci->vfs_inode);
				739	}
				740
				741	static int __init init_caches(void)
				742	{
				743	int error = -ENOMEM;
				744
				745	ceph_inode_cachep = kmem_cache_create("ceph_inode_info",
				746	sizeof(struct ceph_inode_info),
				747	__alignof__(struct ceph_inode_info),
				748	SLAB_RECLAIM_ACCOUNT\|SLAB_MEM_SPREAD\|
				749	SLAB_ACCOUNT, ceph_inode_init_once);
				750	if (!ceph_inode_cachep)
				751	return -ENOMEM;
				752
				753	ceph_cap_cachep = KMEM_CACHE(ceph_cap, SLAB_MEM_SPREAD);
				754	if (!ceph_cap_cachep)
				755	goto bad_cap;
				756	ceph_cap_flush_cachep = KMEM_CACHE(ceph_cap_flush,
				757	SLAB_RECLAIM_ACCOUNT\|SLAB_MEM_SPREAD);
				758	if (!ceph_cap_flush_cachep)
				759	goto bad_cap_flush;
				760
				761	ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info,
				762	SLAB_RECLAIM_ACCOUNT\|SLAB_MEM_SPREAD);
				763	if (!ceph_dentry_cachep)
				764	goto bad_dentry;
				765
				766	ceph_file_cachep = KMEM_CACHE(ceph_file_info, SLAB_MEM_SPREAD);
				767	if (!ceph_file_cachep)
				768	goto bad_file;
				769
				770	ceph_dir_file_cachep = KMEM_CACHE(ceph_dir_file_info, SLAB_MEM_SPREAD);
				771	if (!ceph_dir_file_cachep)
				772	goto bad_dir_file;
				773
				774	error = ceph_fscache_register();
				775	if (error)
				776	goto bad_fscache;
				777
				778	return 0;
				779
				780	bad_fscache:
				781	kmem_cache_destroy(ceph_dir_file_cachep);
				782	bad_dir_file:
				783	kmem_cache_destroy(ceph_file_cachep);
				784	bad_file:
				785	kmem_cache_destroy(ceph_dentry_cachep);
				786	bad_dentry:
				787	kmem_cache_destroy(ceph_cap_flush_cachep);
				788	bad_cap_flush:
				789	kmem_cache_destroy(ceph_cap_cachep);
				790	bad_cap:
				791	kmem_cache_destroy(ceph_inode_cachep);
				792	return error;
				793	}
				794
				795	static void destroy_caches(void)
				796	{
				797	/*
				798	* Make sure all delayed rcu free inodes are flushed before we
				799	* destroy cache.
				800	*/
				801	rcu_barrier();
				802
				803	kmem_cache_destroy(ceph_inode_cachep);
				804	kmem_cache_destroy(ceph_cap_cachep);
				805	kmem_cache_destroy(ceph_cap_flush_cachep);
				806	kmem_cache_destroy(ceph_dentry_cachep);
				807	kmem_cache_destroy(ceph_file_cachep);
				808	kmem_cache_destroy(ceph_dir_file_cachep);
				809
				810	ceph_fscache_unregister();
				811	}
				812
				813
				814	/*
				815	* ceph_umount_begin - initiate forced umount. Tear down down the
				816	* mount, skipping steps that may hang while waiting for server(s).
				817	*/
				818	static void ceph_umount_begin(struct super_block *sb)
				819	{
				820	struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
				821
				822	dout("ceph_umount_begin - starting forced umount\n");
				823	if (!fsc)
				824	return;
				825	fsc->mount_state = CEPH_MOUNT_SHUTDOWN;
				826	ceph_osdc_abort_requests(&fsc->client->osdc, -EIO);
				827	ceph_mdsc_force_umount(fsc->mdsc);
				828	return;
				829	}
				830
				/*
				 * ->remount_fs handler: sync the filesystem and report success.
				 * @flags and @data are not examined.
				 */
				static int ceph_remount(struct super_block *sb, int *flags, char *data)
				{
					sync_filesystem(sb);
					return 0;
				}
				836
				837	static const struct super_operations ceph_super_ops = {
				838	.alloc_inode = ceph_alloc_inode,
				839	.destroy_inode = ceph_destroy_inode,
				840	.write_inode = ceph_write_inode,
				841	.drop_inode = ceph_drop_inode,
				842	.evict_inode = ceph_evict_inode,
				843	.sync_fs = ceph_sync_fs,
				844	.put_super = ceph_put_super,
				845	.remount_fs = ceph_remount,
				846	.show_options = ceph_show_options,
				847	.statfs = ceph_statfs,
				848	.umount_begin = ceph_umount_begin,
				849	};
				850
				851	/*
				852	* Bootstrap mount by opening the root directory. Note the mount
				853	* @started time from caller, and time out if this takes too long.
				854	*/
				855	static struct dentry open_root_dentry(struct ceph_fs_client fsc,
				856	const char *path,
				857	unsigned long started)
				858	{
				859	struct ceph_mds_client *mdsc = fsc->mdsc;
				860	struct ceph_mds_request *req = NULL;
				861	int err;
				862	struct dentry *root;
				863
				864	/* open dir */
				865	dout("open_root_inode opening '%s'\n", path);
				866	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS);
				867	if (IS_ERR(req))
				868	return ERR_CAST(req);
				869	req->r_path1 = kstrdup(path, GFP_NOFS);
				870	if (!req->r_path1) {
				871	root = ERR_PTR(-ENOMEM);
				872	goto out;
				873	}
				874
				875	req->r_ino1.ino = CEPH_INO_ROOT;
				876	req->r_ino1.snap = CEPH_NOSNAP;
				877	req->r_started = started;
				878	req->r_timeout = fsc->client->options->mount_timeout;
				879	req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE);
				880	req->r_num_caps = 2;
				881	err = ceph_mdsc_do_request(mdsc, NULL, req);
				882	if (err == 0) {
				883	struct inode *inode = req->r_target_inode;
				884	req->r_target_inode = NULL;
				885	dout("open_root_inode success\n");
				886	root = d_make_root(inode);
				887	if (!root) {
				888	root = ERR_PTR(-ENOMEM);
				889	goto out;
				890	}
				891	dout("open_root_inode success, root dentry is %p\n", root);
				892	} else {
				893	root = ERR_PTR(err);
				894	}
				895	out:
				896	ceph_mdsc_put_request(req);
				897	return root;
				898	}
				899
				900
				901
				902
				903	/*
				904	* mount: join the ceph cluster, and open root directory.
				905	*/
				906	static struct dentry ceph_real_mount(struct ceph_fs_client fsc)
				907	{
				908	int err;
				909	unsigned long started = jiffies; /* note the start time */
				910	struct dentry *root;
				911
				912	dout("mount start %p\n", fsc);
				913	mutex_lock(&fsc->client->mount_mutex);
				914
				915	if (!fsc->sb->s_root) {
				916	const char *path;
				917	err = __ceph_open_session(fsc->client, started);
				918	if (err < 0)
				919	goto out;
				920
				921	/* setup fscache */
				922	if (fsc->mount_options->flags & CEPH_MOUNT_OPT_FSCACHE) {
				923	err = ceph_fscache_register_fs(fsc);
				924	if (err < 0)
				925	goto out;
				926	}
				927
				928	if (!fsc->mount_options->server_path) {
				929	path = "";
				930	dout("mount opening path \\t\n");
				931	} else {
				932	path = fsc->mount_options->server_path + 1;
				933	dout("mount opening path %s\n", path);
				934	}
				935
				936	err = ceph_fs_debugfs_init(fsc);
				937	if (err < 0)
				938	goto out;
				939
				940	root = open_root_dentry(fsc, path, started);
				941	if (IS_ERR(root)) {
				942	err = PTR_ERR(root);
				943	goto out;
				944	}
				945	fsc->sb->s_root = dget(root);
				946	} else {
				947	root = dget(fsc->sb->s_root);
				948	}
				949
				950	fsc->mount_state = CEPH_MOUNT_MOUNTED;
				951	dout("mount success\n");
				952	mutex_unlock(&fsc->client->mount_mutex);
				953	return root;
				954
				955	out:
				956	mutex_unlock(&fsc->client->mount_mutex);
				957	return ERR_PTR(err);
				958	}
				959
				960	static int ceph_set_super(struct super_block s, void data)
				961	{
				962	struct ceph_fs_client *fsc = data;
				963	int ret;
				964
				965	dout("set_super %p data %p\n", s, data);
				966
				967	s->s_flags = fsc->mount_options->sb_flags;
				968	s->s_maxbytes = MAX_LFS_FILESIZE;
				969
				970	s->s_xattr = ceph_xattr_handlers;
				971	s->s_fs_info = fsc;
				972	fsc->sb = s;
				973	fsc->max_file_size = 1ULL << 40; /* temp value until we get mdsmap */
				974
				975	s->s_op = &ceph_super_ops;
				976	s->s_d_op = &ceph_dentry_ops;
				977	s->s_export_op = &ceph_export_ops;
				978
				979	s->s_time_gran = 1000; /* 1000 ns == 1 us */
				980
				981	ret = set_anon_super(s, NULL); /* what is that second arg for? */
				982	if (ret != 0)
				983	goto fail;
				984
				985	return ret;
				986
				987	fail:
				988	s->s_fs_info = NULL;
				989	fsc->sb = NULL;
				990	return ret;
				991	}
				992
				993	/*
				994	* share superblock if same fs AND options
				995	*/
				996	static int ceph_compare_super(struct super_block sb, void data)
				997	{
				998	struct ceph_fs_client *new = data;
				999	struct ceph_mount_options *fsopt = new->mount_options;
				1000	struct ceph_options *opt = new->client->options;
				1001	struct ceph_fs_client *other = ceph_sb_to_client(sb);
				1002
				1003	dout("ceph_compare_super %p\n", sb);
				1004
				1005	if (compare_mount_options(fsopt, opt, other)) {
				1006	dout("monitor(s)/mount options don't match\n");
				1007	return 0;
				1008	}
				1009	if ((opt->flags & CEPH_OPT_FSID) &&
				1010	ceph_fsid_compare(&opt->fsid, &other->client->fsid)) {
				1011	dout("fsid doesn't match\n");
				1012	return 0;
				1013	}
				1014	if (fsopt->sb_flags != other->mount_options->sb_flags) {
				1015	dout("flags differ\n");
				1016	return 0;
				1017	}
				1018	return 1;
				1019	}
				1020
				1021	/*
				1022	* construct our own bdi so we can control readahead, etc.
				1023	*/
				1024	static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
				1025
				1026	static int ceph_setup_bdi(struct super_block sb, struct ceph_fs_client fsc)
				1027	{
				1028	int err;
				1029
				1030	err = super_setup_bdi_name(sb, "ceph-%ld",
				1031	atomic_long_inc_return(&bdi_seq));
				1032	if (err)
				1033	return err;
				1034
				1035	/* set ra_pages based on rasize mount option? */
				1036	sb->s_bdi->ra_pages = fsc->mount_options->rasize >> PAGE_SHIFT;
				1037
				1038	/* set io_pages based on max osd read size */
				1039	sb->s_bdi->io_pages = fsc->mount_options->rsize >> PAGE_SHIFT;
				1040
				1041	return 0;
				1042	}
				1043
				1044	static struct dentry ceph_mount(struct file_system_type fs_type,
				1045	int flags, const char dev_name, void data)
				1046	{
				1047	struct super_block *sb;
				1048	struct ceph_fs_client *fsc;
				1049	struct dentry *res;
				1050	int err;
				1051	int (compare_super)(struct super_block , void *) = ceph_compare_super;
				1052	struct ceph_mount_options *fsopt = NULL;
				1053	struct ceph_options *opt = NULL;
				1054
				1055	dout("ceph_mount\n");
				1056
				1057	#ifdef CONFIG_CEPH_FS_POSIX_ACL
				1058	flags \|= SB_POSIXACL;
				1059	#endif
				1060	err = parse_mount_options(&fsopt, &opt, flags, data, dev_name);
				1061	if (err < 0) {
				1062	res = ERR_PTR(err);
				1063	goto out_final;
				1064	}
				1065
				1066	/* create client (which we may/may not use) */
				1067	fsc = create_fs_client(fsopt, opt);
				1068	if (IS_ERR(fsc)) {
				1069	res = ERR_CAST(fsc);
				1070	goto out_final;
				1071	}
				1072
				1073	err = ceph_mdsc_init(fsc);
				1074	if (err < 0) {
				1075	res = ERR_PTR(err);
				1076	goto out;
				1077	}
				1078
				1079	if (ceph_test_opt(fsc->client, NOSHARE))
				1080	compare_super = NULL;
				1081	sb = sget(fs_type, compare_super, ceph_set_super, flags, fsc);
				1082	if (IS_ERR(sb)) {
				1083	res = ERR_CAST(sb);
				1084	goto out;
				1085	}
				1086
				1087	if (ceph_sb_to_client(sb) != fsc) {
				1088	ceph_mdsc_destroy(fsc);
				1089	destroy_fs_client(fsc);
				1090	fsc = ceph_sb_to_client(sb);
				1091	dout("get_sb got existing client %p\n", fsc);
				1092	} else {
				1093	dout("get_sb using new client %p\n", fsc);
				1094	err = ceph_setup_bdi(sb, fsc);
				1095	if (err < 0) {
				1096	res = ERR_PTR(err);
				1097	goto out_splat;
				1098	}
				1099	}
				1100
				1101	res = ceph_real_mount(fsc);
				1102	if (IS_ERR(res))
				1103	goto out_splat;
				1104	dout("root %p inode %p ino %llx.%llx\n", res,
				1105	d_inode(res), ceph_vinop(d_inode(res)));
				1106	return res;
				1107
				1108	out_splat:
				1109	ceph_mdsc_close_sessions(fsc->mdsc);
				1110	deactivate_locked_super(sb);
				1111	goto out_final;
				1112
				1113	out:
				1114	ceph_mdsc_destroy(fsc);
				1115	destroy_fs_client(fsc);
				1116	out_final:
				1117	dout("ceph_mount fail %ld\n", PTR_ERR(res));
				1118	return res;
				1119	}
				1120
				1121	static void ceph_kill_sb(struct super_block *s)
				1122	{
				1123	struct ceph_fs_client *fsc = ceph_sb_to_client(s);
				1124	dev_t dev = s->s_dev;
				1125
				1126	dout("kill_sb %p\n", s);
				1127
				1128	ceph_mdsc_pre_umount(fsc->mdsc);
				1129	flush_fs_workqueues(fsc);
				1130
				1131	generic_shutdown_super(s);
				1132
				1133	fsc->client->extra_mon_dispatch = NULL;
				1134	ceph_fs_debugfs_cleanup(fsc);
				1135
				1136	ceph_fscache_unregister_fs(fsc);
				1137
				1138	ceph_mdsc_destroy(fsc);
				1139
				1140	destroy_fs_client(fsc);
				1141	free_anon_bdev(dev);
				1142	}
				1143
				1144	static struct file_system_type ceph_fs_type = {
				1145	.owner = THIS_MODULE,
				1146	.name = "ceph",
				1147	.mount = ceph_mount,
				1148	.kill_sb = ceph_kill_sb,
				1149	.fs_flags = FS_RENAME_DOES_D_MOVE,
				1150	};
				1151	MODULE_ALIAS_FS("ceph");
				1152
				1153	static int __init init_ceph(void)
				1154	{
				1155	int ret = init_caches();
				1156	if (ret)
				1157	goto out;
				1158
				1159	ceph_flock_init();
				1160	ceph_xattr_init();
				1161	ret = register_filesystem(&ceph_fs_type);
				1162	if (ret)
				1163	goto out_xattr;
				1164
				1165	pr_info("loaded (mds proto %d)\n", CEPH_MDSC_PROTOCOL);
				1166
				1167	return 0;
				1168
				1169	out_xattr:
				1170	ceph_xattr_exit();
				1171	destroy_caches();
				1172	out:
				1173	return ret;
				1174	}
				1175
				1176	static void __exit exit_ceph(void)
				1177	{
				1178	dout("exit_ceph\n");
				1179	unregister_filesystem(&ceph_fs_type);
				1180	ceph_xattr_exit();
				1181	destroy_caches();
				1182	}
				1183
				1184	module_init(init_ceph);
				1185	module_exit(exit_ceph);
				1186
				1187	MODULE_AUTHOR("Sage Weil <sage@newdream.net>");
				1188	MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>");
				1189	MODULE_AUTHOR("Patience Warnick <patience@newdream.net>");
				1190	MODULE_DESCRIPTION("Ceph filesystem for Linux");
				1191	MODULE_LICENSE("GPL");