Blame - src/kernel/linux/v4.14/fs/ceph/super.c - T103

blob: caa6780d8ff2f9e1b61e67e40ad3da3cabcecfd5 [file] [log] [blame]

rjw	1f88458	2022-01-06 17:20:42 +0800	[diff] [blame^]	1
				2	#include <linux/ceph/ceph_debug.h>
				3
				4	#include <linux/backing-dev.h>
				5	#include <linux/ctype.h>
				6	#include <linux/fs.h>
				7	#include <linux/inet.h>
				8	#include <linux/in6.h>
				9	#include <linux/module.h>
				10	#include <linux/mount.h>
				11	#include <linux/parser.h>
				12	#include <linux/sched.h>
				13	#include <linux/seq_file.h>
				14	#include <linux/slab.h>
				15	#include <linux/statfs.h>
				16	#include <linux/string.h>
				17
				18	#include "super.h"
				19	#include "mds_client.h"
				20	#include "cache.h"
				21
				22	#include <linux/ceph/ceph_features.h>
				23	#include <linux/ceph/decode.h>
				24	#include <linux/ceph/mon_client.h>
				25	#include <linux/ceph/auth.h>
				26	#include <linux/ceph/debugfs.h>
				27
				28	/*
				29	* Ceph superblock operations
				30	*
				31	* Handle the basics of mounting, unmounting.
				32	*/
				33
				34	/*
				35	* super ops
				36	*/
				37	static void ceph_put_super(struct super_block *s)
				38	{
				39	struct ceph_fs_client *fsc = ceph_sb_to_client(s);
				40
				41	dout("put_super\n");
				42	ceph_mdsc_close_sessions(fsc->mdsc);
				43	}
				44
				45	static int ceph_statfs(struct dentry dentry, struct kstatfs buf)
				46	{
				47	struct ceph_fs_client *fsc = ceph_inode_to_client(d_inode(dentry));
				48	struct ceph_monmap *monmap = fsc->client->monc.monmap;
				49	struct ceph_statfs st;
				50	u64 fsid;
				51	int err;
				52	u64 data_pool;
				53
				54	if (fsc->mdsc->mdsmap->m_num_data_pg_pools == 1) {
				55	data_pool = fsc->mdsc->mdsmap->m_data_pg_pools[0];
				56	} else {
				57	data_pool = CEPH_NOPOOL;
				58	}
				59
				60	dout("statfs\n");
				61	err = ceph_monc_do_statfs(&fsc->client->monc, data_pool, &st);
				62	if (err < 0)
				63	return err;
				64
				65	/* fill in kstatfs */
				66	buf->f_type = CEPH_SUPER_MAGIC; /* ?? */
				67
				68	/*
				69	* express utilization in terms of large blocks to avoid
				70	* overflow on 32-bit machines.
				71	*
				72	* NOTE: for the time being, we make bsize == frsize to humor
				73	* not-yet-ancient versions of glibc that are broken.
				74	* Someday, we will probably want to report a real block
				75	* size... whatever that may mean for a network file system!
				76	*/
				77	buf->f_bsize = 1 << CEPH_BLOCK_SHIFT;
				78	buf->f_frsize = 1 << CEPH_BLOCK_SHIFT;
				79	buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10);
				80	buf->f_bfree = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10);
				81	buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10);
				82
				83	buf->f_files = le64_to_cpu(st.num_objects);
				84	buf->f_ffree = -1;
				85	buf->f_namelen = NAME_MAX;
				86
				87	/* leave fsid little-endian, regardless of host endianness */
				88	fsid = (u64 )(&monmap->fsid) ^ ((u64 )&monmap->fsid + 1);
				89	buf->f_fsid.val[0] = fsid & 0xffffffff;
				90	buf->f_fsid.val[1] = fsid >> 32;
				91
				92	return 0;
				93	}
				94
				95	static int ceph_sync_fs(struct super_block *sb, int wait)
				96	{
				97	struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
				98
				99	if (!wait) {
				100	dout("sync_fs (non-blocking)\n");
				101	ceph_flush_dirty_caps(fsc->mdsc);
				102	dout("sync_fs (non-blocking) done\n");
				103	return 0;
				104	}
				105
				106	dout("sync_fs (blocking)\n");
				107	ceph_osdc_sync(&fsc->client->osdc);
				108	ceph_mdsc_sync(fsc->mdsc);
				109	dout("sync_fs (blocking) done\n");
				110	return 0;
				111	}
				112
				113	/*
				114	* mount options
				115	*/
				116	enum {
				117	Opt_wsize,
				118	Opt_rsize,
				119	Opt_rasize,
				120	Opt_caps_wanted_delay_min,
				121	Opt_caps_wanted_delay_max,
				122	Opt_readdir_max_entries,
				123	Opt_readdir_max_bytes,
				124	Opt_congestion_kb,
				125	Opt_last_int,
				126	/* int args above */
				127	Opt_snapdirname,
				128	Opt_mds_namespace,
				129	Opt_fscache_uniq,
				130	Opt_last_string,
				131	/* string args above */
				132	Opt_dirstat,
				133	Opt_nodirstat,
				134	Opt_rbytes,
				135	Opt_norbytes,
				136	Opt_asyncreaddir,
				137	Opt_noasyncreaddir,
				138	Opt_dcache,
				139	Opt_nodcache,
				140	Opt_ino32,
				141	Opt_noino32,
				142	Opt_fscache,
				143	Opt_nofscache,
				144	Opt_poolperm,
				145	Opt_nopoolperm,
				146	Opt_require_active_mds,
				147	Opt_norequire_active_mds,
				148	#ifdef CONFIG_CEPH_FS_POSIX_ACL
				149	Opt_acl,
				150	#endif
				151	Opt_noacl,
				152	};
				153
				154	static match_table_t fsopt_tokens = {
				155	{Opt_wsize, "wsize=%d"},
				156	{Opt_rsize, "rsize=%d"},
				157	{Opt_rasize, "rasize=%d"},
				158	{Opt_caps_wanted_delay_min, "caps_wanted_delay_min=%d"},
				159	{Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"},
				160	{Opt_readdir_max_entries, "readdir_max_entries=%d"},
				161	{Opt_readdir_max_bytes, "readdir_max_bytes=%d"},
				162	{Opt_congestion_kb, "write_congestion_kb=%d"},
				163	/* int args above */
				164	{Opt_snapdirname, "snapdirname=%s"},
				165	{Opt_mds_namespace, "mds_namespace=%s"},
				166	{Opt_fscache_uniq, "fsc=%s"},
				167	/* string args above */
				168	{Opt_dirstat, "dirstat"},
				169	{Opt_nodirstat, "nodirstat"},
				170	{Opt_rbytes, "rbytes"},
				171	{Opt_norbytes, "norbytes"},
				172	{Opt_asyncreaddir, "asyncreaddir"},
				173	{Opt_noasyncreaddir, "noasyncreaddir"},
				174	{Opt_dcache, "dcache"},
				175	{Opt_nodcache, "nodcache"},
				176	{Opt_ino32, "ino32"},
				177	{Opt_noino32, "noino32"},
				178	{Opt_fscache, "fsc"},
				179	{Opt_nofscache, "nofsc"},
				180	{Opt_poolperm, "poolperm"},
				181	{Opt_nopoolperm, "nopoolperm"},
				182	{Opt_require_active_mds, "require_active_mds"},
				183	{Opt_norequire_active_mds, "norequire_active_mds"},
				184	#ifdef CONFIG_CEPH_FS_POSIX_ACL
				185	{Opt_acl, "acl"},
				186	#endif
				187	{Opt_noacl, "noacl"},
				188	{-1, NULL}
				189	};
				190
				191	/*
				192	* Remove adjacent slashes and then the trailing slash, unless it is
				193	* the only remaining character.
				194	*
				195	* E.g. "//dir1////dir2///" --> "/dir1/dir2", "///" --> "/".
				196	*/
				197	static void canonicalize_path(char *path)
				198	{
				199	int i, j = 0;
				200
				201	for (i = 0; path[i] != '\0'; i++) {
				202	if (path[i] != '/' \|\| j < 1 \|\| path[j - 1] != '/')
				203	path[j++] = path[i];
				204	}
				205
				206	if (j > 1 && path[j - 1] == '/')
				207	j--;
				208	path[j] = '\0';
				209	}
				210
				211	static int parse_fsopt_token(char c, void private)
				212	{
				213	struct ceph_mount_options *fsopt = private;
				214	substring_t argstr[MAX_OPT_ARGS];
				215	int token, intval, ret;
				216
				217	token = match_token((char *)c, fsopt_tokens, argstr);
				218	if (token < 0)
				219	return -EINVAL;
				220
				221	if (token < Opt_last_int) {
				222	ret = match_int(&argstr[0], &intval);
				223	if (ret < 0) {
				224	pr_err("bad mount option arg (not int) "
				225	"at '%s'\n", c);
				226	return ret;
				227	}
				228	dout("got int token %d val %d\n", token, intval);
				229	} else if (token > Opt_last_int && token < Opt_last_string) {
				230	dout("got string token %d val %s\n", token,
				231	argstr[0].from);
				232	} else {
				233	dout("got token %d\n", token);
				234	}
				235
				236	switch (token) {
				237	case Opt_snapdirname:
				238	kfree(fsopt->snapdir_name);
				239	fsopt->snapdir_name = kstrndup(argstr[0].from,
				240	argstr[0].to-argstr[0].from,
				241	GFP_KERNEL);
				242	if (!fsopt->snapdir_name)
				243	return -ENOMEM;
				244	break;
				245	case Opt_mds_namespace:
				246	kfree(fsopt->mds_namespace);
				247	fsopt->mds_namespace = kstrndup(argstr[0].from,
				248	argstr[0].to-argstr[0].from,
				249	GFP_KERNEL);
				250	if (!fsopt->mds_namespace)
				251	return -ENOMEM;
				252	break;
				253	case Opt_fscache_uniq:
				254	#ifdef CONFIG_CEPH_FSCACHE
				255	kfree(fsopt->fscache_uniq);
				256	fsopt->fscache_uniq = kstrndup(argstr[0].from,
				257	argstr[0].to-argstr[0].from,
				258	GFP_KERNEL);
				259	if (!fsopt->fscache_uniq)
				260	return -ENOMEM;
				261	fsopt->flags \|= CEPH_MOUNT_OPT_FSCACHE;
				262	break;
				263	#else
				264	pr_err("fscache support is disabled\n");
				265	return -EINVAL;
				266	#endif
				267	case Opt_wsize:
				268	if (intval < PAGE_SIZE \|\| intval > CEPH_MAX_WRITE_SIZE)
				269	return -EINVAL;
				270	fsopt->wsize = ALIGN(intval, PAGE_SIZE);
				271	break;
				272	case Opt_rsize:
				273	if (intval < PAGE_SIZE \|\| intval > CEPH_MAX_READ_SIZE)
				274	return -EINVAL;
				275	fsopt->rsize = ALIGN(intval, PAGE_SIZE);
				276	break;
				277	case Opt_rasize:
				278	if (intval < 0)
				279	return -EINVAL;
				280	fsopt->rasize = ALIGN(intval, PAGE_SIZE);
				281	break;
				282	case Opt_caps_wanted_delay_min:
				283	if (intval < 1)
				284	return -EINVAL;
				285	fsopt->caps_wanted_delay_min = intval;
				286	break;
				287	case Opt_caps_wanted_delay_max:
				288	if (intval < 1)
				289	return -EINVAL;
				290	fsopt->caps_wanted_delay_max = intval;
				291	break;
				292	case Opt_readdir_max_entries:
				293	if (intval < 1)
				294	return -EINVAL;
				295	fsopt->max_readdir = intval;
				296	break;
				297	case Opt_readdir_max_bytes:
				298	if (intval < PAGE_SIZE && intval != 0)
				299	return -EINVAL;
				300	fsopt->max_readdir_bytes = intval;
				301	break;
				302	case Opt_congestion_kb:
				303	if (intval < 1024) /* at least 1M */
				304	return -EINVAL;
				305	fsopt->congestion_kb = intval;
				306	break;
				307	case Opt_dirstat:
				308	fsopt->flags \|= CEPH_MOUNT_OPT_DIRSTAT;
				309	break;
				310	case Opt_nodirstat:
				311	fsopt->flags &= ~CEPH_MOUNT_OPT_DIRSTAT;
				312	break;
				313	case Opt_rbytes:
				314	fsopt->flags \|= CEPH_MOUNT_OPT_RBYTES;
				315	break;
				316	case Opt_norbytes:
				317	fsopt->flags &= ~CEPH_MOUNT_OPT_RBYTES;
				318	break;
				319	case Opt_asyncreaddir:
				320	fsopt->flags &= ~CEPH_MOUNT_OPT_NOASYNCREADDIR;
				321	break;
				322	case Opt_noasyncreaddir:
				323	fsopt->flags \|= CEPH_MOUNT_OPT_NOASYNCREADDIR;
				324	break;
				325	case Opt_dcache:
				326	fsopt->flags \|= CEPH_MOUNT_OPT_DCACHE;
				327	break;
				328	case Opt_nodcache:
				329	fsopt->flags &= ~CEPH_MOUNT_OPT_DCACHE;
				330	break;
				331	case Opt_ino32:
				332	fsopt->flags \|= CEPH_MOUNT_OPT_INO32;
				333	break;
				334	case Opt_noino32:
				335	fsopt->flags &= ~CEPH_MOUNT_OPT_INO32;
				336	break;
				337	case Opt_fscache:
				338	#ifdef CONFIG_CEPH_FSCACHE
				339	fsopt->flags \|= CEPH_MOUNT_OPT_FSCACHE;
				340	break;
				341	#else
				342	pr_err("fscache support is disabled\n");
				343	return -EINVAL;
				344	#endif
				345	case Opt_nofscache:
				346	fsopt->flags &= ~CEPH_MOUNT_OPT_FSCACHE;
				347	break;
				348	case Opt_poolperm:
				349	fsopt->flags &= ~CEPH_MOUNT_OPT_NOPOOLPERM;
				350	printk ("pool perm");
				351	break;
				352	case Opt_nopoolperm:
				353	fsopt->flags \|= CEPH_MOUNT_OPT_NOPOOLPERM;
				354	break;
				355	case Opt_require_active_mds:
				356	fsopt->flags &= ~CEPH_MOUNT_OPT_MOUNTWAIT;
				357	break;
				358	case Opt_norequire_active_mds:
				359	fsopt->flags \|= CEPH_MOUNT_OPT_MOUNTWAIT;
				360	break;
				361	#ifdef CONFIG_CEPH_FS_POSIX_ACL
				362	case Opt_acl:
				363	fsopt->sb_flags \|= MS_POSIXACL;
				364	break;
				365	#endif
				366	case Opt_noacl:
				367	fsopt->sb_flags &= ~MS_POSIXACL;
				368	break;
				369	default:
				370	BUG_ON(token);
				371	}
				372	return 0;
				373	}
				374
				375	static void destroy_mount_options(struct ceph_mount_options *args)
				376	{
				377	dout("destroy_mount_options %p\n", args);
				378	kfree(args->snapdir_name);
				379	kfree(args->mds_namespace);
				380	kfree(args->server_path);
				381	kfree(args->fscache_uniq);
				382	kfree(args);
				383	}
				384
				385	static int strcmp_null(const char s1, const char s2)
				386	{
				387	if (!s1 && !s2)
				388	return 0;
				389	if (s1 && !s2)
				390	return -1;
				391	if (!s1 && s2)
				392	return 1;
				393	return strcmp(s1, s2);
				394	}
				395
				396	static int compare_mount_options(struct ceph_mount_options *new_fsopt,
				397	struct ceph_options *new_opt,
				398	struct ceph_fs_client *fsc)
				399	{
				400	struct ceph_mount_options *fsopt1 = new_fsopt;
				401	struct ceph_mount_options *fsopt2 = fsc->mount_options;
				402	int ofs = offsetof(struct ceph_mount_options, snapdir_name);
				403	int ret;
				404
				405	ret = memcmp(fsopt1, fsopt2, ofs);
				406	if (ret)
				407	return ret;
				408
				409	ret = strcmp_null(fsopt1->snapdir_name, fsopt2->snapdir_name);
				410	if (ret)
				411	return ret;
				412
				413	ret = strcmp_null(fsopt1->mds_namespace, fsopt2->mds_namespace);
				414	if (ret)
				415	return ret;
				416
				417	ret = strcmp_null(fsopt1->server_path, fsopt2->server_path);
				418	if (ret)
				419	return ret;
				420
				421	ret = strcmp_null(fsopt1->fscache_uniq, fsopt2->fscache_uniq);
				422	if (ret)
				423	return ret;
				424
				425	return ceph_compare_options(new_opt, fsc->client);
				426	}
				427
				428	static int parse_mount_options(struct ceph_mount_options **pfsopt,
				429	struct ceph_options **popt,
				430	int flags, char *options,
				431	const char *dev_name)
				432	{
				433	struct ceph_mount_options *fsopt;
				434	const char *dev_name_end;
				435	int err;
				436
				437	if (!dev_name \|\| !*dev_name)
				438	return -EINVAL;
				439
				440	fsopt = kzalloc(sizeof(*fsopt), GFP_KERNEL);
				441	if (!fsopt)
				442	return -ENOMEM;
				443
				444	dout("parse_mount_options %p, dev_name '%s'\n", fsopt, dev_name);
				445
				446	fsopt->sb_flags = flags;
				447	fsopt->flags = CEPH_MOUNT_OPT_DEFAULT;
				448
				449	fsopt->wsize = CEPH_MAX_WRITE_SIZE;
				450	fsopt->rsize = CEPH_MAX_READ_SIZE;
				451	fsopt->rasize = CEPH_RASIZE_DEFAULT;
				452	fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL);
				453	if (!fsopt->snapdir_name) {
				454	err = -ENOMEM;
				455	goto out;
				456	}
				457
				458	fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT;
				459	fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT;
				460	fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT;
				461	fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT;
				462	fsopt->congestion_kb = default_congestion_kb();
				463
				464	/*
				465	* Distinguish the server list from the path in "dev_name".
				466	* Internally we do not include the leading '/' in the path.
				467	*
				468	* "dev_name" will look like:
				469	* <server_spec>[,<server_spec>...]:[<path>]
				470	* where
				471	* <server_spec> is <ip>[:<port>]
				472	* <path> is optional, but if present must begin with '/'
				473	*/
				474	dev_name_end = strchr(dev_name, '/');
				475	if (dev_name_end) {
				476	/*
				477	* The server_path will include the whole chars from userland
				478	* including the leading '/'.
				479	*/
				480	fsopt->server_path = kstrdup(dev_name_end, GFP_KERNEL);
				481	if (!fsopt->server_path) {
				482	err = -ENOMEM;
				483	goto out;
				484	}
				485
				486	canonicalize_path(fsopt->server_path);
				487	} else {
				488	dev_name_end = dev_name + strlen(dev_name);
				489	}
				490	err = -EINVAL;
				491	dev_name_end--; /* back up to ':' separator */
				492	if (dev_name_end < dev_name \|\| *dev_name_end != ':') {
				493	pr_err("device name is missing path (no : separator in %s)\n",
				494	dev_name);
				495	goto out;
				496	}
				497	dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name);
				498	if (fsopt->server_path)
				499	dout("server path '%s'\n", fsopt->server_path);
				500
				501	*popt = ceph_parse_options(options, dev_name, dev_name_end,
				502	parse_fsopt_token, (void *)fsopt);
				503	if (IS_ERR(*popt)) {
				504	err = PTR_ERR(*popt);
				505	goto out;
				506	}
				507
				508	/* success */
				509	*pfsopt = fsopt;
				510	return 0;
				511
				512	out:
				513	destroy_mount_options(fsopt);
				514	return err;
				515	}
				516
				517	/**
				518	* ceph_show_options - Show mount options in /proc/mounts
				519	* @m: seq_file to write to
				520	* @root: root of that (sub)tree
				521	*/
				522	static int ceph_show_options(struct seq_file m, struct dentry root)
				523	{
				524	struct ceph_fs_client *fsc = ceph_sb_to_client(root->d_sb);
				525	struct ceph_mount_options *fsopt = fsc->mount_options;
				526	size_t pos;
				527	int ret;
				528
				529	/* a comma between MNT/MS and client options */
				530	seq_putc(m, ',');
				531	pos = m->count;
				532
				533	ret = ceph_print_client_options(m, fsc->client);
				534	if (ret)
				535	return ret;
				536
				537	/* retract our comma if no client options */
				538	if (m->count == pos)
				539	m->count--;
				540
				541	if (fsopt->flags & CEPH_MOUNT_OPT_DIRSTAT)
				542	seq_puts(m, ",dirstat");
				543	if ((fsopt->flags & CEPH_MOUNT_OPT_RBYTES))
				544	seq_puts(m, ",rbytes");
				545	if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR)
				546	seq_puts(m, ",noasyncreaddir");
				547	if ((fsopt->flags & CEPH_MOUNT_OPT_DCACHE) == 0)
				548	seq_puts(m, ",nodcache");
				549	if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) {
				550	if (fsopt->fscache_uniq)
				551	seq_printf(m, ",fsc=%s", fsopt->fscache_uniq);
				552	else
				553	seq_puts(m, ",fsc");
				554	}
				555	if (fsopt->flags & CEPH_MOUNT_OPT_NOPOOLPERM)
				556	seq_puts(m, ",nopoolperm");
				557
				558	#ifdef CONFIG_CEPH_FS_POSIX_ACL
				559	if (fsopt->sb_flags & MS_POSIXACL)
				560	seq_puts(m, ",acl");
				561	else
				562	seq_puts(m, ",noacl");
				563	#endif
				564
				565	if (fsopt->mds_namespace)
				566	seq_printf(m, ",mds_namespace=%s", fsopt->mds_namespace);
				567	if (fsopt->wsize)
				568	seq_printf(m, ",wsize=%d", fsopt->wsize);
				569	if (fsopt->rsize != CEPH_MAX_READ_SIZE)
				570	seq_printf(m, ",rsize=%d", fsopt->rsize);
				571	if (fsopt->rasize != CEPH_RASIZE_DEFAULT)
				572	seq_printf(m, ",rasize=%d", fsopt->rasize);
				573	if (fsopt->congestion_kb != default_congestion_kb())
				574	seq_printf(m, ",write_congestion_kb=%d", fsopt->congestion_kb);
				575	if (fsopt->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT)
				576	seq_printf(m, ",caps_wanted_delay_min=%d",
				577	fsopt->caps_wanted_delay_min);
				578	if (fsopt->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT)
				579	seq_printf(m, ",caps_wanted_delay_max=%d",
				580	fsopt->caps_wanted_delay_max);
				581	if (fsopt->max_readdir != CEPH_MAX_READDIR_DEFAULT)
				582	seq_printf(m, ",readdir_max_entries=%d", fsopt->max_readdir);
				583	if (fsopt->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT)
				584	seq_printf(m, ",readdir_max_bytes=%d", fsopt->max_readdir_bytes);
				585	if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT))
				586	seq_show_option(m, "snapdirname", fsopt->snapdir_name);
				587
				588	return 0;
				589	}
				590
				591	/*
				592	* handle any mon messages the standard library doesn't understand.
				593	* return error if we don't either.
				594	*/
				595	static int extra_mon_dispatch(struct ceph_client client, struct ceph_msg msg)
				596	{
				597	struct ceph_fs_client *fsc = client->private;
				598	int type = le16_to_cpu(msg->hdr.type);
				599
				600	switch (type) {
				601	case CEPH_MSG_MDS_MAP:
				602	ceph_mdsc_handle_mdsmap(fsc->mdsc, msg);
				603	return 0;
				604	case CEPH_MSG_FS_MAP_USER:
				605	ceph_mdsc_handle_fsmap(fsc->mdsc, msg);
				606	return 0;
				607	default:
				608	return -1;
				609	}
				610	}
				611
				612	/*
				613	* create a new fs client
				614	*/
				615	static struct ceph_fs_client create_fs_client(struct ceph_mount_options fsopt,
				616	struct ceph_options *opt)
				617	{
				618	struct ceph_fs_client *fsc;
				619	int page_count;
				620	size_t size;
				621	int err = -ENOMEM;
				622
				623	fsc = kzalloc(sizeof(*fsc), GFP_KERNEL);
				624	if (!fsc)
				625	return ERR_PTR(-ENOMEM);
				626
				627	fsc->client = ceph_create_client(opt, fsc);
				628	if (IS_ERR(fsc->client)) {
				629	err = PTR_ERR(fsc->client);
				630	goto fail;
				631	}
				632	fsc->client->extra_mon_dispatch = extra_mon_dispatch;
				633
				634	if (!fsopt->mds_namespace) {
				635	ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_MDSMAP,
				636	0, true);
				637	} else {
				638	ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_FSMAP,
				639	0, false);
				640	}
				641
				642	fsc->mount_options = fsopt;
				643
				644	fsc->sb = NULL;
				645	fsc->mount_state = CEPH_MOUNT_MOUNTING;
				646
				647	atomic_long_set(&fsc->writeback_count, 0);
				648
				649	err = -ENOMEM;
				650	/*
				651	* The number of concurrent works can be high but they don't need
				652	* to be processed in parallel, limit concurrency.
				653	*/
				654	fsc->wb_wq = alloc_workqueue("ceph-writeback", 0, 1);
				655	if (!fsc->wb_wq)
				656	goto fail_client;
				657	fsc->pg_inv_wq = alloc_workqueue("ceph-pg-invalid", 0, 1);
				658	if (!fsc->pg_inv_wq)
				659	goto fail_wb_wq;
				660	fsc->trunc_wq = alloc_workqueue("ceph-trunc", 0, 1);
				661	if (!fsc->trunc_wq)
				662	goto fail_pg_inv_wq;
				663
				664	/* set up mempools */
				665	err = -ENOMEM;
				666	page_count = fsc->mount_options->wsize >> PAGE_SHIFT;
				667	size = sizeof (struct page ) (page_count ? page_count : 1);
				668	fsc->wb_pagevec_pool = mempool_create_kmalloc_pool(10, size);
				669	if (!fsc->wb_pagevec_pool)
				670	goto fail_trunc_wq;
				671
				672	/* caps */
				673	fsc->min_caps = fsopt->max_readdir;
				674
				675	return fsc;
				676
				677	fail_trunc_wq:
				678	destroy_workqueue(fsc->trunc_wq);
				679	fail_pg_inv_wq:
				680	destroy_workqueue(fsc->pg_inv_wq);
				681	fail_wb_wq:
				682	destroy_workqueue(fsc->wb_wq);
				683	fail_client:
				684	ceph_destroy_client(fsc->client);
				685	fail:
				686	kfree(fsc);
				687	return ERR_PTR(err);
				688	}
				689
				690	static void destroy_fs_client(struct ceph_fs_client *fsc)
				691	{
				692	dout("destroy_fs_client %p\n", fsc);
				693
				694	destroy_workqueue(fsc->wb_wq);
				695	destroy_workqueue(fsc->pg_inv_wq);
				696	destroy_workqueue(fsc->trunc_wq);
				697
				698	mempool_destroy(fsc->wb_pagevec_pool);
				699
				700	destroy_mount_options(fsc->mount_options);
				701
				702	ceph_destroy_client(fsc->client);
				703
				704	kfree(fsc);
				705	dout("destroy_fs_client %p done\n", fsc);
				706	}
				707
				708	/*
				709	* caches
				710	*/
				711	struct kmem_cache *ceph_inode_cachep;
				712	struct kmem_cache *ceph_cap_cachep;
				713	struct kmem_cache *ceph_cap_flush_cachep;
				714	struct kmem_cache *ceph_dentry_cachep;
				715	struct kmem_cache *ceph_file_cachep;
				716
				717	static void ceph_inode_init_once(void *foo)
				718	{
				719	struct ceph_inode_info *ci = foo;
				720	inode_init_once(&ci->vfs_inode);
				721	}
				722
				723	static int __init init_caches(void)
				724	{
				725	int error = -ENOMEM;
				726
				727	ceph_inode_cachep = kmem_cache_create("ceph_inode_info",
				728	sizeof(struct ceph_inode_info),
				729	__alignof__(struct ceph_inode_info),
				730	SLAB_RECLAIM_ACCOUNT\|SLAB_MEM_SPREAD\|
				731	SLAB_ACCOUNT, ceph_inode_init_once);
				732	if (!ceph_inode_cachep)
				733	return -ENOMEM;
				734
				735	ceph_cap_cachep = KMEM_CACHE(ceph_cap,
				736	SLAB_RECLAIM_ACCOUNT\|SLAB_MEM_SPREAD);
				737	if (!ceph_cap_cachep)
				738	goto bad_cap;
				739	ceph_cap_flush_cachep = KMEM_CACHE(ceph_cap_flush,
				740	SLAB_RECLAIM_ACCOUNT\|SLAB_MEM_SPREAD);
				741	if (!ceph_cap_flush_cachep)
				742	goto bad_cap_flush;
				743
				744	ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info,
				745	SLAB_RECLAIM_ACCOUNT\|SLAB_MEM_SPREAD);
				746	if (!ceph_dentry_cachep)
				747	goto bad_dentry;
				748
				749	ceph_file_cachep = KMEM_CACHE(ceph_file_info, SLAB_MEM_SPREAD);
				750	if (!ceph_file_cachep)
				751	goto bad_file;
				752
				753	error = ceph_fscache_register();
				754	if (error)
				755	goto bad_fscache;
				756
				757	return 0;
				758
				759	bad_fscache:
				760	kmem_cache_destroy(ceph_file_cachep);
				761	bad_file:
				762	kmem_cache_destroy(ceph_dentry_cachep);
				763	bad_dentry:
				764	kmem_cache_destroy(ceph_cap_flush_cachep);
				765	bad_cap_flush:
				766	kmem_cache_destroy(ceph_cap_cachep);
				767	bad_cap:
				768	kmem_cache_destroy(ceph_inode_cachep);
				769	return error;
				770	}
				771
				772	static void destroy_caches(void)
				773	{
				774	/*
				775	* Make sure all delayed rcu free inodes are flushed before we
				776	* destroy cache.
				777	*/
				778	rcu_barrier();
				779
				780	kmem_cache_destroy(ceph_inode_cachep);
				781	kmem_cache_destroy(ceph_cap_cachep);
				782	kmem_cache_destroy(ceph_cap_flush_cachep);
				783	kmem_cache_destroy(ceph_dentry_cachep);
				784	kmem_cache_destroy(ceph_file_cachep);
				785
				786	ceph_fscache_unregister();
				787	}
				788
				789	/*
				790	* ceph_umount_begin - initiate forced umount. Tear down down the
				791	* mount, skipping steps that may hang while waiting for server(s).
				792	*/
				793	static void ceph_umount_begin(struct super_block *sb)
				794	{
				795	struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
				796
				797	dout("ceph_umount_begin - starting forced umount\n");
				798	if (!fsc)
				799	return;
				800	fsc->mount_state = CEPH_MOUNT_SHUTDOWN;
				801	ceph_mdsc_force_umount(fsc->mdsc);
				802	return;
				803	}
				804
				805	static int ceph_remount(struct super_block sb, int flags, char *data)
				806	{
				807	sync_filesystem(sb);
				808	return 0;
				809	}
				810
				811	static const struct super_operations ceph_super_ops = {
				812	.alloc_inode = ceph_alloc_inode,
				813	.destroy_inode = ceph_destroy_inode,
				814	.write_inode = ceph_write_inode,
				815	.drop_inode = ceph_drop_inode,
				816	.sync_fs = ceph_sync_fs,
				817	.put_super = ceph_put_super,
				818	.remount_fs = ceph_remount,
				819	.show_options = ceph_show_options,
				820	.statfs = ceph_statfs,
				821	.umount_begin = ceph_umount_begin,
				822	};
				823
				824	/*
				825	* Bootstrap mount by opening the root directory. Note the mount
				826	* @started time from caller, and time out if this takes too long.
				827	*/
				828	static struct dentry open_root_dentry(struct ceph_fs_client fsc,
				829	const char *path,
				830	unsigned long started)
				831	{
				832	struct ceph_mds_client *mdsc = fsc->mdsc;
				833	struct ceph_mds_request *req = NULL;
				834	int err;
				835	struct dentry *root;
				836
				837	/* open dir */
				838	dout("open_root_inode opening '%s'\n", path);
				839	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS);
				840	if (IS_ERR(req))
				841	return ERR_CAST(req);
				842	req->r_path1 = kstrdup(path, GFP_NOFS);
				843	if (!req->r_path1) {
				844	root = ERR_PTR(-ENOMEM);
				845	goto out;
				846	}
				847
				848	req->r_ino1.ino = CEPH_INO_ROOT;
				849	req->r_ino1.snap = CEPH_NOSNAP;
				850	req->r_started = started;
				851	req->r_timeout = fsc->client->options->mount_timeout;
				852	req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE);
				853	req->r_num_caps = 2;
				854	err = ceph_mdsc_do_request(mdsc, NULL, req);
				855	if (err == 0) {
				856	struct inode *inode = req->r_target_inode;
				857	req->r_target_inode = NULL;
				858	dout("open_root_inode success\n");
				859	root = d_make_root(inode);
				860	if (!root) {
				861	root = ERR_PTR(-ENOMEM);
				862	goto out;
				863	}
				864	dout("open_root_inode success, root dentry is %p\n", root);
				865	} else {
				866	root = ERR_PTR(err);
				867	}
				868	out:
				869	ceph_mdsc_put_request(req);
				870	return root;
				871	}
				872
				873	/*
				874	* mount: join the ceph cluster, and open root directory.
				875	*/
				876	static struct dentry ceph_real_mount(struct ceph_fs_client fsc)
				877	{
				878	int err;
				879	unsigned long started = jiffies; /* note the start time */
				880	struct dentry *root;
				881
				882	dout("mount start %p\n", fsc);
				883	mutex_lock(&fsc->client->mount_mutex);
				884
				885	if (!fsc->sb->s_root) {
				886	const char *path = fsc->mount_options->server_path ?
				887	fsc->mount_options->server_path + 1 : "";
				888
				889	err = __ceph_open_session(fsc->client, started);
				890	if (err < 0)
				891	goto out;
				892
				893	/* setup fscache */
				894	if (fsc->mount_options->flags & CEPH_MOUNT_OPT_FSCACHE) {
				895	err = ceph_fscache_register_fs(fsc);
				896	if (err < 0)
				897	goto out;
				898	}
				899
				900	dout("mount opening path '%s'\n", path);
				901
				902	err = ceph_fs_debugfs_init(fsc);
				903	if (err < 0)
				904	goto out;
				905
				906	root = open_root_dentry(fsc, path, started);
				907	if (IS_ERR(root)) {
				908	err = PTR_ERR(root);
				909	goto out;
				910	}
				911	fsc->sb->s_root = dget(root);
				912	} else {
				913	root = dget(fsc->sb->s_root);
				914	}
				915
				916	fsc->mount_state = CEPH_MOUNT_MOUNTED;
				917	dout("mount success\n");
				918	mutex_unlock(&fsc->client->mount_mutex);
				919	return root;
				920
				921	out:
				922	mutex_unlock(&fsc->client->mount_mutex);
				923	return ERR_PTR(err);
				924	}
				925
				926	static int ceph_set_super(struct super_block s, void data)
				927	{
				928	struct ceph_fs_client *fsc = data;
				929	int ret;
				930
				931	dout("set_super %p data %p\n", s, data);
				932
				933	s->s_flags = fsc->mount_options->sb_flags;
				934	s->s_maxbytes = 1ULL << 40; /* temp value until we get mdsmap */
				935
				936	s->s_xattr = ceph_xattr_handlers;
				937	s->s_fs_info = fsc;
				938	fsc->sb = s;
				939
				940	s->s_op = &ceph_super_ops;
				941	s->s_d_op = &ceph_dentry_ops;
				942	s->s_export_op = &ceph_export_ops;
				943
				944	s->s_time_gran = 1000; /* 1000 ns == 1 us */
				945
				946	ret = set_anon_super(s, NULL); /* what is that second arg for? */
				947	if (ret != 0)
				948	goto fail;
				949
				950	return ret;
				951
				952	fail:
				953	s->s_fs_info = NULL;
				954	fsc->sb = NULL;
				955	return ret;
				956	}
				957
				958	/*
				959	* share superblock if same fs AND options
				960	*/
				961	static int ceph_compare_super(struct super_block sb, void data)
				962	{
				963	struct ceph_fs_client *new = data;
				964	struct ceph_mount_options *fsopt = new->mount_options;
				965	struct ceph_options *opt = new->client->options;
				966	struct ceph_fs_client *other = ceph_sb_to_client(sb);
				967
				968	dout("ceph_compare_super %p\n", sb);
				969
				970	if (compare_mount_options(fsopt, opt, other)) {
				971	dout("monitor(s)/mount options don't match\n");
				972	return 0;
				973	}
				974	if ((opt->flags & CEPH_OPT_FSID) &&
				975	ceph_fsid_compare(&opt->fsid, &other->client->fsid)) {
				976	dout("fsid doesn't match\n");
				977	return 0;
				978	}
				979	if (fsopt->sb_flags != other->mount_options->sb_flags) {
				980	dout("flags differ\n");
				981	return 0;
				982	}
				983	return 1;
				984	}
				985
				986	/*
				987	* construct our own bdi so we can control readahead, etc.
				988	*/
				989	static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
				990
				991	static int ceph_setup_bdi(struct super_block sb, struct ceph_fs_client fsc)
				992	{
				993	int err;
				994
				995	err = super_setup_bdi_name(sb, "ceph-%ld",
				996	atomic_long_inc_return(&bdi_seq));
				997	if (err)
				998	return err;
				999
				1000	/* set ra_pages based on rasize mount option? */
				1001	sb->s_bdi->ra_pages = fsc->mount_options->rasize >> PAGE_SHIFT;
				1002
				1003	/* set io_pages based on max osd read size */
				1004	sb->s_bdi->io_pages = fsc->mount_options->rsize >> PAGE_SHIFT;
				1005
				1006	return 0;
				1007	}
				1008
				1009	static struct dentry ceph_mount(struct file_system_type fs_type,
				1010	int flags, const char dev_name, void data)
				1011	{
				1012	struct super_block *sb;
				1013	struct ceph_fs_client *fsc;
				1014	struct dentry *res;
				1015	int err;
				1016	int (compare_super)(struct super_block , void *) = ceph_compare_super;
				1017	struct ceph_mount_options *fsopt = NULL;
				1018	struct ceph_options *opt = NULL;
				1019
				1020	dout("ceph_mount\n");
				1021
				1022	#ifdef CONFIG_CEPH_FS_POSIX_ACL
				1023	flags \|= MS_POSIXACL;
				1024	#endif
				1025	err = parse_mount_options(&fsopt, &opt, flags, data, dev_name);
				1026	if (err < 0) {
				1027	res = ERR_PTR(err);
				1028	goto out_final;
				1029	}
				1030
				1031	/* create client (which we may/may not use) */
				1032	fsc = create_fs_client(fsopt, opt);
				1033	if (IS_ERR(fsc)) {
				1034	res = ERR_CAST(fsc);
				1035	destroy_mount_options(fsopt);
				1036	ceph_destroy_options(opt);
				1037	goto out_final;
				1038	}
				1039
				1040	err = ceph_mdsc_init(fsc);
				1041	if (err < 0) {
				1042	res = ERR_PTR(err);
				1043	goto out;
				1044	}
				1045
				1046	if (ceph_test_opt(fsc->client, NOSHARE))
				1047	compare_super = NULL;
				1048	sb = sget(fs_type, compare_super, ceph_set_super, flags, fsc);
				1049	if (IS_ERR(sb)) {
				1050	res = ERR_CAST(sb);
				1051	goto out;
				1052	}
				1053
				1054	if (ceph_sb_to_client(sb) != fsc) {
				1055	ceph_mdsc_destroy(fsc);
				1056	destroy_fs_client(fsc);
				1057	fsc = ceph_sb_to_client(sb);
				1058	dout("get_sb got existing client %p\n", fsc);
				1059	} else {
				1060	dout("get_sb using new client %p\n", fsc);
				1061	err = ceph_setup_bdi(sb, fsc);
				1062	if (err < 0) {
				1063	res = ERR_PTR(err);
				1064	goto out_splat;
				1065	}
				1066	}
				1067
				1068	res = ceph_real_mount(fsc);
				1069	if (IS_ERR(res))
				1070	goto out_splat;
				1071	dout("root %p inode %p ino %llx.%llx\n", res,
				1072	d_inode(res), ceph_vinop(d_inode(res)));
				1073	return res;
				1074
				1075	out_splat:
				1076	if (!ceph_mdsmap_is_cluster_available(fsc->mdsc->mdsmap)) {
				1077	pr_info("No mds server is up or the cluster is laggy\n");
				1078	err = -EHOSTUNREACH;
				1079	}
				1080
				1081	ceph_mdsc_close_sessions(fsc->mdsc);
				1082	deactivate_locked_super(sb);
				1083	goto out_final;
				1084
				1085	out:
				1086	ceph_mdsc_destroy(fsc);
				1087	destroy_fs_client(fsc);
				1088	out_final:
				1089	dout("ceph_mount fail %ld\n", PTR_ERR(res));
				1090	return res;
				1091	}
				1092
				1093	static void ceph_kill_sb(struct super_block *s)
				1094	{
				1095	struct ceph_fs_client *fsc = ceph_sb_to_client(s);
				1096	dev_t dev = s->s_dev;
				1097
				1098	dout("kill_sb %p\n", s);
				1099
				1100	ceph_mdsc_pre_umount(fsc->mdsc);
				1101	generic_shutdown_super(s);
				1102
				1103	fsc->client->extra_mon_dispatch = NULL;
				1104	ceph_fs_debugfs_cleanup(fsc);
				1105
				1106	ceph_fscache_unregister_fs(fsc);
				1107
				1108	ceph_mdsc_destroy(fsc);
				1109
				1110	destroy_fs_client(fsc);
				1111	free_anon_bdev(dev);
				1112	}
				1113
				1114	static struct file_system_type ceph_fs_type = {
				1115	.owner = THIS_MODULE,
				1116	.name = "ceph",
				1117	.mount = ceph_mount,
				1118	.kill_sb = ceph_kill_sb,
				1119	.fs_flags = FS_RENAME_DOES_D_MOVE,
				1120	};
				1121	MODULE_ALIAS_FS("ceph");
				1122
				1123	static int __init init_ceph(void)
				1124	{
				1125	int ret = init_caches();
				1126	if (ret)
				1127	goto out;
				1128
				1129	ceph_flock_init();
				1130	ceph_xattr_init();
				1131	ret = register_filesystem(&ceph_fs_type);
				1132	if (ret)
				1133	goto out_xattr;
				1134
				1135	pr_info("loaded (mds proto %d)\n", CEPH_MDSC_PROTOCOL);
				1136
				1137	return 0;
				1138
				1139	out_xattr:
				1140	ceph_xattr_exit();
				1141	destroy_caches();
				1142	out:
				1143	return ret;
				1144	}
				1145
				1146	static void __exit exit_ceph(void)
				1147	{
				1148	dout("exit_ceph\n");
				1149	unregister_filesystem(&ceph_fs_type);
				1150	ceph_xattr_exit();
				1151	destroy_caches();
				1152	}
				1153
				1154	module_init(init_ceph);
				1155	module_exit(exit_ceph);
				1156
				1157	MODULE_AUTHOR("Sage Weil <sage@newdream.net>");
				1158	MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>");
				1159	MODULE_AUTHOR("Patience Warnick <patience@newdream.net>");
				1160	MODULE_DESCRIPTION("Ceph filesystem for Linux");
				1161	MODULE_LICENSE("GPL");