/*
  FUSE: Filesystem in Userspace
  Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>

  This program can be distributed under the terms of the GNU GPL.
  See the file COPYING.
*/
8
9#include "fuse_i.h"
10
11#include <linux/pagemap.h>
12#include <linux/file.h>
13#include <linux/sched.h>
14#include <linux/namei.h>
15#include <linux/slab.h>
16#include <linux/xattr.h>
17#include <linux/posix_acl.h>
18
19static bool fuse_use_readdirplus(struct inode *dir, struct dir_context *ctx)
20{
21 struct fuse_conn *fc = get_fuse_conn(dir);
22 struct fuse_inode *fi = get_fuse_inode(dir);
23
24 if (!fc->do_readdirplus)
25 return false;
26 if (!fc->readdirplus_auto)
27 return true;
28 if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state))
29 return true;
30 if (ctx->pos == 0)
31 return true;
32 return false;
33}
34
35static void fuse_advise_use_readdirplus(struct inode *dir)
36{
37 struct fuse_inode *fi = get_fuse_inode(dir);
38
39 set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state);
40}
41
/*
 * Per-dentry private data, pointed to by dentry->d_fsdata.
 *
 * The members share storage: 'time' is the value accessed through
 * fuse_dentry_time()/fuse_dentry_settime(), and 'rcu' is the head passed to
 * kfree_rcu() by fuse_dentry_release().
 */
union fuse_dentry {
	u64 time;		/* dentry timeout value */
	struct rcu_head rcu;	/* used only when the object is being freed */
};
46
47static inline void fuse_dentry_settime(struct dentry *entry, u64 time)
48{
49 ((union fuse_dentry *) entry->d_fsdata)->time = time;
50}
51
52static inline u64 fuse_dentry_time(struct dentry *entry)
53{
54 return ((union fuse_dentry *) entry->d_fsdata)->time;
55}
56
/*
 * FUSE caches dentries and attributes with separate timeouts.  The
 * time (in jiffies) until which the dentry/attributes are valid is
 * stored in dentry->d_fsdata and fuse_inode->i_time respectively.
 */
62
63/*
64 * Calculate the time in jiffies until a dentry/attributes are valid
65 */
66static u64 time_to_jiffies(u64 sec, u32 nsec)
67{
68 if (sec || nsec) {
69 struct timespec64 ts = {
70 sec,
71 min_t(u32, nsec, NSEC_PER_SEC - 1)
72 };
73
74 return get_jiffies_64() + timespec64_to_jiffies(&ts);
75 } else
76 return 0;
77}
78
79/*
80 * Set dentry and possibly attribute timeouts from the lookup/mk*
81 * replies
82 */
83static void fuse_change_entry_timeout(struct dentry *entry,
84 struct fuse_entry_out *o)
85{
86 fuse_dentry_settime(entry,
87 time_to_jiffies(o->entry_valid, o->entry_valid_nsec));
88}
89
90static u64 attr_timeout(struct fuse_attr_out *o)
91{
92 return time_to_jiffies(o->attr_valid, o->attr_valid_nsec);
93}
94
95static u64 entry_attr_timeout(struct fuse_entry_out *o)
96{
97 return time_to_jiffies(o->attr_valid, o->attr_valid_nsec);
98}
99
100/*
101 * Mark the attributes as stale, so that at the next call to
102 * ->getattr() they will be fetched from userspace
103 */
104void fuse_invalidate_attr(struct inode *inode)
105{
106 get_fuse_inode(inode)->i_time = 0;
107}
108
/*
 * Mark the attributes as stale due to an atime change.  The invalidation is
 * skipped when IS_RDONLY(inode) is true, i.e. when atime is not used.
 */
void fuse_invalidate_atime(struct inode *inode)
{
	if (IS_RDONLY(inode))
		return;

	fuse_invalidate_attr(inode);
}
118
119/*
120 * Just mark the entry as stale, so that a next attempt to look it up
121 * will result in a new lookup call to userspace
122 *
123 * This is called when a dentry is about to become negative and the
124 * timeout is unknown (unlink, rmdir, rename and in some cases
125 * lookup)
126 */
127void fuse_invalidate_entry_cache(struct dentry *entry)
128{
129 fuse_dentry_settime(entry, 0);
130}
131
/*
 * Like fuse_invalidate_entry_cache(), but first calls d_invalidate() to try
 * to remove the dentry from the hash as well.
 */
static void fuse_invalidate_entry(struct dentry *entry)
{
	d_invalidate(entry);
	/* Same effect as fuse_invalidate_entry_cache(): zero the timeout */
	fuse_dentry_settime(entry, 0);
}
141
142static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_args *args,
143 u64 nodeid, const struct qstr *name,
144 struct fuse_entry_out *outarg)
145{
146 memset(outarg, 0, sizeof(struct fuse_entry_out));
147 args->in.h.opcode = FUSE_LOOKUP;
148 args->in.h.nodeid = nodeid;
149 args->in.numargs = 1;
150 args->in.args[0].size = name->len + 1;
151 args->in.args[0].value = name->name;
152 args->out.numargs = 1;
153 args->out.args[0].size = sizeof(struct fuse_entry_out);
154 args->out.args[0].value = outarg;
155}
156
157u64 fuse_get_attr_version(struct fuse_conn *fc)
158{
159 u64 curr_version;
160
161 /*
162 * The spin lock isn't actually needed on 64bit archs, but we
163 * don't yet care too much about such optimizations.
164 */
165 spin_lock(&fc->lock);
166 curr_version = fc->attr_version;
167 spin_unlock(&fc->lock);
168
169 return curr_version;
170}
171
/*
 * Check whether the dentry is still valid
 *
 * If the entry validity timeout has expired and the dentry is
 * positive, try to redo the lookup.  If the lookup results in a
 * different inode, then let the VFS invalidate the dentry and redo
 * the lookup once more.  If the lookup results in the same inode,
 * then refresh the attributes, timeouts and mark the dentry valid.
 *
 * Returns 1 if the dentry is valid, 0 if it is invalid, -ECHILD if the
 * check cannot be completed with LOOKUP_RCU set, and -ENOMEM if the forget
 * link cannot be allocated or the lookup request itself fails with -ENOMEM.
 */
static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
{
	struct inode *inode;
	struct dentry *parent;
	struct fuse_conn *fc;
	struct fuse_inode *fi;
	int ret;

	inode = d_inode_rcu(entry);
	if (inode && is_bad_inode(inode))
		goto invalid;
	else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) ||
		 (flags & LOOKUP_REVAL)) {
		/* Timeout expired or revalidation forced: redo the lookup */
		struct fuse_entry_out outarg;
		FUSE_ARGS(args);
		struct fuse_forget_link *forget;
		u64 attr_version;

		/* For negative dentries, always do a fresh lookup */
		if (!inode)
			goto invalid;

		/* A request has to be sent, which is refused under LOOKUP_RCU */
		ret = -ECHILD;
		if (flags & LOOKUP_RCU)
			goto out;

		fc = get_fuse_conn(inode);

		/* Allocated up front so a forget can be queued on mismatch */
		forget = fuse_alloc_forget();
		ret = -ENOMEM;
		if (!forget)
			goto out;

		/* Sampled before the request is sent */
		attr_version = fuse_get_attr_version(fc);

		parent = dget_parent(entry);
		fuse_lookup_init(fc, &args, get_node_id(d_inode(parent)),
				 &entry->d_name, &outarg);
		ret = fuse_simple_request(fc, &args);
		dput(parent);
		/* Zero nodeid is same as -ENOENT */
		if (!ret && !outarg.nodeid)
			ret = -ENOENT;
		if (!ret) {
			fi = get_fuse_inode(inode);
			if (outarg.nodeid != get_node_id(inode)) {
				/*
				 * The name now refers to a different node.
				 * 'forget' is not freed on this path;
				 * presumably fuse_queue_forget() takes it over.
				 */
				fuse_queue_forget(fc, forget, outarg.nodeid, 1);
				goto invalid;
			}
			/* Same node: account for the additional lookup */
			spin_lock(&fc->lock);
			fi->nlookup++;
			spin_unlock(&fc->lock);
		}
		kfree(forget);
		/* -ENOMEM is passed on; other errors make the dentry invalid */
		if (ret == -ENOMEM)
			goto out;
		if (ret || fuse_invalid_attr(&outarg.attr) ||
		    (outarg.attr.mode ^ inode->i_mode) & S_IFMT)
			goto invalid;

		forget_all_cached_acls(inode);
		fuse_change_attributes(inode, &outarg.attr,
				       entry_attr_timeout(&outarg),
				       attr_version);
		fuse_change_entry_timeout(entry, &outarg);
	} else if (inode) {
		/* Still within the timeout: only handle FUSE_I_INIT_RDPLUS */
		fi = get_fuse_inode(inode);
		if (flags & LOOKUP_RCU) {
			/* Bit is only tested here; clearing is left to the non-RCU path */
			if (test_bit(FUSE_I_INIT_RDPLUS, &fi->state))
				return -ECHILD;
		} else if (test_and_clear_bit(FUSE_I_INIT_RDPLUS, &fi->state)) {
			parent = dget_parent(entry);
			fuse_advise_use_readdirplus(d_inode(parent));
			dput(parent);
		}
	}
	ret = 1;
out:
	return ret;

invalid:
	ret = 0;
	goto out;
}
265
266static int invalid_nodeid(u64 nodeid)
267{
268 return !nodeid || nodeid == FUSE_ROOT_ID;
269}
270
271static int fuse_dentry_init(struct dentry *dentry)
272{
273 dentry->d_fsdata = kzalloc(sizeof(union fuse_dentry), GFP_KERNEL);
274
275 return dentry->d_fsdata ? 0 : -ENOMEM;
276}
277static void fuse_dentry_release(struct dentry *dentry)
278{
279 union fuse_dentry *fd = dentry->d_fsdata;
280
281 kfree_rcu(fd, rcu);
282}
283
/*
 * Dentry operations with revalidation: the timeout-based
 * fuse_dentry_revalidate() plus allocation/release of the per-dentry data.
 */
const struct dentry_operations fuse_dentry_operations = {
	.d_revalidate	= fuse_dentry_revalidate,
	.d_init		= fuse_dentry_init,
	.d_release	= fuse_dentry_release,
};
289
/*
 * Dentry operations without ->d_revalidate: only the per-dentry data is
 * allocated and released.  Going by the name, meant for the root dentry.
 */
const struct dentry_operations fuse_root_dentry_operations = {
	.d_init		= fuse_dentry_init,
	.d_release	= fuse_dentry_release,
};
294
/*
 * Return 1 if the file-type bits of mode 'm' denote a regular file,
 * directory, symlink, character device, block device, FIFO or socket;
 * 0 otherwise.
 */
int fuse_valid_type(int m)
{
	/* Files, directories and symlinks */
	if (S_ISREG(m) || S_ISDIR(m) || S_ISLNK(m))
		return 1;

	/* Device nodes */
	if (S_ISCHR(m) || S_ISBLK(m))
		return 1;

	/* IPC endpoints */
	if (S_ISFIFO(m) || S_ISSOCK(m))
		return 1;

	return 0;
}
300
301bool fuse_invalid_attr(struct fuse_attr *attr)
302{
303 return !fuse_valid_type(attr->mode) ||
304 attr->size > LLONG_MAX;
305}
306
307int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name,
308 struct fuse_entry_out *outarg, struct inode **inode)
309{
310 struct fuse_conn *fc = get_fuse_conn_super(sb);
311 FUSE_ARGS(args);
312 struct fuse_forget_link *forget;
313 u64 attr_version;
314 int err;
315
316 *inode = NULL;
317 err = -ENAMETOOLONG;
318 if (name->len > FUSE_NAME_MAX)
319 goto out;
320
321
322 forget = fuse_alloc_forget();
323 err = -ENOMEM;
324 if (!forget)
325 goto out;
326
327 attr_version = fuse_get_attr_version(fc);
328
329 fuse_lookup_init(fc, &args, nodeid, name, outarg);
330 err = fuse_simple_request(fc, &args);
331 /* Zero nodeid is same as -ENOENT, but with valid timeout */
332 if (err || !outarg->nodeid)
333 goto out_put_forget;
334
335 err = -EIO;
336 if (!outarg->nodeid)
337 goto out_put_forget;
338 if (fuse_invalid_attr(&outarg->attr))
339 goto out_put_forget;
340
341 *inode = fuse_iget(sb, outarg->nodeid, outarg->generation,
342 &outarg->attr, entry_attr_timeout(outarg),
343 attr_version);
344 err = -ENOMEM;
345 if (!*inode) {
346 fuse_queue_forget(fc, forget, outarg->nodeid, 1);
347 goto out;
348 }
349 err = 0;
350
351 out_put_forget:
352 kfree(forget);
353 out:
354 return err;
355}
356
357static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
358 unsigned int flags)
359{
360 int err;
361 struct fuse_entry_out outarg;
362 struct inode *inode;
363 struct dentry *newent;
364 bool outarg_valid = true;
365 bool locked;
366
367 locked = fuse_lock_inode(dir);
368 err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name,
369 &outarg, &inode);
370 fuse_unlock_inode(dir, locked);
371 if (err == -ENOENT) {
372 outarg_valid = false;
373 err = 0;
374 }
375 if (err)
376 goto out_err;
377
378 err = -EIO;
379 if (inode && get_node_id(inode) == FUSE_ROOT_ID)
380 goto out_iput;
381
382 newent = d_splice_alias(inode, entry);
383 err = PTR_ERR(newent);
384 if (IS_ERR(newent))
385 goto out_err;
386
387 entry = newent ? newent : entry;
388 if (outarg_valid)
389 fuse_change_entry_timeout(entry, &outarg);
390 else
391 fuse_invalidate_entry_cache(entry);
392
393 fuse_advise_use_readdirplus(dir);
394 return newent;
395
396 out_iput:
397 iput(inode);
398 out_err:
399 return ERR_PTR(err);
400}
401
/*
 * Atomic create+open operation
 *
 * If the filesystem doesn't support this, then fall back to separate
 * 'mknod' + 'open' requests.  (The fallback itself is done by the caller,
 * fuse_atomic_open(), when this function returns -ENOSYS.)
 *
 * Sends FUSE_CREATE with the create flags/mode and the entry name, checks
 * the reply, instantiates the dentry with the new inode and finishes the
 * open.  Returns 0 on success or a negative error code.
 */
static int fuse_create_open(struct inode *dir, struct dentry *entry,
			    struct file *file, unsigned flags,
			    umode_t mode)
{
	int err;
	struct inode *inode;
	struct fuse_conn *fc = get_fuse_conn(dir);
	FUSE_ARGS(args);
	struct fuse_forget_link *forget;
	struct fuse_create_in inarg;
	struct fuse_open_out outopen;
	struct fuse_entry_out outentry;
	struct fuse_file *ff;

	/* Userspace expects S_IFREG in create mode */
	BUG_ON((mode & S_IFMT) != S_IFREG);

	/* Allocated up front so a forget can be queued if fuse_iget() fails */
	forget = fuse_alloc_forget();
	err = -ENOMEM;
	if (!forget)
		goto out_err;

	err = -ENOMEM;
	ff = fuse_file_alloc(fc);
	if (!ff)
		goto out_put_forget_req;

	/* Apply the umask here unless the connection set dont_mask */
	if (!fc->dont_mask)
		mode &= ~current_umask();

	/* O_NOCTTY is not passed on in the request */
	flags &= ~O_NOCTTY;
	memset(&inarg, 0, sizeof(inarg));
	memset(&outentry, 0, sizeof(outentry));
	inarg.flags = flags;
	inarg.mode = mode;
	inarg.umask = current_umask();
	args.in.h.opcode = FUSE_CREATE;
	args.in.h.nodeid = get_node_id(dir);
	/* Input: create parameters followed by the name (length + 1 bytes) */
	args.in.numargs = 2;
	args.in.args[0].size = sizeof(inarg);
	args.in.args[0].value = &inarg;
	args.in.args[1].size = entry->d_name.len + 1;
	args.in.args[1].value = entry->d_name.name;
	/* Output: entry reply followed by open reply */
	args.out.numargs = 2;
	args.out.args[0].size = sizeof(outentry);
	args.out.args[0].value = &outentry;
	args.out.args[1].size = sizeof(outopen);
	args.out.args[1].value = &outopen;
	err = fuse_simple_request(fc, &args);
	if (err)
		goto out_free_ff;

	/* The reply must describe a regular file with a usable node id */
	err = -EIO;
	if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid) ||
	    fuse_invalid_attr(&outentry.attr))
		goto out_free_ff;

	ff->fh = outopen.fh;
	ff->nodeid = outentry.nodeid;
	ff->open_flags = outopen.open_flags;
	inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,
			  &outentry.attr, entry_attr_timeout(&outentry), 0);
	if (!inode) {
		/*
		 * No inode: release the file that userspace already opened
		 * and queue a forget for the lookup count.  'ff' and 'forget'
		 * are not freed here; presumably the callees take them over.
		 */
		flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
		fuse_sync_release(ff, flags);
		fuse_queue_forget(fc, forget, outentry.nodeid, 1);
		err = -ENOMEM;
		goto out_err;
	}
	kfree(forget);
	d_instantiate(entry, inode);
	fuse_change_entry_timeout(entry, &outentry);
	/* The directory changed, so its cached attributes are stale */
	fuse_invalidate_attr(dir);
	err = finish_open(file, entry, generic_file_open);
	if (err) {
		fuse_sync_release(ff, flags);
	} else {
		file->private_data = ff;
		fuse_finish_open(inode, file);
	}
	return err;

out_free_ff:
	fuse_file_free(ff);
out_put_forget_req:
	kfree(forget);
out_err:
	return err;
}
497
/* Defined further down; needed here for the mknod fallback */
static int fuse_mknod(struct inode *, struct dentry *, umode_t, dev_t);

/*
 * ->atomic_open: look the entry up if that is still pending, then create
 * and open it in one request where possible.
 *
 * If O_CREAT is not set or the entry turns out to be positive, no open is
 * done here (finish_no_open()).  Otherwise FUSE_CREATE is tried; when the
 * connection is marked no_create, or the request returns -ENOSYS (which
 * sets no_create), the file is created with fuse_mknod() instead and the
 * open is again left to the caller via finish_no_open().
 */
static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
			    struct file *file, unsigned flags,
			    umode_t mode)
{
	int err;
	struct fuse_conn *fc = get_fuse_conn(dir);
	struct dentry *res = NULL;

	if (d_in_lookup(entry)) {
		res = fuse_lookup(dir, entry, 0);
		if (IS_ERR(res))
			return PTR_ERR(res);

		/* Continue with the dentry returned by the lookup, if any */
		if (res)
			entry = res;
	}

	if (!(flags & O_CREAT) || d_really_is_positive(entry))
		goto no_open;

	/* Only creates */
	file->f_mode |= FMODE_CREATED;

	if (fc->no_create)
		goto mknod;

	err = fuse_create_open(dir, entry, file, flags, mode);
	if (err == -ENOSYS) {
		/* Remember that FUSE_CREATE is unsupported on this connection */
		fc->no_create = 1;
		goto mknod;
	}
out_dput:
	/* Drop the dentry returned by fuse_lookup() (NULL if there was none) */
	dput(res);
	return err;

mknod:
	err = fuse_mknod(dir, entry, mode, 0);
	if (err)
		goto out_dput;
no_open:
	/* 'res' is handed to finish_no_open() instead of being dput() here */
	return finish_no_open(file, res);
}
541
/*
 * Code shared between mknod, mkdir, symlink and link
 *
 * The caller sets the opcode and input arguments in 'args'; this function
 * adds the parent node id and the output argument, sends the request,
 * checks the reply and splices the new inode into the dcache.
 *
 * 'mode' is only used for its file-type bits, which must match the type in
 * the reply.  Returns 0 on success; -ENOMEM, -EIO for an unacceptable
 * reply, or the error of the request / d_splice_alias() otherwise.
 */
static int create_new_entry(struct fuse_conn *fc, struct fuse_args *args,
			    struct inode *dir, struct dentry *entry,
			    umode_t mode)
{
	struct fuse_entry_out outarg;
	struct inode *inode;
	struct dentry *d;
	int err;
	struct fuse_forget_link *forget;

	/* Allocated up front so a forget can be queued if fuse_iget() fails */
	forget = fuse_alloc_forget();
	if (!forget)
		return -ENOMEM;

	memset(&outarg, 0, sizeof(outarg));
	args->in.h.nodeid = get_node_id(dir);
	args->out.numargs = 1;
	args->out.args[0].size = sizeof(outarg);
	args->out.args[0].value = &outarg;
	err = fuse_simple_request(fc, args);
	if (err)
		goto out_put_forget_req;

	err = -EIO;
	if (invalid_nodeid(outarg.nodeid) || fuse_invalid_attr(&outarg.attr))
		goto out_put_forget_req;

	/* The created object must have the file type that was asked for */
	if ((outarg.attr.mode ^ mode) & S_IFMT)
		goto out_put_forget_req;

	inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
			  &outarg.attr, entry_attr_timeout(&outarg), 0);
	if (!inode) {
		/*
		 * 'forget' is not freed on this path; presumably
		 * fuse_queue_forget() takes it over.
		 */
		fuse_queue_forget(fc, forget, outarg.nodeid, 1);
		return -ENOMEM;
	}
	kfree(forget);

	d_drop(entry);
	d = d_splice_alias(inode, entry);
	if (IS_ERR(d))
		return PTR_ERR(d);

	/* Set the timeout on whichever dentry the inode was attached to */
	if (d) {
		fuse_change_entry_timeout(d, &outarg);
		dput(d);
	} else {
		fuse_change_entry_timeout(entry, &outarg);
	}
	/* The directory changed, so its cached attributes are stale */
	fuse_invalidate_attr(dir);
	return 0;

 out_put_forget_req:
	kfree(forget);
	return err;
}
601
602static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode,
603 dev_t rdev)
604{
605 struct fuse_mknod_in inarg;
606 struct fuse_conn *fc = get_fuse_conn(dir);
607 FUSE_ARGS(args);
608
609 if (!fc->dont_mask)
610 mode &= ~current_umask();
611
612 memset(&inarg, 0, sizeof(inarg));
613 inarg.mode = mode;
614 inarg.rdev = new_encode_dev(rdev);
615 inarg.umask = current_umask();
616 args.in.h.opcode = FUSE_MKNOD;
617 args.in.numargs = 2;
618 args.in.args[0].size = sizeof(inarg);
619 args.in.args[0].value = &inarg;
620 args.in.args[1].size = entry->d_name.len + 1;
621 args.in.args[1].value = entry->d_name.name;
622 return create_new_entry(fc, &args, dir, entry, mode);
623}
624
625static int fuse_create(struct inode *dir, struct dentry *entry, umode_t mode,
626 bool excl)
627{
628 return fuse_mknod(dir, entry, mode, 0);
629}
630
631static int fuse_mkdir(struct inode *dir, struct dentry *entry, umode_t mode)
632{
633 struct fuse_mkdir_in inarg;
634 struct fuse_conn *fc = get_fuse_conn(dir);
635 FUSE_ARGS(args);
636
637 if (!fc->dont_mask)
638 mode &= ~current_umask();
639
640 memset(&inarg, 0, sizeof(inarg));
641 inarg.mode = mode;
642 inarg.umask = current_umask();
643 args.in.h.opcode = FUSE_MKDIR;
644 args.in.numargs = 2;
645 args.in.args[0].size = sizeof(inarg);
646 args.in.args[0].value = &inarg;
647 args.in.args[1].size = entry->d_name.len + 1;
648 args.in.args[1].value = entry->d_name.name;
649 return create_new_entry(fc, &args, dir, entry, S_IFDIR);
650}
651
652static int fuse_symlink(struct inode *dir, struct dentry *entry,
653 const char *link)
654{
655 struct fuse_conn *fc = get_fuse_conn(dir);
656 unsigned len = strlen(link) + 1;
657 FUSE_ARGS(args);
658
659 args.in.h.opcode = FUSE_SYMLINK;
660 args.in.numargs = 2;
661 args.in.args[0].size = entry->d_name.len + 1;
662 args.in.args[0].value = entry->d_name.name;
663 args.in.args[1].size = len;
664 args.in.args[1].value = link;
665 return create_new_entry(fc, &args, dir, entry, S_IFLNK);
666}
667
668void fuse_update_ctime(struct inode *inode)
669{
670 if (!IS_NOCMTIME(inode)) {
671 inode->i_ctime = current_time(inode);
672 mark_inode_dirty_sync(inode);
673 }
674}
675
676static int fuse_unlink(struct inode *dir, struct dentry *entry)
677{
678 int err;
679 struct fuse_conn *fc = get_fuse_conn(dir);
680 FUSE_ARGS(args);
681
682 args.in.h.opcode = FUSE_UNLINK;
683 args.in.h.nodeid = get_node_id(dir);
684 args.in.numargs = 1;
685 args.in.args[0].size = entry->d_name.len + 1;
686 args.in.args[0].value = entry->d_name.name;
687 err = fuse_simple_request(fc, &args);
688 if (!err) {
689 struct inode *inode = d_inode(entry);
690 struct fuse_inode *fi = get_fuse_inode(inode);
691
692 spin_lock(&fc->lock);
693 fi->attr_version = ++fc->attr_version;
694 /*
695 * If i_nlink == 0 then unlink doesn't make sense, yet this can
696 * happen if userspace filesystem is careless. It would be
697 * difficult to enforce correct nlink usage so just ignore this
698 * condition here
699 */
700 if (inode->i_nlink > 0)
701 drop_nlink(inode);
702 spin_unlock(&fc->lock);
703 fuse_invalidate_attr(inode);
704 fuse_invalidate_attr(dir);
705 fuse_invalidate_entry_cache(entry);
706 fuse_update_ctime(inode);
707 } else if (err == -EINTR)
708 fuse_invalidate_entry(entry);
709 return err;
710}
711
712static int fuse_rmdir(struct inode *dir, struct dentry *entry)
713{
714 int err;
715 struct fuse_conn *fc = get_fuse_conn(dir);
716 FUSE_ARGS(args);
717
718 args.in.h.opcode = FUSE_RMDIR;
719 args.in.h.nodeid = get_node_id(dir);
720 args.in.numargs = 1;
721 args.in.args[0].size = entry->d_name.len + 1;
722 args.in.args[0].value = entry->d_name.name;
723 err = fuse_simple_request(fc, &args);
724 if (!err) {
725 clear_nlink(d_inode(entry));
726 fuse_invalidate_attr(dir);
727 fuse_invalidate_entry_cache(entry);
728 } else if (err == -EINTR)
729 fuse_invalidate_entry(entry);
730 return err;
731}
732
733static int fuse_rename_common(struct inode *olddir, struct dentry *oldent,
734 struct inode *newdir, struct dentry *newent,
735 unsigned int flags, int opcode, size_t argsize)
736{
737 int err;
738 struct fuse_rename2_in inarg;
739 struct fuse_conn *fc = get_fuse_conn(olddir);
740 FUSE_ARGS(args);
741
742 memset(&inarg, 0, argsize);
743 inarg.newdir = get_node_id(newdir);
744 inarg.flags = flags;
745 args.in.h.opcode = opcode;
746 args.in.h.nodeid = get_node_id(olddir);
747 args.in.numargs = 3;
748 args.in.args[0].size = argsize;
749 args.in.args[0].value = &inarg;
750 args.in.args[1].size = oldent->d_name.len + 1;
751 args.in.args[1].value = oldent->d_name.name;
752 args.in.args[2].size = newent->d_name.len + 1;
753 args.in.args[2].value = newent->d_name.name;
754 err = fuse_simple_request(fc, &args);
755 if (!err) {
756 /* ctime changes */
757 fuse_invalidate_attr(d_inode(oldent));
758 fuse_update_ctime(d_inode(oldent));
759
760 if (flags & RENAME_EXCHANGE) {
761 fuse_invalidate_attr(d_inode(newent));
762 fuse_update_ctime(d_inode(newent));
763 }
764
765 fuse_invalidate_attr(olddir);
766 if (olddir != newdir)
767 fuse_invalidate_attr(newdir);
768
769 /* newent will end up negative */
770 if (!(flags & RENAME_EXCHANGE) && d_really_is_positive(newent)) {
771 fuse_invalidate_attr(d_inode(newent));
772 fuse_invalidate_entry_cache(newent);
773 fuse_update_ctime(d_inode(newent));
774 }
775 } else if (err == -EINTR) {
776 /* If request was interrupted, DEITY only knows if the
777 rename actually took place. If the invalidation
778 fails (e.g. some process has CWD under the renamed
779 directory), then there can be inconsistency between
780 the dcache and the real filesystem. Tough luck. */
781 fuse_invalidate_entry(oldent);
782 if (d_really_is_positive(newent))
783 fuse_invalidate_entry(newent);
784 }
785
786 return err;
787}
788
789static int fuse_rename2(struct inode *olddir, struct dentry *oldent,
790 struct inode *newdir, struct dentry *newent,
791 unsigned int flags)
792{
793 struct fuse_conn *fc = get_fuse_conn(olddir);
794 int err;
795
796 if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
797 return -EINVAL;
798
799 if (flags) {
800 if (fc->no_rename2 || fc->minor < 23)
801 return -EINVAL;
802
803 err = fuse_rename_common(olddir, oldent, newdir, newent, flags,
804 FUSE_RENAME2,
805 sizeof(struct fuse_rename2_in));
806 if (err == -ENOSYS) {
807 fc->no_rename2 = 1;
808 err = -EINVAL;
809 }
810 } else {
811 err = fuse_rename_common(olddir, oldent, newdir, newent, 0,
812 FUSE_RENAME,
813 sizeof(struct fuse_rename_in));
814 }
815
816 return err;
817}
818
819static int fuse_link(struct dentry *entry, struct inode *newdir,
820 struct dentry *newent)
821{
822 int err;
823 struct fuse_link_in inarg;
824 struct inode *inode = d_inode(entry);
825 struct fuse_conn *fc = get_fuse_conn(inode);
826 FUSE_ARGS(args);
827
828 memset(&inarg, 0, sizeof(inarg));
829 inarg.oldnodeid = get_node_id(inode);
830 args.in.h.opcode = FUSE_LINK;
831 args.in.numargs = 2;
832 args.in.args[0].size = sizeof(inarg);
833 args.in.args[0].value = &inarg;
834 args.in.args[1].size = newent->d_name.len + 1;
835 args.in.args[1].value = newent->d_name.name;
836 err = create_new_entry(fc, &args, newdir, newent, inode->i_mode);
837 /* Contrary to "normal" filesystems it can happen that link
838 makes two "logical" inodes point to the same "physical"
839 inode. We invalidate the attributes of the old one, so it
840 will reflect changes in the backing inode (link count,
841 etc.)
842 */
843 if (!err) {
844 struct fuse_inode *fi = get_fuse_inode(inode);
845
846 spin_lock(&fc->lock);
847 fi->attr_version = ++fc->attr_version;
848 if (likely(inode->i_nlink < UINT_MAX))
849 inc_nlink(inode);
850 spin_unlock(&fc->lock);
851 fuse_invalidate_attr(inode);
852 fuse_update_ctime(inode);
853 } else if (err == -EINTR) {
854 fuse_invalidate_attr(inode);
855 }
856 return err;
857}
858
859static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr,
860 struct kstat *stat)
861{
862 unsigned int blkbits;
863 struct fuse_conn *fc = get_fuse_conn(inode);
864
865 /* see the comment in fuse_change_attributes() */
866 if (fc->writeback_cache && S_ISREG(inode->i_mode)) {
867 attr->size = i_size_read(inode);
868 attr->mtime = inode->i_mtime.tv_sec;
869 attr->mtimensec = inode->i_mtime.tv_nsec;
870 attr->ctime = inode->i_ctime.tv_sec;
871 attr->ctimensec = inode->i_ctime.tv_nsec;
872 }
873
874 stat->dev = inode->i_sb->s_dev;
875 stat->ino = attr->ino;
876 stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
877 stat->nlink = attr->nlink;
878 stat->uid = make_kuid(fc->user_ns, attr->uid);
879 stat->gid = make_kgid(fc->user_ns, attr->gid);
880 stat->rdev = inode->i_rdev;
881 stat->atime.tv_sec = attr->atime;
882 stat->atime.tv_nsec = attr->atimensec;
883 stat->mtime.tv_sec = attr->mtime;
884 stat->mtime.tv_nsec = attr->mtimensec;
885 stat->ctime.tv_sec = attr->ctime;
886 stat->ctime.tv_nsec = attr->ctimensec;
887 stat->size = attr->size;
888 stat->blocks = attr->blocks;
889
890 if (attr->blksize != 0)
891 blkbits = ilog2(attr->blksize);
892 else
893 blkbits = inode->i_sb->s_blocksize_bits;
894
895 stat->blksize = 1 << blkbits;
896}
897
898static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
899 struct file *file)
900{
901 int err;
902 struct fuse_getattr_in inarg;
903 struct fuse_attr_out outarg;
904 struct fuse_conn *fc = get_fuse_conn(inode);
905 FUSE_ARGS(args);
906 u64 attr_version;
907
908 attr_version = fuse_get_attr_version(fc);
909
910 memset(&inarg, 0, sizeof(inarg));
911 memset(&outarg, 0, sizeof(outarg));
912 /* Directories have separate file-handle space */
913 if (file && S_ISREG(inode->i_mode)) {
914 struct fuse_file *ff = file->private_data;
915
916 inarg.getattr_flags |= FUSE_GETATTR_FH;
917 inarg.fh = ff->fh;
918 }
919 args.in.h.opcode = FUSE_GETATTR;
920 args.in.h.nodeid = get_node_id(inode);
921 args.in.numargs = 1;
922 args.in.args[0].size = sizeof(inarg);
923 args.in.args[0].value = &inarg;
924 args.out.numargs = 1;
925 args.out.args[0].size = sizeof(outarg);
926 args.out.args[0].value = &outarg;
927 err = fuse_simple_request(fc, &args);
928 if (!err) {
929 if (fuse_invalid_attr(&outarg.attr) ||
930 (inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
931 make_bad_inode(inode);
932 err = -EIO;
933 } else {
934 fuse_change_attributes(inode, &outarg.attr,
935 attr_timeout(&outarg),
936 attr_version);
937 if (stat)
938 fuse_fillattr(inode, &outarg.attr, stat);
939 }
940 }
941 return err;
942}
943
944static int fuse_update_get_attr(struct inode *inode, struct file *file,
945 struct kstat *stat, unsigned int flags)
946{
947 struct fuse_inode *fi = get_fuse_inode(inode);
948 int err = 0;
949 bool sync;
950
951 if (flags & AT_STATX_FORCE_SYNC)
952 sync = true;
953 else if (flags & AT_STATX_DONT_SYNC)
954 sync = false;
955 else
956 sync = time_before64(fi->i_time, get_jiffies_64());
957
958 if (sync) {
959 forget_all_cached_acls(inode);
960 err = fuse_do_getattr(inode, stat, file);
961 } else if (stat) {
962 generic_fillattr(inode, stat);
963 stat->mode = fi->orig_i_mode;
964 stat->ino = fi->orig_ino;
965 }
966
967 return err;
968}
969
970int fuse_update_attributes(struct inode *inode, struct file *file)
971{
972 return fuse_update_get_attr(inode, file, NULL, 0);
973}
974
/*
 * Invalidate the cached entry 'name' in the directory 'parent_nodeid'.
 *
 * If 'child_nodeid' is non-zero and the entry is positive, the entry is
 * additionally deleted from the dcache, provided it refers to that node,
 * is not a mountpoint and (for directories) is empty.
 *
 * 'name->hash' is overwritten here.
 *
 * Returns 0 on success; -ENOENT if the parent inode, its dentry or the
 * entry is not found, or if the entry refers to a different node;
 * -ENOTDIR if the parent is not a directory; -EBUSY for a mountpoint;
 * -ENOTEMPTY for a non-empty directory.
 */
int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
			     u64 child_nodeid, struct qstr *name)
{
	int err = -ENOTDIR;
	struct inode *parent;
	struct dentry *dir;
	struct dentry *entry;

	parent = ilookup5(sb, parent_nodeid, fuse_inode_eq, &parent_nodeid);
	if (!parent)
		return -ENOENT;

	inode_lock(parent);
	if (!S_ISDIR(parent->i_mode))
		goto unlock;

	err = -ENOENT;
	dir = d_find_alias(parent);
	if (!dir)
		goto unlock;

	/* The hash has to be computed before d_lookup() can be used */
	name->hash = full_name_hash(dir, name->name, name->len);
	entry = d_lookup(dir, name);
	dput(dir);
	if (!entry)
		goto unlock;

	fuse_invalidate_attr(parent);
	fuse_invalidate_entry(entry);

	if (child_nodeid != 0 && d_really_is_positive(entry)) {
		/* Deletion was requested: check the entry under its inode lock */
		inode_lock(d_inode(entry));
		if (get_node_id(d_inode(entry)) != child_nodeid) {
			err = -ENOENT;
			goto badentry;
		}
		if (d_mountpoint(entry)) {
			err = -EBUSY;
			goto badentry;
		}
		if (d_is_dir(entry)) {
			shrink_dcache_parent(entry);
			if (!simple_empty(entry)) {
				err = -ENOTEMPTY;
				goto badentry;
			}
			d_inode(entry)->i_flags |= S_DEAD;
		}
		dont_mount(entry);
		clear_nlink(d_inode(entry));
		err = 0;
 badentry:
		inode_unlock(d_inode(entry));
		/* d_delete() is called only after the inode lock is dropped */
		if (!err)
			d_delete(entry);
	} else {
		err = 0;
	}
	dput(entry);

 unlock:
	inode_unlock(parent);
	iput(parent);
	return err;
}
1040
1041/*
1042 * Calling into a user-controlled filesystem gives the filesystem
1043 * daemon ptrace-like capabilities over the current process. This
1044 * means, that the filesystem daemon is able to record the exact
1045 * filesystem operations performed, and can also control the behavior
1046 * of the requester process in otherwise impossible ways. For example
1047 * it can delay the operation for arbitrary length of time allowing
1048 * DoS against the requester.
1049 *
1050 * For this reason only those processes can call into the filesystem,
1051 * for which the owner of the mount has ptrace privilege. This
1052 * excludes processes started by other users, suid or sgid processes.
1053 */
1054int fuse_allow_current_process(struct fuse_conn *fc)
1055{
1056 const struct cred *cred;
1057
1058 if (fc->allow_other)
1059 return current_in_userns(fc->user_ns);
1060
1061 cred = current_cred();
1062 if (uid_eq(cred->euid, fc->user_id) &&
1063 uid_eq(cred->suid, fc->user_id) &&
1064 uid_eq(cred->uid, fc->user_id) &&
1065 gid_eq(cred->egid, fc->group_id) &&
1066 gid_eq(cred->sgid, fc->group_id) &&
1067 gid_eq(cred->gid, fc->group_id))
1068 return 1;
1069
1070 return 0;
1071}
1072
1073static int fuse_access(struct inode *inode, int mask)
1074{
1075 struct fuse_conn *fc = get_fuse_conn(inode);
1076 FUSE_ARGS(args);
1077 struct fuse_access_in inarg;
1078 int err;
1079
1080 BUG_ON(mask & MAY_NOT_BLOCK);
1081
1082 if (fc->no_access)
1083 return 0;
1084
1085 memset(&inarg, 0, sizeof(inarg));
1086 inarg.mask = mask & (MAY_READ | MAY_WRITE | MAY_EXEC);
1087 args.in.h.opcode = FUSE_ACCESS;
1088 args.in.h.nodeid = get_node_id(inode);
1089 args.in.numargs = 1;
1090 args.in.args[0].size = sizeof(inarg);
1091 args.in.args[0].value = &inarg;
1092 err = fuse_simple_request(fc, &args);
1093 if (err == -ENOSYS) {
1094 fc->no_access = 1;
1095 err = 0;
1096 }
1097 return err;
1098}
1099
1100static int fuse_perm_getattr(struct inode *inode, int mask)
1101{
1102 if (mask & MAY_NOT_BLOCK)
1103 return -ECHILD;
1104
1105 forget_all_cached_acls(inode);
1106 return fuse_do_getattr(inode, NULL, NULL);
1107}
1108
/*
 * Check permission.  The two basic access models of FUSE are:
 *
 * 1) Local access checking ('default_permissions' mount option) based
 *    on file mode.  This is the plain old disk filesystem permission
 *    model.
 *
 * 2) "Remote" access checking, where server is responsible for
 *    checking permission in each inode operation.  An exception to this
 *    is if ->permission() was invoked from sys_access() in which case an
 *    access request is sent.  Execute permission is still checked
 *    locally based on file mode.
 *
 * Returns 0 if access is allowed, -EACCES if it is denied, or another
 * error code from refreshing the attributes or from the access request.
 */
static int fuse_permission(struct inode *inode, int mask)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	bool refreshed = false;
	int err = 0;

	if (!fuse_allow_current_process(fc))
		return -EACCES;

	/*
	 * If attributes are needed, refresh them before proceeding
	 */
	if (fc->default_permissions ||
	    ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) {
		struct fuse_inode *fi = get_fuse_inode(inode);

		/* Refresh only if the attribute timeout has expired */
		if (time_before64(fi->i_time, get_jiffies_64())) {
			refreshed = true;

			err = fuse_perm_getattr(inode, mask);
			if (err)
				return err;
		}
	}

	if (fc->default_permissions) {
		err = generic_permission(inode, mask);

		/* If permission is denied, try to refresh file
		   attributes.  This is also needed, because the root
		   node will at first have no permissions */
		if (err == -EACCES && !refreshed) {
			err = fuse_perm_getattr(inode, mask);
			if (!err)
				err = generic_permission(inode, mask);
		}

		/* Note: the opposite of the above test does not
		   exist.  So if permissions are revoked this won't be
		   noticed immediately, only after the attribute
		   timeout has expired */
	} else if (mask & (MAY_ACCESS | MAY_CHDIR)) {
		err = fuse_access(inode, mask);
	} else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) {
		/* Executing a regular file needs at least one execute bit */
		if (!(inode->i_mode & S_IXUGO)) {
			/* The mode is already fresh, so the answer is final */
			if (refreshed)
				return -EACCES;

			/* Otherwise refresh once and look at the mode again */
			err = fuse_perm_getattr(inode, mask);
			if (!err && !(inode->i_mode & S_IXUGO))
				return -EACCES;
		}
	}
	return err;
}
1177
1178static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
1179 struct dir_context *ctx)
1180{
1181 while (nbytes >= FUSE_NAME_OFFSET) {
1182 struct fuse_dirent *dirent = (struct fuse_dirent *) buf;
1183 size_t reclen = FUSE_DIRENT_SIZE(dirent);
1184 if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
1185 return -EIO;
1186 if (reclen > nbytes)
1187 break;
1188 if (memchr(dirent->name, '/', dirent->namelen) != NULL)
1189 return -EIO;
1190
1191 if (!dir_emit(ctx, dirent->name, dirent->namelen,
1192 dirent->ino, dirent->type))
1193 break;
1194
1195 buf += reclen;
1196 nbytes -= reclen;
1197 ctx->pos = dirent->off;
1198 }
1199
1200 return 0;
1201}
1202
1203static int fuse_direntplus_link(struct file *file,
1204 struct fuse_direntplus *direntplus,
1205 u64 attr_version)
1206{
1207 struct fuse_entry_out *o = &direntplus->entry_out;
1208 struct fuse_dirent *dirent = &direntplus->dirent;
1209 struct dentry *parent = file->f_path.dentry;
1210 struct qstr name = QSTR_INIT(dirent->name, dirent->namelen);
1211 struct dentry *dentry;
1212 struct dentry *alias;
1213 struct inode *dir = d_inode(parent);
1214 struct fuse_conn *fc;
1215 struct inode *inode;
1216 DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
1217
1218 if (!o->nodeid) {
1219 /*
1220 * Unlike in the case of fuse_lookup, zero nodeid does not mean
1221 * ENOENT. Instead, it only means the userspace filesystem did
1222 * not want to return attributes/handle for this entry.
1223 *
1224 * So do nothing.
1225 */
1226 return 0;
1227 }
1228
1229 if (name.name[0] == '.') {
1230 /*
1231 * We could potentially refresh the attributes of the directory
1232 * and its parent?
1233 */
1234 if (name.len == 1)
1235 return 0;
1236 if (name.name[1] == '.' && name.len == 2)
1237 return 0;
1238 }
1239
1240 if (invalid_nodeid(o->nodeid))
1241 return -EIO;
1242 if (fuse_invalid_attr(&o->attr))
1243 return -EIO;
1244
1245 fc = get_fuse_conn(dir);
1246
1247 name.hash = full_name_hash(parent, name.name, name.len);
1248 dentry = d_lookup(parent, &name);
1249 if (!dentry) {
1250retry:
1251 dentry = d_alloc_parallel(parent, &name, &wq);
1252 if (IS_ERR(dentry))
1253 return PTR_ERR(dentry);
1254 }
1255 if (!d_in_lookup(dentry)) {
1256 struct fuse_inode *fi;
1257 inode = d_inode(dentry);
1258 if (!inode ||
1259 get_node_id(inode) != o->nodeid ||
1260 ((o->attr.mode ^ inode->i_mode) & S_IFMT)) {
1261 d_invalidate(dentry);
1262 dput(dentry);
1263 goto retry;
1264 }
1265 if (is_bad_inode(inode)) {
1266 dput(dentry);
1267 return -EIO;
1268 }
1269
1270 fi = get_fuse_inode(inode);
1271 spin_lock(&fc->lock);
1272 fi->nlookup++;
1273 spin_unlock(&fc->lock);
1274
1275 forget_all_cached_acls(inode);
1276 fuse_change_attributes(inode, &o->attr,
1277 entry_attr_timeout(o),
1278 attr_version);
1279 /*
1280 * The other branch comes via fuse_iget()
1281 * which bumps nlookup inside
1282 */
1283 } else {
1284 inode = fuse_iget(dir->i_sb, o->nodeid, o->generation,
1285 &o->attr, entry_attr_timeout(o),
1286 attr_version);
1287 if (!inode)
1288 inode = ERR_PTR(-ENOMEM);
1289
1290 alias = d_splice_alias(inode, dentry);
1291 d_lookup_done(dentry);
1292 if (alias) {
1293 dput(dentry);
1294 dentry = alias;
1295 }
1296 if (IS_ERR(dentry))
1297 return PTR_ERR(dentry);
1298 }
1299 if (fc->readdirplus_auto)
1300 set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state);
1301 fuse_change_entry_timeout(dentry, o);
1302
1303 dput(dentry);
1304 return 0;
1305}
1306
1307static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
1308 struct dir_context *ctx, u64 attr_version)
1309{
1310 struct fuse_direntplus *direntplus;
1311 struct fuse_dirent *dirent;
1312 size_t reclen;
1313 int over = 0;
1314 int ret;
1315
1316 while (nbytes >= FUSE_NAME_OFFSET_DIRENTPLUS) {
1317 direntplus = (struct fuse_direntplus *) buf;
1318 dirent = &direntplus->dirent;
1319 reclen = FUSE_DIRENTPLUS_SIZE(direntplus);
1320
1321 if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
1322 return -EIO;
1323 if (reclen > nbytes)
1324 break;
1325 if (memchr(dirent->name, '/', dirent->namelen) != NULL)
1326 return -EIO;
1327
1328 if (!over) {
1329 /* We fill entries into dstbuf only as much as
1330 it can hold. But we still continue iterating
1331 over remaining entries to link them. If not,
1332 we need to send a FORGET for each of those
1333 which we did not link.
1334 */
1335 over = !dir_emit(ctx, dirent->name, dirent->namelen,
1336 dirent->ino, dirent->type);
1337 if (!over)
1338 ctx->pos = dirent->off;
1339 }
1340
1341 buf += reclen;
1342 nbytes -= reclen;
1343
1344 ret = fuse_direntplus_link(file, direntplus, attr_version);
1345 if (ret)
1346 fuse_force_forget(file, direntplus->entry_out.nodeid);
1347 }
1348
1349 return 0;
1350}
1351
1352static int fuse_readdir(struct file *file, struct dir_context *ctx)
1353{
1354 int plus, err;
1355 size_t nbytes;
1356 struct page *page;
1357 struct inode *inode = file_inode(file);
1358 struct fuse_conn *fc = get_fuse_conn(inode);
1359 struct fuse_req *req;
1360 u64 attr_version = 0;
1361 bool locked;
1362
1363 if (is_bad_inode(inode))
1364 return -EIO;
1365
1366 req = fuse_get_req(fc, 1);
1367 if (IS_ERR(req))
1368 return PTR_ERR(req);
1369
1370 page = alloc_page(GFP_KERNEL);
1371 if (!page) {
1372 fuse_put_request(fc, req);
1373 return -ENOMEM;
1374 }
1375
1376 plus = fuse_use_readdirplus(inode, ctx);
1377 req->out.argpages = 1;
1378 req->num_pages = 1;
1379 req->pages[0] = page;
1380 req->page_descs[0].length = PAGE_SIZE;
1381 if (plus) {
1382 attr_version = fuse_get_attr_version(fc);
1383 fuse_read_fill(req, file, ctx->pos, PAGE_SIZE,
1384 FUSE_READDIRPLUS);
1385 } else {
1386 fuse_read_fill(req, file, ctx->pos, PAGE_SIZE,
1387 FUSE_READDIR);
1388 }
1389 locked = fuse_lock_inode(inode);
1390 fuse_request_send(fc, req);
1391 fuse_unlock_inode(inode, locked);
1392 nbytes = req->out.args[0].size;
1393 err = req->out.h.error;
1394 fuse_put_request(fc, req);
1395 if (!err) {
1396 if (plus) {
1397 err = parse_dirplusfile(page_address(page), nbytes,
1398 file, ctx,
1399 attr_version);
1400 } else {
1401 err = parse_dirfile(page_address(page), nbytes, file,
1402 ctx);
1403 }
1404 }
1405
1406 __free_page(page);
1407 fuse_invalidate_atime(inode);
1408 return err;
1409}
1410
1411static const char *fuse_get_link(struct dentry *dentry,
1412 struct inode *inode,
1413 struct delayed_call *done)
1414{
1415 struct fuse_conn *fc = get_fuse_conn(inode);
1416 FUSE_ARGS(args);
1417 char *link;
1418 ssize_t ret;
1419
1420 if (!dentry)
1421 return ERR_PTR(-ECHILD);
1422
1423 link = kmalloc(PAGE_SIZE, GFP_KERNEL);
1424 if (!link)
1425 return ERR_PTR(-ENOMEM);
1426
1427 args.in.h.opcode = FUSE_READLINK;
1428 args.in.h.nodeid = get_node_id(inode);
1429 args.out.argvar = 1;
1430 args.out.numargs = 1;
1431 args.out.args[0].size = PAGE_SIZE - 1;
1432 args.out.args[0].value = link;
1433 ret = fuse_simple_request(fc, &args);
1434 if (ret < 0) {
1435 kfree(link);
1436 link = ERR_PTR(ret);
1437 } else {
1438 link[ret] = '\0';
1439 set_delayed_call(done, kfree_link, link);
1440 }
1441 fuse_invalidate_atime(inode);
1442 return link;
1443}
1444
1445static int fuse_dir_open(struct inode *inode, struct file *file)
1446{
1447 return fuse_open_common(inode, file, true);
1448}
1449
1450static int fuse_dir_release(struct inode *inode, struct file *file)
1451{
1452 fuse_release_common(file, true);
1453
1454 return 0;
1455}
1456
1457static int fuse_dir_fsync(struct file *file, loff_t start, loff_t end,
1458 int datasync)
1459{
1460 return fuse_fsync_common(file, start, end, datasync, 1);
1461}
1462
1463static long fuse_dir_ioctl(struct file *file, unsigned int cmd,
1464 unsigned long arg)
1465{
1466 struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
1467
1468 /* FUSE_IOCTL_DIR only supported for API version >= 7.18 */
1469 if (fc->minor < 18)
1470 return -ENOTTY;
1471
1472 return fuse_ioctl_common(file, cmd, arg, FUSE_IOCTL_DIR);
1473}
1474
1475static long fuse_dir_compat_ioctl(struct file *file, unsigned int cmd,
1476 unsigned long arg)
1477{
1478 struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
1479
1480 if (fc->minor < 18)
1481 return -ENOTTY;
1482
1483 return fuse_ioctl_common(file, cmd, arg,
1484 FUSE_IOCTL_COMPAT | FUSE_IOCTL_DIR);
1485}
1486
1487static bool update_mtime(unsigned ivalid, bool trust_local_mtime)
1488{
1489 /* Always update if mtime is explicitly set */
1490 if (ivalid & ATTR_MTIME_SET)
1491 return true;
1492
1493 /* Or if kernel i_mtime is the official one */
1494 if (trust_local_mtime)
1495 return true;
1496
1497 /* If it's an open(O_TRUNC) or an ftruncate(), don't update */
1498 if ((ivalid & ATTR_SIZE) && (ivalid & (ATTR_OPEN | ATTR_FILE)))
1499 return false;
1500
1501 /* In all other cases update */
1502 return true;
1503}
1504
1505static void iattr_to_fattr(struct fuse_conn *fc, struct iattr *iattr,
1506 struct fuse_setattr_in *arg, bool trust_local_cmtime)
1507{
1508 unsigned ivalid = iattr->ia_valid;
1509
1510 if (ivalid & ATTR_MODE)
1511 arg->valid |= FATTR_MODE, arg->mode = iattr->ia_mode;
1512 if (ivalid & ATTR_UID)
1513 arg->valid |= FATTR_UID, arg->uid = from_kuid(fc->user_ns, iattr->ia_uid);
1514 if (ivalid & ATTR_GID)
1515 arg->valid |= FATTR_GID, arg->gid = from_kgid(fc->user_ns, iattr->ia_gid);
1516 if (ivalid & ATTR_SIZE)
1517 arg->valid |= FATTR_SIZE, arg->size = iattr->ia_size;
1518 if (ivalid & ATTR_ATIME) {
1519 arg->valid |= FATTR_ATIME;
1520 arg->atime = iattr->ia_atime.tv_sec;
1521 arg->atimensec = iattr->ia_atime.tv_nsec;
1522 if (!(ivalid & ATTR_ATIME_SET))
1523 arg->valid |= FATTR_ATIME_NOW;
1524 }
1525 if ((ivalid & ATTR_MTIME) && update_mtime(ivalid, trust_local_cmtime)) {
1526 arg->valid |= FATTR_MTIME;
1527 arg->mtime = iattr->ia_mtime.tv_sec;
1528 arg->mtimensec = iattr->ia_mtime.tv_nsec;
1529 if (!(ivalid & ATTR_MTIME_SET) && !trust_local_cmtime)
1530 arg->valid |= FATTR_MTIME_NOW;
1531 }
1532 if ((ivalid & ATTR_CTIME) && trust_local_cmtime) {
1533 arg->valid |= FATTR_CTIME;
1534 arg->ctime = iattr->ia_ctime.tv_sec;
1535 arg->ctimensec = iattr->ia_ctime.tv_nsec;
1536 }
1537}
1538
1539/*
1540 * Prevent concurrent writepages on inode
1541 *
1542 * This is done by adding a negative bias to the inode write counter
1543 * and waiting for all pending writes to finish.
1544 */
1545void fuse_set_nowrite(struct inode *inode)
1546{
1547 struct fuse_conn *fc = get_fuse_conn(inode);
1548 struct fuse_inode *fi = get_fuse_inode(inode);
1549
1550 BUG_ON(!inode_is_locked(inode));
1551
1552 spin_lock(&fc->lock);
1553 BUG_ON(fi->writectr < 0);
1554 fi->writectr += FUSE_NOWRITE;
1555 spin_unlock(&fc->lock);
1556 wait_event(fi->page_waitq, fi->writectr == FUSE_NOWRITE);
1557}
1558
1559/*
1560 * Allow writepages on inode
1561 *
1562 * Remove the bias from the writecounter and send any queued
1563 * writepages.
1564 */
1565static void __fuse_release_nowrite(struct inode *inode)
1566{
1567 struct fuse_inode *fi = get_fuse_inode(inode);
1568
1569 BUG_ON(fi->writectr != FUSE_NOWRITE);
1570 fi->writectr = 0;
1571 fuse_flush_writepages(inode);
1572}
1573
1574void fuse_release_nowrite(struct inode *inode)
1575{
1576 struct fuse_conn *fc = get_fuse_conn(inode);
1577
1578 spin_lock(&fc->lock);
1579 __fuse_release_nowrite(inode);
1580 spin_unlock(&fc->lock);
1581}
1582
1583static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_args *args,
1584 struct inode *inode,
1585 struct fuse_setattr_in *inarg_p,
1586 struct fuse_attr_out *outarg_p)
1587{
1588 args->in.h.opcode = FUSE_SETATTR;
1589 args->in.h.nodeid = get_node_id(inode);
1590 args->in.numargs = 1;
1591 args->in.args[0].size = sizeof(*inarg_p);
1592 args->in.args[0].value = inarg_p;
1593 args->out.numargs = 1;
1594 args->out.args[0].size = sizeof(*outarg_p);
1595 args->out.args[0].value = outarg_p;
1596}
1597
1598/*
1599 * Flush inode->i_mtime to the server
1600 */
1601int fuse_flush_times(struct inode *inode, struct fuse_file *ff)
1602{
1603 struct fuse_conn *fc = get_fuse_conn(inode);
1604 FUSE_ARGS(args);
1605 struct fuse_setattr_in inarg;
1606 struct fuse_attr_out outarg;
1607
1608 memset(&inarg, 0, sizeof(inarg));
1609 memset(&outarg, 0, sizeof(outarg));
1610
1611 inarg.valid = FATTR_MTIME;
1612 inarg.mtime = inode->i_mtime.tv_sec;
1613 inarg.mtimensec = inode->i_mtime.tv_nsec;
1614 if (fc->minor >= 23) {
1615 inarg.valid |= FATTR_CTIME;
1616 inarg.ctime = inode->i_ctime.tv_sec;
1617 inarg.ctimensec = inode->i_ctime.tv_nsec;
1618 }
1619 if (ff) {
1620 inarg.valid |= FATTR_FH;
1621 inarg.fh = ff->fh;
1622 }
1623 fuse_setattr_fill(fc, &args, inode, &inarg, &outarg);
1624
1625 return fuse_simple_request(fc, &args);
1626}
1627
1628/*
1629 * Set attributes, and at the same time refresh them.
1630 *
1631 * Truncation is slightly complicated, because the 'truncate' request
1632 * may fail, in which case we don't want to touch the mapping.
1633 * vmtruncate() doesn't allow for this case, so do the rlimit checking
1634 * and the actual truncation by hand.
1635 */
1636int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
1637 struct file *file)
1638{
1639 struct inode *inode = d_inode(dentry);
1640 struct fuse_conn *fc = get_fuse_conn(inode);
1641 struct fuse_inode *fi = get_fuse_inode(inode);
1642 FUSE_ARGS(args);
1643 struct fuse_setattr_in inarg;
1644 struct fuse_attr_out outarg;
1645 bool is_truncate = false;
1646 bool is_wb = fc->writeback_cache;
1647 loff_t oldsize;
1648 int err;
1649 bool trust_local_cmtime = is_wb && S_ISREG(inode->i_mode);
1650
1651 if (!fc->default_permissions)
1652 attr->ia_valid |= ATTR_FORCE;
1653
1654 err = setattr_prepare(dentry, attr);
1655 if (err)
1656 return err;
1657
1658 if (attr->ia_valid & ATTR_OPEN) {
1659 /* This is coming from open(..., ... | O_TRUNC); */
1660 WARN_ON(!(attr->ia_valid & ATTR_SIZE));
1661 WARN_ON(attr->ia_size != 0);
1662 if (fc->atomic_o_trunc) {
1663 /*
1664 * No need to send request to userspace, since actual
1665 * truncation has already been done by OPEN. But still
1666 * need to truncate page cache.
1667 */
1668 i_size_write(inode, 0);
1669 truncate_pagecache(inode, 0);
1670 return 0;
1671 }
1672 file = NULL;
1673 }
1674
1675 if (attr->ia_valid & ATTR_SIZE)
1676 is_truncate = true;
1677
1678 /* Flush dirty data/metadata before non-truncate SETATTR */
1679 if (is_wb && S_ISREG(inode->i_mode) &&
1680 attr->ia_valid &
1681 (ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_MTIME_SET |
1682 ATTR_TIMES_SET)) {
1683 err = write_inode_now(inode, true);
1684 if (err)
1685 return err;
1686
1687 fuse_set_nowrite(inode);
1688 fuse_release_nowrite(inode);
1689 }
1690
1691 if (is_truncate) {
1692 fuse_set_nowrite(inode);
1693 set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
1694 if (trust_local_cmtime && attr->ia_size != inode->i_size)
1695 attr->ia_valid |= ATTR_MTIME | ATTR_CTIME;
1696 }
1697
1698 memset(&inarg, 0, sizeof(inarg));
1699 memset(&outarg, 0, sizeof(outarg));
1700 iattr_to_fattr(fc, attr, &inarg, trust_local_cmtime);
1701 if (file) {
1702 struct fuse_file *ff = file->private_data;
1703 inarg.valid |= FATTR_FH;
1704 inarg.fh = ff->fh;
1705 }
1706 if (attr->ia_valid & ATTR_SIZE) {
1707 /* For mandatory locking in truncate */
1708 inarg.valid |= FATTR_LOCKOWNER;
1709 inarg.lock_owner = fuse_lock_owner_id(fc, current->files);
1710 }
1711 fuse_setattr_fill(fc, &args, inode, &inarg, &outarg);
1712 err = fuse_simple_request(fc, &args);
1713 if (err) {
1714 if (err == -EINTR)
1715 fuse_invalidate_attr(inode);
1716 goto error;
1717 }
1718
1719 if (fuse_invalid_attr(&outarg.attr) ||
1720 (inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
1721 make_bad_inode(inode);
1722 err = -EIO;
1723 goto error;
1724 }
1725
1726 spin_lock(&fc->lock);
1727 /* the kernel maintains i_mtime locally */
1728 if (trust_local_cmtime) {
1729 if (attr->ia_valid & ATTR_MTIME)
1730 inode->i_mtime = attr->ia_mtime;
1731 if (attr->ia_valid & ATTR_CTIME)
1732 inode->i_ctime = attr->ia_ctime;
1733 /* FIXME: clear I_DIRTY_SYNC? */
1734 }
1735
1736 fuse_change_attributes_common(inode, &outarg.attr,
1737 attr_timeout(&outarg));
1738 oldsize = inode->i_size;
1739 /* see the comment in fuse_change_attributes() */
1740 if (!is_wb || is_truncate || !S_ISREG(inode->i_mode))
1741 i_size_write(inode, outarg.attr.size);
1742
1743 if (is_truncate) {
1744 /* NOTE: this may release/reacquire fc->lock */
1745 __fuse_release_nowrite(inode);
1746 }
1747 spin_unlock(&fc->lock);
1748
1749 /*
1750 * Only call invalidate_inode_pages2() after removing
1751 * FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock.
1752 */
1753 if ((is_truncate || !is_wb) &&
1754 S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
1755 truncate_pagecache(inode, outarg.attr.size);
1756 invalidate_inode_pages2(inode->i_mapping);
1757 }
1758
1759 clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
1760 return 0;
1761
1762error:
1763 if (is_truncate)
1764 fuse_release_nowrite(inode);
1765
1766 clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
1767 return err;
1768}
1769
1770static int fuse_setattr(struct dentry *entry, struct iattr *attr)
1771{
1772 struct inode *inode = d_inode(entry);
1773 struct fuse_conn *fc = get_fuse_conn(inode);
1774 struct file *file = (attr->ia_valid & ATTR_FILE) ? attr->ia_file : NULL;
1775 int ret;
1776
1777 if (!fuse_allow_current_process(get_fuse_conn(inode)))
1778 return -EACCES;
1779
1780 if (attr->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) {
1781 attr->ia_valid &= ~(ATTR_KILL_SUID | ATTR_KILL_SGID |
1782 ATTR_MODE);
1783
1784 /*
1785 * The only sane way to reliably kill suid/sgid is to do it in
1786 * the userspace filesystem
1787 *
1788 * This should be done on write(), truncate() and chown().
1789 */
1790 if (!fc->handle_killpriv) {
1791 /*
1792 * ia_mode calculation may have used stale i_mode.
1793 * Refresh and recalculate.
1794 */
1795 ret = fuse_do_getattr(inode, NULL, file);
1796 if (ret)
1797 return ret;
1798
1799 attr->ia_mode = inode->i_mode;
1800 if (inode->i_mode & S_ISUID) {
1801 attr->ia_valid |= ATTR_MODE;
1802 attr->ia_mode &= ~S_ISUID;
1803 }
1804 if ((inode->i_mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
1805 attr->ia_valid |= ATTR_MODE;
1806 attr->ia_mode &= ~S_ISGID;
1807 }
1808 }
1809 }
1810 if (!attr->ia_valid)
1811 return 0;
1812
1813 ret = fuse_do_setattr(entry, attr, file);
1814 if (!ret) {
1815 /*
1816 * If filesystem supports acls it may have updated acl xattrs in
1817 * the filesystem, so forget cached acls for the inode.
1818 */
1819 if (fc->posix_acl)
1820 forget_all_cached_acls(inode);
1821
1822 /* Directory mode changed, may need to revalidate access */
1823 if (d_is_dir(entry) && (attr->ia_valid & ATTR_MODE))
1824 fuse_invalidate_entry_cache(entry);
1825 }
1826 return ret;
1827}
1828
1829static int fuse_getattr(const struct path *path, struct kstat *stat,
1830 u32 request_mask, unsigned int flags)
1831{
1832 struct inode *inode = d_inode(path->dentry);
1833 struct fuse_conn *fc = get_fuse_conn(inode);
1834
1835 if (!fuse_allow_current_process(fc))
1836 return -EACCES;
1837
1838 return fuse_update_get_attr(inode, NULL, stat, flags);
1839}
1840
1841static const struct inode_operations fuse_dir_inode_operations = {
1842 .lookup = fuse_lookup,
1843 .mkdir = fuse_mkdir,
1844 .symlink = fuse_symlink,
1845 .unlink = fuse_unlink,
1846 .rmdir = fuse_rmdir,
1847 .rename = fuse_rename2,
1848 .link = fuse_link,
1849 .setattr = fuse_setattr,
1850 .create = fuse_create,
1851 .atomic_open = fuse_atomic_open,
1852 .mknod = fuse_mknod,
1853 .permission = fuse_permission,
1854 .getattr = fuse_getattr,
1855 .listxattr = fuse_listxattr,
1856 .get_acl = fuse_get_acl,
1857 .set_acl = fuse_set_acl,
1858};
1859
1860static const struct file_operations fuse_dir_operations = {
1861 .llseek = generic_file_llseek,
1862 .read = generic_read_dir,
1863 .iterate_shared = fuse_readdir,
1864 .open = fuse_dir_open,
1865 .release = fuse_dir_release,
1866 .fsync = fuse_dir_fsync,
1867 .unlocked_ioctl = fuse_dir_ioctl,
1868 .compat_ioctl = fuse_dir_compat_ioctl,
1869};
1870
1871static const struct inode_operations fuse_common_inode_operations = {
1872 .setattr = fuse_setattr,
1873 .permission = fuse_permission,
1874 .getattr = fuse_getattr,
1875 .listxattr = fuse_listxattr,
1876 .get_acl = fuse_get_acl,
1877 .set_acl = fuse_set_acl,
1878};
1879
1880static const struct inode_operations fuse_symlink_inode_operations = {
1881 .setattr = fuse_setattr,
1882 .get_link = fuse_get_link,
1883 .getattr = fuse_getattr,
1884 .listxattr = fuse_listxattr,
1885};
1886
1887void fuse_init_common(struct inode *inode)
1888{
1889 inode->i_op = &fuse_common_inode_operations;
1890}
1891
1892void fuse_init_dir(struct inode *inode)
1893{
1894 inode->i_op = &fuse_dir_inode_operations;
1895 inode->i_fop = &fuse_dir_operations;
1896}
1897
1898void fuse_init_symlink(struct inode *inode)
1899{
1900 inode->i_op = &fuse_symlink_inode_operations;
1901}