/*
  FUSE: Filesystem in Userspace
  Copyright (C) 2001-2018  Miklos Szeredi <miklos@szeredi.hu>

  This program can be distributed under the terms of the GNU GPL.
  See the file COPYING.
*/
8
9
10#include "fuse_i.h"
11#include <linux/iversion.h>
12#include <linux/posix_acl.h>
13#include <linux/pagemap.h>
14#include <linux/highmem.h>
15
16static bool fuse_use_readdirplus(struct inode *dir, struct dir_context *ctx)
17{
18 struct fuse_conn *fc = get_fuse_conn(dir);
19 struct fuse_inode *fi = get_fuse_inode(dir);
20
21 if (!fc->do_readdirplus)
22 return false;
23 if (!fc->readdirplus_auto)
24 return true;
25 if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state))
26 return true;
27 if (ctx->pos == 0)
28 return true;
29 return false;
30}
31
32static void fuse_add_dirent_to_cache(struct file *file,
33 struct fuse_dirent *dirent, loff_t pos)
34{
35 struct fuse_inode *fi = get_fuse_inode(file_inode(file));
36 size_t reclen = FUSE_DIRENT_SIZE(dirent);
37 pgoff_t index;
38 struct page *page;
39 loff_t size;
40 u64 version;
41 unsigned int offset;
42 void *addr;
43
44 spin_lock(&fi->rdc.lock);
45 /*
46 * Is cache already completed? Or this entry does not go at the end of
47 * cache?
48 */
49 if (fi->rdc.cached || pos != fi->rdc.pos) {
50 spin_unlock(&fi->rdc.lock);
51 return;
52 }
53 version = fi->rdc.version;
54 size = fi->rdc.size;
55 offset = size & ~PAGE_MASK;
56 index = size >> PAGE_SHIFT;
57 /* Dirent doesn't fit in current page? Jump to next page. */
58 if (offset + reclen > PAGE_SIZE) {
59 index++;
60 offset = 0;
61 }
62 spin_unlock(&fi->rdc.lock);
63
64 if (offset) {
65 page = find_lock_page(file->f_mapping, index);
66 } else {
67 page = find_or_create_page(file->f_mapping, index,
68 mapping_gfp_mask(file->f_mapping));
69 }
70 if (!page)
71 return;
72
73 spin_lock(&fi->rdc.lock);
74 /* Raced with another readdir */
75 if (fi->rdc.version != version || fi->rdc.size != size ||
76 WARN_ON(fi->rdc.pos != pos))
77 goto unlock;
78
79 addr = kmap_atomic(page);
80 if (!offset)
81 clear_page(addr);
82 memcpy(addr + offset, dirent, reclen);
83 kunmap_atomic(addr);
84 fi->rdc.size = (index << PAGE_SHIFT) + offset + reclen;
85 fi->rdc.pos = dirent->off;
86unlock:
87 spin_unlock(&fi->rdc.lock);
88 unlock_page(page);
89 put_page(page);
90}
91
92static void fuse_readdir_cache_end(struct file *file, loff_t pos)
93{
94 struct fuse_inode *fi = get_fuse_inode(file_inode(file));
95 loff_t end;
96
97 spin_lock(&fi->rdc.lock);
98 /* does cache end position match current position? */
99 if (fi->rdc.pos != pos) {
100 spin_unlock(&fi->rdc.lock);
101 return;
102 }
103
104 fi->rdc.cached = true;
105 end = ALIGN(fi->rdc.size, PAGE_SIZE);
106 spin_unlock(&fi->rdc.lock);
107
108 /* truncate unused tail of cache */
109 truncate_inode_pages(file->f_mapping, end);
110}
111
112static bool fuse_emit(struct file *file, struct dir_context *ctx,
113 struct fuse_dirent *dirent)
114{
115 struct fuse_file *ff = file->private_data;
116
117 if (ff->open_flags & FOPEN_CACHE_DIR)
118 fuse_add_dirent_to_cache(file, dirent, ctx->pos);
119
120 return dir_emit(ctx, dirent->name, dirent->namelen, dirent->ino,
121 dirent->type);
122}
123
124static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
125 struct dir_context *ctx)
126{
127 while (nbytes >= FUSE_NAME_OFFSET) {
128 struct fuse_dirent *dirent = (struct fuse_dirent *) buf;
129 size_t reclen = FUSE_DIRENT_SIZE(dirent);
130 if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
131 return -EIO;
132 if (reclen > nbytes)
133 break;
134 if (memchr(dirent->name, '/', dirent->namelen) != NULL)
135 return -EIO;
136
137 if (!fuse_emit(file, ctx, dirent))
138 break;
139
140 buf += reclen;
141 nbytes -= reclen;
142 ctx->pos = dirent->off;
143 }
144
145 return 0;
146}
147
148static int fuse_direntplus_link(struct file *file,
149 struct fuse_direntplus *direntplus,
150 u64 attr_version)
151{
152 struct fuse_entry_out *o = &direntplus->entry_out;
153 struct fuse_dirent *dirent = &direntplus->dirent;
154 struct dentry *parent = file->f_path.dentry;
155 struct qstr name = QSTR_INIT(dirent->name, dirent->namelen);
156 struct dentry *dentry;
157 struct dentry *alias;
158 struct inode *dir = d_inode(parent);
159 struct fuse_conn *fc;
160 struct inode *inode;
161 DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
162
163 if (!o->nodeid) {
164 /*
165 * Unlike in the case of fuse_lookup, zero nodeid does not mean
166 * ENOENT. Instead, it only means the userspace filesystem did
167 * not want to return attributes/handle for this entry.
168 *
169 * So do nothing.
170 */
171 return 0;
172 }
173
174 if (name.name[0] == '.') {
175 /*
176 * We could potentially refresh the attributes of the directory
177 * and its parent?
178 */
179 if (name.len == 1)
180 return 0;
181 if (name.name[1] == '.' && name.len == 2)
182 return 0;
183 }
184
185 if (invalid_nodeid(o->nodeid))
186 return -EIO;
187 if (fuse_invalid_attr(&o->attr))
188 return -EIO;
189
190 fc = get_fuse_conn(dir);
191
192 name.hash = full_name_hash(parent, name.name, name.len);
193 dentry = d_lookup(parent, &name);
194 if (!dentry) {
195retry:
196 dentry = d_alloc_parallel(parent, &name, &wq);
197 if (IS_ERR(dentry))
198 return PTR_ERR(dentry);
199 }
200 if (!d_in_lookup(dentry)) {
201 struct fuse_inode *fi;
202 inode = d_inode(dentry);
203 if (!inode ||
204 get_node_id(inode) != o->nodeid ||
205 ((o->attr.mode ^ inode->i_mode) & S_IFMT)) {
206 d_invalidate(dentry);
207 dput(dentry);
208 goto retry;
209 }
210 if (fuse_is_bad(inode)) {
211 dput(dentry);
212 return -EIO;
213 }
214
215 fi = get_fuse_inode(inode);
216 spin_lock(&fi->lock);
217 fi->nlookup++;
218 spin_unlock(&fi->lock);
219
220 forget_all_cached_acls(inode);
221 fuse_change_attributes(inode, &o->attr,
222 entry_attr_timeout(o),
223 attr_version);
224 /*
225 * The other branch comes via fuse_iget()
226 * which bumps nlookup inside
227 */
228 } else {
229 inode = fuse_iget(dir->i_sb, o->nodeid, o->generation,
230 &o->attr, entry_attr_timeout(o),
231 attr_version);
232 if (!inode)
233 inode = ERR_PTR(-ENOMEM);
234
235 alias = d_splice_alias(inode, dentry);
236 d_lookup_done(dentry);
237 if (alias) {
238 dput(dentry);
239 dentry = alias;
240 }
241 if (IS_ERR(dentry)) {
242 if (!IS_ERR(inode)) {
243 struct fuse_inode *fi = get_fuse_inode(inode);
244
245 spin_lock(&fi->lock);
246 fi->nlookup--;
247 spin_unlock(&fi->lock);
248 }
249 return PTR_ERR(dentry);
250 }
251 }
252 if (fc->readdirplus_auto)
253 set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state);
254 fuse_change_entry_timeout(dentry, o);
255
256 dput(dentry);
257 return 0;
258}
259
260static void fuse_force_forget(struct file *file, u64 nodeid)
261{
262 struct inode *inode = file_inode(file);
263 struct fuse_conn *fc = get_fuse_conn(inode);
264 struct fuse_forget_in inarg;
265 FUSE_ARGS(args);
266
267 memset(&inarg, 0, sizeof(inarg));
268 inarg.nlookup = 1;
269 args.opcode = FUSE_FORGET;
270 args.nodeid = nodeid;
271 args.in_numargs = 1;
272 args.in_args[0].size = sizeof(inarg);
273 args.in_args[0].value = &inarg;
274 args.force = true;
275 args.noreply = true;
276
277 fuse_simple_request(fc, &args);
278 /* ignore errors */
279}
280
281static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
282 struct dir_context *ctx, u64 attr_version)
283{
284 struct fuse_direntplus *direntplus;
285 struct fuse_dirent *dirent;
286 size_t reclen;
287 int over = 0;
288 int ret;
289
290 while (nbytes >= FUSE_NAME_OFFSET_DIRENTPLUS) {
291 direntplus = (struct fuse_direntplus *) buf;
292 dirent = &direntplus->dirent;
293 reclen = FUSE_DIRENTPLUS_SIZE(direntplus);
294
295 if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
296 return -EIO;
297 if (reclen > nbytes)
298 break;
299 if (memchr(dirent->name, '/', dirent->namelen) != NULL)
300 return -EIO;
301
302 if (!over) {
303 /* We fill entries into dstbuf only as much as
304 it can hold. But we still continue iterating
305 over remaining entries to link them. If not,
306 we need to send a FORGET for each of those
307 which we did not link.
308 */
309 over = !fuse_emit(file, ctx, dirent);
310 if (!over)
311 ctx->pos = dirent->off;
312 }
313
314 buf += reclen;
315 nbytes -= reclen;
316
317 ret = fuse_direntplus_link(file, direntplus, attr_version);
318 if (ret)
319 fuse_force_forget(file, direntplus->entry_out.nodeid);
320 }
321
322 return 0;
323}
324
325static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx)
326{
327 int plus;
328 ssize_t res;
329 struct page *page;
330 struct inode *inode = file_inode(file);
331 struct fuse_conn *fc = get_fuse_conn(inode);
332 struct fuse_io_args ia = {};
333 struct fuse_args_pages *ap = &ia.ap;
334 struct fuse_page_desc desc = { .length = PAGE_SIZE };
335 u64 attr_version = 0;
336 bool locked;
337
338 page = alloc_page(GFP_KERNEL);
339 if (!page)
340 return -ENOMEM;
341
342 plus = fuse_use_readdirplus(inode, ctx);
343 ap->args.out_pages = 1;
344 ap->num_pages = 1;
345 ap->pages = &page;
346 ap->descs = &desc;
347 if (plus) {
348 attr_version = fuse_get_attr_version(fc);
349 fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE,
350 FUSE_READDIRPLUS);
351 } else {
352 fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE,
353 FUSE_READDIR);
354 }
355 locked = fuse_lock_inode(inode);
356 res = fuse_simple_request(fc, &ap->args);
357 fuse_unlock_inode(inode, locked);
358 if (res >= 0) {
359 if (!res) {
360 struct fuse_file *ff = file->private_data;
361
362 if (ff->open_flags & FOPEN_CACHE_DIR)
363 fuse_readdir_cache_end(file, ctx->pos);
364 } else if (plus) {
365 res = parse_dirplusfile(page_address(page), res,
366 file, ctx, attr_version);
367 } else {
368 res = parse_dirfile(page_address(page), res, file,
369 ctx);
370 }
371 }
372
373 __free_page(page);
374 fuse_invalidate_atime(inode);
375 return res;
376}
377
/* Outcome of scanning one cached page for the requested position. */
enum fuse_parse_result {
	FOUND_ERR  = -1,	/* cached data failed validation */
	FOUND_NONE =  0,	/* nothing was emitted from this page */
	FOUND_SOME =  1,	/* at least one emit was attempted */
	FOUND_ALL  =  2,	/* dir_emit() refused an entry; stop */
};
384
385static enum fuse_parse_result fuse_parse_cache(struct fuse_file *ff,
386 void *addr, unsigned int size,
387 struct dir_context *ctx)
388{
389 unsigned int offset = ff->readdir.cache_off & ~PAGE_MASK;
390 enum fuse_parse_result res = FOUND_NONE;
391
392 WARN_ON(offset >= size);
393
394 for (;;) {
395 struct fuse_dirent *dirent = addr + offset;
396 unsigned int nbytes = size - offset;
397 size_t reclen;
398
399 if (nbytes < FUSE_NAME_OFFSET || !dirent->namelen)
400 break;
401
402 reclen = FUSE_DIRENT_SIZE(dirent); /* derefs ->namelen */
403
404 if (WARN_ON(dirent->namelen > FUSE_NAME_MAX))
405 return FOUND_ERR;
406 if (WARN_ON(reclen > nbytes))
407 return FOUND_ERR;
408 if (WARN_ON(memchr(dirent->name, '/', dirent->namelen) != NULL))
409 return FOUND_ERR;
410
411 if (ff->readdir.pos == ctx->pos) {
412 res = FOUND_SOME;
413 if (!dir_emit(ctx, dirent->name, dirent->namelen,
414 dirent->ino, dirent->type))
415 return FOUND_ALL;
416 ctx->pos = dirent->off;
417 }
418 ff->readdir.pos = dirent->off;
419 ff->readdir.cache_off += reclen;
420
421 offset += reclen;
422 }
423
424 return res;
425}
426
427static void fuse_rdc_reset(struct inode *inode)
428{
429 struct fuse_inode *fi = get_fuse_inode(inode);
430
431 fi->rdc.cached = false;
432 fi->rdc.version++;
433 fi->rdc.size = 0;
434 fi->rdc.pos = 0;
435}
436
437#define UNCACHED 1
438
439static int fuse_readdir_cached(struct file *file, struct dir_context *ctx)
440{
441 struct fuse_file *ff = file->private_data;
442 struct inode *inode = file_inode(file);
443 struct fuse_conn *fc = get_fuse_conn(inode);
444 struct fuse_inode *fi = get_fuse_inode(inode);
445 enum fuse_parse_result res;
446 pgoff_t index;
447 unsigned int size;
448 struct page *page;
449 void *addr;
450
451 /* Seeked? If so, reset the cache stream */
452 if (ff->readdir.pos != ctx->pos) {
453 ff->readdir.pos = 0;
454 ff->readdir.cache_off = 0;
455 }
456
457 /*
458 * We're just about to start reading into the cache or reading the
459 * cache; both cases require an up-to-date mtime value.
460 */
461 if (!ctx->pos && fc->auto_inval_data) {
462 int err = fuse_update_attributes(inode, file);
463
464 if (err)
465 return err;
466 }
467
468retry:
469 spin_lock(&fi->rdc.lock);
470retry_locked:
471 if (!fi->rdc.cached) {
472 /* Starting cache? Set cache mtime. */
473 if (!ctx->pos && !fi->rdc.size) {
474 fi->rdc.mtime = inode->i_mtime;
475 fi->rdc.iversion = inode_query_iversion(inode);
476 }
477 spin_unlock(&fi->rdc.lock);
478 return UNCACHED;
479 }
480 /*
481 * When at the beginning of the directory (i.e. just after opendir(3) or
482 * rewinddir(3)), then need to check whether directory contents have
483 * changed, and reset the cache if so.
484 */
485 if (!ctx->pos) {
486 if (inode_peek_iversion(inode) != fi->rdc.iversion ||
487 !timespec64_equal(&fi->rdc.mtime, &inode->i_mtime)) {
488 fuse_rdc_reset(inode);
489 goto retry_locked;
490 }
491 }
492
493 /*
494 * If cache version changed since the last getdents() call, then reset
495 * the cache stream.
496 */
497 if (ff->readdir.version != fi->rdc.version) {
498 ff->readdir.pos = 0;
499 ff->readdir.cache_off = 0;
500 }
501 /*
502 * If at the beginning of the cache, than reset version to
503 * current.
504 */
505 if (ff->readdir.pos == 0)
506 ff->readdir.version = fi->rdc.version;
507
508 WARN_ON(fi->rdc.size < ff->readdir.cache_off);
509
510 index = ff->readdir.cache_off >> PAGE_SHIFT;
511
512 if (index == (fi->rdc.size >> PAGE_SHIFT))
513 size = fi->rdc.size & ~PAGE_MASK;
514 else
515 size = PAGE_SIZE;
516 spin_unlock(&fi->rdc.lock);
517
518 /* EOF? */
519 if ((ff->readdir.cache_off & ~PAGE_MASK) == size)
520 return 0;
521
522 page = find_get_page_flags(file->f_mapping, index,
523 FGP_ACCESSED | FGP_LOCK);
524 spin_lock(&fi->rdc.lock);
525 if (!page) {
526 /*
527 * Uh-oh: page gone missing, cache is useless
528 */
529 if (fi->rdc.version == ff->readdir.version)
530 fuse_rdc_reset(inode);
531 goto retry_locked;
532 }
533
534 /* Make sure it's still the same version after getting the page. */
535 if (ff->readdir.version != fi->rdc.version) {
536 spin_unlock(&fi->rdc.lock);
537 unlock_page(page);
538 put_page(page);
539 goto retry;
540 }
541 spin_unlock(&fi->rdc.lock);
542
543 /*
544 * Contents of the page are now protected against changing by holding
545 * the page lock.
546 */
547 addr = kmap(page);
548 res = fuse_parse_cache(ff, addr, size, ctx);
549 kunmap(page);
550 unlock_page(page);
551 put_page(page);
552
553 if (res == FOUND_ERR)
554 return -EIO;
555
556 if (res == FOUND_ALL)
557 return 0;
558
559 if (size == PAGE_SIZE) {
560 /* We hit end of page: skip to next page. */
561 ff->readdir.cache_off = ALIGN(ff->readdir.cache_off, PAGE_SIZE);
562 goto retry;
563 }
564
565 /*
566 * End of cache reached. If found position, then we are done, otherwise
567 * need to fall back to uncached, since the position we were looking for
568 * wasn't in the cache.
569 */
570 return res == FOUND_SOME ? 0 : UNCACHED;
571}
572
573int fuse_readdir(struct file *file, struct dir_context *ctx)
574{
575 struct fuse_file *ff = file->private_data;
576 struct inode *inode = file_inode(file);
577 int err;
578
579 if (fuse_is_bad(inode))
580 return -EIO;
581
582 mutex_lock(&ff->readdir.lock);
583
584 err = UNCACHED;
585 if (ff->open_flags & FOPEN_CACHE_DIR)
586 err = fuse_readdir_cached(file, ctx);
587 if (err == UNCACHED)
588 err = fuse_readdir_uncached(file, ctx);
589
590 mutex_unlock(&ff->readdir.lock);
591
592 return err;
593}