blob: f59638448d55da5e6725a2d4f971ca56f0922427 [file] [log] [blame]
b.liue9582032025-04-17 19:18:16 +08001// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * Copyright (C) 2017 Red Hat, Inc.
4 */
5
6#include <linux/cred.h>
7#include <linux/file.h>
8#include <linux/mount.h>
9#include <linux/xattr.h>
10#include <linux/uio.h>
11#include <linux/uaccess.h>
12#include <linux/splice.h>
13#include <linux/mm.h>
14#include <linux/fs.h>
15#include "overlayfs.h"
16
/*
 * kiocb flags handed to iocb_to_rw_flags() when overlay reads and writes are
 * forwarded to the real file.
 */
#define OVL_IOCB_MASK (IOCB_HIPRI | IOCB_NOWAIT | IOCB_DSYNC | IOCB_SYNC)
18
/*
 * Classify @realinode relative to overlay @inode (used by the debug print in
 * ovl_open_realfile()):
 *   'l' - @realinode is not the overlay's upper inode
 *   'u' - it is the upper inode and ovl_has_upperdata() is true
 *   'm' - it is the upper inode and ovl_has_upperdata() is false
 */
static char ovl_whatisit(struct inode *inode, struct inode *realinode)
{
	if (realinode == ovl_inode_upper(inode))
		return ovl_has_upperdata(inode) ? 'u' : 'm';

	return 'l';
}
28
/* No atime modification nor notify on underlying */
#define OVL_OPEN_FLAGS (FMODE_NONOTIFY | O_NOATIME)
31
32static struct file *ovl_open_realfile(const struct file *file,
33 struct inode *realinode)
34{
35 struct inode *inode = file_inode(file);
36 struct file *realfile;
37 const struct cred *old_cred;
38 int flags = file->f_flags | OVL_OPEN_FLAGS;
39
40 old_cred = ovl_override_creds(inode->i_sb);
41 realfile = open_with_fake_path(&file->f_path, flags, realinode,
42 current_cred());
43 ovl_revert_creds(inode->i_sb, old_cred);
44
45 pr_debug("open(%p[%pD2/%c], 0%o) -> (%p, 0%o)\n",
46 file, file, ovl_whatisit(inode, realinode), file->f_flags,
47 realfile, IS_ERR(realfile) ? 0 : realfile->f_flags);
48
49 return realfile;
50}
51
/* The only open flags ovl_change_flags() will update on a real file. */
#define OVL_SETFL_MASK (O_DIRECT | O_NDELAY | O_NONBLOCK | O_APPEND)
53
54static int ovl_change_flags(struct file *file, unsigned int flags)
55{
56 struct inode *inode = file_inode(file);
57 int err;
58
59 flags |= OVL_OPEN_FLAGS;
60
61 /* If some flag changed that cannot be changed then something's amiss */
62 if (WARN_ON((file->f_flags ^ flags) & ~OVL_SETFL_MASK))
63 return -EIO;
64
65 flags &= OVL_SETFL_MASK;
66
67 if (((flags ^ file->f_flags) & O_APPEND) && IS_APPEND(inode))
68 return -EPERM;
69
70 if (flags & O_DIRECT) {
71 if (!file->f_mapping->a_ops ||
72 !file->f_mapping->a_ops->direct_IO)
73 return -EINVAL;
74 }
75
76 if (file->f_op->check_flags) {
77 err = file->f_op->check_flags(flags);
78 if (err)
79 return err;
80 }
81
82 spin_lock(&file->f_lock);
83 file->f_flags = (file->f_flags & ~OVL_SETFL_MASK) | flags;
84 spin_unlock(&file->f_lock);
85
86 return 0;
87}
88
89static int ovl_real_fdget_meta(const struct file *file, struct fd *real,
90 bool allow_meta)
91{
92 struct inode *inode = file_inode(file);
93 struct inode *realinode;
94
95 real->flags = 0;
96 real->file = file->private_data;
97
98 if (allow_meta)
99 realinode = ovl_inode_real(inode);
100 else
101 realinode = ovl_inode_realdata(inode);
102
103 /* Has it been copied up since we'd opened it? */
104 if (unlikely(file_inode(real->file) != realinode)) {
105 real->flags = FDPUT_FPUT;
106 real->file = ovl_open_realfile(file, realinode);
107
108 return PTR_ERR_OR_ZERO(real->file);
109 }
110
111 /* Did the flags change since open? */
112 if (unlikely((file->f_flags ^ real->file->f_flags) & ~OVL_OPEN_FLAGS))
113 return ovl_change_flags(real->file, file->f_flags);
114
115 return 0;
116}
117
118static int ovl_real_fdget(const struct file *file, struct fd *real)
119{
120 return ovl_real_fdget_meta(file, real, false);
121}
122
123static int ovl_open(struct inode *inode, struct file *file)
124{
125 struct file *realfile;
126 int err;
127
128 err = ovl_maybe_copy_up(file_dentry(file), file->f_flags);
129 if (err)
130 return err;
131
132 /* No longer need these flags, so don't pass them on to underlying fs */
133 file->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
134
135 realfile = ovl_open_realfile(file, ovl_inode_realdata(inode));
136 if (IS_ERR(realfile))
137 return PTR_ERR(realfile);
138
139 file->private_data = realfile;
140
141 return 0;
142}
143
144static int ovl_release(struct inode *inode, struct file *file)
145{
146 fput(file->private_data);
147
148 return 0;
149}
150
151static loff_t ovl_llseek(struct file *file, loff_t offset, int whence)
152{
153 struct inode *inode = file_inode(file);
154 struct fd real;
155 const struct cred *old_cred;
156 loff_t ret;
157
158 /*
159 * The two special cases below do not need to involve real fs,
160 * so we can optimizing concurrent callers.
161 */
162 if (offset == 0) {
163 if (whence == SEEK_CUR)
164 return file->f_pos;
165
166 if (whence == SEEK_SET)
167 return vfs_setpos(file, 0, 0);
168 }
169
170 ret = ovl_real_fdget(file, &real);
171 if (ret)
172 return ret;
173
174 /*
175 * Overlay file f_pos is the master copy that is preserved
176 * through copy up and modified on read/write, but only real
177 * fs knows how to SEEK_HOLE/SEEK_DATA and real fs may impose
178 * limitations that are more strict than ->s_maxbytes for specific
179 * files, so we use the real file to perform seeks.
180 */
181 inode_lock(inode);
182 real.file->f_pos = file->f_pos;
183
184 old_cred = ovl_override_creds(inode->i_sb);
185 ret = vfs_llseek(real.file, offset, whence);
186 ovl_revert_creds(inode->i_sb, old_cred);
187
188 file->f_pos = real.file->f_pos;
189 inode_unlock(inode);
190
191 fdput(real);
192
193 return ret;
194}
195
196static void ovl_file_accessed(struct file *file)
197{
198 struct inode *inode, *upperinode;
199
200 if (file->f_flags & O_NOATIME)
201 return;
202
203 inode = file_inode(file);
204 upperinode = ovl_inode_upper(inode);
205
206 if (!upperinode)
207 return;
208
209 if ((!timespec64_equal(&inode->i_mtime, &upperinode->i_mtime) ||
210 !timespec64_equal(&inode->i_ctime, &upperinode->i_ctime))) {
211 inode->i_mtime = upperinode->i_mtime;
212 inode->i_ctime = upperinode->i_ctime;
213 }
214
215 touch_atime(&file->f_path);
216}
217
218static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
219{
220 struct file *file = iocb->ki_filp;
221 struct fd real;
222 const struct cred *old_cred;
223 ssize_t ret;
224
225 if (!iov_iter_count(iter))
226 return 0;
227
228 ret = ovl_real_fdget(file, &real);
229 if (ret)
230 return ret;
231
232 old_cred = ovl_override_creds(file_inode(file)->i_sb);
233 ret = vfs_iter_read(real.file, iter, &iocb->ki_pos,
234 iocb_to_rw_flags(iocb->ki_flags, OVL_IOCB_MASK));
235 ovl_revert_creds(file_inode(file)->i_sb, old_cred);
236
237 ovl_file_accessed(file);
238
239 fdput(real);
240
241 return ret;
242}
243
244static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
245{
246 struct file *file = iocb->ki_filp;
247 struct inode *inode = file_inode(file);
248 struct fd real;
249 const struct cred *old_cred;
250 ssize_t ret;
251
252 if (!iov_iter_count(iter))
253 return 0;
254
255 inode_lock(inode);
256 /* Update mode */
257 ovl_copyattr(ovl_inode_real(inode), inode);
258 ret = file_remove_privs(file);
259 if (ret)
260 goto out_unlock;
261
262 ret = ovl_real_fdget(file, &real);
263 if (ret)
264 goto out_unlock;
265
266 old_cred = ovl_override_creds(file_inode(file)->i_sb);
267 file_start_write(real.file);
268 ret = vfs_iter_write(real.file, iter, &iocb->ki_pos,
269 iocb_to_rw_flags(iocb->ki_flags, OVL_IOCB_MASK));
270 file_end_write(real.file);
271 ovl_revert_creds(file_inode(file)->i_sb, old_cred);
272
273 /* Update size */
274 ovl_copyattr(ovl_inode_real(inode), inode);
275
276 fdput(real);
277
278out_unlock:
279 inode_unlock(inode);
280
281 return ret;
282}
283
284/*
285 * Calling iter_file_splice_write() directly from overlay's f_op may deadlock
286 * due to lock order inversion between pipe->mutex in iter_file_splice_write()
287 * and file_start_write(real.file) in ovl_write_iter().
288 *
289 * So do everything ovl_write_iter() does and call iter_file_splice_write() on
290 * the real file.
291 */
292static ssize_t ovl_splice_write(struct pipe_inode_info *pipe, struct file *out,
293 loff_t *ppos, size_t len, unsigned int flags)
294{
295 struct fd real;
296 const struct cred *old_cred;
297 struct inode *inode = file_inode(out);
298 struct inode *realinode = ovl_inode_real(inode);
299 ssize_t ret;
300
301 inode_lock(inode);
302 /* Update mode */
303 ovl_copyattr(realinode, inode);
304 ret = file_remove_privs(out);
305 if (ret)
306 goto out_unlock;
307
308 ret = ovl_real_fdget(out, &real);
309 if (ret)
310 goto out_unlock;
311
312 old_cred = ovl_override_creds(inode->i_sb);
313 file_start_write(real.file);
314
315 ret = iter_file_splice_write(pipe, real.file, ppos, len, flags);
316
317 file_end_write(real.file);
318 /* Update size */
319 ovl_copyattr(realinode, inode);
320 revert_creds(old_cred);
321 fdput(real);
322
323out_unlock:
324 inode_unlock(inode);
325
326 return ret;
327}
328
329static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
330{
331 struct fd real;
332 const struct cred *old_cred;
333 int ret;
334
335 ret = ovl_real_fdget_meta(file, &real, !datasync);
336 if (ret)
337 return ret;
338
339 /* Don't sync lower file for fear of receiving EROFS error */
340 if (file_inode(real.file) == ovl_inode_upper(file_inode(file))) {
341 old_cred = ovl_override_creds(file_inode(file)->i_sb);
342 ret = vfs_fsync_range(real.file, start, end, datasync);
343 ovl_revert_creds(file_inode(file)->i_sb, old_cred);
344 }
345
346 fdput(real);
347
348 return ret;
349}
350
351static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
352{
353 struct file *realfile = file->private_data;
354 const struct cred *old_cred;
355 int ret;
356
357 if (!realfile->f_op->mmap)
358 return -ENODEV;
359
360 if (WARN_ON(file != vma->vm_file))
361 return -EIO;
362
363 vma->vm_file = get_file(realfile);
364
365 old_cred = ovl_override_creds(file_inode(file)->i_sb);
366 ret = call_mmap(vma->vm_file, vma);
367 ovl_revert_creds(file_inode(file)->i_sb, old_cred);
368
369 if (ret) {
370 /* Drop reference count from new vm_file value */
371 fput(realfile);
372 } else {
373 /* Drop reference count from previous vm_file value */
374 fput(file);
375 }
376
377 ovl_file_accessed(file);
378
379 return ret;
380}
381
382static long ovl_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
383{
384 struct inode *inode = file_inode(file);
385 struct fd real;
386 const struct cred *old_cred;
387 int ret;
388
389 ret = ovl_real_fdget(file, &real);
390 if (ret)
391 return ret;
392
393 old_cred = ovl_override_creds(file_inode(file)->i_sb);
394 ret = vfs_fallocate(real.file, mode, offset, len);
395 ovl_revert_creds(file_inode(file)->i_sb, old_cred);
396
397 /* Update size */
398 ovl_copyattr(ovl_inode_real(inode), inode);
399
400 fdput(real);
401
402 return ret;
403}
404
405static int ovl_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
406{
407 struct fd real;
408 const struct cred *old_cred;
409 int ret;
410
411 ret = ovl_real_fdget(file, &real);
412 if (ret)
413 return ret;
414
415 old_cred = ovl_override_creds(file_inode(file)->i_sb);
416 ret = vfs_fadvise(real.file, offset, len, advice);
417 ovl_revert_creds(file_inode(file)->i_sb, old_cred);
418
419 fdput(real);
420
421 return ret;
422}
423
424static long ovl_real_ioctl(struct file *file, unsigned int cmd,
425 unsigned long arg)
426{
427 struct fd real;
428 const struct cred *old_cred;
429 long ret;
430
431 ret = ovl_real_fdget(file, &real);
432 if (ret)
433 return ret;
434
435 old_cred = ovl_override_creds(file_inode(file)->i_sb);
436 ret = vfs_ioctl(real.file, cmd, arg);
437 ovl_revert_creds(file_inode(file)->i_sb, old_cred);
438
439 fdput(real);
440
441 return ret;
442}
443
444static long ovl_ioctl_set_flags(struct file *file, unsigned int cmd,
445 unsigned long arg, unsigned int iflags)
446{
447 long ret;
448 struct inode *inode = file_inode(file);
449 unsigned int old_iflags;
450
451 if (!inode_owner_or_capable(inode))
452 return -EACCES;
453
454 ret = mnt_want_write_file(file);
455 if (ret)
456 return ret;
457
458 inode_lock(inode);
459
460 /* Check the capability before cred override */
461 ret = -EPERM;
462 old_iflags = READ_ONCE(inode->i_flags);
463 if (((iflags ^ old_iflags) & (S_APPEND | S_IMMUTABLE)) &&
464 !capable(CAP_LINUX_IMMUTABLE))
465 goto unlock;
466
467 ret = ovl_maybe_copy_up(file_dentry(file), O_WRONLY);
468 if (ret)
469 goto unlock;
470
471 ret = ovl_real_ioctl(file, cmd, arg);
472
473 ovl_copyflags(ovl_inode_real(inode), inode);
474unlock:
475 inode_unlock(inode);
476
477 mnt_drop_write_file(file);
478
479 return ret;
480
481}
482
483static unsigned int ovl_fsflags_to_iflags(unsigned int flags)
484{
485 unsigned int iflags = 0;
486
487 if (flags & FS_SYNC_FL)
488 iflags |= S_SYNC;
489 if (flags & FS_APPEND_FL)
490 iflags |= S_APPEND;
491 if (flags & FS_IMMUTABLE_FL)
492 iflags |= S_IMMUTABLE;
493 if (flags & FS_NOATIME_FL)
494 iflags |= S_NOATIME;
495
496 return iflags;
497}
498
499static long ovl_ioctl_set_fsflags(struct file *file, unsigned int cmd,
500 unsigned long arg)
501{
502 unsigned int flags;
503
504 if (get_user(flags, (int __user *) arg))
505 return -EFAULT;
506
507 return ovl_ioctl_set_flags(file, cmd, arg,
508 ovl_fsflags_to_iflags(flags));
509}
510
511static unsigned int ovl_fsxflags_to_iflags(unsigned int xflags)
512{
513 unsigned int iflags = 0;
514
515 if (xflags & FS_XFLAG_SYNC)
516 iflags |= S_SYNC;
517 if (xflags & FS_XFLAG_APPEND)
518 iflags |= S_APPEND;
519 if (xflags & FS_XFLAG_IMMUTABLE)
520 iflags |= S_IMMUTABLE;
521 if (xflags & FS_XFLAG_NOATIME)
522 iflags |= S_NOATIME;
523
524 return iflags;
525}
526
527static long ovl_ioctl_set_fsxflags(struct file *file, unsigned int cmd,
528 unsigned long arg)
529{
530 struct fsxattr fa;
531
532 memset(&fa, 0, sizeof(fa));
533 if (copy_from_user(&fa, (void __user *) arg, sizeof(fa)))
534 return -EFAULT;
535
536 return ovl_ioctl_set_flags(file, cmd, arg,
537 ovl_fsxflags_to_iflags(fa.fsx_xflags));
538}
539
540static long ovl_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
541{
542 long ret;
543
544 switch (cmd) {
545 case FS_IOC_GETFLAGS:
546 case FS_IOC_FSGETXATTR:
547 ret = ovl_real_ioctl(file, cmd, arg);
548 break;
549
550 case FS_IOC_SETFLAGS:
551 ret = ovl_ioctl_set_fsflags(file, cmd, arg);
552 break;
553
554 case FS_IOC_FSSETXATTR:
555 ret = ovl_ioctl_set_fsxflags(file, cmd, arg);
556 break;
557
558 default:
559 ret = -ENOTTY;
560 }
561
562 return ret;
563}
564
565static long ovl_compat_ioctl(struct file *file, unsigned int cmd,
566 unsigned long arg)
567{
568 switch (cmd) {
569 case FS_IOC32_GETFLAGS:
570 cmd = FS_IOC_GETFLAGS;
571 break;
572
573 case FS_IOC32_SETFLAGS:
574 cmd = FS_IOC_SETFLAGS;
575 break;
576
577 default:
578 return -ENOIOCTLCMD;
579 }
580
581 return ovl_ioctl(file, cmd, arg);
582}
583
/* Selects which VFS primitive ovl_copyfile() runs on the real files. */
enum ovl_copyop {
	OVL_COPY,	/* vfs_copy_file_range() */
	OVL_CLONE,	/* vfs_clone_file_range() */
	OVL_DEDUPE,	/* vfs_dedupe_file_range_one() */
};
589
590static loff_t ovl_copyfile(struct file *file_in, loff_t pos_in,
591 struct file *file_out, loff_t pos_out,
592 loff_t len, unsigned int flags, enum ovl_copyop op)
593{
594 struct inode *inode_out = file_inode(file_out);
595 struct fd real_in, real_out;
596 const struct cred *old_cred;
597 loff_t ret;
598
599 ret = ovl_real_fdget(file_out, &real_out);
600 if (ret)
601 return ret;
602
603 ret = ovl_real_fdget(file_in, &real_in);
604 if (ret) {
605 fdput(real_out);
606 return ret;
607 }
608
609 old_cred = ovl_override_creds(file_inode(file_out)->i_sb);
610 switch (op) {
611 case OVL_COPY:
612 ret = vfs_copy_file_range(real_in.file, pos_in,
613 real_out.file, pos_out, len, flags);
614 break;
615
616 case OVL_CLONE:
617 ret = vfs_clone_file_range(real_in.file, pos_in,
618 real_out.file, pos_out, len, flags);
619 break;
620
621 case OVL_DEDUPE:
622 ret = vfs_dedupe_file_range_one(real_in.file, pos_in,
623 real_out.file, pos_out, len,
624 flags);
625 break;
626 }
627 ovl_revert_creds(file_inode(file_out)->i_sb, old_cred);
628
629 /* Update size */
630 ovl_copyattr(ovl_inode_real(inode_out), inode_out);
631
632 fdput(real_in);
633 fdput(real_out);
634
635 return ret;
636}
637
638static ssize_t ovl_copy_file_range(struct file *file_in, loff_t pos_in,
639 struct file *file_out, loff_t pos_out,
640 size_t len, unsigned int flags)
641{
642 return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, flags,
643 OVL_COPY);
644}
645
646static loff_t ovl_remap_file_range(struct file *file_in, loff_t pos_in,
647 struct file *file_out, loff_t pos_out,
648 loff_t len, unsigned int remap_flags)
649{
650 enum ovl_copyop op;
651
652 if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
653 return -EINVAL;
654
655 if (remap_flags & REMAP_FILE_DEDUP)
656 op = OVL_DEDUPE;
657 else
658 op = OVL_CLONE;
659
660 /*
661 * Don't copy up because of a dedupe request, this wouldn't make sense
662 * most of the time (data would be duplicated instead of deduplicated).
663 */
664 if (op == OVL_DEDUPE &&
665 (!ovl_inode_upper(file_inode(file_in)) ||
666 !ovl_inode_upper(file_inode(file_out))))
667 return -EPERM;
668
669 return ovl_copyfile(file_in, pos_in, file_out, pos_out, len,
670 remap_flags, op);
671}
672
673const struct file_operations ovl_file_operations = {
674 .open = ovl_open,
675 .release = ovl_release,
676 .llseek = ovl_llseek,
677 .read_iter = ovl_read_iter,
678 .write_iter = ovl_write_iter,
679 .fsync = ovl_fsync,
680 .mmap = ovl_mmap,
681 .fallocate = ovl_fallocate,
682 .fadvise = ovl_fadvise,
683 .unlocked_ioctl = ovl_ioctl,
684 .compat_ioctl = ovl_compat_ioctl,
685 .splice_read = generic_file_splice_read,
686 .splice_write = ovl_splice_write,
687
688 .copy_file_range = ovl_copy_file_range,
689 .remap_file_range = ovl_remap_file_range,
690};