blob: 47d3255432a14b44a3a7f650fb2fb62289f74189 [file] [log] [blame]
lh9ed821d2023-04-07 01:36:19 -07001/*
2 * linux/ipc/shm.c
3 * Copyright (C) 1992, 1993 Krishna Balasubramanian
4 * Many improvements/fixes by Bruno Haible.
5 * Replaced `struct shm_desc' by `struct vm_area_struct', July 1994.
6 * Fixed the shm swap deallocation (shm_unuse()), August 1998 Andrea Arcangeli.
7 *
8 * /proc/sysvipc/shm support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
9 * BIGMEM support, Andrea Arcangeli <andrea@suse.de>
10 * SMP thread shm, Jean-Luc Boyard <jean-luc.boyard@siemens.fr>
11 * HIGHMEM support, Ingo Molnar <mingo@redhat.com>
12 * Make shmmax, shmall, shmmni sysctl'able, Christoph Rohland <cr@sap.com>
13 * Shared /dev/zero support, Kanoj Sarcar <kanoj@sgi.com>
14 * Move the mm functionality over to mm/shmem.c, Christoph Rohland <cr@sap.com>
15 *
16 * support for audit of ipc object properties and permission changes
17 * Dustin Kirkland <dustin.kirkland@us.ibm.com>
18 *
19 * namespaces support
20 * OpenVZ, SWsoft Inc.
21 * Pavel Emelianov <xemul@openvz.org>
22 */
23
24#include <linux/slab.h>
25#include <linux/mm.h>
26#include <linux/hugetlb.h>
27#include <linux/shm.h>
28#include <linux/init.h>
29#include <linux/file.h>
30#include <linux/mman.h>
31#include <linux/shmem_fs.h>
32#include <linux/security.h>
33#include <linux/syscalls.h>
34#include <linux/audit.h>
35#include <linux/capability.h>
36#include <linux/ptrace.h>
37#include <linux/seq_file.h>
38#include <linux/rwsem.h>
39#include <linux/nsproxy.h>
40#include <linux/mount.h>
41#include <linux/ipc_namespace.h>
42
43#include <asm/uaccess.h>
44
45#include "util.h"
lh758261d2023-07-13 05:52:04 -070046#ifdef CONFIG_SYSVIPC_CROSS_SHM
47#include "shm_ctrl.h"
48#endif
lh9ed821d2023-04-07 01:36:19 -070049
/*
 * Per-attach state hung off the struct file that do_shmat() creates.
 * One instance exists per shmat() mapping; freed in shm_release().
 */
struct shm_file_data {
	int id;					/* ipc id of the attached segment */
	struct ipc_namespace *ns;		/* segment's namespace (refcounted via get_ipc_ns) */
	struct file *file;			/* underlying shmem/hugetlb backing file */
	const struct vm_operations_struct *vm_ops; /* backing file's vm_ops, wrapped by shm_vm_ops */
};

/* Accessor for the shm_file_data pointer stashed in file->private_data. */
#define shm_file_data(file) (*((struct shm_file_data **)&(file)->private_data))

static const struct file_operations shm_file_operations;
static const struct vm_operations_struct shm_vm_ops;

/* Per-namespace shm id table. */
#define shm_ids(ns)	((ns)->ids[IPC_SHM_IDS])

/* Drop the per-segment spinlock taken by shm_lock()/ipc_lock(). */
#define shm_unlock(shp)			\
	ipc_unlock(&(shp)->shm_perm)

static int newseg(struct ipc_namespace *, struct ipc_params *);
static void shm_open(struct vm_area_struct *vma);
static void shm_close(struct vm_area_struct *vma);
static void shm_destroy (struct ipc_namespace *ns, struct shmid_kernel *shp);
#ifdef CONFIG_PROC_FS
static int sysvipc_shm_proc_show(struct seq_file *s, void *it);
#endif

#ifdef CONFIG_SYSVIPC_CROSS_SHM
/*
 * Implemented by the cross-SHM controller (shm_ctrl.h).
 * NOTE(review): shm_remote_free_pages() is declared here as taking a
 * vm_area_struct pointer, but the caller in shm_close() passes a key --
 * confirm the real prototype against shm_ctrl.h.
 */
extern int shm_remote_free_pages(struct vm_area_struct *unmap_vma);
extern int shm_do_remote_map_vma(struct vm_area_struct *vma, key_t key);
#endif
79
/*
 * shm_init_ns - initialize the shm state of a fresh ipc namespace.
 * Seeds the tunables from the compile-time defaults and resets the
 * page accounting before initializing the id table.
 */
void shm_init_ns(struct ipc_namespace *ns)
{
	ns->shm_ctlmax = SHMMAX;	/* max size of a single segment (bytes) */
	ns->shm_ctlall = SHMALL;	/* max total shm pages in the namespace */
	ns->shm_ctlmni = SHMMNI;	/* max number of segments */
	ns->shm_rmid_forced = 0;	/* kernel.shm_rmid_forced sysctl default */
	ns->shm_tot = 0;		/* pages currently allocated to segments */
	ipc_init_ids(&shm_ids(ns));
}
89
/*
 * Called with shm_ids.rw_mutex (writer) and the shp structure locked.
 * Only shm_ids.rw_mutex remains locked on exit.
 *
 * If the segment still has attaches, it is only marked SHM_DEST (destroy
 * on last detach) and hidden from further shmget() lookups; otherwise it
 * is destroyed immediately.  Both paths drop the per-segment lock.
 */
static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
{
	struct shmid_kernel *shp;
	shp = container_of(ipcp, struct shmid_kernel, shm_perm);

	if (shp->shm_nattch){
		shp->shm_perm.mode |= SHM_DEST;
		/* Do not find it any more */
		shp->shm_perm.key = IPC_PRIVATE;
		shm_unlock(shp);
	} else
		shm_destroy(ns, shp);	/* unlocks and frees shp */
}
107
#ifdef CONFIG_IPC_NS
/*
 * shm_exit_ns - tear down all shm segments of a dying ipc namespace,
 * then release the idr backing store.
 */
void shm_exit_ns(struct ipc_namespace *ns)
{
	free_ipcs(ns, &shm_ids(ns), do_shm_rmid);
	idr_destroy(&ns->ids[IPC_SHM_IDS].ipcs_idr);
}
#endif
115
/*
 * Boot-time initialization of the initial ipc namespace's shm state.
 * Registered as a pure_initcall so it runs before any other initcall
 * level that might use SysV shm.
 */
static int __init ipc_ns_init(void)
{
	shm_init_ns(&init_ipc_ns);
	return 0;
}

pure_initcall(ipc_ns_init);
123
124void __init shm_init (void)
125{
126 if (IS_ENABLED(CONFIG_PROC_STRIPPED))
127 return 0;
128
129 ipc_init_proc_interface("sysvipc/shm",
130#if BITS_PER_LONG <= 32
131 " key shmid perms size cpid lpid nattch uid gid cuid cgid atime dtime ctime rss swap\n",
132#else
133 " key shmid perms size cpid lpid nattch uid gid cuid cgid atime dtime ctime rss swap\n",
134#endif
135 IPC_SHM_IDS, sysvipc_shm_proc_show);
136}
137
/*
 * shm_lock_(check_) routines are called in the paths where the rw_mutex
 * is not necessarily held.
 */
/*
 * shm_lock - look up a segment by ipc id and return it locked.
 * Returns an ERR_PTR on failure (propagated from ipc_lock), which
 * callers must test with IS_ERR() before dereferencing.
 */
static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id)
{
	struct kern_ipc_perm *ipcp = ipc_lock(&shm_ids(ns), id);

	if (IS_ERR(ipcp))
		return (struct shmid_kernel *)ipcp;	/* pass the ERR_PTR through */

	return container_of(ipcp, struct shmid_kernel, shm_perm);
}
151
#ifdef CONFIG_SYSVIPC_CROSS_SHM
/*
 * shm_mmap_pagetable - ask the cross-SHM controller to install remote
 * page-table mappings for @vma, identified by the segment key behind
 * the attach file @file.
 *
 * Looks up and locks the segment via the shm_file_data stashed in
 * @file, delegates to shm_do_remote_map_vma(), and unlocks.
 */
void shm_mmap_pagetable(struct vm_area_struct *vma, struct file *file)
{
	int ret;
	struct shm_file_data *sfd;
	struct shmid_kernel *shp;

	sfd = shm_file_data(file);
	shp = shm_lock(sfd->ns, sfd->id);
	/*
	 * BUG FIX: shm_lock() returns an ERR_PTR when the id is stale;
	 * the old code dereferenced it unconditionally.
	 */
	if (IS_ERR(shp)) {
		printk(KERN_ERR "shm_mmap_pagetable: invalid shm id %d\n",
		       sfd->id);
		return;
	}

	ret = shm_do_remote_map_vma(vma, shp->shm_perm.key);
	if (ret < 0)
		/* BUG FIX: printk lacked a log level and trailing newline */
		printk(KERN_ERR "shm_mmap_pagetable: remote map failed (%d)\n",
		       ret);

	shm_unlock(shp);
}
#endif
169
/*
 * shm_lock_by_ptr - lock a segment we already hold a pointer to.
 * Takes rcu_read_lock() to pair with the ipc_unlock() done later by
 * shm_unlock()/shm_destroy().
 */
static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp)
{
	rcu_read_lock();
	spin_lock(&ipcp->shm_perm.lock);
}
175
/*
 * shm_lock_check - like shm_lock() but additionally verifies the ipc
 * sequence number, so a recycled id is rejected.  Returns an ERR_PTR
 * on failure.
 */
static inline struct shmid_kernel *shm_lock_check(struct ipc_namespace *ns,
						int id)
{
	struct kern_ipc_perm *ipcp = ipc_lock_check(&shm_ids(ns), id);

	if (IS_ERR(ipcp))
		return (struct shmid_kernel *)ipcp;

	return container_of(ipcp, struct shmid_kernel, shm_perm);
}
186
/* Remove the segment's id from the namespace's id table. */
static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s)
{
	ipc_rmid(&shm_ids(ns), &s->shm_perm);
}
191
192
/* This is called by fork, once for every shm attach. */
/*
 * Bumps the attach count and stamps the attach time / last pid.
 * BUG_ON rather than error return: the vma being duplicated proves the
 * segment exists, so a failed lookup would indicate corruption.
 */
static void shm_open(struct vm_area_struct *vma)
{
	struct file *file = vma->vm_file;
	struct shm_file_data *sfd = shm_file_data(file);
	struct shmid_kernel *shp;

	shp = shm_lock(sfd->ns, sfd->id);
	BUG_ON(IS_ERR(shp));
	shp->shm_atim = get_seconds();
	shp->shm_lprid = task_tgid_vnr(current);
	shp->shm_nattch++;
	shm_unlock(shp);
}
207
/*
 * shm_destroy - free the struct shmid_kernel
 *
 * @ns: namespace
 * @shp: struct to free
 *
 * It has to be called with shp and shm_ids.rw_mutex (writer) locked,
 * but returns with shp unlocked and freed.
 */
static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
{
	/* Give the pages back to the namespace's quota. */
	ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
	shm_rmid(ns, shp);
	/* Drop the spinlock before the sleeping operations below. */
	shm_unlock(shp);
	if (!is_file_hugepages(shp->shm_file))
		shmem_lock(shp->shm_file, 0, shp->mlock_user);	/* undo SHM_LOCK */
	else if (shp->mlock_user)
		user_shm_unlock(shp->shm_file->f_path.dentry->d_inode->i_size,
						shp->mlock_user);
	fput (shp->shm_file);	/* final ref on the backing file */
	security_shm_free(shp);
	ipc_rcu_putref(shp);	/* frees shp after the RCU grace period */
}
231
232/*
233 * shm_may_destroy - identifies whether shm segment should be destroyed now
234 *
235 * Returns true if and only if there are no active users of the segment and
236 * one of the following is true:
237 *
238 * 1) shmctl(id, IPC_RMID, NULL) was called for this shp
239 *
240 * 2) sysctl kernel.shm_rmid_forced is set to 1.
241 */
242static bool shm_may_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
243{
244 return (shp->shm_nattch == 0) &&
245 (ns->shm_rmid_forced ||
246 (shp->shm_perm.mode & SHM_DEST));
247}
248
/*
 * remove the attach descriptor vma.
 * free memory for segment if it is marked destroyed.
 * The descriptor has already been removed from the current->mm->mmap list
 * and will later be kfree()d.
 */
static void shm_close(struct vm_area_struct *vma)
{
	struct file * file = vma->vm_file;
	struct shm_file_data *sfd = shm_file_data(file);
	struct shmid_kernel *shp;
	struct ipc_namespace *ns = sfd->ns;

	/* Writer lock: we may remove the id via shm_destroy(). */
	down_write(&shm_ids(ns).rw_mutex);
	/* remove from the list of attaches of the shm segment */
	shp = shm_lock(ns, sfd->id);
	BUG_ON(IS_ERR(shp));
	shp->shm_lprid = task_tgid_vnr(current);
	shp->shm_dtim = get_seconds();
	shp->shm_nattch--;
#ifdef CONFIG_SYSVIPC_CROSS_SHM
	/*
	 * NOTE(review): shm_remote_free_pages() is declared above as
	 * taking a struct vm_area_struct *, but is passed the segment
	 * key here -- confirm the real prototype in shm_ctrl.h.
	 */
	if (shp->shm_perm.rpmflag == TRUE)
		shm_remote_free_pages(shp->shm_perm.key);
#endif
	if (shm_may_destroy(ns, shp))
		shm_destroy(ns, shp);	/* unlocks and frees shp */
	else
		shm_unlock(shp);
	up_write(&shm_ids(ns).rw_mutex);
}
279
/* Called with ns->shm_ids(ns).rw_mutex locked */
/*
 * idr_for_each callback used by exit_shm(): destroy segments created by
 * the exiting task, or orphan them so a later change of
 * kernel.shm_rmid_forced can reap them.
 */
static int shm_try_destroy_current(int id, void *p, void *data)
{
	struct ipc_namespace *ns = data;
	struct kern_ipc_perm *ipcp = p;
	struct shmid_kernel *shp = container_of(ipcp, struct shmid_kernel, shm_perm);

	if (shp->shm_creator != current)
		return 0;

	/*
	 * Mark it as orphaned to destroy the segment when
	 * kernel.shm_rmid_forced is changed.
	 * It is noop if the following shm_may_destroy() returns true.
	 */
	shp->shm_creator = NULL;

	/*
	 * Don't even try to destroy it. If shm_rmid_forced=0 and IPC_RMID
	 * is not set, it shouldn't be deleted here.
	 */
	if (!ns->shm_rmid_forced)
		return 0;

	if (shm_may_destroy(ns, shp)) {
		shm_lock_by_ptr(shp);
		shm_destroy(ns, shp);	/* unlocks and frees shp */
	}
	return 0;
}
310
/* Called with ns->shm_ids(ns).rw_mutex locked */
/*
 * idr_for_each callback used by shm_destroy_orphaned(): reap segments
 * whose creator has already exited (shm_creator == NULL) and that have
 * no remaining attaches.
 */
static int shm_try_destroy_orphaned(int id, void *p, void *data)
{
	struct ipc_namespace *ns = data;
	struct kern_ipc_perm *ipcp = p;
	struct shmid_kernel *shp = container_of(ipcp, struct shmid_kernel, shm_perm);

	/*
	 * We want to destroy segments without users and with already
	 * exit'ed originating process.
	 *
	 * As shp->* are changed under rw_mutex, it's safe to skip shp locking.
	 */
	if (shp->shm_creator != NULL)
		return 0;

	if (shm_may_destroy(ns, shp)) {
		shm_lock_by_ptr(shp);
		shm_destroy(ns, shp);	/* unlocks and frees shp */
	}
	return 0;
}
333
/*
 * shm_destroy_orphaned - reap all orphaned, unattached segments in @ns.
 * Invoked when the kernel.shm_rmid_forced sysctl is flipped on.
 */
void shm_destroy_orphaned(struct ipc_namespace *ns)
{
	down_write(&shm_ids(ns).rw_mutex);
	if (shm_ids(ns).in_use)	/* skip the walk when the table is empty */
		idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_orphaned, ns);
	up_write(&shm_ids(ns).rw_mutex);
}
341
342
/*
 * exit_shm - called on task exit to orphan (and possibly destroy)
 * segments the task created.  The unlocked in_use pre-check is a fast
 * path; it is re-checked under the writer lock.
 */
void exit_shm(struct task_struct *task)
{
	struct ipc_namespace *ns = task->nsproxy->ipc_ns;

	if (shm_ids(ns).in_use == 0)
		return;

	/* Destroy all already created segments, but not mapped yet */
	down_write(&shm_ids(ns).rw_mutex);
	if (shm_ids(ns).in_use)
		idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_current, ns);
	up_write(&shm_ids(ns).rw_mutex);
}
356
/*
 * Page-fault handler for shm vmas: forward to the backing file's
 * (shmem or hugetlbfs) fault handler recorded at mmap time.
 */
static int shm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct file *file = vma->vm_file;
	struct shm_file_data *sfd = shm_file_data(file);

	return sfd->vm_ops->fault(vma, vmf);
}
364
#ifdef CONFIG_NUMA
/*
 * NUMA mempolicy hooks: delegate to the backing file's vm_ops when it
 * provides them.
 */
static int shm_set_policy(struct vm_area_struct *vma, struct mempolicy *new)
{
	struct file *file = vma->vm_file;
	struct shm_file_data *sfd = shm_file_data(file);
	int err = 0;
	if (sfd->vm_ops->set_policy)
		err = sfd->vm_ops->set_policy(vma, new);
	return err;
}

/*
 * Return the backing file's policy when available, falling back to the
 * vma's own policy; NULL means "use the system default policy".
 */
static struct mempolicy *shm_get_policy(struct vm_area_struct *vma,
					unsigned long addr)
{
	struct file *file = vma->vm_file;
	struct shm_file_data *sfd = shm_file_data(file);
	struct mempolicy *pol = NULL;

	if (sfd->vm_ops->get_policy)
		pol = sfd->vm_ops->get_policy(vma, addr);
	else if (vma->vm_policy)
		pol = vma->vm_policy;

	return pol;
}
#endif
391
/*
 * mmap handler for the attach file created by do_shmat(): let the
 * backing file map itself, then interpose shm_vm_ops so attach/detach
 * accounting (shm_open/shm_close) sees every vma operation.
 */
static int shm_mmap(struct file * file, struct vm_area_struct * vma)
{
	struct shm_file_data *sfd = shm_file_data(file);
	int ret;

	ret = sfd->file->f_op->mmap(sfd->file, vma);
	if (ret != 0)
		return ret;
	/* Remember the backing vm_ops so shm_fault() can forward to it. */
	sfd->vm_ops = vma->vm_ops;
#ifdef CONFIG_MMU
	BUG_ON(!sfd->vm_ops->fault);
#endif
	vma->vm_ops = &shm_vm_ops;
	shm_open(vma);	/* count this mapping as an attach */

	return ret;
}
409
/*
 * Release handler for the attach file: drop the namespace reference
 * taken in do_shmat() and free the per-attach data.
 */
static int shm_release(struct inode *ino, struct file *file)
{
	struct shm_file_data *sfd = shm_file_data(file);

	put_ipc_ns(sfd->ns);
	shm_file_data(file) = NULL;	/* clear before freeing to avoid a stale pointer */
	kfree(sfd);
	return 0;
}
419
/*
 * fsync handler: forward to the backing file, or report -EINVAL when
 * the backing filesystem provides no fsync.
 */
static int shm_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
	struct shm_file_data *sfd = shm_file_data(file);

	if (!sfd->file->f_op->fsync)
		return -EINVAL;
	return sfd->file->f_op->fsync(sfd->file, start, end, datasync);
}
428
/*
 * Delegate address-space placement to the backing file, so hugetlb
 * alignment constraints (and NOMMU placement) are honoured.
 */
static unsigned long shm_get_unmapped_area(struct file *file,
	unsigned long addr, unsigned long len, unsigned long pgoff,
	unsigned long flags)
{
	struct shm_file_data *sfd = shm_file_data(file);
	return sfd->file->f_op->get_unmapped_area(sfd->file, addr, len,
						pgoff, flags);
}
437
/*
 * File operations for the per-attach file.  On MMU kernels placement of
 * ordinary shmem mappings is left to the generic code, so
 * get_unmapped_area is only wired up for NOMMU.
 */
static const struct file_operations shm_file_operations = {
	.mmap		= shm_mmap,
	.fsync		= shm_fsync,
	.release	= shm_release,
#ifndef CONFIG_MMU
	.get_unmapped_area	= shm_get_unmapped_area,
#endif
	.llseek		= noop_llseek,
};

/*
 * Hugetlb-backed attaches always need get_unmapped_area for huge-page
 * alignment.  This table also serves as the identity test below.
 */
static const struct file_operations shm_file_operations_huge = {
	.mmap		= shm_mmap,
	.fsync		= shm_fsync,
	.release	= shm_release,
	.get_unmapped_area	= shm_get_unmapped_area,
	.llseek		= noop_llseek,
};

/* True iff @file is a hugetlb-backed shm attach file (compares f_op). */
int is_file_shm_hugepages(struct file *file)
{
	return file->f_op == &shm_file_operations_huge;
}

static const struct vm_operations_struct shm_vm_ops = {
	.open	= shm_open,	/* callback for a new vm-area open */
	.close	= shm_close,	/* callback for when the vm-area is released */
	.fault	= shm_fault,
#if defined(CONFIG_NUMA)
	.set_policy = shm_set_policy,
	.get_policy = shm_get_policy,
#endif
};
470
/**
 * newseg - Create a new shared memory segment
 * @ns: namespace
 * @params: ptr to the structure that contains key, size and shmflg
 *
 * Called with shm_ids.rw_mutex held as a writer.
 *
 * Returns the new shmid on success or a negative errno.  On success the
 * segment is left unlocked but registered in the id table.
 */

static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
{
	key_t key = params->key;
	int shmflg = params->flg;
	size_t size = params->u.size;
	int error;
	struct shmid_kernel *shp;
	size_t numpages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;	/* round up to pages */
	struct file * file;
	char name[13];	/* "SYSV" + 8 hex digits + NUL */
	int id;
	vm_flags_t acctflag = 0;

	if (size < SHMMIN || size > ns->shm_ctlmax)
		return -EINVAL;

	/* Enforce the namespace-wide page quota (SHMALL). */
	if (ns->shm_tot + numpages > ns->shm_ctlall)
		return -ENOSPC;

	shp = ipc_rcu_alloc(sizeof(*shp));
	if (!shp)
		return -ENOMEM;

	shp->shm_perm.key = key;
	shp->shm_perm.mode = (shmflg & S_IRWXUGO);
	shp->mlock_user = NULL;

	shp->shm_perm.security = NULL;
	error = security_shm_alloc(shp);
	if (error) {
		ipc_rcu_putref(shp);
		return error;
	}

	/* Name shown for the backing inode, e.g. in /proc/pid/maps. */
	sprintf (name, "SYSV%08x", key);
	if (shmflg & SHM_HUGETLB) {
		size_t hugesize = ALIGN(size, huge_page_size(&default_hstate));

		/* hugetlb_file_setup applies strict accounting */
		if (shmflg & SHM_NORESERVE)
			acctflag = VM_NORESERVE;
		file = hugetlb_file_setup(name, hugesize, acctflag,
					&shp->mlock_user, HUGETLB_SHMFS_INODE);
	} else {
		/*
		 * Do not allow no accounting for OVERCOMMIT_NEVER, even
		 * if it's asked for.
		 */
		if  ((shmflg & SHM_NORESERVE) &&
				sysctl_overcommit_memory != OVERCOMMIT_NEVER)
			acctflag = VM_NORESERVE;
		file = shmem_file_setup(name, size, acctflag);
	}
	error = PTR_ERR(file);
	if (IS_ERR(file))
		goto no_file;

	/* Publish the segment; on success shp is returned locked. */
	id = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni);
	if (id < 0) {
		error = id;
		goto no_id;
	}

	shp->shm_cprid = task_tgid_vnr(current);
	shp->shm_lprid = 0;
	shp->shm_atim = shp->shm_dtim = 0;
	shp->shm_ctim = get_seconds();
	shp->shm_segsz = size;
	shp->shm_nattch = 0;
	shp->shm_file = file;
	shp->shm_creator = current;
#ifdef CONFIG_SYSVIPC_CROSS_SHM
	/* Keys carrying the remote-attr bits mark cross-SHM segments. */
	if((key & SHM_REMOTE_ATTR_MASK) == SHM_REMOTE_ATTR_MASK)
		shp->shm_perm.rpmflag = TRUE;
	else
		shp->shm_perm.rpmflag = FALSE;
#endif
	/*
	 * shmid gets reported as "inode#" in /proc/pid/maps.
	 * proc-ps tools use this. Changing this will break them.
	 */
	file->f_dentry->d_inode->i_ino = shp->shm_perm.id;

	ns->shm_tot += numpages;
	error = shp->shm_perm.id;
	shm_unlock(shp);
	return error;

no_id:
	/* Undo the mlock charge hugetlb_file_setup may have taken. */
	if (is_file_hugepages(file) && shp->mlock_user)
		user_shm_unlock(size, shp->mlock_user);
	fput(file);
no_file:
	security_shm_free(shp);
	ipc_rcu_putref(shp);
	return error;
}
576
/*
 * Called with shm_ids.rw_mutex and ipcp locked.
 *
 * LSM hook for shmget() on an existing key: let the security module
 * veto association with this segment.
 */
static inline int shm_security(struct kern_ipc_perm *ipcp, int shmflg)
{
	struct shmid_kernel *shp;

	shp = container_of(ipcp, struct shmid_kernel, shm_perm);
	return security_shm_associate(shp, shmflg);
}
587
588/*
589 * Called with shm_ids.rw_mutex and ipcp locked.
590 */
591static inline int shm_more_checks(struct kern_ipc_perm *ipcp,
592 struct ipc_params *params)
593{
594 struct shmid_kernel *shp;
595
596 shp = container_of(ipcp, struct shmid_kernel, shm_perm);
597 if (shp->shm_segsz < params->u.size)
598 return -EINVAL;
599
600 return 0;
601}
602
/*
 * shmget(2): create a new segment or look up an existing one by key.
 * All of the real work is done by the generic ipcget() driver, using
 * the callbacks assembled here.
 */
SYSCALL_DEFINE3(shmget, key_t, key, size_t, size, int, shmflg)
{
	struct ipc_namespace *ns;
	struct ipc_ops shm_ops;
	struct ipc_params shm_params;

	ns = current->nsproxy->ipc_ns;

	shm_ops.getnew = newseg;		/* create path */
	shm_ops.associate = shm_security;	/* LSM check on existing key */
	shm_ops.more_checks = shm_more_checks;	/* size check on existing key */

	shm_params.key = key;
	shm_params.flg = shmflg;
	shm_params.u.size = size;

	return ipcget(ns, &shm_ids(ns), &shm_ops, &shm_params);
}
621
/*
 * Copy a shmid64_ds out to userspace in the ABI the caller asked for.
 * IPC_64 copies the structure verbatim; IPC_OLD downgrades to the
 * legacy shmid_ds layout (narrower fields).  Returns non-zero bytes
 * remaining on fault, -EINVAL for an unknown version.
 */
static inline unsigned long copy_shmid_to_user(void __user *buf, struct shmid64_ds *in, int version)
{
	switch(version) {
	case IPC_64:
		return copy_to_user(buf, in, sizeof(*in));
	case IPC_OLD:
	    {
		struct shmid_ds out;

		memset(&out, 0, sizeof(out));
		ipc64_perm_to_ipc_perm(&in->shm_perm, &out.shm_perm);
		out.shm_segsz	= in->shm_segsz;
		out.shm_atime	= in->shm_atime;
		out.shm_dtime	= in->shm_dtime;
		out.shm_ctime	= in->shm_ctime;
		out.shm_cpid	= in->shm_cpid;
		out.shm_lpid	= in->shm_lpid;
		out.shm_nattch	= in->shm_nattch;

		return copy_to_user(buf, &out, sizeof(out));
	    }
	default:
		return -EINVAL;
	}
}
647
/*
 * Copy IPC_SET input from userspace into a shmid64_ds.  For the legacy
 * IPC_OLD ABI only the permission fields are meaningful, so only those
 * are converted.  Returns 0, -EFAULT, or -EINVAL for a bad version.
 */
static inline unsigned long
copy_shmid_from_user(struct shmid64_ds *out, void __user *buf, int version)
{
	switch(version) {
	case IPC_64:
		if (copy_from_user(out, buf, sizeof(*out)))
			return -EFAULT;
		return 0;
	case IPC_OLD:
	    {
		struct shmid_ds tbuf_old;

		if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
			return -EFAULT;

		out->shm_perm.uid	= tbuf_old.shm_perm.uid;
		out->shm_perm.gid	= tbuf_old.shm_perm.gid;
		out->shm_perm.mode	= tbuf_old.shm_perm.mode;

		return 0;
	    }
	default:
		return -EINVAL;
	}
}
673
/*
 * Copy IPC_INFO limits out to userspace.  The legacy shminfo ABI uses
 * int fields, so shmmax is clamped to INT_MAX on the IPC_OLD path.
 */
static inline unsigned long copy_shminfo_to_user(void __user *buf, struct shminfo64 *in, int version)
{
	switch(version) {
	case IPC_64:
		return copy_to_user(buf, in, sizeof(*in));
	case IPC_OLD:
	    {
		struct shminfo out;

		if(in->shmmax > INT_MAX)
			out.shmmax = INT_MAX;	/* clamp for the narrow legacy field */
		else
			out.shmmax = (int)in->shmmax;

		out.shmmin	= in->shmmin;
		out.shmmni	= in->shmmni;
		out.shmseg	= in->shmseg;
		out.shmall	= in->shmall;

		return copy_to_user(buf, &out, sizeof(out));
	    }
	default:
		return -EINVAL;
	}
}
699
/*
 * Calculate and add used RSS and swap pages of a shm.
 * Called with shm_ids.rw_mutex held as a reader
 *
 * Hugetlb segments report pages_per_huge_page * nrpages and never swap;
 * shmem segments read nrpages and the swapped count under info->lock.
 */
static void shm_add_rss_swap(struct shmid_kernel *shp,
	unsigned long *rss_add, unsigned long *swp_add)
{
	struct inode *inode;

	inode = shp->shm_file->f_path.dentry->d_inode;

	if (is_file_hugepages(shp->shm_file)) {
		struct address_space *mapping = inode->i_mapping;
		struct hstate *h = hstate_file(shp->shm_file);
		*rss_add += pages_per_huge_page(h) * mapping->nrpages;
	} else {
#ifdef CONFIG_SHMEM
		struct shmem_inode_info *info = SHMEM_I(inode);
		spin_lock(&info->lock);	/* nrpages and swapped must be read consistently */
		*rss_add += inode->i_mapping->nrpages;
		*swp_add += info->swapped;
		spin_unlock(&info->lock);
#else
		*rss_add += inode->i_mapping->nrpages;	/* no swap without CONFIG_SHMEM */
#endif
	}
}
727
/*
 * Called with shm_ids.rw_mutex held as a reader
 *
 * Sum RSS and swap usage over every live segment in the namespace.
 * The idr is sparse, so ids are probed sequentially until the known
 * number of in-use entries has been visited.
 */
static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss,
		unsigned long *swp)
{
	int next_id;
	int total, in_use;

	*rss = 0;
	*swp = 0;

	in_use = shm_ids(ns).in_use;

	for (total = 0, next_id = 0; total < in_use; next_id++) {
		struct kern_ipc_perm *ipc;
		struct shmid_kernel *shp;

		ipc = idr_find(&shm_ids(ns).ipcs_idr, next_id);
		if (ipc == NULL)
			continue;	/* hole in the idr */
		shp = container_of(ipc, struct shmid_kernel, shm_perm);

		shm_add_rss_swap(shp, rss, swp);

		total++;
	}
}
756
/*
 * This function handles some shmctl commands which require the rw_mutex
 * to be held in write mode.
 * NOTE: no locks must be held, the rw_mutex is taken inside this function.
 *
 * Handles IPC_RMID and IPC_SET.  ipcctl_pre_down() performs the
 * permission/ownership checks and returns with both the rw_mutex
 * (writer) and the segment lock held.
 */
static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd,
		struct shmid_ds __user *buf, int version)
{
	struct kern_ipc_perm *ipcp;
	struct shmid64_ds shmid64;
	struct shmid_kernel *shp;
	int err;

	if (cmd == IPC_SET) {
		if (copy_shmid_from_user(&shmid64, buf, version))
			return -EFAULT;
	}

	ipcp = ipcctl_pre_down(ns, &shm_ids(ns), shmid, cmd,
			&shmid64.shm_perm, 0);
	if (IS_ERR(ipcp))
		return PTR_ERR(ipcp);

	shp = container_of(ipcp, struct shmid_kernel, shm_perm);

	err = security_shm_shmctl(shp, cmd);
	if (err)
		goto out_unlock;
	switch (cmd) {
	case IPC_RMID:
		do_shm_rmid(ns, ipcp);	/* drops the segment lock */
		goto out_up;
	case IPC_SET:
		ipc_update_perm(&shmid64.shm_perm, ipcp);
		shp->shm_ctim = get_seconds();
		break;
	default:
		err = -EINVAL;
	}
out_unlock:
	shm_unlock(shp);
out_up:
	up_write(&shm_ids(ns).rw_mutex);
	return err;
}
802
/*
 * shmctl(2): segment control operations.
 *
 * Info commands (IPC_INFO/SHM_INFO) report limits and usage; stat
 * commands (IPC_STAT/SHM_STAT) copy out a snapshot of one segment;
 * SHM_LOCK/SHM_UNLOCK pin/unpin the segment in memory; IPC_RMID and
 * IPC_SET are delegated to shmctl_down().
 */
SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
{
	struct shmid_kernel *shp;
	int err, version;
	struct ipc_namespace *ns;

	if (cmd < 0 || shmid < 0) {
		err = -EINVAL;
		goto out;
	}

	version = ipc_parse_version(&cmd);	/* strips IPC_64 from cmd */
	ns = current->nsproxy->ipc_ns;

	switch (cmd) { /* replace with proc interface ? */
	case IPC_INFO:
	{
		struct shminfo64 shminfo;

		err = security_shm_shmctl(NULL, cmd);
		if (err)
			return err;

		memset(&shminfo, 0, sizeof(shminfo));
		shminfo.shmmni = shminfo.shmseg = ns->shm_ctlmni;
		shminfo.shmmax = ns->shm_ctlmax;
		shminfo.shmall = ns->shm_ctlall;

		shminfo.shmmin = SHMMIN;
		if(copy_shminfo_to_user (buf, &shminfo, version))
			return -EFAULT;

		/* IPC_INFO returns the highest used index, not an error. */
		down_read(&shm_ids(ns).rw_mutex);
		err = ipc_get_maxid(&shm_ids(ns));
		up_read(&shm_ids(ns).rw_mutex);

		if(err<0)
			err = 0;	/* empty table: report index 0 */
		goto out;
	}
	case SHM_INFO:
	{
		struct shm_info shm_info;

		err = security_shm_shmctl(NULL, cmd);
		if (err)
			return err;

		memset(&shm_info, 0, sizeof(shm_info));
		down_read(&shm_ids(ns).rw_mutex);
		shm_info.used_ids = shm_ids(ns).in_use;
		shm_get_stat (ns, &shm_info.shm_rss, &shm_info.shm_swp);
		shm_info.shm_tot = ns->shm_tot;
		/* swap_attempts/swap_successes are obsolete, always 0 */
		shm_info.swap_attempts = 0;
		shm_info.swap_successes = 0;
		err = ipc_get_maxid(&shm_ids(ns));
		up_read(&shm_ids(ns).rw_mutex);
		if (copy_to_user(buf, &shm_info, sizeof(shm_info))) {
			err = -EFAULT;
			goto out;
		}

		err = err < 0 ? 0 : err;	/* highest index, or 0 if empty */
		goto out;
	}
	case SHM_STAT:
	case IPC_STAT:
	{
		struct shmid64_ds tbuf;
		int result;

		if (cmd == SHM_STAT) {
			/* SHM_STAT takes an index; return the real shmid. */
			shp = shm_lock(ns, shmid);
			if (IS_ERR(shp)) {
				err = PTR_ERR(shp);
				goto out;
			}
			result = shp->shm_perm.id;
		} else {
			/* IPC_STAT takes a shmid; validate its sequence. */
			shp = shm_lock_check(ns, shmid);
			if (IS_ERR(shp)) {
				err = PTR_ERR(shp);
				goto out;
			}
			result = 0;
		}
		err = -EACCES;
		if (ipcperms(ns, &shp->shm_perm, S_IRUGO))
			goto out_unlock;
		err = security_shm_shmctl(shp, cmd);
		if (err)
			goto out_unlock;
		/* Snapshot under the lock, copy out after unlocking. */
		memset(&tbuf, 0, sizeof(tbuf));
		kernel_to_ipc64_perm(&shp->shm_perm, &tbuf.shm_perm);
		tbuf.shm_segsz	= shp->shm_segsz;
		tbuf.shm_atime	= shp->shm_atim;
		tbuf.shm_dtime	= shp->shm_dtim;
		tbuf.shm_ctime	= shp->shm_ctim;
		tbuf.shm_cpid	= shp->shm_cprid;
		tbuf.shm_lpid	= shp->shm_lprid;
		tbuf.shm_nattch	= shp->shm_nattch;
		shm_unlock(shp);
		if(copy_shmid_to_user (buf, &tbuf, version))
			err = -EFAULT;
		else
			err = result;
		goto out;
	}
	case SHM_LOCK:
	case SHM_UNLOCK:
	{
		struct file *shm_file;

		shp = shm_lock_check(ns, shmid);
		if (IS_ERR(shp)) {
			err = PTR_ERR(shp);
			goto out;
		}

		audit_ipc_obj(&(shp->shm_perm));

		/* Without CAP_IPC_LOCK, only the owner/creator may lock. */
		if (!ns_capable(ns->user_ns, CAP_IPC_LOCK)) {
			uid_t euid = current_euid();
			err = -EPERM;
			if (euid != shp->shm_perm.uid &&
			    euid != shp->shm_perm.cuid)
				goto out_unlock;
			if (cmd == SHM_LOCK && !rlimit(RLIMIT_MEMLOCK))
				goto out_unlock;
		}

		err = security_shm_shmctl(shp, cmd);
		if (err)
			goto out_unlock;

		/* Hugetlb pages are unevictable anyway: nothing to do. */
		shm_file = shp->shm_file;
		if (is_file_hugepages(shm_file))
			goto out_unlock;

		if (cmd == SHM_LOCK) {
			struct user_struct *user = current_user();
			err = shmem_lock(shm_file, 1, user);
			if (!err && !(shp->shm_perm.mode & SHM_LOCKED)) {
				shp->shm_perm.mode |= SHM_LOCKED;
				shp->mlock_user = user;
			}
			goto out_unlock;
		}

		/* SHM_UNLOCK */
		if (!(shp->shm_perm.mode & SHM_LOCKED))
			goto out_unlock;
		shmem_lock(shm_file, 0, shp->mlock_user);
		shp->shm_perm.mode &= ~SHM_LOCKED;
		shp->mlock_user = NULL;
		/* Hold a file ref so the mapping survives the unlock below. */
		get_file(shm_file);
		shm_unlock(shp);
		shmem_unlock_mapping(shm_file->f_mapping);	/* may sleep */
		fput(shm_file);
		goto out;
	}
	case IPC_RMID:
	case IPC_SET:
		err = shmctl_down(ns, shmid, cmd, buf, version);
		return err;
	default:
		return -EINVAL;
	}

out_unlock:
	shm_unlock(shp);
out:
	return err;
}
977
/*
 * Fix shmaddr, allocate descriptor, map shm, add attach descriptor to lists.
 *
 * NOTE! Despite the name, this is NOT a direct system call entrypoint. The
 * "raddr" thing points to kernel space, and there has to be a wrapper around
 * this.
 *
 * The segment's nattch is raised before dropping its lock so it cannot
 * be destroyed while we build the mapping, and re-balanced at the end
 * (out_nattch), where a pending destroy is honoured.
 */
long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr)
{
	struct shmid_kernel *shp;
	unsigned long addr;
	unsigned long size;
	struct file * file;
	int    err;
	unsigned long flags;
	unsigned long prot;
	int acc_mode;
	unsigned long user_addr;
	struct ipc_namespace *ns;
	struct shm_file_data *sfd;
	struct path path;
	fmode_t f_mode;

	err = -EINVAL;
	if (shmid < 0)
		goto out;
	else if ((addr = (ulong)shmaddr)) {
		if (addr & (SHMLBA-1)) {
			if (shmflg & SHM_RND){
				addr &= ~(SHMLBA-1);       /* round down */

				/*Fix for HUB CVE-2017-5669
				 * Ensure that the round-down is non-nil
				 * when remapping. This can happen for
				 * cases when addr < shmlba.
				 */
				if (!addr && (shmflg & SHM_REMAP))
					goto out;
			}else
#ifndef __ARCH_FORCE_SHMLBA
				if (addr & ~PAGE_MASK)
#endif
					goto out;
		}
		flags = MAP_SHARED | MAP_FIXED;	/* caller chose the address */
	} else {
		/* SHM_REMAP is meaningless without an explicit address. */
		if ((shmflg & SHM_REMAP))
			goto out;

		flags = MAP_SHARED;
	}

	/* Translate SHM_RDONLY/SHM_EXEC into mmap prot + permission bits. */
	if (shmflg & SHM_RDONLY) {
		prot = PROT_READ;
		acc_mode = S_IRUGO;
		f_mode = FMODE_READ;
	} else {
		prot = PROT_READ | PROT_WRITE;
		acc_mode = S_IRUGO | S_IWUGO;
		f_mode = FMODE_READ | FMODE_WRITE;
	}
	if (shmflg & SHM_EXEC) {
		prot |= PROT_EXEC;
		acc_mode |= S_IXUGO;
	}

	/*
	 * We cannot rely on the fs check since SYSV IPC does have an
	 * additional creator id...
	 */
	ns = current->nsproxy->ipc_ns;
	shp = shm_lock_check(ns, shmid);
	if (IS_ERR(shp)) {
		err = PTR_ERR(shp);
		goto out;
	}

	err = -EACCES;
	if (ipcperms(ns, &shp->shm_perm, acc_mode))
		goto out_unlock;

	err = security_shm_shmat(shp, shmaddr, shmflg);
	if (err)
		goto out_unlock;

	/* Pin the backing path and bump nattch before unlocking. */
	path = shp->shm_file->f_path;
	path_get(&path);
	shp->shm_nattch++;
	size = i_size_read(path.dentry->d_inode);
	shm_unlock(shp);

	err = -ENOMEM;
	sfd = kzalloc(sizeof(*sfd), GFP_KERNEL);
	if (!sfd)
		goto out_put_dentry;

	file = alloc_file(&path, f_mode,
			  is_file_hugepages(shp->shm_file) ?
				&shm_file_operations_huge :
				&shm_file_operations);
	if (!file)
		goto out_free;

	file->private_data = sfd;
	file->f_mapping = shp->shm_file->f_mapping;
	sfd->id = shp->shm_perm.id;
	sfd->ns = get_ipc_ns(ns);	/* released in shm_release() */
	sfd->file = shp->shm_file;
	sfd->vm_ops = NULL;
#ifdef CONFIG_SYSVIPC_CROSS_SHM
	/* Tag the attach file so the cross-SHM path can recognize it. */
	if(shp->shm_perm.rpmflag == TRUE)
		file->f_flags = SHM_REMOTE_ATTR_YES;
#endif
	down_write(&current->mm->mmap_sem);
	if (addr && !(shmflg & SHM_REMAP)) {
		err = -EINVAL;
		if (find_vma_intersection(current->mm, addr, addr + size))
			goto invalid;
		/*
		 * If shm segment goes below stack, make sure there is some
		 * space left for the stack to grow (at least 4 pages).
		 */
		if (addr < current->mm->start_stack &&
		    addr > current->mm->start_stack - size - PAGE_SIZE * 5)
			goto invalid;
	}

	user_addr = do_mmap (file, addr, size, prot, flags, 0);
	*raddr = user_addr;
	err = 0;
	if (IS_ERR_VALUE(user_addr))
		err = (long)user_addr;
invalid:
	up_write(&current->mm->mmap_sem);

	/* Drop our ref; the mapping (if any) holds its own via the vma. */
	fput(file);

out_nattch:
	/* Undo the pre-attach nattch bump and destroy if now reapable. */
	down_write(&shm_ids(ns).rw_mutex);
	shp = shm_lock(ns, shmid);
	BUG_ON(IS_ERR(shp));
	shp->shm_nattch--;
	if (shm_may_destroy(ns, shp))
		shm_destroy(ns, shp);
	else
		shm_unlock(shp);
	up_write(&shm_ids(ns).rw_mutex);

out:
	return err;

out_unlock:
	shm_unlock(shp);
	goto out;

out_free:
	kfree(sfd);
out_put_dentry:
	path_put(&path);
	goto out_nattch;
}
1140
/*
 * shmat(2): thin wrapper over do_shmat().  The attach address is a
 * kernel out-parameter, returned to userspace as the (possibly
 * negative-looking) syscall return value, hence
 * force_successful_syscall_return().
 */
SYSCALL_DEFINE3(shmat, int, shmid, char __user *, shmaddr, int, shmflg)
{
	unsigned long ret;
	long err;

	err = do_shmat(shmid, shmaddr, shmflg, &ret);
	if (err)
		return err;
	force_successful_syscall_return();
	return (long)ret;
}
1152
/*
 * detach and kill segment if marked destroyed.
 * The work is done in shm_close.
 */
SYSCALL_DEFINE1(shmdt, char __user *, shmaddr)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long addr = (unsigned long)shmaddr;
	int retval = -EINVAL;
#ifdef CONFIG_MMU
	loff_t size = 0;
	struct vm_area_struct *next;
#endif

	/* shmat() only returns page-aligned addresses. */
	if (addr & ~PAGE_MASK)
		return retval;

	down_write(&mm->mmap_sem);

	/*
	 * This function tries to be smart and unmap shm segments that
	 * were modified by partial mlock or munmap calls:
	 * - It first determines the size of the shm segment that should be
	 *   unmapped: It searches for a vma that is backed by shm and that
	 *   started at address shmaddr. It records it's size and then unmaps
	 *   it.
	 * - Then it unmaps all shm vmas that started at shmaddr and that
	 *   are within the initially determined size.
	 * Errors from do_munmap are ignored: the function only fails if
	 * it's called with invalid parameters or if it's called to unmap
	 * a part of a vma. Both calls in this function are for full vmas,
	 * the parameters are directly copied from the vma itself and always
	 * valid - therefore do_munmap cannot fail. (famous last words?)
	 */
	/*
	 * If it had been mremap()'d, the starting address would not
	 * match the usual checks anyway. So assume all vma's are
	 * above the starting address given.
	 */
	vma = find_vma(mm, addr);

#ifdef CONFIG_MMU
	while (vma) {
		next = vma->vm_next;	/* vma may be unmapped below */

		/*
		 * Check if the starting address would match, i.e. it's
		 * a fragment created by mprotect() and/or munmap(), or it
		 * otherwise it starts at this address with no hassles.
		 */
		if ((vma->vm_ops == &shm_vm_ops) &&
			(vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) {


			size = vma->vm_file->f_path.dentry->d_inode->i_size;
			do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);
			/*
			 * We discovered the size of the shm segment, so
			 * break out of here and fall through to the next
			 * loop that uses the size information to stop
			 * searching for matching vma's.
			 */
			retval = 0;
			vma = next;
			break;
		}
		vma = next;
	}

	/*
	 * We need look no further than the maximum address a fragment
	 * could possibly have landed at. Also cast things to loff_t to
	 * prevent overflows and make comparisons vs. equal-width types.
	 */
	size = PAGE_ALIGN(size);
	while (vma && (loff_t)(vma->vm_end - addr) <= size) {
		next = vma->vm_next;

		/* finding a matching vma now does not alter retval */
		if ((vma->vm_ops == &shm_vm_ops) &&
			(vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff)

			do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);
		vma = next;
	}

#else /* CONFIG_MMU */
	/* under NOMMU conditions, the exact address to be destroyed must be
	 * given */
	retval = -EINVAL;
	if (vma->vm_start == addr && vma->vm_ops == &shm_vm_ops) {
		do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);
		retval = 0;
	}

#endif

	up_write(&mm->mmap_sem);
	return retval;
}
1254
#ifdef CONFIG_PROC_FS
/*
 * Emit one /proc/sysvipc/shm row for segment @it.  Field widths for the
 * size/rss/swap columns depend on BITS_PER_LONG so the header printed
 * by shm_init() lines up.  Called with the segment visible via the ipc
 * iterator; usage counters are gathered by shm_add_rss_swap().
 */
static int sysvipc_shm_proc_show(struct seq_file *s, void *it)
{
	struct shmid_kernel *shp = it;
	unsigned long rss = 0, swp = 0;

	shm_add_rss_swap(shp, &rss, &swp);

#if BITS_PER_LONG <= 32
#define SIZE_SPEC "%10lu"
#else
#define SIZE_SPEC "%21lu"
#endif

	return seq_printf(s,
			  "%10d %10d  %4o " SIZE_SPEC " %5u %5u  "
			  "%5lu %5u %5u %5u %5u %10lu %10lu %10lu "
			  SIZE_SPEC " " SIZE_SPEC "\n",
			  shp->shm_perm.key,
			  shp->shm_perm.id,
			  shp->shm_perm.mode,
			  shp->shm_segsz,
			  shp->shm_cprid,
			  shp->shm_lprid,
			  shp->shm_nattch,
			  shp->shm_perm.uid,
			  shp->shm_perm.gid,
			  shp->shm_perm.cuid,
			  shp->shm_perm.cgid,
			  shp->shm_atim,
			  shp->shm_dtim,
			  shp->shm_ctim,
			  rss * PAGE_SIZE,
			  swp * PAGE_SIZE);
}
#endif