blob: 8bf7f7b84993cbd32124ec506f7cf832ceaf0f5f [file] [log] [blame]
lh9ed821d2023-04-07 01:36:19 -07001/*
2 * linux/ipc/shm.c
3 * Copyright (C) 1992, 1993 Krishna Balasubramanian
4 * Many improvements/fixes by Bruno Haible.
5 * Replaced `struct shm_desc' by `struct vm_area_struct', July 1994.
6 * Fixed the shm swap deallocation (shm_unuse()), August 1998 Andrea Arcangeli.
7 *
8 * /proc/sysvipc/shm support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
9 * BIGMEM support, Andrea Arcangeli <andrea@suse.de>
10 * SMP thread shm, Jean-Luc Boyard <jean-luc.boyard@siemens.fr>
11 * HIGHMEM support, Ingo Molnar <mingo@redhat.com>
12 * Make shmmax, shmall, shmmni sysctl'able, Christoph Rohland <cr@sap.com>
13 * Shared /dev/zero support, Kanoj Sarcar <kanoj@sgi.com>
14 * Move the mm functionality over to mm/shmem.c, Christoph Rohland <cr@sap.com>
15 *
16 * support for audit of ipc object properties and permission changes
17 * Dustin Kirkland <dustin.kirkland@us.ibm.com>
18 *
19 * namespaces support
20 * OpenVZ, SWsoft Inc.
21 * Pavel Emelianov <xemul@openvz.org>
22 */
23
24#include <linux/slab.h>
25#include <linux/mm.h>
26#include <linux/hugetlb.h>
27#include <linux/shm.h>
28#include <linux/init.h>
29#include <linux/file.h>
30#include <linux/mman.h>
31#include <linux/shmem_fs.h>
32#include <linux/security.h>
33#include <linux/syscalls.h>
34#include <linux/audit.h>
35#include <linux/capability.h>
36#include <linux/ptrace.h>
37#include <linux/seq_file.h>
38#include <linux/rwsem.h>
39#include <linux/nsproxy.h>
40#include <linux/mount.h>
41#include <linux/ipc_namespace.h>
42
43#include <asm/uaccess.h>
44
45#include "util.h"
lh758261d2023-07-13 05:52:04 -070046#ifdef CONFIG_SYSVIPC_CROSS_SHM
47#include "shm_ctrl.h"
48#endif
lh9ed821d2023-04-07 01:36:19 -070049
/*
 * Per-attach state hung off the proxy file's ->private_data
 * (accessed through the shm_file_data() macro below).
 */
struct shm_file_data {
	int id;			/* ipc id, used with shm_lock(sfd->ns, sfd->id) */
	struct ipc_namespace *ns;	/* namespace the segment belongs to */
	struct file *file;	/* backing shmem/hugetlb file */
	const struct vm_operations_struct *vm_ops; /* backing file's vm_ops, saved in shm_mmap() */
};
56
57#define shm_file_data(file) (*((struct shm_file_data **)&(file)->private_data))
58
59static const struct file_operations shm_file_operations;
60static const struct vm_operations_struct shm_vm_ops;
61
62#define shm_ids(ns) ((ns)->ids[IPC_SHM_IDS])
63
64#define shm_unlock(shp) \
65 ipc_unlock(&(shp)->shm_perm)
66
67static int newseg(struct ipc_namespace *, struct ipc_params *);
68static void shm_open(struct vm_area_struct *vma);
69static void shm_close(struct vm_area_struct *vma);
70static void shm_destroy (struct ipc_namespace *ns, struct shmid_kernel *shp);
71#ifdef CONFIG_PROC_FS
72static int sysvipc_shm_proc_show(struct seq_file *s, void *it);
73#endif
74
/* Initialise @ns's SysV shm limits and id table to build-time defaults. */
void shm_init_ns(struct ipc_namespace *ns)
{
	ns->shm_ctlmax = SHMMAX;
	ns->shm_ctlall = SHMALL;
	ns->shm_ctlmni = SHMMNI;
	ns->shm_rmid_forced = 0;
	ns->shm_tot = 0;	/* pages currently accounted to shm segments */
	ipc_init_ids(&shm_ids(ns));
}
84
85/*
86 * Called with shm_ids.rw_mutex (writer) and the shp structure locked.
87 * Only shm_ids.rw_mutex remains locked on exit.
88 */
89static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
90{
91 struct shmid_kernel *shp;
92 shp = container_of(ipcp, struct shmid_kernel, shm_perm);
93
94 if (shp->shm_nattch){
95 shp->shm_perm.mode |= SHM_DEST;
96 /* Do not find it any more */
97 shp->shm_perm.key = IPC_PRIVATE;
98 shm_unlock(shp);
99 } else
100 shm_destroy(ns, shp);
101}
102
#ifdef CONFIG_IPC_NS
/* Tear down every shm segment of a dying ipc namespace. */
void shm_exit_ns(struct ipc_namespace *ns)
{
	free_ipcs(ns, &shm_ids(ns), do_shm_rmid);
	idr_destroy(&ns->ids[IPC_SHM_IDS].ipcs_idr);
}
#endif
110
/* Initialise the boot-time ipc namespace's shm state very early. */
static int __init ipc_ns_init(void)
{
	shm_init_ns(&init_ipc_ns);
	return 0;
}

pure_initcall(ipc_ns_init);
118
119void __init shm_init (void)
120{
121 if (IS_ENABLED(CONFIG_PROC_STRIPPED))
122 return 0;
123
124 ipc_init_proc_interface("sysvipc/shm",
125#if BITS_PER_LONG <= 32
126 " key shmid perms size cpid lpid nattch uid gid cuid cgid atime dtime ctime rss swap\n",
127#else
128 " key shmid perms size cpid lpid nattch uid gid cuid cgid atime dtime ctime rss swap\n",
129#endif
130 IPC_SHM_IDS, sysvipc_shm_proc_show);
131}
132
133/*
134 * shm_lock_(check_) routines are called in the paths where the rw_mutex
135 * is not necessarily held.
136 */
137static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id)
138{
139 struct kern_ipc_perm *ipcp = ipc_lock(&shm_ids(ns), id);
140
141 if (IS_ERR(ipcp))
142 return (struct shmid_kernel *)ipcp;
143
144 return container_of(ipcp, struct shmid_kernel, shm_perm);
145}
146
lh758261d2023-07-13 05:52:04 -0700147#ifdef CONFIG_SYSVIPC_CROSS_SHM
xf.li6c8fc1e2023-08-12 00:11:09 -0700148key_t shm_do_remote_analy_key(struct file *file)
lh758261d2023-07-13 05:52:04 -0700149{
xf.li6c8fc1e2023-08-12 00:11:09 -0700150 key_t key = 0;
151 char *shm_name = NULL;
lh758261d2023-07-13 05:52:04 -0700152 struct shm_file_data *sfd;
153 struct shmid_kernel *shp;
xf.li6c8fc1e2023-08-12 00:11:09 -0700154 struct path *shm_path = NULL;
155 struct dentry *shm_dentry = NULL;
lh758261d2023-07-13 05:52:04 -0700156
xf.li6c8fc1e2023-08-12 00:11:09 -0700157 if (file && (file->shm_flags == SHM_REMOTE_SYSV_YES))
158 {
159 sfd = shm_file_data(file);
160 shp = shm_lock(sfd->ns, sfd->id);
161 key = shp->shm_perm.key;
162 shm_unlock(shp);
163 }
164 else if (file && (file->shm_flags == SHM_REMOTE_POSIX_YES))
165 {
166 shm_path = &file->f_path;
167
168 if(shm_path && (shm_path->dentry))
169 {
170 shm_name = shm_path->dentry->d_name.name;
171 if (!shm_name)
172 panic("shm_posix_mmap_pagetable name is NULL\n");
173 }
174 key = shm_hash_name_to_key(shm_name, strlen(shm_name));
175 }
176 return key;
177}
lh758261d2023-07-13 05:52:04 -0700178
xf.li6c8fc1e2023-08-12 00:11:09 -0700179int shm_ipc_mmap_pagetable(struct vm_area_struct *vma, struct file *file)
180{
181 int ret = 0;
182 key_t shm_key = 0;
183
184 shm_key = shm_do_remote_analy_key(file);
185 ret = shm_do_remote_map_vma(vma, shm_key);
lh758261d2023-07-13 05:52:04 -0700186 if (ret < 0)
xf.li6c8fc1e2023-08-12 00:11:09 -0700187 {
188 printk("shm_ipc_mmap_pagetable Error: No Mem\n");
189 return -ENOMEM;
190 }
191 return ret;
lh758261d2023-07-13 05:52:04 -0700192}
193#endif
194
lh9ed821d2023-04-07 01:36:19 -0700195static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp)
196{
197 rcu_read_lock();
198 spin_lock(&ipcp->shm_perm.lock);
199}
200
201static inline struct shmid_kernel *shm_lock_check(struct ipc_namespace *ns,
202 int id)
203{
204 struct kern_ipc_perm *ipcp = ipc_lock_check(&shm_ids(ns), id);
205
206 if (IS_ERR(ipcp))
207 return (struct shmid_kernel *)ipcp;
208
209 return container_of(ipcp, struct shmid_kernel, shm_perm);
210}
211
/* Unregister @s from the namespace's shm id table. */
static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s)
{
	ipc_rmid(&shm_ids(ns), &s->shm_perm);
}
216
217
218/* This is called by fork, once for every shm attach. */
static void shm_open(struct vm_area_struct *vma)
{
	struct file *file = vma->vm_file;
	struct shm_file_data *sfd = shm_file_data(file);
	struct shmid_kernel *shp;

	shp = shm_lock(sfd->ns, sfd->id);
	/* The vma keeps shm_nattch elevated, so the segment must exist. */
	BUG_ON(IS_ERR(shp));
	shp->shm_atim = get_seconds();		/* last attach time */
	shp->shm_lprid = task_tgid_vnr(current);	/* pid of last operation */
	shp->shm_nattch++;
	shm_unlock(shp);
}
232
233/*
234 * shm_destroy - free the struct shmid_kernel
235 *
236 * @ns: namespace
237 * @shp: struct to free
238 *
239 * It has to be called with shp and shm_ids.rw_mutex (writer) locked,
240 * but returns with shp unlocked and freed.
241 */
static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
{
	/* Give back the page accounting taken in newseg(). */
	ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
	shm_rmid(ns, shp);
	shm_unlock(shp);
	/* Undo any SHM_LOCK pinning before dropping the backing file. */
	if (!is_file_hugepages(shp->shm_file))
		shmem_lock(shp->shm_file, 0, shp->mlock_user);
	else if (shp->mlock_user)
		user_shm_unlock(shp->shm_file->f_path.dentry->d_inode->i_size,
						shp->mlock_user);
	fput (shp->shm_file);
	security_shm_free(shp);
	ipc_rcu_putref(shp);
}
256
257/*
258 * shm_may_destroy - identifies whether shm segment should be destroyed now
259 *
260 * Returns true if and only if there are no active users of the segment and
261 * one of the following is true:
262 *
263 * 1) shmctl(id, IPC_RMID, NULL) was called for this shp
264 *
265 * 2) sysctl kernel.shm_rmid_forced is set to 1.
266 */
267static bool shm_may_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
268{
269 return (shp->shm_nattch == 0) &&
270 (ns->shm_rmid_forced ||
271 (shp->shm_perm.mode & SHM_DEST));
272}
273
274/*
275 * remove the attach descriptor vma.
276 * free memory for segment if it is marked destroyed.
277 * The descriptor has already been removed from the current->mm->mmap list
278 * and will later be kfree()d.
279 */
static void shm_close(struct vm_area_struct *vma)
{
	struct file * file = vma->vm_file;
	struct shm_file_data *sfd = shm_file_data(file);
	struct shmid_kernel *shp;
	struct ipc_namespace *ns = sfd->ns;

	/* Writer lock: shm_destroy() below needs it held. */
	down_write(&shm_ids(ns).rw_mutex);
	/* remove from the list of attaches of the shm segment */
	shp = shm_lock(ns, sfd->id);
	BUG_ON(IS_ERR(shp));
	shp->shm_lprid = task_tgid_vnr(current);
	shp->shm_dtim = get_seconds();		/* last detach time */
	shp->shm_nattch--;

	if (shm_may_destroy(ns, shp))
		shm_destroy(ns, shp);
	else
		shm_unlock(shp);
	up_write(&shm_ids(ns).rw_mutex);
}
301
302/* Called with ns->shm_ids(ns).rw_mutex locked */
/* idr_for_each() callback, run from exit_shm(). */
static int shm_try_destroy_current(int id, void *p, void *data)
{
	struct ipc_namespace *ns = data;
	struct kern_ipc_perm *ipcp = p;
	struct shmid_kernel *shp = container_of(ipcp, struct shmid_kernel, shm_perm);

	/* Only segments created by the exiting task are of interest. */
	if (shp->shm_creator != current)
		return 0;

	/*
	 * Mark it as orphaned to destroy the segment when
	 * kernel.shm_rmid_forced is changed.
	 * It is noop if the following shm_may_destroy() returns true.
	 */
	shp->shm_creator = NULL;

	/*
	 * Don't even try to destroy it. If shm_rmid_forced=0 and IPC_RMID
	 * is not set, it shouldn't be deleted here.
	 */
	if (!ns->shm_rmid_forced)
		return 0;

	if (shm_may_destroy(ns, shp)) {
		shm_lock_by_ptr(shp);
		shm_destroy(ns, shp);
	}
	return 0;
}
332
333/* Called with ns->shm_ids(ns).rw_mutex locked */
/* idr_for_each() callback, run from shm_destroy_orphaned(). */
static int shm_try_destroy_orphaned(int id, void *p, void *data)
{
	struct ipc_namespace *ns = data;
	struct kern_ipc_perm *ipcp = p;
	struct shmid_kernel *shp = container_of(ipcp, struct shmid_kernel, shm_perm);

	/*
	 * We want to destroy segments without users and with already
	 * exit'ed originating process.
	 *
	 * As shp->* are changed under rw_mutex, it's safe to skip shp locking.
	 */
	if (shp->shm_creator != NULL)
		return 0;

	if (shm_may_destroy(ns, shp)) {
		shm_lock_by_ptr(shp);
		shm_destroy(ns, shp);
	}
	return 0;
}
355
/* Reap all orphaned segments in @ns; see shm_try_destroy_orphaned(). */
void shm_destroy_orphaned(struct ipc_namespace *ns)
{
	down_write(&shm_ids(ns).rw_mutex);
	if (shm_ids(ns).in_use)
		idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_orphaned, ns);
	up_write(&shm_ids(ns).rw_mutex);
}
363
364
/* Task-exit hook: orphan/destroy segments created by @task. */
void exit_shm(struct task_struct *task)
{
	struct ipc_namespace *ns = task->nsproxy->ipc_ns;

	/* Unlocked fast path: nothing allocated in this namespace. */
	if (shm_ids(ns).in_use == 0)
		return;

	/* Destroy all already created segments, but not mapped yet */
	down_write(&shm_ids(ns).rw_mutex);
	if (shm_ids(ns).in_use)
		idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_current, ns);
	up_write(&shm_ids(ns).rw_mutex);
}
378
379static int shm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
380{
381 struct file *file = vma->vm_file;
382 struct shm_file_data *sfd = shm_file_data(file);
383
384 return sfd->vm_ops->fault(vma, vmf);
385}
386
387#ifdef CONFIG_NUMA
388static int shm_set_policy(struct vm_area_struct *vma, struct mempolicy *new)
389{
390 struct file *file = vma->vm_file;
391 struct shm_file_data *sfd = shm_file_data(file);
392 int err = 0;
393 if (sfd->vm_ops->set_policy)
394 err = sfd->vm_ops->set_policy(vma, new);
395 return err;
396}
397
398static struct mempolicy *shm_get_policy(struct vm_area_struct *vma,
399 unsigned long addr)
400{
401 struct file *file = vma->vm_file;
402 struct shm_file_data *sfd = shm_file_data(file);
403 struct mempolicy *pol = NULL;
404
405 if (sfd->vm_ops->get_policy)
406 pol = sfd->vm_ops->get_policy(vma, addr);
407 else if (vma->vm_policy)
408 pol = vma->vm_policy;
409
410 return pol;
411}
412#endif
413
static int shm_mmap(struct file * file, struct vm_area_struct * vma)
{
	struct shm_file_data *sfd = shm_file_data(file);
	int ret;

	/* Let the backing shmem/hugetlb file install its vm_ops first. */
	ret = sfd->file->f_op->mmap(sfd->file, vma);
	if (ret != 0)
		return ret;
	/* Save them so shm_fault()/policy hooks can delegate... */
	sfd->vm_ops = vma->vm_ops;
#ifdef CONFIG_MMU
	BUG_ON(!sfd->vm_ops->fault);
#endif
	/* ...then interpose our own ops for attach/detach accounting. */
	vma->vm_ops = &shm_vm_ops;
	shm_open(vma);

	return ret;
}
431
432static int shm_release(struct inode *ino, struct file *file)
433{
434 struct shm_file_data *sfd = shm_file_data(file);
435
436 put_ipc_ns(sfd->ns);
437 shm_file_data(file) = NULL;
438 kfree(sfd);
439 return 0;
440}
441
442static int shm_fsync(struct file *file, loff_t start, loff_t end, int datasync)
443{
444 struct shm_file_data *sfd = shm_file_data(file);
445
446 if (!sfd->file->f_op->fsync)
447 return -EINVAL;
448 return sfd->file->f_op->fsync(sfd->file, start, end, datasync);
449}
450
451static unsigned long shm_get_unmapped_area(struct file *file,
452 unsigned long addr, unsigned long len, unsigned long pgoff,
453 unsigned long flags)
454{
455 struct shm_file_data *sfd = shm_file_data(file);
456 return sfd->file->f_op->get_unmapped_area(sfd->file, addr, len,
457 pgoff, flags);
458}
459
/* File ops for the per-attach proxy file created in do_shmat(). */
static const struct file_operations shm_file_operations = {
	.mmap = shm_mmap,
	.fsync = shm_fsync,
	.release = shm_release,
#ifndef CONFIG_MMU
	/* registered only on nommu; see shm_get_unmapped_area() */
	.get_unmapped_area = shm_get_unmapped_area,
#endif
	.llseek = noop_llseek,
};
469
/*
 * Variant used for hugetlb-backed attaches; also serves as the marker
 * compared against in is_file_shm_hugepages().
 */
static const struct file_operations shm_file_operations_huge = {
	.mmap = shm_mmap,
	.fsync = shm_fsync,
	.release = shm_release,
	.get_unmapped_area = shm_get_unmapped_area,
	.llseek = noop_llseek,
};
477
/* True iff @file is a shm proxy file for a hugetlb-backed segment. */
int is_file_shm_hugepages(struct file *file)
{
	return file->f_op == &shm_file_operations_huge;
}
482
/* vm_ops interposed on every shm mapping by shm_mmap(). */
static const struct vm_operations_struct shm_vm_ops = {
	.open = shm_open, /* callback for a new vm-area open */
	.close = shm_close, /* callback for when the vm-area is released */
	.fault = shm_fault,
#if defined(CONFIG_NUMA)
	.set_policy = shm_set_policy,
	.get_policy = shm_get_policy,
#endif
};
492
493/**
494 * newseg - Create a new shared memory segment
495 * @ns: namespace
496 * @params: ptr to the structure that contains key, size and shmflg
497 *
498 * Called with shm_ids.rw_mutex held as a writer.
499 */
500
501static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
502{
503 key_t key = params->key;
504 int shmflg = params->flg;
505 size_t size = params->u.size;
506 int error;
507 struct shmid_kernel *shp;
508 size_t numpages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
509 struct file * file;
510 char name[13];
511 int id;
512 vm_flags_t acctflag = 0;
513
514 if (size < SHMMIN || size > ns->shm_ctlmax)
515 return -EINVAL;
516
517 if (ns->shm_tot + numpages > ns->shm_ctlall)
518 return -ENOSPC;
519
520 shp = ipc_rcu_alloc(sizeof(*shp));
521 if (!shp)
522 return -ENOMEM;
523
524 shp->shm_perm.key = key;
525 shp->shm_perm.mode = (shmflg & S_IRWXUGO);
526 shp->mlock_user = NULL;
527
528 shp->shm_perm.security = NULL;
529 error = security_shm_alloc(shp);
530 if (error) {
531 ipc_rcu_putref(shp);
532 return error;
533 }
534
535 sprintf (name, "SYSV%08x", key);
536 if (shmflg & SHM_HUGETLB) {
537 size_t hugesize = ALIGN(size, huge_page_size(&default_hstate));
538
539 /* hugetlb_file_setup applies strict accounting */
540 if (shmflg & SHM_NORESERVE)
541 acctflag = VM_NORESERVE;
542 file = hugetlb_file_setup(name, hugesize, acctflag,
543 &shp->mlock_user, HUGETLB_SHMFS_INODE);
544 } else {
545 /*
546 * Do not allow no accounting for OVERCOMMIT_NEVER, even
547 * if it's asked for.
548 */
549 if ((shmflg & SHM_NORESERVE) &&
550 sysctl_overcommit_memory != OVERCOMMIT_NEVER)
551 acctflag = VM_NORESERVE;
552 file = shmem_file_setup(name, size, acctflag);
553 }
554 error = PTR_ERR(file);
555 if (IS_ERR(file))
556 goto no_file;
557
558 id = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni);
559 if (id < 0) {
560 error = id;
561 goto no_id;
562 }
563
564 shp->shm_cprid = task_tgid_vnr(current);
565 shp->shm_lprid = 0;
566 shp->shm_atim = shp->shm_dtim = 0;
567 shp->shm_ctim = get_seconds();
568 shp->shm_segsz = size;
569 shp->shm_nattch = 0;
570 shp->shm_file = file;
571 shp->shm_creator = current;
lh758261d2023-07-13 05:52:04 -0700572#ifdef CONFIG_SYSVIPC_CROSS_SHM
xf.li6c8fc1e2023-08-12 00:11:09 -0700573 if((key & SHM_REMOTE_SYSV_MASK) == SHM_REMOTE_SYSV_MASK)
574 {
575 error = shm_do_newseg_check(key, size);
576 if (error < 0)
577 {
578 printk("shm size error, should be the same PAGE_ALIGN size\n");
579 return error;
580 }
581 else
582 shp->shm_perm.rpmflag = TRUE;
583 }
lh758261d2023-07-13 05:52:04 -0700584 else
xf.li6c8fc1e2023-08-12 00:11:09 -0700585 {
lh758261d2023-07-13 05:52:04 -0700586 shp->shm_perm.rpmflag = FALSE;
xf.li6c8fc1e2023-08-12 00:11:09 -0700587 }
lh758261d2023-07-13 05:52:04 -0700588#endif
lh9ed821d2023-04-07 01:36:19 -0700589 /*
590 * shmid gets reported as "inode#" in /proc/pid/maps.
591 * proc-ps tools use this. Changing this will break them.
592 */
593 file->f_dentry->d_inode->i_ino = shp->shm_perm.id;
594
595 ns->shm_tot += numpages;
596 error = shp->shm_perm.id;
597 shm_unlock(shp);
598 return error;
599
600no_id:
601 if (is_file_hugepages(file) && shp->mlock_user)
602 user_shm_unlock(size, shp->mlock_user);
603 fput(file);
604no_file:
605 security_shm_free(shp);
606 ipc_rcu_putref(shp);
607 return error;
608}
609
610/*
611 * Called with shm_ids.rw_mutex and ipcp locked.
612 */
613static inline int shm_security(struct kern_ipc_perm *ipcp, int shmflg)
614{
615 struct shmid_kernel *shp;
616
617 shp = container_of(ipcp, struct shmid_kernel, shm_perm);
618 return security_shm_associate(shp, shmflg);
619}
620
621/*
622 * Called with shm_ids.rw_mutex and ipcp locked.
623 */
624static inline int shm_more_checks(struct kern_ipc_perm *ipcp,
625 struct ipc_params *params)
626{
627 struct shmid_kernel *shp;
628
629 shp = container_of(ipcp, struct shmid_kernel, shm_perm);
630 if (shp->shm_segsz < params->u.size)
631 return -EINVAL;
632
633 return 0;
634}
635
636SYSCALL_DEFINE3(shmget, key_t, key, size_t, size, int, shmflg)
637{
638 struct ipc_namespace *ns;
639 struct ipc_ops shm_ops;
640 struct ipc_params shm_params;
641
642 ns = current->nsproxy->ipc_ns;
643
644 shm_ops.getnew = newseg;
645 shm_ops.associate = shm_security;
646 shm_ops.more_checks = shm_more_checks;
647
648 shm_params.key = key;
649 shm_params.flg = shmflg;
650 shm_params.u.size = size;
651
652 return ipcget(ns, &shm_ids(ns), &shm_ops, &shm_params);
653}
654
/*
 * Copy a shmid64_ds to userspace in either the IPC_64 layout or the
 * legacy IPC_OLD shmid_ds layout.  Returns non-zero bytes-not-copied
 * on fault, -EINVAL for an unknown version.
 */
static inline unsigned long copy_shmid_to_user(void __user *buf, struct shmid64_ds *in, int version)
{
	switch(version) {
	case IPC_64:
		return copy_to_user(buf, in, sizeof(*in));
	case IPC_OLD:
	{
		struct shmid_ds out;

		memset(&out, 0, sizeof(out));
		ipc64_perm_to_ipc_perm(&in->shm_perm, &out.shm_perm);
		out.shm_segsz = in->shm_segsz;
		out.shm_atime = in->shm_atime;
		out.shm_dtime = in->shm_dtime;
		out.shm_ctime = in->shm_ctime;
		out.shm_cpid = in->shm_cpid;
		out.shm_lpid = in->shm_lpid;
		out.shm_nattch = in->shm_nattch;

		return copy_to_user(buf, &out, sizeof(out));
	}
	default:
		return -EINVAL;
	}
}
680
/*
 * Read a shmid_ds/shmid64_ds from userspace into @out.  For IPC_OLD
 * only the fields IPC_SET consumes (uid, gid, mode) are filled in.
 */
static inline unsigned long
copy_shmid_from_user(struct shmid64_ds *out, void __user *buf, int version)
{
	switch(version) {
	case IPC_64:
		if (copy_from_user(out, buf, sizeof(*out)))
			return -EFAULT;
		return 0;
	case IPC_OLD:
	{
		struct shmid_ds tbuf_old;

		if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
			return -EFAULT;

		out->shm_perm.uid = tbuf_old.shm_perm.uid;
		out->shm_perm.gid = tbuf_old.shm_perm.gid;
		out->shm_perm.mode = tbuf_old.shm_perm.mode;

		return 0;
	}
	default:
		return -EINVAL;
	}
}
706
/*
 * Copy the shm limits to userspace; the legacy IPC_OLD shminfo layout
 * is narrower, so shmmax is clamped to INT_MAX there.
 */
static inline unsigned long copy_shminfo_to_user(void __user *buf, struct shminfo64 *in, int version)
{
	switch(version) {
	case IPC_64:
		return copy_to_user(buf, in, sizeof(*in));
	case IPC_OLD:
	{
		struct shminfo out;

		if(in->shmmax > INT_MAX)
			out.shmmax = INT_MAX;
		else
			out.shmmax = (int)in->shmmax;

		out.shmmin = in->shmmin;
		out.shmmni = in->shmmni;
		out.shmseg = in->shmseg;
		out.shmall = in->shmall;

		return copy_to_user(buf, &out, sizeof(out));
	}
	default:
		return -EINVAL;
	}
}
732
733/*
734 * Calculate and add used RSS and swap pages of a shm.
735 * Called with shm_ids.rw_mutex held as a reader
736 */
static void shm_add_rss_swap(struct shmid_kernel *shp,
	unsigned long *rss_add, unsigned long *swp_add)
{
	struct inode *inode;

	inode = shp->shm_file->f_path.dentry->d_inode;

	if (is_file_hugepages(shp->shm_file)) {
		/* Scale the page-cache count by pages per huge page. */
		struct address_space *mapping = inode->i_mapping;
		struct hstate *h = hstate_file(shp->shm_file);
		*rss_add += pages_per_huge_page(h) * mapping->nrpages;
	} else {
#ifdef CONFIG_SHMEM
		/* info->lock guards the swapped counter. */
		struct shmem_inode_info *info = SHMEM_I(inode);
		spin_lock(&info->lock);
		*rss_add += inode->i_mapping->nrpages;
		*swp_add += info->swapped;
		spin_unlock(&info->lock);
#else
		/* Without shmem the swap counter stays untouched. */
		*rss_add += inode->i_mapping->nrpages;
#endif
	}
}
760
761/*
762 * Called with shm_ids.rw_mutex held as a reader
763 */
static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss,
		unsigned long *swp)
{
	int next_id;
	int total, in_use;

	*rss = 0;
	*swp = 0;

	in_use = shm_ids(ns).in_use;

	/*
	 * The idr may be sparse: probe ids upward until all in_use
	 * segments have been visited.
	 */
	for (total = 0, next_id = 0; total < in_use; next_id++) {
		struct kern_ipc_perm *ipc;
		struct shmid_kernel *shp;

		ipc = idr_find(&shm_ids(ns).ipcs_idr, next_id);
		if (ipc == NULL)
			continue;
		shp = container_of(ipc, struct shmid_kernel, shm_perm);

		shm_add_rss_swap(shp, rss, swp);

		total++;
	}
}
789
790/*
791 * This function handles some shmctl commands which require the rw_mutex
792 * to be held in write mode.
793 * NOTE: no locks must be held, the rw_mutex is taken inside this function.
794 */
static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd,
		struct shmid_ds __user *buf, int version)
{
	struct kern_ipc_perm *ipcp;
	struct shmid64_ds shmid64;
	struct shmid_kernel *shp;
	int err;

	if (cmd == IPC_SET) {
		if (copy_shmid_from_user(&shmid64, buf, version))
			return -EFAULT;
	}

	/* Takes rw_mutex for writing, locks and permission-checks the object. */
	ipcp = ipcctl_pre_down(ns, &shm_ids(ns), shmid, cmd,
			&shmid64.shm_perm, 0);
	if (IS_ERR(ipcp))
		return PTR_ERR(ipcp);

	shp = container_of(ipcp, struct shmid_kernel, shm_perm);

	err = security_shm_shmctl(shp, cmd);
	if (err)
		goto out_unlock;
	switch (cmd) {
	case IPC_RMID:
		/* do_shm_rmid() releases the object lock itself. */
		do_shm_rmid(ns, ipcp);
		goto out_up;
	case IPC_SET:
		ipc_update_perm(&shmid64.shm_perm, ipcp);
		shp->shm_ctim = get_seconds();	/* change time */
		break;
	default:
		err = -EINVAL;
	}
out_unlock:
	shm_unlock(shp);
out_up:
	up_write(&shm_ids(ns).rw_mutex);
	return err;
}
835
/* shmctl(2) entry point: dispatch on @cmd. */
SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
{
	struct shmid_kernel *shp;
	int err, version;
	struct ipc_namespace *ns;

	if (cmd < 0 || shmid < 0) {
		err = -EINVAL;
		goto out;
	}

	/* Strips the IPC_64 flag out of @cmd and reports the ABI version. */
	version = ipc_parse_version(&cmd);
	ns = current->nsproxy->ipc_ns;

	switch (cmd) { /* replace with proc interface ? */
	case IPC_INFO:
	{
		/* Report the namespace-wide shm limits. */
		struct shminfo64 shminfo;

		err = security_shm_shmctl(NULL, cmd);
		if (err)
			return err;

		memset(&shminfo, 0, sizeof(shminfo));
		shminfo.shmmni = shminfo.shmseg = ns->shm_ctlmni;
		shminfo.shmmax = ns->shm_ctlmax;
		shminfo.shmall = ns->shm_ctlall;

		shminfo.shmmin = SHMMIN;
		if(copy_shminfo_to_user (buf, &shminfo, version))
			return -EFAULT;

		/* On success the highest in-use index is the return value. */
		down_read(&shm_ids(ns).rw_mutex);
		err = ipc_get_maxid(&shm_ids(ns));
		up_read(&shm_ids(ns).rw_mutex);

		if(err<0)
			err = 0;
		goto out;
	}
	case SHM_INFO:
	{
		/* Report current usage statistics for the namespace. */
		struct shm_info shm_info;

		err = security_shm_shmctl(NULL, cmd);
		if (err)
			return err;

		memset(&shm_info, 0, sizeof(shm_info));
		down_read(&shm_ids(ns).rw_mutex);
		shm_info.used_ids = shm_ids(ns).in_use;
		shm_get_stat (ns, &shm_info.shm_rss, &shm_info.shm_swp);
		shm_info.shm_tot = ns->shm_tot;
		shm_info.swap_attempts = 0;
		shm_info.swap_successes = 0;
		err = ipc_get_maxid(&shm_ids(ns));
		up_read(&shm_ids(ns).rw_mutex);
		if (copy_to_user(buf, &shm_info, sizeof(shm_info))) {
			err = -EFAULT;
			goto out;
		}

		err = err < 0 ? 0 : err;
		goto out;
	}
	case SHM_STAT:
	case IPC_STAT:
	{
		struct shmid64_ds tbuf;
		int result;

		if (cmd == SHM_STAT) {
			/* SHM_STAT: plain lookup, return the full ipc id. */
			shp = shm_lock(ns, shmid);
			if (IS_ERR(shp)) {
				err = PTR_ERR(shp);
				goto out;
			}
			result = shp->shm_perm.id;
		} else {
			/* IPC_STAT: id is validated by shm_lock_check(). */
			shp = shm_lock_check(ns, shmid);
			if (IS_ERR(shp)) {
				err = PTR_ERR(shp);
				goto out;
			}
			result = 0;
		}
		err = -EACCES;
		if (ipcperms(ns, &shp->shm_perm, S_IRUGO))
			goto out_unlock;
		err = security_shm_shmctl(shp, cmd);
		if (err)
			goto out_unlock;
		memset(&tbuf, 0, sizeof(tbuf));
		kernel_to_ipc64_perm(&shp->shm_perm, &tbuf.shm_perm);
		tbuf.shm_segsz = shp->shm_segsz;
		tbuf.shm_atime = shp->shm_atim;
		tbuf.shm_dtime = shp->shm_dtim;
		tbuf.shm_ctime = shp->shm_ctim;
		tbuf.shm_cpid = shp->shm_cprid;
		tbuf.shm_lpid = shp->shm_lprid;
		tbuf.shm_nattch = shp->shm_nattch;
		shm_unlock(shp);
		if(copy_shmid_to_user (buf, &tbuf, version))
			err = -EFAULT;
		else
			err = result;
		goto out;
	}
	case SHM_LOCK:
	case SHM_UNLOCK:
	{
		struct file *shm_file;

		shp = shm_lock_check(ns, shmid);
		if (IS_ERR(shp)) {
			err = PTR_ERR(shp);
			goto out;
		}

		audit_ipc_obj(&(shp->shm_perm));

		/* Without CAP_IPC_LOCK: only owner/creator, within rlimit. */
		if (!ns_capable(ns->user_ns, CAP_IPC_LOCK)) {
			uid_t euid = current_euid();
			err = -EPERM;
			if (euid != shp->shm_perm.uid &&
			    euid != shp->shm_perm.cuid)
				goto out_unlock;
			if (cmd == SHM_LOCK && !rlimit(RLIMIT_MEMLOCK))
				goto out_unlock;
		}

		err = security_shm_shmctl(shp, cmd);
		if (err)
			goto out_unlock;

		/* Hugetlb pages are never swapped: nothing to (un)lock. */
		shm_file = shp->shm_file;
		if (is_file_hugepages(shm_file))
			goto out_unlock;

		if (cmd == SHM_LOCK) {
			struct user_struct *user = current_user();
			err = shmem_lock(shm_file, 1, user);
			if (!err && !(shp->shm_perm.mode & SHM_LOCKED)) {
				shp->shm_perm.mode |= SHM_LOCKED;
				shp->mlock_user = user;
			}
			goto out_unlock;
		}

		/* SHM_UNLOCK */
		if (!(shp->shm_perm.mode & SHM_LOCKED))
			goto out_unlock;
		shmem_lock(shm_file, 0, shp->mlock_user);
		shp->shm_perm.mode &= ~SHM_LOCKED;
		shp->mlock_user = NULL;
		/* Pin the file so the mapping survives dropping the lock. */
		get_file(shm_file);
		shm_unlock(shp);
		shmem_unlock_mapping(shm_file->f_mapping);
		fput(shm_file);
		goto out;
	}
	case IPC_RMID:
	case IPC_SET:
		/* These take the rw_mutex as a writer; see shmctl_down(). */
		err = shmctl_down(ns, shmid, cmd, buf, version);
		return err;
	default:
		return -EINVAL;
	}

out_unlock:
	shm_unlock(shp);
out:
	return err;
}
1010
1011/*
1012 * Fix shmaddr, allocate descriptor, map shm, add attach descriptor to lists.
1013 *
1014 * NOTE! Despite the name, this is NOT a direct system call entrypoint. The
1015 * "raddr" thing points to kernel space, and there has to be a wrapper around
1016 * this.
1017 */
long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr)
{
	struct shmid_kernel *shp;
	unsigned long addr;
	unsigned long size;
	struct file * file;
	int err;
	unsigned long flags;
	unsigned long prot;
	int acc_mode;
	unsigned long user_addr;
	struct ipc_namespace *ns;
	struct shm_file_data *sfd;
	struct path path;
	fmode_t f_mode;

	/* Validate/normalise the requested attach address. */
	err = -EINVAL;
	if (shmid < 0)
		goto out;
	else if ((addr = (ulong)shmaddr)) {
		if (addr & (SHMLBA-1)) {
			if (shmflg & SHM_RND){
				addr &= ~(SHMLBA-1); /* round down */

				/*Fix for HUB CVE-2017-5669
				 * Ensure that the round-down is non-nil
				 * when remapping. This can happen for
				 * cases when addr < shmlba.
				 */
				if (!addr && (shmflg & SHM_REMAP))
					goto out;
			}else

#ifndef __ARCH_FORCE_SHMLBA
			if (addr & ~PAGE_MASK)
#endif
				goto out;
		}
		flags = MAP_SHARED | MAP_FIXED;
	} else {
		/* SHM_REMAP makes no sense without a fixed address. */
		if ((shmflg & SHM_REMAP))
			goto out;

		flags = MAP_SHARED;
	}

	/* Translate shmflg into mmap prot / ipc perms / file mode. */
	if (shmflg & SHM_RDONLY) {
		prot = PROT_READ;
		acc_mode = S_IRUGO;
		f_mode = FMODE_READ;
	} else {
		prot = PROT_READ | PROT_WRITE;
		acc_mode = S_IRUGO | S_IWUGO;
		f_mode = FMODE_READ | FMODE_WRITE;
	}
	if (shmflg & SHM_EXEC) {
		prot |= PROT_EXEC;
		acc_mode |= S_IXUGO;
	}

	/*
	 * We cannot rely on the fs check since SYSV IPC does have an
	 * additional creator id...
	 */
	ns = current->nsproxy->ipc_ns;
	shp = shm_lock_check(ns, shmid);
	if (IS_ERR(shp)) {
		err = PTR_ERR(shp);
		goto out;
	}

	err = -EACCES;
	if (ipcperms(ns, &shp->shm_perm, acc_mode))
		goto out_unlock;

	err = security_shm_shmat(shp, shmaddr, shmflg);
	if (err)
		goto out_unlock;

	/* Raising shm_nattch pins the segment while we drop its lock. */
	path = shp->shm_file->f_path;
	path_get(&path);
	shp->shm_nattch++;
	size = i_size_read(path.dentry->d_inode);
	shm_unlock(shp);

	err = -ENOMEM;
	sfd = kzalloc(sizeof(*sfd), GFP_KERNEL);
	if (!sfd)
		goto out_put_dentry;

	/* Proxy file: huge and normal segments get different f_ops. */
	file = alloc_file(&path, f_mode,
			  is_file_hugepages(shp->shm_file) ?
				&shm_file_operations_huge :
				&shm_file_operations);
	if (!file)
		goto out_free;

	file->private_data = sfd;
	file->f_mapping = shp->shm_file->f_mapping;
	sfd->id = shp->shm_perm.id;
	sfd->ns = get_ipc_ns(ns);	/* dropped in shm_release() */
	sfd->file = shp->shm_file;
	sfd->vm_ops = NULL;
#ifdef CONFIG_SYSVIPC_CROSS_SHM
	if(shp->shm_perm.rpmflag == TRUE)
		file->shm_flags = SHM_REMOTE_SYSV_YES;
#endif
	down_write(&current->mm->mmap_sem);
	if (addr && !(shmflg & SHM_REMAP)) {
		/* Fixed, non-remapping attach must not clobber existing vmas. */
		err = -EINVAL;
		if (find_vma_intersection(current->mm, addr, addr + size))
			goto invalid;
		/*
		 * If shm segment goes below stack, make sure there is some
		 * space left for the stack to grow (at least 4 pages).
		 */
		if (addr < current->mm->start_stack &&
		    addr > current->mm->start_stack - size - PAGE_SIZE * 5)
			goto invalid;
	}

	user_addr = do_mmap (file, addr, size, prot, flags, 0);
	*raddr = user_addr;
	err = 0;
	if (IS_ERR_VALUE(user_addr))
		err = (long)user_addr;
invalid:
	up_write(&current->mm->mmap_sem);

	fput(file);

out_nattch:
	/* Drop the attach-count pin taken above; the mapping (if any)
	 * now holds its own reference via shm_open(). */
	down_write(&shm_ids(ns).rw_mutex);
	shp = shm_lock(ns, shmid);
	BUG_ON(IS_ERR(shp));
	shp->shm_nattch--;
	if (shm_may_destroy(ns, shp))
		shm_destroy(ns, shp);
	else
		shm_unlock(shp);
	up_write(&shm_ids(ns).rw_mutex);

out:
	return err;

out_unlock:
	shm_unlock(shp);
	goto out;

out_free:
	kfree(sfd);
out_put_dentry:
	path_put(&path);
	goto out_nattch;
}
1173
/* shmat(2): thin wrapper translating do_shmat()'s out-parameter. */
SYSCALL_DEFINE3(shmat, int, shmid, char __user *, shmaddr, int, shmflg)
{
	unsigned long ret;
	long err;

	err = do_shmat(shmid, shmaddr, shmflg, &ret);
	if (err)
		return err;
	/* The mapped address may look like a negative errno; tell the
	 * syscall-exit machinery this is a success. */
	force_successful_syscall_return();
	return (long)ret;
}
1185
1186/*
1187 * detach and kill segment if marked destroyed.
1188 * The work is done in shm_close.
1189 */
1190SYSCALL_DEFINE1(shmdt, char __user *, shmaddr)
1191{
1192 struct mm_struct *mm = current->mm;
1193 struct vm_area_struct *vma;
1194 unsigned long addr = (unsigned long)shmaddr;
1195 int retval = -EINVAL;
1196#ifdef CONFIG_MMU
1197 loff_t size = 0;
1198 struct vm_area_struct *next;
1199#endif
1200
1201 if (addr & ~PAGE_MASK)
1202 return retval;
1203
1204 down_write(&mm->mmap_sem);
1205
1206 /*
1207 * This function tries to be smart and unmap shm segments that
1208 * were modified by partial mlock or munmap calls:
1209 * - It first determines the size of the shm segment that should be
1210 * unmapped: It searches for a vma that is backed by shm and that
1211 * started at address shmaddr. It records it's size and then unmaps
1212 * it.
1213 * - Then it unmaps all shm vmas that started at shmaddr and that
1214 * are within the initially determined size.
1215 * Errors from do_munmap are ignored: the function only fails if
1216 * it's called with invalid parameters or if it's called to unmap
1217 * a part of a vma. Both calls in this function are for full vmas,
1218 * the parameters are directly copied from the vma itself and always
1219 * valid - therefore do_munmap cannot fail. (famous last words?)
1220 */
1221 /*
1222 * If it had been mremap()'d, the starting address would not
1223 * match the usual checks anyway. So assume all vma's are
1224 * above the starting address given.
1225 */
1226 vma = find_vma(mm, addr);
1227
1228#ifdef CONFIG_MMU
1229 while (vma) {
1230 next = vma->vm_next;
1231
1232 /*
1233 * Check if the starting address would match, i.e. it's
1234 * a fragment created by mprotect() and/or munmap(), or it
1235 * otherwise it starts at this address with no hassles.
1236 */
1237 if ((vma->vm_ops == &shm_vm_ops) &&
1238 (vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) {
1239
1240
1241 size = vma->vm_file->f_path.dentry->d_inode->i_size;
1242 do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);
1243 /*
1244 * We discovered the size of the shm segment, so
1245 * break out of here and fall through to the next
1246 * loop that uses the size information to stop
1247 * searching for matching vma's.
1248 */
1249 retval = 0;
1250 vma = next;
1251 break;
1252 }
1253 vma = next;
1254 }
1255
1256 /*
1257 * We need look no further than the maximum address a fragment
1258 * could possibly have landed at. Also cast things to loff_t to
1259 * prevent overflows and make comparisons vs. equal-width types.
1260 */
1261 size = PAGE_ALIGN(size);
1262 while (vma && (loff_t)(vma->vm_end - addr) <= size) {
1263 next = vma->vm_next;
1264
1265 /* finding a matching vma now does not alter retval */
1266 if ((vma->vm_ops == &shm_vm_ops) &&
1267 (vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff)
1268
1269 do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);
1270 vma = next;
1271 }
1272
1273#else /* CONFIG_MMU */
1274 /* under NOMMU conditions, the exact address to be destroyed must be
1275 * given */
1276 retval = -EINVAL;
1277 if (vma->vm_start == addr && vma->vm_ops == &shm_vm_ops) {
1278 do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);
1279 retval = 0;
1280 }
1281
1282#endif
1283
1284 up_write(&mm->mmap_sem);
1285 return retval;
1286}
1287
1288#ifdef CONFIG_PROC_FS
/*
 * Emit one /proc/sysvipc/shm row for the segment @it points at.
 *
 * @s:  seq_file the row is written into.
 * @it: current shmid_kernel, handed in by the sysvipc seq iterator.
 *
 * Returns the result of seq_printf() (0 on success, negative when the
 * seq_file buffer overflowed and the core will retry with a larger one
 * -- NOTE(review): assumes a kernel where seq_printf() still returns
 * int; later kernels changed it to void).
 */
static int sysvipc_shm_proc_show(struct seq_file *s, void *it)
{
	struct shmid_kernel *shp = it;
	unsigned long rss = 0, swp = 0;

	/* Accumulate resident and swapped page counts for this segment. */
	shm_add_rss_swap(shp, &rss, &swp);

/* Size columns must hold a full size_t: 10 digits suffice on 32-bit,
 * 21 on 64-bit. */
#if BITS_PER_LONG <= 32
#define SIZE_SPEC "%10lu"
#else
#define SIZE_SPEC "%21lu"
#endif

	return seq_printf(s,
			  "%10d %10d  %4o " SIZE_SPEC " %5u %5u  "
			  "%5lu %5u %5u %5u %5u %10lu %10lu %10lu "
			  SIZE_SPEC " " SIZE_SPEC "\n",
			  shp->shm_perm.key,
			  shp->shm_perm.id,
			  shp->shm_perm.mode,
			  shp->shm_segsz,
			  shp->shm_cprid,
			  shp->shm_lprid,
			  shp->shm_nattch,
			  shp->shm_perm.uid,
			  shp->shm_perm.gid,
			  shp->shm_perm.cuid,
			  shp->shm_perm.cgid,
			  shp->shm_atim,	/* last shmat() time */
			  shp->shm_dtim,	/* last shmdt() time */
			  shp->shm_ctim,	/* last change time */
			  rss * PAGE_SIZE,	/* resident bytes */
			  swp * PAGE_SIZE);	/* swapped bytes */
}
1323#endif