// SPDX-License-Identifier: GPL-2.0-only
/*
 * binfmt_misc.c
 *
 * Copyright (C) 1997 Richard Günther
 *
 * binfmt_misc detects binaries via a magic or filename extension and invokes
 * a specified wrapper. See Documentation/admin-guide/binfmt-misc.rst for more details.
 */
10
/* Format wrapper: prepends KBUILD_MODNAME and ": " to the given format string. */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
12
13#include <linux/kernel.h>
14#include <linux/module.h>
15#include <linux/init.h>
16#include <linux/sched/mm.h>
17#include <linux/magic.h>
18#include <linux/binfmts.h>
19#include <linux/slab.h>
20#include <linux/ctype.h>
21#include <linux/string_helpers.h>
22#include <linux/file.h>
23#include <linux/pagemap.h>
24#include <linux/namei.h>
25#include <linux/mount.h>
26#include <linux/fs_context.h>
27#include <linux/syscalls.h>
28#include <linux/fs.h>
29#include <linux/uaccess.h>
30
31#include "internal.h"
32
/*
 * USE_DEBUG mirrors DEBUG as a 0/1 constant so that debug-only code can be
 * guarded with an ordinary if () instead of #ifdef.
 */
#if defined(DEBUG)
# define USE_DEBUG 1
#else
# define USE_DEBUG 0
#endif
38
enum {
	/*
	 * Non-zero makes entry_status() print the full description of an
	 * entry; set it to zero to save about 400 bytes of kernel memory.
	 */
	VERBOSE_STATUS = 1
};
42
/* List of all registered handlers (linked through Node.list). */
static LIST_HEAD(entries);
/* Global switch checked by load_misc_binary(); set via the "status" file. */
static int enabled = 1;
45
/* Bit numbers (not masks) in Node.flags, used with test_bit() and friends. */
enum { Enabled = 0, Magic = 1 };

/* Bit masks in Node.flags, set from the P/O/C/F characters at registration. */
#define MISC_FMT_PRESERVE_ARGV0	(1UL << 31)
#define MISC_FMT_OPEN_BINARY	(1UL << 30)
#define MISC_FMT_CREDENTIALS	(1UL << 29)
#define MISC_FMT_OPEN_FILE	(1UL << 28)
51
52typedef struct {
53 struct list_head list;
54 unsigned long flags; /* type, status, etc. */
55 int offset; /* offset of magic */
56 int size; /* size of magic/mask */
57 char *magic; /* magic or filename extension */
58 char *mask; /* mask, NULL for exact match */
59 const char *interpreter; /* filename of interpreter */
60 char *name;
61 struct dentry *dentry;
62 struct file *interp_file;
63 refcount_t users; /* sync removal with load_misc_binary() */
64} Node;
65
66static DEFINE_RWLOCK(entries_lock);
67static struct file_system_type bm_fs_type;
68
/*
 * Upper bound on the length of a register string.  The budget is:
 *  - 7 delimiters
 *  - name:    ~50 bytes
 *  - type:    1 byte
 *  - offset:  3 bytes (has to be smaller than BINPRM_BUF_SIZE)
 *  - magic:   128 bytes (512 in escaped form)
 *  - mask:    128 bytes (512 in escaped form)
 *  - interp:  ~50 bytes
 *  - flags:   5 bytes
 * That sum is rounded up a bit and then reduced again to leave room for the
 * internal data (like struct Node).
 */
#define MAX_REGISTER_LENGTH 1920
83
84/**
85 * search_binfmt_handler - search for a binary handler for @bprm
86 * @misc: handle to binfmt_misc instance
87 * @bprm: binary for which we are looking for a handler
88 *
89 * Search for a binary type handler for @bprm in the list of registered binary
90 * type handlers.
91 *
92 * Return: binary type list entry on success, NULL on failure
93 */
94static Node *search_binfmt_handler(struct linux_binprm *bprm)
95{
96 char *p = strrchr(bprm->interp, '.');
97 Node *e;
98
99 /* Walk all the registered handlers. */
100 list_for_each_entry(e, &entries, list) {
101 char *s;
102 int j;
103
104 /* Make sure this one is currently enabled. */
105 if (!test_bit(Enabled, &e->flags))
106 continue;
107
108 /* Do matching based on extension if applicable. */
109 if (!test_bit(Magic, &e->flags)) {
110 if (p && !strcmp(e->magic, p + 1))
111 return e;
112 continue;
113 }
114
115 /* Do matching based on magic & mask. */
116 s = bprm->buf + e->offset;
117 if (e->mask) {
118 for (j = 0; j < e->size; j++)
119 if ((*s++ ^ e->magic[j]) & e->mask[j])
120 break;
121 } else {
122 for (j = 0; j < e->size; j++)
123 if ((*s++ ^ e->magic[j]))
124 break;
125 }
126 if (j == e->size)
127 return e;
128 }
129
130 return NULL;
131}
132
133/**
134 * get_binfmt_handler - try to find a binary type handler
135 * @misc: handle to binfmt_misc instance
136 * @bprm: binary for which we are looking for a handler
137 *
138 * Try to find a binfmt handler for the binary type. If one is found take a
139 * reference to protect against removal via bm_{entry,status}_write().
140 *
141 * Return: binary type list entry on success, NULL on failure
142 */
143static Node *get_binfmt_handler(struct linux_binprm *bprm)
144{
145 Node *e;
146
147 read_lock(&entries_lock);
148 e = search_binfmt_handler(bprm);
149 if (e)
150 refcount_inc(&e->users);
151 read_unlock(&entries_lock);
152 return e;
153}
154
155/**
156 * put_binfmt_handler - put binary handler node
157 * @e: node to put
158 *
159 * Free node syncing with load_misc_binary() and defer final free to
160 * load_misc_binary() in case it is using the binary type handler we were
161 * requested to remove.
162 */
163static void put_binfmt_handler(Node *e)
164{
165 if (refcount_dec_and_test(&e->users)) {
166 if (e->flags & MISC_FMT_OPEN_FILE)
167 filp_close(e->interp_file, NULL);
168 kfree(e);
169 }
170}
171
172/*
173 * the loader itself
174 */
175static int load_misc_binary(struct linux_binprm *bprm)
176{
177 Node *fmt;
178 struct file *interp_file = NULL;
179 int retval;
180 int fd_binary = -1;
181
182 retval = -ENOEXEC;
183 if (!enabled)
184 return retval;
185
186 fmt = get_binfmt_handler(bprm);
187 if (!fmt)
188 return retval;
189
190 /* Need to be able to load the file after exec */
191 retval = -ENOENT;
192 if (bprm->interp_flags & BINPRM_FLAGS_PATH_INACCESSIBLE)
193 goto ret;
194
195 if (!(fmt->flags & MISC_FMT_PRESERVE_ARGV0)) {
196 retval = remove_arg_zero(bprm);
197 if (retval)
198 goto ret;
199 }
200
201 if (fmt->flags & MISC_FMT_OPEN_BINARY) {
202
203 /* if the binary should be opened on behalf of the
204 * interpreter than keep it open and assign descriptor
205 * to it
206 */
207 fd_binary = get_unused_fd_flags(0);
208 if (fd_binary < 0) {
209 retval = fd_binary;
210 goto ret;
211 }
212 fd_install(fd_binary, bprm->file);
213
214 /* if the binary is not readable than enforce mm->dumpable=0
215 regardless of the interpreter's permissions */
216 would_dump(bprm, bprm->file);
217
218 allow_write_access(bprm->file);
219 bprm->file = NULL;
220
221 /* mark the bprm that fd should be passed to interp */
222 bprm->interp_flags |= BINPRM_FLAGS_EXECFD;
223 bprm->interp_data = fd_binary;
224
225 } else {
226 allow_write_access(bprm->file);
227 fput(bprm->file);
228 bprm->file = NULL;
229 }
230 /* make argv[1] be the path to the binary */
231 retval = copy_strings_kernel(1, &bprm->interp, bprm);
232 if (retval < 0)
233 goto error;
234 bprm->argc++;
235
236 /* add the interp as argv[0] */
237 retval = copy_strings_kernel(1, &fmt->interpreter, bprm);
238 if (retval < 0)
239 goto error;
240 bprm->argc++;
241
242 /* Update interp in case binfmt_script needs it. */
243 retval = bprm_change_interp(fmt->interpreter, bprm);
244 if (retval < 0)
245 goto error;
246
247 if (fmt->flags & MISC_FMT_OPEN_FILE) {
248 interp_file = file_clone_open(fmt->interp_file);
249 if (!IS_ERR(interp_file))
250 deny_write_access(interp_file);
251 } else {
252 interp_file = open_exec(fmt->interpreter);
253 }
254 retval = PTR_ERR(interp_file);
255 if (IS_ERR(interp_file))
256 goto error;
257
258 bprm->file = interp_file;
259 if (fmt->flags & MISC_FMT_CREDENTIALS) {
260 loff_t pos = 0;
261
262 /*
263 * No need to call prepare_binprm(), it's already been
264 * done. bprm->buf is stale, update from interp_file.
265 */
266 memset(bprm->buf, 0, BINPRM_BUF_SIZE);
267 retval = kernel_read(bprm->file, bprm->buf, BINPRM_BUF_SIZE,
268 &pos);
269 } else
270 retval = prepare_binprm(bprm);
271
272 if (retval < 0)
273 goto error;
274
275 retval = search_binary_handler(bprm);
276 if (retval < 0)
277 goto error;
278
279ret:
280
281 /*
282 * If we actually put the node here all concurrent calls to
283 * load_misc_binary() will have finished. We also know
284 * that for the refcount to be zero ->evict_inode() must have removed
285 * the node to be deleted from the list. All that is left for us is to
286 * close and free.
287 */
288 put_binfmt_handler(fmt);
289
290 return retval;
291error:
292 if (fd_binary > 0)
293 ksys_close(fd_binary);
294 bprm->interp_flags = 0;
295 bprm->interp_data = 0;
296 goto ret;
297}
298
299/* Command parsers */
300
/*
 * Scan one argument that is terminated by the delimiter @del, starting at
 * @s.  A "\x" sequence must be followed by two hex digits; those two
 * characters are skipped without being compared against @del, so an escaped
 * byte can never end the argument.  The terminating delimiter is overwritten
 * with a NUL byte.
 *
 * Returns a pointer to the character following the delimiter, or NULL if a
 * "\x" escape is malformed.
 */
static char *scanarg(char *s, char del)
{
	for (;;) {
		char c = *s++;

		if (c == del)
			break;
		if (c != '\\' || *s != 'x')
			continue;

		/* Step over the 'x' and require exactly two hex digits. */
		s++;
		if (!isxdigit(*s++))
			return NULL;
		if (!isxdigit(*s++))
			return NULL;
	}

	s[-1] = '\0';
	return s;
}
323
324static char *check_special_flags(char *sfs, Node *e)
325{
326 char *p = sfs;
327 int cont = 1;
328
329 /* special flags */
330 while (cont) {
331 switch (*p) {
332 case 'P':
333 pr_debug("register: flag: P (preserve argv0)\n");
334 p++;
335 e->flags |= MISC_FMT_PRESERVE_ARGV0;
336 break;
337 case 'O':
338 pr_debug("register: flag: O (open binary)\n");
339 p++;
340 e->flags |= MISC_FMT_OPEN_BINARY;
341 break;
342 case 'C':
343 pr_debug("register: flag: C (preserve creds)\n");
344 p++;
345 /* this flags also implies the
346 open-binary flag */
347 e->flags |= (MISC_FMT_CREDENTIALS |
348 MISC_FMT_OPEN_BINARY);
349 break;
350 case 'F':
351 pr_debug("register: flag: F: open interpreter file now\n");
352 p++;
353 e->flags |= MISC_FMT_OPEN_FILE;
354 break;
355 default:
356 cont = 0;
357 }
358 }
359
360 return p;
361}
362
363/*
364 * This registers a new binary format, it recognises the syntax
365 * ':name:type:offset:magic:mask:interpreter:flags'
366 * where the ':' is the IFS, that can be chosen with the first char
367 */
368static Node *create_entry(const char __user *buffer, size_t count)
369{
370 Node *e;
371 int memsize, err;
372 char *buf, *p;
373 char del;
374
375 pr_debug("register: received %zu bytes\n", count);
376
377 /* some sanity checks */
378 err = -EINVAL;
379 if ((count < 11) || (count > MAX_REGISTER_LENGTH))
380 goto out;
381
382 err = -ENOMEM;
383 memsize = sizeof(Node) + count + 8;
384 e = kmalloc(memsize, GFP_KERNEL);
385 if (!e)
386 goto out;
387
388 p = buf = (char *)e + sizeof(Node);
389
390 memset(e, 0, sizeof(Node));
391 if (copy_from_user(buf, buffer, count))
392 goto efault;
393
394 del = *p++; /* delimeter */
395
396 pr_debug("register: delim: %#x {%c}\n", del, del);
397
398 /* Pad the buffer with the delim to simplify parsing below. */
399 memset(buf + count, del, 8);
400
401 /* Parse the 'name' field. */
402 e->name = p;
403 p = strchr(p, del);
404 if (!p)
405 goto einval;
406 *p++ = '\0';
407 if (!e->name[0] ||
408 !strcmp(e->name, ".") ||
409 !strcmp(e->name, "..") ||
410 strchr(e->name, '/'))
411 goto einval;
412
413 pr_debug("register: name: {%s}\n", e->name);
414
415 /* Parse the 'type' field. */
416 switch (*p++) {
417 case 'E':
418 pr_debug("register: type: E (extension)\n");
419 e->flags = 1 << Enabled;
420 break;
421 case 'M':
422 pr_debug("register: type: M (magic)\n");
423 e->flags = (1 << Enabled) | (1 << Magic);
424 break;
425 default:
426 goto einval;
427 }
428 if (*p++ != del)
429 goto einval;
430
431 if (test_bit(Magic, &e->flags)) {
432 /* Handle the 'M' (magic) format. */
433 char *s;
434
435 /* Parse the 'offset' field. */
436 s = strchr(p, del);
437 if (!s)
438 goto einval;
439 *s = '\0';
440 if (p != s) {
441 int r = kstrtoint(p, 10, &e->offset);
442 if (r != 0 || e->offset < 0)
443 goto einval;
444 }
445 p = s;
446 if (*p++)
447 goto einval;
448 pr_debug("register: offset: %#x\n", e->offset);
449
450 /* Parse the 'magic' field. */
451 e->magic = p;
452 p = scanarg(p, del);
453 if (!p)
454 goto einval;
455 if (!e->magic[0])
456 goto einval;
457 if (USE_DEBUG)
458 print_hex_dump_bytes(
459 KBUILD_MODNAME ": register: magic[raw]: ",
460 DUMP_PREFIX_NONE, e->magic, p - e->magic);
461
462 /* Parse the 'mask' field. */
463 e->mask = p;
464 p = scanarg(p, del);
465 if (!p)
466 goto einval;
467 if (!e->mask[0]) {
468 e->mask = NULL;
469 pr_debug("register: mask[raw]: none\n");
470 } else if (USE_DEBUG)
471 print_hex_dump_bytes(
472 KBUILD_MODNAME ": register: mask[raw]: ",
473 DUMP_PREFIX_NONE, e->mask, p - e->mask);
474
475 /*
476 * Decode the magic & mask fields.
477 * Note: while we might have accepted embedded NUL bytes from
478 * above, the unescape helpers here will stop at the first one
479 * it encounters.
480 */
481 e->size = string_unescape_inplace(e->magic, UNESCAPE_HEX);
482 if (e->mask &&
483 string_unescape_inplace(e->mask, UNESCAPE_HEX) != e->size)
484 goto einval;
485 if (e->size > BINPRM_BUF_SIZE ||
486 BINPRM_BUF_SIZE - e->size < e->offset)
487 goto einval;
488 pr_debug("register: magic/mask length: %i\n", e->size);
489 if (USE_DEBUG) {
490 print_hex_dump_bytes(
491 KBUILD_MODNAME ": register: magic[decoded]: ",
492 DUMP_PREFIX_NONE, e->magic, e->size);
493
494 if (e->mask) {
495 int i;
496 char *masked = kmalloc(e->size, GFP_KERNEL);
497
498 print_hex_dump_bytes(
499 KBUILD_MODNAME ": register: mask[decoded]: ",
500 DUMP_PREFIX_NONE, e->mask, e->size);
501
502 if (masked) {
503 for (i = 0; i < e->size; ++i)
504 masked[i] = e->magic[i] & e->mask[i];
505 print_hex_dump_bytes(
506 KBUILD_MODNAME ": register: magic[masked]: ",
507 DUMP_PREFIX_NONE, masked, e->size);
508
509 kfree(masked);
510 }
511 }
512 }
513 } else {
514 /* Handle the 'E' (extension) format. */
515
516 /* Skip the 'offset' field. */
517 p = strchr(p, del);
518 if (!p)
519 goto einval;
520 *p++ = '\0';
521
522 /* Parse the 'magic' field. */
523 e->magic = p;
524 p = strchr(p, del);
525 if (!p)
526 goto einval;
527 *p++ = '\0';
528 if (!e->magic[0] || strchr(e->magic, '/'))
529 goto einval;
530 pr_debug("register: extension: {%s}\n", e->magic);
531
532 /* Skip the 'mask' field. */
533 p = strchr(p, del);
534 if (!p)
535 goto einval;
536 *p++ = '\0';
537 }
538
539 /* Parse the 'interpreter' field. */
540 e->interpreter = p;
541 p = strchr(p, del);
542 if (!p)
543 goto einval;
544 *p++ = '\0';
545 if (!e->interpreter[0])
546 goto einval;
547 pr_debug("register: interpreter: {%s}\n", e->interpreter);
548
549 /* Parse the 'flags' field. */
550 p = check_special_flags(p, e);
551 if (*p == '\n')
552 p++;
553 if (p != buf + count)
554 goto einval;
555
556 return e;
557
558out:
559 return ERR_PTR(err);
560
561efault:
562 kfree(e);
563 return ERR_PTR(-EFAULT);
564einval:
565 kfree(e);
566 return ERR_PTR(-EINVAL);
567}
568
569/*
570 * Set status of entry/binfmt_misc:
571 * '1' enables, '0' disables and '-1' clears entry/binfmt_misc
572 */
573static int parse_command(const char __user *buffer, size_t count)
574{
575 char s[4];
576
577 if (count > 3)
578 return -EINVAL;
579 if (copy_from_user(s, buffer, count))
580 return -EFAULT;
581 if (!count)
582 return 0;
583 if (s[count - 1] == '\n')
584 count--;
585 if (count == 1 && s[0] == '0')
586 return 1;
587 if (count == 1 && s[0] == '1')
588 return 2;
589 if (count == 2 && s[0] == '-' && s[1] == '1')
590 return 3;
591 return -EINVAL;
592}
593
594/* generic stuff */
595
596static void entry_status(Node *e, char *page)
597{
598 char *dp = page;
599 const char *status = "disabled";
600
601 if (test_bit(Enabled, &e->flags))
602 status = "enabled";
603
604 if (!VERBOSE_STATUS) {
605 sprintf(page, "%s\n", status);
606 return;
607 }
608
609 dp += sprintf(dp, "%s\ninterpreter %s\n", status, e->interpreter);
610
611 /* print the special flags */
612 dp += sprintf(dp, "flags: ");
613 if (e->flags & MISC_FMT_PRESERVE_ARGV0)
614 *dp++ = 'P';
615 if (e->flags & MISC_FMT_OPEN_BINARY)
616 *dp++ = 'O';
617 if (e->flags & MISC_FMT_CREDENTIALS)
618 *dp++ = 'C';
619 if (e->flags & MISC_FMT_OPEN_FILE)
620 *dp++ = 'F';
621 *dp++ = '\n';
622
623 if (!test_bit(Magic, &e->flags)) {
624 sprintf(dp, "extension .%s\n", e->magic);
625 } else {
626 dp += sprintf(dp, "offset %i\nmagic ", e->offset);
627 dp = bin2hex(dp, e->magic, e->size);
628 if (e->mask) {
629 dp += sprintf(dp, "\nmask ");
630 dp = bin2hex(dp, e->mask, e->size);
631 }
632 *dp++ = '\n';
633 *dp = '\0';
634 }
635}
636
637static struct inode *bm_get_inode(struct super_block *sb, int mode)
638{
639 struct inode *inode = new_inode(sb);
640
641 if (inode) {
642 inode->i_ino = get_next_ino();
643 inode->i_mode = mode;
644 inode->i_atime = inode->i_mtime = inode->i_ctime =
645 current_time(inode);
646 }
647 return inode;
648}
649
650/**
651 * bm_evict_inode - cleanup data associated with @inode
652 * @inode: inode to which the data is attached
653 *
654 * Cleanup the binary type handler data associated with @inode if a binary type
655 * entry is removed or the filesystem is unmounted and the super block is
656 * shutdown.
657 *
658 * If the ->evict call was not caused by a super block shutdown but by a write
659 * to remove the entry or all entries via bm_{entry,status}_write() the entry
660 * will have already been removed from the list. We keep the list_empty() check
661 * to make that explicit.
662*/
663static void bm_evict_inode(struct inode *inode)
664{
665 Node *e = inode->i_private;
666
667 clear_inode(inode);
668
669 if (e) {
670 write_lock(&entries_lock);
671 if (!list_empty(&e->list))
672 list_del_init(&e->list);
673 write_unlock(&entries_lock);
674 put_binfmt_handler(e);
675 }
676}
677
678/**
679 * unlink_binfmt_dentry - remove the dentry for the binary type handler
680 * @dentry: dentry associated with the binary type handler
681 *
682 * Do the actual filesystem work to remove a dentry for a registered binary
683 * type handler. Since binfmt_misc only allows simple files to be created
684 * directly under the root dentry of the filesystem we ensure that we are
685 * indeed passed a dentry directly beneath the root dentry, that the inode
686 * associated with the root dentry is locked, and that it is a regular file we
687 * are asked to remove.
688 */
689static void unlink_binfmt_dentry(struct dentry *dentry)
690{
691 struct dentry *parent = dentry->d_parent;
692 struct inode *inode, *parent_inode;
693
694 /* All entries are immediate descendants of the root dentry. */
695 if (WARN_ON_ONCE(dentry->d_sb->s_root != parent))
696 return;
697
698 /* We only expect to be called on regular files. */
699 inode = d_inode(dentry);
700 if (WARN_ON_ONCE(!S_ISREG(inode->i_mode)))
701 return;
702
703 /* The parent inode must be locked. */
704 parent_inode = d_inode(parent);
705 if (WARN_ON_ONCE(!inode_is_locked(parent_inode)))
706 return;
707
708 if (simple_positive(dentry)) {
709 dget(dentry);
710 simple_unlink(parent_inode, dentry);
711 d_delete(dentry);
712 dput(dentry);
713 }
714}
715
716/**
717 * remove_binfmt_handler - remove a binary type handler
718 * @misc: handle to binfmt_misc instance
719 * @e: binary type handler to remove
720 *
721 * Remove a binary type handler from the list of binary type handlers and
722 * remove its associated dentry. This is called from
723 * binfmt_{entry,status}_write(). In the future, we might want to think about
724 * adding a proper ->unlink() method to binfmt_misc instead of forcing caller's
725 * to use writes to files in order to delete binary type handlers. But it has
726 * worked for so long that it's not a pressing issue.
727 */
728static void remove_binfmt_handler(Node *e)
729{
730 write_lock(&entries_lock);
731 list_del_init(&e->list);
732 write_unlock(&entries_lock);
733 unlink_binfmt_dentry(e->dentry);
734}
735
736/* /<entry> */
737
738static ssize_t
739bm_entry_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos)
740{
741 Node *e = file_inode(file)->i_private;
742 ssize_t res;
743 char *page;
744
745 page = (char *) __get_free_page(GFP_KERNEL);
746 if (!page)
747 return -ENOMEM;
748
749 entry_status(e, page);
750
751 res = simple_read_from_buffer(buf, nbytes, ppos, page, strlen(page));
752
753 free_page((unsigned long) page);
754 return res;
755}
756
757static ssize_t bm_entry_write(struct file *file, const char __user *buffer,
758 size_t count, loff_t *ppos)
759{
760 struct inode *inode = file_inode(file);
761 Node *e = inode->i_private;
762 int res = parse_command(buffer, count);
763
764 switch (res) {
765 case 1:
766 /* Disable this handler. */
767 clear_bit(Enabled, &e->flags);
768 break;
769 case 2:
770 /* Enable this handler. */
771 set_bit(Enabled, &e->flags);
772 break;
773 case 3:
774 /* Delete this handler. */
775 inode = d_inode(inode->i_sb->s_root);
776 inode_lock(inode);
777
778 /*
779 * In order to add new element or remove elements from the list
780 * via bm_{entry,register,status}_write() inode_lock() on the
781 * root inode must be held.
782 * The lock is exclusive ensuring that the list can't be
783 * modified. Only load_misc_binary() can access but does so
784 * read-only. So we only need to take the write lock when we
785 * actually remove the entry from the list.
786 */
787 if (!list_empty(&e->list))
788 remove_binfmt_handler(e);
789
790 inode_unlock(inode);
791 break;
792 default:
793 return res;
794 }
795
796 return count;
797}
798
799static const struct file_operations bm_entry_operations = {
800 .read = bm_entry_read,
801 .write = bm_entry_write,
802 .llseek = default_llseek,
803};
804
805/* /register */
806
807static ssize_t bm_register_write(struct file *file, const char __user *buffer,
808 size_t count, loff_t *ppos)
809{
810 Node *e;
811 struct inode *inode;
812 struct super_block *sb = file_inode(file)->i_sb;
813 struct dentry *root = sb->s_root, *dentry;
814 int err = 0;
815 struct file *f = NULL;
816
817 e = create_entry(buffer, count);
818
819 if (IS_ERR(e))
820 return PTR_ERR(e);
821
822 if (e->flags & MISC_FMT_OPEN_FILE) {
823 f = open_exec(e->interpreter);
824 if (IS_ERR(f)) {
825 pr_notice("register: failed to install interpreter file %s\n",
826 e->interpreter);
827 kfree(e);
828 return PTR_ERR(f);
829 }
830 e->interp_file = f;
831 }
832
833 inode_lock(d_inode(root));
834 dentry = lookup_one_len(e->name, root, strlen(e->name));
835 err = PTR_ERR(dentry);
836 if (IS_ERR(dentry))
837 goto out;
838
839 err = -EEXIST;
840 if (d_really_is_positive(dentry))
841 goto out2;
842
843 inode = bm_get_inode(sb, S_IFREG | 0644);
844
845 err = -ENOMEM;
846 if (!inode)
847 goto out2;
848
849 refcount_set(&e->users, 1);
850 e->dentry = dget(dentry);
851 inode->i_private = e;
852 inode->i_fop = &bm_entry_operations;
853
854 d_instantiate(dentry, inode);
855 write_lock(&entries_lock);
856 list_add(&e->list, &entries);
857 write_unlock(&entries_lock);
858
859 err = 0;
860out2:
861 dput(dentry);
862out:
863 inode_unlock(d_inode(root));
864
865 if (err) {
866 if (f)
867 filp_close(f, NULL);
868 kfree(e);
869 return err;
870 }
871 return count;
872}
873
874static const struct file_operations bm_register_operations = {
875 .write = bm_register_write,
876 .llseek = noop_llseek,
877};
878
879/* /status */
880
881static ssize_t
882bm_status_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos)
883{
884 char *s = enabled ? "enabled\n" : "disabled\n";
885
886 return simple_read_from_buffer(buf, nbytes, ppos, s, strlen(s));
887}
888
889static ssize_t bm_status_write(struct file *file, const char __user *buffer,
890 size_t count, loff_t *ppos)
891{
892 int res = parse_command(buffer, count);
893 Node *e, *next;
894 struct inode *inode;
895
896 switch (res) {
897 case 1:
898 /* Disable all handlers. */
899 enabled = 0;
900 break;
901 case 2:
902 /* Enable all handlers. */
903 enabled = 1;
904 break;
905 case 3:
906 /* Delete all handlers. */
907 inode = d_inode(file_inode(file)->i_sb->s_root);
908 inode_lock(inode);
909
910 /*
911 * In order to add new element or remove elements from the list
912 * via bm_{entry,register,status}_write() inode_lock() on the
913 * root inode must be held.
914 * The lock is exclusive ensuring that the list can't be
915 * modified. Only load_misc_binary() can access but does so
916 * read-only. So we only need to take the write lock when we
917 * actually remove the entry from the list.
918 */
919 list_for_each_entry_safe(e, next, &entries, list)
920 remove_binfmt_handler(e);
921
922 inode_unlock(inode);
923 break;
924 default:
925 return res;
926 }
927
928 return count;
929}
930
931static const struct file_operations bm_status_operations = {
932 .read = bm_status_read,
933 .write = bm_status_write,
934 .llseek = default_llseek,
935};
936
937/* Superblock handling */
938
939static const struct super_operations s_ops = {
940 .statfs = simple_statfs,
941 .evict_inode = bm_evict_inode,
942};
943
944static int bm_fill_super(struct super_block *sb, struct fs_context *fc)
945{
946 int err;
947 static const struct tree_descr bm_files[] = {
948 [2] = {"status", &bm_status_operations, S_IWUSR|S_IRUGO},
949 [3] = {"register", &bm_register_operations, S_IWUSR},
950 /* last one */ {""}
951 };
952
953 err = simple_fill_super(sb, BINFMTFS_MAGIC, bm_files);
954 if (!err)
955 sb->s_op = &s_ops;
956 return err;
957}
958
959static int bm_get_tree(struct fs_context *fc)
960{
961 return get_tree_single(fc, bm_fill_super);
962}
963
964static const struct fs_context_operations bm_context_ops = {
965 .get_tree = bm_get_tree,
966};
967
968static int bm_init_fs_context(struct fs_context *fc)
969{
970 fc->ops = &bm_context_ops;
971 return 0;
972}
973
974static struct linux_binfmt misc_format = {
975 .module = THIS_MODULE,
976 .load_binary = load_misc_binary,
977};
978
979static struct file_system_type bm_fs_type = {
980 .owner = THIS_MODULE,
981 .name = "binfmt_misc",
982 .init_fs_context = bm_init_fs_context,
983 .kill_sb = kill_litter_super,
984};
985MODULE_ALIAS_FS("binfmt_misc");
986
987static int __init init_misc_binfmt(void)
988{
989 int err = register_filesystem(&bm_fs_type);
990 if (!err)
991 insert_binfmt(&misc_format);
992 return err;
993}
994
995static void __exit exit_misc_binfmt(void)
996{
997 unregister_binfmt(&misc_format);
998 unregister_filesystem(&bm_fs_type);
999}
1000
1001core_initcall(init_misc_binfmt);
1002module_exit(exit_misc_binfmt);
1003MODULE_LICENSE("GPL");