| 1 | // SPDX-License-Identifier: GPL-2.0-only |
| 2 | /* |
| 3 | * User interface for Resource Allocation in Resource Director Technology (RDT) |
| 4 | * |
| 5 | * Copyright (C) 2016 Intel Corporation |
| 6 | * |
| 7 | * Author: Fenghua Yu <fenghua.yu@intel.com> |
| 8 | * |
| 9 | * More information about RDT can be found in the Intel (R) x86 Architecture |
| 10 | * Software Developer Manual. |
| 11 | */ |
| 12 | |
| 13 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
| 14 | |
| 15 | #include <linux/cacheinfo.h> |
| 16 | #include <linux/cpu.h> |
| 17 | #include <linux/debugfs.h> |
| 18 | #include <linux/fs.h> |
| 19 | #include <linux/fs_parser.h> |
| 20 | #include <linux/sysfs.h> |
| 21 | #include <linux/kernfs.h> |
| 22 | #include <linux/seq_buf.h> |
| 23 | #include <linux/seq_file.h> |
| 24 | #include <linux/sched/signal.h> |
| 25 | #include <linux/sched/task.h> |
| 26 | #include <linux/slab.h> |
| 27 | #include <linux/task_work.h> |
| 28 | #include <linux/user_namespace.h> |
| 29 | |
| 30 | #include <uapi/linux/magic.h> |
| 31 | |
| 32 | #include <asm/resctrl_sched.h> |
| 33 | #include "internal.h" |
| 34 | |
| 35 | DEFINE_STATIC_KEY_FALSE(rdt_enable_key); |
| 36 | DEFINE_STATIC_KEY_FALSE(rdt_mon_enable_key); |
| 37 | DEFINE_STATIC_KEY_FALSE(rdt_alloc_enable_key); |
| 38 | static struct kernfs_root *rdt_root; |
| 39 | struct rdtgroup rdtgroup_default; |
| 40 | LIST_HEAD(rdt_all_groups); |
| 41 | |
| 42 | /* Kernel fs node for "info" directory under root */ |
| 43 | static struct kernfs_node *kn_info; |
| 44 | |
| 45 | /* Kernel fs node for "mon_groups" directory under root */ |
| 46 | static struct kernfs_node *kn_mongrp; |
| 47 | |
| 48 | /* Kernel fs node for "mon_data" directory under root */ |
| 49 | static struct kernfs_node *kn_mondata; |
| 50 | |
| 51 | static struct seq_buf last_cmd_status; |
| 52 | static char last_cmd_status_buf[512]; |
| 53 | |
| 54 | struct dentry *debugfs_resctrl; |
| 55 | |
| 56 | void rdt_last_cmd_clear(void) |
| 57 | { |
| 58 | lockdep_assert_held(&rdtgroup_mutex); |
| 59 | seq_buf_clear(&last_cmd_status); |
| 60 | } |
| 61 | |
| 62 | void rdt_last_cmd_puts(const char *s) |
| 63 | { |
| 64 | lockdep_assert_held(&rdtgroup_mutex); |
| 65 | seq_buf_puts(&last_cmd_status, s); |
| 66 | } |
| 67 | |
| 68 | void rdt_last_cmd_printf(const char *fmt, ...) |
| 69 | { |
| 70 | va_list ap; |
| 71 | |
| 72 | va_start(ap, fmt); |
| 73 | lockdep_assert_held(&rdtgroup_mutex); |
| 74 | seq_buf_vprintf(&last_cmd_status, fmt, ap); |
| 75 | va_end(ap); |
| 76 | } |
| 77 | |
| 78 | /* |
| 79 | * Trivial allocator for CLOSIDs. Since h/w only supports a small number, |
| 80 | * we can keep a bitmap of free CLOSIDs in a single integer. |
| 81 | * |
| 82 | * Using a global CLOSID across all resources has some advantages and |
| 83 | * some drawbacks: |
| 84 | * + We can simply set "current->closid" to assign a task to a resource |
| 85 | * group. |
| 86 | * + Context switch code can avoid extra memory references deciding which |
| 87 | * CLOSID to load into the PQR_ASSOC MSR |
| 88 | * - We give up some options in configuring resource groups across multi-socket |
| 89 | * systems. |
| 90 | * - Our choices on how to configure each resource become progressively more |
| 91 | * limited as the number of resources grows. |
| 92 | */ |
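/*
 * For example, with four usable CLOSIDs closid_init() leaves a free map of
 * 0b1110: CLOSID 0 is reserved for the default group and CLOSIDs 1-3 are
 * free for new resource groups.
 */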
| 93 | static int closid_free_map; |
| 94 | static int closid_free_map_len; |
| 95 | |
| 96 | int closids_supported(void) |
| 97 | { |
| 98 | return closid_free_map_len; |
| 99 | } |
| 100 | |
| 101 | static void closid_init(void) |
| 102 | { |
| 103 | struct rdt_resource *r; |
| 104 | int rdt_min_closid = 32; |
| 105 | |
| 106 | /* Compute rdt_min_closid across all resources */ |
| 107 | for_each_alloc_enabled_rdt_resource(r) |
| 108 | rdt_min_closid = min(rdt_min_closid, r->num_closid); |
| 109 | |
| 110 | closid_free_map = BIT_MASK(rdt_min_closid) - 1; |
| 111 | |
| 112 | /* CLOSID 0 is always reserved for the default group */ |
| 113 | closid_free_map &= ~1; |
| 114 | closid_free_map_len = rdt_min_closid; |
| 115 | } |
| 116 | |
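/*
 * Allocate the lowest-numbered free CLOSID from the free map, or return
 * -ENOSPC when every CLOSID is in use.
 */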
| 117 | static int closid_alloc(void) |
| 118 | { |
| 119 | u32 closid = ffs(closid_free_map); |
| 120 | |
| 121 | if (closid == 0) |
| 122 | return -ENOSPC; |
| 123 | closid--; |
| 124 | closid_free_map &= ~(1 << closid); |
| 125 | |
| 126 | return closid; |
| 127 | } |
| 128 | |
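/* Return @closid to the pool of free CLOSIDs */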
| 129 | void closid_free(int closid) |
| 130 | { |
| 131 | closid_free_map |= 1 << closid; |
| 132 | } |
| 133 | |
| 134 | /** |
| 135 | * closid_allocated - test if provided closid is in use |
| 136 | * @closid: closid to be tested |
| 137 | * |
| 138 | * Return: true if @closid is currently associated with a resource group, |
| 139 | * false if @closid is free |
| 140 | */ |
| 141 | static bool closid_allocated(unsigned int closid) |
| 142 | { |
| 143 | return (closid_free_map & (1 << closid)) == 0; |
| 144 | } |
| 145 | |
| 146 | /** |
| 147 | * rdtgroup_mode_by_closid - Return mode of resource group with closid |
| 148 | * @closid: closid of the resource group |
| 149 | * |
| 150 | * Each resource group is associated with a @closid. Here the mode |
| 151 | * of a resource group can be queried by searching for it using its closid. |
| 152 | * |
| 153 | * Return: mode as &enum rdtgrp_mode of resource group with closid @closid |
| 154 | */ |
| 155 | enum rdtgrp_mode rdtgroup_mode_by_closid(int closid) |
| 156 | { |
| 157 | struct rdtgroup *rdtgrp; |
| 158 | |
| 159 | list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) { |
| 160 | if (rdtgrp->closid == closid) |
| 161 | return rdtgrp->mode; |
| 162 | } |
| 163 | |
| 164 | return RDT_NUM_MODES; |
| 165 | } |
| 166 | |
| 167 | static const char * const rdt_mode_str[] = { |
| 168 | [RDT_MODE_SHAREABLE] = "shareable", |
| 169 | [RDT_MODE_EXCLUSIVE] = "exclusive", |
| 170 | [RDT_MODE_PSEUDO_LOCKSETUP] = "pseudo-locksetup", |
| 171 | [RDT_MODE_PSEUDO_LOCKED] = "pseudo-locked", |
| 172 | }; |
| 173 | |
| 174 | /** |
| 175 | * rdtgroup_mode_str - Return the string representation of mode |
| 176 | * @mode: the resource group mode as &enum rdtgroup_mode |
| 177 | * |
| 178 | * Return: string representation of valid mode, "unknown" otherwise |
| 179 | */ |
| 180 | static const char *rdtgroup_mode_str(enum rdtgrp_mode mode) |
| 181 | { |
| 182 | if (mode < RDT_MODE_SHAREABLE || mode >= RDT_NUM_MODES) |
| 183 | return "unknown"; |
| 184 | |
| 185 | return rdt_mode_str[mode]; |
| 186 | } |
| 187 | |
| 188 | /* set uid and gid of rdtgroup dirs and files to that of the creator */ |
| 189 | static int rdtgroup_kn_set_ugid(struct kernfs_node *kn) |
| 190 | { |
| 191 | struct iattr iattr = { .ia_valid = ATTR_UID | ATTR_GID, |
| 192 | .ia_uid = current_fsuid(), |
| 193 | .ia_gid = current_fsgid(), }; |
| 194 | |
| 195 | if (uid_eq(iattr.ia_uid, GLOBAL_ROOT_UID) && |
| 196 | gid_eq(iattr.ia_gid, GLOBAL_ROOT_GID)) |
| 197 | return 0; |
| 198 | |
| 199 | return kernfs_setattr(kn, &iattr); |
| 200 | } |
| 201 | |
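/*
 * Create the kernfs file described by @rft under @parent_kn and give it
 * the ownership of the creator. The file is removed again if changing the
 * ownership fails.
 */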
| 202 | static int rdtgroup_add_file(struct kernfs_node *parent_kn, struct rftype *rft) |
| 203 | { |
| 204 | struct kernfs_node *kn; |
| 205 | int ret; |
| 206 | |
| 207 | kn = __kernfs_create_file(parent_kn, rft->name, rft->mode, |
| 208 | GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, |
| 209 | 0, rft->kf_ops, rft, NULL, NULL); |
| 210 | if (IS_ERR(kn)) |
| 211 | return PTR_ERR(kn); |
| 212 | |
| 213 | ret = rdtgroup_kn_set_ugid(kn); |
| 214 | if (ret) { |
| 215 | kernfs_remove(kn); |
| 216 | return ret; |
| 217 | } |
| 218 | |
| 219 | return 0; |
| 220 | } |
| 221 | |
| 222 | static int rdtgroup_seqfile_show(struct seq_file *m, void *arg) |
| 223 | { |
| 224 | struct kernfs_open_file *of = m->private; |
| 225 | struct rftype *rft = of->kn->priv; |
| 226 | |
| 227 | if (rft->seq_show) |
| 228 | return rft->seq_show(of, m, arg); |
| 229 | return 0; |
| 230 | } |
| 231 | |
| 232 | static ssize_t rdtgroup_file_write(struct kernfs_open_file *of, char *buf, |
| 233 | size_t nbytes, loff_t off) |
| 234 | { |
| 235 | struct rftype *rft = of->kn->priv; |
| 236 | |
| 237 | if (rft->write) |
| 238 | return rft->write(of, buf, nbytes, off); |
| 239 | |
| 240 | return -EINVAL; |
| 241 | } |
| 242 | |
| 243 | static struct kernfs_ops rdtgroup_kf_single_ops = { |
| 244 | .atomic_write_len = PAGE_SIZE, |
| 245 | .write = rdtgroup_file_write, |
| 246 | .seq_show = rdtgroup_seqfile_show, |
| 247 | }; |
| 248 | |
| 249 | static struct kernfs_ops kf_mondata_ops = { |
| 250 | .atomic_write_len = PAGE_SIZE, |
| 251 | .seq_show = rdtgroup_mondata_show, |
| 252 | }; |
| 253 | |
| 254 | static bool is_cpu_list(struct kernfs_open_file *of) |
| 255 | { |
| 256 | struct rftype *rft = of->kn->priv; |
| 257 | |
| 258 | return rft->flags & RFTYPE_FLAGS_CPUS_LIST; |
| 259 | } |
| 260 | |
| 261 | static int rdtgroup_cpus_show(struct kernfs_open_file *of, |
| 262 | struct seq_file *s, void *v) |
| 263 | { |
| 264 | struct rdtgroup *rdtgrp; |
| 265 | struct cpumask *mask; |
| 266 | int ret = 0; |
| 267 | |
| 268 | rdtgrp = rdtgroup_kn_lock_live(of->kn); |
| 269 | |
| 270 | if (rdtgrp) { |
| 271 | if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) { |
| 272 | if (!rdtgrp->plr->d) { |
| 273 | rdt_last_cmd_clear(); |
| 274 | rdt_last_cmd_puts("Cache domain offline\n"); |
| 275 | ret = -ENODEV; |
| 276 | } else { |
| 277 | mask = &rdtgrp->plr->d->cpu_mask; |
| 278 | seq_printf(s, is_cpu_list(of) ? |
| 279 | "%*pbl\n" : "%*pb\n", |
| 280 | cpumask_pr_args(mask)); |
| 281 | } |
| 282 | } else { |
| 283 | seq_printf(s, is_cpu_list(of) ? "%*pbl\n" : "%*pb\n", |
| 284 | cpumask_pr_args(&rdtgrp->cpu_mask)); |
| 285 | } |
| 286 | } else { |
| 287 | ret = -ENOENT; |
| 288 | } |
| 289 | rdtgroup_kn_unlock(of->kn); |
| 290 | |
| 291 | return ret; |
| 292 | } |
| 293 | |
| 294 | /* |
| 295 | * This is safe against resctrl_sched_in() called from __switch_to() |
| 296 | * because __switch_to() is executed with interrupts disabled. A local call |
| 297 | * from update_closid_rmid() is protected against __switch_to() because |
| 298 | * preemption is disabled. |
| 299 | */ |
| 300 | static void update_cpu_closid_rmid(void *info) |
| 301 | { |
| 302 | struct rdtgroup *r = info; |
| 303 | |
| 304 | if (r) { |
| 305 | this_cpu_write(pqr_state.default_closid, r->closid); |
| 306 | this_cpu_write(pqr_state.default_rmid, r->mon.rmid); |
| 307 | } |
| 308 | |
| 309 | /* |
| 310 | * We cannot unconditionally write the MSR because the current |
| 311 | * executing task might have its own closid selected. Just reuse |
| 312 | * the context switch code. |
| 313 | */ |
| 314 | resctrl_sched_in(current); |
| 315 | } |
| 316 | |
| 317 | /* |
| 318 | * Update the PQR_ASSOC MSR on all cpus in @cpu_mask. |
| 319 | * |
| 320 | * Per task closids/rmids must have been set up before calling this function. |
| 321 | */ |
| 322 | static void |
| 323 | update_closid_rmid(const struct cpumask *cpu_mask, struct rdtgroup *r) |
| 324 | { |
| 325 | int cpu = get_cpu(); |
| 326 | |
| 327 | if (cpumask_test_cpu(cpu, cpu_mask)) |
| 328 | update_cpu_closid_rmid(r); |
| 329 | smp_call_function_many(cpu_mask, update_cpu_closid_rmid, r, 1); |
| 330 | put_cpu(); |
| 331 | } |
| 332 | |
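/*
 * Update the CPU mask of a monitor group. CPUs may only be added if they
 * belong to the parent control group; CPUs dropped from the monitor group
 * revert to the parent, and CPUs added are pulled from sibling monitor
 * groups.
 */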
| 333 | static int cpus_mon_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask, |
| 334 | cpumask_var_t tmpmask) |
| 335 | { |
| 336 | struct rdtgroup *prgrp = rdtgrp->mon.parent, *crgrp; |
| 337 | struct list_head *head; |
| 338 | |
| 339 | /* Check whether cpus belong to parent ctrl group */ |
| 340 | cpumask_andnot(tmpmask, newmask, &prgrp->cpu_mask); |
| 341 | if (cpumask_weight(tmpmask)) { |
| 342 | rdt_last_cmd_puts("Can only add CPUs to mongroup that belong to parent\n"); |
| 343 | return -EINVAL; |
| 344 | } |
| 345 | |
| 346 | /* Check whether cpus are dropped from this group */ |
| 347 | cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask); |
| 348 | if (cpumask_weight(tmpmask)) { |
| 349 | /* Give any dropped cpus to parent rdtgroup */ |
| 350 | cpumask_or(&prgrp->cpu_mask, &prgrp->cpu_mask, tmpmask); |
| 351 | update_closid_rmid(tmpmask, prgrp); |
| 352 | } |
| 353 | |
| 354 | /* |
| 355 | * If we added cpus, remove them from previous group that owned them |
| 356 | * and update per-cpu rmid |
| 357 | */ |
| 358 | cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask); |
| 359 | if (cpumask_weight(tmpmask)) { |
| 360 | head = &prgrp->mon.crdtgrp_list; |
| 361 | list_for_each_entry(crgrp, head, mon.crdtgrp_list) { |
| 362 | if (crgrp == rdtgrp) |
| 363 | continue; |
| 364 | cpumask_andnot(&crgrp->cpu_mask, &crgrp->cpu_mask, |
| 365 | tmpmask); |
| 366 | } |
| 367 | update_closid_rmid(tmpmask, rdtgrp); |
| 368 | } |
| 369 | |
| 370 | /* Done pushing/pulling - update this group with new mask */ |
| 371 | cpumask_copy(&rdtgrp->cpu_mask, newmask); |
| 372 | |
| 373 | return 0; |
| 374 | } |
| 375 | |
| 376 | static void cpumask_rdtgrp_clear(struct rdtgroup *r, struct cpumask *m) |
| 377 | { |
| 378 | struct rdtgroup *crgrp; |
| 379 | |
| 380 | cpumask_andnot(&r->cpu_mask, &r->cpu_mask, m); |
| 381 | /* Update the child mon group masks as well */ |
| 382 | list_for_each_entry(crgrp, &r->mon.crdtgrp_list, mon.crdtgrp_list) |
| 383 | cpumask_and(&crgrp->cpu_mask, &r->cpu_mask, &crgrp->cpu_mask); |
| 384 | } |
| 385 | |
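/*
 * Update the CPU mask of a control group. CPUs dropped from the group are
 * given back to the default group, CPUs added are taken away from whichever
 * group currently owns them, and the child monitor group masks are cleared
 * because the parent mask has changed.
 */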
| 386 | static int cpus_ctrl_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask, |
| 387 | cpumask_var_t tmpmask, cpumask_var_t tmpmask1) |
| 388 | { |
| 389 | struct rdtgroup *r, *crgrp; |
| 390 | struct list_head *head; |
| 391 | |
| 392 | /* Check whether cpus are dropped from this group */ |
| 393 | cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask); |
| 394 | if (cpumask_weight(tmpmask)) { |
| 395 | /* Can't drop from default group */ |
| 396 | if (rdtgrp == &rdtgroup_default) { |
| 397 | rdt_last_cmd_puts("Can't drop CPUs from default group\n"); |
| 398 | return -EINVAL; |
| 399 | } |
| 400 | |
| 401 | /* Give any dropped cpus to rdtgroup_default */ |
| 402 | cpumask_or(&rdtgroup_default.cpu_mask, |
| 403 | &rdtgroup_default.cpu_mask, tmpmask); |
| 404 | update_closid_rmid(tmpmask, &rdtgroup_default); |
| 405 | } |
| 406 | |
| 407 | /* |
| 408 | * If we added cpus, remove them from previous group and |
| 409 | * the prev group's child groups that owned them |
| 410 | * and update per-cpu closid/rmid. |
| 411 | */ |
| 412 | cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask); |
| 413 | if (cpumask_weight(tmpmask)) { |
| 414 | list_for_each_entry(r, &rdt_all_groups, rdtgroup_list) { |
| 415 | if (r == rdtgrp) |
| 416 | continue; |
| 417 | cpumask_and(tmpmask1, &r->cpu_mask, tmpmask); |
| 418 | if (cpumask_weight(tmpmask1)) |
| 419 | cpumask_rdtgrp_clear(r, tmpmask1); |
| 420 | } |
| 421 | update_closid_rmid(tmpmask, rdtgrp); |
| 422 | } |
| 423 | |
| 424 | /* Done pushing/pulling - update this group with new mask */ |
| 425 | cpumask_copy(&rdtgrp->cpu_mask, newmask); |
| 426 | |
| 427 | /* |
| 428 | * Clear child mon group masks since there is a new parent mask |
| 429 | * now and update the rmid for the cpus the child lost. |
| 430 | */ |
| 431 | head = &rdtgrp->mon.crdtgrp_list; |
| 432 | list_for_each_entry(crgrp, head, mon.crdtgrp_list) { |
| 433 | cpumask_and(tmpmask, &rdtgrp->cpu_mask, &crgrp->cpu_mask); |
| 434 | update_closid_rmid(tmpmask, rdtgrp); |
| 435 | cpumask_clear(&crgrp->cpu_mask); |
| 436 | } |
| 437 | |
| 438 | return 0; |
| 439 | } |
| 440 | |
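/*
 * Handler for writes to the "cpus" and "cpus_list" files: parse the user
 * supplied mask or list, reject offline CPUs, and apply the new mask to the
 * control or monitor group.
 */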
| 441 | static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of, |
| 442 | char *buf, size_t nbytes, loff_t off) |
| 443 | { |
| 444 | cpumask_var_t tmpmask, newmask, tmpmask1; |
| 445 | struct rdtgroup *rdtgrp; |
| 446 | int ret; |
| 447 | |
| 448 | if (!buf) |
| 449 | return -EINVAL; |
| 450 | |
| 451 | if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL)) |
| 452 | return -ENOMEM; |
| 453 | if (!zalloc_cpumask_var(&newmask, GFP_KERNEL)) { |
| 454 | free_cpumask_var(tmpmask); |
| 455 | return -ENOMEM; |
| 456 | } |
| 457 | if (!zalloc_cpumask_var(&tmpmask1, GFP_KERNEL)) { |
| 458 | free_cpumask_var(tmpmask); |
| 459 | free_cpumask_var(newmask); |
| 460 | return -ENOMEM; |
| 461 | } |
| 462 | |
| 463 | rdtgrp = rdtgroup_kn_lock_live(of->kn); |
| 464 | if (!rdtgrp) { |
| 465 | ret = -ENOENT; |
| 466 | goto unlock; |
| 467 | } |
| 468 | |
| 469 | if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED || |
| 470 | rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { |
| 471 | ret = -EINVAL; |
| 472 | rdt_last_cmd_puts("Pseudo-locking in progress\n"); |
| 473 | goto unlock; |
| 474 | } |
| 475 | |
| 476 | if (is_cpu_list(of)) |
| 477 | ret = cpulist_parse(buf, newmask); |
| 478 | else |
| 479 | ret = cpumask_parse(buf, newmask); |
| 480 | |
| 481 | if (ret) { |
| 482 | rdt_last_cmd_puts("Bad CPU list/mask\n"); |
| 483 | goto unlock; |
| 484 | } |
| 485 | |
| 486 | /* check that user didn't specify any offline cpus */ |
| 487 | cpumask_andnot(tmpmask, newmask, cpu_online_mask); |
| 488 | if (cpumask_weight(tmpmask)) { |
| 489 | ret = -EINVAL; |
| 490 | rdt_last_cmd_puts("Can only assign online CPUs\n"); |
| 491 | goto unlock; |
| 492 | } |
| 493 | |
| 494 | if (rdtgrp->type == RDTCTRL_GROUP) |
| 495 | ret = cpus_ctrl_write(rdtgrp, newmask, tmpmask, tmpmask1); |
| 496 | else if (rdtgrp->type == RDTMON_GROUP) |
| 497 | ret = cpus_mon_write(rdtgrp, newmask, tmpmask); |
| 498 | else |
| 499 | ret = -EINVAL; |
| 500 | |
| 501 | unlock: |
| 502 | rdtgroup_kn_unlock(of->kn); |
| 503 | free_cpumask_var(tmpmask); |
| 504 | free_cpumask_var(newmask); |
| 505 | free_cpumask_var(tmpmask1); |
| 506 | |
| 507 | return ret ?: nbytes; |
| 508 | } |
| 509 | |
| 510 | /** |
| 511 | * rdtgroup_remove - Remove a resource group safely |
| 512 | * @rdtgrp: resource group to remove |
| 513 | * |
| 514 | * On resource group creation via a mkdir, an extra kernfs_node reference is |
| 515 | * taken to ensure that the rdtgroup structure remains accessible for the |
| 516 | * rdtgroup_kn_unlock() calls where it is removed. |
| 517 | * |
| 518 | * Drop the extra reference here, then free the rdtgroup structure. |
| 519 | * |
| 520 | * Return: void |
| 521 | */ |
| 522 | static void rdtgroup_remove(struct rdtgroup *rdtgrp) |
| 523 | { |
| 524 | kernfs_put(rdtgrp->kn); |
| 525 | kfree(rdtgrp); |
| 526 | } |
| 527 | |
| 528 | static void _update_task_closid_rmid(void *task) |
| 529 | { |
| 530 | /* |
| 531 | * If the task is still current on this CPU, update PQR_ASSOC MSR. |
| 532 | * Otherwise, the MSR is updated when the task is scheduled in. |
| 533 | */ |
| 534 | if (task == current) |
| 535 | resctrl_sched_in(task); |
| 536 | } |
| 537 | |
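/*
 * Make the updated closid/rmid of @t take effect: if the task is currently
 * running its CPU is nudged to reload the PQR_ASSOC MSR, otherwise the MSR
 * is loaded when the task is next scheduled in.
 */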
| 538 | static void update_task_closid_rmid(struct task_struct *t) |
| 539 | { |
| 540 | if (IS_ENABLED(CONFIG_SMP) && task_curr(t)) |
| 541 | smp_call_function_single(task_cpu(t), _update_task_closid_rmid, t, 1); |
| 542 | else |
| 543 | _update_task_closid_rmid(t); |
| 544 | } |
| 545 | |
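/*
 * Move @tsk into @rdtgrp by updating the task's closid/rmid, then make sure
 * the change is visible on the CPU the task may be running on.
 */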
| 546 | static int __rdtgroup_move_task(struct task_struct *tsk, |
| 547 | struct rdtgroup *rdtgrp) |
| 548 | { |
| 549 | /* If the task is already in rdtgrp, no need to move the task. */ |
| 550 | if ((rdtgrp->type == RDTCTRL_GROUP && tsk->closid == rdtgrp->closid && |
| 551 | tsk->rmid == rdtgrp->mon.rmid) || |
| 552 | (rdtgrp->type == RDTMON_GROUP && tsk->rmid == rdtgrp->mon.rmid && |
| 553 | tsk->closid == rdtgrp->mon.parent->closid)) |
| 554 | return 0; |
| 555 | |
| 556 | /* |
| 557 | * Set the task's closid/rmid before the PQR_ASSOC MSR can be |
| 558 | * updated by them. |
| 559 | * |
| 560 | * For ctrl_mon groups, move both closid and rmid. |
| 561 | * For monitor groups, tasks can only be moved from |
| 562 | * their parent CTRL group. |
| 563 | */ |
| 564 | |
| 565 | if (rdtgrp->type == RDTCTRL_GROUP) { |
| 566 | WRITE_ONCE(tsk->closid, rdtgrp->closid); |
| 567 | WRITE_ONCE(tsk->rmid, rdtgrp->mon.rmid); |
| 568 | } else if (rdtgrp->type == RDTMON_GROUP) { |
| 569 | if (rdtgrp->mon.parent->closid == tsk->closid) { |
| 570 | WRITE_ONCE(tsk->rmid, rdtgrp->mon.rmid); |
| 571 | } else { |
| 572 | rdt_last_cmd_puts("Can't move task to different control group\n"); |
| 573 | return -EINVAL; |
| 574 | } |
| 575 | } |
| 576 | |
| 577 | /* |
| 578 | * Ensure the task's closid and rmid are written before determining if |
| 579 | * the task is current, which will decide whether it will be interrupted. |
| 580 | * This pairs with the full barrier between the rq->curr update and |
| 581 | * resctrl_sched_in() during context switch. |
| 582 | */ |
| 583 | smp_mb(); |
| 584 | |
| 585 | /* |
| 586 | * By now, the task's closid and rmid are set. If the task is current |
| 587 | * on a CPU, the PQR_ASSOC MSR needs to be updated to make the resource |
| 588 | * group go into effect. If the task is not current, the MSR will be |
| 589 | * updated when the task is scheduled in. |
| 590 | */ |
| 591 | update_task_closid_rmid(tsk); |
| 592 | |
| 593 | return 0; |
| 594 | } |
| 595 | |
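/* True if @t belongs to control group @r (requires allocation support) */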
| 596 | static bool is_closid_match(struct task_struct *t, struct rdtgroup *r) |
| 597 | { |
| 598 | return (rdt_alloc_capable && |
| 599 | (r->type == RDTCTRL_GROUP) && (t->closid == r->closid)); |
| 600 | } |
| 601 | |
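/* True if @t belongs to monitor group @r (requires monitoring support) */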
| 602 | static bool is_rmid_match(struct task_struct *t, struct rdtgroup *r) |
| 603 | { |
| 604 | return (rdt_mon_capable && |
| 605 | (r->type == RDTMON_GROUP) && (t->rmid == r->mon.rmid)); |
| 606 | } |
| 607 | |
| 608 | /** |
| 609 | * rdtgroup_tasks_assigned - Test if tasks have been assigned to resource group |
| 610 | * @r: Resource group |
| 611 | * |
| 612 | * Return: 1 if tasks have been assigned to @r, 0 otherwise |
| 613 | */ |
| 614 | int rdtgroup_tasks_assigned(struct rdtgroup *r) |
| 615 | { |
| 616 | struct task_struct *p, *t; |
| 617 | int ret = 0; |
| 618 | |
| 619 | lockdep_assert_held(&rdtgroup_mutex); |
| 620 | |
| 621 | rcu_read_lock(); |
| 622 | for_each_process_thread(p, t) { |
| 623 | if (is_closid_match(t, r) || is_rmid_match(t, r)) { |
| 624 | ret = 1; |
| 625 | break; |
| 626 | } |
| 627 | } |
| 628 | rcu_read_unlock(); |
| 629 | |
| 630 | return ret; |
| 631 | } |
| 632 | |
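/*
 * Moving a task is permitted for root, or when the writer's effective uid
 * matches the target task's real or saved uid.
 */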
| 633 | static int rdtgroup_task_write_permission(struct task_struct *task, |
| 634 | struct kernfs_open_file *of) |
| 635 | { |
| 636 | const struct cred *tcred = get_task_cred(task); |
| 637 | const struct cred *cred = current_cred(); |
| 638 | int ret = 0; |
| 639 | |
| 640 | /* |
| 641 | * Even if we're attaching all tasks in the thread group, we only |
| 642 | * need to check permissions on one of them. |
| 643 | */ |
| 644 | if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) && |
| 645 | !uid_eq(cred->euid, tcred->uid) && |
| 646 | !uid_eq(cred->euid, tcred->suid)) { |
| 647 | rdt_last_cmd_printf("No permission to move task %d\n", task->pid); |
| 648 | ret = -EPERM; |
| 649 | } |
| 650 | |
| 651 | put_cred(tcred); |
| 652 | return ret; |
| 653 | } |
| 654 | |
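/*
 * Look up the task for @pid (current when @pid is 0), check that the caller
 * is allowed to move it, and move it into @rdtgrp.
 */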
| 655 | static int rdtgroup_move_task(pid_t pid, struct rdtgroup *rdtgrp, |
| 656 | struct kernfs_open_file *of) |
| 657 | { |
| 658 | struct task_struct *tsk; |
| 659 | int ret; |
| 660 | |
| 661 | rcu_read_lock(); |
| 662 | if (pid) { |
| 663 | tsk = find_task_by_vpid(pid); |
| 664 | if (!tsk) { |
| 665 | rcu_read_unlock(); |
| 666 | rdt_last_cmd_printf("No task %d\n", pid); |
| 667 | return -ESRCH; |
| 668 | } |
| 669 | } else { |
| 670 | tsk = current; |
| 671 | } |
| 672 | |
| 673 | get_task_struct(tsk); |
| 674 | rcu_read_unlock(); |
| 675 | |
| 676 | ret = rdtgroup_task_write_permission(tsk, of); |
| 677 | if (!ret) |
| 678 | ret = __rdtgroup_move_task(tsk, rdtgrp); |
| 679 | |
| 680 | put_task_struct(tsk); |
| 681 | return ret; |
| 682 | } |
| 683 | |
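/* Handler for writes to the "tasks" file: one pid is moved per write */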
| 684 | static ssize_t rdtgroup_tasks_write(struct kernfs_open_file *of, |
| 685 | char *buf, size_t nbytes, loff_t off) |
| 686 | { |
| 687 | struct rdtgroup *rdtgrp; |
| 688 | int ret = 0; |
| 689 | pid_t pid; |
| 690 | |
| 691 | if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0) |
| 692 | return -EINVAL; |
| 693 | rdtgrp = rdtgroup_kn_lock_live(of->kn); |
| 694 | if (!rdtgrp) { |
| 695 | rdtgroup_kn_unlock(of->kn); |
| 696 | return -ENOENT; |
| 697 | } |
| 698 | rdt_last_cmd_clear(); |
| 699 | |
| 700 | if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED || |
| 701 | rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { |
| 702 | ret = -EINVAL; |
| 703 | rdt_last_cmd_puts("Pseudo-locking in progress\n"); |
| 704 | goto unlock; |
| 705 | } |
| 706 | |
| 707 | ret = rdtgroup_move_task(pid, rdtgrp, of); |
| 708 | |
| 709 | unlock: |
| 710 | rdtgroup_kn_unlock(of->kn); |
| 711 | |
| 712 | return ret ?: nbytes; |
| 713 | } |
| 714 | |
| 715 | static void show_rdt_tasks(struct rdtgroup *r, struct seq_file *s) |
| 716 | { |
| 717 | struct task_struct *p, *t; |
| 718 | pid_t pid; |
| 719 | |
| 720 | rcu_read_lock(); |
| 721 | for_each_process_thread(p, t) { |
| 722 | if (is_closid_match(t, r) || is_rmid_match(t, r)) { |
| 723 | pid = task_pid_vnr(t); |
| 724 | if (pid) |
| 725 | seq_printf(s, "%d\n", pid); |
| 726 | } |
| 727 | } |
| 728 | rcu_read_unlock(); |
| 729 | } |
| 730 | |
| 731 | static int rdtgroup_tasks_show(struct kernfs_open_file *of, |
| 732 | struct seq_file *s, void *v) |
| 733 | { |
| 734 | struct rdtgroup *rdtgrp; |
| 735 | int ret = 0; |
| 736 | |
| 737 | rdtgrp = rdtgroup_kn_lock_live(of->kn); |
| 738 | if (rdtgrp) |
| 739 | show_rdt_tasks(rdtgrp, s); |
| 740 | else |
| 741 | ret = -ENOENT; |
| 742 | rdtgroup_kn_unlock(of->kn); |
| 743 | |
| 744 | return ret; |
| 745 | } |
| 746 | |
| 747 | static int rdt_last_cmd_status_show(struct kernfs_open_file *of, |
| 748 | struct seq_file *seq, void *v) |
| 749 | { |
| 750 | int len; |
| 751 | |
| 752 | mutex_lock(&rdtgroup_mutex); |
| 753 | len = seq_buf_used(&last_cmd_status); |
| 754 | if (len) |
| 755 | seq_printf(seq, "%.*s", len, last_cmd_status_buf); |
| 756 | else |
| 757 | seq_puts(seq, "ok\n"); |
| 758 | mutex_unlock(&rdtgroup_mutex); |
| 759 | return 0; |
| 760 | } |
| 761 | |
| 762 | static int rdt_num_closids_show(struct kernfs_open_file *of, |
| 763 | struct seq_file *seq, void *v) |
| 764 | { |
| 765 | struct rdt_resource *r = of->kn->parent->priv; |
| 766 | |
| 767 | seq_printf(seq, "%d\n", r->num_closid); |
| 768 | return 0; |
| 769 | } |
| 770 | |
| 771 | static int rdt_default_ctrl_show(struct kernfs_open_file *of, |
| 772 | struct seq_file *seq, void *v) |
| 773 | { |
| 774 | struct rdt_resource *r = of->kn->parent->priv; |
| 775 | |
| 776 | seq_printf(seq, "%x\n", r->default_ctrl); |
| 777 | return 0; |
| 778 | } |
| 779 | |
| 780 | static int rdt_min_cbm_bits_show(struct kernfs_open_file *of, |
| 781 | struct seq_file *seq, void *v) |
| 782 | { |
| 783 | struct rdt_resource *r = of->kn->parent->priv; |
| 784 | |
| 785 | seq_printf(seq, "%u\n", r->cache.min_cbm_bits); |
| 786 | return 0; |
| 787 | } |
| 788 | |
| 789 | static int rdt_shareable_bits_show(struct kernfs_open_file *of, |
| 790 | struct seq_file *seq, void *v) |
| 791 | { |
| 792 | struct rdt_resource *r = of->kn->parent->priv; |
| 793 | |
| 794 | seq_printf(seq, "%x\n", r->cache.shareable_bits); |
| 795 | return 0; |
| 796 | } |
| 797 | |
| 798 | /** |
| 799 | * rdt_bit_usage_show - Display current usage of resources |
| 800 | * |
| 801 | * A domain is a shared resource that can now be allocated differently. Here |
| 802 | * we display the current regions of the domain as an annotated bitmask. |
| 803 | * For each domain of this resource, its allocation bitmask |
| 804 | * is annotated as below to indicate the current usage of the corresponding bit: |
| 805 | * 0 - currently unused |
| 806 | * X - currently available for sharing and used by software and hardware |
| 807 | * H - currently used by hardware only but available for software use |
| 808 | * S - currently used and shareable by software only |
| 809 | * E - currently used exclusively by one resource group |
| 810 | * P - currently pseudo-locked by one resource group |
| 811 | */ |
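/*
 * Example (hypothetical values): with a 16 bit CBM, a line such as
 * "0=SSSSHHHH00000EEE" means that in cache domain 0 the top four bits are
 * shareable by software only, the next four are in use by hardware only,
 * five bits are unused and the lowest three bits are allocated exclusively
 * to one resource group.
 */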
| 812 | static int rdt_bit_usage_show(struct kernfs_open_file *of, |
| 813 | struct seq_file *seq, void *v) |
| 814 | { |
| 815 | struct rdt_resource *r = of->kn->parent->priv; |
| 816 | /* |
| 817 | * Use unsigned long even though only 32 bits are used to ensure |
| 818 | * test_bit() is used safely. |
| 819 | */ |
| 820 | unsigned long sw_shareable = 0, hw_shareable = 0; |
| 821 | unsigned long exclusive = 0, pseudo_locked = 0; |
| 822 | struct rdt_domain *dom; |
| 823 | int i, hwb, swb, excl, psl; |
| 824 | enum rdtgrp_mode mode; |
| 825 | bool sep = false; |
| 826 | u32 *ctrl; |
| 827 | |
| 828 | mutex_lock(&rdtgroup_mutex); |
| 829 | hw_shareable = r->cache.shareable_bits; |
| 830 | list_for_each_entry(dom, &r->domains, list) { |
| 831 | if (sep) |
| 832 | seq_putc(seq, ';'); |
| 833 | ctrl = dom->ctrl_val; |
| 834 | sw_shareable = 0; |
| 835 | exclusive = 0; |
| 836 | seq_printf(seq, "%d=", dom->id); |
| 837 | for (i = 0; i < closids_supported(); i++, ctrl++) { |
| 838 | if (!closid_allocated(i)) |
| 839 | continue; |
| 840 | mode = rdtgroup_mode_by_closid(i); |
| 841 | switch (mode) { |
| 842 | case RDT_MODE_SHAREABLE: |
| 843 | sw_shareable |= *ctrl; |
| 844 | break; |
| 845 | case RDT_MODE_EXCLUSIVE: |
| 846 | exclusive |= *ctrl; |
| 847 | break; |
| 848 | case RDT_MODE_PSEUDO_LOCKSETUP: |
| 849 | /* |
| 850 | * RDT_MODE_PSEUDO_LOCKSETUP is possible |
| 851 | * here but not included since the CBM |
| 852 | * associated with this CLOSID in this mode |
| 853 | * is not initialized and no task or cpu can be |
| 854 | * assigned this CLOSID. |
| 855 | */ |
| 856 | break; |
| 857 | case RDT_MODE_PSEUDO_LOCKED: |
| 858 | case RDT_NUM_MODES: |
| 859 | WARN(1, |
| 860 | "invalid mode for closid %d\n", i); |
| 861 | break; |
| 862 | } |
| 863 | } |
| 864 | for (i = r->cache.cbm_len - 1; i >= 0; i--) { |
| 865 | pseudo_locked = dom->plr ? dom->plr->cbm : 0; |
| 866 | hwb = test_bit(i, &hw_shareable); |
| 867 | swb = test_bit(i, &sw_shareable); |
| 868 | excl = test_bit(i, &exclusive); |
| 869 | psl = test_bit(i, &pseudo_locked); |
| 870 | if (hwb && swb) |
| 871 | seq_putc(seq, 'X'); |
| 872 | else if (hwb && !swb) |
| 873 | seq_putc(seq, 'H'); |
| 874 | else if (!hwb && swb) |
| 875 | seq_putc(seq, 'S'); |
| 876 | else if (excl) |
| 877 | seq_putc(seq, 'E'); |
| 878 | else if (psl) |
| 879 | seq_putc(seq, 'P'); |
| 880 | else /* Unused bits remain */ |
| 881 | seq_putc(seq, '0'); |
| 882 | } |
| 883 | sep = true; |
| 884 | } |
| 885 | seq_putc(seq, '\n'); |
| 886 | mutex_unlock(&rdtgroup_mutex); |
| 887 | return 0; |
| 888 | } |
| 889 | |
| 890 | static int rdt_min_bw_show(struct kernfs_open_file *of, |
| 891 | struct seq_file *seq, void *v) |
| 892 | { |
| 893 | struct rdt_resource *r = of->kn->parent->priv; |
| 894 | |
| 895 | seq_printf(seq, "%u\n", r->membw.min_bw); |
| 896 | return 0; |
| 897 | } |
| 898 | |
| 899 | static int rdt_num_rmids_show(struct kernfs_open_file *of, |
| 900 | struct seq_file *seq, void *v) |
| 901 | { |
| 902 | struct rdt_resource *r = of->kn->parent->priv; |
| 903 | |
| 904 | seq_printf(seq, "%d\n", r->num_rmid); |
| 905 | |
| 906 | return 0; |
| 907 | } |
| 908 | |
| 909 | static int rdt_mon_features_show(struct kernfs_open_file *of, |
| 910 | struct seq_file *seq, void *v) |
| 911 | { |
| 912 | struct rdt_resource *r = of->kn->parent->priv; |
| 913 | struct mon_evt *mevt; |
| 914 | |
| 915 | list_for_each_entry(mevt, &r->evt_list, list) |
| 916 | seq_printf(seq, "%s\n", mevt->name); |
| 917 | |
| 918 | return 0; |
| 919 | } |
| 920 | |
| 921 | static int rdt_bw_gran_show(struct kernfs_open_file *of, |
| 922 | struct seq_file *seq, void *v) |
| 923 | { |
| 924 | struct rdt_resource *r = of->kn->parent->priv; |
| 925 | |
| 926 | seq_printf(seq, "%u\n", r->membw.bw_gran); |
| 927 | return 0; |
| 928 | } |
| 929 | |
| 930 | static int rdt_delay_linear_show(struct kernfs_open_file *of, |
| 931 | struct seq_file *seq, void *v) |
| 932 | { |
| 933 | struct rdt_resource *r = of->kn->parent->priv; |
| 934 | |
| 935 | seq_printf(seq, "%u\n", r->membw.delay_linear); |
| 936 | return 0; |
| 937 | } |
| 938 | |
| 939 | static int max_threshold_occ_show(struct kernfs_open_file *of, |
| 940 | struct seq_file *seq, void *v) |
| 941 | { |
| 942 | struct rdt_resource *r = of->kn->parent->priv; |
| 943 | |
| 944 | seq_printf(seq, "%u\n", resctrl_cqm_threshold * r->mon_scale); |
| 945 | |
| 946 | return 0; |
| 947 | } |
| 948 | |
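/*
 * The user supplied threshold is in bytes: reject values larger than the
 * cache size and scale the rest down to the internal counter unit.
 */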
| 949 | static ssize_t max_threshold_occ_write(struct kernfs_open_file *of, |
| 950 | char *buf, size_t nbytes, loff_t off) |
| 951 | { |
| 952 | struct rdt_resource *r = of->kn->parent->priv; |
| 953 | unsigned int bytes; |
| 954 | int ret; |
| 955 | |
| 956 | ret = kstrtouint(buf, 0, &bytes); |
| 957 | if (ret) |
| 958 | return ret; |
| 959 | |
| 960 | if (bytes > (boot_cpu_data.x86_cache_size * 1024)) |
| 961 | return -EINVAL; |
| 962 | |
| 963 | resctrl_cqm_threshold = bytes / r->mon_scale; |
| 964 | |
| 965 | return nbytes; |
| 966 | } |
| 967 | |
| 968 | /* |
| 969 | * rdtgroup_mode_show - Display mode of this resource group |
| 970 | */ |
| 971 | static int rdtgroup_mode_show(struct kernfs_open_file *of, |
| 972 | struct seq_file *s, void *v) |
| 973 | { |
| 974 | struct rdtgroup *rdtgrp; |
| 975 | |
| 976 | rdtgrp = rdtgroup_kn_lock_live(of->kn); |
| 977 | if (!rdtgrp) { |
| 978 | rdtgroup_kn_unlock(of->kn); |
| 979 | return -ENOENT; |
| 980 | } |
| 981 | |
| 982 | seq_printf(s, "%s\n", rdtgroup_mode_str(rdtgrp->mode)); |
| 983 | |
| 984 | rdtgroup_kn_unlock(of->kn); |
| 985 | return 0; |
| 986 | } |
| 987 | |
| 988 | /** |
| 989 | * rdt_cdp_peer_get - Retrieve CDP peer if it exists |
| 990 | * @r: RDT resource to which RDT domain @d belongs |
| 991 | * @d: Cache instance for which a CDP peer is requested |
| 992 | * @r_cdp: RDT resource that shares hardware with @r (RDT resource peer) |
| 993 | * Used to return the result. |
| 994 | * @d_cdp: RDT domain that shares hardware with @d (RDT domain peer) |
| 995 | * Used to return the result. |
| 996 | * |
| 997 | * RDT resources are managed independently and by extension the RDT domains |
| 998 | * (RDT resource instances) are managed independently also. The Code and |
| 999 | * Data Prioritization (CDP) RDT resources, while managed independently, |
| 1000 | * could refer to the same underlying hardware. For example, |
| 1001 | * RDT_RESOURCE_L2CODE and RDT_RESOURCE_L2DATA both refer to the L2 cache. |
| 1002 | * |
| 1003 | * When provided with an RDT resource @r and an instance of that RDT |
| 1004 | * resource @d rdt_cdp_peer_get() will return if there is a peer RDT |
| 1005 | * resource and the exact instance that shares the same hardware. |
| 1006 | * |
| 1007 | * Return: 0 if a CDP peer was found, <0 on error or if no CDP peer exists. |
| 1008 | * If a CDP peer was found, @r_cdp will point to the peer RDT resource |
| 1009 | * and @d_cdp will point to the peer RDT domain. |
| 1010 | */ |
| 1011 | static int rdt_cdp_peer_get(struct rdt_resource *r, struct rdt_domain *d, |
| 1012 | struct rdt_resource **r_cdp, |
| 1013 | struct rdt_domain **d_cdp) |
| 1014 | { |
| 1015 | struct rdt_resource *_r_cdp = NULL; |
| 1016 | struct rdt_domain *_d_cdp = NULL; |
| 1017 | int ret = 0; |
| 1018 | |
| 1019 | switch (r->rid) { |
| 1020 | case RDT_RESOURCE_L3DATA: |
| 1021 | _r_cdp = &rdt_resources_all[RDT_RESOURCE_L3CODE]; |
| 1022 | break; |
| 1023 | case RDT_RESOURCE_L3CODE: |
| 1024 | _r_cdp = &rdt_resources_all[RDT_RESOURCE_L3DATA]; |
| 1025 | break; |
| 1026 | case RDT_RESOURCE_L2DATA: |
| 1027 | _r_cdp = &rdt_resources_all[RDT_RESOURCE_L2CODE]; |
| 1028 | break; |
| 1029 | case RDT_RESOURCE_L2CODE: |
| 1030 | _r_cdp = &rdt_resources_all[RDT_RESOURCE_L2DATA]; |
| 1031 | break; |
| 1032 | default: |
| 1033 | ret = -ENOENT; |
| 1034 | goto out; |
| 1035 | } |
| 1036 | |
| 1037 | /* |
| 1038 | * When a new CPU comes online and CDP is enabled then the new |
| 1039 | * RDT domains (if any) associated with both CDP RDT resources |
| 1040 | * are added in the same CPU online routine while the |
| 1041 | * rdtgroup_mutex is held. It should thus not happen for one |
| 1042 | * RDT domain to exist and be associated with its RDT CDP |
| 1043 | * resource but there is no RDT domain associated with the |
| 1044 | * peer RDT CDP resource. Hence the WARN. |
| 1045 | */ |
| 1046 | _d_cdp = rdt_find_domain(_r_cdp, d->id, NULL); |
| 1047 | if (WARN_ON(IS_ERR_OR_NULL(_d_cdp))) { |
| 1048 | _r_cdp = NULL; |
| 1049 | _d_cdp = NULL; |
| 1050 | ret = -EINVAL; |
| 1051 | } |
| 1052 | |
| 1053 | out: |
| 1054 | *r_cdp = _r_cdp; |
| 1055 | *d_cdp = _d_cdp; |
| 1056 | |
| 1057 | return ret; |
| 1058 | } |
| 1059 | |
| 1060 | /** |
| 1061 | * __rdtgroup_cbm_overlaps - Does CBM for intended closid overlap with other |
| 1062 | * @r: Resource to which domain instance @d belongs. |
| 1063 | * @d: The domain instance for which @closid is being tested. |
| 1064 | * @cbm: Capacity bitmask being tested. |
| 1065 | * @closid: Intended closid for @cbm. |
| 1066 | * @exclusive: Only check if overlaps with exclusive resource groups |
| 1067 | * |
| 1068 | * Checks if provided @cbm intended to be used for @closid on domain |
| 1069 | * @d overlaps with any other closids or other hardware usage associated |
| 1070 | * with this domain. If @exclusive is true then only overlaps with |
| 1071 | * resource groups in exclusive mode will be considered. If @exclusive |
| 1072 | * is false then overlaps with any resource group or hardware entities |
| 1073 | * will be considered. |
| 1074 | * |
| 1075 | * @cbm is unsigned long, even if only 32 bits are used, to make the |
| 1076 | * bitmap functions work correctly. |
| 1077 | * |
| 1078 | * Return: false if CBM does not overlap, true if it does. |
| 1079 | */ |
| 1080 | static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d, |
| 1081 | unsigned long cbm, int closid, bool exclusive) |
| 1082 | { |
| 1083 | enum rdtgrp_mode mode; |
| 1084 | unsigned long ctrl_b; |
| 1085 | u32 *ctrl; |
| 1086 | int i; |
| 1087 | |
| 1088 | /* Check for any overlap with regions used by hardware directly */ |
| 1089 | if (!exclusive) { |
| 1090 | ctrl_b = r->cache.shareable_bits; |
| 1091 | if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len)) |
| 1092 | return true; |
| 1093 | } |
| 1094 | |
| 1095 | /* Check for overlap with other resource groups */ |
| 1096 | ctrl = d->ctrl_val; |
| 1097 | for (i = 0; i < closids_supported(); i++, ctrl++) { |
| 1098 | ctrl_b = *ctrl; |
| 1099 | mode = rdtgroup_mode_by_closid(i); |
| 1100 | if (closid_allocated(i) && i != closid && |
| 1101 | mode != RDT_MODE_PSEUDO_LOCKSETUP) { |
| 1102 | if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len)) { |
| 1103 | if (exclusive) { |
| 1104 | if (mode == RDT_MODE_EXCLUSIVE) |
| 1105 | return true; |
| 1106 | continue; |
| 1107 | } |
| 1108 | return true; |
| 1109 | } |
| 1110 | } |
| 1111 | } |
| 1112 | |
| 1113 | return false; |
| 1114 | } |
| 1115 | |
| 1116 | /** |
| 1117 | * rdtgroup_cbm_overlaps - Does CBM overlap with other use of hardware |
| 1118 | * @r: Resource to which domain instance @d belongs. |
| 1119 | * @d: The domain instance for which @closid is being tested. |
| 1120 | * @cbm: Capacity bitmask being tested. |
| 1121 | * @closid: Intended closid for @cbm. |
| 1122 | * @exclusive: Only check if overlaps with exclusive resource groups |
| 1123 | * |
| 1124 | * Resources that can be allocated using a CBM can use the CBM to control |
| 1125 | * the overlap of these allocations. rdtgroup_cbm_overlaps() is the test |
| 1126 | * for overlap. Overlap test is not limited to the specific resource for |
| 1127 | * which the CBM is intended though - when dealing with CDP resources that |
| 1128 | * share the underlying hardware the overlap check should be performed on |
| 1129 | * the CDP resource sharing the hardware also. |
| 1130 | * |
| 1131 | * Refer to description of __rdtgroup_cbm_overlaps() for the details of the |
| 1132 | * overlap test. |
| 1133 | * |
| 1134 | * Return: true if CBM overlap detected, false if there is no overlap |
| 1135 | */ |
| 1136 | bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d, |
| 1137 | unsigned long cbm, int closid, bool exclusive) |
| 1138 | { |
| 1139 | struct rdt_resource *r_cdp; |
| 1140 | struct rdt_domain *d_cdp; |
| 1141 | |
| 1142 | if (__rdtgroup_cbm_overlaps(r, d, cbm, closid, exclusive)) |
| 1143 | return true; |
| 1144 | |
| 1145 | if (rdt_cdp_peer_get(r, d, &r_cdp, &d_cdp) < 0) |
| 1146 | return false; |
| 1147 | |
| 1148 | return __rdtgroup_cbm_overlaps(r_cdp, d_cdp, cbm, closid, exclusive); |
| 1149 | } |
| 1150 | |
| 1151 | /** |
| 1152 | * rdtgroup_mode_test_exclusive - Test if this resource group can be exclusive |
| 1153 | * |
| 1154 | * An exclusive resource group implies that there should be no sharing of |
| 1155 | * its allocated resources. At the time this group is considered to be |
| 1156 | * exclusive this test can determine if its current schemata supports this |
| 1157 | * setting by testing for overlap with all other resource groups. |
| 1158 | * |
| 1159 | * Return: true if resource group can be exclusive, false if there is overlap |
| 1160 | * with allocations of other resource groups and thus this resource group |
| 1161 | * cannot be exclusive. |
| 1162 | */ |
| 1163 | static bool rdtgroup_mode_test_exclusive(struct rdtgroup *rdtgrp) |
| 1164 | { |
| 1165 | int closid = rdtgrp->closid; |
| 1166 | struct rdt_resource *r; |
| 1167 | bool has_cache = false; |
| 1168 | struct rdt_domain *d; |
| 1169 | |
| 1170 | for_each_alloc_enabled_rdt_resource(r) { |
| 1171 | if (r->rid == RDT_RESOURCE_MBA) |
| 1172 | continue; |
| 1173 | has_cache = true; |
| 1174 | list_for_each_entry(d, &r->domains, list) { |
| 1175 | if (rdtgroup_cbm_overlaps(r, d, d->ctrl_val[closid], |
| 1176 | rdtgrp->closid, false)) { |
| 1177 | rdt_last_cmd_puts("Schemata overlaps\n"); |
| 1178 | return false; |
| 1179 | } |
| 1180 | } |
| 1181 | } |
| 1182 | |
| 1183 | if (!has_cache) { |
| 1184 | rdt_last_cmd_puts("Cannot be exclusive without CAT/CDP\n"); |
| 1185 | return false; |
| 1186 | } |
| 1187 | |
| 1188 | return true; |
| 1189 | } |
| 1190 | |
| 1191 | /** |
| 1192 | * rdtgroup_mode_write - Modify the resource group's mode |
| 1193 | * |
| 1194 | */ |
| 1195 | static ssize_t rdtgroup_mode_write(struct kernfs_open_file *of, |
| 1196 | char *buf, size_t nbytes, loff_t off) |
| 1197 | { |
| 1198 | struct rdtgroup *rdtgrp; |
| 1199 | enum rdtgrp_mode mode; |
| 1200 | int ret = 0; |
| 1201 | |
| 1202 | /* Valid input requires a trailing newline */ |
| 1203 | if (nbytes == 0 || buf[nbytes - 1] != '\n') |
| 1204 | return -EINVAL; |
| 1205 | buf[nbytes - 1] = '\0'; |
| 1206 | |
| 1207 | rdtgrp = rdtgroup_kn_lock_live(of->kn); |
| 1208 | if (!rdtgrp) { |
| 1209 | rdtgroup_kn_unlock(of->kn); |
| 1210 | return -ENOENT; |
| 1211 | } |
| 1212 | |
| 1213 | rdt_last_cmd_clear(); |
| 1214 | |
| 1215 | mode = rdtgrp->mode; |
| 1216 | |
| 1217 | if ((!strcmp(buf, "shareable") && mode == RDT_MODE_SHAREABLE) || |
| 1218 | (!strcmp(buf, "exclusive") && mode == RDT_MODE_EXCLUSIVE) || |
| 1219 | (!strcmp(buf, "pseudo-locksetup") && |
| 1220 | mode == RDT_MODE_PSEUDO_LOCKSETUP) || |
| 1221 | (!strcmp(buf, "pseudo-locked") && mode == RDT_MODE_PSEUDO_LOCKED)) |
| 1222 | goto out; |
| 1223 | |
| 1224 | if (mode == RDT_MODE_PSEUDO_LOCKED) { |
| 1225 | rdt_last_cmd_puts("Cannot change pseudo-locked group\n"); |
| 1226 | ret = -EINVAL; |
| 1227 | goto out; |
| 1228 | } |
| 1229 | |
| 1230 | if (!strcmp(buf, "shareable")) { |
| 1231 | if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { |
| 1232 | ret = rdtgroup_locksetup_exit(rdtgrp); |
| 1233 | if (ret) |
| 1234 | goto out; |
| 1235 | } |
| 1236 | rdtgrp->mode = RDT_MODE_SHAREABLE; |
| 1237 | } else if (!strcmp(buf, "exclusive")) { |
| 1238 | if (!rdtgroup_mode_test_exclusive(rdtgrp)) { |
| 1239 | ret = -EINVAL; |
| 1240 | goto out; |
| 1241 | } |
| 1242 | if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { |
| 1243 | ret = rdtgroup_locksetup_exit(rdtgrp); |
| 1244 | if (ret) |
| 1245 | goto out; |
| 1246 | } |
| 1247 | rdtgrp->mode = RDT_MODE_EXCLUSIVE; |
| 1248 | } else if (!strcmp(buf, "pseudo-locksetup")) { |
| 1249 | ret = rdtgroup_locksetup_enter(rdtgrp); |
| 1250 | if (ret) |
| 1251 | goto out; |
| 1252 | rdtgrp->mode = RDT_MODE_PSEUDO_LOCKSETUP; |
| 1253 | } else { |
| 1254 | rdt_last_cmd_puts("Unknown or unsupported mode\n"); |
| 1255 | ret = -EINVAL; |
| 1256 | } |
| 1257 | |
| 1258 | out: |
| 1259 | rdtgroup_kn_unlock(of->kn); |
| 1260 | return ret ?: nbytes; |
| 1261 | } |
| 1262 | |
| 1263 | /** |
| 1264 | * rdtgroup_cbm_to_size - Translate CBM to size in bytes |
| 1265 | * @r: RDT resource to which @d belongs. |
| 1266 | * @d: RDT domain instance. |
| 1267 | * @cbm: bitmask for which the size should be computed. |
| 1268 | * |
| 1269 | * The provided bitmask associated with the RDT domain instance @d will be |
| 1270 | * translated into how many bytes it represents. The size in bytes is |
| 1271 | * computed by first dividing the total cache size by the CBM length to |
| 1272 | * determine how many bytes each bit in the bitmask represents. The result |
| 1273 | * is multiplied with the number of bits set in the bitmask. |
| 1274 | * |
| 1275 | * @cbm is unsigned long, even if only 32 bits are used, to make the |
| 1276 | * bitmap functions work correctly. |
| 1277 | */ |
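/*
 * For example (hypothetical numbers): a 16 MB cache with a 16 bit CBM gives
 * 1 MB per bit, so a bitmask with four bits set translates to 4 MB.
 */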
| 1278 | unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, |
| 1279 | struct rdt_domain *d, unsigned long cbm) |
| 1280 | { |
| 1281 | struct cpu_cacheinfo *ci; |
| 1282 | unsigned int size = 0; |
| 1283 | int num_b, i; |
| 1284 | |
| 1285 | num_b = bitmap_weight(&cbm, r->cache.cbm_len); |
| 1286 | ci = get_cpu_cacheinfo(cpumask_any(&d->cpu_mask)); |
| 1287 | for (i = 0; i < ci->num_leaves; i++) { |
| 1288 | if (ci->info_list[i].level == r->cache_level) { |
| 1289 | size = ci->info_list[i].size / r->cache.cbm_len * num_b; |
| 1290 | break; |
| 1291 | } |
| 1292 | } |
| 1293 | |
| 1294 | return size; |
| 1295 | } |
| 1296 | |
| 1297 | /** |
| 1298 | * rdtgroup_size_show - Display size in bytes of allocated regions |
| 1299 | * |
| 1300 | * The "size" file mirrors the layout of the "schemata" file, printing the |
| 1301 | * size in bytes of each region instead of the capacity bitmask. |
| 1302 | * |
| 1303 | */ |
| 1304 | static int rdtgroup_size_show(struct kernfs_open_file *of, |
| 1305 | struct seq_file *s, void *v) |
| 1306 | { |
| 1307 | struct rdtgroup *rdtgrp; |
| 1308 | struct rdt_resource *r; |
| 1309 | struct rdt_domain *d; |
| 1310 | unsigned int size; |
| 1311 | int ret = 0; |
| 1312 | bool sep; |
| 1313 | u32 ctrl; |
| 1314 | |
| 1315 | rdtgrp = rdtgroup_kn_lock_live(of->kn); |
| 1316 | if (!rdtgrp) { |
| 1317 | rdtgroup_kn_unlock(of->kn); |
| 1318 | return -ENOENT; |
| 1319 | } |
| 1320 | |
| 1321 | if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) { |
| 1322 | if (!rdtgrp->plr->d) { |
| 1323 | rdt_last_cmd_clear(); |
| 1324 | rdt_last_cmd_puts("Cache domain offline\n"); |
| 1325 | ret = -ENODEV; |
| 1326 | } else { |
| 1327 | seq_printf(s, "%*s:", max_name_width, |
| 1328 | rdtgrp->plr->r->name); |
| 1329 | size = rdtgroup_cbm_to_size(rdtgrp->plr->r, |
| 1330 | rdtgrp->plr->d, |
| 1331 | rdtgrp->plr->cbm); |
| 1332 | seq_printf(s, "%d=%u\n", rdtgrp->plr->d->id, size); |
| 1333 | } |
| 1334 | goto out; |
| 1335 | } |
| 1336 | |
| 1337 | for_each_alloc_enabled_rdt_resource(r) { |
| 1338 | sep = false; |
| 1339 | seq_printf(s, "%*s:", max_name_width, r->name); |
| 1340 | list_for_each_entry(d, &r->domains, list) { |
| 1341 | if (sep) |
| 1342 | seq_putc(s, ';'); |
| 1343 | if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { |
| 1344 | size = 0; |
| 1345 | } else { |
| 1346 | ctrl = (!is_mba_sc(r) ? |
| 1347 | d->ctrl_val[rdtgrp->closid] : |
| 1348 | d->mbps_val[rdtgrp->closid]); |
| 1349 | if (r->rid == RDT_RESOURCE_MBA) |
| 1350 | size = ctrl; |
| 1351 | else |
| 1352 | size = rdtgroup_cbm_to_size(r, d, ctrl); |
| 1353 | } |
| 1354 | seq_printf(s, "%d=%u", d->id, size); |
| 1355 | sep = true; |
| 1356 | } |
| 1357 | seq_putc(s, '\n'); |
| 1358 | } |
| 1359 | |
| 1360 | out: |
| 1361 | rdtgroup_kn_unlock(of->kn); |
| 1362 | |
| 1363 | return ret; |
| 1364 | } |
| 1365 | |
| 1366 | /* rdtgroup information files for one cache resource. */ |
| 1367 | static struct rftype res_common_files[] = { |
| 1368 | { |
| 1369 | .name = "last_cmd_status", |
| 1370 | .mode = 0444, |
| 1371 | .kf_ops = &rdtgroup_kf_single_ops, |
| 1372 | .seq_show = rdt_last_cmd_status_show, |
| 1373 | .fflags = RF_TOP_INFO, |
| 1374 | }, |
| 1375 | { |
| 1376 | .name = "num_closids", |
| 1377 | .mode = 0444, |
| 1378 | .kf_ops = &rdtgroup_kf_single_ops, |
| 1379 | .seq_show = rdt_num_closids_show, |
| 1380 | .fflags = RF_CTRL_INFO, |
| 1381 | }, |
| 1382 | { |
| 1383 | .name = "mon_features", |
| 1384 | .mode = 0444, |
| 1385 | .kf_ops = &rdtgroup_kf_single_ops, |
| 1386 | .seq_show = rdt_mon_features_show, |
| 1387 | .fflags = RF_MON_INFO, |
| 1388 | }, |
| 1389 | { |
| 1390 | .name = "num_rmids", |
| 1391 | .mode = 0444, |
| 1392 | .kf_ops = &rdtgroup_kf_single_ops, |
| 1393 | .seq_show = rdt_num_rmids_show, |
| 1394 | .fflags = RF_MON_INFO, |
| 1395 | }, |
| 1396 | { |
| 1397 | .name = "cbm_mask", |
| 1398 | .mode = 0444, |
| 1399 | .kf_ops = &rdtgroup_kf_single_ops, |
| 1400 | .seq_show = rdt_default_ctrl_show, |
| 1401 | .fflags = RF_CTRL_INFO | RFTYPE_RES_CACHE, |
| 1402 | }, |
| 1403 | { |
| 1404 | .name = "min_cbm_bits", |
| 1405 | .mode = 0444, |
| 1406 | .kf_ops = &rdtgroup_kf_single_ops, |
| 1407 | .seq_show = rdt_min_cbm_bits_show, |
| 1408 | .fflags = RF_CTRL_INFO | RFTYPE_RES_CACHE, |
| 1409 | }, |
| 1410 | { |
| 1411 | .name = "shareable_bits", |
| 1412 | .mode = 0444, |
| 1413 | .kf_ops = &rdtgroup_kf_single_ops, |
| 1414 | .seq_show = rdt_shareable_bits_show, |
| 1415 | .fflags = RF_CTRL_INFO | RFTYPE_RES_CACHE, |
| 1416 | }, |
| 1417 | { |
| 1418 | .name = "bit_usage", |
| 1419 | .mode = 0444, |
| 1420 | .kf_ops = &rdtgroup_kf_single_ops, |
| 1421 | .seq_show = rdt_bit_usage_show, |
| 1422 | .fflags = RF_CTRL_INFO | RFTYPE_RES_CACHE, |
| 1423 | }, |
| 1424 | { |
| 1425 | .name = "min_bandwidth", |
| 1426 | .mode = 0444, |
| 1427 | .kf_ops = &rdtgroup_kf_single_ops, |
| 1428 | .seq_show = rdt_min_bw_show, |
| 1429 | .fflags = RF_CTRL_INFO | RFTYPE_RES_MB, |
| 1430 | }, |
| 1431 | { |
| 1432 | .name = "bandwidth_gran", |
| 1433 | .mode = 0444, |
| 1434 | .kf_ops = &rdtgroup_kf_single_ops, |
| 1435 | .seq_show = rdt_bw_gran_show, |
| 1436 | .fflags = RF_CTRL_INFO | RFTYPE_RES_MB, |
| 1437 | }, |
| 1438 | { |
| 1439 | .name = "delay_linear", |
| 1440 | .mode = 0444, |
| 1441 | .kf_ops = &rdtgroup_kf_single_ops, |
| 1442 | .seq_show = rdt_delay_linear_show, |
| 1443 | .fflags = RF_CTRL_INFO | RFTYPE_RES_MB, |
| 1444 | }, |
| 1445 | { |
| 1446 | .name = "max_threshold_occupancy", |
| 1447 | .mode = 0644, |
| 1448 | .kf_ops = &rdtgroup_kf_single_ops, |
| 1449 | .write = max_threshold_occ_write, |
| 1450 | .seq_show = max_threshold_occ_show, |
| 1451 | .fflags = RF_MON_INFO | RFTYPE_RES_CACHE, |
| 1452 | }, |
| 1453 | { |
| 1454 | .name = "cpus", |
| 1455 | .mode = 0644, |
| 1456 | .kf_ops = &rdtgroup_kf_single_ops, |
| 1457 | .write = rdtgroup_cpus_write, |
| 1458 | .seq_show = rdtgroup_cpus_show, |
| 1459 | .fflags = RFTYPE_BASE, |
| 1460 | }, |
| 1461 | { |
| 1462 | .name = "cpus_list", |
| 1463 | .mode = 0644, |
| 1464 | .kf_ops = &rdtgroup_kf_single_ops, |
| 1465 | .write = rdtgroup_cpus_write, |
| 1466 | .seq_show = rdtgroup_cpus_show, |
| 1467 | .flags = RFTYPE_FLAGS_CPUS_LIST, |
| 1468 | .fflags = RFTYPE_BASE, |
| 1469 | }, |
| 1470 | { |
| 1471 | .name = "tasks", |
| 1472 | .mode = 0644, |
| 1473 | .kf_ops = &rdtgroup_kf_single_ops, |
| 1474 | .write = rdtgroup_tasks_write, |
| 1475 | .seq_show = rdtgroup_tasks_show, |
| 1476 | .fflags = RFTYPE_BASE, |
| 1477 | }, |
| 1478 | { |
| 1479 | .name = "schemata", |
| 1480 | .mode = 0644, |
| 1481 | .kf_ops = &rdtgroup_kf_single_ops, |
| 1482 | .write = rdtgroup_schemata_write, |
| 1483 | .seq_show = rdtgroup_schemata_show, |
| 1484 | .fflags = RF_CTRL_BASE, |
| 1485 | }, |
| 1486 | { |
| 1487 | .name = "mode", |
| 1488 | .mode = 0644, |
| 1489 | .kf_ops = &rdtgroup_kf_single_ops, |
| 1490 | .write = rdtgroup_mode_write, |
| 1491 | .seq_show = rdtgroup_mode_show, |
| 1492 | .fflags = RF_CTRL_BASE, |
| 1493 | }, |
| 1494 | { |
| 1495 | .name = "size", |
| 1496 | .mode = 0444, |
| 1497 | .kf_ops = &rdtgroup_kf_single_ops, |
| 1498 | .seq_show = rdtgroup_size_show, |
| 1499 | .fflags = RF_CTRL_BASE, |
| 1500 | }, |
| 1501 | |
| 1502 | }; |
| 1503 | |
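/*
 * Add every file in res_common_files[] whose flags match @fflags to @kn.
 * On failure any files added so far are removed again.
 */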
| 1504 | static int rdtgroup_add_files(struct kernfs_node *kn, unsigned long fflags) |
| 1505 | { |
| 1506 | struct rftype *rfts, *rft; |
| 1507 | int ret, len; |
| 1508 | |
| 1509 | rfts = res_common_files; |
| 1510 | len = ARRAY_SIZE(res_common_files); |
| 1511 | |
| 1512 | lockdep_assert_held(&rdtgroup_mutex); |
| 1513 | |
| 1514 | for (rft = rfts; rft < rfts + len; rft++) { |
| 1515 | if ((fflags & rft->fflags) == rft->fflags) { |
| 1516 | ret = rdtgroup_add_file(kn, rft); |
| 1517 | if (ret) |
| 1518 | goto error; |
| 1519 | } |
| 1520 | } |
| 1521 | |
| 1522 | return 0; |
| 1523 | error: |
| 1524 | pr_warn("Failed to add %s, err=%d\n", rft->name, ret); |
| 1525 | while (--rft >= rfts) { |
| 1526 | if ((fflags & rft->fflags) == rft->fflags) |
| 1527 | kernfs_remove_by_name(kn, rft->name); |
| 1528 | } |
| 1529 | return ret; |
| 1530 | } |
| 1531 | |
| 1532 | /** |
| 1533 | * rdtgroup_kn_mode_restrict - Restrict user access to named resctrl file |
| 1534 | * @r: The resource group with which the file is associated. |
| 1535 | * @name: Name of the file |
| 1536 | * |
| 1537 | * The permissions of the named resctrl file, directory, or link are modified |
| 1538 | * to not allow read, write, or execute by any user. |
| 1539 | * |
| 1540 | * WARNING: This function is intended to communicate to the user that the |
| 1541 | * resctrl file has been locked down - that it is not relevant to the |
| 1542 | * particular state the system finds itself in. It should not be relied |
| 1543 | * on to protect from user access because after the file's permissions |
| 1544 | * are restricted the user can still change the permissions using chmod |
| 1545 | * from the command line. |
| 1546 | * |
| 1547 | * Return: 0 on success, <0 on failure. |
| 1548 | */ |
| 1549 | int rdtgroup_kn_mode_restrict(struct rdtgroup *r, const char *name) |
| 1550 | { |
| 1551 | struct iattr iattr = {.ia_valid = ATTR_MODE,}; |
| 1552 | struct kernfs_node *kn; |
| 1553 | int ret = 0; |
| 1554 | |
| 1555 | kn = kernfs_find_and_get_ns(r->kn, name, NULL); |
| 1556 | if (!kn) |
| 1557 | return -ENOENT; |
| 1558 | |
| 1559 | switch (kernfs_type(kn)) { |
| 1560 | case KERNFS_DIR: |
| 1561 | iattr.ia_mode = S_IFDIR; |
| 1562 | break; |
| 1563 | case KERNFS_FILE: |
| 1564 | iattr.ia_mode = S_IFREG; |
| 1565 | break; |
| 1566 | case KERNFS_LINK: |
| 1567 | iattr.ia_mode = S_IFLNK; |
| 1568 | break; |
| 1569 | } |
| 1570 | |
| 1571 | ret = kernfs_setattr(kn, &iattr); |
| 1572 | kernfs_put(kn); |
| 1573 | return ret; |
| 1574 | } |
| 1575 | |
| 1576 | /** |
| 1577 | * rdtgroup_kn_mode_restore - Restore user access to named resctrl file |
| 1578 | * @r: The resource group with which the file is associated. |
| 1579 | * @name: Name of the file |
| 1580 | * @mask: Mask of permissions that should be restored |
| 1581 | * |
| 1582 | * Restore the permissions of the named file. If @name is a directory the |
| 1583 | * permissions of its parent will be used. |
| 1584 | * |
| 1585 | * Return: 0 on success, <0 on failure. |
| 1586 | */ |
| 1587 | int rdtgroup_kn_mode_restore(struct rdtgroup *r, const char *name, |
| 1588 | umode_t mask) |
| 1589 | { |
| 1590 | struct iattr iattr = {.ia_valid = ATTR_MODE,}; |
| 1591 | struct kernfs_node *kn, *parent; |
| 1592 | struct rftype *rfts, *rft; |
| 1593 | int ret, len; |
| 1594 | |
| 1595 | rfts = res_common_files; |
| 1596 | len = ARRAY_SIZE(res_common_files); |
| 1597 | |
| 1598 | for (rft = rfts; rft < rfts + len; rft++) { |
| 1599 | if (!strcmp(rft->name, name)) |
| 1600 | iattr.ia_mode = rft->mode & mask; |
| 1601 | } |
| 1602 | |
| 1603 | kn = kernfs_find_and_get_ns(r->kn, name, NULL); |
| 1604 | if (!kn) |
| 1605 | return -ENOENT; |
| 1606 | |
| 1607 | switch (kernfs_type(kn)) { |
| 1608 | case KERNFS_DIR: |
| 1609 | parent = kernfs_get_parent(kn); |
| 1610 | if (parent) { |
| 1611 | iattr.ia_mode |= parent->mode; |
| 1612 | kernfs_put(parent); |
| 1613 | } |
| 1614 | iattr.ia_mode |= S_IFDIR; |
| 1615 | break; |
| 1616 | case KERNFS_FILE: |
| 1617 | iattr.ia_mode |= S_IFREG; |
| 1618 | break; |
| 1619 | case KERNFS_LINK: |
| 1620 | iattr.ia_mode |= S_IFLNK; |
| 1621 | break; |
| 1622 | } |
| 1623 | |
| 1624 | ret = kernfs_setattr(kn, &iattr); |
| 1625 | kernfs_put(kn); |
| 1626 | return ret; |
| 1627 | } |
| 1628 | |
| 1629 | static int rdtgroup_mkdir_info_resdir(struct rdt_resource *r, char *name, |
| 1630 | unsigned long fflags) |
| 1631 | { |
| 1632 | struct kernfs_node *kn_subdir; |
| 1633 | int ret; |
| 1634 | |
| 1635 | kn_subdir = kernfs_create_dir(kn_info, name, |
| 1636 | kn_info->mode, r); |
| 1637 | if (IS_ERR(kn_subdir)) |
| 1638 | return PTR_ERR(kn_subdir); |
| 1639 | |
| 1640 | ret = rdtgroup_kn_set_ugid(kn_subdir); |
| 1641 | if (ret) |
| 1642 | return ret; |
| 1643 | |
| 1644 | ret = rdtgroup_add_files(kn_subdir, fflags); |
| 1645 | if (!ret) |
| 1646 | kernfs_activate(kn_subdir); |
| 1647 | |
| 1648 | return ret; |
| 1649 | } |
| 1650 | |
| 1651 | static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn) |
| 1652 | { |
| 1653 | struct rdt_resource *r; |
| 1654 | unsigned long fflags; |
| 1655 | char name[32]; |
| 1656 | int ret; |
| 1657 | |
| 1658 | /* create the directory */ |
| 1659 | kn_info = kernfs_create_dir(parent_kn, "info", parent_kn->mode, NULL); |
| 1660 | if (IS_ERR(kn_info)) |
| 1661 | return PTR_ERR(kn_info); |
| 1662 | |
| 1663 | ret = rdtgroup_add_files(kn_info, RF_TOP_INFO); |
| 1664 | if (ret) |
| 1665 | goto out_destroy; |
| 1666 | |
| 1667 | for_each_alloc_enabled_rdt_resource(r) { |
| 1668 | fflags = r->fflags | RF_CTRL_INFO; |
| 1669 | ret = rdtgroup_mkdir_info_resdir(r, r->name, fflags); |
| 1670 | if (ret) |
| 1671 | goto out_destroy; |
| 1672 | } |
| 1673 | |
| 1674 | for_each_mon_enabled_rdt_resource(r) { |
| 1675 | fflags = r->fflags | RF_MON_INFO; |
| 1676 | sprintf(name, "%s_MON", r->name); |
| 1677 | ret = rdtgroup_mkdir_info_resdir(r, name, fflags); |
| 1678 | if (ret) |
| 1679 | goto out_destroy; |
| 1680 | } |
| 1681 | |
| 1682 | ret = rdtgroup_kn_set_ugid(kn_info); |
| 1683 | if (ret) |
| 1684 | goto out_destroy; |
| 1685 | |
| 1686 | kernfs_activate(kn_info); |
| 1687 | |
| 1688 | return 0; |
| 1689 | |
| 1690 | out_destroy: |
| 1691 | kernfs_remove(kn_info); |
| 1692 | return ret; |
| 1693 | } |
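| | |
| | /* |
| |  * On a system with, for example, L3 allocation, L3 monitoring and MBA, |
| |  * the resulting layout looks roughly like: |
| |  * |
| |  * ./info: |
| |  *   last_cmd_status |
| |  *   L3/      (cbm_mask, min_cbm_bits, num_closids, ...) |
| |  *   L3_MON/  (num_rmids, mon_features, ...) |
| |  *   MB/      (bandwidth_gran, min_bandwidth, num_closids, ...) |
| |  */ |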
| 1694 | |
| 1695 | static int |
| 1696 | mongroup_create_dir(struct kernfs_node *parent_kn, struct rdtgroup *prgrp, |
| 1697 | char *name, struct kernfs_node **dest_kn) |
| 1698 | { |
| 1699 | struct kernfs_node *kn; |
| 1700 | int ret; |
| 1701 | |
| 1702 | /* create the directory */ |
| 1703 | kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp); |
| 1704 | if (IS_ERR(kn)) |
| 1705 | return PTR_ERR(kn); |
| 1706 | |
| 1707 | if (dest_kn) |
| 1708 | *dest_kn = kn; |
| 1709 | |
| 1710 | ret = rdtgroup_kn_set_ugid(kn); |
| 1711 | if (ret) |
| 1712 | goto out_destroy; |
| 1713 | |
| 1714 | kernfs_activate(kn); |
| 1715 | |
| 1716 | return 0; |
| 1717 | |
| 1718 | out_destroy: |
| 1719 | kernfs_remove(kn); |
| 1720 | return ret; |
| 1721 | } |
| 1722 | |
| 1723 | static void l3_qos_cfg_update(void *arg) |
| 1724 | { |
| 1725 | bool *enable = arg; |
| 1726 | |
| 1727 | wrmsrl(MSR_IA32_L3_QOS_CFG, *enable ? L3_QOS_CDP_ENABLE : 0ULL); |
| 1728 | } |
| 1729 | |
| 1730 | static void l2_qos_cfg_update(void *arg) |
| 1731 | { |
| 1732 | bool *enable = arg; |
| 1733 | |
| 1734 | wrmsrl(MSR_IA32_L2_QOS_CFG, *enable ? L2_QOS_CDP_ENABLE : 0ULL); |
| 1735 | } |
| 1736 | |
| 1737 | static inline bool is_mba_linear(void) |
| 1738 | { |
| 1739 | return rdt_resources_all[RDT_RESOURCE_MBA].membw.delay_linear; |
| 1740 | } |
| 1741 | |
| 1742 | static int set_cache_qos_cfg(int level, bool enable) |
| 1743 | { |
| 1744 | void (*update)(void *arg); |
| 1745 | struct rdt_resource *r_l; |
| 1746 | cpumask_var_t cpu_mask; |
| 1747 | struct rdt_domain *d; |
| 1748 | int cpu; |
| 1749 | |
| 1750 | if (level == RDT_RESOURCE_L3) |
| 1751 | update = l3_qos_cfg_update; |
| 1752 | else if (level == RDT_RESOURCE_L2) |
| 1753 | update = l2_qos_cfg_update; |
| 1754 | else |
| 1755 | return -EINVAL; |
| 1756 | |
| 1757 | if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL)) |
| 1758 | return -ENOMEM; |
| 1759 | |
| 1760 | r_l = &rdt_resources_all[level]; |
| 1761 | list_for_each_entry(d, &r_l->domains, list) { |
| 1762 | /* Pick one CPU from each domain instance to update MSR */ |
| 1763 | cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask); |
| 1764 | } |
| 1765 | cpu = get_cpu(); |
| 1766 | /* Update QOS_CFG MSR on this cpu if it's in cpu_mask. */ |
| 1767 | if (cpumask_test_cpu(cpu, cpu_mask)) |
| 1768 | update(&enable); |
| 1769 | /* Update QOS_CFG MSR on all other cpus in cpu_mask. */ |
| 1770 | smp_call_function_many(cpu_mask, update, &enable, 1); |
| 1771 | put_cpu(); |
| 1772 | |
| 1773 | free_cpumask_var(cpu_mask); |
| 1774 | |
| 1775 | return 0; |
| 1776 | } |
| 1777 | |
| 1778 | /* Restore the qos cfg state when a domain comes online */ |
| 1779 | void rdt_domain_reconfigure_cdp(struct rdt_resource *r) |
| 1780 | { |
| 1781 | if (!r->alloc_capable) |
| 1782 | return; |
| 1783 | |
| 1784 | if (r == &rdt_resources_all[RDT_RESOURCE_L2DATA]) |
| 1785 | l2_qos_cfg_update(&r->alloc_enabled); |
| 1786 | |
| 1787 | if (r == &rdt_resources_all[RDT_RESOURCE_L3DATA]) |
| 1788 | l3_qos_cfg_update(&r->alloc_enabled); |
| 1789 | } |
| 1790 | |
| 1791 | /* |
| 1792 | * Enable or disable the MBA software controller |
| 1793 | * which helps the user specify bandwidth in MBps. |
| 1794 | * The MBA software controller is supported only if |
| 1795 | * MBM is supported and MBA is in linear scale. |
| 1796 | */ |
| 1797 | static int set_mba_sc(bool mba_sc) |
| 1798 | { |
| 1799 | struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA]; |
| 1800 | struct rdt_domain *d; |
| 1801 | |
| 1802 | if (!is_mbm_enabled() || !is_mba_linear() || |
| 1803 | mba_sc == is_mba_sc(r)) |
| 1804 | return -EINVAL; |
| 1805 | |
| 1806 | r->membw.mba_sc = mba_sc; |
| 1807 | list_for_each_entry(d, &r->domains, list) |
| 1808 | setup_default_ctrlval(r, d->ctrl_val, d->mbps_val); |
| 1809 | |
| 1810 | return 0; |
| 1811 | } |
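| | |
| | /* |
| |  * Note: with the software controller active (the "mba_MBps" mount |
| |  * option) the MB values in the schemata file are specified in MBps |
| |  * instead of the default percentage based throttle values, and new |
| |  * groups start out unthrottled at MBA_MAX_MBPS (see |
| |  * rdtgroup_init_mba() below). |
| |  */ |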
| 1812 | |
| 1813 | static int cdp_enable(int level, int data_type, int code_type) |
| 1814 | { |
| 1815 | struct rdt_resource *r_ldata = &rdt_resources_all[data_type]; |
| 1816 | struct rdt_resource *r_lcode = &rdt_resources_all[code_type]; |
| 1817 | struct rdt_resource *r_l = &rdt_resources_all[level]; |
| 1818 | int ret; |
| 1819 | |
| 1820 | if (!r_l->alloc_capable || !r_ldata->alloc_capable || |
| 1821 | !r_lcode->alloc_capable) |
| 1822 | return -EINVAL; |
| 1823 | |
| 1824 | ret = set_cache_qos_cfg(level, true); |
| 1825 | if (!ret) { |
| 1826 | r_l->alloc_enabled = false; |
| 1827 | r_ldata->alloc_enabled = true; |
| 1828 | r_lcode->alloc_enabled = true; |
| 1829 | } |
| 1830 | return ret; |
| 1831 | } |
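| | |
| | /* |
| |  * Once CDP is enabled the unified cache resource is no longer shown in |
| |  * the schemata; it is replaced by separate code and data resources, |
| |  * e.g. the "L3" line becomes "L3DATA" and "L3CODE" lines, each with |
| |  * its own capacity bitmasks. |
| |  */ |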
| 1832 | |
| 1833 | static int cdpl3_enable(void) |
| 1834 | { |
| 1835 | return cdp_enable(RDT_RESOURCE_L3, RDT_RESOURCE_L3DATA, |
| 1836 | RDT_RESOURCE_L3CODE); |
| 1837 | } |
| 1838 | |
| 1839 | static int cdpl2_enable(void) |
| 1840 | { |
| 1841 | return cdp_enable(RDT_RESOURCE_L2, RDT_RESOURCE_L2DATA, |
| 1842 | RDT_RESOURCE_L2CODE); |
| 1843 | } |
| 1844 | |
| 1845 | static void cdp_disable(int level, int data_type, int code_type) |
| 1846 | { |
| 1847 | struct rdt_resource *r = &rdt_resources_all[level]; |
| 1848 | |
| 1849 | r->alloc_enabled = r->alloc_capable; |
| 1850 | |
| 1851 | if (rdt_resources_all[data_type].alloc_enabled) { |
| 1852 | rdt_resources_all[data_type].alloc_enabled = false; |
| 1853 | rdt_resources_all[code_type].alloc_enabled = false; |
| 1854 | set_cache_qos_cfg(level, false); |
| 1855 | } |
| 1856 | } |
| 1857 | |
| 1858 | static void cdpl3_disable(void) |
| 1859 | { |
| 1860 | cdp_disable(RDT_RESOURCE_L3, RDT_RESOURCE_L3DATA, RDT_RESOURCE_L3CODE); |
| 1861 | } |
| 1862 | |
| 1863 | static void cdpl2_disable(void) |
| 1864 | { |
| 1865 | cdp_disable(RDT_RESOURCE_L2, RDT_RESOURCE_L2DATA, RDT_RESOURCE_L2CODE); |
| 1866 | } |
| 1867 | |
| 1868 | static void cdp_disable_all(void) |
| 1869 | { |
| 1870 | if (rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled) |
| 1871 | cdpl3_disable(); |
| 1872 | if (rdt_resources_all[RDT_RESOURCE_L2DATA].alloc_enabled) |
| 1873 | cdpl2_disable(); |
| 1874 | } |
| 1875 | |
| 1876 | /* |
| 1877 | * We don't allow rdtgroup directories to be created anywhere |
| 1878 | * except the root directory. Thus when looking for the rdtgroup |
| 1879 | * structure for a kernfs node we are either looking at a directory, |
| 1880 | * in which case the rdtgroup structure is pointed at by the "priv" |
| 1881 | * field, or at a file, in which case we need only look to the parent |
| 1882 | * to find the rdtgroup. |
| 1883 | */ |
| 1884 | static struct rdtgroup *kernfs_to_rdtgroup(struct kernfs_node *kn) |
| 1885 | { |
| 1886 | if (kernfs_type(kn) == KERNFS_DIR) { |
| 1887 | /* |
| 1888 | * All the resource directories use "kn->priv" |
| 1889 | * to point to the "struct rdtgroup" for the |
| 1890 | * resource. "info" and its subdirectories don't |
| 1891 | * have rdtgroup structures, so return NULL here. |
| 1892 | */ |
| 1893 | if (kn == kn_info || kn->parent == kn_info) |
| 1894 | return NULL; |
| 1895 | else |
| 1896 | return kn->priv; |
| 1897 | } else { |
| 1898 | return kn->parent->priv; |
| 1899 | } |
| 1900 | } |
| 1901 | |
| 1902 | struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn) |
| 1903 | { |
| 1904 | struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn); |
| 1905 | |
| 1906 | if (!rdtgrp) |
| 1907 | return NULL; |
| 1908 | |
| 1909 | atomic_inc(&rdtgrp->waitcount); |
| 1910 | kernfs_break_active_protection(kn); |
| 1911 | |
| 1912 | mutex_lock(&rdtgroup_mutex); |
| 1913 | |
| 1914 | /* Was this group deleted while we waited? */ |
| 1915 | if (rdtgrp->flags & RDT_DELETED) |
| 1916 | return NULL; |
| 1917 | |
| 1918 | return rdtgrp; |
| 1919 | } |
| 1920 | |
| 1921 | void rdtgroup_kn_unlock(struct kernfs_node *kn) |
| 1922 | { |
| 1923 | struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn); |
| 1924 | |
| 1925 | if (!rdtgrp) |
| 1926 | return; |
| 1927 | |
| 1928 | mutex_unlock(&rdtgroup_mutex); |
| 1929 | |
| 1930 | if (atomic_dec_and_test(&rdtgrp->waitcount) && |
| 1931 | (rdtgrp->flags & RDT_DELETED)) { |
| 1932 | if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP || |
| 1933 | rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) |
| 1934 | rdtgroup_pseudo_lock_remove(rdtgrp); |
| 1935 | kernfs_unbreak_active_protection(kn); |
| 1936 | rdtgroup_remove(rdtgrp); |
| 1937 | } else { |
| 1938 | kernfs_unbreak_active_protection(kn); |
| 1939 | } |
| 1940 | } |
| 1941 | |
| 1942 | static int mkdir_mondata_all(struct kernfs_node *parent_kn, |
| 1943 | struct rdtgroup *prgrp, |
| 1944 | struct kernfs_node **mon_data_kn); |
| 1945 | |
| 1946 | static int rdt_enable_ctx(struct rdt_fs_context *ctx) |
| 1947 | { |
| 1948 | int ret = 0; |
| 1949 | |
| 1950 | if (ctx->enable_cdpl2) |
| 1951 | ret = cdpl2_enable(); |
| 1952 | |
| 1953 | if (!ret && ctx->enable_cdpl3) |
| 1954 | ret = cdpl3_enable(); |
| 1955 | |
| 1956 | if (!ret && ctx->enable_mba_mbps) |
| 1957 | ret = set_mba_sc(true); |
| 1958 | |
| 1959 | return ret; |
| 1960 | } |
| 1961 | |
| 1962 | static int rdt_get_tree(struct fs_context *fc) |
| 1963 | { |
| 1964 | struct rdt_fs_context *ctx = rdt_fc2context(fc); |
| 1965 | struct rdt_domain *dom; |
| 1966 | struct rdt_resource *r; |
| 1967 | int ret; |
| 1968 | |
| 1969 | cpus_read_lock(); |
| 1970 | mutex_lock(&rdtgroup_mutex); |
| 1971 | /* |
| 1972 | * resctrl file system can only be mounted once. |
| 1973 | */ |
| 1974 | if (static_branch_unlikely(&rdt_enable_key)) { |
| 1975 | ret = -EBUSY; |
| 1976 | goto out; |
| 1977 | } |
| 1978 | |
| 1979 | ret = rdt_enable_ctx(ctx); |
| 1980 | if (ret < 0) |
| 1981 | goto out_cdp; |
| 1982 | |
| 1983 | closid_init(); |
| 1984 | |
| 1985 | ret = rdtgroup_create_info_dir(rdtgroup_default.kn); |
| 1986 | if (ret < 0) |
| 1987 | goto out_mba; |
| 1988 | |
| 1989 | if (rdt_mon_capable) { |
| 1990 | ret = mongroup_create_dir(rdtgroup_default.kn, |
| 1991 | &rdtgroup_default, "mon_groups", |
| 1992 | &kn_mongrp); |
| 1993 | if (ret < 0) |
| 1994 | goto out_info; |
| 1995 | |
| 1996 | ret = mkdir_mondata_all(rdtgroup_default.kn, |
| 1997 | &rdtgroup_default, &kn_mondata); |
| 1998 | if (ret < 0) |
| 1999 | goto out_mongrp; |
| 2000 | rdtgroup_default.mon.mon_data_kn = kn_mondata; |
| 2001 | } |
| 2002 | |
| 2003 | ret = rdt_pseudo_lock_init(); |
| 2004 | if (ret) |
| 2005 | goto out_mondata; |
| 2006 | |
| 2007 | ret = kernfs_get_tree(fc); |
| 2008 | if (ret < 0) |
| 2009 | goto out_psl; |
| 2010 | |
| 2011 | if (rdt_alloc_capable) |
| 2012 | static_branch_enable_cpuslocked(&rdt_alloc_enable_key); |
| 2013 | if (rdt_mon_capable) |
| 2014 | static_branch_enable_cpuslocked(&rdt_mon_enable_key); |
| 2015 | |
| 2016 | if (rdt_alloc_capable || rdt_mon_capable) |
| 2017 | static_branch_enable_cpuslocked(&rdt_enable_key); |
| 2018 | |
| 2019 | if (is_mbm_enabled()) { |
| 2020 | r = &rdt_resources_all[RDT_RESOURCE_L3]; |
| 2021 | list_for_each_entry(dom, &r->domains, list) |
| 2022 | mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL); |
| 2023 | } |
| 2024 | |
| 2025 | goto out; |
| 2026 | |
| 2027 | out_psl: |
| 2028 | rdt_pseudo_lock_release(); |
| 2029 | out_mondata: |
| 2030 | if (rdt_mon_capable) |
| 2031 | kernfs_remove(kn_mondata); |
| 2032 | out_mongrp: |
| 2033 | if (rdt_mon_capable) |
| 2034 | kernfs_remove(kn_mongrp); |
| 2035 | out_info: |
| 2036 | kernfs_remove(kn_info); |
| 2037 | out_mba: |
| 2038 | if (ctx->enable_mba_mbps) |
| 2039 | set_mba_sc(false); |
| 2040 | out_cdp: |
| 2041 | cdp_disable_all(); |
| 2042 | out: |
| 2043 | rdt_last_cmd_clear(); |
| 2044 | mutex_unlock(&rdtgroup_mutex); |
| 2045 | cpus_read_unlock(); |
| 2046 | return ret; |
| 2047 | } |
| 2048 | |
| 2049 | enum rdt_param { |
| 2050 | Opt_cdp, |
| 2051 | Opt_cdpl2, |
| 2052 | Opt_mba_mbps, |
| 2053 | nr__rdt_params |
| 2054 | }; |
| 2055 | |
| 2056 | static const struct fs_parameter_spec rdt_param_specs[] = { |
| 2057 | fsparam_flag("cdp", Opt_cdp), |
| 2058 | fsparam_flag("cdpl2", Opt_cdpl2), |
| 2059 | fsparam_flag("mba_MBps", Opt_mba_mbps), |
| 2060 | {} |
| 2061 | }; |
| 2062 | |
| 2063 | static const struct fs_parameter_description rdt_fs_parameters = { |
| 2064 | .name = "rdt", |
| 2065 | .specs = rdt_param_specs, |
| 2066 | }; |
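| | |
| | /* |
| |  * The options above are passed at mount time, for example: |
| |  * |
| |  *   # mount -t resctrl resctrl -o cdp,mba_MBps /sys/fs/resctrl |
| |  * |
| |  * "cdp" and "cdpl2" enable code/data prioritization on L3 and L2 |
| |  * respectively, "mba_MBps" enables the MBA software controller. |
| |  */ |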
| 2067 | |
| 2068 | static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param) |
| 2069 | { |
| 2070 | struct rdt_fs_context *ctx = rdt_fc2context(fc); |
| 2071 | struct fs_parse_result result; |
| 2072 | int opt; |
| 2073 | |
| 2074 | opt = fs_parse(fc, &rdt_fs_parameters, param, &result); |
| 2075 | if (opt < 0) |
| 2076 | return opt; |
| 2077 | |
| 2078 | switch (opt) { |
| 2079 | case Opt_cdp: |
| 2080 | ctx->enable_cdpl3 = true; |
| 2081 | return 0; |
| 2082 | case Opt_cdpl2: |
| 2083 | ctx->enable_cdpl2 = true; |
| 2084 | return 0; |
| 2085 | case Opt_mba_mbps: |
| 2086 | if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) |
| 2087 | return -EINVAL; |
| 2088 | ctx->enable_mba_mbps = true; |
| 2089 | return 0; |
| 2090 | } |
| 2091 | |
| 2092 | return -EINVAL; |
| 2093 | } |
| 2094 | |
| 2095 | static void rdt_fs_context_free(struct fs_context *fc) |
| 2096 | { |
| 2097 | struct rdt_fs_context *ctx = rdt_fc2context(fc); |
| 2098 | |
| 2099 | kernfs_free_fs_context(fc); |
| 2100 | kfree(ctx); |
| 2101 | } |
| 2102 | |
| 2103 | static const struct fs_context_operations rdt_fs_context_ops = { |
| 2104 | .free = rdt_fs_context_free, |
| 2105 | .parse_param = rdt_parse_param, |
| 2106 | .get_tree = rdt_get_tree, |
| 2107 | }; |
| 2108 | |
| 2109 | static int rdt_init_fs_context(struct fs_context *fc) |
| 2110 | { |
| 2111 | struct rdt_fs_context *ctx; |
| 2112 | |
| 2113 | ctx = kzalloc(sizeof(struct rdt_fs_context), GFP_KERNEL); |
| 2114 | if (!ctx) |
| 2115 | return -ENOMEM; |
| 2116 | |
| 2117 | ctx->kfc.root = rdt_root; |
| 2118 | ctx->kfc.magic = RDTGROUP_SUPER_MAGIC; |
| 2119 | fc->fs_private = &ctx->kfc; |
| 2120 | fc->ops = &rdt_fs_context_ops; |
| 2121 | put_user_ns(fc->user_ns); |
| 2122 | fc->user_ns = get_user_ns(&init_user_ns); |
| 2123 | fc->global = true; |
| 2124 | return 0; |
| 2125 | } |
| 2126 | |
| 2127 | static int reset_all_ctrls(struct rdt_resource *r) |
| 2128 | { |
| 2129 | struct msr_param msr_param; |
| 2130 | cpumask_var_t cpu_mask; |
| 2131 | struct rdt_domain *d; |
| 2132 | int i, cpu; |
| 2133 | |
| 2134 | if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL)) |
| 2135 | return -ENOMEM; |
| 2136 | |
| 2137 | msr_param.res = r; |
| 2138 | msr_param.low = 0; |
| 2139 | msr_param.high = r->num_closid; |
| 2140 | |
| 2141 | /* |
| 2142 | * Disable resource control for this resource by setting all |
| 2143 | * CBMs in all domains to the maximum mask value. Pick one CPU |
| 2144 | * from each domain to update the MSRs below. |
| 2145 | */ |
| 2146 | list_for_each_entry(d, &r->domains, list) { |
| 2147 | cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask); |
| 2148 | |
| 2149 | for (i = 0; i < r->num_closid; i++) |
| 2150 | d->ctrl_val[i] = r->default_ctrl; |
| 2151 | } |
| 2152 | cpu = get_cpu(); |
| 2153 | /* Update CBM on this cpu if it's in cpu_mask. */ |
| 2154 | if (cpumask_test_cpu(cpu, cpu_mask)) |
| 2155 | rdt_ctrl_update(&msr_param); |
| 2156 | /* Update CBM on all other cpus in cpu_mask. */ |
| 2157 | smp_call_function_many(cpu_mask, rdt_ctrl_update, &msr_param, 1); |
| 2158 | put_cpu(); |
| 2159 | |
| 2160 | free_cpumask_var(cpu_mask); |
| 2161 | |
| 2162 | return 0; |
| 2163 | } |
| 2164 | |
| 2165 | /* |
| 2166 | * Move tasks from one to the other group. If @from is NULL, then all tasks |
| 2167 | * in the system are moved unconditionally (used for teardown). |
| 2168 | * |
| 2169 | * If @mask is not NULL the cpus on which moved tasks are running are set |
| 2170 | * in that mask so the update smp function call is restricted to affected |
| 2171 | * cpus. |
| 2172 | */ |
| 2173 | static void rdt_move_group_tasks(struct rdtgroup *from, struct rdtgroup *to, |
| 2174 | struct cpumask *mask) |
| 2175 | { |
| 2176 | struct task_struct *p, *t; |
| 2177 | |
| 2178 | read_lock(&tasklist_lock); |
| 2179 | for_each_process_thread(p, t) { |
| 2180 | if (!from || is_closid_match(t, from) || |
| 2181 | is_rmid_match(t, from)) { |
| 2182 | WRITE_ONCE(t->closid, to->closid); |
| 2183 | WRITE_ONCE(t->rmid, to->mon.rmid); |
| 2184 | |
| 2185 | /* |
| 2186 | * Order the closid/rmid stores above before the loads |
| 2187 | * in task_curr(). This pairs with the full barrier |
| 2188 | * between the rq->curr update and resctrl_sched_in() |
| 2189 | * during context switch. |
| 2190 | */ |
| 2191 | smp_mb(); |
| 2192 | |
| 2193 | /* |
| 2194 | * If the task is on a CPU, set the CPU in the mask. |
| 2195 | * The detection is inaccurate as tasks might move or |
| 2196 | * schedule before the smp function call takes place. |
| 2197 | * In such a case the function call is pointless, but |
| 2198 | * there is no other side effect. |
| 2199 | */ |
| 2200 | if (IS_ENABLED(CONFIG_SMP) && mask && task_curr(t)) |
| 2201 | cpumask_set_cpu(task_cpu(t), mask); |
| 2202 | } |
| 2203 | } |
| 2204 | read_unlock(&tasklist_lock); |
| 2205 | } |
| 2206 | |
| 2207 | static void free_all_child_rdtgrp(struct rdtgroup *rdtgrp) |
| 2208 | { |
| 2209 | struct rdtgroup *sentry, *stmp; |
| 2210 | struct list_head *head; |
| 2211 | |
| 2212 | head = &rdtgrp->mon.crdtgrp_list; |
| 2213 | list_for_each_entry_safe(sentry, stmp, head, mon.crdtgrp_list) { |
| 2214 | free_rmid(sentry->mon.rmid); |
| 2215 | list_del(&sentry->mon.crdtgrp_list); |
| 2216 | |
| 2217 | if (atomic_read(&sentry->waitcount) != 0) |
| 2218 | sentry->flags = RDT_DELETED; |
| 2219 | else |
| 2220 | rdtgroup_remove(sentry); |
| 2221 | } |
| 2222 | } |
| 2223 | |
| 2224 | /* |
| 2225 | * Forcibly remove all subdirectories under root. |
| 2226 | */ |
| 2227 | static void rmdir_all_sub(void) |
| 2228 | { |
| 2229 | struct rdtgroup *rdtgrp, *tmp; |
| 2230 | |
| 2231 | /* Move all tasks to the default resource group */ |
| 2232 | rdt_move_group_tasks(NULL, &rdtgroup_default, NULL); |
| 2233 | |
| 2234 | list_for_each_entry_safe(rdtgrp, tmp, &rdt_all_groups, rdtgroup_list) { |
| 2235 | /* Free any child rmids */ |
| 2236 | free_all_child_rdtgrp(rdtgrp); |
| 2237 | |
| 2238 | /* Remove each rdtgroup other than root */ |
| 2239 | if (rdtgrp == &rdtgroup_default) |
| 2240 | continue; |
| 2241 | |
| 2242 | if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP || |
| 2243 | rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) |
| 2244 | rdtgroup_pseudo_lock_remove(rdtgrp); |
| 2245 | |
| 2246 | /* |
| 2247 | * Give any CPUs back to the default group. We cannot copy |
| 2248 | * cpu_online_mask because a CPU might have executed the |
| 2249 | * offline callback already, but is still marked online. |
| 2250 | */ |
| 2251 | cpumask_or(&rdtgroup_default.cpu_mask, |
| 2252 | &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask); |
| 2253 | |
| 2254 | free_rmid(rdtgrp->mon.rmid); |
| 2255 | |
| 2256 | kernfs_remove(rdtgrp->kn); |
| 2257 | list_del(&rdtgrp->rdtgroup_list); |
| 2258 | |
| 2259 | if (atomic_read(&rdtgrp->waitcount) != 0) |
| 2260 | rdtgrp->flags = RDT_DELETED; |
| 2261 | else |
| 2262 | rdtgroup_remove(rdtgrp); |
| 2263 | } |
| 2264 | /* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */ |
| 2265 | update_closid_rmid(cpu_online_mask, &rdtgroup_default); |
| 2266 | |
| 2267 | kernfs_remove(kn_info); |
| 2268 | kernfs_remove(kn_mongrp); |
| 2269 | kernfs_remove(kn_mondata); |
| 2270 | } |
| 2271 | |
| 2272 | static void rdt_kill_sb(struct super_block *sb) |
| 2273 | { |
| 2274 | struct rdt_resource *r; |
| 2275 | |
| 2276 | cpus_read_lock(); |
| 2277 | mutex_lock(&rdtgroup_mutex); |
| 2278 | |
| 2279 | set_mba_sc(false); |
| 2280 | |
| 2281 | /* Put everything back to default values. */ |
| 2282 | for_each_alloc_enabled_rdt_resource(r) |
| 2283 | reset_all_ctrls(r); |
| 2284 | cdp_disable_all(); |
| 2285 | rmdir_all_sub(); |
| 2286 | rdt_pseudo_lock_release(); |
| 2287 | rdtgroup_default.mode = RDT_MODE_SHAREABLE; |
| 2288 | static_branch_disable_cpuslocked(&rdt_alloc_enable_key); |
| 2289 | static_branch_disable_cpuslocked(&rdt_mon_enable_key); |
| 2290 | static_branch_disable_cpuslocked(&rdt_enable_key); |
| 2291 | kernfs_kill_sb(sb); |
| 2292 | mutex_unlock(&rdtgroup_mutex); |
| 2293 | cpus_read_unlock(); |
| 2294 | } |
| 2295 | |
| 2296 | static struct file_system_type rdt_fs_type = { |
| 2297 | .name = "resctrl", |
| 2298 | .init_fs_context = rdt_init_fs_context, |
| 2299 | .parameters = &rdt_fs_parameters, |
| 2300 | .kill_sb = rdt_kill_sb, |
| 2301 | }; |
| 2302 | |
| 2303 | static int mon_addfile(struct kernfs_node *parent_kn, const char *name, |
| 2304 | void *priv) |
| 2305 | { |
| 2306 | struct kernfs_node *kn; |
| 2307 | int ret = 0; |
| 2308 | |
| 2309 | kn = __kernfs_create_file(parent_kn, name, 0444, |
| 2310 | GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, 0, |
| 2311 | &kf_mondata_ops, priv, NULL, NULL); |
| 2312 | if (IS_ERR(kn)) |
| 2313 | return PTR_ERR(kn); |
| 2314 | |
| 2315 | ret = rdtgroup_kn_set_ugid(kn); |
| 2316 | if (ret) { |
| 2317 | kernfs_remove(kn); |
| 2318 | return ret; |
| 2319 | } |
| 2320 | |
| 2321 | return ret; |
| 2322 | } |
| 2323 | |
| 2324 | /* |
| 2325 | * Remove all subdirectories of mon_data of ctrl_mon groups |
| 2326 | * and monitor groups with the given domain id. |
| 2327 | */ |
| 2328 | void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, unsigned int dom_id) |
| 2329 | { |
| 2330 | struct rdtgroup *prgrp, *crgrp; |
| 2331 | char name[32]; |
| 2332 | |
| 2333 | if (!r->mon_enabled) |
| 2334 | return; |
| 2335 | |
| 2336 | list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) { |
| 2337 | sprintf(name, "mon_%s_%02d", r->name, dom_id); |
| 2338 | kernfs_remove_by_name(prgrp->mon.mon_data_kn, name); |
| 2339 | |
| 2340 | list_for_each_entry(crgrp, &prgrp->mon.crdtgrp_list, mon.crdtgrp_list) |
| 2341 | kernfs_remove_by_name(crgrp->mon.mon_data_kn, name); |
| 2342 | } |
| 2343 | } |
| 2344 | |
| 2345 | static int mkdir_mondata_subdir(struct kernfs_node *parent_kn, |
| 2346 | struct rdt_domain *d, |
| 2347 | struct rdt_resource *r, struct rdtgroup *prgrp) |
| 2348 | { |
| 2349 | union mon_data_bits priv; |
| 2350 | struct kernfs_node *kn; |
| 2351 | struct mon_evt *mevt; |
| 2352 | struct rmid_read rr; |
| 2353 | char name[32]; |
| 2354 | int ret; |
| 2355 | |
| 2356 | sprintf(name, "mon_%s_%02d", r->name, d->id); |
| 2357 | /* create the directory */ |
| 2358 | kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp); |
| 2359 | if (IS_ERR(kn)) |
| 2360 | return PTR_ERR(kn); |
| 2361 | |
| 2362 | ret = rdtgroup_kn_set_ugid(kn); |
| 2363 | if (ret) |
| 2364 | goto out_destroy; |
| 2365 | |
| 2366 | if (WARN_ON(list_empty(&r->evt_list))) { |
| 2367 | ret = -EPERM; |
| 2368 | goto out_destroy; |
| 2369 | } |
| 2370 | |
| 2371 | priv.u.rid = r->rid; |
| 2372 | priv.u.domid = d->id; |
| 2373 | list_for_each_entry(mevt, &r->evt_list, list) { |
| 2374 | priv.u.evtid = mevt->evtid; |
| 2375 | ret = mon_addfile(kn, mevt->name, priv.priv); |
| 2376 | if (ret) |
| 2377 | goto out_destroy; |
| 2378 | |
| 2379 | if (is_mbm_event(mevt->evtid)) |
| 2380 | mon_event_read(&rr, d, prgrp, mevt->evtid, true); |
| 2381 | } |
| 2382 | kernfs_activate(kn); |
| 2383 | return 0; |
| 2384 | |
| 2385 | out_destroy: |
| 2386 | kernfs_remove(kn); |
| 2387 | return ret; |
| 2388 | } |
| 2389 | |
| 2390 | /* |
| 2391 | * Add all subdirectories of mon_data for "ctrl_mon" groups |
| 2392 | * and "monitor" groups with given domain id. |
| 2393 | */ |
| 2394 | void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, |
| 2395 | struct rdt_domain *d) |
| 2396 | { |
| 2397 | struct kernfs_node *parent_kn; |
| 2398 | struct rdtgroup *prgrp, *crgrp; |
| 2399 | struct list_head *head; |
| 2400 | |
| 2401 | if (!r->mon_enabled) |
| 2402 | return; |
| 2403 | |
| 2404 | list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) { |
| 2405 | parent_kn = prgrp->mon.mon_data_kn; |
| 2406 | mkdir_mondata_subdir(parent_kn, d, r, prgrp); |
| 2407 | |
| 2408 | head = &prgrp->mon.crdtgrp_list; |
| 2409 | list_for_each_entry(crgrp, head, mon.crdtgrp_list) { |
| 2410 | parent_kn = crgrp->mon.mon_data_kn; |
| 2411 | mkdir_mondata_subdir(parent_kn, d, r, crgrp); |
| 2412 | } |
| 2413 | } |
| 2414 | } |
| 2415 | |
| 2416 | static int mkdir_mondata_subdir_alldom(struct kernfs_node *parent_kn, |
| 2417 | struct rdt_resource *r, |
| 2418 | struct rdtgroup *prgrp) |
| 2419 | { |
| 2420 | struct rdt_domain *dom; |
| 2421 | int ret; |
| 2422 | |
| 2423 | list_for_each_entry(dom, &r->domains, list) { |
| 2424 | ret = mkdir_mondata_subdir(parent_kn, dom, r, prgrp); |
| 2425 | if (ret) |
| 2426 | return ret; |
| 2427 | } |
| 2428 | |
| 2429 | return 0; |
| 2430 | } |
| 2431 | |
| 2432 | /* |
| 2433 | * This creates a directory mon_data which contains the monitored data. |
| 2434 | * |
| 2435 | * mon_data has one directory for each domain, which is named |
| 2436 | * in the format mon_<domain_name>_<domain_id>. For example, mon_data |
| 2437 | * with L3 domains looks as below: |
| 2438 | * ./mon_data: |
| 2439 | * mon_L3_00 |
| 2440 | * mon_L3_01 |
| 2441 | * mon_L3_02 |
| 2442 | * ... |
| 2443 | * |
| 2444 | * Each domain directory has one file per event: |
| 2445 | * ./mon_L3_00/: |
| 2446 | * llc_occupancy |
| 2447 | * |
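| |  * The event files are read-only; reading, for example, |
| |  * ./mon_L3_00/llc_occupancy returns the current LLC occupancy (in |
| |  * bytes) of the group's RMID in that domain. |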
| 2448 | */ |
| 2449 | static int mkdir_mondata_all(struct kernfs_node *parent_kn, |
| 2450 | struct rdtgroup *prgrp, |
| 2451 | struct kernfs_node **dest_kn) |
| 2452 | { |
| 2453 | struct rdt_resource *r; |
| 2454 | struct kernfs_node *kn; |
| 2455 | int ret; |
| 2456 | |
| 2457 | /* |
| 2458 | * Create the mon_data directory first. |
| 2459 | */ |
| 2460 | ret = mongroup_create_dir(parent_kn, prgrp, "mon_data", &kn); |
| 2461 | if (ret) |
| 2462 | return ret; |
| 2463 | |
| 2464 | if (dest_kn) |
| 2465 | *dest_kn = kn; |
| 2466 | |
| 2467 | /* |
| 2468 | * Create the subdirectories for each domain. Note that all events |
| 2469 | * in a domain like L3 are grouped into a resource whose domain is L3 |
| 2470 | */ |
| 2471 | for_each_mon_enabled_rdt_resource(r) { |
| 2472 | ret = mkdir_mondata_subdir_alldom(kn, r, prgrp); |
| 2473 | if (ret) |
| 2474 | goto out_destroy; |
| 2475 | } |
| 2476 | |
| 2477 | return 0; |
| 2478 | |
| 2479 | out_destroy: |
| 2480 | kernfs_remove(kn); |
| 2481 | return ret; |
| 2482 | } |
| 2483 | |
| 2484 | /** |
| 2485 | * cbm_ensure_valid - Enforce validity on provided CBM |
| 2486 | * @_val: Candidate CBM |
| 2487 | * @r: RDT resource to which the CBM belongs |
| 2488 | * |
| 2489 | * The provided CBM represents all cache portions available for use. This |
| 2490 | * may be represented by a bitmap that does not consist of contiguous ones |
| 2491 | * and thus be an invalid CBM. |
| 2492 | * Here the provided CBM is forced to be a valid CBM by only considering |
| 2493 | * the first set of contiguous bits as valid and clearing all remaining bits. |
| 2494 | * The intention here is to provide a valid default CBM with which a new |
| 2495 | * resource group is initialized. The user can follow this with a |
| 2496 | * modification to the CBM if the default does not satisfy the |
| 2497 | * requirements. |
| 2498 | */ |
| 2499 | static u32 cbm_ensure_valid(u32 _val, struct rdt_resource *r) |
| 2500 | { |
| 2501 | unsigned int cbm_len = r->cache.cbm_len; |
| 2502 | unsigned long first_bit, zero_bit; |
| 2503 | unsigned long val = _val; |
| 2504 | |
| 2505 | if (!val) |
| 2506 | return 0; |
| 2507 | |
| 2508 | first_bit = find_first_bit(&val, cbm_len); |
| 2509 | zero_bit = find_next_zero_bit(&val, cbm_len, first_bit); |
| 2510 | |
| 2511 | /* Clear any remaining bits to ensure contiguous region */ |
| 2512 | bitmap_clear(&val, zero_bit, cbm_len - zero_bit); |
| 2513 | return (u32)val; |
| 2514 | } |
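| | |
| | /* |
| |  * Worked example: with cbm_len = 5 and _val = 0b10110 (not contiguous), |
| |  * first_bit = 1 and zero_bit = 3, so bits 3..4 are cleared and the |
| |  * result is 0b00110. An already contiguous value such as 0b11100 is |
| |  * returned unchanged. |
| |  */ |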
| 2515 | |
| 2516 | /* |
| 2517 | * Initialize cache resources per RDT domain |
| 2518 | * |
| 2519 | * Set the RDT domain up to start off with all usable allocations. That is, |
| 2520 | * all shareable and unused bits. All-zero CBM is invalid. |
| 2521 | */ |
| 2522 | static int __init_one_rdt_domain(struct rdt_domain *d, struct rdt_resource *r, |
| 2523 | u32 closid) |
| 2524 | { |
| 2525 | struct rdt_resource *r_cdp = NULL; |
| 2526 | struct rdt_domain *d_cdp = NULL; |
| 2527 | u32 used_b = 0, unused_b = 0; |
| 2528 | unsigned long tmp_cbm; |
| 2529 | enum rdtgrp_mode mode; |
| 2530 | u32 peer_ctl, *ctrl; |
| 2531 | int i; |
| 2532 | |
| 2533 | rdt_cdp_peer_get(r, d, &r_cdp, &d_cdp); |
| 2534 | d->have_new_ctrl = false; |
| 2535 | d->new_ctrl = r->cache.shareable_bits; |
| 2536 | used_b = r->cache.shareable_bits; |
| 2537 | ctrl = d->ctrl_val; |
| 2538 | for (i = 0; i < closids_supported(); i++, ctrl++) { |
| 2539 | if (closid_allocated(i) && i != closid) { |
| 2540 | mode = rdtgroup_mode_by_closid(i); |
| 2541 | if (mode == RDT_MODE_PSEUDO_LOCKSETUP) |
| 2542 | /* |
| 2543 | * ctrl values for locksetup aren't relevant |
| 2544 | * until the schemata is written, and the mode |
| 2545 | * becomes RDT_MODE_PSEUDO_LOCKED. |
| 2546 | */ |
| 2547 | continue; |
| 2548 | /* |
| 2549 | * If CDP is active include peer domain's |
| 2550 | * usage to ensure there is no overlap |
| 2551 | * with an exclusive group. |
| 2552 | */ |
| 2553 | if (d_cdp) |
| 2554 | peer_ctl = d_cdp->ctrl_val[i]; |
| 2555 | else |
| 2556 | peer_ctl = 0; |
| 2557 | used_b |= *ctrl | peer_ctl; |
| 2558 | if (mode == RDT_MODE_SHAREABLE) |
| 2559 | d->new_ctrl |= *ctrl | peer_ctl; |
| 2560 | } |
| 2561 | } |
| 2562 | if (d->plr && d->plr->cbm > 0) |
| 2563 | used_b |= d->plr->cbm; |
| 2564 | unused_b = used_b ^ (BIT_MASK(r->cache.cbm_len) - 1); |
| 2565 | unused_b &= BIT_MASK(r->cache.cbm_len) - 1; |
| 2566 | d->new_ctrl |= unused_b; |
| 2567 | /* |
| 2568 | * Force the initial CBM to be valid, user can |
| 2569 | * modify the CBM based on system availability. |
| 2570 | */ |
| 2571 | d->new_ctrl = cbm_ensure_valid(d->new_ctrl, r); |
| 2572 | /* |
| 2573 | * Assign the u32 CBM to an unsigned long to ensure that |
| 2574 | * bitmap_weight() does not access out-of-bound memory. |
| 2575 | */ |
| 2576 | tmp_cbm = d->new_ctrl; |
| 2577 | if (bitmap_weight(&tmp_cbm, r->cache.cbm_len) < r->cache.min_cbm_bits) { |
| 2578 | rdt_last_cmd_printf("No space on %s:%d\n", r->name, d->id); |
| 2579 | return -ENOSPC; |
| 2580 | } |
| 2581 | d->have_new_ctrl = true; |
| 2582 | |
| 2583 | return 0; |
| 2584 | } |
| 2585 | |
| 2586 | /* |
| 2587 | * Initialize cache resources with default values. |
| 2588 | * |
| 2589 | * A new RDT group is being created on an allocation capable (CAT) |
| 2590 | * supporting system. Set this group up to start off with all usable |
| 2591 | * allocations. |
| 2592 | * |
| 2593 | * If there are no more shareable bits available on any domain then |
| 2594 | * the entire allocation will fail. |
| 2595 | */ |
| 2596 | static int rdtgroup_init_cat(struct rdt_resource *r, u32 closid) |
| 2597 | { |
| 2598 | struct rdt_domain *d; |
| 2599 | int ret; |
| 2600 | |
| 2601 | list_for_each_entry(d, &r->domains, list) { |
| 2602 | ret = __init_one_rdt_domain(d, r, closid); |
| 2603 | if (ret < 0) |
| 2604 | return ret; |
| 2605 | } |
| 2606 | |
| 2607 | return 0; |
| 2608 | } |
| 2609 | |
| 2610 | /* Initialize MBA resource with default values. */ |
| 2611 | static void rdtgroup_init_mba(struct rdt_resource *r) |
| 2612 | { |
| 2613 | struct rdt_domain *d; |
| 2614 | |
| 2615 | list_for_each_entry(d, &r->domains, list) { |
| 2616 | d->new_ctrl = is_mba_sc(r) ? MBA_MAX_MBPS : r->default_ctrl; |
| 2617 | d->have_new_ctrl = true; |
| 2618 | } |
| 2619 | } |
| 2620 | |
| 2621 | /* Initialize the RDT group's allocations. */ |
| 2622 | static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp) |
| 2623 | { |
| 2624 | struct rdt_resource *r; |
| 2625 | int ret; |
| 2626 | |
| 2627 | for_each_alloc_enabled_rdt_resource(r) { |
| 2628 | if (r->rid == RDT_RESOURCE_MBA) { |
| 2629 | rdtgroup_init_mba(r); |
| 2630 | } else { |
| 2631 | ret = rdtgroup_init_cat(r, rdtgrp->closid); |
| 2632 | if (ret < 0) |
| 2633 | return ret; |
| 2634 | } |
| 2635 | |
| 2636 | ret = update_domains(r, rdtgrp->closid); |
| 2637 | if (ret < 0) { |
| 2638 | rdt_last_cmd_puts("Failed to initialize allocations\n"); |
| 2639 | return ret; |
| 2640 | } |
| 2641 | |
| 2642 | } |
| 2643 | |
| 2644 | rdtgrp->mode = RDT_MODE_SHAREABLE; |
| 2645 | |
| 2646 | return 0; |
| 2647 | } |
| 2648 | |
| 2649 | static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, |
| 2650 | struct kernfs_node *prgrp_kn, |
| 2651 | const char *name, umode_t mode, |
| 2652 | enum rdt_group_type rtype, struct rdtgroup **r) |
| 2653 | { |
| 2654 | struct rdtgroup *prdtgrp, *rdtgrp; |
| 2655 | struct kernfs_node *kn; |
| 2656 | uint files = 0; |
| 2657 | int ret; |
| 2658 | |
| 2659 | prdtgrp = rdtgroup_kn_lock_live(parent_kn); |
| 2660 | if (!prdtgrp) { |
| 2661 | ret = -ENODEV; |
| 2662 | goto out_unlock; |
| 2663 | } |
| 2664 | |
| 2665 | if (rtype == RDTMON_GROUP && |
| 2666 | (prdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP || |
| 2667 | prdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)) { |
| 2668 | ret = -EINVAL; |
| 2669 | rdt_last_cmd_puts("Pseudo-locking in progress\n"); |
| 2670 | goto out_unlock; |
| 2671 | } |
| 2672 | |
| 2673 | /* allocate the rdtgroup. */ |
| 2674 | rdtgrp = kzalloc(sizeof(*rdtgrp), GFP_KERNEL); |
| 2675 | if (!rdtgrp) { |
| 2676 | ret = -ENOSPC; |
| 2677 | rdt_last_cmd_puts("Kernel out of memory\n"); |
| 2678 | goto out_unlock; |
| 2679 | } |
| 2680 | *r = rdtgrp; |
| 2681 | rdtgrp->mon.parent = prdtgrp; |
| 2682 | rdtgrp->type = rtype; |
| 2683 | INIT_LIST_HEAD(&rdtgrp->mon.crdtgrp_list); |
| 2684 | |
| 2685 | /* kernfs creates the directory for rdtgrp */ |
| 2686 | kn = kernfs_create_dir(parent_kn, name, mode, rdtgrp); |
| 2687 | if (IS_ERR(kn)) { |
| 2688 | ret = PTR_ERR(kn); |
| 2689 | rdt_last_cmd_puts("kernfs create error\n"); |
| 2690 | goto out_free_rgrp; |
| 2691 | } |
| 2692 | rdtgrp->kn = kn; |
| 2693 | |
| 2694 | /* |
| 2695 | * kernfs_remove() will drop the reference count on "kn" which |
| 2696 | * will free it. But we still need it to stick around for the |
| 2697 | * rdtgroup_kn_unlock(kn) call. Take one extra reference here, |
| 2698 | * which will be dropped by kernfs_put() in rdtgroup_remove(). |
| 2699 | */ |
| 2700 | kernfs_get(kn); |
| 2701 | |
| 2702 | ret = rdtgroup_kn_set_ugid(kn); |
| 2703 | if (ret) { |
| 2704 | rdt_last_cmd_puts("kernfs perm error\n"); |
| 2705 | goto out_destroy; |
| 2706 | } |
| 2707 | |
| 2708 | files = RFTYPE_BASE | BIT(RF_CTRLSHIFT + rtype); |
| 2709 | ret = rdtgroup_add_files(kn, files); |
| 2710 | if (ret) { |
| 2711 | rdt_last_cmd_puts("kernfs fill error\n"); |
| 2712 | goto out_destroy; |
| 2713 | } |
| 2714 | |
| 2715 | if (rdt_mon_capable) { |
| 2716 | ret = alloc_rmid(); |
| 2717 | if (ret < 0) { |
| 2718 | rdt_last_cmd_puts("Out of RMIDs\n"); |
| 2719 | goto out_destroy; |
| 2720 | } |
| 2721 | rdtgrp->mon.rmid = ret; |
| 2722 | |
| 2723 | ret = mkdir_mondata_all(kn, rdtgrp, &rdtgrp->mon.mon_data_kn); |
| 2724 | if (ret) { |
| 2725 | rdt_last_cmd_puts("kernfs subdir error\n"); |
| 2726 | goto out_idfree; |
| 2727 | } |
| 2728 | } |
| 2729 | kernfs_activate(kn); |
| 2730 | |
| 2731 | /* |
| 2732 | * The caller unlocks the parent_kn upon success. |
| 2733 | */ |
| 2734 | return 0; |
| 2735 | |
| 2736 | out_idfree: |
| 2737 | free_rmid(rdtgrp->mon.rmid); |
| 2738 | out_destroy: |
| 2739 | kernfs_put(rdtgrp->kn); |
| 2740 | kernfs_remove(rdtgrp->kn); |
| 2741 | out_free_rgrp: |
| 2742 | kfree(rdtgrp); |
| 2743 | out_unlock: |
| 2744 | rdtgroup_kn_unlock(parent_kn); |
| 2745 | return ret; |
| 2746 | } |
| 2747 | |
| 2748 | static void mkdir_rdt_prepare_clean(struct rdtgroup *rgrp) |
| 2749 | { |
| 2750 | kernfs_remove(rgrp->kn); |
| 2751 | free_rmid(rgrp->mon.rmid); |
| 2752 | rdtgroup_remove(rgrp); |
| 2753 | } |
| 2754 | |
| 2755 | /* |
| 2756 | * Create a monitor group under "mon_groups" directory of a control |
| 2757 | * and monitor group(ctrl_mon). This is a resource group |
| 2758 | * to monitor a subset of tasks and cpus in its parent ctrl_mon group. |
| 2759 | */ |
| 2760 | static int rdtgroup_mkdir_mon(struct kernfs_node *parent_kn, |
| 2761 | struct kernfs_node *prgrp_kn, |
| 2762 | const char *name, |
| 2763 | umode_t mode) |
| 2764 | { |
| 2765 | struct rdtgroup *rdtgrp, *prgrp; |
| 2766 | int ret; |
| 2767 | |
| 2768 | ret = mkdir_rdt_prepare(parent_kn, prgrp_kn, name, mode, RDTMON_GROUP, |
| 2769 | &rdtgrp); |
| 2770 | if (ret) |
| 2771 | return ret; |
| 2772 | |
| 2773 | prgrp = rdtgrp->mon.parent; |
| 2774 | rdtgrp->closid = prgrp->closid; |
| 2775 | |
| 2776 | /* |
| 2777 | * Add the rdtgrp to the list of rdtgrps the parent |
| 2778 | * ctrl_mon group has to track. |
| 2779 | */ |
| 2780 | list_add_tail(&rdtgrp->mon.crdtgrp_list, &prgrp->mon.crdtgrp_list); |
| 2781 | |
| 2782 | rdtgroup_kn_unlock(parent_kn); |
| 2783 | return ret; |
| 2784 | } |
| 2785 | |
| 2786 | /* |
| 2787 | * These are rdtgroups created under the root directory. They can be |
| 2788 | * used to allocate and monitor resources. |
| 2789 | */ |
| 2790 | static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn, |
| 2791 | struct kernfs_node *prgrp_kn, |
| 2792 | const char *name, umode_t mode) |
| 2793 | { |
| 2794 | struct rdtgroup *rdtgrp; |
| 2795 | struct kernfs_node *kn; |
| 2796 | u32 closid; |
| 2797 | int ret; |
| 2798 | |
| 2799 | ret = mkdir_rdt_prepare(parent_kn, prgrp_kn, name, mode, RDTCTRL_GROUP, |
| 2800 | &rdtgrp); |
| 2801 | if (ret) |
| 2802 | return ret; |
| 2803 | |
| 2804 | kn = rdtgrp->kn; |
| 2805 | ret = closid_alloc(); |
| 2806 | if (ret < 0) { |
| 2807 | rdt_last_cmd_puts("Out of CLOSIDs\n"); |
| 2808 | goto out_common_fail; |
| 2809 | } |
| 2810 | closid = ret; |
| 2811 | ret = 0; |
| 2812 | |
| 2813 | rdtgrp->closid = closid; |
| 2814 | ret = rdtgroup_init_alloc(rdtgrp); |
| 2815 | if (ret < 0) |
| 2816 | goto out_id_free; |
| 2817 | |
| 2818 | list_add(&rdtgrp->rdtgroup_list, &rdt_all_groups); |
| 2819 | |
| 2820 | if (rdt_mon_capable) { |
| 2821 | /* |
| 2822 | * Create an empty mon_groups directory to hold the subset |
| 2823 | * of tasks and cpus to monitor. |
| 2824 | */ |
| 2825 | ret = mongroup_create_dir(kn, rdtgrp, "mon_groups", NULL); |
| 2826 | if (ret) { |
| 2827 | rdt_last_cmd_puts("kernfs subdir error\n"); |
| 2828 | goto out_del_list; |
| 2829 | } |
| 2830 | } |
| 2831 | |
| 2832 | goto out_unlock; |
| 2833 | |
| 2834 | out_del_list: |
| 2835 | list_del(&rdtgrp->rdtgroup_list); |
| 2836 | out_id_free: |
| 2837 | closid_free(closid); |
| 2838 | out_common_fail: |
| 2839 | mkdir_rdt_prepare_clean(rdtgrp); |
| 2840 | out_unlock: |
| 2841 | rdtgroup_kn_unlock(parent_kn); |
| 2842 | return ret; |
| 2843 | } |
| 2844 | |
| 2845 | /* |
| 2846 | * We allow creating mon groups only within a directory called "mon_groups" |
| 2847 | * which is present in every ctrl_mon group. Check if this is a valid |
| 2848 | * "mon_groups" directory. |
| 2849 | * |
| 2850 | * 1. The directory should be named "mon_groups". |
| 2851 | * 2. The mon group itself should "not" be named "mon_groups". |
| 2852 | * This makes sure the "mon_groups" directory always has a ctrl_mon group |
| 2853 | * as parent. |
| 2854 | */ |
| 2855 | static bool is_mon_groups(struct kernfs_node *kn, const char *name) |
| 2856 | { |
| 2857 | return (!strcmp(kn->name, "mon_groups") && |
| 2858 | strcmp(name, "mon_groups")); |
| 2859 | } |
| 2860 | |
| 2861 | static int rdtgroup_mkdir(struct kernfs_node *parent_kn, const char *name, |
| 2862 | umode_t mode) |
| 2863 | { |
| 2864 | /* Do not accept '\n' to avoid an unparsable situation. */ |
| 2865 | if (strchr(name, '\n')) |
| 2866 | return -EINVAL; |
| 2867 | |
| 2868 | /* |
| 2869 | * If the parent directory is the root directory and RDT |
| 2870 | * allocation is supported, add a control and monitoring |
| 2871 | * subdirectory |
| 2872 | */ |
| 2873 | if (rdt_alloc_capable && parent_kn == rdtgroup_default.kn) |
| 2874 | return rdtgroup_mkdir_ctrl_mon(parent_kn, parent_kn, name, mode); |
| 2875 | |
| 2876 | /* |
| 2877 | * If RDT monitoring is supported and the parent directory is a valid |
| 2878 | * "mon_groups" directory, add a monitoring subdirectory. |
| 2879 | */ |
| 2880 | if (rdt_mon_capable && is_mon_groups(parent_kn, name)) |
| 2881 | return rdtgroup_mkdir_mon(parent_kn, parent_kn->parent, name, mode); |
| 2882 | |
| 2883 | return -EPERM; |
| 2884 | } |
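| | |
| | /* |
| |  * From user space this means, for example: |
| |  * |
| |  *   # mkdir /sys/fs/resctrl/grp0                  (new ctrl_mon group) |
| |  *   # mkdir /sys/fs/resctrl/grp0/mon_groups/mon0  (new mon group) |
| |  * |
| |  * mkdir anywhere else in the hierarchy fails with -EPERM. |
| |  */ |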
| 2885 | |
| 2886 | static int rdtgroup_rmdir_mon(struct kernfs_node *kn, struct rdtgroup *rdtgrp, |
| 2887 | cpumask_var_t tmpmask) |
| 2888 | { |
| 2889 | struct rdtgroup *prdtgrp = rdtgrp->mon.parent; |
| 2890 | int cpu; |
| 2891 | |
| 2892 | /* Give any tasks back to the parent group */ |
| 2893 | rdt_move_group_tasks(rdtgrp, prdtgrp, tmpmask); |
| 2894 | |
| 2895 | /* Update per cpu rmid of the moved CPUs first */ |
| 2896 | for_each_cpu(cpu, &rdtgrp->cpu_mask) |
| 2897 | per_cpu(pqr_state.default_rmid, cpu) = prdtgrp->mon.rmid; |
| 2898 | /* |
| 2899 | * Update the MSR on moved CPUs and CPUs which have a moved |
| 2900 | * task running on them. |
| 2901 | */ |
| 2902 | cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask); |
| 2903 | update_closid_rmid(tmpmask, NULL); |
| 2904 | |
| 2905 | rdtgrp->flags = RDT_DELETED; |
| 2906 | free_rmid(rdtgrp->mon.rmid); |
| 2907 | |
| 2908 | /* |
| 2909 | * Remove the rdtgrp from the parent ctrl_mon group's list |
| 2910 | */ |
| 2911 | WARN_ON(list_empty(&prdtgrp->mon.crdtgrp_list)); |
| 2912 | list_del(&rdtgrp->mon.crdtgrp_list); |
| 2913 | |
| 2914 | kernfs_remove(rdtgrp->kn); |
| 2915 | |
| 2916 | return 0; |
| 2917 | } |
| 2918 | |
| 2919 | static int rdtgroup_ctrl_remove(struct kernfs_node *kn, |
| 2920 | struct rdtgroup *rdtgrp) |
| 2921 | { |
| 2922 | rdtgrp->flags = RDT_DELETED; |
| 2923 | list_del(&rdtgrp->rdtgroup_list); |
| 2924 | |
| 2925 | kernfs_remove(rdtgrp->kn); |
| 2926 | return 0; |
| 2927 | } |
| 2928 | |
| 2929 | static int rdtgroup_rmdir_ctrl(struct kernfs_node *kn, struct rdtgroup *rdtgrp, |
| 2930 | cpumask_var_t tmpmask) |
| 2931 | { |
| 2932 | int cpu; |
| 2933 | |
| 2934 | /* Give any tasks back to the default group */ |
| 2935 | rdt_move_group_tasks(rdtgrp, &rdtgroup_default, tmpmask); |
| 2936 | |
| 2937 | /* Give any CPUs back to the default group */ |
| 2938 | cpumask_or(&rdtgroup_default.cpu_mask, |
| 2939 | &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask); |
| 2940 | |
| 2941 | /* Update per cpu closid and rmid of the moved CPUs first */ |
| 2942 | for_each_cpu(cpu, &rdtgrp->cpu_mask) { |
| 2943 | per_cpu(pqr_state.default_closid, cpu) = rdtgroup_default.closid; |
| 2944 | per_cpu(pqr_state.default_rmid, cpu) = rdtgroup_default.mon.rmid; |
| 2945 | } |
| 2946 | |
| 2947 | /* |
| 2948 | * Update the MSR on moved CPUs and CPUs which have a moved |
| 2949 | * task running on them. |
| 2950 | */ |
| 2951 | cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask); |
| 2952 | update_closid_rmid(tmpmask, NULL); |
| 2953 | |
| 2954 | closid_free(rdtgrp->closid); |
| 2955 | free_rmid(rdtgrp->mon.rmid); |
| 2956 | |
| 2957 | rdtgroup_ctrl_remove(kn, rdtgrp); |
| 2958 | |
| 2959 | /* |
| 2960 | * Free all the child monitor group rmids. |
| 2961 | */ |
| 2962 | free_all_child_rdtgrp(rdtgrp); |
| 2963 | |
| 2964 | return 0; |
| 2965 | } |
| 2966 | |
| 2967 | static int rdtgroup_rmdir(struct kernfs_node *kn) |
| 2968 | { |
| 2969 | struct kernfs_node *parent_kn = kn->parent; |
| 2970 | struct rdtgroup *rdtgrp; |
| 2971 | cpumask_var_t tmpmask; |
| 2972 | int ret = 0; |
| 2973 | |
| 2974 | if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL)) |
| 2975 | return -ENOMEM; |
| 2976 | |
| 2977 | rdtgrp = rdtgroup_kn_lock_live(kn); |
| 2978 | if (!rdtgrp) { |
| 2979 | ret = -EPERM; |
| 2980 | goto out; |
| 2981 | } |
| 2982 | |
| 2983 | /* |
| 2984 | * If the rdtgroup is a ctrl_mon group and parent directory |
| 2985 | * is the root directory, remove the ctrl_mon group. |
| 2986 | * |
| 2987 | * If the rdtgroup is a mon group and parent directory |
| 2988 | * is a valid "mon_groups" directory, remove the mon group. |
| 2989 | */ |
| 2990 | if (rdtgrp->type == RDTCTRL_GROUP && parent_kn == rdtgroup_default.kn && |
| 2991 | rdtgrp != &rdtgroup_default) { |
| 2992 | if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP || |
| 2993 | rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) { |
| 2994 | ret = rdtgroup_ctrl_remove(kn, rdtgrp); |
| 2995 | } else { |
| 2996 | ret = rdtgroup_rmdir_ctrl(kn, rdtgrp, tmpmask); |
| 2997 | } |
| 2998 | } else if (rdtgrp->type == RDTMON_GROUP && |
| 2999 | is_mon_groups(parent_kn, kn->name)) { |
| 3000 | ret = rdtgroup_rmdir_mon(kn, rdtgrp, tmpmask); |
| 3001 | } else { |
| 3002 | ret = -EPERM; |
| 3003 | } |
| 3004 | |
| 3005 | out: |
| 3006 | rdtgroup_kn_unlock(kn); |
| 3007 | free_cpumask_var(tmpmask); |
| 3008 | return ret; |
| 3009 | } |
| 3010 | |
| 3011 | static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf) |
| 3012 | { |
| 3013 | if (rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled) |
| 3014 | seq_puts(seq, ",cdp"); |
| 3015 | |
| 3016 | if (rdt_resources_all[RDT_RESOURCE_L2DATA].alloc_enabled) |
| 3017 | seq_puts(seq, ",cdpl2"); |
| 3018 | |
| 3019 | if (is_mba_sc(&rdt_resources_all[RDT_RESOURCE_MBA])) |
| 3020 | seq_puts(seq, ",mba_MBps"); |
| 3021 | |
| 3022 | return 0; |
| 3023 | } |
| 3024 | |
| 3025 | static struct kernfs_syscall_ops rdtgroup_kf_syscall_ops = { |
| 3026 | .mkdir = rdtgroup_mkdir, |
| 3027 | .rmdir = rdtgroup_rmdir, |
| 3028 | .show_options = rdtgroup_show_options, |
| 3029 | }; |
| 3030 | |
| 3031 | static int __init rdtgroup_setup_root(void) |
| 3032 | { |
| 3033 | int ret; |
| 3034 | |
| 3035 | rdt_root = kernfs_create_root(&rdtgroup_kf_syscall_ops, |
| 3036 | KERNFS_ROOT_CREATE_DEACTIVATED | |
| 3037 | KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK, |
| 3038 | &rdtgroup_default); |
| 3039 | if (IS_ERR(rdt_root)) |
| 3040 | return PTR_ERR(rdt_root); |
| 3041 | |
| 3042 | mutex_lock(&rdtgroup_mutex); |
| 3043 | |
| 3044 | rdtgroup_default.closid = 0; |
| 3045 | rdtgroup_default.mon.rmid = 0; |
| 3046 | rdtgroup_default.type = RDTCTRL_GROUP; |
| 3047 | INIT_LIST_HEAD(&rdtgroup_default.mon.crdtgrp_list); |
| 3048 | |
| 3049 | list_add(&rdtgroup_default.rdtgroup_list, &rdt_all_groups); |
| 3050 | |
| 3051 | ret = rdtgroup_add_files(rdt_root->kn, RF_CTRL_BASE); |
| 3052 | if (ret) { |
| 3053 | kernfs_destroy_root(rdt_root); |
| 3054 | goto out; |
| 3055 | } |
| 3056 | |
| 3057 | rdtgroup_default.kn = rdt_root->kn; |
| 3058 | kernfs_activate(rdtgroup_default.kn); |
| 3059 | |
| 3060 | out: |
| 3061 | mutex_unlock(&rdtgroup_mutex); |
| 3062 | |
| 3063 | return ret; |
| 3064 | } |
| 3065 | |
| 3066 | /* |
| 3067 | * rdtgroup_init - rdtgroup initialization |
| 3068 | * |
| 3069 | * Setup resctrl file system including set up root, create mount point, |
| 3070 | * register rdtgroup filesystem, and initialize files under root directory. |
| 3071 | * |
| 3072 | * Return: 0 on success or -errno |
| 3073 | */ |
| 3074 | int __init rdtgroup_init(void) |
| 3075 | { |
| 3076 | int ret = 0; |
| 3077 | |
| 3078 | seq_buf_init(&last_cmd_status, last_cmd_status_buf, |
| 3079 | sizeof(last_cmd_status_buf)); |
| 3080 | |
| 3081 | ret = rdtgroup_setup_root(); |
| 3082 | if (ret) |
| 3083 | return ret; |
| 3084 | |
| 3085 | ret = sysfs_create_mount_point(fs_kobj, "resctrl"); |
| 3086 | if (ret) |
| 3087 | goto cleanup_root; |
| 3088 | |
| 3089 | ret = register_filesystem(&rdt_fs_type); |
| 3090 | if (ret) |
| 3091 | goto cleanup_mountpoint; |
| 3092 | |
| 3093 | /* |
| 3094 | * Adding the resctrl debugfs directory here may not be ideal since |
| 3095 | * it would let the resctrl debugfs directory appear on the debugfs |
| 3096 | * filesystem before the resctrl filesystem is mounted. |
| 3097 | * It may also be ok since that would enable debugging of RDT before |
| 3098 | * resctrl is mounted. |
| 3099 | * The reason why the debugfs directory is created here and not in |
| 3100 | * rdt_get_tree() is because rdt_get_tree() takes rdtgroup_mutex and |
| 3101 | * during the debugfs directory creation also &sb->s_type->i_mutex_key |
| 3102 | * (the lockdep class of inode->i_rwsem). Other filesystem |
| 3103 | * interactions (eg. SyS_getdents) have the lock ordering: |
| 3104 | * &sb->s_type->i_mutex_key --> &mm->mmap_sem |
| 3105 | * During mmap(), called with &mm->mmap_sem, the rdtgroup_mutex |
| 3106 | * is taken, thus creating dependency: |
| 3107 | * &mm->mmap_sem --> rdtgroup_mutex, which combined with the other |
| 3108 | * two lock dependencies can cause a deadlock. |
| 3109 | * By creating the debugfs directory here we avoid a dependency |
| 3110 | * that may cause deadlock (even though file operations cannot |
| 3111 | * occur until the filesystem is mounted; I do not know how to |
| 3112 | * tell lockdep that). |
| 3113 | */ |
| 3114 | debugfs_resctrl = debugfs_create_dir("resctrl", NULL); |
| 3115 | |
| 3116 | return 0; |
| 3117 | |
| 3118 | cleanup_mountpoint: |
| 3119 | sysfs_remove_mount_point(fs_kobj, "resctrl"); |
| 3120 | cleanup_root: |
| 3121 | kernfs_destroy_root(rdt_root); |
| 3122 | |
| 3123 | return ret; |
| 3124 | } |
| 3125 | |
| 3126 | void __exit rdtgroup_exit(void) |
| 3127 | { |
| 3128 | debugfs_remove_recursive(debugfs_resctrl); |
| 3129 | unregister_filesystem(&rdt_fs_type); |
| 3130 | sysfs_remove_mount_point(fs_kobj, "resctrl"); |
| 3131 | kernfs_destroy_root(rdt_root); |
| 3132 | } |