// SPDX-License-Identifier: GPL-2.0
/*
 * PCIe AER software error injection support.
 *
 * Debugging PCIe AER code is quite difficult because it is hard to
 * trigger the various real hardware errors. Software-based error
 * injection can fake almost all kinds of errors with the help of a
 * user-space helper tool, aer-inject, which can be obtained from:
 *   http://www.kernel.org/pub/linux/utils/pci/aer-inject/
 *
 * Copyright 2009 Intel Corporation.
 *     Huang Ying <ying.huang@intel.com>
 */
| 14 | |
| 15 | #define dev_fmt(fmt) "aer_inject: " fmt |
| 16 | |
| 17 | #include <linux/module.h> |
| 18 | #include <linux/init.h> |
| 19 | #include <linux/irq.h> |
| 20 | #include <linux/miscdevice.h> |
| 21 | #include <linux/pci.h> |
| 22 | #include <linux/slab.h> |
| 23 | #include <linux/fs.h> |
| 24 | #include <linux/uaccess.h> |
| 25 | #include <linux/stddef.h> |
| 26 | #include <linux/device.h> |
| 27 | |
| 28 | #include "portdrv.h" |
| 29 | |
| 30 | /* Override the existing corrected and uncorrected error masks */ |
| 31 | static bool aer_mask_override; |
| 32 | module_param(aer_mask_override, bool, 0); |
| 33 | |
| 34 | struct aer_error_inj { |
| 35 | u8 bus; |
| 36 | u8 dev; |
| 37 | u8 fn; |
| 38 | u32 uncor_status; |
| 39 | u32 cor_status; |
| 40 | u32 header_log0; |
| 41 | u32 header_log1; |
| 42 | u32 header_log2; |
| 43 | u32 header_log3; |
| 44 | u32 domain; |
| 45 | }; |
| 46 | |
| 47 | struct aer_error { |
| 48 | struct list_head list; |
| 49 | u32 domain; |
| 50 | unsigned int bus; |
| 51 | unsigned int devfn; |
| 52 | int pos_cap_err; |
| 53 | |
| 54 | u32 uncor_status; |
| 55 | u32 cor_status; |
| 56 | u32 header_log0; |
| 57 | u32 header_log1; |
| 58 | u32 header_log2; |
| 59 | u32 header_log3; |
| 60 | u32 root_status; |
| 61 | u32 source_id; |
| 62 | }; |
| 63 | |
| 64 | struct pci_bus_ops { |
| 65 | struct list_head list; |
| 66 | struct pci_bus *bus; |
| 67 | struct pci_ops *ops; |
| 68 | }; |
| 69 | |
/* All simulated error records (struct aer_error). */
static LIST_HEAD(einjected);

/* Saved original ops for every bus we have hooked (struct pci_bus_ops). */
static LIST_HEAD(pci_bus_ops_list);

/* Serializes access to both einjected and pci_bus_ops_list. */
static DEFINE_SPINLOCK(inject_lock);
| 76 | |
| 77 | static void aer_error_init(struct aer_error *err, u32 domain, |
| 78 | unsigned int bus, unsigned int devfn, |
| 79 | int pos_cap_err) |
| 80 | { |
| 81 | INIT_LIST_HEAD(&err->list); |
| 82 | err->domain = domain; |
| 83 | err->bus = bus; |
| 84 | err->devfn = devfn; |
| 85 | err->pos_cap_err = pos_cap_err; |
| 86 | } |
| 87 | |
| 88 | /* inject_lock must be held before calling */ |
| 89 | static struct aer_error *__find_aer_error(u32 domain, unsigned int bus, |
| 90 | unsigned int devfn) |
| 91 | { |
| 92 | struct aer_error *err; |
| 93 | |
| 94 | list_for_each_entry(err, &einjected, list) { |
| 95 | if (domain == err->domain && |
| 96 | bus == err->bus && |
| 97 | devfn == err->devfn) |
| 98 | return err; |
| 99 | } |
| 100 | return NULL; |
| 101 | } |
| 102 | |
| 103 | /* inject_lock must be held before calling */ |
| 104 | static struct aer_error *__find_aer_error_by_dev(struct pci_dev *dev) |
| 105 | { |
| 106 | int domain = pci_domain_nr(dev->bus); |
| 107 | if (domain < 0) |
| 108 | return NULL; |
| 109 | return __find_aer_error(domain, dev->bus->number, dev->devfn); |
| 110 | } |
| 111 | |
| 112 | /* inject_lock must be held before calling */ |
| 113 | static struct pci_ops *__find_pci_bus_ops(struct pci_bus *bus) |
| 114 | { |
| 115 | struct pci_bus_ops *bus_ops; |
| 116 | |
| 117 | list_for_each_entry(bus_ops, &pci_bus_ops_list, list) { |
| 118 | if (bus_ops->bus == bus) |
| 119 | return bus_ops->ops; |
| 120 | } |
| 121 | return NULL; |
| 122 | } |
| 123 | |
| 124 | static struct pci_bus_ops *pci_bus_ops_pop(void) |
| 125 | { |
| 126 | unsigned long flags; |
| 127 | struct pci_bus_ops *bus_ops; |
| 128 | |
| 129 | spin_lock_irqsave(&inject_lock, flags); |
| 130 | bus_ops = list_first_entry_or_null(&pci_bus_ops_list, |
| 131 | struct pci_bus_ops, list); |
| 132 | if (bus_ops) |
| 133 | list_del(&bus_ops->list); |
| 134 | spin_unlock_irqrestore(&inject_lock, flags); |
| 135 | return bus_ops; |
| 136 | } |
| 137 | |
| 138 | static u32 *find_pci_config_dword(struct aer_error *err, int where, |
| 139 | int *prw1cs) |
| 140 | { |
| 141 | int rw1cs = 0; |
| 142 | u32 *target = NULL; |
| 143 | |
| 144 | if (err->pos_cap_err == -1) |
| 145 | return NULL; |
| 146 | |
| 147 | switch (where - err->pos_cap_err) { |
| 148 | case PCI_ERR_UNCOR_STATUS: |
| 149 | target = &err->uncor_status; |
| 150 | rw1cs = 1; |
| 151 | break; |
| 152 | case PCI_ERR_COR_STATUS: |
| 153 | target = &err->cor_status; |
| 154 | rw1cs = 1; |
| 155 | break; |
| 156 | case PCI_ERR_HEADER_LOG: |
| 157 | target = &err->header_log0; |
| 158 | break; |
| 159 | case PCI_ERR_HEADER_LOG+4: |
| 160 | target = &err->header_log1; |
| 161 | break; |
| 162 | case PCI_ERR_HEADER_LOG+8: |
| 163 | target = &err->header_log2; |
| 164 | break; |
| 165 | case PCI_ERR_HEADER_LOG+12: |
| 166 | target = &err->header_log3; |
| 167 | break; |
| 168 | case PCI_ERR_ROOT_STATUS: |
| 169 | target = &err->root_status; |
| 170 | rw1cs = 1; |
| 171 | break; |
| 172 | case PCI_ERR_ROOT_ERR_SRC: |
| 173 | target = &err->source_id; |
| 174 | break; |
| 175 | } |
| 176 | if (prw1cs) |
| 177 | *prw1cs = rw1cs; |
| 178 | return target; |
| 179 | } |
| 180 | |
| 181 | static int aer_inj_read(struct pci_bus *bus, unsigned int devfn, int where, |
| 182 | int size, u32 *val) |
| 183 | { |
| 184 | struct pci_ops *ops, *my_ops; |
| 185 | int rv; |
| 186 | |
| 187 | ops = __find_pci_bus_ops(bus); |
| 188 | if (!ops) |
| 189 | return -1; |
| 190 | |
| 191 | my_ops = bus->ops; |
| 192 | bus->ops = ops; |
| 193 | rv = ops->read(bus, devfn, where, size, val); |
| 194 | bus->ops = my_ops; |
| 195 | |
| 196 | return rv; |
| 197 | } |
| 198 | |
| 199 | static int aer_inj_write(struct pci_bus *bus, unsigned int devfn, int where, |
| 200 | int size, u32 val) |
| 201 | { |
| 202 | struct pci_ops *ops, *my_ops; |
| 203 | int rv; |
| 204 | |
| 205 | ops = __find_pci_bus_ops(bus); |
| 206 | if (!ops) |
| 207 | return -1; |
| 208 | |
| 209 | my_ops = bus->ops; |
| 210 | bus->ops = ops; |
| 211 | rv = ops->write(bus, devfn, where, size, val); |
| 212 | bus->ops = my_ops; |
| 213 | |
| 214 | return rv; |
| 215 | } |
| 216 | |
| 217 | static int aer_inj_read_config(struct pci_bus *bus, unsigned int devfn, |
| 218 | int where, int size, u32 *val) |
| 219 | { |
| 220 | u32 *sim; |
| 221 | struct aer_error *err; |
| 222 | unsigned long flags; |
| 223 | int domain; |
| 224 | int rv; |
| 225 | |
| 226 | spin_lock_irqsave(&inject_lock, flags); |
| 227 | if (size != sizeof(u32)) |
| 228 | goto out; |
| 229 | domain = pci_domain_nr(bus); |
| 230 | if (domain < 0) |
| 231 | goto out; |
| 232 | err = __find_aer_error(domain, bus->number, devfn); |
| 233 | if (!err) |
| 234 | goto out; |
| 235 | |
| 236 | sim = find_pci_config_dword(err, where, NULL); |
| 237 | if (sim) { |
| 238 | *val = *sim; |
| 239 | spin_unlock_irqrestore(&inject_lock, flags); |
| 240 | return 0; |
| 241 | } |
| 242 | out: |
| 243 | rv = aer_inj_read(bus, devfn, where, size, val); |
| 244 | spin_unlock_irqrestore(&inject_lock, flags); |
| 245 | return rv; |
| 246 | } |
| 247 | |
| 248 | static int aer_inj_write_config(struct pci_bus *bus, unsigned int devfn, |
| 249 | int where, int size, u32 val) |
| 250 | { |
| 251 | u32 *sim; |
| 252 | struct aer_error *err; |
| 253 | unsigned long flags; |
| 254 | int rw1cs; |
| 255 | int domain; |
| 256 | int rv; |
| 257 | |
| 258 | spin_lock_irqsave(&inject_lock, flags); |
| 259 | if (size != sizeof(u32)) |
| 260 | goto out; |
| 261 | domain = pci_domain_nr(bus); |
| 262 | if (domain < 0) |
| 263 | goto out; |
| 264 | err = __find_aer_error(domain, bus->number, devfn); |
| 265 | if (!err) |
| 266 | goto out; |
| 267 | |
| 268 | sim = find_pci_config_dword(err, where, &rw1cs); |
| 269 | if (sim) { |
| 270 | if (rw1cs) |
| 271 | *sim ^= val; |
| 272 | else |
| 273 | *sim = val; |
| 274 | spin_unlock_irqrestore(&inject_lock, flags); |
| 275 | return 0; |
| 276 | } |
| 277 | out: |
| 278 | rv = aer_inj_write(bus, devfn, where, size, val); |
| 279 | spin_unlock_irqrestore(&inject_lock, flags); |
| 280 | return rv; |
| 281 | } |
| 282 | |
| 283 | static struct pci_ops aer_inj_pci_ops = { |
| 284 | .read = aer_inj_read_config, |
| 285 | .write = aer_inj_write_config, |
| 286 | }; |
| 287 | |
| 288 | static void pci_bus_ops_init(struct pci_bus_ops *bus_ops, |
| 289 | struct pci_bus *bus, |
| 290 | struct pci_ops *ops) |
| 291 | { |
| 292 | INIT_LIST_HEAD(&bus_ops->list); |
| 293 | bus_ops->bus = bus; |
| 294 | bus_ops->ops = ops; |
| 295 | } |
| 296 | |
| 297 | static int pci_bus_set_aer_ops(struct pci_bus *bus) |
| 298 | { |
| 299 | struct pci_ops *ops; |
| 300 | struct pci_bus_ops *bus_ops; |
| 301 | unsigned long flags; |
| 302 | |
| 303 | bus_ops = kmalloc(sizeof(*bus_ops), GFP_KERNEL); |
| 304 | if (!bus_ops) |
| 305 | return -ENOMEM; |
| 306 | ops = pci_bus_set_ops(bus, &aer_inj_pci_ops); |
| 307 | spin_lock_irqsave(&inject_lock, flags); |
| 308 | if (ops == &aer_inj_pci_ops) |
| 309 | goto out; |
| 310 | pci_bus_ops_init(bus_ops, bus, ops); |
| 311 | list_add(&bus_ops->list, &pci_bus_ops_list); |
| 312 | bus_ops = NULL; |
| 313 | out: |
| 314 | spin_unlock_irqrestore(&inject_lock, flags); |
| 315 | kfree(bus_ops); |
| 316 | return 0; |
| 317 | } |
| 318 | |
| 319 | static int aer_inject(struct aer_error_inj *einj) |
| 320 | { |
| 321 | struct aer_error *err, *rperr; |
| 322 | struct aer_error *err_alloc = NULL, *rperr_alloc = NULL; |
| 323 | struct pci_dev *dev, *rpdev; |
| 324 | struct pcie_device *edev; |
| 325 | struct device *device; |
| 326 | unsigned long flags; |
| 327 | unsigned int devfn = PCI_DEVFN(einj->dev, einj->fn); |
| 328 | int pos_cap_err, rp_pos_cap_err; |
| 329 | u32 sever, cor_mask, uncor_mask, cor_mask_orig = 0, uncor_mask_orig = 0; |
| 330 | int ret = 0; |
| 331 | |
| 332 | dev = pci_get_domain_bus_and_slot(einj->domain, einj->bus, devfn); |
| 333 | if (!dev) |
| 334 | return -ENODEV; |
| 335 | rpdev = pcie_find_root_port(dev); |
| 336 | if (!rpdev) { |
| 337 | pci_err(dev, "Root port not found\n"); |
| 338 | ret = -ENODEV; |
| 339 | goto out_put; |
| 340 | } |
| 341 | |
| 342 | pos_cap_err = dev->aer_cap; |
| 343 | if (!pos_cap_err) { |
| 344 | pci_err(dev, "Device doesn't support AER\n"); |
| 345 | ret = -EPROTONOSUPPORT; |
| 346 | goto out_put; |
| 347 | } |
| 348 | pci_read_config_dword(dev, pos_cap_err + PCI_ERR_UNCOR_SEVER, &sever); |
| 349 | pci_read_config_dword(dev, pos_cap_err + PCI_ERR_COR_MASK, &cor_mask); |
| 350 | pci_read_config_dword(dev, pos_cap_err + PCI_ERR_UNCOR_MASK, |
| 351 | &uncor_mask); |
| 352 | |
| 353 | rp_pos_cap_err = rpdev->aer_cap; |
| 354 | if (!rp_pos_cap_err) { |
| 355 | pci_err(rpdev, "Root port doesn't support AER\n"); |
| 356 | ret = -EPROTONOSUPPORT; |
| 357 | goto out_put; |
| 358 | } |
| 359 | |
| 360 | err_alloc = kzalloc(sizeof(struct aer_error), GFP_KERNEL); |
| 361 | if (!err_alloc) { |
| 362 | ret = -ENOMEM; |
| 363 | goto out_put; |
| 364 | } |
| 365 | rperr_alloc = kzalloc(sizeof(struct aer_error), GFP_KERNEL); |
| 366 | if (!rperr_alloc) { |
| 367 | ret = -ENOMEM; |
| 368 | goto out_put; |
| 369 | } |
| 370 | |
| 371 | if (aer_mask_override) { |
| 372 | cor_mask_orig = cor_mask; |
| 373 | cor_mask &= !(einj->cor_status); |
| 374 | pci_write_config_dword(dev, pos_cap_err + PCI_ERR_COR_MASK, |
| 375 | cor_mask); |
| 376 | |
| 377 | uncor_mask_orig = uncor_mask; |
| 378 | uncor_mask &= !(einj->uncor_status); |
| 379 | pci_write_config_dword(dev, pos_cap_err + PCI_ERR_UNCOR_MASK, |
| 380 | uncor_mask); |
| 381 | } |
| 382 | |
| 383 | spin_lock_irqsave(&inject_lock, flags); |
| 384 | |
| 385 | err = __find_aer_error_by_dev(dev); |
| 386 | if (!err) { |
| 387 | err = err_alloc; |
| 388 | err_alloc = NULL; |
| 389 | aer_error_init(err, einj->domain, einj->bus, devfn, |
| 390 | pos_cap_err); |
| 391 | list_add(&err->list, &einjected); |
| 392 | } |
| 393 | err->uncor_status |= einj->uncor_status; |
| 394 | err->cor_status |= einj->cor_status; |
| 395 | err->header_log0 = einj->header_log0; |
| 396 | err->header_log1 = einj->header_log1; |
| 397 | err->header_log2 = einj->header_log2; |
| 398 | err->header_log3 = einj->header_log3; |
| 399 | |
| 400 | if (!aer_mask_override && einj->cor_status && |
| 401 | !(einj->cor_status & ~cor_mask)) { |
| 402 | ret = -EINVAL; |
| 403 | pci_warn(dev, "The correctable error(s) is masked by device\n"); |
| 404 | spin_unlock_irqrestore(&inject_lock, flags); |
| 405 | goto out_put; |
| 406 | } |
| 407 | if (!aer_mask_override && einj->uncor_status && |
| 408 | !(einj->uncor_status & ~uncor_mask)) { |
| 409 | ret = -EINVAL; |
| 410 | pci_warn(dev, "The uncorrectable error(s) is masked by device\n"); |
| 411 | spin_unlock_irqrestore(&inject_lock, flags); |
| 412 | goto out_put; |
| 413 | } |
| 414 | |
| 415 | rperr = __find_aer_error_by_dev(rpdev); |
| 416 | if (!rperr) { |
| 417 | rperr = rperr_alloc; |
| 418 | rperr_alloc = NULL; |
| 419 | aer_error_init(rperr, pci_domain_nr(rpdev->bus), |
| 420 | rpdev->bus->number, rpdev->devfn, |
| 421 | rp_pos_cap_err); |
| 422 | list_add(&rperr->list, &einjected); |
| 423 | } |
| 424 | if (einj->cor_status) { |
| 425 | if (rperr->root_status & PCI_ERR_ROOT_COR_RCV) |
| 426 | rperr->root_status |= PCI_ERR_ROOT_MULTI_COR_RCV; |
| 427 | else |
| 428 | rperr->root_status |= PCI_ERR_ROOT_COR_RCV; |
| 429 | rperr->source_id &= 0xffff0000; |
| 430 | rperr->source_id |= (einj->bus << 8) | devfn; |
| 431 | } |
| 432 | if (einj->uncor_status) { |
| 433 | if (rperr->root_status & PCI_ERR_ROOT_UNCOR_RCV) |
| 434 | rperr->root_status |= PCI_ERR_ROOT_MULTI_UNCOR_RCV; |
| 435 | if (sever & einj->uncor_status) { |
| 436 | rperr->root_status |= PCI_ERR_ROOT_FATAL_RCV; |
| 437 | if (!(rperr->root_status & PCI_ERR_ROOT_UNCOR_RCV)) |
| 438 | rperr->root_status |= PCI_ERR_ROOT_FIRST_FATAL; |
| 439 | } else |
| 440 | rperr->root_status |= PCI_ERR_ROOT_NONFATAL_RCV; |
| 441 | rperr->root_status |= PCI_ERR_ROOT_UNCOR_RCV; |
| 442 | rperr->source_id &= 0x0000ffff; |
| 443 | rperr->source_id |= ((einj->bus << 8) | devfn) << 16; |
| 444 | } |
| 445 | spin_unlock_irqrestore(&inject_lock, flags); |
| 446 | |
| 447 | if (aer_mask_override) { |
| 448 | pci_write_config_dword(dev, pos_cap_err + PCI_ERR_COR_MASK, |
| 449 | cor_mask_orig); |
| 450 | pci_write_config_dword(dev, pos_cap_err + PCI_ERR_UNCOR_MASK, |
| 451 | uncor_mask_orig); |
| 452 | } |
| 453 | |
| 454 | ret = pci_bus_set_aer_ops(dev->bus); |
| 455 | if (ret) |
| 456 | goto out_put; |
| 457 | ret = pci_bus_set_aer_ops(rpdev->bus); |
| 458 | if (ret) |
| 459 | goto out_put; |
| 460 | |
| 461 | device = pcie_port_find_device(rpdev, PCIE_PORT_SERVICE_AER); |
| 462 | if (device) { |
| 463 | edev = to_pcie_device(device); |
| 464 | if (!get_service_data(edev)) { |
| 465 | pci_warn(edev->port, "AER service is not initialized\n"); |
| 466 | ret = -EPROTONOSUPPORT; |
| 467 | goto out_put; |
| 468 | } |
| 469 | pci_info(edev->port, "Injecting errors %08x/%08x into device %s\n", |
| 470 | einj->cor_status, einj->uncor_status, pci_name(dev)); |
| 471 | local_irq_disable(); |
| 472 | generic_handle_irq(edev->irq); |
| 473 | local_irq_enable(); |
| 474 | } else { |
| 475 | pci_err(rpdev, "AER device not found\n"); |
| 476 | ret = -ENODEV; |
| 477 | } |
| 478 | out_put: |
| 479 | kfree(err_alloc); |
| 480 | kfree(rperr_alloc); |
| 481 | pci_dev_put(dev); |
| 482 | return ret; |
| 483 | } |
| 484 | |
| 485 | static ssize_t aer_inject_write(struct file *filp, const char __user *ubuf, |
| 486 | size_t usize, loff_t *off) |
| 487 | { |
| 488 | struct aer_error_inj einj; |
| 489 | int ret; |
| 490 | |
| 491 | if (!capable(CAP_SYS_ADMIN)) |
| 492 | return -EPERM; |
| 493 | if (usize < offsetof(struct aer_error_inj, domain) || |
| 494 | usize > sizeof(einj)) |
| 495 | return -EINVAL; |
| 496 | |
| 497 | memset(&einj, 0, sizeof(einj)); |
| 498 | if (copy_from_user(&einj, ubuf, usize)) |
| 499 | return -EFAULT; |
| 500 | |
| 501 | ret = aer_inject(&einj); |
| 502 | return ret ? ret : usize; |
| 503 | } |
| 504 | |
| 505 | static const struct file_operations aer_inject_fops = { |
| 506 | .write = aer_inject_write, |
| 507 | .owner = THIS_MODULE, |
| 508 | .llseek = noop_llseek, |
| 509 | }; |
| 510 | |
| 511 | static struct miscdevice aer_inject_device = { |
| 512 | .minor = MISC_DYNAMIC_MINOR, |
| 513 | .name = "aer_inject", |
| 514 | .fops = &aer_inject_fops, |
| 515 | }; |
| 516 | |
| 517 | static int __init aer_inject_init(void) |
| 518 | { |
| 519 | return misc_register(&aer_inject_device); |
| 520 | } |
| 521 | |
| 522 | static void __exit aer_inject_exit(void) |
| 523 | { |
| 524 | struct aer_error *err, *err_next; |
| 525 | unsigned long flags; |
| 526 | struct pci_bus_ops *bus_ops; |
| 527 | |
| 528 | misc_deregister(&aer_inject_device); |
| 529 | |
| 530 | while ((bus_ops = pci_bus_ops_pop())) { |
| 531 | pci_bus_set_ops(bus_ops->bus, bus_ops->ops); |
| 532 | kfree(bus_ops); |
| 533 | } |
| 534 | |
| 535 | spin_lock_irqsave(&inject_lock, flags); |
| 536 | list_for_each_entry_safe(err, err_next, &einjected, list) { |
| 537 | list_del(&err->list); |
| 538 | kfree(err); |
| 539 | } |
| 540 | spin_unlock_irqrestore(&inject_lock, flags); |
| 541 | } |
| 542 | |
| 543 | module_init(aer_inject_init); |
| 544 | module_exit(aer_inject_exit); |
| 545 | |
| 546 | MODULE_DESCRIPTION("PCIe AER software error injector"); |
| 547 | MODULE_LICENSE("GPL"); |