b.liu | e958203 | 2025-04-17 19:18:16 +0800 | [diff] [blame^] | 1 | // SPDX-License-Identifier: GPL-2.0-only |
| 2 | /* |
| 3 | * NFIT - Machine Check Handler |
| 4 | * |
| 5 | * Copyright(c) 2013-2016 Intel Corporation. All rights reserved. |
| 6 | */ |
| 7 | #include <linux/notifier.h> |
| 8 | #include <linux/acpi.h> |
| 9 | #include <linux/nd.h> |
| 10 | #include <asm/mce.h> |
| 11 | #include "nfit.h" |
| 12 | |
| 13 | static int nfit_handle_mce(struct notifier_block *nb, unsigned long val, |
| 14 | void *data) |
| 15 | { |
| 16 | struct mce *mce = (struct mce *)data; |
| 17 | struct acpi_nfit_desc *acpi_desc; |
| 18 | struct nfit_spa *nfit_spa; |
| 19 | |
| 20 | /* We only care about uncorrectable memory errors */ |
| 21 | if (!mce_is_memory_error(mce) || mce_is_correctable(mce)) |
| 22 | return NOTIFY_DONE; |
| 23 | |
| 24 | /* Verify the address reported in the MCE is valid. */ |
| 25 | if (!mce_usable_address(mce)) |
| 26 | return NOTIFY_DONE; |
| 27 | |
| 28 | /* |
| 29 | * mce->addr contains the physical addr accessed that caused the |
| 30 | * machine check. We need to walk through the list of NFITs, and see |
| 31 | * if any of them matches that address, and only then start a scrub. |
| 32 | */ |
| 33 | mutex_lock(&acpi_desc_lock); |
| 34 | list_for_each_entry(acpi_desc, &acpi_descs, list) { |
| 35 | struct device *dev = acpi_desc->dev; |
| 36 | int found_match = 0; |
| 37 | |
| 38 | mutex_lock(&acpi_desc->init_mutex); |
| 39 | list_for_each_entry(nfit_spa, &acpi_desc->spas, list) { |
| 40 | struct acpi_nfit_system_address *spa = nfit_spa->spa; |
| 41 | |
| 42 | if (nfit_spa_type(spa) != NFIT_SPA_PM) |
| 43 | continue; |
| 44 | /* find the spa that covers the mce addr */ |
| 45 | if (spa->address > mce->addr) |
| 46 | continue; |
| 47 | if ((spa->address + spa->length - 1) < mce->addr) |
| 48 | continue; |
| 49 | found_match = 1; |
| 50 | dev_dbg(dev, "addr in SPA %d (0x%llx, 0x%llx)\n", |
| 51 | spa->range_index, spa->address, spa->length); |
| 52 | /* |
| 53 | * We can break at the first match because we're going |
| 54 | * to rescan all the SPA ranges. There shouldn't be any |
| 55 | * aliasing anyway. |
| 56 | */ |
| 57 | break; |
| 58 | } |
| 59 | mutex_unlock(&acpi_desc->init_mutex); |
| 60 | |
| 61 | if (!found_match) |
| 62 | continue; |
| 63 | |
| 64 | /* If this fails due to an -ENOMEM, there is little we can do */ |
| 65 | nvdimm_bus_add_badrange(acpi_desc->nvdimm_bus, |
| 66 | ALIGN(mce->addr, L1_CACHE_BYTES), |
| 67 | L1_CACHE_BYTES); |
| 68 | nvdimm_region_notify(nfit_spa->nd_region, |
| 69 | NVDIMM_REVALIDATE_POISON); |
| 70 | |
| 71 | if (acpi_desc->scrub_mode == HW_ERROR_SCRUB_ON) { |
| 72 | /* |
| 73 | * We can ignore an -EBUSY here because if an ARS is |
| 74 | * already in progress, just let that be the last |
| 75 | * authoritative one |
| 76 | */ |
| 77 | acpi_nfit_ars_rescan(acpi_desc, 0); |
| 78 | } |
| 79 | break; |
| 80 | } |
| 81 | |
| 82 | mutex_unlock(&acpi_desc_lock); |
| 83 | return NOTIFY_DONE; |
| 84 | } |
| 85 | |
| 86 | static struct notifier_block nfit_mce_dec = { |
| 87 | .notifier_call = nfit_handle_mce, |
| 88 | .priority = MCE_PRIO_NFIT, |
| 89 | }; |
| 90 | |
| 91 | void nfit_mce_register(void) |
| 92 | { |
| 93 | mce_register_decode_chain(&nfit_mce_dec); |
| 94 | } |
| 95 | |
| 96 | void nfit_mce_unregister(void) |
| 97 | { |
| 98 | mce_unregister_decode_chain(&nfit_mce_dec); |
| 99 | } |