blob: d6c1b10f6c2542a8cfbbac6dae31246cd35134f7 [file] [log] [blame]
/*
 * NFIT - Machine Check Handler
 *
 * Copyright(c) 2013-2016 Intel Corporation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
15#include <linux/notifier.h>
16#include <linux/acpi.h>
17#include <linux/nd.h>
18#include <asm/mce.h>
19#include "nfit.h"
20
21static int nfit_handle_mce(struct notifier_block *nb, unsigned long val,
22 void *data)
23{
24 struct mce *mce = (struct mce *)data;
25 struct acpi_nfit_desc *acpi_desc;
26 struct nfit_spa *nfit_spa;
27
28 /* We only care about uncorrectable memory errors */
29 if (!mce_is_memory_error(mce) || mce_is_correctable(mce))
30 return NOTIFY_DONE;
31
32 /* Verify the address reported in the MCE is valid. */
33 if (!mce_usable_address(mce))
34 return NOTIFY_DONE;
35
36 /*
37 * mce->addr contains the physical addr accessed that caused the
38 * machine check. We need to walk through the list of NFITs, and see
39 * if any of them matches that address, and only then start a scrub.
40 */
41 mutex_lock(&acpi_desc_lock);
42 list_for_each_entry(acpi_desc, &acpi_descs, list) {
43 struct device *dev = acpi_desc->dev;
44 int found_match = 0;
45
46 mutex_lock(&acpi_desc->init_mutex);
47 list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
48 struct acpi_nfit_system_address *spa = nfit_spa->spa;
49
50 if (nfit_spa_type(spa) != NFIT_SPA_PM)
51 continue;
52 /* find the spa that covers the mce addr */
53 if (spa->address > mce->addr)
54 continue;
55 if ((spa->address + spa->length - 1) < mce->addr)
56 continue;
57 found_match = 1;
58 dev_dbg(dev, "addr in SPA %d (0x%llx, 0x%llx)\n",
59 spa->range_index, spa->address, spa->length);
60 /*
61 * We can break at the first match because we're going
62 * to rescan all the SPA ranges. There shouldn't be any
63 * aliasing anyway.
64 */
65 break;
66 }
67 mutex_unlock(&acpi_desc->init_mutex);
68
69 if (!found_match)
70 continue;
71
72 /* If this fails due to an -ENOMEM, there is little we can do */
73 nvdimm_bus_add_badrange(acpi_desc->nvdimm_bus,
74 ALIGN(mce->addr, L1_CACHE_BYTES),
75 L1_CACHE_BYTES);
76 nvdimm_region_notify(nfit_spa->nd_region,
77 NVDIMM_REVALIDATE_POISON);
78
79 if (acpi_desc->scrub_mode == HW_ERROR_SCRUB_ON) {
80 /*
81 * We can ignore an -EBUSY here because if an ARS is
82 * already in progress, just let that be the last
83 * authoritative one
84 */
85 acpi_nfit_ars_rescan(acpi_desc, 0);
86 }
87 break;
88 }
89
90 mutex_unlock(&acpi_desc_lock);
91 return NOTIFY_DONE;
92}
93
94static struct notifier_block nfit_mce_dec = {
95 .notifier_call = nfit_handle_mce,
96 .priority = MCE_PRIO_NFIT,
97};
98
99void nfit_mce_register(void)
100{
101 mce_register_decode_chain(&nfit_mce_dec);
102}
103
104void nfit_mce_unregister(void)
105{
106 mce_unregister_decode_chain(&nfit_mce_dec);
107}