1// SPDX-License-Identifier: GPL-2.0-only
2#include "amd64_edac.h"
3#include <asm/amd_nb.h>
4
5static struct edac_pci_ctl_info *pci_ctl;
6
7static int report_gart_errors;
8module_param(report_gart_errors, int, 0644);
9
10/*
11 * Set by command line parameter. If BIOS has enabled the ECC, this override is
12 * cleared to prevent re-enabling the hardware by this driver.
13 */
14static int ecc_enable_override;
15module_param(ecc_enable_override, int, 0644);
16
17static struct msr __percpu *msrs;
18
19/* Per-node stuff */
20static struct ecc_settings **ecc_stngs;
21
22/* Number of Unified Memory Controllers */
23static u8 num_umcs;
24
25/* Device for the PCI component */
26static struct device *pci_ctl_dev;
27
28/*
29 * Valid scrub rates for the K8 hardware memory scrubber. We map the scrubbing
30 * bandwidth to a valid bit pattern. The 'set' operation finds the 'matching-
31 * or higher value'.
32 *
33 * FIXME: Produce a better mapping/linearisation.
34 */
35static const struct scrubrate {
36 u32 scrubval; /* bit pattern for scrub rate */
37 u32 bandwidth; /* bandwidth consumed (bytes/sec) */
38} scrubrates[] = {
39 { 0x01, 1600000000UL},
40 { 0x02, 800000000UL},
41 { 0x03, 400000000UL},
42 { 0x04, 200000000UL},
43 { 0x05, 100000000UL},
44 { 0x06, 50000000UL},
45 { 0x07, 25000000UL},
46 { 0x08, 12284069UL},
47 { 0x09, 6274509UL},
48 { 0x0A, 3121951UL},
49 { 0x0B, 1560975UL},
50 { 0x0C, 781440UL},
51 { 0x0D, 390720UL},
52 { 0x0E, 195300UL},
53 { 0x0F, 97650UL},
54 { 0x10, 48854UL},
55 { 0x11, 24427UL},
56 { 0x12, 12213UL},
57 { 0x13, 6101UL},
58 { 0x14, 3051UL},
59 { 0x15, 1523UL},
60 { 0x16, 761UL},
61 { 0x00, 0UL}, /* scrubbing off */
62};
63
64int __amd64_read_pci_cfg_dword(struct pci_dev *pdev, int offset,
65 u32 *val, const char *func)
66{
67 int err = 0;
68
69 err = pci_read_config_dword(pdev, offset, val);
70 if (err)
71 amd64_warn("%s: error reading F%dx%03x.\n",
72 func, PCI_FUNC(pdev->devfn), offset);
73
74 return err;
75}
76
77int __amd64_write_pci_cfg_dword(struct pci_dev *pdev, int offset,
78 u32 val, const char *func)
79{
80 int err = 0;
81
82 err = pci_write_config_dword(pdev, offset, val);
83 if (err)
84 amd64_warn("%s: error writing to F%dx%03x.\n",
85 func, PCI_FUNC(pdev->devfn), offset);
86
87 return err;
88}
89
90/*
91 * Select DCT to which PCI cfg accesses are routed
92 */
93static void f15h_select_dct(struct amd64_pvt *pvt, u8 dct)
94{
95 u32 reg = 0;
96
97 amd64_read_pci_cfg(pvt->F1, DCT_CFG_SEL, &reg);
98 reg &= (pvt->model == 0x30) ? ~3 : ~1;
99 reg |= dct;
100 amd64_write_pci_cfg(pvt->F1, DCT_CFG_SEL, reg);
101}
102
103/*
104 *
105 * Depending on the family, F2 DCT reads need special handling:
106 *
107 * K8: has a single DCT only and no address offsets >= 0x100
108 *
109 * F10h: each DCT has its own set of regs
110 * DCT0 -> F2x040..
111 * DCT1 -> F2x140..
112 *
113 * F16h: has only 1 DCT
114 *
115 * F15h: we select which DCT we access using F1x10C[DctCfgSel]
116 */
117static inline int amd64_read_dct_pci_cfg(struct amd64_pvt *pvt, u8 dct,
118 int offset, u32 *val)
119{
120 switch (pvt->fam) {
121 case 0xf:
122 if (dct || offset >= 0x100)
123 return -EINVAL;
124 break;
125
126 case 0x10:
127 if (dct) {
128 /*
129 * Note: If ganging is enabled, barring the regs
130 * F2x[1,0]98 and F2x[1,0]9C, reads to F2x1xx
131 * return 0. (cf. Section 2.8.1 F10h BKDG)
132 */
133 if (dct_ganging_enabled(pvt))
134 return 0;
135
136 offset += 0x100;
137 }
138 break;
139
140 case 0x15:
141 /*
142 * F15h: F2x1xx addresses do not map explicitly to DCT1.
143 * We should select which DCT we access using F1x10C[DctCfgSel]
144 */
145 dct = (dct && pvt->model == 0x30) ? 3 : dct;
146 f15h_select_dct(pvt, dct);
147 break;
148
149 case 0x16:
150 if (dct)
151 return -EINVAL;
152 break;
153
154 default:
155 break;
156 }
157 return amd64_read_pci_cfg(pvt->F2, offset, val);
158}
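
/*
 * Illustrative example of the routing above: a read of DCT register
 * offset 0x40 on F10h with dct=1 is turned into an access to F2x140
 * (offset += 0x100), unless the DCTs are ganged, in which case the
 * access is skipped and 0 is returned. On F15h the same offset serves
 * both DCTs and F1x10C[DctCfgSel] decides which DCT answers.
 */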
159
160/*
161 * Memory scrubber control interface. For K8, memory scrubbing is handled by
162 * hardware and can involve L2 cache, dcache as well as the main memory. With
163 * F10, this is extended to L3 cache scrubbing on CPU models sporting that
164 * functionality.
165 *
166 * This causes the "units" for the scrubbing speed to vary from 64 byte blocks
167 * (dram) over to cache lines. This is nasty, so we will use bandwidth in
168 * bytes/sec for the setting.
169 *
170 * Currently, we only do dram scrubbing. If the scrubbing is done in software on
171 * other archs, we might not have access to the caches directly.
172 */
173
174static inline void __f17h_set_scrubval(struct amd64_pvt *pvt, u32 scrubval)
175{
176 /*
177 * Fam17h supports scrub values between 0x5 and 0x14. Also, the values
178 * are shifted down by 0x5, so scrubval 0x5 is written to the register
179 * as 0x0, scrubval 0x6 as 0x1, etc.
180 */
181 if (scrubval >= 0x5 && scrubval <= 0x14) {
182 scrubval -= 0x5;
183 pci_write_bits32(pvt->F6, F17H_SCR_LIMIT_ADDR, scrubval, 0xF);
184 pci_write_bits32(pvt->F6, F17H_SCR_BASE_ADDR, 1, 0x1);
185 } else {
186 pci_write_bits32(pvt->F6, F17H_SCR_BASE_ADDR, 0, 0x1);
187 }
188}
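
/*
 * For illustration: a requested scrubval of 0xA is written to
 * F17H_SCR_LIMIT_ADDR as 0xA - 0x5 = 0x5 with the enable bit in
 * F17H_SCR_BASE_ADDR set; any value outside 0x5-0x14 only clears that
 * enable bit, i.e. scrubbing is switched off.
 */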
189/*
190 * Scan the scrub rate mapping table for a close or matching bandwidth value to
191 * issue. If the requested rate is too big, then use the last maximum value found.
192 */
193static int __set_scrub_rate(struct amd64_pvt *pvt, u32 new_bw, u32 min_rate)
194{
195 u32 scrubval;
196 int i;
197
198 /*
199 * map the configured rate (new_bw) to a value specific to the AMD64
200 * memory controller and apply to register. Search for the first
201 * bandwidth entry that is greater than or equal to the setting requested
202 * and program that. If at last entry, turn off DRAM scrubbing.
203 *
204 * If no suitable bandwidth is found, turn off DRAM scrubbing entirely
205 * by falling back to the last element in scrubrates[].
206 */
207 for (i = 0; i < ARRAY_SIZE(scrubrates) - 1; i++) {
208 /*
209 * skip scrub rates which aren't recommended
210 * (see F10 BKDG, F3x58)
211 */
212 if (scrubrates[i].scrubval < min_rate)
213 continue;
214
215 if (scrubrates[i].bandwidth <= new_bw)
216 break;
217 }
218
219 scrubval = scrubrates[i].scrubval;
220
221 if (pvt->fam == 0x17 || pvt->fam == 0x18) {
222 __f17h_set_scrubval(pvt, scrubval);
223 } else if (pvt->fam == 0x15 && pvt->model == 0x60) {
224 f15h_select_dct(pvt, 0);
225 pci_write_bits32(pvt->F2, F15H_M60H_SCRCTRL, scrubval, 0x001F);
226 f15h_select_dct(pvt, 1);
227 pci_write_bits32(pvt->F2, F15H_M60H_SCRCTRL, scrubval, 0x001F);
228 } else {
229 pci_write_bits32(pvt->F3, SCRCTRL, scrubval, 0x001F);
230 }
231
232 if (scrubval)
233 return scrubrates[i].bandwidth;
234
235 return 0;
236}
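
/*
 * Worked example (illustrative): with min_rate = 0x5 and a requested
 * bandwidth of 500000000 bytes/sec, entries 0x01-0x04 are skipped as not
 * recommended and 0x05 (100000000 bytes/sec) is the first entry whose
 * bandwidth is <= the request, so scrubval 0x05 is programmed and
 * 100000000 is returned. A request below the slowest recommended rate
 * runs the loop to the end and lands on the last element, which turns
 * scrubbing off (scrubval 0x00, return value 0).
 */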
237
238static int set_scrub_rate(struct mem_ctl_info *mci, u32 bw)
239{
240 struct amd64_pvt *pvt = mci->pvt_info;
241 u32 min_scrubrate = 0x5;
242
243 if (pvt->fam == 0xf)
244 min_scrubrate = 0x0;
245
246 if (pvt->fam == 0x15) {
247 /* Erratum #505 */
248 if (pvt->model < 0x10)
249 f15h_select_dct(pvt, 0);
250
251 if (pvt->model == 0x60)
252 min_scrubrate = 0x6;
253 }
254 return __set_scrub_rate(pvt, bw, min_scrubrate);
255}
256
257static int get_scrub_rate(struct mem_ctl_info *mci)
258{
259 struct amd64_pvt *pvt = mci->pvt_info;
260 int i, retval = -EINVAL;
261 u32 scrubval = 0;
262
263 switch (pvt->fam) {
264 case 0x15:
265 /* Erratum #505 */
266 if (pvt->model < 0x10)
267 f15h_select_dct(pvt, 0);
268
269 if (pvt->model == 0x60)
270 amd64_read_pci_cfg(pvt->F2, F15H_M60H_SCRCTRL, &scrubval);
271 else
272 amd64_read_pci_cfg(pvt->F3, SCRCTRL, &scrubval);
273 break;
274
275 case 0x17:
276 case 0x18:
277 amd64_read_pci_cfg(pvt->F6, F17H_SCR_BASE_ADDR, &scrubval);
278 if (scrubval & BIT(0)) {
279 amd64_read_pci_cfg(pvt->F6, F17H_SCR_LIMIT_ADDR, &scrubval);
280 scrubval &= 0xF;
281 scrubval += 0x5;
282 } else {
283 scrubval = 0;
284 }
285 break;
286
287 default:
288 amd64_read_pci_cfg(pvt->F3, SCRCTRL, &scrubval);
289 break;
290 }
291
292 scrubval = scrubval & 0x001F;
293
294 for (i = 0; i < ARRAY_SIZE(scrubrates); i++) {
295 if (scrubrates[i].scrubval == scrubval) {
296 retval = scrubrates[i].bandwidth;
297 break;
298 }
299 }
300 return retval;
301}
302
303/*
304 * returns true if the SysAddr given by sys_addr matches the
305 * DRAM base/limit associated with node_id
306 */
307static bool base_limit_match(struct amd64_pvt *pvt, u64 sys_addr, u8 nid)
308{
309 u64 addr;
310
311 /* The K8 treats this as a 40-bit value. However, bits 63-40 will be
312 * all ones if the most significant implemented address bit is 1.
313 * Here we discard bits 63-40. See section 3.4.2 of AMD publication
314 * 24592: AMD x86-64 Architecture Programmer's Manual Volume 1
315 * Application Programming.
316 */
317 addr = sys_addr & 0x000000ffffffffffull;
318
319 return ((addr >= get_dram_base(pvt, nid)) &&
320 (addr <= get_dram_limit(pvt, nid)));
321}
322
323/*
324 * Attempt to map a SysAddr to a node. On success, return a pointer to the
325 * mem_ctl_info structure for the node that the SysAddr maps to.
326 *
327 * On failure, return NULL.
328 */
329static struct mem_ctl_info *find_mc_by_sys_addr(struct mem_ctl_info *mci,
330 u64 sys_addr)
331{
332 struct amd64_pvt *pvt;
333 u8 node_id;
334 u32 intlv_en, bits;
335
336 /*
337 * Here we use the DRAM Base (section 3.4.4.1) and DRAM Limit (section
338 * 3.4.4.2) registers to map the SysAddr to a node ID.
339 */
340 pvt = mci->pvt_info;
341
342 /*
343 * The value of this field should be the same for all DRAM Base
344 * registers. Therefore we arbitrarily choose to read it from the
345 * register for node 0.
346 */
347 intlv_en = dram_intlv_en(pvt, 0);
348
349 if (intlv_en == 0) {
350 for (node_id = 0; node_id < DRAM_RANGES; node_id++) {
351 if (base_limit_match(pvt, sys_addr, node_id))
352 goto found;
353 }
354 goto err_no_match;
355 }
356
357 if (unlikely((intlv_en != 0x01) &&
358 (intlv_en != 0x03) &&
359 (intlv_en != 0x07))) {
360 amd64_warn("DRAM Base[IntlvEn] junk value: 0x%x, BIOS bug?\n", intlv_en);
361 return NULL;
362 }
363
364 bits = (((u32) sys_addr) >> 12) & intlv_en;
365
366 for (node_id = 0; ; ) {
367 if ((dram_intlv_sel(pvt, node_id) & intlv_en) == bits)
368 break; /* intlv_sel field matches */
369
370 if (++node_id >= DRAM_RANGES)
371 goto err_no_match;
372 }
373
374 /* sanity test for sys_addr */
375 if (unlikely(!base_limit_match(pvt, sys_addr, node_id))) {
376 amd64_warn("%s: sys_addr 0x%llx falls outside base/limit address"
377 "range for node %d with node interleaving enabled.\n",
378 __func__, sys_addr, node_id);
379 return NULL;
380 }
381
382found:
383 return edac_mc_find((int)node_id);
384
385err_no_match:
386 edac_dbg(2, "sys_addr 0x%lx doesn't match any node\n",
387 (unsigned long)sys_addr);
388
389 return NULL;
390}
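
/*
 * Node-interleave example (illustrative): with intlv_en == 0x3 (four-way
 * node interleaving), bits [13:12] of the SysAddr are compared against
 * each node's IntlvSel field. A SysAddr with those bits equal to 2
 * therefore maps to the node whose IntlvSel field reads 2, provided the
 * address also passes that node's base/limit check.
 */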
391
392/*
393 * compute the CS base address of the @csrow on the DRAM controller @dct.
394 * For details see F2x[5C:40] in the processor's BKDG
395 */
396static void get_cs_base_and_mask(struct amd64_pvt *pvt, int csrow, u8 dct,
397 u64 *base, u64 *mask)
398{
399 u64 csbase, csmask, base_bits, mask_bits;
400 u8 addr_shift;
401
402 if (pvt->fam == 0xf && pvt->ext_model < K8_REV_F) {
403 csbase = pvt->csels[dct].csbases[csrow];
404 csmask = pvt->csels[dct].csmasks[csrow];
405 base_bits = GENMASK_ULL(31, 21) | GENMASK_ULL(15, 9);
406 mask_bits = GENMASK_ULL(29, 21) | GENMASK_ULL(15, 9);
407 addr_shift = 4;
408
409 /*
410 * F16h and F15h, models 30h and later need two addr_shift values:
411 * 8 for high and 6 for low (cf. F16h BKDG).
412 */
413 } else if (pvt->fam == 0x16 ||
414 (pvt->fam == 0x15 && pvt->model >= 0x30)) {
415 csbase = pvt->csels[dct].csbases[csrow];
416 csmask = pvt->csels[dct].csmasks[csrow >> 1];
417
418 *base = (csbase & GENMASK_ULL(15, 5)) << 6;
419 *base |= (csbase & GENMASK_ULL(30, 19)) << 8;
420
421 *mask = ~0ULL;
422 /* poke holes for the csmask */
423 *mask &= ~((GENMASK_ULL(15, 5) << 6) |
424 (GENMASK_ULL(30, 19) << 8));
425
426 *mask |= (csmask & GENMASK_ULL(15, 5)) << 6;
427 *mask |= (csmask & GENMASK_ULL(30, 19)) << 8;
428
429 return;
430 } else {
431 csbase = pvt->csels[dct].csbases[csrow];
432 csmask = pvt->csels[dct].csmasks[csrow >> 1];
433 addr_shift = 8;
434
435 if (pvt->fam == 0x15)
436 base_bits = mask_bits =
437 GENMASK_ULL(30,19) | GENMASK_ULL(13,5);
438 else
439 base_bits = mask_bits =
440 GENMASK_ULL(28,19) | GENMASK_ULL(13,5);
441 }
442
443 *base = (csbase & base_bits) << addr_shift;
444
445 *mask = ~0ULL;
446 /* poke holes for the csmask */
447 *mask &= ~(mask_bits << addr_shift);
448 /* OR them in */
449 *mask |= (csmask & mask_bits) << addr_shift;
450}
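
/*
 * Illustrative example for the final branch above: with addr_shift = 8,
 * a DCSB value with only bit 20 set yields a CS base of
 * 0x00100000 << 8 = 0x10000000, i.e. the chip select starts at 256MB.
 * The mask is assembled the same way: holes are punched at the shifted
 * mask_bits positions and the DCSM bits are OR-ed back in there.
 */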
451
452#define for_each_chip_select(i, dct, pvt) \
453 for (i = 0; i < pvt->csels[dct].b_cnt; i++)
454
455#define chip_select_base(i, dct, pvt) \
456 pvt->csels[dct].csbases[i]
457
458#define for_each_chip_select_mask(i, dct, pvt) \
459 for (i = 0; i < pvt->csels[dct].m_cnt; i++)
460
461#define for_each_umc(i) \
462 for (i = 0; i < num_umcs; i++)
463
464/*
465 * @input_addr is an InputAddr associated with the node given by mci. Return the
466 * csrow that input_addr maps to, or -1 on failure (no csrow claims input_addr).
467 */
468static int input_addr_to_csrow(struct mem_ctl_info *mci, u64 input_addr)
469{
470 struct amd64_pvt *pvt;
471 int csrow;
472 u64 base, mask;
473
474 pvt = mci->pvt_info;
475
476 for_each_chip_select(csrow, 0, pvt) {
477 if (!csrow_enabled(csrow, 0, pvt))
478 continue;
479
480 get_cs_base_and_mask(pvt, csrow, 0, &base, &mask);
481
482 mask = ~mask;
483
484 if ((input_addr & mask) == (base & mask)) {
485 edac_dbg(2, "InputAddr 0x%lx matches csrow %d (node %d)\n",
486 (unsigned long)input_addr, csrow,
487 pvt->mc_node_id);
488
489 return csrow;
490 }
491 }
492 edac_dbg(2, "no matching csrow for InputAddr 0x%lx (MC node %d)\n",
493 (unsigned long)input_addr, pvt->mc_node_id);
494
495 return -1;
496}
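
/*
 * Note on the match above: address bits that are set in the CS mask are
 * treated as don't-care, so InputAddrs differing only in those bits
 * resolve to the same csrow; only the remaining bits have to equal the
 * CS base for a hit.
 */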
497
498/*
499 * Obtain info from the DRAM Hole Address Register (section 3.4.8, pub #26094)
500 * for the node represented by mci. Info is passed back in *hole_base,
501 * *hole_offset, and *hole_size. Function returns 0 if info is valid or 1 if
502 * info is invalid. Info may be invalid for either of the following reasons:
503 *
504 * - The revision of the node is not E or greater. In this case, the DRAM Hole
505 * Address Register does not exist.
506 *
507 * - The DramHoleValid bit is cleared in the DRAM Hole Address Register,
508 * indicating that its contents are not valid.
509 *
510 * The values passed back in *hole_base, *hole_offset, and *hole_size are
511 * complete 32-bit values despite the fact that the bitfields in the DHAR
512 * only represent bits 31-24 of the base and offset values.
513 */
514int amd64_get_dram_hole_info(struct mem_ctl_info *mci, u64 *hole_base,
515 u64 *hole_offset, u64 *hole_size)
516{
517 struct amd64_pvt *pvt = mci->pvt_info;
518
519 /* only revE and later have the DRAM Hole Address Register */
520 if (pvt->fam == 0xf && pvt->ext_model < K8_REV_E) {
521 edac_dbg(1, " revision %d for node %d does not support DHAR\n",
522 pvt->ext_model, pvt->mc_node_id);
523 return 1;
524 }
525
526 /* valid for Fam10h and above */
527 if (pvt->fam >= 0x10 && !dhar_mem_hoist_valid(pvt)) {
528 edac_dbg(1, " Dram Memory Hoisting is DISABLED on this system\n");
529 return 1;
530 }
531
532 if (!dhar_valid(pvt)) {
533 edac_dbg(1, " Dram Memory Hoisting is DISABLED on this node %d\n",
534 pvt->mc_node_id);
535 return 1;
536 }
537
538 /* This node has Memory Hoisting */
539
540 /* +------------------+--------------------+--------------------+-----
541 * | memory | DRAM hole | relocated |
542 * | [0, (x - 1)] | [x, 0xffffffff] | addresses from |
543 * | | | DRAM hole |
544 * | | | [0x100000000, |
545 * | | | (0x100000000+ |
546 * | | | (0xffffffff-x))] |
547 * +------------------+--------------------+--------------------+-----
548 *
549 * Above is a diagram of physical memory showing the DRAM hole and the
550 * relocated addresses from the DRAM hole. As shown, the DRAM hole
551 * starts at address x (the base address) and extends through address
552 * 0xffffffff. The DRAM Hole Address Register (DHAR) relocates the
553 * addresses in the hole so that they start at 0x100000000.
554 */
555
556 *hole_base = dhar_base(pvt);
557 *hole_size = (1ULL << 32) - *hole_base;
558
559 *hole_offset = (pvt->fam > 0xf) ? f10_dhar_offset(pvt)
560 : k8_dhar_offset(pvt);
561
562 edac_dbg(1, " DHAR info for node %d base 0x%lx offset 0x%lx size 0x%lx\n",
563 pvt->mc_node_id, (unsigned long)*hole_base,
564 (unsigned long)*hole_offset, (unsigned long)*hole_size);
565
566 return 0;
567}
568EXPORT_SYMBOL_GPL(amd64_get_dram_hole_info);
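
/*
 * Numerical example (hypothetical values): a node whose BIOS placed a 1GB
 * MMIO hole below 4GB might report hole_base = 0xc0000000, hole_size =
 * 0x100000000 - 0xc0000000 = 0x40000000 and hole_offset = 0x40000000.
 * DRAM that would have sat in the hole is then reachable at SysAddrs
 * 0x100000000 through 0x13fffffff.
 */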
569
570/*
571 * Return the DramAddr that the SysAddr given by @sys_addr maps to. It is
572 * assumed that sys_addr maps to the node given by mci.
573 *
574 * The first part of section 3.4.4 (p. 70) shows how the DRAM Base (section
575 * 3.4.4.1) and DRAM Limit (section 3.4.4.2) registers are used to translate a
576 * SysAddr to a DramAddr. If the DRAM Hole Address Register (DHAR) is enabled,
577 * then it is also involved in translating a SysAddr to a DramAddr. Sections
578 * 3.4.8 and 3.5.8.2 describe the DHAR and how it is used for memory hoisting.
579 * These parts of the documentation are unclear. I interpret them as follows:
580 *
581 * When node n receives a SysAddr, it processes the SysAddr as follows:
582 *
583 * 1. It extracts the DRAMBase and DRAMLimit values from the DRAM Base and DRAM
584 * Limit registers for node n. If the SysAddr is not within the range
585 * specified by the base and limit values, then node n ignores the Sysaddr
586 * (since it does not map to node n). Otherwise continue to step 2 below.
587 *
588 * 2. If the DramHoleValid bit of the DHAR for node n is clear, the DHAR is
589 * disabled so skip to step 3 below. Otherwise see if the SysAddr is within
590 * the range of relocated addresses (starting at 0x100000000) from the DRAM
591 * hole. If not, skip to step 3 below. Else get the value of the
592 * DramHoleOffset field from the DHAR. To obtain the DramAddr, subtract the
593 * offset defined by this value from the SysAddr.
594 *
595 * 3. Obtain the base address for node n from the DRAMBase field of the DRAM
596 * Base register for node n. To obtain the DramAddr, subtract the base
597 * address from the SysAddr, as shown near the start of section 3.4.4 (p.70).
598 */
599static u64 sys_addr_to_dram_addr(struct mem_ctl_info *mci, u64 sys_addr)
600{
601 struct amd64_pvt *pvt = mci->pvt_info;
602 u64 dram_base, hole_base, hole_offset, hole_size, dram_addr;
603 int ret;
604
605 dram_base = get_dram_base(pvt, pvt->mc_node_id);
606
607 ret = amd64_get_dram_hole_info(mci, &hole_base, &hole_offset,
608 &hole_size);
609 if (!ret) {
610 if ((sys_addr >= (1ULL << 32)) &&
611 (sys_addr < ((1ULL << 32) + hole_size))) {
612 /* use DHAR to translate SysAddr to DramAddr */
613 dram_addr = sys_addr - hole_offset;
614
615 edac_dbg(2, "using DHAR to translate SysAddr 0x%lx to DramAddr 0x%lx\n",
616 (unsigned long)sys_addr,
617 (unsigned long)dram_addr);
618
619 return dram_addr;
620 }
621 }
622
623 /*
624 * Translate the SysAddr to a DramAddr as shown near the start of
625 * section 3.4.4 (p. 70). Although sys_addr is a 64-bit value, the k8
626 * only deals with 40-bit values. Therefore we discard bits 63-40 of
627 * sys_addr below. If bit 39 of sys_addr is 1 then the bits we
628 * discard are all 1s. Otherwise the bits we discard are all 0s. See
629 * section 3.4.2 of AMD publication 24592: AMD x86-64 Architecture
630 * Programmer's Manual Volume 1 Application Programming.
631 */
632 dram_addr = (sys_addr & GENMASK_ULL(39, 0)) - dram_base;
633
634 edac_dbg(2, "using DRAM Base register to translate SysAddr 0x%lx to DramAddr 0x%lx\n",
635 (unsigned long)sys_addr, (unsigned long)dram_addr);
636 return dram_addr;
637}
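
/*
 * Continuing the hypothetical DHAR values above: SysAddr 0x100000000 lies
 * in the relocated range, so DramAddr = 0x100000000 - 0x40000000 =
 * 0xc0000000. A SysAddr below that range, say 0x80001234 on a node whose
 * DRAM base is 0, simply becomes DramAddr 0x80001234 after the 40-bit
 * truncation.
 */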
638
639/*
640 * @intlv_en is the value of the IntlvEn field from a DRAM Base register
641 * (section 3.4.4.1). Return the number of bits from a SysAddr that are used
642 * for node interleaving.
643 */
644static int num_node_interleave_bits(unsigned intlv_en)
645{
646 static const int intlv_shift_table[] = { 0, 1, 0, 2, 0, 0, 0, 3 };
647 int n;
648
649 BUG_ON(intlv_en > 7);
650 n = intlv_shift_table[intlv_en];
651 return n;
652}
653
654/* Translate the DramAddr given by @dram_addr to an InputAddr. */
655static u64 dram_addr_to_input_addr(struct mem_ctl_info *mci, u64 dram_addr)
656{
657 struct amd64_pvt *pvt;
658 int intlv_shift;
659 u64 input_addr;
660
661 pvt = mci->pvt_info;
662
663 /*
664 * See the start of section 3.4.4 (p. 70, BKDG #26094, K8, revA-E)
665 * concerning translating a DramAddr to an InputAddr.
666 */
667 intlv_shift = num_node_interleave_bits(dram_intlv_en(pvt, 0));
668 input_addr = ((dram_addr >> intlv_shift) & GENMASK_ULL(35, 12)) +
669 (dram_addr & 0xfff);
670
671 edac_dbg(2, " Intlv Shift=%d DramAddr=0x%lx maps to InputAddr=0x%lx\n",
672 intlv_shift, (unsigned long)dram_addr,
673 (unsigned long)input_addr);
674
675 return input_addr;
676}
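
/*
 * Illustrative example: with four-way node interleaving (intlv_en == 0x3)
 * the shift is 2, so DramAddr 0x12345000 becomes InputAddr
 * ((0x12345000 >> 2) & GENMASK_ULL(35, 12)) + 0x000 = 0x048d1000. With
 * interleaving disabled the shift is 0 and the InputAddr equals the
 * DramAddr (for addresses that fit in bits [35:0]).
 */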
677
678/*
679 * Translate the SysAddr represented by @sys_addr to an InputAddr. It is
680 * assumed that @sys_addr maps to the node given by mci.
681 */
682static u64 sys_addr_to_input_addr(struct mem_ctl_info *mci, u64 sys_addr)
683{
684 u64 input_addr;
685
686 input_addr =
687 dram_addr_to_input_addr(mci, sys_addr_to_dram_addr(mci, sys_addr));
688
689 edac_dbg(2, "SysAddr 0x%lx translates to InputAddr 0x%lx\n",
690 (unsigned long)sys_addr, (unsigned long)input_addr);
691
692 return input_addr;
693}
694
695/* Map the Error address to a PAGE and PAGE OFFSET. */
696static inline void error_address_to_page_and_offset(u64 error_address,
697 struct err_info *err)
698{
699 err->page = (u32) (error_address >> PAGE_SHIFT);
700 err->offset = ((u32) error_address) & ~PAGE_MASK;
701}
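
/*
 * For example, with 4K pages an error address of 0x12345678 is reported
 * as page 0x12345, offset 0x678.
 */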
702
703/*
704 * @sys_addr is an error address (a SysAddr) extracted from the MCA NB Address
705 * Low (section 3.6.4.5) and MCA NB Address High (section 3.6.4.6) registers
706 * of a node that detected an ECC memory error. mci represents the node that
707 * the error address maps to (possibly different from the node that detected
708 * the error). Return the number of the csrow that sys_addr maps to, or -1 on
709 * error.
710 */
711static int sys_addr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr)
712{
713 int csrow;
714
715 csrow = input_addr_to_csrow(mci, sys_addr_to_input_addr(mci, sys_addr));
716
717 if (csrow == -1)
718 amd64_mc_err(mci, "Failed to translate InputAddr to csrow for "
719 "address 0x%lx\n", (unsigned long)sys_addr);
720 return csrow;
721}
722
723static int get_channel_from_ecc_syndrome(struct mem_ctl_info *, u16);
724
725/*
726 * Determine if the DIMMs have ECC enabled. ECC is enabled ONLY if all the DIMMs
727 * are ECC capable.
728 */
729static unsigned long determine_edac_cap(struct amd64_pvt *pvt)
730{
731 unsigned long edac_cap = EDAC_FLAG_NONE;
732 u8 bit;
733
734 if (pvt->umc) {
735 u8 i, umc_en_mask = 0, dimm_ecc_en_mask = 0;
736
737 for_each_umc(i) {
738 if (!(pvt->umc[i].sdp_ctrl & UMC_SDP_INIT))
739 continue;
740
741 umc_en_mask |= BIT(i);
742
743 /* UMC Configuration bit 12 (DimmEccEn) */
744 if (pvt->umc[i].umc_cfg & BIT(12))
745 dimm_ecc_en_mask |= BIT(i);
746 }
747
748 if (umc_en_mask == dimm_ecc_en_mask)
749 edac_cap = EDAC_FLAG_SECDED;
750 } else {
751 bit = (pvt->fam > 0xf || pvt->ext_model >= K8_REV_F)
752 ? 19
753 : 17;
754
755 if (pvt->dclr0 & BIT(bit))
756 edac_cap = EDAC_FLAG_SECDED;
757 }
758
759 return edac_cap;
760}
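
/*
 * Example (illustrative): if UMC0 and UMC1 are initialized
 * (umc_en_mask == 0x3) but only UMC0 has DimmEccEn set
 * (dimm_ecc_en_mask == 0x1), the two masks differ and the node reports
 * EDAC_FLAG_NONE; ECC is only advertised when every active UMC carries
 * ECC-capable DIMMs.
 */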
761
762static void debug_display_dimm_sizes(struct amd64_pvt *, u8);
763
764static void debug_dump_dramcfg_low(struct amd64_pvt *pvt, u32 dclr, int chan)
765{
766 edac_dbg(1, "F2x%d90 (DRAM Cfg Low): 0x%08x\n", chan, dclr);
767
768 if (pvt->dram_type == MEM_LRDDR3) {
769 u32 dcsm = pvt->csels[chan].csmasks[0];
770 /*
771 * It's assumed all LRDIMMs in a DCT are going to be of
772 * the same 'type' until proven otherwise. So, use a cs
773 * value of '0' here to get dcsm value.
774 */
775 edac_dbg(1, " LRDIMM %dx rank multiply\n", (dcsm & 0x3));
776 }
777
778 edac_dbg(1, "All DIMMs support ECC:%s\n",
779 (dclr & BIT(19)) ? "yes" : "no");
780
781
782 edac_dbg(1, " PAR/ERR parity: %s\n",
783 (dclr & BIT(8)) ? "enabled" : "disabled");
784
785 if (pvt->fam == 0x10)
786 edac_dbg(1, " DCT 128bit mode width: %s\n",
787 (dclr & BIT(11)) ? "128b" : "64b");
788
789 edac_dbg(1, " x4 logical DIMMs present: L0: %s L1: %s L2: %s L3: %s\n",
790 (dclr & BIT(12)) ? "yes" : "no",
791 (dclr & BIT(13)) ? "yes" : "no",
792 (dclr & BIT(14)) ? "yes" : "no",
793 (dclr & BIT(15)) ? "yes" : "no");
794}
795
796#define CS_EVEN_PRIMARY BIT(0)
797#define CS_ODD_PRIMARY BIT(1)
798#define CS_EVEN_SECONDARY BIT(2)
799#define CS_ODD_SECONDARY BIT(3)
800#define CS_3R_INTERLEAVE BIT(4)
801
802#define CS_EVEN (CS_EVEN_PRIMARY | CS_EVEN_SECONDARY)
803#define CS_ODD (CS_ODD_PRIMARY | CS_ODD_SECONDARY)
804
805static int f17_get_cs_mode(int dimm, u8 ctrl, struct amd64_pvt *pvt)
806{
807 u8 base, count = 0;
808 int cs_mode = 0;
809
810 if (csrow_enabled(2 * dimm, ctrl, pvt))
811 cs_mode |= CS_EVEN_PRIMARY;
812
813 if (csrow_enabled(2 * dimm + 1, ctrl, pvt))
814 cs_mode |= CS_ODD_PRIMARY;
815
816 /* Asymmetric dual-rank DIMM support. */
817 if (csrow_sec_enabled(2 * dimm + 1, ctrl, pvt))
818 cs_mode |= CS_ODD_SECONDARY;
819
820 /*
821 * 3 Rank interleaving support.
822 * There should be only three bases enabled and their two masks should
823 * be equal.
824 */
825 for_each_chip_select(base, ctrl, pvt)
826 count += csrow_enabled(base, ctrl, pvt);
827
828 if (count == 3 &&
829 pvt->csels[ctrl].csmasks[0] == pvt->csels[ctrl].csmasks[1]) {
830 edac_dbg(1, "3R interleaving in use.\n");
831 cs_mode |= CS_3R_INTERLEAVE;
832 }
833
834 return cs_mode;
835}
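
/*
 * Example: a symmetric dual-rank DIMM in slot 0 has CS0 and CS1 enabled,
 * so cs_mode == CS_EVEN_PRIMARY | CS_ODD_PRIMARY. An asymmetric dual-rank
 * DIMM may additionally report CS_ODD_SECONDARY because its differently
 * sized second rank is described by the secondary base/mask pair.
 */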
836
837static void debug_display_dimm_sizes_df(struct amd64_pvt *pvt, u8 ctrl)
838{
839 int dimm, size0, size1, cs0, cs1, cs_mode;
840
841 edac_printk(KERN_DEBUG, EDAC_MC, "UMC%d chip selects:\n", ctrl);
842
843 for (dimm = 0; dimm < 2; dimm++) {
844 cs0 = dimm * 2;
845 cs1 = dimm * 2 + 1;
846
847 cs_mode = f17_get_cs_mode(dimm, ctrl, pvt);
848
849 size0 = pvt->ops->dbam_to_cs(pvt, ctrl, cs_mode, cs0);
850 size1 = pvt->ops->dbam_to_cs(pvt, ctrl, cs_mode, cs1);
851
852 amd64_info(EDAC_MC ": %d: %5dMB %d: %5dMB\n",
853 cs0, size0,
854 cs1, size1);
855 }
856}
857
858static void __dump_misc_regs_df(struct amd64_pvt *pvt)
859{
860 struct amd64_umc *umc;
861 u32 i, tmp, umc_base;
862
863 for_each_umc(i) {
864 umc_base = get_umc_base(i);
865 umc = &pvt->umc[i];
866
867 edac_dbg(1, "UMC%d DIMM cfg: 0x%x\n", i, umc->dimm_cfg);
868 edac_dbg(1, "UMC%d UMC cfg: 0x%x\n", i, umc->umc_cfg);
869 edac_dbg(1, "UMC%d SDP ctrl: 0x%x\n", i, umc->sdp_ctrl);
870 edac_dbg(1, "UMC%d ECC ctrl: 0x%x\n", i, umc->ecc_ctrl);
871
872 amd_smn_read(pvt->mc_node_id, umc_base + UMCCH_ECC_BAD_SYMBOL, &tmp);
873 edac_dbg(1, "UMC%d ECC bad symbol: 0x%x\n", i, tmp);
874
875 amd_smn_read(pvt->mc_node_id, umc_base + UMCCH_UMC_CAP, &tmp);
876 edac_dbg(1, "UMC%d UMC cap: 0x%x\n", i, tmp);
877 edac_dbg(1, "UMC%d UMC cap high: 0x%x\n", i, umc->umc_cap_hi);
878
879 edac_dbg(1, "UMC%d ECC capable: %s, ChipKill ECC capable: %s\n",
880 i, (umc->umc_cap_hi & BIT(30)) ? "yes" : "no",
881 (umc->umc_cap_hi & BIT(31)) ? "yes" : "no");
882 edac_dbg(1, "UMC%d All DIMMs support ECC: %s\n",
883 i, (umc->umc_cfg & BIT(12)) ? "yes" : "no");
884 edac_dbg(1, "UMC%d x4 DIMMs present: %s\n",
885 i, (umc->dimm_cfg & BIT(6)) ? "yes" : "no");
886 edac_dbg(1, "UMC%d x16 DIMMs present: %s\n",
887 i, (umc->dimm_cfg & BIT(7)) ? "yes" : "no");
888
889 if (pvt->dram_type == MEM_LRDDR4) {
890 amd_smn_read(pvt->mc_node_id, umc_base + UMCCH_ADDR_CFG, &tmp);
891 edac_dbg(1, "UMC%d LRDIMM %dx rank multiply\n",
892 i, 1 << ((tmp >> 4) & 0x3));
893 }
894
895 debug_display_dimm_sizes_df(pvt, i);
896 }
897
898 edac_dbg(1, "F0x104 (DRAM Hole Address): 0x%08x, base: 0x%08x\n",
899 pvt->dhar, dhar_base(pvt));
900}
901
902/* Display and decode various NB registers for debug purposes. */
903static void __dump_misc_regs(struct amd64_pvt *pvt)
904{
905 edac_dbg(1, "F3xE8 (NB Cap): 0x%08x\n", pvt->nbcap);
906
907 edac_dbg(1, " NB two channel DRAM capable: %s\n",
908 (pvt->nbcap & NBCAP_DCT_DUAL) ? "yes" : "no");
909
910 edac_dbg(1, " ECC capable: %s, ChipKill ECC capable: %s\n",
911 (pvt->nbcap & NBCAP_SECDED) ? "yes" : "no",
912 (pvt->nbcap & NBCAP_CHIPKILL) ? "yes" : "no");
913
914 debug_dump_dramcfg_low(pvt, pvt->dclr0, 0);
915
916 edac_dbg(1, "F3xB0 (Online Spare): 0x%08x\n", pvt->online_spare);
917
918 edac_dbg(1, "F1xF0 (DRAM Hole Address): 0x%08x, base: 0x%08x, offset: 0x%08x\n",
919 pvt->dhar, dhar_base(pvt),
920 (pvt->fam == 0xf) ? k8_dhar_offset(pvt)
921 : f10_dhar_offset(pvt));
922
923 debug_display_dimm_sizes(pvt, 0);
924
925 /* everything below this point is Fam10h and above */
926 if (pvt->fam == 0xf)
927 return;
928
929 debug_display_dimm_sizes(pvt, 1);
930
931 /* Only if NOT ganged does dclr1 have valid info */
932 if (!dct_ganging_enabled(pvt))
933 debug_dump_dramcfg_low(pvt, pvt->dclr1, 1);
934}
935
936/* Display and decode various NB registers for debug purposes. */
937static void dump_misc_regs(struct amd64_pvt *pvt)
938{
939 if (pvt->umc)
940 __dump_misc_regs_df(pvt);
941 else
942 __dump_misc_regs(pvt);
943
944 edac_dbg(1, " DramHoleValid: %s\n", dhar_valid(pvt) ? "yes" : "no");
945
946 amd64_info("using x%u syndromes.\n", pvt->ecc_sym_sz);
947}
948
949/*
950 * See BKDG, F2x[1,0][5C:40], F2[1,0][6C:60]
951 */
952static void prep_chip_selects(struct amd64_pvt *pvt)
953{
954 if (pvt->fam == 0xf && pvt->ext_model < K8_REV_F) {
955 pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 8;
956 pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 8;
957 } else if (pvt->fam == 0x15 && pvt->model == 0x30) {
958 pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 4;
959 pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 2;
960 } else if (pvt->fam >= 0x17) {
961 int umc;
962
963 for_each_umc(umc) {
964 pvt->csels[umc].b_cnt = 4;
965 pvt->csels[umc].m_cnt = 2;
966 }
967
968 } else {
969 pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 8;
970 pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 4;
971 }
972}
973
974static void read_umc_base_mask(struct amd64_pvt *pvt)
975{
976 u32 umc_base_reg, umc_base_reg_sec;
977 u32 umc_mask_reg, umc_mask_reg_sec;
978 u32 base_reg, base_reg_sec;
979 u32 mask_reg, mask_reg_sec;
980 u32 *base, *base_sec;
981 u32 *mask, *mask_sec;
982 int cs, umc;
983
984 for_each_umc(umc) {
985 umc_base_reg = get_umc_base(umc) + UMCCH_BASE_ADDR;
986 umc_base_reg_sec = get_umc_base(umc) + UMCCH_BASE_ADDR_SEC;
987
988 for_each_chip_select(cs, umc, pvt) {
989 base = &pvt->csels[umc].csbases[cs];
990 base_sec = &pvt->csels[umc].csbases_sec[cs];
991
992 base_reg = umc_base_reg + (cs * 4);
993 base_reg_sec = umc_base_reg_sec + (cs * 4);
994
995 if (!amd_smn_read(pvt->mc_node_id, base_reg, base))
996 edac_dbg(0, " DCSB%d[%d]=0x%08x reg: 0x%x\n",
997 umc, cs, *base, base_reg);
998
999 if (!amd_smn_read(pvt->mc_node_id, base_reg_sec, base_sec))
1000 edac_dbg(0, " DCSB_SEC%d[%d]=0x%08x reg: 0x%x\n",
1001 umc, cs, *base_sec, base_reg_sec);
1002 }
1003
1004 umc_mask_reg = get_umc_base(umc) + UMCCH_ADDR_MASK;
1005 umc_mask_reg_sec = get_umc_base(umc) + UMCCH_ADDR_MASK_SEC;
1006
1007 for_each_chip_select_mask(cs, umc, pvt) {
1008 mask = &pvt->csels[umc].csmasks[cs];
1009 mask_sec = &pvt->csels[umc].csmasks_sec[cs];
1010
1011 mask_reg = umc_mask_reg + (cs * 4);
1012 mask_reg_sec = umc_mask_reg_sec + (cs * 4);
1013
1014 if (!amd_smn_read(pvt->mc_node_id, mask_reg, mask))
1015 edac_dbg(0, " DCSM%d[%d]=0x%08x reg: 0x%x\n",
1016 umc, cs, *mask, mask_reg);
1017
1018 if (!amd_smn_read(pvt->mc_node_id, mask_reg_sec, mask_sec))
1019 edac_dbg(0, " DCSM_SEC%d[%d]=0x%08x reg: 0x%x\n",
1020 umc, cs, *mask_sec, mask_reg_sec);
1021 }
1022 }
1023}
1024
1025/*
1026 * Function 2 Offset F10_DCSB0; read in the DCS Base and DCS Mask registers
1027 */
1028static void read_dct_base_mask(struct amd64_pvt *pvt)
1029{
1030 int cs;
1031
1032 prep_chip_selects(pvt);
1033
1034 if (pvt->umc)
1035 return read_umc_base_mask(pvt);
1036
1037 for_each_chip_select(cs, 0, pvt) {
1038 int reg0 = DCSB0 + (cs * 4);
1039 int reg1 = DCSB1 + (cs * 4);
1040 u32 *base0 = &pvt->csels[0].csbases[cs];
1041 u32 *base1 = &pvt->csels[1].csbases[cs];
1042
1043 if (!amd64_read_dct_pci_cfg(pvt, 0, reg0, base0))
1044 edac_dbg(0, " DCSB0[%d]=0x%08x reg: F2x%x\n",
1045 cs, *base0, reg0);
1046
1047 if (pvt->fam == 0xf)
1048 continue;
1049
1050 if (!amd64_read_dct_pci_cfg(pvt, 1, reg0, base1))
1051 edac_dbg(0, " DCSB1[%d]=0x%08x reg: F2x%x\n",
1052 cs, *base1, (pvt->fam == 0x10) ? reg1
1053 : reg0);
1054 }
1055
1056 for_each_chip_select_mask(cs, 0, pvt) {
1057 int reg0 = DCSM0 + (cs * 4);
1058 int reg1 = DCSM1 + (cs * 4);
1059 u32 *mask0 = &pvt->csels[0].csmasks[cs];
1060 u32 *mask1 = &pvt->csels[1].csmasks[cs];
1061
1062 if (!amd64_read_dct_pci_cfg(pvt, 0, reg0, mask0))
1063 edac_dbg(0, " DCSM0[%d]=0x%08x reg: F2x%x\n",
1064 cs, *mask0, reg0);
1065
1066 if (pvt->fam == 0xf)
1067 continue;
1068
1069 if (!amd64_read_dct_pci_cfg(pvt, 1, reg0, mask1))
1070 edac_dbg(0, " DCSM1[%d]=0x%08x reg: F2x%x\n",
1071 cs, *mask1, (pvt->fam == 0x10) ? reg1
1072 : reg0);
1073 }
1074}
1075
1076static void determine_memory_type(struct amd64_pvt *pvt)
1077{
1078 u32 dram_ctrl, dcsm;
1079
1080 switch (pvt->fam) {
1081 case 0xf:
1082 if (pvt->ext_model >= K8_REV_F)
1083 goto ddr3;
1084
1085 pvt->dram_type = (pvt->dclr0 & BIT(18)) ? MEM_DDR : MEM_RDDR;
1086 return;
1087
1088 case 0x10:
1089 if (pvt->dchr0 & DDR3_MODE)
1090 goto ddr3;
1091
1092 pvt->dram_type = (pvt->dclr0 & BIT(16)) ? MEM_DDR2 : MEM_RDDR2;
1093 return;
1094
1095 case 0x15:
1096 if (pvt->model < 0x60)
1097 goto ddr3;
1098
1099 /*
1100 * Model 0x60 needs special handling:
1101 *
1102 * We use a Chip Select value of '0' to obtain dcsm.
1103 * Theoretically, it is possible to populate LRDIMMs of different
1104 * 'Rank' value on a DCT. But this is not the common case. So,
1105 * it's reasonable to assume all DIMMs are going to be of the same
1106 * 'type' until proven otherwise.
1107 */
1108 amd64_read_dct_pci_cfg(pvt, 0, DRAM_CONTROL, &dram_ctrl);
1109 dcsm = pvt->csels[0].csmasks[0];
1110
1111 if (((dram_ctrl >> 8) & 0x7) == 0x2)
1112 pvt->dram_type = MEM_DDR4;
1113 else if (pvt->dclr0 & BIT(16))
1114 pvt->dram_type = MEM_DDR3;
1115 else if (dcsm & 0x3)
1116 pvt->dram_type = MEM_LRDDR3;
1117 else
1118 pvt->dram_type = MEM_RDDR3;
1119
1120 return;
1121
1122 case 0x16:
1123 goto ddr3;
1124
1125 case 0x17:
1126 case 0x18:
1127 if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(5))
1128 pvt->dram_type = MEM_LRDDR4;
1129 else if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(4))
1130 pvt->dram_type = MEM_RDDR4;
1131 else
1132 pvt->dram_type = MEM_DDR4;
1133 return;
1134
1135 default:
1136 WARN(1, KERN_ERR "%s: Family??? 0x%x\n", __func__, pvt->fam);
1137 pvt->dram_type = MEM_EMPTY;
1138 }
1139 return;
1140
1141ddr3:
1142 pvt->dram_type = (pvt->dclr0 & BIT(16)) ? MEM_DDR3 : MEM_RDDR3;
1143}
1144
1145/* Get the number of DCT channels the memory controller is using. */
1146static int k8_early_channel_count(struct amd64_pvt *pvt)
1147{
1148 int flag;
1149
1150 if (pvt->ext_model >= K8_REV_F)
1151 /* RevF (NPT) and later */
1152 flag = pvt->dclr0 & WIDTH_128;
1153 else
1154 /* RevE and earlier */
1155 flag = pvt->dclr0 & REVE_WIDTH_128;
1156
1157 /* not used */
1158 pvt->dclr1 = 0;
1159
1160 return (flag) ? 2 : 1;
1161}
1162
1163/* On F10h and later ErrAddr is MC4_ADDR[47:1] */
1164static u64 get_error_address(struct amd64_pvt *pvt, struct mce *m)
1165{
1166 u16 mce_nid = amd_get_nb_id(m->extcpu);
1167 struct mem_ctl_info *mci;
1168 u8 start_bit = 1;
1169 u8 end_bit = 47;
1170 u64 addr;
1171
1172 mci = edac_mc_find(mce_nid);
1173 if (!mci)
1174 return 0;
1175
1176 pvt = mci->pvt_info;
1177
1178 if (pvt->fam == 0xf) {
1179 start_bit = 3;
1180 end_bit = 39;
1181 }
1182
1183 addr = m->addr & GENMASK_ULL(end_bit, start_bit);
1184
1185 /*
1186 * Erratum 637 workaround
1187 */
1188 if (pvt->fam == 0x15) {
1189 u64 cc6_base, tmp_addr;
1190 u32 tmp;
1191 u8 intlv_en;
1192
1193 if ((addr & GENMASK_ULL(47, 24)) >> 24 != 0x00fdf7)
1194 return addr;
1195
1196
1197 amd64_read_pci_cfg(pvt->F1, DRAM_LOCAL_NODE_LIM, &tmp);
1198 intlv_en = tmp >> 21 & 0x7;
1199
1200 /* add [47:27] + 3 trailing bits */
1201 cc6_base = (tmp & GENMASK_ULL(20, 0)) << 3;
1202
1203 /* reverse and add DramIntlvEn */
1204 cc6_base |= intlv_en ^ 0x7;
1205
1206 /* pin at [47:24] */
1207 cc6_base <<= 24;
1208
1209 if (!intlv_en)
1210 return cc6_base | (addr & GENMASK_ULL(23, 0));
1211
1212 amd64_read_pci_cfg(pvt->F1, DRAM_LOCAL_NODE_BASE, &tmp);
1213
1214 /* faster log2 */
1215 tmp_addr = (addr & GENMASK_ULL(23, 12)) << __fls(intlv_en + 1);
1216
1217 /* OR DramIntlvSel into bits [14:12] */
1218 tmp_addr |= (tmp & GENMASK_ULL(23, 21)) >> 9;
1219
1220 /* add remaining [11:0] bits from original MC4_ADDR */
1221 tmp_addr |= addr & GENMASK_ULL(11, 0);
1222
1223 return cc6_base | tmp_addr;
1224 }
1225
1226 return addr;
1227}
1228
1229static struct pci_dev *pci_get_related_function(unsigned int vendor,
1230 unsigned int device,
1231 struct pci_dev *related)
1232{
1233 struct pci_dev *dev = NULL;
1234
1235 while ((dev = pci_get_device(vendor, device, dev))) {
1236 if (pci_domain_nr(dev->bus) == pci_domain_nr(related->bus) &&
1237 (dev->bus->number == related->bus->number) &&
1238 (PCI_SLOT(dev->devfn) == PCI_SLOT(related->devfn)))
1239 break;
1240 }
1241
1242 return dev;
1243}
1244
1245static void read_dram_base_limit_regs(struct amd64_pvt *pvt, unsigned range)
1246{
1247 struct amd_northbridge *nb;
1248 struct pci_dev *f1 = NULL;
1249 unsigned int pci_func;
1250 int off = range << 3;
1251 u32 llim;
1252
1253 amd64_read_pci_cfg(pvt->F1, DRAM_BASE_LO + off, &pvt->ranges[range].base.lo);
1254 amd64_read_pci_cfg(pvt->F1, DRAM_LIMIT_LO + off, &pvt->ranges[range].lim.lo);
1255
1256 if (pvt->fam == 0xf)
1257 return;
1258
1259 if (!dram_rw(pvt, range))
1260 return;
1261
1262 amd64_read_pci_cfg(pvt->F1, DRAM_BASE_HI + off, &pvt->ranges[range].base.hi);
1263 amd64_read_pci_cfg(pvt->F1, DRAM_LIMIT_HI + off, &pvt->ranges[range].lim.hi);
1264
1265 /* F15h: factor in CC6 save area by reading dst node's limit reg */
1266 if (pvt->fam != 0x15)
1267 return;
1268
1269 nb = node_to_amd_nb(dram_dst_node(pvt, range));
1270 if (WARN_ON(!nb))
1271 return;
1272
1273 if (pvt->model == 0x60)
1274 pci_func = PCI_DEVICE_ID_AMD_15H_M60H_NB_F1;
1275 else if (pvt->model == 0x30)
1276 pci_func = PCI_DEVICE_ID_AMD_15H_M30H_NB_F1;
1277 else
1278 pci_func = PCI_DEVICE_ID_AMD_15H_NB_F1;
1279
1280 f1 = pci_get_related_function(nb->misc->vendor, pci_func, nb->misc);
1281 if (WARN_ON(!f1))
1282 return;
1283
1284 amd64_read_pci_cfg(f1, DRAM_LOCAL_NODE_LIM, &llim);
1285
1286 pvt->ranges[range].lim.lo &= GENMASK_ULL(15, 0);
1287
1288 /* {[39:27],111b} */
1289 pvt->ranges[range].lim.lo |= ((llim & 0x1fff) << 3 | 0x7) << 16;
1290
1291 pvt->ranges[range].lim.hi &= GENMASK_ULL(7, 0);
1292
1293 /* [47:40] */
1294 pvt->ranges[range].lim.hi |= llim >> 13;
1295
1296 pci_dev_put(f1);
1297}
1298
1299static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
1300 struct err_info *err)
1301{
1302 struct amd64_pvt *pvt = mci->pvt_info;
1303
1304 error_address_to_page_and_offset(sys_addr, err);
1305
1306 /*
1307 * Find out which node the error address belongs to. This may be
1308 * different from the node that detected the error.
1309 */
1310 err->src_mci = find_mc_by_sys_addr(mci, sys_addr);
1311 if (!err->src_mci) {
1312 amd64_mc_err(mci, "failed to map error addr 0x%lx to a node\n",
1313 (unsigned long)sys_addr);
1314 err->err_code = ERR_NODE;
1315 return;
1316 }
1317
1318 /* Now map the sys_addr to a CSROW */
1319 err->csrow = sys_addr_to_csrow(err->src_mci, sys_addr);
1320 if (err->csrow < 0) {
1321 err->err_code = ERR_CSROW;
1322 return;
1323 }
1324
1325 /* CHIPKILL enabled */
1326 if (pvt->nbcfg & NBCFG_CHIPKILL) {
1327 err->channel = get_channel_from_ecc_syndrome(mci, err->syndrome);
1328 if (err->channel < 0) {
1329 /*
1330 * Syndrome didn't map, so we don't know which of the
1331 * 2 DIMMs is in error. So we need to ID 'both' of them
1332 * as suspect.
1333 */
1334 amd64_mc_warn(err->src_mci, "unknown syndrome 0x%04x - "
1335 "possible error reporting race\n",
1336 err->syndrome);
1337 err->err_code = ERR_CHANNEL;
1338 return;
1339 }
1340 } else {
1341 /*
1342 * non-chipkill ecc mode
1343 *
1344 * The k8 documentation is unclear about how to determine the
1345 * channel number when using non-chipkill memory. This method
1346 * was obtained from email communication with someone at AMD.
1347 * (Wish the email was placed in this comment - norsk)
1348 */
1349 err->channel = ((sys_addr & BIT(3)) != 0);
1350 }
1351}
1352
1353static int ddr2_cs_size(unsigned i, bool dct_width)
1354{
1355 unsigned shift = 0;
1356
1357 if (i <= 2)
1358 shift = i;
1359 else if (!(i & 0x1))
1360 shift = i >> 1;
1361 else
1362 shift = (i + 1) >> 1;
1363
1364 return 128 << (shift + !!dct_width);
1365}
1366
1367static int k8_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct,
1368 unsigned cs_mode, int cs_mask_nr)
1369{
1370 u32 dclr = dct ? pvt->dclr1 : pvt->dclr0;
1371
1372 if (pvt->ext_model >= K8_REV_F) {
1373 WARN_ON(cs_mode > 11);
1374 return ddr2_cs_size(cs_mode, dclr & WIDTH_128);
1375 }
1376 else if (pvt->ext_model >= K8_REV_D) {
1377 unsigned diff;
1378 WARN_ON(cs_mode > 10);
1379
1380 /*
1381 * the below calculation, besides trying to win an obfuscated C
1382 * contest, maps cs_mode values to DIMM chip select sizes. The
1383 * mappings are:
1384 *
1385 * cs_mode CS size (mb)
1386 * ======= ============
1387 * 0 32
1388 * 1 64
1389 * 2 128
1390 * 3 128
1391 * 4 256
1392 * 5 512
1393 * 6 256
1394 * 7 512
1395 * 8 1024
1396 * 9 1024
1397 * 10 2048
1398 *
1399 * Basically, it calculates a value with which to shift the
1400 * smallest CS size of 32MB.
1401 *
1402 * ddr[23]_cs_size have a similar purpose.
1403 */
1404 diff = cs_mode/3 + (unsigned)(cs_mode > 5);
1405
1406 return 32 << (cs_mode - diff);
1407 }
1408 else {
1409 WARN_ON(cs_mode > 6);
1410 return 32 << cs_mode;
1411 }
1412}
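
/*
 * Sanity check of the revD/E mapping above against its table: cs_mode 7
 * gives diff = 7/3 + 1 = 3, so 32 << (7 - 3) = 512MB, and cs_mode 10
 * gives diff = 3 + 1 = 4, so 32 << 6 = 2048MB, matching the table.
 */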
1413
1414/*
1415 * Get the number of DCT channels in use.
1416 *
1417 * Return:
1418 * number of Memory Channels in operation
1419 * Pass back:
1420 * contents of the DCL0_LOW register
1421 */
1422static int f1x_early_channel_count(struct amd64_pvt *pvt)
1423{
1424 int i, j, channels = 0;
1425
1426 /* On F10h, if we are in 128 bit mode, then we are using 2 channels */
1427 if (pvt->fam == 0x10 && (pvt->dclr0 & WIDTH_128))
1428 return 2;
1429
1430 /*
1431 * Need to check if in unganged mode: In such, there are 2 channels,
1432 * but they are not in 128 bit mode and thus the above 'dclr0' status
1433 * bit will be OFF.
1434 *
1435 * Need to check DCT0[0] and DCT1[0] to see if only one of them has
1436 * their CSEnable bit on. If so, then SINGLE DIMM case.
1437 */
1438 edac_dbg(0, "Data width is not 128 bits - need more decoding\n");
1439
1440 /*
1441 * Check DRAM Bank Address Mapping values for each DIMM to see if there
1442 * is more than just one DIMM present in unganged mode. Need to check
1443 * both controllers since DIMMs can be placed in either one.
1444 */
1445 for (i = 0; i < 2; i++) {
1446 u32 dbam = (i ? pvt->dbam1 : pvt->dbam0);
1447
1448 for (j = 0; j < 4; j++) {
1449 if (DBAM_DIMM(j, dbam) > 0) {
1450 channels++;
1451 break;
1452 }
1453 }
1454 }
1455
1456 if (channels > 2)
1457 channels = 2;
1458
1459 amd64_info("MCT channel count: %d\n", channels);
1460
1461 return channels;
1462}
1463
1464static int f17_early_channel_count(struct amd64_pvt *pvt)
1465{
1466 int i, channels = 0;
1467
1468 /* SDP Control bit 31 (SdpInit) is clear for unused UMC channels */
1469 for_each_umc(i)
1470 channels += !!(pvt->umc[i].sdp_ctrl & UMC_SDP_INIT);
1471
1472 amd64_info("MCT channel count: %d\n", channels);
1473
1474 return channels;
1475}
1476
1477static int ddr3_cs_size(unsigned i, bool dct_width)
1478{
1479 unsigned shift = 0;
1480 int cs_size = 0;
1481
1482 if (i == 0 || i == 3 || i == 4)
1483 cs_size = -1;
1484 else if (i <= 2)
1485 shift = i;
1486 else if (i == 12)
1487 shift = 7;
1488 else if (!(i & 0x1))
1489 shift = i >> 1;
1490 else
1491 shift = (i + 1) >> 1;
1492
1493 if (cs_size != -1)
1494 cs_size = (128 * (1 << !!dct_width)) << shift;
1495
1496 return cs_size;
1497}
1498
1499static int ddr3_lrdimm_cs_size(unsigned i, unsigned rank_multiply)
1500{
1501 unsigned shift = 0;
1502 int cs_size = 0;
1503
1504 if (i < 4 || i == 6)
1505 cs_size = -1;
1506 else if (i == 12)
1507 shift = 7;
1508 else if (!(i & 0x1))
1509 shift = i >> 1;
1510 else
1511 shift = (i + 1) >> 1;
1512
1513 if (cs_size != -1)
1514 cs_size = rank_multiply * (128 << shift);
1515
1516 return cs_size;
1517}
1518
1519static int ddr4_cs_size(unsigned i)
1520{
1521 int cs_size = 0;
1522
1523 if (i == 0)
1524 cs_size = -1;
1525 else if (i == 1)
1526 cs_size = 1024;
1527 else
1528 /* Min cs_size = 1G */
1529 cs_size = 1024 * (1 << (i >> 1));
1530
1531 return cs_size;
1532}
1533
1534static int f10_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct,
1535 unsigned cs_mode, int cs_mask_nr)
1536{
1537 u32 dclr = dct ? pvt->dclr1 : pvt->dclr0;
1538
1539 WARN_ON(cs_mode > 11);
1540
1541 if (pvt->dchr0 & DDR3_MODE || pvt->dchr1 & DDR3_MODE)
1542 return ddr3_cs_size(cs_mode, dclr & WIDTH_128);
1543 else
1544 return ddr2_cs_size(cs_mode, dclr & WIDTH_128);
1545}
1546
1547/*
1548 * F15h supports only 64bit DCT interfaces
1549 */
1550static int f15_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct,
1551 unsigned cs_mode, int cs_mask_nr)
1552{
1553 WARN_ON(cs_mode > 12);
1554
1555 return ddr3_cs_size(cs_mode, false);
1556}
1557
1558 /* F15h M60h supports DDR4 mapping as well. */
1559static int f15_m60h_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct,
1560 unsigned cs_mode, int cs_mask_nr)
1561{
1562 int cs_size;
1563 u32 dcsm = pvt->csels[dct].csmasks[cs_mask_nr];
1564
1565 WARN_ON(cs_mode > 12);
1566
1567 if (pvt->dram_type == MEM_DDR4) {
1568 if (cs_mode > 9)
1569 return -1;
1570
1571 cs_size = ddr4_cs_size(cs_mode);
1572 } else if (pvt->dram_type == MEM_LRDDR3) {
1573 unsigned rank_multiply = dcsm & 0xf;
1574
1575 if (rank_multiply == 3)
1576 rank_multiply = 4;
1577 cs_size = ddr3_lrdimm_cs_size(cs_mode, rank_multiply);
1578 } else {
1579 /* Minimum cs size is 512MB for F15h M60h */
1580 if (cs_mode == 0x1)
1581 return -1;
1582
1583 cs_size = ddr3_cs_size(cs_mode, false);
1584 }
1585
1586 return cs_size;
1587}
1588
1589/*
1590 * F16h and F15h model 30h have only limited cs_modes.
1591 */
1592static int f16_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct,
1593 unsigned cs_mode, int cs_mask_nr)
1594{
1595 WARN_ON(cs_mode > 12);
1596
1597 if (cs_mode == 6 || cs_mode == 8 ||
1598 cs_mode == 9 || cs_mode == 12)
1599 return -1;
1600 else
1601 return ddr3_cs_size(cs_mode, false);
1602}
1603
1604static int f17_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc,
1605 unsigned int cs_mode, int csrow_nr)
1606{
1607 u32 addr_mask_orig, addr_mask_deinterleaved;
1608 u32 msb, weight, num_zero_bits;
1609 int dimm, size = 0;
1610
1611 /* No Chip Selects are enabled. */
1612 if (!cs_mode)
1613 return size;
1614
1615 /* Requested size of an even CS but none are enabled. */
1616 if (!(cs_mode & CS_EVEN) && !(csrow_nr & 1))
1617 return size;
1618
1619 /* Requested size of an odd CS but none are enabled. */
1620 if (!(cs_mode & CS_ODD) && (csrow_nr & 1))
1621 return size;
1622
1623 /*
1624 * There is one mask per DIMM, and two Chip Selects per DIMM.
1625 * CS0 and CS1 -> DIMM0
1626 * CS2 and CS3 -> DIMM1
1627 */
1628 dimm = csrow_nr >> 1;
1629
1630 /* Asymmetric dual-rank DIMM support. */
1631 if ((csrow_nr & 1) && (cs_mode & CS_ODD_SECONDARY))
1632 addr_mask_orig = pvt->csels[umc].csmasks_sec[dimm];
1633 else
1634 addr_mask_orig = pvt->csels[umc].csmasks[dimm];
1635
1636 /*
1637 * The number of zero bits in the mask is equal to the number of bits
1638 * in a full mask minus the number of bits in the current mask.
1639 *
1640 * The MSB is the number of bits in the full mask because BIT[0] is
1641 * always 0.
1642 *
1643 * In the special 3 Rank interleaving case, a single bit is flipped
1644 * without swapping with the most significant bit. This can be handled
1645 * by keeping the MSB where it is and ignoring the single zero bit.
1646 */
1647 msb = fls(addr_mask_orig) - 1;
1648 weight = hweight_long(addr_mask_orig);
1649 num_zero_bits = msb - weight - !!(cs_mode & CS_3R_INTERLEAVE);
1650
1651 /* Take the number of zero bits off from the top of the mask. */
1652 addr_mask_deinterleaved = GENMASK_ULL(msb - num_zero_bits, 1);
1653
1654 edac_dbg(1, "CS%d DIMM%d AddrMasks:\n", csrow_nr, dimm);
1655 edac_dbg(1, " Original AddrMask: 0x%x\n", addr_mask_orig);
1656 edac_dbg(1, " Deinterleaved AddrMask: 0x%x\n", addr_mask_deinterleaved);
1657
1658 /* Register [31:1] = Address [39:9]. Size is in kBs here. */
1659 size = (addr_mask_deinterleaved >> 2) + 1;
1660
1661 /* Return size in MBs. */
1662 return size >> 10;
1663}
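
/*
 * Worked example (hypothetical register values): a 16GB non-interleaved
 * rank could expose an AddrMask of 0x03fffffe (bits [25:1] set). Then
 * msb = 25, weight = 25, no zero bits, and the size works out to
 * (0x03fffffe >> 2) + 1 = 0x1000000 kB = 16384MB. If one low mask bit is
 * cleared for interleaving, e.g. 0x03fffefe, the weight drops to 24, one
 * bit is taken off the top (deinterleaved mask 0x01fffffe) and the
 * reported size halves to 8192MB.
 */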
1664
1665static void read_dram_ctl_register(struct amd64_pvt *pvt)
1666{
1667
1668 if (pvt->fam == 0xf)
1669 return;
1670
1671 if (!amd64_read_pci_cfg(pvt->F2, DCT_SEL_LO, &pvt->dct_sel_lo)) {
1672 edac_dbg(0, "F2x110 (DCTSelLow): 0x%08x, High range addrs at: 0x%x\n",
1673 pvt->dct_sel_lo, dct_sel_baseaddr(pvt));
1674
1675 edac_dbg(0, " DCTs operate in %s mode\n",
1676 (dct_ganging_enabled(pvt) ? "ganged" : "unganged"));
1677
1678 if (!dct_ganging_enabled(pvt))
1679 edac_dbg(0, " Address range split per DCT: %s\n",
1680 (dct_high_range_enabled(pvt) ? "yes" : "no"));
1681
1682 edac_dbg(0, " data interleave for ECC: %s, DRAM cleared since last warm reset: %s\n",
1683 (dct_data_intlv_enabled(pvt) ? "enabled" : "disabled"),
1684 (dct_memory_cleared(pvt) ? "yes" : "no"));
1685
1686 edac_dbg(0, " channel interleave: %s, "
1687 "interleave bits selector: 0x%x\n",
1688 (dct_interleave_enabled(pvt) ? "enabled" : "disabled"),
1689 dct_sel_interleave_addr(pvt));
1690 }
1691
1692 amd64_read_pci_cfg(pvt->F2, DCT_SEL_HI, &pvt->dct_sel_hi);
1693}
1694
1695/*
1696 * Determine channel (DCT) based on the interleaving mode (see F15h M30h BKDG,
1697 * 2.10.12 Memory Interleaving Modes).
1698 */
1699static u8 f15_m30h_determine_channel(struct amd64_pvt *pvt, u64 sys_addr,
1700 u8 intlv_en, int num_dcts_intlv,
1701 u32 dct_sel)
1702{
1703 u8 channel = 0;
1704 u8 select;
1705
1706 if (!(intlv_en))
1707 return (u8)(dct_sel);
1708
1709 if (num_dcts_intlv == 2) {
1710 select = (sys_addr >> 8) & 0x3;
1711 channel = select ? 0x3 : 0;
1712 } else if (num_dcts_intlv == 4) {
1713 u8 intlv_addr = dct_sel_interleave_addr(pvt);
1714 switch (intlv_addr) {
1715 case 0x4:
1716 channel = (sys_addr >> 8) & 0x3;
1717 break;
1718 case 0x5:
1719 channel = (sys_addr >> 9) & 0x3;
1720 break;
1721 }
1722 }
1723 return channel;
1724}
1725
1726/*
1727 * Determine channel (DCT) based on the interleaving mode: F10h BKDG, 2.8.9 Memory
1728 * Interleaving Modes.
1729 */
1730static u8 f1x_determine_channel(struct amd64_pvt *pvt, u64 sys_addr,
1731 bool hi_range_sel, u8 intlv_en)
1732{
1733 u8 dct_sel_high = (pvt->dct_sel_lo >> 1) & 1;
1734
1735 if (dct_ganging_enabled(pvt))
1736 return 0;
1737
1738 if (hi_range_sel)
1739 return dct_sel_high;
1740
1741 /*
1742 * see F2x110[DctSelIntLvAddr] - channel interleave mode
1743 */
1744 if (dct_interleave_enabled(pvt)) {
1745 u8 intlv_addr = dct_sel_interleave_addr(pvt);
1746
1747 /* return DCT select function: 0=DCT0, 1=DCT1 */
1748 if (!intlv_addr)
1749 return sys_addr >> 6 & 1;
1750
1751 if (intlv_addr & 0x2) {
1752 u8 shift = intlv_addr & 0x1 ? 9 : 6;
1753 u32 temp = hweight_long((u32) ((sys_addr >> 16) & 0x1F)) & 1;
1754
1755 return ((sys_addr >> shift) & 1) ^ temp;
1756 }
1757
1758 if (intlv_addr & 0x4) {
1759 u8 shift = intlv_addr & 0x1 ? 9 : 8;
1760
1761 return (sys_addr >> shift) & 1;
1762 }
1763
1764 return (sys_addr >> (12 + hweight8(intlv_en))) & 1;
1765 }
1766
1767 if (dct_high_range_enabled(pvt))
1768 return ~dct_sel_high & 1;
1769
1770 return 0;
1771}
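
/*
 * Example: in unganged mode with channel interleaving enabled and
 * DctSelIntLvAddr == 0, the DCT is simply bit 6 of the SysAddr, so
 * consecutive 64-byte lines alternate between DCT0 and DCT1. The other
 * intlv_addr encodings fold higher address bits (and, for the hash
 * variants, a parity of bits [20:16]) into that choice instead.
 */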
1772
1773/* Convert the sys_addr to the normalized DCT address */
1774static u64 f1x_get_norm_dct_addr(struct amd64_pvt *pvt, u8 range,
1775 u64 sys_addr, bool hi_rng,
1776 u32 dct_sel_base_addr)
1777{
1778 u64 chan_off;
1779 u64 dram_base = get_dram_base(pvt, range);
1780 u64 hole_off = f10_dhar_offset(pvt);
1781 u64 dct_sel_base_off = (u64)(pvt->dct_sel_hi & 0xFFFFFC00) << 16;
1782
1783 if (hi_rng) {
1784 /*
1785 * if
1786 * base address of high range is below 4Gb
1787 * (bits [47:27] at [31:11])
1788 * DRAM address space on this DCT is hoisted above 4Gb &&
1789 * sys_addr > 4Gb
1790 *
1791 * remove hole offset from sys_addr
1792 * else
1793 * remove high range offset from sys_addr
1794 */
1795 if ((!(dct_sel_base_addr >> 16) ||
1796 dct_sel_base_addr < dhar_base(pvt)) &&
1797 dhar_valid(pvt) &&
1798 (sys_addr >= BIT_64(32)))
1799 chan_off = hole_off;
1800 else
1801 chan_off = dct_sel_base_off;
1802 } else {
1803 /*
1804 * if
1805 * we have a valid hole &&
1806 * sys_addr > 4Gb
1807 *
1808 * remove hole
1809 * else
1810 * remove dram base to normalize to DCT address
1811 */
1812 if (dhar_valid(pvt) && (sys_addr >= BIT_64(32)))
1813 chan_off = hole_off;
1814 else
1815 chan_off = dram_base;
1816 }
1817
1818 return (sys_addr & GENMASK_ULL(47,6)) - (chan_off & GENMASK_ULL(47,23));
1819}
1820
1821/*
1822 * checks if the csrow passed in is marked as SPARED, if so returns the new
1823 * spare row
1824 */
1825static int f10_process_possible_spare(struct amd64_pvt *pvt, u8 dct, int csrow)
1826{
1827 int tmp_cs;
1828
1829 if (online_spare_swap_done(pvt, dct) &&
1830 csrow == online_spare_bad_dramcs(pvt, dct)) {
1831
1832 for_each_chip_select(tmp_cs, dct, pvt) {
1833 if (chip_select_base(tmp_cs, dct, pvt) & 0x2) {
1834 csrow = tmp_cs;
1835 break;
1836 }
1837 }
1838 }
1839 return csrow;
1840}
1841
1842/*
1843 * Iterate over the DRAM DCT "base" and "mask" registers looking for a
1844 * SystemAddr match on the specified 'ChannelSelect' and 'NodeID'
1845 *
1846 * Return:
1847 * -EINVAL: NOT FOUND
1848 * 0..csrow = Chip-Select Row
1849 */
1850static int f1x_lookup_addr_in_dct(u64 in_addr, u8 nid, u8 dct)
1851{
1852 struct mem_ctl_info *mci;
1853 struct amd64_pvt *pvt;
1854 u64 cs_base, cs_mask;
1855 int cs_found = -EINVAL;
1856 int csrow;
1857
1858 mci = edac_mc_find(nid);
1859 if (!mci)
1860 return cs_found;
1861
1862 pvt = mci->pvt_info;
1863
1864 edac_dbg(1, "input addr: 0x%llx, DCT: %d\n", in_addr, dct);
1865
1866 for_each_chip_select(csrow, dct, pvt) {
1867 if (!csrow_enabled(csrow, dct, pvt))
1868 continue;
1869
1870 get_cs_base_and_mask(pvt, csrow, dct, &cs_base, &cs_mask);
1871
1872 edac_dbg(1, " CSROW=%d CSBase=0x%llx CSMask=0x%llx\n",
1873 csrow, cs_base, cs_mask);
1874
1875 cs_mask = ~cs_mask;
1876
1877 edac_dbg(1, " (InputAddr & ~CSMask)=0x%llx (CSBase & ~CSMask)=0x%llx\n",
1878 (in_addr & cs_mask), (cs_base & cs_mask));
1879
1880 if ((in_addr & cs_mask) == (cs_base & cs_mask)) {
1881 if (pvt->fam == 0x15 && pvt->model >= 0x30) {
1882 cs_found = csrow;
1883 break;
1884 }
1885 cs_found = f10_process_possible_spare(pvt, dct, csrow);
1886
1887 edac_dbg(1, " MATCH csrow=%d\n", cs_found);
1888 break;
1889 }
1890 }
1891 return cs_found;
1892}
1893
1894/*
1895 * See F2x10C. Non-interleaved graphics framebuffer memory under the 16G is
1896 * swapped with a region located at the bottom of memory so that the GPU can use
1897 * the interleaved region and thus two channels.
1898 */
1899static u64 f1x_swap_interleaved_region(struct amd64_pvt *pvt, u64 sys_addr)
1900{
1901 u32 swap_reg, swap_base, swap_limit, rgn_size, tmp_addr;
1902
1903 if (pvt->fam == 0x10) {
1904 /* only revC3 and revE have that feature */
1905 if (pvt->model < 4 || (pvt->model < 0xa && pvt->stepping < 3))
1906 return sys_addr;
1907 }
1908
1909 amd64_read_pci_cfg(pvt->F2, SWAP_INTLV_REG, &swap_reg);
1910
1911 if (!(swap_reg & 0x1))
1912 return sys_addr;
1913
1914 swap_base = (swap_reg >> 3) & 0x7f;
1915 swap_limit = (swap_reg >> 11) & 0x7f;
1916 rgn_size = (swap_reg >> 20) & 0x7f;
1917 tmp_addr = sys_addr >> 27;
1918
1919 if (!(sys_addr >> 34) &&
1920 (((tmp_addr >= swap_base) &&
1921 (tmp_addr <= swap_limit)) ||
1922 (tmp_addr < rgn_size)))
1923 return sys_addr ^ (u64)swap_base << 27;
1924
1925 return sys_addr;
1926}
1927
1928/* For a given @dram_range, check if @sys_addr falls within it. */
1929static int f1x_match_to_this_node(struct amd64_pvt *pvt, unsigned range,
1930 u64 sys_addr, int *chan_sel)
1931{
1932 int cs_found = -EINVAL;
1933 u64 chan_addr;
1934 u32 dct_sel_base;
1935 u8 channel;
1936 bool high_range = false;
1937
1938 u8 node_id = dram_dst_node(pvt, range);
1939 u8 intlv_en = dram_intlv_en(pvt, range);
1940 u32 intlv_sel = dram_intlv_sel(pvt, range);
1941
1942 edac_dbg(1, "(range %d) SystemAddr= 0x%llx Limit=0x%llx\n",
1943 range, sys_addr, get_dram_limit(pvt, range));
1944
1945 if (dhar_valid(pvt) &&
1946 dhar_base(pvt) <= sys_addr &&
1947 sys_addr < BIT_64(32)) {
1948 amd64_warn("Huh? Address is in the MMIO hole: 0x%016llx\n",
1949 sys_addr);
1950 return -EINVAL;
1951 }
1952
1953 if (intlv_en && (intlv_sel != ((sys_addr >> 12) & intlv_en)))
1954 return -EINVAL;
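	/*
	 * Illustrative example (made-up values): with intlv_en = 0x3 (4-node
	 * interleave), the check above compares intlv_sel against sys_addr
	 * bits [13:12]; a range programmed with intlv_sel = 2 therefore only
	 * claims addresses whose bits [13:12] equal 2.
	 */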
1955
1956 sys_addr = f1x_swap_interleaved_region(pvt, sys_addr);
1957
1958 dct_sel_base = dct_sel_baseaddr(pvt);
1959
1960 /*
1961 * check whether addresses >= DctSelBaseAddr[47:27] are to be used to
1962 * select between DCT0 and DCT1.
1963 */
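	/*
	 * Illustrative example (made-up value): both sides of this comparison
	 * are in 128MB (1 << 27) units. With dct_sel_base = 0x00020000,
	 * dct_sel_base >> 11 = 0x40 (an 8GB boundary), so sys_addr =
	 * 0x240000000 (9GB) gives sys_addr >> 27 = 0x48 >= 0x40 and selects
	 * the high range.
	 */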
1964 if (dct_high_range_enabled(pvt) &&
1965 !dct_ganging_enabled(pvt) &&
1966 ((sys_addr >> 27) >= (dct_sel_base >> 11)))
1967 high_range = true;
1968
1969 channel = f1x_determine_channel(pvt, sys_addr, high_range, intlv_en);
1970
1971 chan_addr = f1x_get_norm_dct_addr(pvt, range, sys_addr,
1972 high_range, dct_sel_base);
1973
1974 /* Remove node interleaving, see F1x120 */
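	/*
	 * Illustrative example (made-up values): with intlv_en = 0x7 (8-node
	 * interleave, hweight8() == 3), address bits [14:12] selected the
	 * node and are squeezed out here: chan_addr = 0x12345678 becomes
	 * ((0x12345678 >> 15) << 12) | 0x678 = 0x2468678.
	 */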
1975 if (intlv_en)
1976 chan_addr = ((chan_addr >> (12 + hweight8(intlv_en))) << 12) |
1977 (chan_addr & 0xfff);
1978
1979 /* remove channel interleave */
1980 if (dct_interleave_enabled(pvt) &&
1981 !dct_high_range_enabled(pvt) &&
1982 !dct_ganging_enabled(pvt)) {
1983
1984 if (dct_sel_interleave_addr(pvt) != 1) {
1985 if (dct_sel_interleave_addr(pvt) == 0x3)
1986 /* hash 9 */
1987 chan_addr = ((chan_addr >> 10) << 9) |
1988 (chan_addr & 0x1ff);
1989 else
1990 /* A[6] or hash 6 */
1991 chan_addr = ((chan_addr >> 7) << 6) |
1992 (chan_addr & 0x3f);
1993 } else
1994 /* A[12] */
1995 chan_addr = ((chan_addr >> 13) << 12) |
1996 (chan_addr & 0xfff);
1997 }
1998
1999 edac_dbg(1, " Normalized DCT addr: 0x%llx\n", chan_addr);
2000
2001 cs_found = f1x_lookup_addr_in_dct(chan_addr, node_id, channel);
2002
2003 if (cs_found >= 0)
2004 *chan_sel = channel;
2005
2006 return cs_found;
2007}
2008
2009static int f15_m30h_match_to_this_node(struct amd64_pvt *pvt, unsigned range,
2010 u64 sys_addr, int *chan_sel)
2011{
2012 int cs_found = -EINVAL;
2013 int num_dcts_intlv = 0;
2014 u64 chan_addr, chan_offset;
2015 u64 dct_base, dct_limit;
2016 u32 dct_cont_base_reg, dct_cont_limit_reg, tmp;
2017 u8 channel, alias_channel, leg_mmio_hole, dct_sel, dct_offset_en;
2018
2019 u64 dhar_offset = f10_dhar_offset(pvt);
2020 u8 intlv_addr = dct_sel_interleave_addr(pvt);
2021 u8 node_id = dram_dst_node(pvt, range);
2022 u8 intlv_en = dram_intlv_en(pvt, range);
2023
2024 amd64_read_pci_cfg(pvt->F1, DRAM_CONT_BASE, &dct_cont_base_reg);
2025 amd64_read_pci_cfg(pvt->F1, DRAM_CONT_LIMIT, &dct_cont_limit_reg);
2026
2027 dct_offset_en = (u8) ((dct_cont_base_reg >> 3) & BIT(0));
2028 dct_sel = (u8) ((dct_cont_base_reg >> 4) & 0x7);
2029
2030 edac_dbg(1, "(range %d) SystemAddr= 0x%llx Limit=0x%llx\n",
2031 range, sys_addr, get_dram_limit(pvt, range));
2032
2033 if (!(get_dram_base(pvt, range) <= sys_addr) &&
2034 !(get_dram_limit(pvt, range) >= sys_addr))
2035 return -EINVAL;
2036
2037 if (dhar_valid(pvt) &&
2038 dhar_base(pvt) <= sys_addr &&
2039 sys_addr < BIT_64(32)) {
2040 amd64_warn("Huh? Address is in the MMIO hole: 0x%016llx\n",
2041 sys_addr);
2042 return -EINVAL;
2043 }
2044
2045 /* Verify sys_addr is within DCT Range. */
2046 dct_base = (u64) dct_sel_baseaddr(pvt);
2047 dct_limit = (dct_cont_limit_reg >> 11) & 0x1FFF;
2048
2049 if (!(dct_cont_base_reg & BIT(0)) &&
2050 !(dct_base <= (sys_addr >> 27) &&
2051 dct_limit >= (sys_addr >> 27)))
2052 return -EINVAL;
2053
 2054	/* Verify the number of DCTs that participate in channel interleaving. */
2055 num_dcts_intlv = (int) hweight8(intlv_en);
2056
2057 if (!(num_dcts_intlv % 2 == 0) || (num_dcts_intlv > 4))
2058 return -EINVAL;
2059
2060 if (pvt->model >= 0x60)
2061 channel = f1x_determine_channel(pvt, sys_addr, false, intlv_en);
2062 else
2063 channel = f15_m30h_determine_channel(pvt, sys_addr, intlv_en,
2064 num_dcts_intlv, dct_sel);
2065
2066 /* Verify we stay within the MAX number of channels allowed */
2067 if (channel > 3)
2068 return -EINVAL;
2069
2070 leg_mmio_hole = (u8) (dct_cont_base_reg >> 1 & BIT(0));
2071
2072 /* Get normalized DCT addr */
2073 if (leg_mmio_hole && (sys_addr >= BIT_64(32)))
2074 chan_offset = dhar_offset;
2075 else
2076 chan_offset = dct_base << 27;
2077
2078 chan_addr = sys_addr - chan_offset;
2079
2080 /* remove channel interleave */
2081 if (num_dcts_intlv == 2) {
2082 if (intlv_addr == 0x4)
2083 chan_addr = ((chan_addr >> 9) << 8) |
2084 (chan_addr & 0xff);
2085 else if (intlv_addr == 0x5)
2086 chan_addr = ((chan_addr >> 10) << 9) |
2087 (chan_addr & 0x1ff);
2088 else
2089 return -EINVAL;
2090
2091 } else if (num_dcts_intlv == 4) {
2092 if (intlv_addr == 0x4)
2093 chan_addr = ((chan_addr >> 10) << 8) |
2094 (chan_addr & 0xff);
2095 else if (intlv_addr == 0x5)
2096 chan_addr = ((chan_addr >> 11) << 9) |
2097 (chan_addr & 0x1ff);
2098 else
2099 return -EINVAL;
2100 }
2101
2102 if (dct_offset_en) {
2103 amd64_read_pci_cfg(pvt->F1,
2104 DRAM_CONT_HIGH_OFF + (int) channel * 4,
2105 &tmp);
2106 chan_addr += (u64) ((tmp >> 11) & 0xfff) << 27;
2107 }
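	/*
	 * Illustrative example (made-up register value): the offset read
	 * above is in 128MB (1 << 27) units, so bits [22:11] reading 0x010
	 * increase chan_addr by 0x10 << 27 = 0x80000000 (2GB).
	 */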
2108
2109 f15h_select_dct(pvt, channel);
2110
2111 edac_dbg(1, " Normalized DCT addr: 0x%llx\n", chan_addr);
2112
2113 /*
2114 * Find Chip select:
2115 * if channel = 3, then alias it to 1. This is because, in F15 M30h,
 2116	 * there is support for 4 DCTs, but only 2 are currently functional.
 2117	 * They are DCT0 and DCT3. But we have read all registers of DCT3 into
 2118	 * pvt->csels[1]. So we need to use '1' here to get correct info.
 2119	 * Refer to the F15 M30h BKDG, Sections 2.10 and 2.10.3, for clarification.
2120 */
2121 alias_channel = (channel == 3) ? 1 : channel;
2122
2123 cs_found = f1x_lookup_addr_in_dct(chan_addr, node_id, alias_channel);
2124
2125 if (cs_found >= 0)
2126 *chan_sel = alias_channel;
2127
2128 return cs_found;
2129}
2130
2131static int f1x_translate_sysaddr_to_cs(struct amd64_pvt *pvt,
2132 u64 sys_addr,
2133 int *chan_sel)
2134{
2135 int cs_found = -EINVAL;
2136 unsigned range;
2137
2138 for (range = 0; range < DRAM_RANGES; range++) {
2139 if (!dram_rw(pvt, range))
2140 continue;
2141
2142 if (pvt->fam == 0x15 && pvt->model >= 0x30)
2143 cs_found = f15_m30h_match_to_this_node(pvt, range,
2144 sys_addr,
2145 chan_sel);
2146
2147 else if ((get_dram_base(pvt, range) <= sys_addr) &&
2148 (get_dram_limit(pvt, range) >= sys_addr)) {
2149 cs_found = f1x_match_to_this_node(pvt, range,
2150 sys_addr, chan_sel);
2151 if (cs_found >= 0)
2152 break;
2153 }
2154 }
2155 return cs_found;
2156}
2157
2158/*
2159 * For reference see "2.8.5 Routing DRAM Requests" in F10 BKDG. This code maps
2160 * a @sys_addr to NodeID, DCT (channel) and chip select (CSROW).
2161 *
2162 * The @sys_addr is usually an error address received from the hardware
2163 * (MCX_ADDR).
2164 */
2165static void f1x_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
2166 struct err_info *err)
2167{
2168 struct amd64_pvt *pvt = mci->pvt_info;
2169
2170 error_address_to_page_and_offset(sys_addr, err);
2171
2172 err->csrow = f1x_translate_sysaddr_to_cs(pvt, sys_addr, &err->channel);
2173 if (err->csrow < 0) {
2174 err->err_code = ERR_CSROW;
2175 return;
2176 }
2177
2178 /*
2179 * We need the syndromes for channel detection only when we're
2180 * ganged. Otherwise @chan should already contain the channel at
2181 * this point.
2182 */
2183 if (dct_ganging_enabled(pvt))
2184 err->channel = get_channel_from_ecc_syndrome(mci, err->syndrome);
2185}
2186
2187/*
 2188 * Debug routine to display the memory sizes of all logical DIMMs and their
 2189 * CSROWs.
2190 */
2191static void debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl)
2192{
2193 int dimm, size0, size1;
2194 u32 *dcsb = ctrl ? pvt->csels[1].csbases : pvt->csels[0].csbases;
2195 u32 dbam = ctrl ? pvt->dbam1 : pvt->dbam0;
2196
2197 if (pvt->fam == 0xf) {
2198 /* K8 families < revF not supported yet */
2199 if (pvt->ext_model < K8_REV_F)
2200 return;
2201 else
2202 WARN_ON(ctrl != 0);
2203 }
2204
2205 if (pvt->fam == 0x10) {
2206 dbam = (ctrl && !dct_ganging_enabled(pvt)) ? pvt->dbam1
2207 : pvt->dbam0;
2208 dcsb = (ctrl && !dct_ganging_enabled(pvt)) ?
2209 pvt->csels[1].csbases :
2210 pvt->csels[0].csbases;
2211 } else if (ctrl) {
2212 dbam = pvt->dbam0;
2213 dcsb = pvt->csels[1].csbases;
2214 }
2215 edac_dbg(1, "F2x%d80 (DRAM Bank Address Mapping): 0x%08x\n",
2216 ctrl, dbam);
2217
2218 edac_printk(KERN_DEBUG, EDAC_MC, "DCT%d chip selects:\n", ctrl);
2219
 2220	/* Dump memory sizes for each DIMM and its CSROWs */
2221 for (dimm = 0; dimm < 4; dimm++) {
2222
2223 size0 = 0;
2224 if (dcsb[dimm*2] & DCSB_CS_ENABLE)
2225 /*
2226 * For F15m60h, we need multiplier for LRDIMM cs_size
2227 * calculation. We pass dimm value to the dbam_to_cs
2228 * mapper so we can find the multiplier from the
2229 * corresponding DCSM.
2230 */
2231 size0 = pvt->ops->dbam_to_cs(pvt, ctrl,
2232 DBAM_DIMM(dimm, dbam),
2233 dimm);
2234
2235 size1 = 0;
2236 if (dcsb[dimm*2 + 1] & DCSB_CS_ENABLE)
2237 size1 = pvt->ops->dbam_to_cs(pvt, ctrl,
2238 DBAM_DIMM(dimm, dbam),
2239 dimm);
2240
2241 amd64_info(EDAC_MC ": %d: %5dMB %d: %5dMB\n",
2242 dimm * 2, size0,
2243 dimm * 2 + 1, size1);
2244 }
2245}
2246
2247static struct amd64_family_type family_types[] = {
2248 [K8_CPUS] = {
2249 .ctl_name = "K8",
2250 .f1_id = PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP,
2251 .f2_id = PCI_DEVICE_ID_AMD_K8_NB_MEMCTL,
2252 .ops = {
2253 .early_channel_count = k8_early_channel_count,
2254 .map_sysaddr_to_csrow = k8_map_sysaddr_to_csrow,
2255 .dbam_to_cs = k8_dbam_to_chip_select,
2256 }
2257 },
2258 [F10_CPUS] = {
2259 .ctl_name = "F10h",
2260 .f1_id = PCI_DEVICE_ID_AMD_10H_NB_MAP,
2261 .f2_id = PCI_DEVICE_ID_AMD_10H_NB_DRAM,
2262 .ops = {
2263 .early_channel_count = f1x_early_channel_count,
2264 .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow,
2265 .dbam_to_cs = f10_dbam_to_chip_select,
2266 }
2267 },
2268 [F15_CPUS] = {
2269 .ctl_name = "F15h",
2270 .f1_id = PCI_DEVICE_ID_AMD_15H_NB_F1,
2271 .f2_id = PCI_DEVICE_ID_AMD_15H_NB_F2,
2272 .ops = {
2273 .early_channel_count = f1x_early_channel_count,
2274 .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow,
2275 .dbam_to_cs = f15_dbam_to_chip_select,
2276 }
2277 },
2278 [F15_M30H_CPUS] = {
2279 .ctl_name = "F15h_M30h",
2280 .f1_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F1,
2281 .f2_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F2,
2282 .ops = {
2283 .early_channel_count = f1x_early_channel_count,
2284 .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow,
2285 .dbam_to_cs = f16_dbam_to_chip_select,
2286 }
2287 },
2288 [F15_M60H_CPUS] = {
2289 .ctl_name = "F15h_M60h",
2290 .f1_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F1,
2291 .f2_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F2,
2292 .ops = {
2293 .early_channel_count = f1x_early_channel_count,
2294 .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow,
2295 .dbam_to_cs = f15_m60h_dbam_to_chip_select,
2296 }
2297 },
2298 [F16_CPUS] = {
2299 .ctl_name = "F16h",
2300 .f1_id = PCI_DEVICE_ID_AMD_16H_NB_F1,
2301 .f2_id = PCI_DEVICE_ID_AMD_16H_NB_F2,
2302 .ops = {
2303 .early_channel_count = f1x_early_channel_count,
2304 .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow,
2305 .dbam_to_cs = f16_dbam_to_chip_select,
2306 }
2307 },
2308 [F16_M30H_CPUS] = {
2309 .ctl_name = "F16h_M30h",
2310 .f1_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F1,
2311 .f2_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F2,
2312 .ops = {
2313 .early_channel_count = f1x_early_channel_count,
2314 .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow,
2315 .dbam_to_cs = f16_dbam_to_chip_select,
2316 }
2317 },
2318 [F17_CPUS] = {
2319 .ctl_name = "F17h",
2320 .f0_id = PCI_DEVICE_ID_AMD_17H_DF_F0,
2321 .f6_id = PCI_DEVICE_ID_AMD_17H_DF_F6,
2322 .ops = {
2323 .early_channel_count = f17_early_channel_count,
2324 .dbam_to_cs = f17_addr_mask_to_cs_size,
2325 }
2326 },
2327 [F17_M10H_CPUS] = {
2328 .ctl_name = "F17h_M10h",
2329 .f0_id = PCI_DEVICE_ID_AMD_17H_M10H_DF_F0,
2330 .f6_id = PCI_DEVICE_ID_AMD_17H_M10H_DF_F6,
2331 .ops = {
2332 .early_channel_count = f17_early_channel_count,
2333 .dbam_to_cs = f17_addr_mask_to_cs_size,
2334 }
2335 },
2336 [F17_M30H_CPUS] = {
2337 .ctl_name = "F17h_M30h",
2338 .f0_id = PCI_DEVICE_ID_AMD_17H_M30H_DF_F0,
2339 .f6_id = PCI_DEVICE_ID_AMD_17H_M30H_DF_F6,
2340 .ops = {
2341 .early_channel_count = f17_early_channel_count,
2342 .dbam_to_cs = f17_addr_mask_to_cs_size,
2343 }
2344 },
2345 [F17_M60H_CPUS] = {
2346 .ctl_name = "F17h_M60h",
2347 .f0_id = PCI_DEVICE_ID_AMD_17H_M60H_DF_F0,
2348 .f6_id = PCI_DEVICE_ID_AMD_17H_M60H_DF_F6,
2349 .ops = {
2350 .early_channel_count = f17_early_channel_count,
2351 .dbam_to_cs = f17_addr_mask_to_cs_size,
2352 }
2353 },
2354 [F17_M70H_CPUS] = {
2355 .ctl_name = "F17h_M70h",
2356 .f0_id = PCI_DEVICE_ID_AMD_17H_M70H_DF_F0,
2357 .f6_id = PCI_DEVICE_ID_AMD_17H_M70H_DF_F6,
2358 .ops = {
2359 .early_channel_count = f17_early_channel_count,
2360 .dbam_to_cs = f17_addr_mask_to_cs_size,
2361 }
2362 },
2363};
2364
2365/*
2366 * These are tables of eigenvectors (one per line) which can be used for the
2367 * construction of the syndrome tables. The modified syndrome search algorithm
2368 * uses those to find the symbol in error and thus the DIMM.
2369 *
2370 * Algorithm courtesy of Ross LaFetra from AMD.
2371 */
2372static const u16 x4_vectors[] = {
2373 0x2f57, 0x1afe, 0x66cc, 0xdd88,
2374 0x11eb, 0x3396, 0x7f4c, 0xeac8,
2375 0x0001, 0x0002, 0x0004, 0x0008,
2376 0x1013, 0x3032, 0x4044, 0x8088,
2377 0x106b, 0x30d6, 0x70fc, 0xe0a8,
2378 0x4857, 0xc4fe, 0x13cc, 0x3288,
2379 0x1ac5, 0x2f4a, 0x5394, 0xa1e8,
2380 0x1f39, 0x251e, 0xbd6c, 0x6bd8,
2381 0x15c1, 0x2a42, 0x89ac, 0x4758,
2382 0x2b03, 0x1602, 0x4f0c, 0xca08,
2383 0x1f07, 0x3a0e, 0x6b04, 0xbd08,
2384 0x8ba7, 0x465e, 0x244c, 0x1cc8,
2385 0x2b87, 0x164e, 0x642c, 0xdc18,
2386 0x40b9, 0x80de, 0x1094, 0x20e8,
2387 0x27db, 0x1eb6, 0x9dac, 0x7b58,
2388 0x11c1, 0x2242, 0x84ac, 0x4c58,
2389 0x1be5, 0x2d7a, 0x5e34, 0xa718,
2390 0x4b39, 0x8d1e, 0x14b4, 0x28d8,
2391 0x4c97, 0xc87e, 0x11fc, 0x33a8,
2392 0x8e97, 0x497e, 0x2ffc, 0x1aa8,
2393 0x16b3, 0x3d62, 0x4f34, 0x8518,
2394 0x1e2f, 0x391a, 0x5cac, 0xf858,
2395 0x1d9f, 0x3b7a, 0x572c, 0xfe18,
2396 0x15f5, 0x2a5a, 0x5264, 0xa3b8,
2397 0x1dbb, 0x3b66, 0x715c, 0xe3f8,
2398 0x4397, 0xc27e, 0x17fc, 0x3ea8,
2399 0x1617, 0x3d3e, 0x6464, 0xb8b8,
2400 0x23ff, 0x12aa, 0xab6c, 0x56d8,
2401 0x2dfb, 0x1ba6, 0x913c, 0x7328,
2402 0x185d, 0x2ca6, 0x7914, 0x9e28,
2403 0x171b, 0x3e36, 0x7d7c, 0xebe8,
2404 0x4199, 0x82ee, 0x19f4, 0x2e58,
2405 0x4807, 0xc40e, 0x130c, 0x3208,
2406 0x1905, 0x2e0a, 0x5804, 0xac08,
2407 0x213f, 0x132a, 0xadfc, 0x5ba8,
2408 0x19a9, 0x2efe, 0xb5cc, 0x6f88,
2409};
2410
2411static const u16 x8_vectors[] = {
2412 0x0145, 0x028a, 0x2374, 0x43c8, 0xa1f0, 0x0520, 0x0a40, 0x1480,
2413 0x0211, 0x0422, 0x0844, 0x1088, 0x01b0, 0x44e0, 0x23c0, 0xed80,
2414 0x1011, 0x0116, 0x022c, 0x0458, 0x08b0, 0x8c60, 0x2740, 0x4e80,
2415 0x0411, 0x0822, 0x1044, 0x0158, 0x02b0, 0x2360, 0x46c0, 0xab80,
2416 0x0811, 0x1022, 0x012c, 0x0258, 0x04b0, 0x4660, 0x8cc0, 0x2780,
2417 0x2071, 0x40e2, 0xa0c4, 0x0108, 0x0210, 0x0420, 0x0840, 0x1080,
2418 0x4071, 0x80e2, 0x0104, 0x0208, 0x0410, 0x0820, 0x1040, 0x2080,
2419 0x8071, 0x0102, 0x0204, 0x0408, 0x0810, 0x1020, 0x2040, 0x4080,
2420 0x019d, 0x03d6, 0x136c, 0x2198, 0x50b0, 0xb2e0, 0x0740, 0x0e80,
2421 0x0189, 0x03ea, 0x072c, 0x0e58, 0x1cb0, 0x56e0, 0x37c0, 0xf580,
2422 0x01fd, 0x0376, 0x06ec, 0x0bb8, 0x1110, 0x2220, 0x4440, 0x8880,
2423 0x0163, 0x02c6, 0x1104, 0x0758, 0x0eb0, 0x2be0, 0x6140, 0xc280,
2424 0x02fd, 0x01c6, 0x0b5c, 0x1108, 0x07b0, 0x25a0, 0x8840, 0x6180,
2425 0x0801, 0x012e, 0x025c, 0x04b8, 0x1370, 0x26e0, 0x57c0, 0xb580,
2426 0x0401, 0x0802, 0x015c, 0x02b8, 0x22b0, 0x13e0, 0x7140, 0xe280,
2427 0x0201, 0x0402, 0x0804, 0x01b8, 0x11b0, 0x31a0, 0x8040, 0x7180,
2428 0x0101, 0x0202, 0x0404, 0x0808, 0x1010, 0x2020, 0x4040, 0x8080,
2429 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080,
2430 0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000, 0x8000,
2431};
2432
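/*
 * Illustrative walk-through (made-up syndrome value) of decode_syndrome()
 * below, for x4 symbols (v_dim == 4): the vectors for err_sym 2 are the third
 * x4_vectors row, { 0x0001, 0x0002, 0x0004, 0x0008 }. A syndrome of 0x0005 is
 * cancelled by XOR-ing 0x0001 and 0x0004, so decode_syndrome() returns 2, and
 * map_err_sym_to_channel(2, 4) maps that to channel 0 (2 >> 4 == 0).
 */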
2433static int decode_syndrome(u16 syndrome, const u16 *vectors, unsigned num_vecs,
2434 unsigned v_dim)
2435{
2436 unsigned int i, err_sym;
2437
2438 for (err_sym = 0; err_sym < num_vecs / v_dim; err_sym++) {
2439 u16 s = syndrome;
2440 unsigned v_idx = err_sym * v_dim;
2441 unsigned v_end = (err_sym + 1) * v_dim;
2442
2443 /* walk over all 16 bits of the syndrome */
2444 for (i = 1; i < (1U << 16); i <<= 1) {
2445
2446 /* if bit is set in that eigenvector... */
2447 if (v_idx < v_end && vectors[v_idx] & i) {
2448 u16 ev_comp = vectors[v_idx++];
2449
2450 /* ... and bit set in the modified syndrome, */
2451 if (s & i) {
2452 /* remove it. */
2453 s ^= ev_comp;
2454
2455 if (!s)
2456 return err_sym;
2457 }
2458
2459 } else if (s & i)
2460 /* can't get to zero, move to next symbol */
2461 break;
2462 }
2463 }
2464
2465 edac_dbg(0, "syndrome(%x) not found\n", syndrome);
2466 return -1;
2467}
2468
2469static int map_err_sym_to_channel(int err_sym, int sym_size)
2470{
2471 if (sym_size == 4)
2472 switch (err_sym) {
2473 case 0x20:
2474 case 0x21:
2475 return 0;
2476 break;
2477 case 0x22:
2478 case 0x23:
2479 return 1;
2480 break;
2481 default:
2482 return err_sym >> 4;
2483 break;
2484 }
2485 /* x8 symbols */
2486 else
2487 switch (err_sym) {
2488 /* imaginary bits not in a DIMM */
2489 case 0x10:
2490 WARN(1, KERN_ERR "Invalid error symbol: 0x%x\n",
2491 err_sym);
2492 return -1;
2493 break;
2494
2495 case 0x11:
2496 return 0;
2497 break;
2498 case 0x12:
2499 return 1;
2500 break;
2501 default:
2502 return err_sym >> 3;
2503 break;
2504 }
2505 return -1;
2506}
2507
2508static int get_channel_from_ecc_syndrome(struct mem_ctl_info *mci, u16 syndrome)
2509{
2510 struct amd64_pvt *pvt = mci->pvt_info;
2511 int err_sym = -1;
2512
2513 if (pvt->ecc_sym_sz == 8)
2514 err_sym = decode_syndrome(syndrome, x8_vectors,
2515 ARRAY_SIZE(x8_vectors),
2516 pvt->ecc_sym_sz);
2517 else if (pvt->ecc_sym_sz == 4)
2518 err_sym = decode_syndrome(syndrome, x4_vectors,
2519 ARRAY_SIZE(x4_vectors),
2520 pvt->ecc_sym_sz);
2521 else {
2522 amd64_warn("Illegal syndrome type: %u\n", pvt->ecc_sym_sz);
2523 return err_sym;
2524 }
2525
2526 return map_err_sym_to_channel(err_sym, pvt->ecc_sym_sz);
2527}
2528
2529static void __log_ecc_error(struct mem_ctl_info *mci, struct err_info *err,
2530 u8 ecc_type)
2531{
2532 enum hw_event_mc_err_type err_type;
2533 const char *string;
2534
2535 if (ecc_type == 2)
2536 err_type = HW_EVENT_ERR_CORRECTED;
2537 else if (ecc_type == 1)
2538 err_type = HW_EVENT_ERR_UNCORRECTED;
2539 else if (ecc_type == 3)
2540 err_type = HW_EVENT_ERR_DEFERRED;
2541 else {
2542 WARN(1, "Something is rotten in the state of Denmark.\n");
2543 return;
2544 }
2545
2546 switch (err->err_code) {
2547 case DECODE_OK:
2548 string = "";
2549 break;
2550 case ERR_NODE:
2551 string = "Failed to map error addr to a node";
2552 break;
2553 case ERR_CSROW:
2554 string = "Failed to map error addr to a csrow";
2555 break;
2556 case ERR_CHANNEL:
2557 string = "Unknown syndrome - possible error reporting race";
2558 break;
2559 case ERR_SYND:
2560 string = "MCA_SYND not valid - unknown syndrome and csrow";
2561 break;
2562 case ERR_NORM_ADDR:
2563 string = "Cannot decode normalized address";
2564 break;
2565 default:
2566 string = "WTF error";
2567 break;
2568 }
2569
2570 edac_mc_handle_error(err_type, mci, 1,
2571 err->page, err->offset, err->syndrome,
2572 err->csrow, err->channel, -1,
2573 string, "");
2574}
2575
2576static inline void decode_bus_error(int node_id, struct mce *m)
2577{
2578 struct mem_ctl_info *mci;
2579 struct amd64_pvt *pvt;
2580 u8 ecc_type = (m->status >> 45) & 0x3;
2581 u8 xec = XEC(m->status, 0x1f);
2582 u16 ec = EC(m->status);
2583 u64 sys_addr;
2584 struct err_info err;
2585
2586 mci = edac_mc_find(node_id);
2587 if (!mci)
2588 return;
2589
2590 pvt = mci->pvt_info;
2591
2592 /* Bail out early if this was an 'observed' error */
2593 if (PP(ec) == NBSL_PP_OBS)
2594 return;
2595
2596 /* Do only ECC errors */
2597 if (xec && xec != F10_NBSL_EXT_ERR_ECC)
2598 return;
2599
2600 memset(&err, 0, sizeof(err));
2601
2602 sys_addr = get_error_address(pvt, m);
2603
2604 if (ecc_type == 2)
2605 err.syndrome = extract_syndrome(m->status);
2606
2607 pvt->ops->map_sysaddr_to_csrow(mci, sys_addr, &err);
2608
2609 __log_ecc_error(mci, &err, ecc_type);
2610}
2611
2612/*
2613 * To find the UMC channel represented by this bank we need to match on its
2614 * instance_id. The instance_id of a bank is held in the lower 32 bits of its
2615 * IPID.
2616 *
2617 * Currently, we can derive the channel number by looking at the 6th nibble in
2618 * the instance_id. For example, instance_id=0xYXXXXX where Y is the channel
2619 * number.
2620 */
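/*
 * Illustrative example (made-up IPID value): if the low 32 bits of a bank's
 * IPID are 0x00750000, then (0x00750000 & GENMASK(31, 0)) >> 20 = 0x7, i.e.
 * the bank belongs to UMC channel 7.
 */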
2621static int find_umc_channel(struct mce *m)
2622{
2623 return (m->ipid & GENMASK(31, 0)) >> 20;
2624}
2625
2626static void decode_umc_error(int node_id, struct mce *m)
2627{
2628 u8 ecc_type = (m->status >> 45) & 0x3;
2629 struct mem_ctl_info *mci;
2630 struct amd64_pvt *pvt;
2631 struct err_info err;
2632 u64 sys_addr;
2633
2634 mci = edac_mc_find(node_id);
2635 if (!mci)
2636 return;
2637
2638 pvt = mci->pvt_info;
2639
2640 memset(&err, 0, sizeof(err));
2641
2642 if (m->status & MCI_STATUS_DEFERRED)
2643 ecc_type = 3;
2644
2645 err.channel = find_umc_channel(m);
2646
2647 if (!(m->status & MCI_STATUS_SYNDV)) {
2648 err.err_code = ERR_SYND;
2649 goto log_error;
2650 }
2651
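	/*
	 * Illustrative example (made-up MCA_SYND value) for the extraction
	 * below: with m->synd = 0x0000beef00400000, bits [23:18] give
	 * length = 0x10 (16 bits) and bits [63:32], masked with
	 * GENMASK(15, 0), give err.syndrome = 0xbeef.
	 */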
2652 if (ecc_type == 2) {
2653 u8 length = (m->synd >> 18) & 0x3f;
2654
2655 if (length)
2656 err.syndrome = (m->synd >> 32) & GENMASK(length - 1, 0);
2657 else
2658 err.err_code = ERR_CHANNEL;
2659 }
2660
2661 err.csrow = m->synd & 0x7;
2662
2663 if (umc_normaddr_to_sysaddr(m->addr, pvt->mc_node_id, err.channel, &sys_addr)) {
2664 err.err_code = ERR_NORM_ADDR;
2665 goto log_error;
2666 }
2667
2668 error_address_to_page_and_offset(sys_addr, &err);
2669
2670log_error:
2671 __log_ecc_error(mci, &err, ecc_type);
2672}
2673
2674/*
2675 * Use pvt->F3 which contains the F3 CPU PCI device to get the related
2676 * F1 (AddrMap) and F2 (Dct) devices. Return negative value on error.
2677 * Reserve F0 and F6 on systems with a UMC.
2678 */
2679static int
2680reserve_mc_sibling_devs(struct amd64_pvt *pvt, u16 pci_id1, u16 pci_id2)
2681{
2682 if (pvt->umc) {
2683 pvt->F0 = pci_get_related_function(pvt->F3->vendor, pci_id1, pvt->F3);
2684 if (!pvt->F0) {
2685 amd64_err("F0 not found, device 0x%x (broken BIOS?)\n", pci_id1);
2686 return -ENODEV;
2687 }
2688
2689 pvt->F6 = pci_get_related_function(pvt->F3->vendor, pci_id2, pvt->F3);
2690 if (!pvt->F6) {
2691 pci_dev_put(pvt->F0);
2692 pvt->F0 = NULL;
2693
2694 amd64_err("F6 not found: device 0x%x (broken BIOS?)\n", pci_id2);
2695 return -ENODEV;
2696 }
2697
2698 if (!pci_ctl_dev)
2699 pci_ctl_dev = &pvt->F0->dev;
2700
2701 edac_dbg(1, "F0: %s\n", pci_name(pvt->F0));
2702 edac_dbg(1, "F3: %s\n", pci_name(pvt->F3));
2703 edac_dbg(1, "F6: %s\n", pci_name(pvt->F6));
2704
2705 return 0;
2706 }
2707
2708 /* Reserve the ADDRESS MAP Device */
2709 pvt->F1 = pci_get_related_function(pvt->F3->vendor, pci_id1, pvt->F3);
2710 if (!pvt->F1) {
2711 amd64_err("F1 not found: device 0x%x (broken BIOS?)\n", pci_id1);
2712 return -ENODEV;
2713 }
2714
2715 /* Reserve the DCT Device */
2716 pvt->F2 = pci_get_related_function(pvt->F3->vendor, pci_id2, pvt->F3);
2717 if (!pvt->F2) {
2718 pci_dev_put(pvt->F1);
2719 pvt->F1 = NULL;
2720
2721 amd64_err("F2 not found: device 0x%x (broken BIOS?)\n", pci_id2);
2722 return -ENODEV;
2723 }
2724
2725 if (!pci_ctl_dev)
2726 pci_ctl_dev = &pvt->F2->dev;
2727
2728 edac_dbg(1, "F1: %s\n", pci_name(pvt->F1));
2729 edac_dbg(1, "F2: %s\n", pci_name(pvt->F2));
2730 edac_dbg(1, "F3: %s\n", pci_name(pvt->F3));
2731
2732 return 0;
2733}
2734
2735static void free_mc_sibling_devs(struct amd64_pvt *pvt)
2736{
2737 if (pvt->umc) {
2738 pci_dev_put(pvt->F0);
2739 pci_dev_put(pvt->F6);
2740 } else {
2741 pci_dev_put(pvt->F1);
2742 pci_dev_put(pvt->F2);
2743 }
2744}
2745
2746static void determine_ecc_sym_sz(struct amd64_pvt *pvt)
2747{
2748 pvt->ecc_sym_sz = 4;
2749
2750 if (pvt->umc) {
2751 u8 i;
2752
2753 for_each_umc(i) {
2754 /* Check enabled channels only: */
2755 if (pvt->umc[i].sdp_ctrl & UMC_SDP_INIT) {
2756 if (pvt->umc[i].ecc_ctrl & BIT(9)) {
2757 pvt->ecc_sym_sz = 16;
2758 return;
2759 } else if (pvt->umc[i].ecc_ctrl & BIT(7)) {
2760 pvt->ecc_sym_sz = 8;
2761 return;
2762 }
2763 }
2764 }
2765 } else if (pvt->fam >= 0x10) {
2766 u32 tmp;
2767
2768 amd64_read_pci_cfg(pvt->F3, EXT_NB_MCA_CFG, &tmp);
2769 /* F16h has only DCT0, so no need to read dbam1. */
2770 if (pvt->fam != 0x16)
2771 amd64_read_dct_pci_cfg(pvt, 1, DBAM0, &pvt->dbam1);
2772
2773 /* F10h, revD and later can do x8 ECC too. */
2774 if ((pvt->fam > 0x10 || pvt->model > 7) && tmp & BIT(25))
2775 pvt->ecc_sym_sz = 8;
2776 }
2777}
2778
2779/*
2780 * Retrieve the hardware registers of the memory controller.
2781 */
2782static void __read_mc_regs_df(struct amd64_pvt *pvt)
2783{
2784 u8 nid = pvt->mc_node_id;
2785 struct amd64_umc *umc;
2786 u32 i, umc_base;
2787
2788 /* Read registers from each UMC */
2789 for_each_umc(i) {
2790
2791 umc_base = get_umc_base(i);
2792 umc = &pvt->umc[i];
2793
2794 amd_smn_read(nid, umc_base + UMCCH_DIMM_CFG, &umc->dimm_cfg);
2795 amd_smn_read(nid, umc_base + UMCCH_UMC_CFG, &umc->umc_cfg);
2796 amd_smn_read(nid, umc_base + UMCCH_SDP_CTRL, &umc->sdp_ctrl);
2797 amd_smn_read(nid, umc_base + UMCCH_ECC_CTRL, &umc->ecc_ctrl);
2798 amd_smn_read(nid, umc_base + UMCCH_UMC_CAP_HI, &umc->umc_cap_hi);
2799 }
2800}
2801
2802/*
2803 * Retrieve the hardware registers of the memory controller (this includes the
2804 * 'Address Map' and 'Misc' device regs)
2805 */
2806static void read_mc_regs(struct amd64_pvt *pvt)
2807{
2808 unsigned int range;
2809 u64 msr_val;
2810
2811 /*
2812 * Retrieve TOP_MEM and TOP_MEM2; no masking off of reserved bits since
2813 * those are Read-As-Zero.
2814 */
2815 rdmsrl(MSR_K8_TOP_MEM1, pvt->top_mem);
2816 edac_dbg(0, " TOP_MEM: 0x%016llx\n", pvt->top_mem);
2817
2818 /* Check first whether TOP_MEM2 is enabled: */
2819 rdmsrl(MSR_K8_SYSCFG, msr_val);
2820 if (msr_val & BIT(21)) {
2821 rdmsrl(MSR_K8_TOP_MEM2, pvt->top_mem2);
2822 edac_dbg(0, " TOP_MEM2: 0x%016llx\n", pvt->top_mem2);
2823 } else {
2824 edac_dbg(0, " TOP_MEM2 disabled\n");
2825 }
2826
2827 if (pvt->umc) {
2828 __read_mc_regs_df(pvt);
2829 amd64_read_pci_cfg(pvt->F0, DF_DHAR, &pvt->dhar);
2830
2831 goto skip;
2832 }
2833
2834 amd64_read_pci_cfg(pvt->F3, NBCAP, &pvt->nbcap);
2835
2836 read_dram_ctl_register(pvt);
2837
2838 for (range = 0; range < DRAM_RANGES; range++) {
2839 u8 rw;
2840
2841 /* read settings for this DRAM range */
2842 read_dram_base_limit_regs(pvt, range);
2843
2844 rw = dram_rw(pvt, range);
2845 if (!rw)
2846 continue;
2847
2848 edac_dbg(1, " DRAM range[%d], base: 0x%016llx; limit: 0x%016llx\n",
2849 range,
2850 get_dram_base(pvt, range),
2851 get_dram_limit(pvt, range));
2852
2853 edac_dbg(1, " IntlvEn=%s; Range access: %s%s IntlvSel=%d DstNode=%d\n",
2854 dram_intlv_en(pvt, range) ? "Enabled" : "Disabled",
2855 (rw & 0x1) ? "R" : "-",
2856 (rw & 0x2) ? "W" : "-",
2857 dram_intlv_sel(pvt, range),
2858 dram_dst_node(pvt, range));
2859 }
2860
2861 amd64_read_pci_cfg(pvt->F1, DHAR, &pvt->dhar);
2862 amd64_read_dct_pci_cfg(pvt, 0, DBAM0, &pvt->dbam0);
2863
2864 amd64_read_pci_cfg(pvt->F3, F10_ONLINE_SPARE, &pvt->online_spare);
2865
2866 amd64_read_dct_pci_cfg(pvt, 0, DCLR0, &pvt->dclr0);
2867 amd64_read_dct_pci_cfg(pvt, 0, DCHR0, &pvt->dchr0);
2868
2869 if (!dct_ganging_enabled(pvt)) {
2870 amd64_read_dct_pci_cfg(pvt, 1, DCLR0, &pvt->dclr1);
2871 amd64_read_dct_pci_cfg(pvt, 1, DCHR0, &pvt->dchr1);
2872 }
2873
2874skip:
2875 read_dct_base_mask(pvt);
2876
2877 determine_memory_type(pvt);
2878 edac_dbg(1, " DIMM type: %s\n", edac_mem_types[pvt->dram_type]);
2879
2880 determine_ecc_sym_sz(pvt);
2881
2882 dump_misc_regs(pvt);
2883}
2884
2885/*
2886 * NOTE: CPU Revision Dependent code
2887 *
2888 * Input:
2889 * @csrow_nr ChipSelect Row Number (0..NUM_CHIPSELECTS-1)
2890 * k8 private pointer to -->
2891 * DRAM Bank Address mapping register
2892 * node_id
2893 * DCL register where dual_channel_active is
2894 *
 2895 * The DBAM register consists of 4 sets of 4 bits each, defined as follows:
2896 *
2897 * Bits: CSROWs
2898 * 0-3 CSROWs 0 and 1
2899 * 4-7 CSROWs 2 and 3
2900 * 8-11 CSROWs 4 and 5
2901 * 12-15 CSROWs 6 and 7
2902 *
2903 * Values range from: 0 to 15
2904 * The meaning of the values depends on CPU revision and dual-channel state,
2905 * see relevant BKDG more info.
2906 *
2907 * The memory controller provides for total of only 8 CSROWs in its current
2908 * architecture. Each "pair" of CSROWs normally represents just one DIMM in
2909 * single channel or two (2) DIMMs in dual channel mode.
2910 *
2911 * The following code logic collapses the various tables for CSROW based on CPU
2912 * revision.
2913 *
2914 * Returns:
 2915 *	The number of PAGE_SIZE pages that the specified CSROW number
 2916 *	encompasses.
2917 *
2918 */
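/*
 * Illustrative example (made-up values): with dbam = 0x00003210, the per-DIMM
 * nibbles give cs_mode 0x0 for csrows 0/1, 0x1 for csrows 2/3, 0x2 for
 * csrows 4/5 and 0x3 for csrows 6/7. If dbam_to_cs() resolves a chip select
 * to 2048MB, the conversion below reports 2048 << (20 - PAGE_SHIFT) = 524288
 * pages with 4KB pages.
 */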
2919static u32 get_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr_orig)
2920{
2921 u32 dbam = dct ? pvt->dbam1 : pvt->dbam0;
2922 int csrow_nr = csrow_nr_orig;
2923 u32 cs_mode, nr_pages;
2924
2925 if (!pvt->umc) {
2926 csrow_nr >>= 1;
2927 cs_mode = DBAM_DIMM(csrow_nr, dbam);
2928 } else {
2929 cs_mode = f17_get_cs_mode(csrow_nr >> 1, dct, pvt);
2930 }
2931
2932 nr_pages = pvt->ops->dbam_to_cs(pvt, dct, cs_mode, csrow_nr);
2933 nr_pages <<= 20 - PAGE_SHIFT;
2934
2935 edac_dbg(0, "csrow: %d, channel: %d, DBAM idx: %d\n",
2936 csrow_nr_orig, dct, cs_mode);
2937 edac_dbg(0, "nr_pages/channel: %u\n", nr_pages);
2938
2939 return nr_pages;
2940}
2941
2942static int init_csrows_df(struct mem_ctl_info *mci)
2943{
2944 struct amd64_pvt *pvt = mci->pvt_info;
2945 enum edac_type edac_mode = EDAC_NONE;
2946 enum dev_type dev_type = DEV_UNKNOWN;
2947 struct dimm_info *dimm;
2948 int empty = 1;
2949 u8 umc, cs;
2950
2951 if (mci->edac_ctl_cap & EDAC_FLAG_S16ECD16ED) {
2952 edac_mode = EDAC_S16ECD16ED;
2953 dev_type = DEV_X16;
2954 } else if (mci->edac_ctl_cap & EDAC_FLAG_S8ECD8ED) {
2955 edac_mode = EDAC_S8ECD8ED;
2956 dev_type = DEV_X8;
2957 } else if (mci->edac_ctl_cap & EDAC_FLAG_S4ECD4ED) {
2958 edac_mode = EDAC_S4ECD4ED;
2959 dev_type = DEV_X4;
2960 } else if (mci->edac_ctl_cap & EDAC_FLAG_SECDED) {
2961 edac_mode = EDAC_SECDED;
2962 }
2963
2964 for_each_umc(umc) {
2965 for_each_chip_select(cs, umc, pvt) {
2966 if (!csrow_enabled(cs, umc, pvt))
2967 continue;
2968
2969 empty = 0;
2970 dimm = mci->csrows[cs]->channels[umc]->dimm;
2971
2972 edac_dbg(1, "MC node: %d, csrow: %d\n",
2973 pvt->mc_node_id, cs);
2974
2975 dimm->nr_pages = get_csrow_nr_pages(pvt, umc, cs);
2976 dimm->mtype = pvt->dram_type;
2977 dimm->edac_mode = edac_mode;
2978 dimm->dtype = dev_type;
2979 dimm->grain = 64;
2980 }
2981 }
2982
2983 return empty;
2984}
2985
2986/*
2987 * Initialize the array of csrow attribute instances, based on the values
2988 * from pci config hardware registers.
2989 */
2990static int init_csrows(struct mem_ctl_info *mci)
2991{
2992 struct amd64_pvt *pvt = mci->pvt_info;
2993 enum edac_type edac_mode = EDAC_NONE;
2994 struct csrow_info *csrow;
2995 struct dimm_info *dimm;
2996 int i, j, empty = 1;
2997 int nr_pages = 0;
2998 u32 val;
2999
3000 if (pvt->umc)
3001 return init_csrows_df(mci);
3002
3003 amd64_read_pci_cfg(pvt->F3, NBCFG, &val);
3004
3005 pvt->nbcfg = val;
3006
3007 edac_dbg(0, "node %d, NBCFG=0x%08x[ChipKillEccCap: %d|DramEccEn: %d]\n",
3008 pvt->mc_node_id, val,
3009 !!(val & NBCFG_CHIPKILL), !!(val & NBCFG_ECC_ENABLE));
3010
3011 /*
3012 * We iterate over DCT0 here but we look at DCT1 in parallel, if needed.
3013 */
3014 for_each_chip_select(i, 0, pvt) {
3015 bool row_dct0 = !!csrow_enabled(i, 0, pvt);
3016 bool row_dct1 = false;
3017
3018 if (pvt->fam != 0xf)
3019 row_dct1 = !!csrow_enabled(i, 1, pvt);
3020
3021 if (!row_dct0 && !row_dct1)
3022 continue;
3023
3024 csrow = mci->csrows[i];
3025 empty = 0;
3026
3027 edac_dbg(1, "MC node: %d, csrow: %d\n",
3028 pvt->mc_node_id, i);
3029
3030 if (row_dct0) {
3031 nr_pages = get_csrow_nr_pages(pvt, 0, i);
3032 csrow->channels[0]->dimm->nr_pages = nr_pages;
3033 }
3034
3035 /* K8 has only one DCT */
3036 if (pvt->fam != 0xf && row_dct1) {
3037 int row_dct1_pages = get_csrow_nr_pages(pvt, 1, i);
3038
3039 csrow->channels[1]->dimm->nr_pages = row_dct1_pages;
3040 nr_pages += row_dct1_pages;
3041 }
3042
3043 edac_dbg(1, "Total csrow%d pages: %u\n", i, nr_pages);
3044
3045 /* Determine DIMM ECC mode: */
3046 if (pvt->nbcfg & NBCFG_ECC_ENABLE) {
3047 edac_mode = (pvt->nbcfg & NBCFG_CHIPKILL)
3048 ? EDAC_S4ECD4ED
3049 : EDAC_SECDED;
3050 }
3051
3052 for (j = 0; j < pvt->channel_count; j++) {
3053 dimm = csrow->channels[j]->dimm;
3054 dimm->mtype = pvt->dram_type;
3055 dimm->edac_mode = edac_mode;
3056 dimm->grain = 64;
3057 }
3058 }
3059
3060 return empty;
3061}
3062
3063/* get all cores on this DCT */
3064static void get_cpus_on_this_dct_cpumask(struct cpumask *mask, u16 nid)
3065{
3066 int cpu;
3067
3068 for_each_online_cpu(cpu)
3069 if (amd_get_nb_id(cpu) == nid)
3070 cpumask_set_cpu(cpu, mask);
3071}
3072
3073/* check MCG_CTL on all the cpus on this node */
3074static bool nb_mce_bank_enabled_on_node(u16 nid)
3075{
3076 cpumask_var_t mask;
3077 int cpu, nbe;
3078 bool ret = false;
3079
3080 if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) {
3081 amd64_warn("%s: Error allocating mask\n", __func__);
3082 return false;
3083 }
3084
3085 get_cpus_on_this_dct_cpumask(mask, nid);
3086
3087 rdmsr_on_cpus(mask, MSR_IA32_MCG_CTL, msrs);
3088
3089 for_each_cpu(cpu, mask) {
3090 struct msr *reg = per_cpu_ptr(msrs, cpu);
3091 nbe = reg->l & MSR_MCGCTL_NBE;
3092
3093 edac_dbg(0, "core: %u, MCG_CTL: 0x%llx, NB MSR is %s\n",
3094 cpu, reg->q,
3095 (nbe ? "enabled" : "disabled"));
3096
3097 if (!nbe)
3098 goto out;
3099 }
3100 ret = true;
3101
3102out:
3103 free_cpumask_var(mask);
3104 return ret;
3105}
3106
3107static int toggle_ecc_err_reporting(struct ecc_settings *s, u16 nid, bool on)
3108{
3109 cpumask_var_t cmask;
3110 int cpu;
3111
3112 if (!zalloc_cpumask_var(&cmask, GFP_KERNEL)) {
3113 amd64_warn("%s: error allocating mask\n", __func__);
3114 return -ENOMEM;
3115 }
3116
3117 get_cpus_on_this_dct_cpumask(cmask, nid);
3118
3119 rdmsr_on_cpus(cmask, MSR_IA32_MCG_CTL, msrs);
3120
3121 for_each_cpu(cpu, cmask) {
3122
3123 struct msr *reg = per_cpu_ptr(msrs, cpu);
3124
3125 if (on) {
3126 if (reg->l & MSR_MCGCTL_NBE)
3127 s->flags.nb_mce_enable = 1;
3128
3129 reg->l |= MSR_MCGCTL_NBE;
3130 } else {
3131 /*
3132 * Turn off NB MCE reporting only when it was off before
3133 */
3134 if (!s->flags.nb_mce_enable)
3135 reg->l &= ~MSR_MCGCTL_NBE;
3136 }
3137 }
3138 wrmsr_on_cpus(cmask, MSR_IA32_MCG_CTL, msrs);
3139
3140 free_cpumask_var(cmask);
3141
3142 return 0;
3143}
3144
3145static bool enable_ecc_error_reporting(struct ecc_settings *s, u16 nid,
3146 struct pci_dev *F3)
3147{
3148 bool ret = true;
3149 u32 value, mask = 0x3; /* UECC/CECC enable */
3150
3151 if (toggle_ecc_err_reporting(s, nid, ON)) {
3152 amd64_warn("Error enabling ECC reporting over MCGCTL!\n");
3153 return false;
3154 }
3155
3156 amd64_read_pci_cfg(F3, NBCTL, &value);
3157
3158 s->old_nbctl = value & mask;
3159 s->nbctl_valid = true;
3160
3161 value |= mask;
3162 amd64_write_pci_cfg(F3, NBCTL, value);
3163
3164 amd64_read_pci_cfg(F3, NBCFG, &value);
3165
3166 edac_dbg(0, "1: node %d, NBCFG=0x%08x[DramEccEn: %d]\n",
3167 nid, value, !!(value & NBCFG_ECC_ENABLE));
3168
3169 if (!(value & NBCFG_ECC_ENABLE)) {
3170 amd64_warn("DRAM ECC disabled on this node, enabling...\n");
3171
3172 s->flags.nb_ecc_prev = 0;
3173
3174 /* Attempt to turn on DRAM ECC Enable */
3175 value |= NBCFG_ECC_ENABLE;
3176 amd64_write_pci_cfg(F3, NBCFG, value);
3177
3178 amd64_read_pci_cfg(F3, NBCFG, &value);
3179
3180 if (!(value & NBCFG_ECC_ENABLE)) {
 3181			amd64_warn("Hardware rejected DRAM ECC enable, "
 3182				   "check memory DIMM configuration.\n");
3183 ret = false;
3184 } else {
3185 amd64_info("Hardware accepted DRAM ECC Enable\n");
3186 }
3187 } else {
3188 s->flags.nb_ecc_prev = 1;
3189 }
3190
3191 edac_dbg(0, "2: node %d, NBCFG=0x%08x[DramEccEn: %d]\n",
3192 nid, value, !!(value & NBCFG_ECC_ENABLE));
3193
3194 return ret;
3195}
3196
3197static void restore_ecc_error_reporting(struct ecc_settings *s, u16 nid,
3198 struct pci_dev *F3)
3199{
3200 u32 value, mask = 0x3; /* UECC/CECC enable */
3201
3202 if (!s->nbctl_valid)
3203 return;
3204
3205 amd64_read_pci_cfg(F3, NBCTL, &value);
3206 value &= ~mask;
3207 value |= s->old_nbctl;
3208
3209 amd64_write_pci_cfg(F3, NBCTL, value);
3210
3211 /* restore previous BIOS DRAM ECC "off" setting we force-enabled */
3212 if (!s->flags.nb_ecc_prev) {
3213 amd64_read_pci_cfg(F3, NBCFG, &value);
3214 value &= ~NBCFG_ECC_ENABLE;
3215 amd64_write_pci_cfg(F3, NBCFG, value);
3216 }
3217
3218 /* restore the NB Enable MCGCTL bit */
3219 if (toggle_ecc_err_reporting(s, nid, OFF))
3220 amd64_warn("Error restoring NB MCGCTL settings!\n");
3221}
3222
3223/*
3224 * EDAC requires that the BIOS have ECC enabled before
3225 * taking over the processing of ECC errors. A command line
 3226 * option allows force-enabling hardware ECC later in
3227 * enable_ecc_error_reporting().
3228 */
3229static const char *ecc_msg =
3230 "ECC disabled in the BIOS or no ECC capability, module will not load.\n"
3231 " Either enable ECC checking or force module loading by setting "
3232 "'ecc_enable_override'.\n"
3233 " (Note that use of the override may cause unknown side effects.)\n";
3234
3235static bool ecc_enabled(struct pci_dev *F3, u16 nid)
3236{
3237 bool nb_mce_en = false;
3238 u8 ecc_en = 0, i;
3239 u32 value;
3240
3241 if (boot_cpu_data.x86 >= 0x17) {
3242 u8 umc_en_mask = 0, ecc_en_mask = 0;
3243
3244 for_each_umc(i) {
3245 u32 base = get_umc_base(i);
3246
3247 /* Only check enabled UMCs. */
3248 if (amd_smn_read(nid, base + UMCCH_SDP_CTRL, &value))
3249 continue;
3250
3251 if (!(value & UMC_SDP_INIT))
3252 continue;
3253
3254 umc_en_mask |= BIT(i);
3255
3256 if (amd_smn_read(nid, base + UMCCH_UMC_CAP_HI, &value))
3257 continue;
3258
3259 if (value & UMC_ECC_ENABLED)
3260 ecc_en_mask |= BIT(i);
3261 }
3262
3263 /* Check whether at least one UMC is enabled: */
3264 if (umc_en_mask)
3265 ecc_en = umc_en_mask == ecc_en_mask;
3266 else
3267 edac_dbg(0, "Node %d: No enabled UMCs.\n", nid);
3268
3269 /* Assume UMC MCA banks are enabled. */
3270 nb_mce_en = true;
3271 } else {
3272 amd64_read_pci_cfg(F3, NBCFG, &value);
3273
3274 ecc_en = !!(value & NBCFG_ECC_ENABLE);
3275
3276 nb_mce_en = nb_mce_bank_enabled_on_node(nid);
3277 if (!nb_mce_en)
3278 edac_dbg(0, "NB MCE bank disabled, set MSR 0x%08x[4] on node %d to enable.\n",
3279 MSR_IA32_MCG_CTL, nid);
3280 }
3281
3282 amd64_info("Node %d: DRAM ECC %s.\n",
3283 nid, (ecc_en ? "enabled" : "disabled"));
3284
3285 if (!ecc_en || !nb_mce_en) {
3286 amd64_info("%s", ecc_msg);
3287 return false;
3288 }
3289 return true;
3290}
3291
3292static inline void
3293f17h_determine_edac_ctl_cap(struct mem_ctl_info *mci, struct amd64_pvt *pvt)
3294{
3295 u8 i, ecc_en = 1, cpk_en = 1, dev_x4 = 1, dev_x16 = 1;
3296
3297 for_each_umc(i) {
3298 if (pvt->umc[i].sdp_ctrl & UMC_SDP_INIT) {
3299 ecc_en &= !!(pvt->umc[i].umc_cap_hi & UMC_ECC_ENABLED);
3300 cpk_en &= !!(pvt->umc[i].umc_cap_hi & UMC_ECC_CHIPKILL_CAP);
3301
3302 dev_x4 &= !!(pvt->umc[i].dimm_cfg & BIT(6));
3303 dev_x16 &= !!(pvt->umc[i].dimm_cfg & BIT(7));
3304 }
3305 }
3306
3307 /* Set chipkill only if ECC is enabled: */
3308 if (ecc_en) {
3309 mci->edac_ctl_cap |= EDAC_FLAG_SECDED;
3310
3311 if (!cpk_en)
3312 return;
3313
3314 if (dev_x4)
3315 mci->edac_ctl_cap |= EDAC_FLAG_S4ECD4ED;
3316 else if (dev_x16)
3317 mci->edac_ctl_cap |= EDAC_FLAG_S16ECD16ED;
3318 else
3319 mci->edac_ctl_cap |= EDAC_FLAG_S8ECD8ED;
3320 }
3321}
3322
3323static void setup_mci_misc_attrs(struct mem_ctl_info *mci,
3324 struct amd64_family_type *fam)
3325{
3326 struct amd64_pvt *pvt = mci->pvt_info;
3327
3328 mci->mtype_cap = MEM_FLAG_DDR2 | MEM_FLAG_RDDR2;
3329 mci->edac_ctl_cap = EDAC_FLAG_NONE;
3330
3331 if (pvt->umc) {
3332 f17h_determine_edac_ctl_cap(mci, pvt);
3333 } else {
3334 if (pvt->nbcap & NBCAP_SECDED)
3335 mci->edac_ctl_cap |= EDAC_FLAG_SECDED;
3336
3337 if (pvt->nbcap & NBCAP_CHIPKILL)
3338 mci->edac_ctl_cap |= EDAC_FLAG_S4ECD4ED;
3339 }
3340
3341 mci->edac_cap = determine_edac_cap(pvt);
3342 mci->mod_name = EDAC_MOD_STR;
3343 mci->ctl_name = fam->ctl_name;
3344 mci->dev_name = pci_name(pvt->F3);
3345 mci->ctl_page_to_phys = NULL;
3346
3347 /* memory scrubber interface */
3348 mci->set_sdram_scrub_rate = set_scrub_rate;
3349 mci->get_sdram_scrub_rate = get_scrub_rate;
3350}
3351
3352/*
3353 * returns a pointer to the family descriptor on success, NULL otherwise.
3354 */
3355static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt)
3356{
3357 struct amd64_family_type *fam_type = NULL;
3358
3359 pvt->ext_model = boot_cpu_data.x86_model >> 4;
3360 pvt->stepping = boot_cpu_data.x86_stepping;
3361 pvt->model = boot_cpu_data.x86_model;
3362 pvt->fam = boot_cpu_data.x86;
3363
3364 switch (pvt->fam) {
3365 case 0xf:
3366 fam_type = &family_types[K8_CPUS];
3367 pvt->ops = &family_types[K8_CPUS].ops;
3368 break;
3369
3370 case 0x10:
3371 fam_type = &family_types[F10_CPUS];
3372 pvt->ops = &family_types[F10_CPUS].ops;
3373 break;
3374
3375 case 0x15:
3376 if (pvt->model == 0x30) {
3377 fam_type = &family_types[F15_M30H_CPUS];
3378 pvt->ops = &family_types[F15_M30H_CPUS].ops;
3379 break;
3380 } else if (pvt->model == 0x60) {
3381 fam_type = &family_types[F15_M60H_CPUS];
3382 pvt->ops = &family_types[F15_M60H_CPUS].ops;
3383 break;
3384 }
3385
3386 fam_type = &family_types[F15_CPUS];
3387 pvt->ops = &family_types[F15_CPUS].ops;
3388 break;
3389
3390 case 0x16:
3391 if (pvt->model == 0x30) {
3392 fam_type = &family_types[F16_M30H_CPUS];
3393 pvt->ops = &family_types[F16_M30H_CPUS].ops;
3394 break;
3395 }
3396 fam_type = &family_types[F16_CPUS];
3397 pvt->ops = &family_types[F16_CPUS].ops;
3398 break;
3399
3400 case 0x17:
3401 if (pvt->model >= 0x10 && pvt->model <= 0x2f) {
3402 fam_type = &family_types[F17_M10H_CPUS];
3403 pvt->ops = &family_types[F17_M10H_CPUS].ops;
3404 break;
3405 } else if (pvt->model >= 0x30 && pvt->model <= 0x3f) {
3406 fam_type = &family_types[F17_M30H_CPUS];
3407 pvt->ops = &family_types[F17_M30H_CPUS].ops;
3408 break;
3409 } else if (pvt->model >= 0x60 && pvt->model <= 0x6f) {
3410 fam_type = &family_types[F17_M60H_CPUS];
3411 pvt->ops = &family_types[F17_M60H_CPUS].ops;
3412 break;
3413 } else if (pvt->model >= 0x70 && pvt->model <= 0x7f) {
3414 fam_type = &family_types[F17_M70H_CPUS];
3415 pvt->ops = &family_types[F17_M70H_CPUS].ops;
3416 break;
3417 }
3418 /* fall through */
3419 case 0x18:
3420 fam_type = &family_types[F17_CPUS];
3421 pvt->ops = &family_types[F17_CPUS].ops;
3422
3423 if (pvt->fam == 0x18)
3424 family_types[F17_CPUS].ctl_name = "F18h";
3425 break;
3426
3427 default:
3428 amd64_err("Unsupported family!\n");
3429 return NULL;
3430 }
3431
3432 amd64_info("%s %sdetected (node %d).\n", fam_type->ctl_name,
3433 (pvt->fam == 0xf ?
3434 (pvt->ext_model >= K8_REV_F ? "revF or later "
3435 : "revE or earlier ")
3436 : ""), pvt->mc_node_id);
3437 return fam_type;
3438}
3439
3440static const struct attribute_group *amd64_edac_attr_groups[] = {
3441#ifdef CONFIG_EDAC_DEBUG
3442 &amd64_edac_dbg_group,
3443#endif
3444#ifdef CONFIG_EDAC_AMD64_ERROR_INJECTION
3445 &amd64_edac_inj_group,
3446#endif
3447 NULL
3448};
3449
3450/* Set the number of Unified Memory Controllers in the system. */
3451static void compute_num_umcs(void)
3452{
3453 u8 model = boot_cpu_data.x86_model;
3454
3455 if (boot_cpu_data.x86 < 0x17)
3456 return;
3457
3458 if (model >= 0x30 && model <= 0x3f)
3459 num_umcs = 8;
3460 else
3461 num_umcs = 2;
3462
 3463	edac_dbg(1, "Number of UMCs: %x\n", num_umcs);
3464}
3465
3466static int init_one_instance(unsigned int nid)
3467{
3468 struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
3469 struct amd64_family_type *fam_type = NULL;
3470 struct mem_ctl_info *mci = NULL;
3471 struct edac_mc_layer layers[2];
3472 struct amd64_pvt *pvt = NULL;
3473 u16 pci_id1, pci_id2;
3474 int err = 0, ret;
3475
3476 ret = -ENOMEM;
3477 pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL);
3478 if (!pvt)
3479 goto err_ret;
3480
3481 pvt->mc_node_id = nid;
3482 pvt->F3 = F3;
3483
3484 ret = -EINVAL;
3485 fam_type = per_family_init(pvt);
3486 if (!fam_type)
3487 goto err_free;
3488
3489 if (pvt->fam >= 0x17) {
3490 pvt->umc = kcalloc(num_umcs, sizeof(struct amd64_umc), GFP_KERNEL);
3491 if (!pvt->umc) {
3492 ret = -ENOMEM;
3493 goto err_free;
3494 }
3495
3496 pci_id1 = fam_type->f0_id;
3497 pci_id2 = fam_type->f6_id;
3498 } else {
3499 pci_id1 = fam_type->f1_id;
3500 pci_id2 = fam_type->f2_id;
3501 }
3502
3503 err = reserve_mc_sibling_devs(pvt, pci_id1, pci_id2);
3504 if (err)
3505 goto err_post_init;
3506
3507 read_mc_regs(pvt);
3508
3509 /*
3510 * We need to determine how many memory channels there are. Then use
3511 * that information for calculating the size of the dynamic instance
3512 * tables in the 'mci' structure.
3513 */
3514 ret = -EINVAL;
3515 pvt->channel_count = pvt->ops->early_channel_count(pvt);
3516 if (pvt->channel_count < 0)
3517 goto err_siblings;
3518
3519 ret = -ENOMEM;
3520 layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
3521 layers[0].size = pvt->csels[0].b_cnt;
3522 layers[0].is_virt_csrow = true;
3523 layers[1].type = EDAC_MC_LAYER_CHANNEL;
3524
3525 /*
3526 * Always allocate two channels since we can have setups with DIMMs on
3527 * only one channel. Also, this simplifies handling later for the price
3528 * of a couple of KBs tops.
3529 *
3530 * On Fam17h+, the number of controllers may be greater than two. So set
3531 * the size equal to the maximum number of UMCs.
3532 */
3533 if (pvt->fam >= 0x17)
3534 layers[1].size = num_umcs;
3535 else
3536 layers[1].size = 2;
3537 layers[1].is_virt_csrow = false;
3538
3539 mci = edac_mc_alloc(nid, ARRAY_SIZE(layers), layers, 0);
3540 if (!mci)
3541 goto err_siblings;
3542
3543 mci->pvt_info = pvt;
3544 mci->pdev = &pvt->F3->dev;
3545
3546 setup_mci_misc_attrs(mci, fam_type);
3547
3548 if (init_csrows(mci))
3549 mci->edac_cap = EDAC_FLAG_NONE;
3550
3551 ret = -ENODEV;
3552 if (edac_mc_add_mc_with_groups(mci, amd64_edac_attr_groups)) {
3553 edac_dbg(1, "failed edac_mc_add_mc()\n");
3554 goto err_add_mc;
3555 }
3556
3557 return 0;
3558
3559err_add_mc:
3560 edac_mc_free(mci);
3561
3562err_siblings:
3563 free_mc_sibling_devs(pvt);
3564
3565err_post_init:
3566 if (pvt->fam >= 0x17)
3567 kfree(pvt->umc);
3568
3569err_free:
3570 kfree(pvt);
3571
3572err_ret:
3573 return ret;
3574}
3575
3576static int probe_one_instance(unsigned int nid)
3577{
3578 struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
3579 struct ecc_settings *s;
3580 int ret;
3581
3582 ret = -ENOMEM;
3583 s = kzalloc(sizeof(struct ecc_settings), GFP_KERNEL);
3584 if (!s)
3585 goto err_out;
3586
3587 ecc_stngs[nid] = s;
3588
3589 if (!ecc_enabled(F3, nid)) {
3590 ret = 0;
3591
3592 if (!ecc_enable_override)
3593 goto err_enable;
3594
3595 if (boot_cpu_data.x86 >= 0x17) {
 3596			amd64_warn("Forcing ECC on is not recommended on newer systems. Please enable ECC in BIOS.\n");
3597 goto err_enable;
3598 } else
3599 amd64_warn("Forcing ECC on!\n");
3600
3601 if (!enable_ecc_error_reporting(s, nid, F3))
3602 goto err_enable;
3603 }
3604
3605 ret = init_one_instance(nid);
3606 if (ret < 0) {
3607 amd64_err("Error probing instance: %d\n", nid);
3608
3609 if (boot_cpu_data.x86 < 0x17)
3610 restore_ecc_error_reporting(s, nid, F3);
3611
3612 goto err_enable;
3613 }
3614
3615 return ret;
3616
3617err_enable:
3618 kfree(s);
3619 ecc_stngs[nid] = NULL;
3620
3621err_out:
3622 return ret;
3623}
3624
3625static void remove_one_instance(unsigned int nid)
3626{
3627 struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
3628 struct ecc_settings *s = ecc_stngs[nid];
3629 struct mem_ctl_info *mci;
3630 struct amd64_pvt *pvt;
3631
3632 mci = find_mci_by_dev(&F3->dev);
3633 WARN_ON(!mci);
3634
3635 /* Remove from EDAC CORE tracking list */
3636 mci = edac_mc_del_mc(&F3->dev);
3637 if (!mci)
3638 return;
3639
3640 pvt = mci->pvt_info;
3641
3642 restore_ecc_error_reporting(s, nid, F3);
3643
3644 free_mc_sibling_devs(pvt);
3645
3646 kfree(ecc_stngs[nid]);
3647 ecc_stngs[nid] = NULL;
3648
3649 /* Free the EDAC CORE resources */
3650 mci->pvt_info = NULL;
3651
3652 kfree(pvt);
3653 edac_mc_free(mci);
3654}
3655
3656static void setup_pci_device(void)
3657{
3658 if (pci_ctl)
3659 return;
3660
3661 pci_ctl = edac_pci_create_generic_ctl(pci_ctl_dev, EDAC_MOD_STR);
3662 if (!pci_ctl) {
3663 pr_warn("%s(): Unable to create PCI control\n", __func__);
3664 pr_warn("%s(): PCI error report via EDAC not set\n", __func__);
3665 }
3666}
3667
3668static const struct x86_cpu_id amd64_cpuids[] = {
3669 { X86_VENDOR_AMD, 0xF, X86_MODEL_ANY, X86_FEATURE_ANY, 0 },
3670 { X86_VENDOR_AMD, 0x10, X86_MODEL_ANY, X86_FEATURE_ANY, 0 },
3671 { X86_VENDOR_AMD, 0x15, X86_MODEL_ANY, X86_FEATURE_ANY, 0 },
3672 { X86_VENDOR_AMD, 0x16, X86_MODEL_ANY, X86_FEATURE_ANY, 0 },
3673 { X86_VENDOR_AMD, 0x17, X86_MODEL_ANY, X86_FEATURE_ANY, 0 },
3674 { X86_VENDOR_HYGON, 0x18, X86_MODEL_ANY, X86_FEATURE_ANY, 0 },
3675 { }
3676};
3677MODULE_DEVICE_TABLE(x86cpu, amd64_cpuids);
3678
3679static int __init amd64_edac_init(void)
3680{
3681 const char *owner;
3682 int err = -ENODEV;
3683 int i;
3684
3685 owner = edac_get_owner();
3686 if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR)))
3687 return -EBUSY;
3688
3689 if (!x86_match_cpu(amd64_cpuids))
3690 return -ENODEV;
3691
3692 if (amd_cache_northbridges() < 0)
3693 return -ENODEV;
3694
3695 opstate_init();
3696
3697 err = -ENOMEM;
3698 ecc_stngs = kcalloc(amd_nb_num(), sizeof(ecc_stngs[0]), GFP_KERNEL);
3699 if (!ecc_stngs)
3700 goto err_free;
3701
3702 msrs = msrs_alloc();
3703 if (!msrs)
3704 goto err_free;
3705
3706 compute_num_umcs();
3707
3708 for (i = 0; i < amd_nb_num(); i++) {
3709 err = probe_one_instance(i);
3710 if (err) {
3711 /* unwind properly */
3712 while (--i >= 0)
3713 remove_one_instance(i);
3714
3715 goto err_pci;
3716 }
3717 }
3718
3719 if (!edac_has_mcs()) {
3720 err = -ENODEV;
3721 goto err_pci;
3722 }
3723
3724 /* register stuff with EDAC MCE */
3725 if (report_gart_errors)
3726 amd_report_gart_errors(true);
3727
3728 if (boot_cpu_data.x86 >= 0x17)
3729 amd_register_ecc_decoder(decode_umc_error);
3730 else
3731 amd_register_ecc_decoder(decode_bus_error);
3732
3733 setup_pci_device();
3734
3735#ifdef CONFIG_X86_32
3736 amd64_err("%s on 32-bit is unsupported. USE AT YOUR OWN RISK!\n", EDAC_MOD_STR);
3737#endif
3738
3739 printk(KERN_INFO "AMD64 EDAC driver v%s\n", EDAC_AMD64_VERSION);
3740
3741 return 0;
3742
3743err_pci:
3744 pci_ctl_dev = NULL;
3745
3746 msrs_free(msrs);
3747 msrs = NULL;
3748
3749err_free:
3750 kfree(ecc_stngs);
3751 ecc_stngs = NULL;
3752
3753 return err;
3754}
3755
3756static void __exit amd64_edac_exit(void)
3757{
3758 int i;
3759
3760 if (pci_ctl)
3761 edac_pci_release_generic_ctl(pci_ctl);
3762
3763 /* unregister from EDAC MCE */
3764 amd_report_gart_errors(false);
3765
3766 if (boot_cpu_data.x86 >= 0x17)
3767 amd_unregister_ecc_decoder(decode_umc_error);
3768 else
3769 amd_unregister_ecc_decoder(decode_bus_error);
3770
3771 for (i = 0; i < amd_nb_num(); i++)
3772 remove_one_instance(i);
3773
3774 kfree(ecc_stngs);
3775 ecc_stngs = NULL;
3776
3777 pci_ctl_dev = NULL;
3778
3779 msrs_free(msrs);
3780 msrs = NULL;
3781}
3782
3783module_init(amd64_edac_init);
3784module_exit(amd64_edac_exit);
3785
3786MODULE_LICENSE("GPL");
3787MODULE_AUTHOR("SoftwareBitMaker: Doug Thompson, "
3788 "Dave Peterson, Thayne Harbaugh");
3789MODULE_DESCRIPTION("MC support for AMD64 memory controllers - "
3790 EDAC_AMD64_VERSION);
3791
3792module_param(edac_op_state, int, 0444);
3793MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");