1/* Intel i7 core/Nehalem Memory Controller kernel module
2 *
3 * This driver supports the memory controllers found on the Intel
4 * processor families i7core, i7core 7xx/8xx, i5core, Xeon 35xx,
5 * Xeon 55xx and Xeon 56xx also known as Nehalem, Nehalem-EP, Lynnfield
6 * and Westmere-EP.
7 *
8 * This file may be distributed under the terms of the
9 * GNU General Public License version 2 only.
10 *
11 * Copyright (c) 2009-2010 by:
12 *	 Mauro Carvalho Chehab
13 *
14 * Red Hat Inc. http://www.redhat.com
15 *
16 * Forked and adapted from the i5400_edac driver
17 *
18 * Based on the following public Intel datasheets:
19 * Intel Core i7 Processor Extreme Edition and Intel Core i7 Processor
20 * Datasheet, Volume 2:
21 *	http://download.intel.com/design/processor/datashts/320835.pdf
22 * Intel Xeon Processor 5500 Series Datasheet Volume 2
23 *	http://www.intel.com/Assets/PDF/datasheet/321322.pdf
24 * also available at:
25 * 	http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
26 */
27
28#include <linux/module.h>
29#include <linux/init.h>
30#include <linux/pci.h>
31#include <linux/pci_ids.h>
32#include <linux/slab.h>
33#include <linux/delay.h>
34#include <linux/dmi.h>
35#include <linux/edac.h>
36#include <linux/mmzone.h>
37#include <linux/smp.h>
38#include <asm/mce.h>
39#include <asm/processor.h>
40#include <asm/div64.h>
41
42#include "edac_core.h"
43
44/* Static vars */
45static LIST_HEAD(i7core_edac_list);
46static DEFINE_MUTEX(i7core_edac_lock);
47static int probed;
48
49static int use_pci_fixup;
50module_param(use_pci_fixup, int, 0444);
51MODULE_PARM_DESC(use_pci_fixup, "Enable PCI fixup to seek for hidden devices");
/*
 * This is used for Nehalem-EP and Nehalem-EX devices, where the non-core
 * registers start at bus 255 and are not reported by the BIOS.
 * Only systems with up to 2 sockets are currently supported. To support
 * more QPI (QuickPath Interconnect) buses, just increment this number.
 */
58#define MAX_SOCKET_BUSES	2
59
60
61/*
62 * Alter this version for the module when modifications are made
63 */
64#define I7CORE_REVISION    " Ver: 1.0.0"
65#define EDAC_MOD_STR      "i7core_edac"
66
67/*
68 * Debug macros
69 */
70#define i7core_printk(level, fmt, arg...)			\
71	edac_printk(level, "i7core", fmt, ##arg)
72
73#define i7core_mc_printk(mci, level, fmt, arg...)		\
74	edac_mc_chipset_printk(mci, level, "i7core", fmt, ##arg)
75
76/*
77 * i7core Memory Controller Registers
78 */
79
80	/* OFFSETS for Device 0 Function 0 */
81
82#define MC_CFG_CONTROL	0x90
83  #define MC_CFG_UNLOCK		0x02
84  #define MC_CFG_LOCK		0x00
85
86	/* OFFSETS for Device 3 Function 0 */
87
88#define MC_CONTROL	0x48
89#define MC_STATUS	0x4c
90#define MC_MAX_DOD	0x64
91
92/*
93 * OFFSETS for Device 3 Function 4, as indicated on Xeon 5500 datasheet:
94 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
95 */
96
97#define MC_TEST_ERR_RCV1	0x60
98  #define DIMM2_COR_ERR(r)			((r) & 0x7fff)
99
100#define MC_TEST_ERR_RCV0	0x64
101  #define DIMM1_COR_ERR(r)			(((r) >> 16) & 0x7fff)
102  #define DIMM0_COR_ERR(r)			((r) & 0x7fff)
103
104/* OFFSETS for Device 3 Function 2, as indicated on Xeon 5500 datasheet */
105#define MC_SSRCONTROL		0x48
106  #define SSR_MODE_DISABLE	0x00
107  #define SSR_MODE_ENABLE	0x01
108  #define SSR_MODE_MASK		0x03
109
110#define MC_SCRUB_CONTROL	0x4c
111  #define STARTSCRUB		(1 << 24)
112  #define SCRUBINTERVAL_MASK    0xffffff
113
114#define MC_COR_ECC_CNT_0	0x80
115#define MC_COR_ECC_CNT_1	0x84
116#define MC_COR_ECC_CNT_2	0x88
117#define MC_COR_ECC_CNT_3	0x8c
118#define MC_COR_ECC_CNT_4	0x90
119#define MC_COR_ECC_CNT_5	0x94
120
121#define DIMM_TOP_COR_ERR(r)			(((r) >> 16) & 0x7fff)
122#define DIMM_BOT_COR_ERR(r)			((r) & 0x7fff)
123
124
125	/* OFFSETS for Devices 4,5 and 6 Function 0 */
126
127#define MC_CHANNEL_DIMM_INIT_PARAMS 0x58
128  #define THREE_DIMMS_PRESENT		(1 << 24)
129  #define SINGLE_QUAD_RANK_PRESENT	(1 << 23)
130  #define QUAD_RANK_PRESENT		(1 << 22)
131  #define REGISTERED_DIMM		(1 << 15)
132
133#define MC_CHANNEL_MAPPER	0x60
134  #define RDLCH(r, ch)		((((r) >> (3 + (ch * 6))) & 0x07) - 1)
135  #define WRLCH(r, ch)		((((r) >> (ch * 6)) & 0x07) - 1)
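/*
 * Each physical channel uses a 6-bit field in MC_CHANNEL_MAPPER: bits [2:0]
 * hold the write logical channel and bits [5:3] the read logical channel,
 * both stored as value + 1 (0 means "not mapped", which the macros above
 * return as -1).
 */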
136
137#define MC_CHANNEL_RANK_PRESENT 0x7c
138  #define RANK_PRESENT_MASK		0xffff
139
140#define MC_CHANNEL_ADDR_MATCH	0xf0
141#define MC_CHANNEL_ERROR_MASK	0xf8
142#define MC_CHANNEL_ERROR_INJECT	0xfc
143  #define INJECT_ADDR_PARITY	0x10
144  #define INJECT_ECC		0x08
145  #define MASK_CACHELINE	0x06
146  #define MASK_FULL_CACHELINE	0x06
147  #define MASK_MSB32_CACHELINE	0x04
148  #define MASK_LSB32_CACHELINE	0x02
149  #define NO_MASK_CACHELINE	0x00
150  #define REPEAT_EN		0x01
151
152	/* OFFSETS for Devices 4,5 and 6 Function 1 */
153
154#define MC_DOD_CH_DIMM0		0x48
155#define MC_DOD_CH_DIMM1		0x4c
156#define MC_DOD_CH_DIMM2		0x50
157  #define RANKOFFSET_MASK	((1 << 12) | (1 << 11) | (1 << 10))
158  #define RANKOFFSET(x)		((x & RANKOFFSET_MASK) >> 10)
159  #define DIMM_PRESENT_MASK	(1 << 9)
160  #define DIMM_PRESENT(x)	(((x) & DIMM_PRESENT_MASK) >> 9)
161  #define MC_DOD_NUMBANK_MASK		((1 << 8) | (1 << 7))
162  #define MC_DOD_NUMBANK(x)		(((x) & MC_DOD_NUMBANK_MASK) >> 7)
163  #define MC_DOD_NUMRANK_MASK		((1 << 6) | (1 << 5))
164  #define MC_DOD_NUMRANK(x)		(((x) & MC_DOD_NUMRANK_MASK) >> 5)
165  #define MC_DOD_NUMROW_MASK		((1 << 4) | (1 << 3) | (1 << 2))
166  #define MC_DOD_NUMROW(x)		(((x) & MC_DOD_NUMROW_MASK) >> 2)
167  #define MC_DOD_NUMCOL_MASK		3
168  #define MC_DOD_NUMCOL(x)		((x) & MC_DOD_NUMCOL_MASK)
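/*
 * Illustrative decode using the macros above: a DOD value of 0x2a9 has
 * DIMM_PRESENT = 1, MC_DOD_NUMBANK = 1 (8 banks), MC_DOD_NUMRANK = 1
 * (2 ranks), MC_DOD_NUMROW = 2 (16K rows), MC_DOD_NUMCOL = 1 (2K columns)
 * and RANKOFFSET = 0, which get_dimm_config() below reports as a 4096 MiB
 * DIMM.
 */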
169
170#define MC_RANK_PRESENT		0x7c
171
172#define MC_SAG_CH_0	0x80
173#define MC_SAG_CH_1	0x84
174#define MC_SAG_CH_2	0x88
175#define MC_SAG_CH_3	0x8c
176#define MC_SAG_CH_4	0x90
177#define MC_SAG_CH_5	0x94
178#define MC_SAG_CH_6	0x98
179#define MC_SAG_CH_7	0x9c
180
181#define MC_RIR_LIMIT_CH_0	0x40
182#define MC_RIR_LIMIT_CH_1	0x44
183#define MC_RIR_LIMIT_CH_2	0x48
184#define MC_RIR_LIMIT_CH_3	0x4C
185#define MC_RIR_LIMIT_CH_4	0x50
186#define MC_RIR_LIMIT_CH_5	0x54
187#define MC_RIR_LIMIT_CH_6	0x58
188#define MC_RIR_LIMIT_CH_7	0x5C
189#define MC_RIR_LIMIT_MASK	((1 << 10) - 1)
190
191#define MC_RIR_WAY_CH		0x80
192  #define MC_RIR_WAY_OFFSET_MASK	(((1 << 14) - 1) & ~0x7)
193  #define MC_RIR_WAY_RANK_MASK		0x7
194
195/*
196 * i7core structs
197 */
198
199#define NUM_CHANS 3
200#define MAX_DIMMS 3		/* Max DIMMS per channel */
201#define MAX_MCR_FUNC  4
202#define MAX_CHAN_FUNC 3
203
204struct i7core_info {
205	u32	mc_control;
206	u32	mc_status;
207	u32	max_dod;
208	u32	ch_map;
209};
210
211
212struct i7core_inject {
213	int	enable;
214
215	u32	section;
216	u32	type;
217	u32	eccmask;
218
219	/* Error address mask */
220	int channel, dimm, rank, bank, page, col;
221};
222
223struct i7core_channel {
224	bool		is_3dimms_present;
225	bool		is_single_4rank;
226	bool		has_4rank;
227	u32		dimms;
228};
229
230struct pci_id_descr {
231	int			dev;
232	int			func;
233	int 			dev_id;
234	int			optional;
235};
236
237struct pci_id_table {
238	const struct pci_id_descr	*descr;
239	int				n_devs;
240};
241
242struct i7core_dev {
243	struct list_head	list;
244	u8			socket;
245	struct pci_dev		**pdev;
246	int			n_devs;
247	struct mem_ctl_info	*mci;
248};
249
250struct i7core_pvt {
251	struct device *addrmatch_dev, *chancounts_dev;
252
253	struct pci_dev	*pci_noncore;
254	struct pci_dev	*pci_mcr[MAX_MCR_FUNC + 1];
255	struct pci_dev	*pci_ch[NUM_CHANS][MAX_CHAN_FUNC + 1];
256
257	struct i7core_dev *i7core_dev;
258
259	struct i7core_info	info;
260	struct i7core_inject	inject;
261	struct i7core_channel	channel[NUM_CHANS];
262
263	int		ce_count_available;
264
265			/* ECC corrected errors counts per udimm */
266	unsigned long	udimm_ce_count[MAX_DIMMS];
267	int		udimm_last_ce_count[MAX_DIMMS];
268			/* ECC corrected errors counts per rdimm */
269	unsigned long	rdimm_ce_count[NUM_CHANS][MAX_DIMMS];
270	int		rdimm_last_ce_count[NUM_CHANS][MAX_DIMMS];
271
272	bool		is_registered, enable_scrub;
273
274	/* Fifo double buffers */
275	struct mce		mce_entry[MCE_LOG_LEN];
276	struct mce		mce_outentry[MCE_LOG_LEN];
277
278	/* Fifo in/out counters */
279	unsigned		mce_in, mce_out;
280
	/* Count of memory errors that were lost due to FIFO overrun */
282	unsigned		mce_overrun;
283
284	/* DCLK Frequency used for computing scrub rate */
285	int			dclk_freq;
286
287	/* Struct to control EDAC polling */
288	struct edac_pci_ctl_info *i7core_pci;
289};
290
291#define PCI_DESCR(device, function, device_id)	\
292	.dev = (device),			\
293	.func = (function),			\
294	.dev_id = (device_id)
295
296static const struct pci_id_descr pci_dev_descr_i7core_nehalem[] = {
297		/* Memory controller */
298	{ PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_I7_MCR)     },
299	{ PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_I7_MC_TAD)  },
300			/* Exists only for RDIMM */
301	{ PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_I7_MC_RAS), .optional = 1  },
302	{ PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_I7_MC_TEST) },
303
304		/* Channel 0 */
305	{ PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH0_CTRL) },
306	{ PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH0_ADDR) },
307	{ PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH0_RANK) },
308	{ PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH0_TC)   },
309
310		/* Channel 1 */
311	{ PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH1_CTRL) },
312	{ PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH1_ADDR) },
313	{ PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH1_RANK) },
314	{ PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH1_TC)   },
315
316		/* Channel 2 */
317	{ PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH2_CTRL) },
318	{ PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH2_ADDR) },
319	{ PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH2_RANK) },
320	{ PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH2_TC)   },
321
322		/* Generic Non-core registers */
	/*
	 * This is the PCI device on i7core and on Xeon 35xx (8086:2c41).
	 * On Xeon 55xx, however, it has a different id (8086:2c40), so
	 * the probing code needs to test for the other address in case
	 * this one fails.
	 */
329	{ PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_I7_NONCORE)  },
330
331};
332
333static const struct pci_id_descr pci_dev_descr_lynnfield[] = {
334	{ PCI_DESCR( 3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR)         },
335	{ PCI_DESCR( 3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD)      },
336	{ PCI_DESCR( 3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST)     },
337
338	{ PCI_DESCR( 4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL) },
339	{ PCI_DESCR( 4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR) },
340	{ PCI_DESCR( 4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK) },
341	{ PCI_DESCR( 4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC)   },
342
343	{ PCI_DESCR( 5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL) },
344	{ PCI_DESCR( 5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR) },
345	{ PCI_DESCR( 5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK) },
346	{ PCI_DESCR( 5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC)   },
347
	/*
	 * This PCI device has an alternate address on some processors,
	 * such as the Core i7 860.
	 */
352	{ PCI_DESCR( 0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE)     },
353};
354
355static const struct pci_id_descr pci_dev_descr_i7core_westmere[] = {
356		/* Memory controller */
357	{ PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR_REV2)     },
358	{ PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD_REV2)  },
359			/* Exists only for RDIMM */
360	{ PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_RAS_REV2), .optional = 1  },
361	{ PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST_REV2) },
362
363		/* Channel 0 */
364	{ PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL_REV2) },
365	{ PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR_REV2) },
366	{ PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK_REV2) },
367	{ PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC_REV2)   },
368
369		/* Channel 1 */
370	{ PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL_REV2) },
371	{ PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR_REV2) },
372	{ PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK_REV2) },
373	{ PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC_REV2)   },
374
375		/* Channel 2 */
376	{ PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_CTRL_REV2) },
377	{ PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_ADDR_REV2) },
378	{ PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_RANK_REV2) },
379	{ PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_TC_REV2)   },
380
381		/* Generic Non-core registers */
382	{ PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_REV2)  },
383
384};
385
386#define PCI_ID_TABLE_ENTRY(A) { .descr=A, .n_devs = ARRAY_SIZE(A) }
387static const struct pci_id_table pci_dev_table[] = {
388	PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_nehalem),
389	PCI_ID_TABLE_ENTRY(pci_dev_descr_lynnfield),
390	PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_westmere),
391	{0,}			/* 0 terminated list. */
392};
393
/*
 *	pci_device_id table of the devices we are looking for
 */
397static const struct pci_device_id i7core_pci_tbl[] = {
398	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_X58_HUB_MGMT)},
399	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LYNNFIELD_QPI_LINK0)},
400	{0,}			/* 0 terminated list. */
401};
402
403/****************************************************************************
404			Ancillary status routines
405 ****************************************************************************/
406
407	/* MC_CONTROL bits */
408#define CH_ACTIVE(pvt, ch)	((pvt)->info.mc_control & (1 << (8 + ch)))
409#define ECCx8(pvt)		((pvt)->info.mc_control & (1 << 1))
410
411	/* MC_STATUS bits */
412#define ECC_ENABLED(pvt)	((pvt)->info.mc_status & (1 << 4))
413#define CH_DISABLED(pvt, ch)	((pvt)->info.mc_status & (1 << ch))
414
415	/* MC_MAX_DOD read functions */
416static inline int numdimms(u32 dimms)
417{
418	return (dimms & 0x3) + 1;
419}
420
421static inline int numrank(u32 rank)
422{
423	static const int ranks[] = { 1, 2, 4, -EINVAL };
424
425	return ranks[rank & 0x3];
426}
427
428static inline int numbank(u32 bank)
429{
430	static const int banks[] = { 4, 8, 16, -EINVAL };
431
432	return banks[bank & 0x3];
433}
434
435static inline int numrow(u32 row)
436{
437	static const int rows[] = {
438		1 << 12, 1 << 13, 1 << 14, 1 << 15,
439		1 << 16, -EINVAL, -EINVAL, -EINVAL,
440	};
441
442	return rows[row & 0x7];
443}
444
445static inline int numcol(u32 col)
446{
447	static const int cols[] = {
448		1 << 10, 1 << 11, 1 << 12, -EINVAL,
449	};
450	return cols[col & 0x3];
451}
452
453static struct i7core_dev *get_i7core_dev(u8 socket)
454{
455	struct i7core_dev *i7core_dev;
456
457	list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
458		if (i7core_dev->socket == socket)
459			return i7core_dev;
460	}
461
462	return NULL;
463}
464
465static struct i7core_dev *alloc_i7core_dev(u8 socket,
466					   const struct pci_id_table *table)
467{
468	struct i7core_dev *i7core_dev;
469
470	i7core_dev = kzalloc(sizeof(*i7core_dev), GFP_KERNEL);
471	if (!i7core_dev)
472		return NULL;
473
474	i7core_dev->pdev = kzalloc(sizeof(*i7core_dev->pdev) * table->n_devs,
475				   GFP_KERNEL);
476	if (!i7core_dev->pdev) {
477		kfree(i7core_dev);
478		return NULL;
479	}
480
481	i7core_dev->socket = socket;
482	i7core_dev->n_devs = table->n_devs;
483	list_add_tail(&i7core_dev->list, &i7core_edac_list);
484
485	return i7core_dev;
486}
487
488static void free_i7core_dev(struct i7core_dev *i7core_dev)
489{
490	list_del(&i7core_dev->list);
491	kfree(i7core_dev->pdev);
492	kfree(i7core_dev);
493}
494
495/****************************************************************************
496			Memory check routines
497 ****************************************************************************/
498
499static int get_dimm_config(struct mem_ctl_info *mci)
500{
501	struct i7core_pvt *pvt = mci->pvt_info;
502	struct pci_dev *pdev;
503	int i, j;
504	enum edac_type mode;
505	enum mem_type mtype;
506	struct dimm_info *dimm;
507
508	/* Get data from the MC register, function 0 */
509	pdev = pvt->pci_mcr[0];
510	if (!pdev)
511		return -ENODEV;
512
513	/* Device 3 function 0 reads */
514	pci_read_config_dword(pdev, MC_CONTROL, &pvt->info.mc_control);
515	pci_read_config_dword(pdev, MC_STATUS, &pvt->info.mc_status);
516	pci_read_config_dword(pdev, MC_MAX_DOD, &pvt->info.max_dod);
517	pci_read_config_dword(pdev, MC_CHANNEL_MAPPER, &pvt->info.ch_map);
518
519	edac_dbg(0, "QPI %d control=0x%08x status=0x%08x dod=0x%08x map=0x%08x\n",
520		 pvt->i7core_dev->socket, pvt->info.mc_control,
521		 pvt->info.mc_status, pvt->info.max_dod, pvt->info.ch_map);
522
523	if (ECC_ENABLED(pvt)) {
524		edac_dbg(0, "ECC enabled with x%d SDCC\n", ECCx8(pvt) ? 8 : 4);
525		if (ECCx8(pvt))
526			mode = EDAC_S8ECD8ED;
527		else
528			mode = EDAC_S4ECD4ED;
529	} else {
530		edac_dbg(0, "ECC disabled\n");
531		mode = EDAC_NONE;
532	}
533
534	/* FIXME: need to handle the error codes */
535	edac_dbg(0, "DOD Max limits: DIMMS: %d, %d-ranked, %d-banked x%x x 0x%x\n",
536		 numdimms(pvt->info.max_dod),
537		 numrank(pvt->info.max_dod >> 2),
538		 numbank(pvt->info.max_dod >> 4),
539		 numrow(pvt->info.max_dod >> 6),
540		 numcol(pvt->info.max_dod >> 9));
541
542	for (i = 0; i < NUM_CHANS; i++) {
543		u32 data, dimm_dod[3], value[8];
544
545		if (!pvt->pci_ch[i][0])
546			continue;
547
548		if (!CH_ACTIVE(pvt, i)) {
549			edac_dbg(0, "Channel %i is not active\n", i);
550			continue;
551		}
552		if (CH_DISABLED(pvt, i)) {
553			edac_dbg(0, "Channel %i is disabled\n", i);
554			continue;
555		}
556
557		/* Devices 4-6 function 0 */
558		pci_read_config_dword(pvt->pci_ch[i][0],
559				MC_CHANNEL_DIMM_INIT_PARAMS, &data);
560
561
562		if (data & THREE_DIMMS_PRESENT)
563			pvt->channel[i].is_3dimms_present = true;
564
565		if (data & SINGLE_QUAD_RANK_PRESENT)
566			pvt->channel[i].is_single_4rank = true;
567
568		if (data & QUAD_RANK_PRESENT)
569			pvt->channel[i].has_4rank = true;
570
571		if (data & REGISTERED_DIMM)
572			mtype = MEM_RDDR3;
573		else
574			mtype = MEM_DDR3;
575
576		/* Devices 4-6 function 1 */
577		pci_read_config_dword(pvt->pci_ch[i][1],
578				MC_DOD_CH_DIMM0, &dimm_dod[0]);
579		pci_read_config_dword(pvt->pci_ch[i][1],
580				MC_DOD_CH_DIMM1, &dimm_dod[1]);
581		pci_read_config_dword(pvt->pci_ch[i][1],
582				MC_DOD_CH_DIMM2, &dimm_dod[2]);
583
584		edac_dbg(0, "Ch%d phy rd%d, wr%d (0x%08x): %s%s%s%cDIMMs\n",
585			 i,
586			 RDLCH(pvt->info.ch_map, i), WRLCH(pvt->info.ch_map, i),
587			 data,
588			 pvt->channel[i].is_3dimms_present ? "3DIMMS " : "",
			 pvt->channel[i].is_single_4rank ? "SINGLE_4R " : "",
590			 pvt->channel[i].has_4rank ? "HAS_4R " : "",
591			 (data & REGISTERED_DIMM) ? 'R' : 'U');
592
593		for (j = 0; j < 3; j++) {
594			u32 banks, ranks, rows, cols;
595			u32 size, npages;
596
597			if (!DIMM_PRESENT(dimm_dod[j]))
598				continue;
599
600			dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers,
601				       i, j, 0);
602			banks = numbank(MC_DOD_NUMBANK(dimm_dod[j]));
603			ranks = numrank(MC_DOD_NUMRANK(dimm_dod[j]));
604			rows = numrow(MC_DOD_NUMROW(dimm_dod[j]));
605			cols = numcol(MC_DOD_NUMCOL(dimm_dod[j]));
606
			/* Each location is 64 bits (8 bytes) wide, hence the extra factor of 8 */
608			size = (rows * cols * banks * ranks) >> (20 - 3);
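			/*
			 * rows * cols * banks * ranks is the number of 8-byte
			 * locations, so shifting right by (20 - 3) yields MiB.
			 * E.g. 16K rows x 2K cols x 8 banks x 2 ranks = 2^29
			 * locations -> 4096 MiB.
			 */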
609
610			edac_dbg(0, "\tdimm %d %d Mb offset: %x, bank: %d, rank: %d, row: %#x, col: %#x\n",
611				 j, size,
612				 RANKOFFSET(dimm_dod[j]),
613				 banks, ranks, rows, cols);
614
615			npages = MiB_TO_PAGES(size);
616
617			dimm->nr_pages = npages;
618
619			switch (banks) {
620			case 4:
621				dimm->dtype = DEV_X4;
622				break;
623			case 8:
624				dimm->dtype = DEV_X8;
625				break;
626			case 16:
627				dimm->dtype = DEV_X16;
628				break;
629			default:
630				dimm->dtype = DEV_UNKNOWN;
631			}
632
633			snprintf(dimm->label, sizeof(dimm->label),
634				 "CPU#%uChannel#%u_DIMM#%u",
635				 pvt->i7core_dev->socket, i, j);
636			dimm->grain = 8;
637			dimm->edac_mode = mode;
638			dimm->mtype = mtype;
639		}
640
641		pci_read_config_dword(pdev, MC_SAG_CH_0, &value[0]);
642		pci_read_config_dword(pdev, MC_SAG_CH_1, &value[1]);
643		pci_read_config_dword(pdev, MC_SAG_CH_2, &value[2]);
644		pci_read_config_dword(pdev, MC_SAG_CH_3, &value[3]);
645		pci_read_config_dword(pdev, MC_SAG_CH_4, &value[4]);
646		pci_read_config_dword(pdev, MC_SAG_CH_5, &value[5]);
647		pci_read_config_dword(pdev, MC_SAG_CH_6, &value[6]);
648		pci_read_config_dword(pdev, MC_SAG_CH_7, &value[7]);
649		edac_dbg(1, "\t[%i] DIVBY3\tREMOVED\tOFFSET\n", i);
650		for (j = 0; j < 8; j++)
651			edac_dbg(1, "\t\t%#x\t%#x\t%#x\n",
652				 (value[j] >> 27) & 0x1,
653				 (value[j] >> 24) & 0x7,
654				 (value[j] & ((1 << 24) - 1)));
655	}
656
657	return 0;
658}
659
660/****************************************************************************
661			Error insertion routines
662 ****************************************************************************/
663
664#define to_mci(k) container_of(k, struct mem_ctl_info, dev)
665
/* The i7core has independent error injection features per channel.
   However, to keep the code simpler, we don't allow enabling error injection
   on more than one channel at a time.
   Also, since a change to an injection parameter is only applied on enable,
   we disable error injection on every write to the sysfs nodes that
   control error injection.
 */
673static int disable_inject(const struct mem_ctl_info *mci)
674{
675	struct i7core_pvt *pvt = mci->pvt_info;
676
677	pvt->inject.enable = 0;
678
679	if (!pvt->pci_ch[pvt->inject.channel][0])
680		return -ENODEV;
681
682	pci_write_config_dword(pvt->pci_ch[pvt->inject.channel][0],
683				MC_CHANNEL_ERROR_INJECT, 0);
684
685	return 0;
686}
687
/*
 * i7core inject.section
 *
 *	accept and store the error injection inject.section value
 *	bit 0 - refers to the lower 32-byte half cacheline
 *	bit 1 - refers to the upper 32-byte half cacheline
 */
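/*
 * Illustrative usage via the inject_section sysfs node defined below:
 * "echo 1 > inject_section" selects the lower 32-byte half cacheline,
 * "echo 3 > inject_section" selects both halves.
 */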
695static ssize_t i7core_inject_section_store(struct device *dev,
696					   struct device_attribute *mattr,
697					   const char *data, size_t count)
698{
699	struct mem_ctl_info *mci = to_mci(dev);
700	struct i7core_pvt *pvt = mci->pvt_info;
701	unsigned long value;
702	int rc;
703
704	if (pvt->inject.enable)
705		disable_inject(mci);
706
707	rc = kstrtoul(data, 10, &value);
708	if ((rc < 0) || (value > 3))
709		return -EIO;
710
711	pvt->inject.section = (u32) value;
712	return count;
713}
714
715static ssize_t i7core_inject_section_show(struct device *dev,
716					  struct device_attribute *mattr,
717					  char *data)
718{
719	struct mem_ctl_info *mci = to_mci(dev);
720	struct i7core_pvt *pvt = mci->pvt_info;
721	return sprintf(data, "0x%08x\n", pvt->inject.section);
722}
723
/*
 * i7core inject.type
 *
 *	accept and store the error injection inject.type value
 *	bit 0 - repeat enable - Enable error repetition
 *	bit 1 - inject ECC error
 *	bit 2 - inject parity error
 */
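/*
 * Illustrative usage via the inject_type sysfs node defined below:
 * "echo 2 > inject_type" arms a one-shot ECC error injection, while
 * "echo 3 > inject_type" also sets the repeat bit so the error keeps being
 * injected on every matching write.
 */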
732static ssize_t i7core_inject_type_store(struct device *dev,
733					struct device_attribute *mattr,
734					const char *data, size_t count)
735{
736	struct mem_ctl_info *mci = to_mci(dev);
	struct i7core_pvt *pvt = mci->pvt_info;
738	unsigned long value;
739	int rc;
740
741	if (pvt->inject.enable)
742		disable_inject(mci);
743
744	rc = kstrtoul(data, 10, &value);
745	if ((rc < 0) || (value > 7))
746		return -EIO;
747
748	pvt->inject.type = (u32) value;
749	return count;
750}
751
752static ssize_t i7core_inject_type_show(struct device *dev,
753				       struct device_attribute *mattr,
754				       char *data)
755{
756	struct mem_ctl_info *mci = to_mci(dev);
757	struct i7core_pvt *pvt = mci->pvt_info;
758
759	return sprintf(data, "0x%08x\n", pvt->inject.type);
760}
761
762/*
 * i7core_inject_eccmask_store
764 *
765 * The type of error (UE/CE) will depend on the inject.eccmask value:
766 *   Any bits set to a 1 will flip the corresponding ECC bit
767 *   Correctable errors can be injected by flipping 1 bit or the bits within
768 *   a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
769 *   23:16 and 31:24). Flipping bits in two symbol pairs will cause an
770 *   uncorrectable error to be injected.
771 */
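/*
 * Following the description above: an eccmask of 0x00000101 flips two bits
 * within the same symbol pair (7:0 and 15:8), producing a correctable error,
 * while 0x00010001 touches two different symbol pairs and therefore produces
 * an uncorrectable error.
 */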
772static ssize_t i7core_inject_eccmask_store(struct device *dev,
773					   struct device_attribute *mattr,
774					   const char *data, size_t count)
775{
776	struct mem_ctl_info *mci = to_mci(dev);
777	struct i7core_pvt *pvt = mci->pvt_info;
778	unsigned long value;
779	int rc;
780
781	if (pvt->inject.enable)
782		disable_inject(mci);
783
784	rc = kstrtoul(data, 10, &value);
785	if (rc < 0)
786		return -EIO;
787
788	pvt->inject.eccmask = (u32) value;
789	return count;
790}
791
792static ssize_t i7core_inject_eccmask_show(struct device *dev,
793					  struct device_attribute *mattr,
794					  char *data)
795{
796	struct mem_ctl_info *mci = to_mci(dev);
797	struct i7core_pvt *pvt = mci->pvt_info;
798
799	return sprintf(data, "0x%08x\n", pvt->inject.eccmask);
800}
801
802/*
803 * i7core_addrmatch
804 *
 * The sysfs nodes below (channel, dimm, rank, bank, page, col) set the
 * address-match criteria used for error injection. Writing "any" (stored
 * internally as -1) makes the memory controller ignore that field when
 * matching; otherwise, only writes that match the given value will trigger
 * the injection.
811 */
812
813#define DECLARE_ADDR_MATCH(param, limit)			\
814static ssize_t i7core_inject_store_##param(			\
815	struct device *dev,					\
816	struct device_attribute *mattr,				\
817	const char *data, size_t count)				\
818{								\
819	struct mem_ctl_info *mci = dev_get_drvdata(dev);	\
820	struct i7core_pvt *pvt;					\
	unsigned long value;					\
822	int rc;							\
823								\
824	edac_dbg(1, "\n");					\
825	pvt = mci->pvt_info;					\
826								\
827	if (pvt->inject.enable)					\
828		disable_inject(mci);				\
829								\
830	if (!strcasecmp(data, "any") || !strcasecmp(data, "any\n"))\
831		value = -1;					\
832	else {							\
833		rc = kstrtoul(data, 10, &value);		\
834		if ((rc < 0) || (value >= limit))		\
835			return -EIO;				\
836	}							\
837								\
838	pvt->inject.param = value;				\
839								\
840	return count;						\
841}								\
842								\
843static ssize_t i7core_inject_show_##param(			\
844	struct device *dev,					\
845	struct device_attribute *mattr,				\
846	char *data)						\
847{								\
848	struct mem_ctl_info *mci = dev_get_drvdata(dev);	\
849	struct i7core_pvt *pvt;					\
850								\
851	pvt = mci->pvt_info;					\
852	edac_dbg(1, "pvt=%p\n", pvt);				\
853	if (pvt->inject.param < 0)				\
854		return sprintf(data, "any\n");			\
855	else							\
856		return sprintf(data, "%d\n", pvt->inject.param);\
857}
858
859#define ATTR_ADDR_MATCH(param)					\
860	static DEVICE_ATTR(param, S_IRUGO | S_IWUSR,		\
861		    i7core_inject_show_##param,			\
862		    i7core_inject_store_##param)
863
864DECLARE_ADDR_MATCH(channel, 3);
865DECLARE_ADDR_MATCH(dimm, 3);
866DECLARE_ADDR_MATCH(rank, 4);
867DECLARE_ADDR_MATCH(bank, 32);
868DECLARE_ADDR_MATCH(page, 0x10000);
869DECLARE_ADDR_MATCH(col, 0x4000);
870
871ATTR_ADDR_MATCH(channel);
872ATTR_ADDR_MATCH(dimm);
873ATTR_ADDR_MATCH(rank);
874ATTR_ADDR_MATCH(bank);
875ATTR_ADDR_MATCH(page);
876ATTR_ADDR_MATCH(col);
877
878static int write_and_test(struct pci_dev *dev, const int where, const u32 val)
879{
880	u32 read;
881	int count;
882
883	edac_dbg(0, "setting pci %02x:%02x.%x reg=%02x value=%08x\n",
884		 dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
885		 where, val);
886
887	for (count = 0; count < 10; count++) {
888		if (count)
889			msleep(100);
890		pci_write_config_dword(dev, where, val);
891		pci_read_config_dword(dev, where, &read);
892
893		if (read == val)
894			return 0;
895	}
896
897	i7core_printk(KERN_ERR, "Error during set pci %02x:%02x.%x reg=%02x "
898		"write=%08x. Read=%08x\n",
899		dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
900		where, val, read);
901
902	return -EINVAL;
903}
904
/*
 * This routine prepares the Memory Controller for error injection.
 * The error will be injected when some process tries to write to memory
 * that matches the given criteria.
 * The criteria are expressed as a mask where dimm, rank, bank, page
 * and col can be specified.
 * A -1 value for any of the mask items makes the MCU ignore that field
 * when matching addresses for error injection.
 *
 * Note that the error will only happen after a write operation to memory
 * that matches the condition. If REPEAT_EN is not set in the inject mask,
 * only one error is produced. Otherwise, errors keep being injected until
 * the inject mask is cleared.
 *
 * FIXME: This routine assumes that the MAXNUMDIMMS value of MC_MAX_DOD
 *    is reliable enough to check whether the MC is using all
 *    three channels. However, this is not clear from the datasheet.
 */
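/*
 * Layout of the MC_CHANNEL_ADDR_MATCH mask as programmed below: bits 41..37
 * are the "match any" flags for dimm, rank, bank, page and col respectively;
 * the explicit dimm/rank selectors land around bits 36..34 (their exact
 * position depends on how many DIMMs the channel has), the bank goes into
 * bits 34..30, the page into bits 29..14 and the column into bits 13..0.
 */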
923static ssize_t i7core_inject_enable_store(struct device *dev,
924					  struct device_attribute *mattr,
925					  const char *data, size_t count)
926{
927	struct mem_ctl_info *mci = to_mci(dev);
928	struct i7core_pvt *pvt = mci->pvt_info;
929	u32 injectmask;
930	u64 mask = 0;
931	int  rc;
	unsigned long enable;
933
934	if (!pvt->pci_ch[pvt->inject.channel][0])
935		return 0;
936
937	rc = kstrtoul(data, 10, &enable);
938	if ((rc < 0))
939		return 0;
940
941	if (enable) {
942		pvt->inject.enable = 1;
943	} else {
944		disable_inject(mci);
945		return count;
946	}
947
948	/* Sets pvt->inject.dimm mask */
949	if (pvt->inject.dimm < 0)
950		mask |= 1LL << 41;
951	else {
952		if (pvt->channel[pvt->inject.channel].dimms > 2)
953			mask |= (pvt->inject.dimm & 0x3LL) << 35;
954		else
955			mask |= (pvt->inject.dimm & 0x1LL) << 36;
956	}
957
958	/* Sets pvt->inject.rank mask */
959	if (pvt->inject.rank < 0)
960		mask |= 1LL << 40;
961	else {
962		if (pvt->channel[pvt->inject.channel].dimms > 2)
963			mask |= (pvt->inject.rank & 0x1LL) << 34;
964		else
965			mask |= (pvt->inject.rank & 0x3LL) << 34;
966	}
967
968	/* Sets pvt->inject.bank mask */
969	if (pvt->inject.bank < 0)
970		mask |= 1LL << 39;
971	else
972		mask |= (pvt->inject.bank & 0x15LL) << 30;
973
974	/* Sets pvt->inject.page mask */
975	if (pvt->inject.page < 0)
976		mask |= 1LL << 38;
977	else
978		mask |= (pvt->inject.page & 0xffff) << 14;
979
980	/* Sets pvt->inject.column mask */
981	if (pvt->inject.col < 0)
982		mask |= 1LL << 37;
983	else
984		mask |= (pvt->inject.col & 0x3fff);
985
986	/*
987	 * bit    0: REPEAT_EN
988	 * bits 1-2: MASK_HALF_CACHELINE
989	 * bit    3: INJECT_ECC
990	 * bit    4: INJECT_ADDR_PARITY
991	 */
992
993	injectmask = (pvt->inject.type & 1) |
994		     (pvt->inject.section & 0x3) << 1 |
995		     (pvt->inject.type & 0x6) << (3 - 1);
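	/*
	 * For example, inject.type = 3 (repeat + ECC) and inject.section = 3
	 * (both halves) yield injectmask = 0xf, i.e.
	 * REPEAT_EN | MASK_FULL_CACHELINE | INJECT_ECC.
	 */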
996
997	/* Unlock writes to registers - this register is write only */
998	pci_write_config_dword(pvt->pci_noncore,
999			       MC_CFG_CONTROL, 0x2);
1000
1001	write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1002			       MC_CHANNEL_ADDR_MATCH, mask);
1003	write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1004			       MC_CHANNEL_ADDR_MATCH + 4, mask >> 32L);
1005
1006	write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1007			       MC_CHANNEL_ERROR_MASK, pvt->inject.eccmask);
1008
1009	write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1010			       MC_CHANNEL_ERROR_INJECT, injectmask);
1011
	/*
	 * This is something undocumented; based on my tests, errors aren't
	 * injected without writing 8 to this register. Not sure why.
	 */
1017	pci_write_config_dword(pvt->pci_noncore,
1018			       MC_CFG_CONTROL, 8);
1019
1020	edac_dbg(0, "Error inject addr match 0x%016llx, ecc 0x%08x, inject 0x%08x\n",
1021		 mask, pvt->inject.eccmask, injectmask);
1022
1023
1024	return count;
1025}
1026
1027static ssize_t i7core_inject_enable_show(struct device *dev,
1028					 struct device_attribute *mattr,
1029					 char *data)
1030{
1031	struct mem_ctl_info *mci = to_mci(dev);
1032	struct i7core_pvt *pvt = mci->pvt_info;
1033	u32 injectmask;
1034
1035	if (!pvt->pci_ch[pvt->inject.channel][0])
1036		return 0;
1037
1038	pci_read_config_dword(pvt->pci_ch[pvt->inject.channel][0],
1039			       MC_CHANNEL_ERROR_INJECT, &injectmask);
1040
1041	edac_dbg(0, "Inject error read: 0x%018x\n", injectmask);
1042
1043	if (injectmask & 0x0c)
1044		pvt->inject.enable = 1;
1045
1046	return sprintf(data, "%d\n", pvt->inject.enable);
1047}
1048
1049#define DECLARE_COUNTER(param)					\
1050static ssize_t i7core_show_counter_##param(			\
1051	struct device *dev,					\
1052	struct device_attribute *mattr,				\
1053	char *data)						\
1054{								\
1055	struct mem_ctl_info *mci = dev_get_drvdata(dev);	\
1056	struct i7core_pvt *pvt = mci->pvt_info;			\
1057								\
1058	edac_dbg(1, "\n");					\
1059	if (!pvt->ce_count_available || (pvt->is_registered))	\
1060		return sprintf(data, "data unavailable\n");	\
1061	return sprintf(data, "%lu\n",				\
1062			pvt->udimm_ce_count[param]);		\
1063}
1064
1065#define ATTR_COUNTER(param)					\
1066	static DEVICE_ATTR(udimm##param, S_IRUGO | S_IWUSR,	\
1067		    i7core_show_counter_##param,		\
1068		    NULL)
1069
1070DECLARE_COUNTER(0);
1071DECLARE_COUNTER(1);
1072DECLARE_COUNTER(2);
1073
1074ATTR_COUNTER(0);
1075ATTR_COUNTER(1);
1076ATTR_COUNTER(2);
1077
1078/*
1079 * inject_addrmatch device sysfs struct
1080 */
1081
1082static struct attribute *i7core_addrmatch_attrs[] = {
1083	&dev_attr_channel.attr,
1084	&dev_attr_dimm.attr,
1085	&dev_attr_rank.attr,
1086	&dev_attr_bank.attr,
1087	&dev_attr_page.attr,
1088	&dev_attr_col.attr,
1089	NULL
1090};
1091
1092static struct attribute_group addrmatch_grp = {
1093	.attrs	= i7core_addrmatch_attrs,
1094};
1095
1096static const struct attribute_group *addrmatch_groups[] = {
1097	&addrmatch_grp,
1098	NULL
1099};
1100
1101static void addrmatch_release(struct device *device)
1102{
1103	edac_dbg(1, "Releasing device %s\n", dev_name(device));
1104	kfree(device);
1105}
1106
1107static struct device_type addrmatch_type = {
1108	.groups		= addrmatch_groups,
1109	.release	= addrmatch_release,
1110};
1111
1112/*
1113 * all_channel_counts sysfs struct
1114 */
1115
1116static struct attribute *i7core_udimm_counters_attrs[] = {
1117	&dev_attr_udimm0.attr,
1118	&dev_attr_udimm1.attr,
1119	&dev_attr_udimm2.attr,
1120	NULL
1121};
1122
1123static struct attribute_group all_channel_counts_grp = {
1124	.attrs	= i7core_udimm_counters_attrs,
1125};
1126
1127static const struct attribute_group *all_channel_counts_groups[] = {
1128	&all_channel_counts_grp,
1129	NULL
1130};
1131
1132static void all_channel_counts_release(struct device *device)
1133{
1134	edac_dbg(1, "Releasing device %s\n", dev_name(device));
1135	kfree(device);
1136}
1137
1138static struct device_type all_channel_counts_type = {
1139	.groups		= all_channel_counts_groups,
1140	.release	= all_channel_counts_release,
1141};
1142
1143/*
1144 * inject sysfs attributes
1145 */
1146
1147static DEVICE_ATTR(inject_section, S_IRUGO | S_IWUSR,
1148		   i7core_inject_section_show, i7core_inject_section_store);
1149
1150static DEVICE_ATTR(inject_type, S_IRUGO | S_IWUSR,
1151		   i7core_inject_type_show, i7core_inject_type_store);
1152
1153
1154static DEVICE_ATTR(inject_eccmask, S_IRUGO | S_IWUSR,
1155		   i7core_inject_eccmask_show, i7core_inject_eccmask_store);
1156
1157static DEVICE_ATTR(inject_enable, S_IRUGO | S_IWUSR,
1158		   i7core_inject_enable_show, i7core_inject_enable_store);
1159
1160static struct attribute *i7core_dev_attrs[] = {
1161	&dev_attr_inject_section.attr,
1162	&dev_attr_inject_type.attr,
1163	&dev_attr_inject_eccmask.attr,
1164	&dev_attr_inject_enable.attr,
1165	NULL
1166};
1167
1168ATTRIBUTE_GROUPS(i7core_dev);
1169
1170static int i7core_create_sysfs_devices(struct mem_ctl_info *mci)
1171{
1172	struct i7core_pvt *pvt = mci->pvt_info;
1173	int rc;
1174
1175	pvt->addrmatch_dev = kzalloc(sizeof(*pvt->addrmatch_dev), GFP_KERNEL);
1176	if (!pvt->addrmatch_dev)
1177		return -ENOMEM;
1178
1179	pvt->addrmatch_dev->type = &addrmatch_type;
1180	pvt->addrmatch_dev->bus = mci->dev.bus;
1181	device_initialize(pvt->addrmatch_dev);
1182	pvt->addrmatch_dev->parent = &mci->dev;
1183	dev_set_name(pvt->addrmatch_dev, "inject_addrmatch");
1184	dev_set_drvdata(pvt->addrmatch_dev, mci);
1185
1186	edac_dbg(1, "creating %s\n", dev_name(pvt->addrmatch_dev));
1187
1188	rc = device_add(pvt->addrmatch_dev);
1189	if (rc < 0)
1190		return rc;
1191
1192	if (!pvt->is_registered) {
1193		pvt->chancounts_dev = kzalloc(sizeof(*pvt->chancounts_dev),
1194					      GFP_KERNEL);
1195		if (!pvt->chancounts_dev) {
1196			put_device(pvt->addrmatch_dev);
1197			device_del(pvt->addrmatch_dev);
1198			return -ENOMEM;
1199		}
1200
1201		pvt->chancounts_dev->type = &all_channel_counts_type;
1202		pvt->chancounts_dev->bus = mci->dev.bus;
1203		device_initialize(pvt->chancounts_dev);
1204		pvt->chancounts_dev->parent = &mci->dev;
1205		dev_set_name(pvt->chancounts_dev, "all_channel_counts");
1206		dev_set_drvdata(pvt->chancounts_dev, mci);
1207
1208		edac_dbg(1, "creating %s\n", dev_name(pvt->chancounts_dev));
1209
1210		rc = device_add(pvt->chancounts_dev);
1211		if (rc < 0)
1212			return rc;
1213	}
1214	return 0;
1215}
1216
1217static void i7core_delete_sysfs_devices(struct mem_ctl_info *mci)
1218{
1219	struct i7core_pvt *pvt = mci->pvt_info;
1220
1221	edac_dbg(1, "\n");
1222
1223	if (!pvt->is_registered) {
1224		put_device(pvt->chancounts_dev);
1225		device_del(pvt->chancounts_dev);
1226	}
1227	put_device(pvt->addrmatch_dev);
1228	device_del(pvt->addrmatch_dev);
1229}
1230
1231/****************************************************************************
1232	Device initialization routines: put/get, init/exit
1233 ****************************************************************************/
1234
1235/*
 *	i7core_put_devices	'put' all the devices that we have
 *				reserved via 'get' for one socket
1238 */
1239static void i7core_put_devices(struct i7core_dev *i7core_dev)
1240{
1241	int i;
1242
1243	edac_dbg(0, "\n");
1244	for (i = 0; i < i7core_dev->n_devs; i++) {
1245		struct pci_dev *pdev = i7core_dev->pdev[i];
1246		if (!pdev)
1247			continue;
1248		edac_dbg(0, "Removing dev %02x:%02x.%d\n",
1249			 pdev->bus->number,
1250			 PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1251		pci_dev_put(pdev);
1252	}
1253}
1254
1255static void i7core_put_all_devices(void)
1256{
1257	struct i7core_dev *i7core_dev, *tmp;
1258
1259	list_for_each_entry_safe(i7core_dev, tmp, &i7core_edac_list, list) {
1260		i7core_put_devices(i7core_dev);
1261		free_i7core_dev(i7core_dev);
1262	}
1263}
1264
1265static void __init i7core_xeon_pci_fixup(const struct pci_id_table *table)
1266{
1267	struct pci_dev *pdev = NULL;
1268	int i;
1269
	/*
	 * On Xeon 55xx, the Intel QuickPath Architecture generic non-core
	 * PCI buses aren't announced by ACPI, so we need to use a legacy
	 * bus scan to detect them.
	 */
1275	while (table && table->descr) {
1276		pdev = pci_get_device(PCI_VENDOR_ID_INTEL, table->descr[0].dev_id, NULL);
1277		if (unlikely(!pdev)) {
1278			for (i = 0; i < MAX_SOCKET_BUSES; i++)
1279				pcibios_scan_specific_bus(255-i);
1280		}
1281		pci_dev_put(pdev);
1282		table++;
1283	}
1284}
1285
1286static unsigned i7core_pci_lastbus(void)
1287{
1288	int last_bus = 0, bus;
1289	struct pci_bus *b = NULL;
1290
1291	while ((b = pci_find_next_bus(b)) != NULL) {
1292		bus = b->number;
1293		edac_dbg(0, "Found bus %d\n", bus);
1294		if (bus > last_bus)
1295			last_bus = bus;
1296	}
1297
1298	edac_dbg(0, "Last bus %d\n", last_bus);
1299
1300	return last_bus;
1301}
1302
/*
 *	i7core_get_onedevice()	Find and perform a 'get' operation on one
 *			of the MCH device/functions we want to reference
 *			for this driver.
 */
1309static int i7core_get_onedevice(struct pci_dev **prev,
1310				const struct pci_id_table *table,
1311				const unsigned devno,
1312				const unsigned last_bus)
1313{
1314	struct i7core_dev *i7core_dev;
1315	const struct pci_id_descr *dev_descr = &table->descr[devno];
1316
1317	struct pci_dev *pdev = NULL;
1318	u8 bus = 0;
1319	u8 socket = 0;
1320
1321	pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
1322			      dev_descr->dev_id, *prev);
1323
1324	/*
1325	 * On Xeon 55xx, the Intel QuickPath Arch Generic Non-core regs
1326	 * is at addr 8086:2c40, instead of 8086:2c41. So, we need
1327	 * to probe for the alternate address in case of failure
1328	 */
1329	if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_I7_NONCORE && !pdev) {
1330		pci_dev_get(*prev);	/* pci_get_device will put it */
1331		pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
1332				      PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT, *prev);
1333	}
1334
1335	if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE &&
1336	    !pdev) {
1337		pci_dev_get(*prev);	/* pci_get_device will put it */
1338		pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
1339				      PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT,
1340				      *prev);
1341	}
1342
1343	if (!pdev) {
1344		if (*prev) {
1345			*prev = pdev;
1346			return 0;
1347		}
1348
1349		if (dev_descr->optional)
1350			return 0;
1351
1352		if (devno == 0)
1353			return -ENODEV;
1354
1355		i7core_printk(KERN_INFO,
1356			"Device not found: dev %02x.%d PCI ID %04x:%04x\n",
1357			dev_descr->dev, dev_descr->func,
1358			PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1359
1360		/* End of list, leave */
1361		return -ENODEV;
1362	}
1363	bus = pdev->bus->number;
1364
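	/*
	 * The non-core devices of each socket sit on their own PCI bus,
	 * counting down from the last bus (255 on Nehalem-EP), so the
	 * distance from the last bus gives the socket number.
	 */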
1365	socket = last_bus - bus;
1366
1367	i7core_dev = get_i7core_dev(socket);
1368	if (!i7core_dev) {
1369		i7core_dev = alloc_i7core_dev(socket, table);
1370		if (!i7core_dev) {
1371			pci_dev_put(pdev);
1372			return -ENOMEM;
1373		}
1374	}
1375
1376	if (i7core_dev->pdev[devno]) {
1377		i7core_printk(KERN_ERR,
1378			"Duplicated device for "
1379			"dev %02x:%02x.%d PCI ID %04x:%04x\n",
1380			bus, dev_descr->dev, dev_descr->func,
1381			PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1382		pci_dev_put(pdev);
1383		return -ENODEV;
1384	}
1385
1386	i7core_dev->pdev[devno] = pdev;
1387
1388	/* Sanity check */
1389	if (unlikely(PCI_SLOT(pdev->devfn) != dev_descr->dev ||
1390			PCI_FUNC(pdev->devfn) != dev_descr->func)) {
1391		i7core_printk(KERN_ERR,
1392			"Device PCI ID %04x:%04x "
1393			"has dev %02x:%02x.%d instead of dev %02x:%02x.%d\n",
1394			PCI_VENDOR_ID_INTEL, dev_descr->dev_id,
1395			bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
1396			bus, dev_descr->dev, dev_descr->func);
1397		return -ENODEV;
1398	}
1399
1400	/* Be sure that the device is enabled */
1401	if (unlikely(pci_enable_device(pdev) < 0)) {
1402		i7core_printk(KERN_ERR,
1403			"Couldn't enable "
1404			"dev %02x:%02x.%d PCI ID %04x:%04x\n",
1405			bus, dev_descr->dev, dev_descr->func,
1406			PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1407		return -ENODEV;
1408	}
1409
1410	edac_dbg(0, "Detected socket %d dev %02x:%02x.%d PCI ID %04x:%04x\n",
1411		 socket, bus, dev_descr->dev,
1412		 dev_descr->func,
1413		 PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1414
	/*
	 * As stated in drivers/pci/search.c, the reference count for
	 * @from is always decremented if it is not %NULL. So, since we
	 * keep iterating until pci_get_device() returns NULL, take an
	 * extra reference on the device here.
	 */
1420	pci_dev_get(pdev);
1421
1422	*prev = pdev;
1423
1424	return 0;
1425}
1426
1427static int i7core_get_all_devices(void)
1428{
1429	int i, rc, last_bus;
1430	struct pci_dev *pdev = NULL;
1431	const struct pci_id_table *table = pci_dev_table;
1432
1433	last_bus = i7core_pci_lastbus();
1434
1435	while (table && table->descr) {
1436		for (i = 0; i < table->n_devs; i++) {
1437			pdev = NULL;
1438			do {
1439				rc = i7core_get_onedevice(&pdev, table, i,
1440							  last_bus);
1441				if (rc < 0) {
1442					if (i == 0) {
1443						i = table->n_devs;
1444						break;
1445					}
1446					i7core_put_all_devices();
1447					return -ENODEV;
1448				}
1449			} while (pdev);
1450		}
1451		table++;
1452	}
1453
1454	return 0;
1455}
1456
1457static int mci_bind_devs(struct mem_ctl_info *mci,
1458			 struct i7core_dev *i7core_dev)
1459{
1460	struct i7core_pvt *pvt = mci->pvt_info;
1461	struct pci_dev *pdev;
1462	int i, func, slot;
1463	char *family;
1464
1465	pvt->is_registered = false;
1466	pvt->enable_scrub  = false;
1467	for (i = 0; i < i7core_dev->n_devs; i++) {
1468		pdev = i7core_dev->pdev[i];
1469		if (!pdev)
1470			continue;
1471
1472		func = PCI_FUNC(pdev->devfn);
1473		slot = PCI_SLOT(pdev->devfn);
1474		if (slot == 3) {
1475			if (unlikely(func > MAX_MCR_FUNC))
1476				goto error;
1477			pvt->pci_mcr[func] = pdev;
1478		} else if (likely(slot >= 4 && slot < 4 + NUM_CHANS)) {
1479			if (unlikely(func > MAX_CHAN_FUNC))
1480				goto error;
1481			pvt->pci_ch[slot - 4][func] = pdev;
1482		} else if (!slot && !func) {
1483			pvt->pci_noncore = pdev;
1484
1485			/* Detect the processor family */
1486			switch (pdev->device) {
1487			case PCI_DEVICE_ID_INTEL_I7_NONCORE:
1488				family = "Xeon 35xx/ i7core";
1489				pvt->enable_scrub = false;
1490				break;
1491			case PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT:
1492				family = "i7-800/i5-700";
1493				pvt->enable_scrub = false;
1494				break;
1495			case PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE:
1496				family = "Xeon 34xx";
1497				pvt->enable_scrub = false;
1498				break;
1499			case PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT:
1500				family = "Xeon 55xx";
1501				pvt->enable_scrub = true;
1502				break;
1503			case PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_REV2:
1504				family = "Xeon 56xx / i7-900";
1505				pvt->enable_scrub = true;
1506				break;
1507			default:
1508				family = "unknown";
1509				pvt->enable_scrub = false;
1510			}
1511			edac_dbg(0, "Detected a processor type %s\n", family);
1512		} else
1513			goto error;
1514
1515		edac_dbg(0, "Associated fn %d.%d, dev = %p, socket %d\n",
1516			 PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
1517			 pdev, i7core_dev->socket);
1518
1519		if (PCI_SLOT(pdev->devfn) == 3 &&
1520			PCI_FUNC(pdev->devfn) == 2)
1521			pvt->is_registered = true;
1522	}
1523
1524	return 0;
1525
1526error:
1527	i7core_printk(KERN_ERR, "Device %d, function %d "
1528		      "is out of the expected range\n",
1529		      slot, func);
1530	return -EINVAL;
1531}
1532
1533/****************************************************************************
1534			Error check routines
1535 ****************************************************************************/
1536
1537static void i7core_rdimm_update_ce_count(struct mem_ctl_info *mci,
1538					 const int chan,
1539					 const int new0,
1540					 const int new1,
1541					 const int new2)
1542{
1543	struct i7core_pvt *pvt = mci->pvt_info;
1544	int add0 = 0, add1 = 0, add2 = 0;
1545	/* Updates CE counters if it is not the first time here */
1546	if (pvt->ce_count_available) {
1547		/* Updates CE counters */
1548
1549		add2 = new2 - pvt->rdimm_last_ce_count[chan][2];
1550		add1 = new1 - pvt->rdimm_last_ce_count[chan][1];
1551		add0 = new0 - pvt->rdimm_last_ce_count[chan][0];
1552
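		/*
		 * The per-DIMM counters are only 15 bits wide (see
		 * DIMM_TOP/BOT_COR_ERR), so a negative difference means the
		 * hardware counter wrapped around since the last read.
		 */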
1553		if (add2 < 0)
1554			add2 += 0x7fff;
1555		pvt->rdimm_ce_count[chan][2] += add2;
1556
1557		if (add1 < 0)
1558			add1 += 0x7fff;
1559		pvt->rdimm_ce_count[chan][1] += add1;
1560
1561		if (add0 < 0)
1562			add0 += 0x7fff;
1563		pvt->rdimm_ce_count[chan][0] += add0;
1564	} else
1565		pvt->ce_count_available = 1;
1566
1567	/* Store the new values */
1568	pvt->rdimm_last_ce_count[chan][2] = new2;
1569	pvt->rdimm_last_ce_count[chan][1] = new1;
1570	pvt->rdimm_last_ce_count[chan][0] = new0;
1571
	/* Update the EDAC core */
1573	if (add0 != 0)
1574		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, add0,
1575				     0, 0, 0,
1576				     chan, 0, -1, "error", "");
1577	if (add1 != 0)
1578		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, add1,
1579				     0, 0, 0,
1580				     chan, 1, -1, "error", "");
1581	if (add2 != 0)
1582		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, add2,
1583				     0, 0, 0,
1584				     chan, 2, -1, "error", "");
1585}
1586
1587static void i7core_rdimm_check_mc_ecc_err(struct mem_ctl_info *mci)
1588{
1589	struct i7core_pvt *pvt = mci->pvt_info;
1590	u32 rcv[3][2];
1591	int i, new0, new1, new2;
1592
	/* Read DEV 3: FUN 2: MC_COR_ECC_CNT regs directly */
1594	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_0,
1595								&rcv[0][0]);
1596	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_1,
1597								&rcv[0][1]);
1598	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_2,
1599								&rcv[1][0]);
1600	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_3,
1601								&rcv[1][1]);
1602	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_4,
1603								&rcv[2][0]);
1604	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_5,
1605								&rcv[2][1]);
1606	for (i = 0 ; i < 3; i++) {
1607		edac_dbg(3, "MC_COR_ECC_CNT%d = 0x%x; MC_COR_ECC_CNT%d = 0x%x\n",
1608			 (i * 2), rcv[i][0], (i * 2) + 1, rcv[i][1]);
		/* If the channel has 3 dimms */
1610		if (pvt->channel[i].dimms > 2) {
1611			new0 = DIMM_BOT_COR_ERR(rcv[i][0]);
1612			new1 = DIMM_TOP_COR_ERR(rcv[i][0]);
1613			new2 = DIMM_BOT_COR_ERR(rcv[i][1]);
1614		} else {
1615			new0 = DIMM_TOP_COR_ERR(rcv[i][0]) +
1616					DIMM_BOT_COR_ERR(rcv[i][0]);
1617			new1 = DIMM_TOP_COR_ERR(rcv[i][1]) +
1618					DIMM_BOT_COR_ERR(rcv[i][1]);
1619			new2 = 0;
1620		}
1621
1622		i7core_rdimm_update_ce_count(mci, i, new0, new1, new2);
1623	}
1624}
1625
1626/* This function is based on the device 3 function 4 registers as described on:
1627 * Intel Xeon Processor 5500 Series Datasheet Volume 2
1628 *	http://www.intel.com/Assets/PDF/datasheet/321322.pdf
1629 * also available at:
1630 * 	http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
1631 */
1632static void i7core_udimm_check_mc_ecc_err(struct mem_ctl_info *mci)
1633{
1634	struct i7core_pvt *pvt = mci->pvt_info;
1635	u32 rcv1, rcv0;
1636	int new0, new1, new2;
1637
1638	if (!pvt->pci_mcr[4]) {
1639		edac_dbg(0, "MCR registers not found\n");
1640		return;
1641	}
1642
1643	/* Corrected test errors */
1644	pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV1, &rcv1);
1645	pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV0, &rcv0);
1646
1647	/* Store the new values */
1648	new2 = DIMM2_COR_ERR(rcv1);
1649	new1 = DIMM1_COR_ERR(rcv0);
1650	new0 = DIMM0_COR_ERR(rcv0);
1651
1652	/* Updates CE counters if it is not the first time here */
1653	if (pvt->ce_count_available) {
1654		/* Updates CE counters */
1655		int add0, add1, add2;
1656
1657		add2 = new2 - pvt->udimm_last_ce_count[2];
1658		add1 = new1 - pvt->udimm_last_ce_count[1];
1659		add0 = new0 - pvt->udimm_last_ce_count[0];
1660
1661		if (add2 < 0)
1662			add2 += 0x7fff;
1663		pvt->udimm_ce_count[2] += add2;
1664
1665		if (add1 < 0)
1666			add1 += 0x7fff;
1667		pvt->udimm_ce_count[1] += add1;
1668
1669		if (add0 < 0)
1670			add0 += 0x7fff;
1671		pvt->udimm_ce_count[0] += add0;
1672
1673		if (add0 | add1 | add2)
1674			i7core_printk(KERN_ERR, "New Corrected error(s): "
1675				      "dimm0: +%d, dimm1: +%d, dimm2 +%d\n",
1676				      add0, add1, add2);
1677	} else
1678		pvt->ce_count_available = 1;
1679
1680	/* Store the new values */
1681	pvt->udimm_last_ce_count[2] = new2;
1682	pvt->udimm_last_ce_count[1] = new1;
1683	pvt->udimm_last_ce_count[0] = new0;
1684}
1685
1686/*
 * According to tables E-11 and E-12 in chapter E.3.3 of the Intel 64 and
 * IA-32 Architectures Software Developer's Manual, Volume 3B, Nehalem is
 * defined as family 0x06, model 0x1a.
 *
 * The MCA registers used here are the following ones:
 *     struct mce field	MCA Register
 *     m->status	MSR_IA32_MC8_STATUS
 *     m->addr		MSR_IA32_MC8_ADDR
 *     m->misc		MSR_IA32_MC8_MISC
 * In the case of Nehalem, the error information is encoded in the .status
 * and .misc fields.
1698 */
1699static void i7core_mce_output_error(struct mem_ctl_info *mci,
1700				    const struct mce *m)
1701{
1702	struct i7core_pvt *pvt = mci->pvt_info;
1703	char *optype, *err;
1704	enum hw_event_mc_err_type tp_event;
1705	unsigned long error = m->status & 0x1ff0000l;
1706	bool uncorrected_error = m->mcgstatus & 1ll << 61;
1707	bool ripv = m->mcgstatus & 1;
1708	u32 optypenum = (m->status >> 4) & 0x07;
1709	u32 core_err_cnt = (m->status >> 38) & 0x7fff;
1710	u32 dimm = (m->misc >> 16) & 0x3;
1711	u32 channel = (m->misc >> 18) & 0x3;
1712	u32 syndrome = m->misc >> 32;
1713	u32 errnum = find_first_bit(&error, 32);
1714
1715	if (uncorrected_error) {
1716		if (ripv)
1717			tp_event = HW_EVENT_ERR_FATAL;
1718		else
1719			tp_event = HW_EVENT_ERR_UNCORRECTED;
1720	} else {
1721		tp_event = HW_EVENT_ERR_CORRECTED;
1722	}
1723
1724	switch (optypenum) {
1725	case 0:
1726		optype = "generic undef request";
1727		break;
1728	case 1:
1729		optype = "read error";
1730		break;
1731	case 2:
1732		optype = "write error";
1733		break;
1734	case 3:
1735		optype = "addr/cmd error";
1736		break;
1737	case 4:
1738		optype = "scrubbing error";
1739		break;
1740	default:
1741		optype = "reserved";
1742		break;
1743	}
1744
1745	switch (errnum) {
1746	case 16:
1747		err = "read ECC error";
1748		break;
1749	case 17:
1750		err = "RAS ECC error";
1751		break;
1752	case 18:
1753		err = "write parity error";
1754		break;
1755	case 19:
		err = "redundancy loss";
1757		break;
1758	case 20:
1759		err = "reserved";
1760		break;
1761	case 21:
1762		err = "memory range error";
1763		break;
1764	case 22:
1765		err = "RTID out of range";
1766		break;
1767	case 23:
1768		err = "address parity error";
1769		break;
1770	case 24:
1771		err = "byte enable parity error";
1772		break;
1773	default:
1774		err = "unknown";
1775	}
1776
1777	/*
1778	 * Call the helper to output message
1779	 * FIXME: what to do if core_err_cnt > 1? Currently, it generates
1780	 * only one event
1781	 */
1782	if (uncorrected_error || !pvt->is_registered)
1783		edac_mc_handle_error(tp_event, mci, core_err_cnt,
1784				     m->addr >> PAGE_SHIFT,
1785				     m->addr & ~PAGE_MASK,
1786				     syndrome,
1787				     channel, dimm, -1,
1788				     err, optype);
1789}
1790
1791/*
1792 *	i7core_check_error	Retrieve and process errors reported by the
1793 *				hardware. Called by the Core module.
1794 */
1795static void i7core_check_error(struct mem_ctl_info *mci)
1796{
1797	struct i7core_pvt *pvt = mci->pvt_info;
1798	int i;
1799	unsigned count = 0;
1800	struct mce *m;
1801
1802	/*
1803	 * MCE first step: Copy all mce errors into a temporary buffer
1804	 * We use a double buffering here, to reduce the risk of
1805	 * losing an error.
1806	 */
1807	smp_rmb();
1808	count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in)
1809		% MCE_LOG_LEN;
1810	if (!count)
1811		goto check_ce_error;
1812
1813	m = pvt->mce_outentry;
1814	if (pvt->mce_in + count > MCE_LOG_LEN) {
1815		unsigned l = MCE_LOG_LEN - pvt->mce_in;
1816
1817		memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l);
1818		smp_wmb();
1819		pvt->mce_in = 0;
1820		count -= l;
1821		m += l;
1822	}
1823	memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count);
1824	smp_wmb();
1825	pvt->mce_in += count;
1826
1827	smp_rmb();
1828	if (pvt->mce_overrun) {
1829		i7core_printk(KERN_ERR, "Lost %d memory errors\n",
1830			      pvt->mce_overrun);
1831		smp_wmb();
1832		pvt->mce_overrun = 0;
1833	}
1834
1835	/*
1836	 * MCE second step: parse errors and display
1837	 */
1838	for (i = 0; i < count; i++)
1839		i7core_mce_output_error(mci, &pvt->mce_outentry[i]);
1840
1841	/*
1842	 * Now, let's increment CE error counts
1843	 */
1844check_ce_error:
1845	if (!pvt->is_registered)
1846		i7core_udimm_check_mc_ecc_err(mci);
1847	else
1848		i7core_rdimm_check_mc_ecc_err(mci);
1849}
1850
1851/*
 * i7core_mce_check_error	Replicates the mcelog routine to get errors.
 *				This routine simply queues mcelog errors and
 *				returns. The error itself is handled later by
 *				i7core_check_error.
 * WARNING: As this routine may be called at NMI time, extra care should
 * be taken to avoid deadlocks and to be as fast as possible.
1858 */
1859static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val,
1860				  void *data)
1861{
1862	struct mce *mce = (struct mce *)data;
1863	struct i7core_dev *i7_dev;
1864	struct mem_ctl_info *mci;
1865	struct i7core_pvt *pvt;
1866
1867	i7_dev = get_i7core_dev(mce->socketid);
1868	if (!i7_dev)
1869		return NOTIFY_DONE;
1870
1871	mci = i7_dev->mci;
1872	pvt = mci->pvt_info;
1873
1874	/*
1875	 * Just let mcelog handle it if the error is
1876	 * outside the memory controller
1877	 */
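	/*
	 * Per the SDM, memory-controller MCACODs have the form
	 * 0000 0000 1MMM CCCC, so bits 15:7 of the MCA status code equal 1
	 * for memory errors.
	 */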
1878	if (((mce->status & 0xffff) >> 7) != 1)
1879		return NOTIFY_DONE;
1880
1881	/* Bank 8 registers are the only ones that we know how to handle */
1882	if (mce->bank != 8)
1883		return NOTIFY_DONE;
1884
1885	smp_rmb();
1886	if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) {
1887		smp_wmb();
1888		pvt->mce_overrun++;
1889		return NOTIFY_DONE;
1890	}
1891
	/* Copy the memory error into the ring buffer */
1893	memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce));
1894	smp_wmb();
1895	pvt->mce_out = (pvt->mce_out + 1) % MCE_LOG_LEN;
1896
1897	/* Handle fatal errors immediately */
1898	if (mce->mcgstatus & 1)
1899		i7core_check_error(mci);
1900
1901	/* Advise mcelog that the errors were handled */
1902	return NOTIFY_STOP;
1903}
1904
1905static struct notifier_block i7_mce_dec = {
1906	.notifier_call	= i7core_mce_check_error,
1907};
1908
/* Layout of an SMBIOS type 17 (Memory Device) entry, as seen via dmi_walk() */
struct memdev_dmi_entry {
1910	u8 type;
1911	u8 length;
1912	u16 handle;
1913	u16 phys_mem_array_handle;
1914	u16 mem_err_info_handle;
1915	u16 total_width;
1916	u16 data_width;
1917	u16 size;
1918	u8 form;
1919	u8 device_set;
1920	u8 device_locator;
1921	u8 bank_locator;
1922	u8 memory_type;
1923	u16 type_detail;
1924	u16 speed;
1925	u8 manufacturer;
1926	u8 serial_number;
1927	u8 asset_tag;
1928	u8 part_number;
1929	u8 attributes;
1930	u32 extended_size;
1931	u16 conf_mem_clk_speed;
1932} __attribute__((__packed__));
1933
1934
1935/*
 * Decode the DRAM Clock Frequency. Be paranoid: make sure that all
 * memory devices report the same speed, and if they don't, consider
 * all speeds to be invalid.
1939 */
1940static void decode_dclk(const struct dmi_header *dh, void *_dclk_freq)
1941{
1942	int *dclk_freq = _dclk_freq;
1943	u16 dmi_mem_clk_speed;
1944
1945	if (*dclk_freq == -1)
1946		return;
1947
1948	if (dh->type == DMI_ENTRY_MEM_DEVICE) {
1949		struct memdev_dmi_entry *memdev_dmi_entry =
1950			(struct memdev_dmi_entry *)dh;
1951		unsigned long conf_mem_clk_speed_offset =
1952			(unsigned long)&memdev_dmi_entry->conf_mem_clk_speed -
1953			(unsigned long)&memdev_dmi_entry->type;
1954		unsigned long speed_offset =
1955			(unsigned long)&memdev_dmi_entry->speed -
1956			(unsigned long)&memdev_dmi_entry->type;
1957
1958		/* Check that a DIMM is present */
1959		if (memdev_dmi_entry->size == 0)
1960			return;
1961
1962		/*
		 * Pick the configured speed if it's available; otherwise
		 * pick the DIMM speed, or else we don't have a speed.
1965		 */
1966		if (memdev_dmi_entry->length > conf_mem_clk_speed_offset) {
1967			dmi_mem_clk_speed =
1968				memdev_dmi_entry->conf_mem_clk_speed;
1969		} else if (memdev_dmi_entry->length > speed_offset) {
1970			dmi_mem_clk_speed = memdev_dmi_entry->speed;
1971		} else {
1972			*dclk_freq = -1;
1973			return;
1974		}
1975
1976		if (*dclk_freq == 0) {
1977			/* First pass, speed was 0 */
1978			if (dmi_mem_clk_speed > 0) {
1979				/* Set speed if a valid speed is read */
1980				*dclk_freq = dmi_mem_clk_speed;
1981			} else {
1982				/* Otherwise we don't have a valid speed */
1983				*dclk_freq = -1;
1984			}
1985		} else if (*dclk_freq > 0 &&
1986			   *dclk_freq != dmi_mem_clk_speed) {
1987			/*
1988			 * If we have a speed, check that all DIMMS are the same
1989			 * speed, otherwise set the speed as invalid.
1990			 */
1991			*dclk_freq = -1;
1992		}
1993	}
1994}
1995
1996/*
1997 * The default DCLK frequency is used as a fallback if we
1998 * fail to find anything reliable in the DMI. The value
1999 * is taken straight from the datasheet.
2000 */
2001#define DEFAULT_DCLK_FREQ 800
2002
2003static int get_dclk_freq(void)
2004{
2005	int dclk_freq = 0;
2006
2007	dmi_walk(decode_dclk, (void *)&dclk_freq);
2008
2009	if (dclk_freq < 1)
2010		return DEFAULT_DCLK_FREQ;
2011
2012	return dclk_freq;
2013}
2014
2015/*
 * set_sdram_scrub_rate		This routine sets the byte/sec bandwidth scrub
 *				rate in hardware according to the SCRUBINTERVAL
 *				formula found in the datasheet.
2019 */
2020static int set_sdram_scrub_rate(struct mem_ctl_info *mci, u32 new_bw)
2021{
2022	struct i7core_pvt *pvt = mci->pvt_info;
2023	struct pci_dev *pdev;
2024	u32 dw_scrub;
2025	u32 dw_ssr;
2026
2027	/* Get data from the MC register, function 2 */
2028	pdev = pvt->pci_mcr[2];
2029	if (!pdev)
2030		return -ENODEV;
2031
2032	pci_read_config_dword(pdev, MC_SCRUB_CONTROL, &dw_scrub);
2033
2034	if (new_bw == 0) {
		/* Prepare to disable patrol scrub */
2036		dw_scrub &= ~STARTSCRUB;
2037		/* Stop the patrol scrub engine */
2038		write_and_test(pdev, MC_SCRUB_CONTROL,
2039			       dw_scrub & ~SCRUBINTERVAL_MASK);
2040
2041		/* Get current status of scrub rate and set bit to disable */
2042		pci_read_config_dword(pdev, MC_SSRCONTROL, &dw_ssr);
2043		dw_ssr &= ~SSR_MODE_MASK;
2044		dw_ssr |= SSR_MODE_DISABLE;
2045	} else {
2046		const int cache_line_size = 64;
2047		const u32 freq_dclk_mhz = pvt->dclk_freq;
2048		unsigned long long scrub_interval;
2049		/*
		 * Translate the desired scrub rate into a register value
		 * and program it.
2052		 */
2053		scrub_interval = (unsigned long long)freq_dclk_mhz *
2054			cache_line_size * 1000000;
2055		do_div(scrub_interval, new_bw);
2056
2057		if (!scrub_interval || scrub_interval > SCRUBINTERVAL_MASK)
2058			return -EINVAL;
2059
2060		dw_scrub = SCRUBINTERVAL_MASK & scrub_interval;
2061
2062		/* Start the patrol scrub engine */
2063		pci_write_config_dword(pdev, MC_SCRUB_CONTROL,
2064				       STARTSCRUB | dw_scrub);
2065
2066		/* Get current status of scrub rate and set bit to enable */
2067		pci_read_config_dword(pdev, MC_SSRCONTROL, &dw_ssr);
2068		dw_ssr &= ~SSR_MODE_MASK;
2069		dw_ssr |= SSR_MODE_ENABLE;
2070	}
2071	/* Disable or enable scrubbing */
2072	pci_write_config_dword(pdev, MC_SSRCONTROL, dw_ssr);
2073
2074	return new_bw;
2075}
2076
2077/*
 * get_sdram_scrub_rate		This routine converts the current scrub rate
 *				value into byte/sec bandwidth according to the
 *				SCRUBINTERVAL formula found in the datasheet.
2081 */
2082static int get_sdram_scrub_rate(struct mem_ctl_info *mci)
2083{
2084	struct i7core_pvt *pvt = mci->pvt_info;
2085	struct pci_dev *pdev;
2086	const u32 cache_line_size = 64;
2087	const u32 freq_dclk_mhz = pvt->dclk_freq;
2088	unsigned long long scrub_rate;
2089	u32 scrubval;
2090
2091	/* Get data from the MC register, function 2 */
2092	pdev = pvt->pci_mcr[2];
2093	if (!pdev)
2094		return -ENODEV;
2095
2096	/* Get current scrub control data */
2097	pci_read_config_dword(pdev, MC_SCRUB_CONTROL, &scrubval);
2098
	/* Mask the highest 8 bits to 0 */
	scrubval &= SCRUBINTERVAL_MASK;
2101	if (!scrubval)
2102		return 0;
2103
	/* Convert the scrub interval value into byte/sec bandwidth */
	scrub_rate = (unsigned long long)freq_dclk_mhz *
		1000000 * cache_line_size;
2107	do_div(scrub_rate, scrubval);
2108	return (int)scrub_rate;
2109}
2110
2111static void enable_sdram_scrub_setting(struct mem_ctl_info *mci)
2112{
2113	struct i7core_pvt *pvt = mci->pvt_info;
2114	u32 pci_lock;
2115
2116	/* Unlock writes to pci registers */
2117	pci_read_config_dword(pvt->pci_noncore, MC_CFG_CONTROL, &pci_lock);
2118	pci_lock &= ~0x3;
2119	pci_write_config_dword(pvt->pci_noncore, MC_CFG_CONTROL,
2120			       pci_lock | MC_CFG_UNLOCK);
2121
2122	mci->set_sdram_scrub_rate = set_sdram_scrub_rate;
2123	mci->get_sdram_scrub_rate = get_sdram_scrub_rate;
2124}
2125
2126static void disable_sdram_scrub_setting(struct mem_ctl_info *mci)
2127{
2128	struct i7core_pvt *pvt = mci->pvt_info;
2129	u32 pci_lock;
2130
2131	/* Lock writes to pci registers */
2132	pci_read_config_dword(pvt->pci_noncore, MC_CFG_CONTROL, &pci_lock);
2133	pci_lock &= ~0x3;
2134	pci_write_config_dword(pvt->pci_noncore, MC_CFG_CONTROL,
2135			       pci_lock | MC_CFG_LOCK);
2136}
2137
2138static void i7core_pci_ctl_create(struct i7core_pvt *pvt)
2139{
2140	pvt->i7core_pci = edac_pci_create_generic_ctl(
2141						&pvt->i7core_dev->pdev[0]->dev,
2142						EDAC_MOD_STR);
2143	if (unlikely(!pvt->i7core_pci))
2144		i7core_printk(KERN_WARNING,
2145			      "Unable to setup PCI error report via EDAC\n");
2146}
2147
2148static void i7core_pci_ctl_release(struct i7core_pvt *pvt)
2149{
2150	if (likely(pvt->i7core_pci))
2151		edac_pci_release_generic_ctl(pvt->i7core_pci);
2152	else
2153		i7core_printk(KERN_ERR,
2154				"Couldn't find mem_ctl_info for socket %d\n",
2155				pvt->i7core_dev->socket);
2156	pvt->i7core_pci = NULL;
2157}
2158
2159static void i7core_unregister_mci(struct i7core_dev *i7core_dev)
2160{
2161	struct mem_ctl_info *mci = i7core_dev->mci;
2162	struct i7core_pvt *pvt;
2163
2164	if (unlikely(!mci || !mci->pvt_info)) {
2165		edac_dbg(0, "MC: dev = %p\n", &i7core_dev->pdev[0]->dev);
2166
2167		i7core_printk(KERN_ERR, "Couldn't find mci handler\n");
2168		return;
2169	}
2170
2171	pvt = mci->pvt_info;
2172
2173	edac_dbg(0, "MC: mci = %p, dev = %p\n", mci, &i7core_dev->pdev[0]->dev);
2174
2175	/* Disable scrubrate setting */
2176	if (pvt->enable_scrub)
2177		disable_sdram_scrub_setting(mci);
2178
2179	/* Disable EDAC polling */
2180	i7core_pci_ctl_release(pvt);
2181
2182	/* Remove MC sysfs nodes */
2183	i7core_delete_sysfs_devices(mci);
2184	edac_mc_del_mc(mci->pdev);
2185
2186	edac_dbg(1, "%s: free mci struct\n", mci->ctl_name);
2187	kfree(mci->ctl_name);
2188	edac_mc_free(mci);
2189	i7core_dev->mci = NULL;
2190}
2191
2192static int i7core_register_mci(struct i7core_dev *i7core_dev)
2193{
2194	struct mem_ctl_info *mci;
2195	struct i7core_pvt *pvt;
2196	int rc;
2197	struct edac_mc_layer layers[2];
2198
2199	/* allocate a new MC control structure */
2200
2201	layers[0].type = EDAC_MC_LAYER_CHANNEL;
2202	layers[0].size = NUM_CHANS;
2203	layers[0].is_virt_csrow = false;
2204	layers[1].type = EDAC_MC_LAYER_SLOT;
2205	layers[1].size = MAX_DIMMS;
2206	layers[1].is_virt_csrow = true;
2207	mci = edac_mc_alloc(i7core_dev->socket, ARRAY_SIZE(layers), layers,
2208			    sizeof(*pvt));
2209	if (unlikely(!mci))
2210		return -ENOMEM;
2211
2212	edac_dbg(0, "MC: mci = %p, dev = %p\n", mci, &i7core_dev->pdev[0]->dev);
2213
2214	pvt = mci->pvt_info;
2215	memset(pvt, 0, sizeof(*pvt));
2216
	/* Associate i7core_dev and mci for future use */
2218	pvt->i7core_dev = i7core_dev;
2219	i7core_dev->mci = mci;
2220
2221	/*
2222	 * FIXME: how to handle RDDR3 at MCI level? It is possible to have
	 * mixed RDDR3/UDDR3 with Nehalem, provided that they are on different
	 * memory channels.
2225	 */
2226	mci->mtype_cap = MEM_FLAG_DDR3;
2227	mci->edac_ctl_cap = EDAC_FLAG_NONE;
2228	mci->edac_cap = EDAC_FLAG_NONE;
2229	mci->mod_name = "i7core_edac.c";
2230	mci->mod_ver = I7CORE_REVISION;
2231	mci->ctl_name = kasprintf(GFP_KERNEL, "i7 core #%d",
2232				  i7core_dev->socket);
2233	mci->dev_name = pci_name(i7core_dev->pdev[0]);
2234	mci->ctl_page_to_phys = NULL;
2235
2236	/* Store pci devices at mci for faster access */
2237	rc = mci_bind_devs(mci, i7core_dev);
2238	if (unlikely(rc < 0))
2239		goto fail0;
2240
2241
2242	/* Get dimm basic config */
2243	get_dimm_config(mci);
2244	/* record ptr to the generic device */
2245	mci->pdev = &i7core_dev->pdev[0]->dev;
2246	/* Set the function pointer to an actual operation function */
2247	mci->edac_check = i7core_check_error;
2248
2249	/* Enable scrubrate setting */
2250	if (pvt->enable_scrub)
2251		enable_sdram_scrub_setting(mci);
2252
2253	/* add this new MC control structure to EDAC's list of MCs */
2254	if (unlikely(edac_mc_add_mc_with_groups(mci, i7core_dev_groups))) {
2255		edac_dbg(0, "MC: failed edac_mc_add_mc()\n");
2256		/* FIXME: perhaps some code should go here that disables error
2257		 * reporting if we just enabled it
2258		 */
2259
2260		rc = -EINVAL;
2261		goto fail0;
2262	}
2263	if (i7core_create_sysfs_devices(mci)) {
2264		edac_dbg(0, "MC: failed to create sysfs nodes\n");
2265		edac_mc_del_mc(mci->pdev);
2266		rc = -EINVAL;
2267		goto fail0;
2268	}
2269
2270	/* Default error mask is any memory */
2271	pvt->inject.channel = 0;
2272	pvt->inject.dimm = -1;
2273	pvt->inject.rank = -1;
2274	pvt->inject.bank = -1;
2275	pvt->inject.page = -1;
2276	pvt->inject.col = -1;
2277
	/* Allocate the generic PCI control info */
2279	i7core_pci_ctl_create(pvt);
2280
2281	/* DCLK for scrub rate setting */
2282	pvt->dclk_freq = get_dclk_freq();
2283
2284	return 0;
2285
2286fail0:
2287	kfree(mci->ctl_name);
2288	edac_mc_free(mci);
2289	i7core_dev->mci = NULL;
2290	return rc;
2291}
2292
2293/*
2294 *	i7core_probe	Probe for ONE instance of device to see if it is
2295 *			present.
2296 *	return:
2297 *		0 for FOUND a device
2298 *		< 0 for error code
2299 */
2300
2301static int i7core_probe(struct pci_dev *pdev, const struct pci_device_id *id)
2302{
2303	int rc, count = 0;
2304	struct i7core_dev *i7core_dev;
2305
2306	/* get the pci devices we want to reserve for our use */
2307	mutex_lock(&i7core_edac_lock);
2308
2309	/*
2310	 * All memory controllers are allocated at the first pass.
2311	 */
2312	if (unlikely(probed >= 1)) {
2313		mutex_unlock(&i7core_edac_lock);
2314		return -ENODEV;
2315	}
2316	probed++;
2317
2318	rc = i7core_get_all_devices();
2319	if (unlikely(rc < 0))
2320		goto fail0;
2321
2322	list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
2323		count++;
2324		rc = i7core_register_mci(i7core_dev);
2325		if (unlikely(rc < 0))
2326			goto fail1;
2327	}
2328
2329	/*
	 * Nehalem-EX uses a different memory controller. However, as the
	 * memory controller is not visible on some Nehalem/Nehalem-EP, we
	 * need to probe indirectly via an X58 PCI device. The same devices
	 * are found on (some) Nehalem-EX. So, on those machines, the
	 * probe routine needs to return -ENODEV, as the actual Memory
	 * Controller registers won't be detected.
2336	 */
2337	if (!count) {
2338		rc = -ENODEV;
2339		goto fail1;
2340	}
2341
2342	i7core_printk(KERN_INFO,
2343		      "Driver loaded, %d memory controller(s) found.\n",
2344		      count);
2345
2346	mutex_unlock(&i7core_edac_lock);
2347	return 0;
2348
2349fail1:
2350	list_for_each_entry(i7core_dev, &i7core_edac_list, list)
2351		i7core_unregister_mci(i7core_dev);
2352
2353	i7core_put_all_devices();
2354fail0:
2355	mutex_unlock(&i7core_edac_lock);
2356	return rc;
2357}
2358
2359/*
2360 *	i7core_remove	destructor for one instance of device
2361 *
2362 */
2363static void i7core_remove(struct pci_dev *pdev)
2364{
2365	struct i7core_dev *i7core_dev;
2366
2367	edac_dbg(0, "\n");
2368
2369	/*
	 * There is a problem here: the pdev value used for removal will be
	 * wrong, since it points to the X58 register used to detect that the
	 * machine is a Nehalem or later design. However, due to the way
	 * several PCI devices are grouped together to provide MC
	 * functionality, we need to use a different method for releasing
	 * the devices.
2375	 */
2376
2377	mutex_lock(&i7core_edac_lock);
2378
2379	if (unlikely(!probed)) {
2380		mutex_unlock(&i7core_edac_lock);
2381		return;
2382	}
2383
2384	list_for_each_entry(i7core_dev, &i7core_edac_list, list)
2385		i7core_unregister_mci(i7core_dev);
2386
2387	/* Release PCI resources */
2388	i7core_put_all_devices();
2389
2390	probed--;
2391
2392	mutex_unlock(&i7core_edac_lock);
2393}
2394
2395MODULE_DEVICE_TABLE(pci, i7core_pci_tbl);
2396
2397/*
2398 *	i7core_driver	pci_driver structure for this module
2399 *
2400 */
2401static struct pci_driver i7core_driver = {
2402	.name     = "i7core_edac",
2403	.probe    = i7core_probe,
2404	.remove   = i7core_remove,
2405	.id_table = i7core_pci_tbl,
2406};
2407
2408/*
2409 *	i7core_init		Module entry function
2410 *			Try to initialize this module for its devices
2411 */
2412static int __init i7core_init(void)
2413{
2414	int pci_rc;
2415
2416	edac_dbg(2, "\n");
2417
2418	/* Ensure that the OPSTATE is set correctly for POLL or NMI */
2419	opstate_init();
2420
2421	if (use_pci_fixup)
2422		i7core_xeon_pci_fixup(pci_dev_table);
2423
2424	pci_rc = pci_register_driver(&i7core_driver);
2425
2426	if (pci_rc >= 0) {
2427		mce_register_decode_chain(&i7_mce_dec);
2428		return 0;
2429	}
2430
2431	i7core_printk(KERN_ERR, "Failed to register device with error %d.\n",
2432		      pci_rc);
2433
2434	return pci_rc;
2435}
2436
2437/*
2438 *	i7core_exit()	Module exit function
2439 *			Unregister the driver
2440 */
2441static void __exit i7core_exit(void)
2442{
2443	edac_dbg(2, "\n");
2444	pci_unregister_driver(&i7core_driver);
2445	mce_unregister_decode_chain(&i7_mce_dec);
2446}
2447
2448module_init(i7core_init);
2449module_exit(i7core_exit);
2450
2451MODULE_LICENSE("GPL");
2452MODULE_AUTHOR("Mauro Carvalho Chehab");
2453MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
2454MODULE_DESCRIPTION("MC Driver for Intel i7 Core memory controllers - "
2455		   I7CORE_REVISION);
2456
2457module_param(edac_op_state, int, 0444);
2458MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
2459