1 /*
2  * Cavium ThunderX memory controller kernel module
3  *
4  * This file is subject to the terms and conditions of the GNU General Public
5  * License.  See the file "COPYING" in the main directory of this archive
6  * for more details.
7  *
8  * Copyright Cavium, Inc. (C) 2015-2017. All rights reserved.
9  *
10  */
11 
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/edac.h>
#include <linux/err.h>
#include <linux/interrupt.h>
#include <linux/string.h>
#include <linux/stop_machine.h>
#include <linux/delay.h>
#include <linux/sizes.h>
#include <linux/atomic.h>
#include <linux/bitfield.h>
#include <linux/circ_buf.h>
23 
24 #include <asm/page.h>
25 
26 #include "edac_module.h"
27 
/* Convert a physical address to its page frame number. */
#define phys_to_pfn(phys)	(PFN_DOWN(phys))

/*
 * Node number field (bits 45:44) of a device register physical address;
 * extracted from BAR 0 and inserted into the L2C_CTL address in probe.
 */
#define THUNDERX_NODE		GENMASK(45, 44)
31 
/*
 * Error classes stored in error_descr.type.  Zero is deliberately unused:
 * a zero type marks the terminating entry of a descriptor table.
 */
enum {
	ERR_CORRECTED	= 1,
	ERR_UNCORRECTED	= 2,
	ERR_UNKNOWN	= 3,
};
37 
/* Number of 64-bit register values held by struct error_syndrome. */
#define MAX_SYNDROME_REGS 4

/* Container for up to MAX_SYNDROME_REGS raw 64-bit register values. */
struct error_syndrome {
	u64 reg[MAX_SYNDROME_REGS];
};
43 
/*
 * One row of an error descriptor table consumed by decode_register().
 * Tables are terminated by an entry whose fields are all zero/NULL.
 */
struct error_descr {
	int	type;	/* ERR_CORRECTED, ERR_UNCORRECTED or ERR_UNKNOWN */
	u64	mask;	/* register bit(s) that signal this error */
	char	*descr;	/* human readable description */
};
49 
decode_register(char * str,size_t size,const struct error_descr * descr,const uint64_t reg)50 static void decode_register(char *str, size_t size,
51 			   const struct error_descr *descr,
52 			   const uint64_t reg)
53 {
54 	int ret = 0;
55 
56 	while (descr->type && descr->mask && descr->descr) {
57 		if (reg & descr->mask) {
58 			ret = snprintf(str, size, "\n\t%s, %s",
59 				       descr->type == ERR_CORRECTED ?
60 					 "Corrected" : "Uncorrected",
61 				       descr->descr);
62 			str += ret;
63 			size -= ret;
64 		}
65 		descr++;
66 	}
67 }
68 
/*
 * get_bits() - extract a bit field from a value
 * @data:  source value
 * @pos:   position of the least significant bit of the field
 * @width: field width in bits; must be smaller than the width of
 *         unsigned long
 *
 * Returns the @width-bit field of @data starting at bit @pos, right aligned.
 * The mask is built with an unsigned long constant so that fields of 31 bits
 * or more do not overflow a signed int shift.
 */
static unsigned long get_bits(unsigned long data, int pos, int width)
{
	return (data >> pos) & ((1UL << width) - 1);
}
73 
/*
 * Physical address of the L2C_CTL register; probe ORs the node number in
 * via THUNDERX_NODE before mapping it.
 */
#define L2C_CTL			0x87E080800000
/* When set, probe records that L2 index aliasing is disabled. */
#define L2C_CTL_DISIDXALIAS	BIT(0)

/* PCI device ID matched by the LMC driver. */
#define PCI_DEVICE_ID_THUNDER_LMC 0xa022
78 
/*
 * LMC_FADR register offset and accessors for the fields of a failing
 * address register value (DIMM, rank, bank, row, column).  The argument is
 * parenthesized so that arbitrary expressions can be passed safely.
 */
#define LMC_FADR		0x20
#define LMC_FADR_FDIMM(x)	(((x) >> 37) & 0x1)
#define LMC_FADR_FBUNK(x)	(((x) >> 36) & 0x1)
#define LMC_FADR_FBANK(x)	(((x) >> 32) & 0xf)
#define LMC_FADR_FROW(x)	(((x) >> 14) & 0xffff)
#define LMC_FADR_FCOL(x)	(((x) >> 0) & 0x1fff)
85 
/* LMC register offsets (relative to the BAR 0 mapping) and their fields. */
#define LMC_NXM_FADR		0x28
#define LMC_ECC_SYND		0x38

#define LMC_ECC_PARITY_TEST	0x108

/* Writing here sets interrupt bits (used by the inject_int debugfs file). */
#define LMC_INT_W1S		0x150

/* Interrupt enable: write-1-to-clear and write-1-to-set views. */
#define LMC_INT_ENA_W1C		0x158
#define LMC_INT_ENA_W1S		0x160

#define LMC_CONFIG		0x188

#define LMC_CONFIG_BG2		BIT(62)
#define LMC_CONFIG_RANK_ENA	BIT(42)
#define LMC_CONFIG_PBANK_LSB(x)	(((x) >> 5) & 0xF)
#define LMC_CONFIG_ROW_LSB(x)	(((x) >> 2) & 0x7)

#define LMC_CONTROL		0x190
#define LMC_CONTROL_XOR_BANK	BIT(16)

/* Interrupt status; the ISR writes the value read back to clear it. */
#define LMC_INT			0x1F0

#define LMC_INT_DDR_ERR		BIT(11)
#define LMC_INT_DED_ERR		(0xFUL << 5)
#define LMC_INT_SEC_ERR         (0xFUL << 1)
#define LMC_INT_NXM_WR_MASK	BIT(0)

#define LMC_DDR_PLL_CTL		0x258
#define LMC_DDR_PLL_CTL_DDR4	BIT(29)

#define LMC_FADR_SCRAMBLED	0x330

/* LMC_INT bits reported as uncorrected errors. */
#define LMC_INT_UE              (LMC_INT_DDR_ERR | LMC_INT_DED_ERR | \
				 LMC_INT_NXM_WR_MASK)

/* LMC_INT bits reported as corrected errors. */
#define LMC_INT_CE		(LMC_INT_SEC_ERR)
122 
/*
 * Descriptions of the LMC_INT error bits, decoded by decode_register().
 * The all-zero entry terminates the table.
 */
static const struct error_descr lmc_errors[] = {
	{
		.type  = ERR_CORRECTED,
		.mask  = LMC_INT_SEC_ERR,
		.descr = "Single-bit ECC error",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = LMC_INT_DDR_ERR,
		.descr = "DDR chip error",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = LMC_INT_DED_ERR,
		.descr = "Double-bit ECC error",
	},
	{
		.type = ERR_UNCORRECTED,
		.mask = LMC_INT_NXM_WR_MASK,
		.descr = "Non-existent memory write",
	},
	{0, 0, NULL},
};
146 
/* Individual interrupt enable bits. */
#define LMC_INT_EN_DDR_ERROR_ALERT_ENA	BIT(5)
#define LMC_INT_EN_DLCRAM_DED_ERR	BIT(4)
#define LMC_INT_EN_DLCRAM_SEC_ERR	BIT(3)
#define LMC_INT_INTR_DED_ENA		BIT(2)
#define LMC_INT_INTR_SEC_ENA		BIT(1)
#define LMC_INT_INTR_NXM_WR_ENA		BIT(0)

/* All six enable bits; written to LMC_INT_ENA_W1S/W1C in probe/remove. */
#define LMC_INT_ENA_ALL			GENMASK(5, 0)

/* NOTE(review): identical repeat of the LMC_DDR_PLL_CTL definitions above. */
#define LMC_DDR_PLL_CTL		0x258
#define LMC_DDR_PLL_CTL_DDR4	BIT(29)

/* NOTE(review): LMC_CONTROL is an identical repeat of the definition above. */
#define LMC_CONTROL		0x190
#define LMC_CONTROL_RDIMM	BIT(0)

/* Same offset as LMC_FADR_SCRAMBLED; this is the name the ISR uses. */
#define LMC_SCRAM_FADR		0x330

#define LMC_CHAR_MASK0		0x228
#define LMC_CHAR_MASK2		0x238

/*
 * Number of slots in the error context rings.  Must be a power of two:
 * ring_pos() and CIRC_CNT() mask indices with (size - 1).
 */
#define RING_ENTRIES	8
168 
/* Description of one debugfs file: its name, permissions and operations. */
struct debugfs_entry {
	const char *name;
	umode_t mode;
	const struct file_operations fops;
};
174 
/*
 * Snapshot of the LMC error registers taken by the hard IRQ handler and
 * consumed later by the threaded handler.
 */
struct lmc_err_ctx {
	u64 reg_int;		/* LMC_INT */
	u64 reg_fadr;		/* LMC_FADR */
	u64 reg_nxm_fadr;	/* LMC_NXM_FADR */
	u64 reg_scram_fadr;	/* LMC_SCRAM_FADR */
	u64 reg_ecc_synd;	/* LMC_ECC_SYND */
};
182 
/* Per-controller private data, stored in mem_ctl_info.pvt_info. */
struct thunderx_lmc {
	void __iomem *regs;		/* BAR 0 register mapping */
	struct pci_dev *pdev;
	struct msix_entry msix_ent;

	/* Set to 1 by the hard IRQ handler; polled by the ECC injector. */
	atomic_t ecc_int;

	/*
	 * Error injection parameters, set through debugfs and written to
	 * LMC_CHAR_MASK0, LMC_CHAR_MASK2 and LMC_ECC_PARITY_TEST.
	 */
	u64 mask0;
	u64 mask2;
	u64 parity_test;
	u64 node;			/* node number taken from the BAR 0 address */

	/* Address layout parameters computed in probe. */
	int xbits;
	int bank_width;
	int pbank_lsb;
	int dimm_lsb;
	int rank_lsb;
	int bank_lsb;
	int row_lsb;
	int col_hi_lsb;

	int xor_bank;			/* non-zero if LMC_CONTROL_XOR_BANK is set */
	int l2c_alias;			/* non-zero if L2C_CTL_DISIDXALIAS is clear */

	struct page *mem;		/* scratch page used by ECC injection */

	/*
	 * Ring of error snapshots: the hard IRQ handler advances ring_head,
	 * the threaded handler advances ring_tail.
	 */
	struct lmc_err_ctx err_ctx[RING_ENTRIES];
	unsigned long ring_head;
	unsigned long ring_tail;
};
213 
/*
 * Map a free-running ring counter onto a slot index.  @size must be a power
 * of two; both arguments are parenthesized so expressions can be passed.
 */
#define ring_pos(pos, size) ((pos) & ((size) - 1))
215 
/*
 * Define a static struct debugfs_entry named debugfs_<_name> whose file name
 * is the stringified _name, with the given permissions and write/read
 * handlers (either handler may be NULL).
 */
#define DEBUGFS_STRUCT(_name, _mode, _write, _read)			    \
static struct debugfs_entry debugfs_##_name = {				    \
	.name = __stringify(_name),					    \
	.mode = VERIFY_OCTAL_PERMISSIONS(_mode),			    \
	.fops = {							    \
		.open = simple_open,					    \
		.write = _write,					    \
		.read  = _read,						    \
		.llseek = generic_file_llseek,				    \
	},								    \
}
227 
/*
 * Generate debugfs read/write handlers and a debugfs_entry for a 64-bit
 * field of struct thunderx_<_type>.
 *
 * Reading returns the field as "0x%016llx".  The text is formatted into a
 * buffer bounded by its own size (not by the caller's read count), and only
 * the characters actually produced are exposed to user space, so no
 * uninitialized stack bytes are copied out.
 *
 * Writing parses an unsigned integer (base auto-detected) into the field.
 */
#define DEBUGFS_FIELD_ATTR(_type, _field)				    \
static ssize_t thunderx_##_type##_##_field##_read(struct file *file,	    \
					    char __user *data,		    \
					    size_t count, loff_t *ppos)	    \
{									    \
	struct thunderx_##_type *pdata = file->private_data;		    \
	char buf[20];							    \
	int len;							    \
									    \
	len = scnprintf(buf, sizeof(buf), "0x%016llx", pdata->_field);	    \
	return simple_read_from_buffer(data, count, ppos, buf, len);	    \
}									    \
									    \
static ssize_t thunderx_##_type##_##_field##_write(struct file *file,	    \
					     const char __user *data,	    \
					     size_t count, loff_t *ppos)    \
{									    \
	struct thunderx_##_type *pdata = file->private_data;		    \
	int res;							    \
									    \
	res = kstrtoull_from_user(data, count, 0, &pdata->_field);	    \
									    \
	return res ? res : count;					    \
}									    \
									    \
DEBUGFS_STRUCT(_field, 0600,						    \
		   thunderx_##_type##_##_field##_write,			    \
		   thunderx_##_type##_##_field##_read)
256 
/*
 * Generate debugfs read/write handlers and a debugfs_entry named _name that
 * access the 64-bit device register at offset _reg of
 * struct thunderx_<_type>::regs.
 *
 * Reading returns the register as "0x%016llx"; formatting is bounded by the
 * buffer size and only the characters produced are exposed to user space.
 * Writing parses an unsigned integer (base auto-detected) and stores it in
 * the register.
 */
#define DEBUGFS_REG_ATTR(_type, _name, _reg)				    \
static ssize_t thunderx_##_type##_##_name##_read(struct file *file,	    \
					   char __user *data,		    \
					   size_t count, loff_t *ppos)      \
{									    \
	struct thunderx_##_type *pdata = file->private_data;		    \
	char buf[20];							    \
	int len;							    \
									    \
	len = scnprintf(buf, sizeof(buf), "0x%016llx",			    \
			readq(pdata->regs + _reg));			    \
	return simple_read_from_buffer(data, count, ppos, buf, len);	    \
}									    \
									    \
static ssize_t thunderx_##_type##_##_name##_write(struct file *file,	    \
					    const char __user *data,	    \
					    size_t count, loff_t *ppos)     \
{									    \
	struct thunderx_##_type *pdata = file->private_data;		    \
	u64 val;							    \
	int res;							    \
									    \
	res = kstrtoull_from_user(data, count, 0, &val);		    \
									    \
	if (!res) {							    \
		writeq(val, pdata->regs + _reg);			    \
		res = count;						    \
	}								    \
									    \
	return res;							    \
}									    \
									    \
DEBUGFS_STRUCT(_name, 0600,						    \
	       thunderx_##_type##_##_name##_write,			    \
	       thunderx_##_type##_##_name##_read)
291 
/* Shorthand: debugfs accessors for a field of struct thunderx_lmc. */
#define LMC_DEBUGFS_ENT(_field)	DEBUGFS_FIELD_ATTR(lmc, _field)
293 
/*
 * To inject an ECC error, the following steps are needed:
 * - Set up the ECC injection by writing the appropriate parameters:
 *	echo <bit mask value> > /sys/kernel/debug/<device number>/mask0
 *	echo <bit mask value> > /sys/kernel/debug/<device number>/mask2
 *	echo 0x802 > /sys/kernel/debug/<device number>/parity_test
 * - Do the actual injection:
 *	echo 1 > /sys/kernel/debug/<device number>/inject_ecc
 */
thunderx_lmc_inject_int_write(struct file * file,const char __user * data,size_t count,loff_t * ppos)303 static ssize_t thunderx_lmc_inject_int_write(struct file *file,
304 					     const char __user *data,
305 					     size_t count, loff_t *ppos)
306 {
307 	struct thunderx_lmc *lmc = file->private_data;
308 	u64 val;
309 	int res;
310 
311 	res = kstrtoull_from_user(data, count, 0, &val);
312 
313 	if (!res) {
314 		/* Trigger the interrupt */
315 		writeq(val, lmc->regs + LMC_INT_W1S);
316 		res = count;
317 	}
318 
319 	return res;
320 }
321 
thunderx_lmc_int_read(struct file * file,char __user * data,size_t count,loff_t * ppos)322 static ssize_t thunderx_lmc_int_read(struct file *file,
323 				     char __user *data,
324 				     size_t count, loff_t *ppos)
325 {
326 	struct thunderx_lmc *lmc = file->private_data;
327 	char buf[20];
328 	u64 lmc_int = readq(lmc->regs + LMC_INT);
329 
330 	snprintf(buf, sizeof(buf), "0x%016llx", lmc_int);
331 	return simple_read_from_buffer(data, count, ppos, buf, sizeof(buf));
332 }
333 
/* Byte value written to the scratch page before it is flushed to DRAM. */
#define TEST_PATTERN 0xa5

/*
 * inject_ecc_fn() - rig the scratch page lmc->mem with corrupted ECC
 * @arg: the struct thunderx_lmc being tested
 *
 * Called through stop_machine() by thunderx_lmc_inject_ecc_write().
 * Programs the injection registers from lmc->mask0, lmc->mask2 and
 * lmc->parity_test, then writes a pattern and pushes the page's cache lines
 * out through the cache hierarchy so that later loads refetch them.
 *
 * Always returns 0.
 */
static int inject_ecc_fn(void *arg)
{
	struct thunderx_lmc *lmc = arg;
	uintptr_t addr, phys;
	unsigned int cline_size = cache_line_size();
	const unsigned int lines = PAGE_SIZE / cline_size;
	unsigned int i, cl_idx;

	addr = (uintptr_t)page_address(lmc->mem);
	phys = (uintptr_t)page_to_phys(lmc->mem);

	/*
	 * Replace bits 10:8 of parity_test with bits 6:4 of the page's
	 * physical address.
	 * NOTE(review): phys is the start of a page, so cl_idx looks like it
	 * is always 0 - confirm this is intended.
	 */
	cl_idx = (phys & 0x7f) >> 4;
	lmc->parity_test &= ~(7ULL << 8);
	lmc->parity_test |= (cl_idx << 8);

	writeq(lmc->mask0, lmc->regs + LMC_CHAR_MASK0);
	writeq(lmc->mask2, lmc->regs + LMC_CHAR_MASK2);
	writeq(lmc->parity_test, lmc->regs + LMC_ECC_PARITY_TEST);

	/* Read the registers back; presumably to post the writes - confirm. */
	readq(lmc->regs + LMC_CHAR_MASK0);
	readq(lmc->regs + LMC_CHAR_MASK2);
	readq(lmc->regs + LMC_ECC_PARITY_TEST);

	for (i = 0; i < lines; i++) {
		/*
		 * NOTE(review): the pattern is always written to the first
		 * cache line (addr), while the flush below walks
		 * addr + i * cline_size - confirm whether every line was
		 * meant to be filled.
		 */
		memset((void *)addr, TEST_PATTERN, cline_size);
		barrier();

		/*
		 * Flush L1 cachelines to the PoC (L2).
		 * This will cause cacheline eviction to the L2.
		 */
		asm volatile("dc civac, %0\n"
			     "dsb sy\n"
			     : : "r"(addr + i * cline_size));
	}

	for (i = 0; i < lines; i++) {
		/*
		 * Flush L2 cachelines to the DRAM.
		 * This will cause cacheline eviction to the DRAM
		 * and ECC corruption according to the masks set.
		 */
		__asm__ volatile("sys #0,c11,C1,#2, %0\n"
				 : : "r"(phys + i * cline_size));
	}

	for (i = 0; i < lines; i++) {
		/*
		 * Invalidate L2 cachelines.
		 * The subsequent load will cause cacheline fetch
		 * from the DRAM and an error interrupt
		 */
		__asm__ volatile("sys #0,c11,C1,#1, %0"
				 : : "r"(phys + i * cline_size));
	}

	for (i = 0; i < lines; i++) {
		/*
		 * Invalidate L1 cachelines.
		 * The subsequent load will cause cacheline fetch
		 * from the L2 and/or DRAM
		 */
		asm volatile("dc ivac, %0\n"
			     "dsb sy\n"
			     : : "r"(addr + i * cline_size));
	}

	return 0;
}
405 
thunderx_lmc_inject_ecc_write(struct file * file,const char __user * data,size_t count,loff_t * ppos)406 static ssize_t thunderx_lmc_inject_ecc_write(struct file *file,
407 					     const char __user *data,
408 					     size_t count, loff_t *ppos)
409 {
410 	struct thunderx_lmc *lmc = file->private_data;
411 	unsigned int cline_size = cache_line_size();
412 	u8 *tmp;
413 	void __iomem *addr;
414 	unsigned int offs, timeout = 100000;
415 
416 	atomic_set(&lmc->ecc_int, 0);
417 
418 	lmc->mem = alloc_pages_node(lmc->node, GFP_KERNEL, 0);
419 	if (!lmc->mem)
420 		return -ENOMEM;
421 
422 	tmp = kmalloc(cline_size, GFP_KERNEL);
423 	if (!tmp) {
424 		__free_pages(lmc->mem, 0);
425 		return -ENOMEM;
426 	}
427 
428 	addr = page_address(lmc->mem);
429 
430 	while (!atomic_read(&lmc->ecc_int) && timeout--) {
431 		stop_machine(inject_ecc_fn, lmc, NULL);
432 
433 		for (offs = 0; offs < PAGE_SIZE; offs += cline_size) {
434 			/*
435 			 * Do a load from the previously rigged location
436 			 * This should generate an error interrupt.
437 			 */
438 			memcpy(tmp, addr + offs, cline_size);
439 			asm volatile("dsb ld\n");
440 		}
441 	}
442 
443 	kfree(tmp);
444 	__free_pages(lmc->mem, 0);
445 
446 	return count;
447 }
448 
/* Read/write debugfs files backed by fields of struct thunderx_lmc. */
LMC_DEBUGFS_ENT(mask0);
LMC_DEBUGFS_ENT(mask2);
LMC_DEBUGFS_ENT(parity_test);

/* Write-only injection triggers and a read-only view of LMC_INT. */
DEBUGFS_STRUCT(inject_int, 0200, thunderx_lmc_inject_int_write, NULL);
DEBUGFS_STRUCT(inject_ecc, 0200, thunderx_lmc_inject_ecc_write, NULL);
DEBUGFS_STRUCT(int_w1c, 0400, NULL, thunderx_lmc_int_read);
456 
457 struct debugfs_entry *lmc_dfs_ents[] = {
458 	&debugfs_mask0,
459 	&debugfs_mask2,
460 	&debugfs_parity_test,
461 	&debugfs_inject_ecc,
462 	&debugfs_inject_int,
463 	&debugfs_int_w1c,
464 };
465 
thunderx_create_debugfs_nodes(struct dentry * parent,struct debugfs_entry * attrs[],void * data,size_t num)466 static int thunderx_create_debugfs_nodes(struct dentry *parent,
467 					  struct debugfs_entry *attrs[],
468 					  void *data,
469 					  size_t num)
470 {
471 	int i;
472 	struct dentry *ent;
473 
474 	if (!IS_ENABLED(CONFIG_EDAC_DEBUG))
475 		return 0;
476 
477 	if (!parent)
478 		return -ENOENT;
479 
480 	for (i = 0; i < num; i++) {
481 		ent = edac_debugfs_create_file(attrs[i]->name, attrs[i]->mode,
482 					       parent, data, &attrs[i]->fops);
483 
484 		if (!ent)
485 			break;
486 	}
487 
488 	return i;
489 }
490 
thunderx_faddr_to_phys(u64 faddr,struct thunderx_lmc * lmc)491 static phys_addr_t thunderx_faddr_to_phys(u64 faddr, struct thunderx_lmc *lmc)
492 {
493 	phys_addr_t addr = 0;
494 	int bank, xbits;
495 
496 	addr |= lmc->node << 40;
497 	addr |= LMC_FADR_FDIMM(faddr) << lmc->dimm_lsb;
498 	addr |= LMC_FADR_FBUNK(faddr) << lmc->rank_lsb;
499 	addr |= LMC_FADR_FROW(faddr) << lmc->row_lsb;
500 	addr |= (LMC_FADR_FCOL(faddr) >> 4) << lmc->col_hi_lsb;
501 
502 	bank = LMC_FADR_FBANK(faddr) << lmc->bank_lsb;
503 
504 	if (lmc->xor_bank)
505 		bank ^= get_bits(addr, 12 + lmc->xbits, lmc->bank_width);
506 
507 	addr |= bank << lmc->bank_lsb;
508 
509 	xbits = PCI_FUNC(lmc->pdev->devfn);
510 
511 	if (lmc->l2c_alias)
512 		xbits ^= get_bits(addr, 20, lmc->xbits) ^
513 			 get_bits(addr, 12, lmc->xbits);
514 
515 	addr |= xbits << 7;
516 
517 	return addr;
518 }
519 
thunderx_get_num_lmcs(unsigned int node)520 static unsigned int thunderx_get_num_lmcs(unsigned int node)
521 {
522 	unsigned int number = 0;
523 	struct pci_dev *pdev = NULL;
524 
525 	do {
526 		pdev = pci_get_device(PCI_VENDOR_ID_CAVIUM,
527 				      PCI_DEVICE_ID_THUNDER_LMC,
528 				      pdev);
529 		if (pdev) {
530 #ifdef CONFIG_NUMA
531 			if (pdev->dev.numa_node == node)
532 				number++;
533 #else
534 			number++;
535 #endif
536 		}
537 	} while (pdev);
538 
539 	return number;
540 }
541 
/* Sizes of the message buffers allocated by the threaded LMC handler. */
#define LMC_MESSAGE_SIZE	120
/* 50 bytes per lmc_errors entry (terminator included) for decoded text. */
#define LMC_OTHER_SIZE		(50 * ARRAY_SIZE(lmc_errors))
544 
/*
 * Hard IRQ handler for the LMC error interrupt.
 *
 * Snapshots the error registers into the next slot of lmc->err_ctx, flags
 * the event in lmc->ecc_int, clears the interrupt and returns
 * IRQ_WAKE_THREAD so that thunderx_lmc_threaded_isr() reports it.
 *
 * NOTE(review): ring_head is advanced unconditionally; there is no check
 * that the slot being overwritten has already been consumed.
 */
static irqreturn_t thunderx_lmc_err_isr(int irq, void *dev_id)
{
	struct mem_ctl_info *mci = dev_id;
	struct thunderx_lmc *lmc = mci->pvt_info;

	unsigned long head = ring_pos(lmc->ring_head, ARRAY_SIZE(lmc->err_ctx));
	struct lmc_err_ctx *ctx = &lmc->err_ctx[head];

	/*
	 * Reset the registers programmed by inject_ecc_fn(); presumably this
	 * disarms error injection - TODO confirm the meaning of 0x2.
	 */
	writeq(0, lmc->regs + LMC_CHAR_MASK0);
	writeq(0, lmc->regs + LMC_CHAR_MASK2);
	writeq(0x2, lmc->regs + LMC_ECC_PARITY_TEST);

	ctx->reg_int = readq(lmc->regs + LMC_INT);
	ctx->reg_fadr = readq(lmc->regs + LMC_FADR);
	ctx->reg_nxm_fadr = readq(lmc->regs + LMC_NXM_FADR);
	ctx->reg_scram_fadr = readq(lmc->regs + LMC_SCRAM_FADR);
	ctx->reg_ecc_synd = readq(lmc->regs + LMC_ECC_SYND);

	lmc->ring_head++;

	/* Lets thunderx_lmc_inject_ecc_write() stop retrying. */
	atomic_set(&lmc->ecc_int, 1);

	/* Clear the interrupt */
	writeq(ctx->reg_int, lmc->regs + LMC_INT);

	return IRQ_WAKE_THREAD;
}
572 
thunderx_lmc_threaded_isr(int irq,void * dev_id)573 static irqreturn_t thunderx_lmc_threaded_isr(int irq, void *dev_id)
574 {
575 	struct mem_ctl_info *mci = dev_id;
576 	struct thunderx_lmc *lmc = mci->pvt_info;
577 	phys_addr_t phys_addr;
578 
579 	unsigned long tail;
580 	struct lmc_err_ctx *ctx;
581 
582 	irqreturn_t ret = IRQ_NONE;
583 
584 	char *msg;
585 	char *other;
586 
587 	msg = kmalloc(LMC_MESSAGE_SIZE, GFP_KERNEL);
588 	other =  kmalloc(LMC_OTHER_SIZE, GFP_KERNEL);
589 
590 	if (!msg || !other)
591 		goto err_free;
592 
593 	while (CIRC_CNT(lmc->ring_head, lmc->ring_tail,
594 		ARRAY_SIZE(lmc->err_ctx))) {
595 		tail = ring_pos(lmc->ring_tail, ARRAY_SIZE(lmc->err_ctx));
596 
597 		ctx = &lmc->err_ctx[tail];
598 
599 		dev_dbg(&lmc->pdev->dev, "LMC_INT: %016llx\n",
600 			ctx->reg_int);
601 		dev_dbg(&lmc->pdev->dev, "LMC_FADR: %016llx\n",
602 			ctx->reg_fadr);
603 		dev_dbg(&lmc->pdev->dev, "LMC_NXM_FADR: %016llx\n",
604 			ctx->reg_nxm_fadr);
605 		dev_dbg(&lmc->pdev->dev, "LMC_SCRAM_FADR: %016llx\n",
606 			ctx->reg_scram_fadr);
607 		dev_dbg(&lmc->pdev->dev, "LMC_ECC_SYND: %016llx\n",
608 			ctx->reg_ecc_synd);
609 
610 		snprintf(msg, LMC_MESSAGE_SIZE,
611 			 "DIMM %lld rank %lld bank %lld row %lld col %lld",
612 			 LMC_FADR_FDIMM(ctx->reg_scram_fadr),
613 			 LMC_FADR_FBUNK(ctx->reg_scram_fadr),
614 			 LMC_FADR_FBANK(ctx->reg_scram_fadr),
615 			 LMC_FADR_FROW(ctx->reg_scram_fadr),
616 			 LMC_FADR_FCOL(ctx->reg_scram_fadr));
617 
618 		decode_register(other, LMC_OTHER_SIZE, lmc_errors,
619 				ctx->reg_int);
620 
621 		phys_addr = thunderx_faddr_to_phys(ctx->reg_fadr, lmc);
622 
623 		if (ctx->reg_int & LMC_INT_UE)
624 			edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
625 					     phys_to_pfn(phys_addr),
626 					     offset_in_page(phys_addr),
627 					     0, -1, -1, -1, msg, other);
628 		else if (ctx->reg_int & LMC_INT_CE)
629 			edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
630 					     phys_to_pfn(phys_addr),
631 					     offset_in_page(phys_addr),
632 					     0, -1, -1, -1, msg, other);
633 
634 		lmc->ring_tail++;
635 	}
636 
637 	ret = IRQ_HANDLED;
638 
639 err_free:
640 	kfree(msg);
641 	kfree(other);
642 
643 	return ret;
644 }
645 
/* PCI IDs handled by the LMC driver; the zero entry terminates the table. */
static const struct pci_device_id thunderx_lmc_pci_tbl[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_LMC) },
	{ 0, },
};
650 
pci_dev_to_mc_idx(struct pci_dev * pdev)651 static inline int pci_dev_to_mc_idx(struct pci_dev *pdev)
652 {
653 	int node = dev_to_node(&pdev->dev);
654 	int ret = PCI_FUNC(pdev->devfn);
655 
656 	ret += max(node, 0) << 3;
657 
658 	return ret;
659 }
660 
thunderx_lmc_probe(struct pci_dev * pdev,const struct pci_device_id * id)661 static int thunderx_lmc_probe(struct pci_dev *pdev,
662 				const struct pci_device_id *id)
663 {
664 	struct thunderx_lmc *lmc;
665 	struct edac_mc_layer layer;
666 	struct mem_ctl_info *mci;
667 	u64 lmc_control, lmc_ddr_pll_ctl, lmc_config;
668 	int ret;
669 	u64 lmc_int;
670 	void *l2c_ioaddr;
671 
672 	layer.type = EDAC_MC_LAYER_SLOT;
673 	layer.size = 2;
674 	layer.is_virt_csrow = false;
675 
676 	ret = pcim_enable_device(pdev);
677 	if (ret) {
678 		dev_err(&pdev->dev, "Cannot enable PCI device: %d\n", ret);
679 		return ret;
680 	}
681 
682 	ret = pcim_iomap_regions(pdev, BIT(0), "thunderx_lmc");
683 	if (ret) {
684 		dev_err(&pdev->dev, "Cannot map PCI resources: %d\n", ret);
685 		return ret;
686 	}
687 
688 	mci = edac_mc_alloc(pci_dev_to_mc_idx(pdev), 1, &layer,
689 			    sizeof(struct thunderx_lmc));
690 	if (!mci)
691 		return -ENOMEM;
692 
693 	mci->pdev = &pdev->dev;
694 	lmc = mci->pvt_info;
695 
696 	pci_set_drvdata(pdev, mci);
697 
698 	lmc->regs = pcim_iomap_table(pdev)[0];
699 
700 	lmc_control = readq(lmc->regs + LMC_CONTROL);
701 	lmc_ddr_pll_ctl = readq(lmc->regs + LMC_DDR_PLL_CTL);
702 	lmc_config = readq(lmc->regs + LMC_CONFIG);
703 
704 	if (lmc_control & LMC_CONTROL_RDIMM) {
705 		mci->mtype_cap = FIELD_GET(LMC_DDR_PLL_CTL_DDR4,
706 					   lmc_ddr_pll_ctl) ?
707 				MEM_RDDR4 : MEM_RDDR3;
708 	} else {
709 		mci->mtype_cap = FIELD_GET(LMC_DDR_PLL_CTL_DDR4,
710 					   lmc_ddr_pll_ctl) ?
711 				MEM_DDR4 : MEM_DDR3;
712 	}
713 
714 	mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED;
715 	mci->edac_cap = EDAC_FLAG_SECDED;
716 
717 	mci->mod_name = "thunderx-lmc";
718 	mci->ctl_name = "thunderx-lmc";
719 	mci->dev_name = dev_name(&pdev->dev);
720 	mci->scrub_mode = SCRUB_NONE;
721 
722 	lmc->pdev = pdev;
723 	lmc->msix_ent.entry = 0;
724 
725 	lmc->ring_head = 0;
726 	lmc->ring_tail = 0;
727 
728 	ret = pci_enable_msix_exact(pdev, &lmc->msix_ent, 1);
729 	if (ret) {
730 		dev_err(&pdev->dev, "Cannot enable interrupt: %d\n", ret);
731 		goto err_free;
732 	}
733 
734 	ret = devm_request_threaded_irq(&pdev->dev, lmc->msix_ent.vector,
735 					thunderx_lmc_err_isr,
736 					thunderx_lmc_threaded_isr, 0,
737 					"[EDAC] ThunderX LMC", mci);
738 	if (ret) {
739 		dev_err(&pdev->dev, "Cannot set ISR: %d\n", ret);
740 		goto err_free;
741 	}
742 
743 	lmc->node = FIELD_GET(THUNDERX_NODE, pci_resource_start(pdev, 0));
744 
745 	lmc->xbits = thunderx_get_num_lmcs(lmc->node) >> 1;
746 	lmc->bank_width = (FIELD_GET(LMC_DDR_PLL_CTL_DDR4, lmc_ddr_pll_ctl) &&
747 			   FIELD_GET(LMC_CONFIG_BG2, lmc_config)) ? 4 : 3;
748 
749 	lmc->pbank_lsb = (lmc_config >> 5) & 0xf;
750 	lmc->dimm_lsb  = 28 + lmc->pbank_lsb + lmc->xbits;
751 	lmc->rank_lsb = lmc->dimm_lsb;
752 	lmc->rank_lsb -= FIELD_GET(LMC_CONFIG_RANK_ENA, lmc_config) ? 1 : 0;
753 	lmc->bank_lsb = 7 + lmc->xbits;
754 	lmc->row_lsb = 14 + LMC_CONFIG_ROW_LSB(lmc_config) + lmc->xbits;
755 
756 	lmc->col_hi_lsb = lmc->bank_lsb + lmc->bank_width;
757 
758 	lmc->xor_bank = lmc_control & LMC_CONTROL_XOR_BANK;
759 
760 	l2c_ioaddr = ioremap(L2C_CTL | FIELD_PREP(THUNDERX_NODE, lmc->node), PAGE_SIZE);
761 	if (!l2c_ioaddr) {
762 		dev_err(&pdev->dev, "Cannot map L2C_CTL\n");
763 		ret = -ENOMEM;
764 		goto err_free;
765 	}
766 
767 	lmc->l2c_alias = !(readq(l2c_ioaddr) & L2C_CTL_DISIDXALIAS);
768 
769 	iounmap(l2c_ioaddr);
770 
771 	ret = edac_mc_add_mc(mci);
772 	if (ret) {
773 		dev_err(&pdev->dev, "Cannot add the MC: %d\n", ret);
774 		goto err_free;
775 	}
776 
777 	lmc_int = readq(lmc->regs + LMC_INT);
778 	writeq(lmc_int, lmc->regs + LMC_INT);
779 
780 	writeq(LMC_INT_ENA_ALL, lmc->regs + LMC_INT_ENA_W1S);
781 
782 	if (IS_ENABLED(CONFIG_EDAC_DEBUG)) {
783 		ret = thunderx_create_debugfs_nodes(mci->debugfs,
784 						    lmc_dfs_ents,
785 						    lmc,
786 						    ARRAY_SIZE(lmc_dfs_ents));
787 
788 		if (ret != ARRAY_SIZE(lmc_dfs_ents)) {
789 			dev_warn(&pdev->dev, "Error creating debugfs entries: %d%s\n",
790 				 ret, ret >= 0 ? " created" : "");
791 		}
792 	}
793 
794 	return 0;
795 
796 err_free:
797 	pci_set_drvdata(pdev, NULL);
798 	edac_mc_free(mci);
799 
800 	return ret;
801 }
802 
thunderx_lmc_remove(struct pci_dev * pdev)803 static void thunderx_lmc_remove(struct pci_dev *pdev)
804 {
805 	struct mem_ctl_info *mci = pci_get_drvdata(pdev);
806 	struct thunderx_lmc *lmc = mci->pvt_info;
807 
808 	writeq(LMC_INT_ENA_ALL, lmc->regs + LMC_INT_ENA_W1C);
809 
810 	edac_mc_del_mc(&pdev->dev);
811 	edac_mc_free(mci);
812 }
813 
/* Export the LMC ID table for module autoloading. */
MODULE_DEVICE_TABLE(pci, thunderx_lmc_pci_tbl);

/* PCI driver glue for the LMC devices. */
static struct pci_driver thunderx_lmc_driver = {
	.name     = "thunderx_lmc_edac",
	.probe    = thunderx_lmc_probe,
	.remove   = thunderx_lmc_remove,
	.id_table = thunderx_lmc_pci_tbl,
};
822 
823 /*---------------------- OCX driver ---------------------------------*/
824 
#define PCI_DEVICE_ID_THUNDER_OCX 0xa013

/* Interrupt and lane counts: OCX_LINK_INTS link vectors plus one more. */
#define OCX_LINK_INTS		3
#define OCX_INTS		(OCX_LINK_INTS + 1)
#define OCX_RX_LANES		24
#define OCX_RX_LANE_STATS	15

/* OCX_COM_INT register and its set / enable-set / enable-clear views. */
#define OCX_COM_INT		0x100
#define OCX_COM_INT_W1S		0x108
#define OCX_COM_INT_ENA_W1S	0x110
#define OCX_COM_INT_ENA_W1C	0x118

/* OCX_COM_INT bits; see ocx_com_errors for the descriptions. */
#define OCX_COM_IO_BADID		BIT(54)
#define OCX_COM_MEM_BADID		BIT(53)
#define OCX_COM_COPR_BADID		BIT(52)
#define OCX_COM_WIN_REQ_BADID		BIT(51)
#define OCX_COM_WIN_REQ_TOUT		BIT(50)
/* One bit per RX lane (OCX_RX_LANES bits). */
#define OCX_COM_RX_LANE			GENMASK(23, 0)

/* OCX_COM_INT bits classified as corrected errors. */
#define OCX_COM_INT_CE			(OCX_COM_IO_BADID      | \
					 OCX_COM_MEM_BADID     | \
					 OCX_COM_COPR_BADID    | \
					 OCX_COM_WIN_REQ_BADID | \
					 OCX_COM_WIN_REQ_TOUT)
849 
/*
 * Descriptions of the OCX_COM_INT error bits, decoded by decode_register().
 * The all-zero entry terminates the table.
 */
static const struct error_descr ocx_com_errors[] = {
	{
		.type  = ERR_CORRECTED,
		.mask  = OCX_COM_IO_BADID,
		.descr = "Invalid IO transaction node ID",
	},
	{
		.type  = ERR_CORRECTED,
		.mask  = OCX_COM_MEM_BADID,
		.descr = "Invalid memory transaction node ID",
	},
	{
		.type  = ERR_CORRECTED,
		.mask  = OCX_COM_COPR_BADID,
		.descr = "Invalid coprocessor transaction node ID",
	},
	{
		.type  = ERR_CORRECTED,
		.mask  = OCX_COM_WIN_REQ_BADID,
		.descr = "Invalid SLI transaction node ID",
	},
	{
		.type  = ERR_CORRECTED,
		.mask  = OCX_COM_WIN_REQ_TOUT,
		.descr = "Window/core request timeout",
	},
	{0, 0, NULL},
};
878 
/* Per-link interrupt registers; x is the link number, 8 bytes apart. */
#define OCX_COM_LINKX_INT(x)		(0x120 + (x) * 8)
#define OCX_COM_LINKX_INT_W1S(x)	(0x140 + (x) * 8)
#define OCX_COM_LINKX_INT_ENA_W1S(x)	(0x160 + (x) * 8)
#define OCX_COM_LINKX_INT_ENA_W1C(x)	(0x180 + (x) * 8)

/* Per-link interrupt bits; see ocx_com_link_errors for descriptions. */
#define OCX_COM_LINK_BAD_WORD			BIT(13)
#define OCX_COM_LINK_ALIGN_FAIL			BIT(12)
#define OCX_COM_LINK_ALIGN_DONE			BIT(11)
#define OCX_COM_LINK_UP				BIT(10)
#define OCX_COM_LINK_STOP			BIT(9)
#define OCX_COM_LINK_BLK_ERR			BIT(8)
#define OCX_COM_LINK_REINIT			BIT(7)
#define OCX_COM_LINK_LNK_DATA			BIT(6)
#define OCX_COM_LINK_RXFIFO_DBE			BIT(5)
#define OCX_COM_LINK_RXFIFO_SBE			BIT(4)
#define OCX_COM_LINK_TXFIFO_DBE			BIT(3)
#define OCX_COM_LINK_TXFIFO_SBE			BIT(2)
#define OCX_COM_LINK_REPLAY_DBE			BIT(1)
#define OCX_COM_LINK_REPLAY_SBE			BIT(0)
898 
/*
 * Descriptions of the per-link interrupt error bits, for decode_register().
 * The all-zero entry terminates the table.
 */
static const struct error_descr ocx_com_link_errors[] = {
	{
		.type  = ERR_CORRECTED,
		.mask  = OCX_COM_LINK_REPLAY_SBE,
		.descr = "Replay buffer single-bit error",
	},
	{
		.type  = ERR_CORRECTED,
		.mask  = OCX_COM_LINK_TXFIFO_SBE,
		.descr = "TX FIFO single-bit error",
	},
	{
		.type  = ERR_CORRECTED,
		.mask  = OCX_COM_LINK_RXFIFO_SBE,
		.descr = "RX FIFO single-bit error",
	},
	{
		.type  = ERR_CORRECTED,
		.mask  = OCX_COM_LINK_BLK_ERR,
		.descr = "Block code error",
	},
	{
		.type  = ERR_CORRECTED,
		.mask  = OCX_COM_LINK_ALIGN_FAIL,
		.descr = "Link alignment failure",
	},
	{
		.type  = ERR_CORRECTED,
		.mask  = OCX_COM_LINK_BAD_WORD,
		.descr = "Bad code word",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = OCX_COM_LINK_REPLAY_DBE,
		.descr = "Replay buffer double-bit error",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = OCX_COM_LINK_TXFIFO_DBE,
		.descr = "TX FIFO double-bit error",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = OCX_COM_LINK_RXFIFO_DBE,
		.descr = "RX FIFO double-bit error",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = OCX_COM_LINK_STOP,
		.descr = "Link stopped",
	},
	{0, 0, NULL},
};
952 
/* Per-link bits listed as ERR_UNCORRECTED in ocx_com_link_errors. */
#define OCX_COM_LINK_INT_UE       (OCX_COM_LINK_REPLAY_DBE | \
				   OCX_COM_LINK_TXFIFO_DBE | \
				   OCX_COM_LINK_RXFIFO_DBE | \
				   OCX_COM_LINK_STOP)

/* Per-link bits listed as ERR_CORRECTED in ocx_com_link_errors. */
#define OCX_COM_LINK_INT_CE       (OCX_COM_LINK_REPLAY_SBE | \
				   OCX_COM_LINK_TXFIFO_SBE | \
				   OCX_COM_LINK_RXFIFO_SBE | \
				   OCX_COM_LINK_BLK_ERR    | \
				   OCX_COM_LINK_ALIGN_FAIL | \
				   OCX_COM_LINK_BAD_WORD)

/* Per-lane registers; x is the lane number, 0x100 bytes apart. */
#define OCX_LNE_INT(x)			(0x8018 + (x) * 0x100)
#define OCX_LNE_INT_EN(x)		(0x8020 + (x) * 0x100)
#define OCX_LNE_BAD_CNT(x)		(0x8028 + (x) * 0x100)
#define OCX_LNE_CFG(x)			(0x8000 + (x) * 0x100)
/* Statistics register y of lane x. */
#define OCX_LNE_STAT(x, y)		(0x8040 + (x) * 0x100 + (y) * 8)

#define OCX_LNE_CFG_RX_BDRY_LOCK_DIS		BIT(8)
#define OCX_LNE_CFG_RX_STAT_WRAP_DIS		BIT(2)
#define OCX_LNE_CFG_RX_STAT_RDCLR		BIT(1)
#define OCX_LNE_CFG_RX_STAT_ENA			BIT(0)


/* Per-lane interrupt bits; see ocx_lane_errors for the descriptions. */
#define OCX_LANE_BAD_64B67B			BIT(8)
#define OCX_LANE_DSKEW_FIFO_OVFL		BIT(5)
#define OCX_LANE_SCRM_SYNC_LOSS			BIT(4)
#define OCX_LANE_UKWN_CNTL_WORD			BIT(3)
#define OCX_LANE_CRC32_ERR			BIT(2)
#define OCX_LANE_BDRY_SYNC_LOSS			BIT(1)
#define OCX_LANE_SERDES_LOCK_LOSS		BIT(0)

/* No lane interrupt bit is classified as uncorrected. */
#define OCX_COM_LANE_INT_UE       (0)
/* All described lane interrupt bits are classified as corrected. */
#define OCX_COM_LANE_INT_CE       (OCX_LANE_SERDES_LOCK_LOSS | \
				   OCX_LANE_BDRY_SYNC_LOSS   | \
				   OCX_LANE_CRC32_ERR        | \
				   OCX_LANE_UKWN_CNTL_WORD   | \
				   OCX_LANE_SCRM_SYNC_LOSS   | \
				   OCX_LANE_DSKEW_FIFO_OVFL  | \
				   OCX_LANE_BAD_64B67B)
993 
/*
 * Descriptions of the per-lane interrupt error bits, for decode_register().
 * The all-zero entry terminates the table.
 */
static const struct error_descr ocx_lane_errors[] = {
	{
		.type  = ERR_CORRECTED,
		.mask  = OCX_LANE_SERDES_LOCK_LOSS,
		.descr = "RX SerDes lock lost",
	},
	{
		.type  = ERR_CORRECTED,
		.mask  = OCX_LANE_BDRY_SYNC_LOSS,
		.descr = "RX word boundary lost",
	},
	{
		.type  = ERR_CORRECTED,
		.mask  = OCX_LANE_CRC32_ERR,
		.descr = "CRC32 error",
	},
	{
		.type  = ERR_CORRECTED,
		.mask  = OCX_LANE_UKWN_CNTL_WORD,
		.descr = "Unknown control word",
	},
	{
		.type  = ERR_CORRECTED,
		.mask  = OCX_LANE_SCRM_SYNC_LOSS,
		.descr = "Scrambler synchronization lost",
	},
	{
		.type  = ERR_CORRECTED,
		.mask  = OCX_LANE_DSKEW_FIFO_OVFL,
		.descr = "RX deskew FIFO overflow",
	},
	{
		.type  = ERR_CORRECTED,
		.mask  = OCX_LANE_BAD_64B67B,
		.descr = "Bad 64B/67B codeword",
	},
	{0, 0, NULL},
};
1032 
/* Enable masks for the lane, common and per-link interrupt registers. */
#define OCX_LNE_INT_ENA_ALL		(GENMASK(9, 8) | GENMASK(6, 0))
#define OCX_COM_INT_ENA_ALL		(GENMASK(54, 50) | GENMASK(23, 0))
#define OCX_COM_LINKX_INT_ENA_ALL	(GENMASK(13, 12) | \
					 GENMASK(9, 7) | GENMASK(5, 0))

/* Per-link ECC control registers; x is the link number, 0x2000 apart. */
#define OCX_TLKX_ECC_CTL(x)		(0x10018 + (x) * 0x2000)
#define OCX_RLKX_ECC_CTL(x)		(0x18018 + (x) * 0x2000)
1040 
/* Register snapshot taken by thunderx_ocx_com_isr() for one interrupt. */
struct ocx_com_err_ctx {
	u64 reg_com_int;			/* OCX_COM_INT */
	u64 reg_lane_int[OCX_RX_LANES];		/* OCX_LNE_INT(lane) */
	u64 reg_lane_stat11[OCX_RX_LANES];	/* OCX_LNE_STAT(lane, 11) */
};
1046 
/*
 * Snapshot of one per-link interrupt event: the register value and the link
 * it belongs to (presumably OCX_COM_LINKX_INT(link) - the code filling this
 * in is not part of the visible source).
 */
struct ocx_link_err_ctx {
	u64 reg_com_link_int;
	int link;
};
1051 
/* Per-device private data of the OCX driver. */
struct thunderx_ocx {
	void __iomem *regs;		/* OCX register mapping */
	int com_link;
	struct pci_dev *pdev;
	struct edac_device_ctl_info *edac_dev;

	struct dentry *debugfs;
	/*
	 * One entry per interrupt vector; the handlers recover this structure
	 * from a pointer to their entry with container_of().
	 */
	struct msix_entry msix_ent[OCX_INTS];

	/* Snapshot rings for the common and the per-link interrupts. */
	struct ocx_com_err_ctx com_err_ctx[RING_ENTRIES];
	struct ocx_link_err_ctx link_err_ctx[RING_ENTRIES];

	/* Free-running ring counters; map to slots with ring_pos(). */
	unsigned long com_ring_head;
	unsigned long com_ring_tail;

	unsigned long link_ring_head;
	unsigned long link_ring_tail;
};
1070 
/* Sizes of the message buffers allocated by the threaded OCX handlers. */
#define OCX_MESSAGE_SIZE	SZ_1K
/* 50 bytes per ocx_com_link_errors entry (terminator included). */
#define OCX_OTHER_SIZE		(50 * ARRAY_SIZE(ocx_com_link_errors))
1073 
/*
 * First stage of the OCX common interrupt: snapshot OCX_COM_INT and, for
 * every RX lane, its interrupt and statistics-11 registers into the next
 * slot of ocx->com_err_ctx, acknowledge them by writing the values read
 * back, and return IRQ_WAKE_THREAD so the threaded handler reports them.
 *
 * @irq_id points at the msix_entry of this vector inside struct
 * thunderx_ocx.
 *
 * NOTE(review): com_ring_head is advanced unconditionally; there is no
 * check that the slot being overwritten has already been consumed.
 */
static irqreturn_t thunderx_ocx_com_isr(int irq, void *irq_id)
{
	struct msix_entry *msix = irq_id;
	struct thunderx_ocx *ocx = container_of(msix, struct thunderx_ocx,
						msix_ent[msix->entry]);

	int lane;
	unsigned long head = ring_pos(ocx->com_ring_head,
				      ARRAY_SIZE(ocx->com_err_ctx));
	struct ocx_com_err_ctx *ctx = &ocx->com_err_ctx[head];

	ctx->reg_com_int = readq(ocx->regs + OCX_COM_INT);

	for (lane = 0; lane < OCX_RX_LANES; lane++) {
		ctx->reg_lane_int[lane] =
			readq(ocx->regs + OCX_LNE_INT(lane));
		ctx->reg_lane_stat11[lane] =
			readq(ocx->regs + OCX_LNE_STAT(lane, 11));

		/* Acknowledge the lane interrupt bits just captured. */
		writeq(ctx->reg_lane_int[lane], ocx->regs + OCX_LNE_INT(lane));
	}

	/* Acknowledge the common interrupt bits just captured. */
	writeq(ctx->reg_com_int, ocx->regs + OCX_COM_INT);

	ocx->com_ring_head++;

	return IRQ_WAKE_THREAD;
}
1103 
thunderx_ocx_com_threaded_isr(int irq,void * irq_id)1104 static irqreturn_t thunderx_ocx_com_threaded_isr(int irq, void *irq_id)
1105 {
1106 	struct msix_entry *msix = irq_id;
1107 	struct thunderx_ocx *ocx = container_of(msix, struct thunderx_ocx,
1108 						msix_ent[msix->entry]);
1109 
1110 	irqreturn_t ret = IRQ_NONE;
1111 
1112 	unsigned long tail;
1113 	struct ocx_com_err_ctx *ctx;
1114 	int lane;
1115 	char *msg;
1116 	char *other;
1117 
1118 	msg = kmalloc(OCX_MESSAGE_SIZE, GFP_KERNEL);
1119 	other = kmalloc(OCX_OTHER_SIZE, GFP_KERNEL);
1120 
1121 	if (!msg || !other)
1122 		goto err_free;
1123 
1124 	while (CIRC_CNT(ocx->com_ring_head, ocx->com_ring_tail,
1125 			ARRAY_SIZE(ocx->com_err_ctx))) {
1126 		tail = ring_pos(ocx->com_ring_tail,
1127 				ARRAY_SIZE(ocx->com_err_ctx));
1128 		ctx = &ocx->com_err_ctx[tail];
1129 
1130 		snprintf(msg, OCX_MESSAGE_SIZE, "%s: OCX_COM_INT: %016llx",
1131 			ocx->edac_dev->ctl_name, ctx->reg_com_int);
1132 
1133 		decode_register(other, OCX_OTHER_SIZE,
1134 				ocx_com_errors, ctx->reg_com_int);
1135 
1136 		strncat(msg, other, OCX_MESSAGE_SIZE);
1137 
1138 		for (lane = 0; lane < OCX_RX_LANES; lane++)
1139 			if (ctx->reg_com_int & BIT(lane)) {
1140 				snprintf(other, OCX_OTHER_SIZE,
1141 					 "\n\tOCX_LNE_INT[%02d]: %016llx OCX_LNE_STAT11[%02d]: %016llx",
1142 					 lane, ctx->reg_lane_int[lane],
1143 					 lane, ctx->reg_lane_stat11[lane]);
1144 
1145 				strncat(msg, other, OCX_MESSAGE_SIZE);
1146 
1147 				decode_register(other, OCX_OTHER_SIZE,
1148 						ocx_lane_errors,
1149 						ctx->reg_lane_int[lane]);
1150 				strncat(msg, other, OCX_MESSAGE_SIZE);
1151 			}
1152 
1153 		if (ctx->reg_com_int & OCX_COM_INT_CE)
1154 			edac_device_handle_ce(ocx->edac_dev, 0, 0, msg);
1155 
1156 		ocx->com_ring_tail++;
1157 	}
1158 
1159 	ret = IRQ_HANDLED;
1160 
1161 err_free:
1162 	kfree(other);
1163 	kfree(msg);
1164 
1165 	return ret;
1166 }
1167 
thunderx_ocx_lnk_isr(int irq,void * irq_id)1168 static irqreturn_t thunderx_ocx_lnk_isr(int irq, void *irq_id)
1169 {
1170 	struct msix_entry *msix = irq_id;
1171 	struct thunderx_ocx *ocx = container_of(msix, struct thunderx_ocx,
1172 						msix_ent[msix->entry]);
1173 	unsigned long head = ring_pos(ocx->link_ring_head,
1174 				      ARRAY_SIZE(ocx->link_err_ctx));
1175 	struct ocx_link_err_ctx *ctx = &ocx->link_err_ctx[head];
1176 
1177 	ctx->link = msix->entry;
1178 	ctx->reg_com_link_int = readq(ocx->regs + OCX_COM_LINKX_INT(ctx->link));
1179 
1180 	writeq(ctx->reg_com_link_int, ocx->regs + OCX_COM_LINKX_INT(ctx->link));
1181 
1182 	ocx->link_ring_head++;
1183 
1184 	return IRQ_WAKE_THREAD;
1185 }
1186 
thunderx_ocx_lnk_threaded_isr(int irq,void * irq_id)1187 static irqreturn_t thunderx_ocx_lnk_threaded_isr(int irq, void *irq_id)
1188 {
1189 	struct msix_entry *msix = irq_id;
1190 	struct thunderx_ocx *ocx = container_of(msix, struct thunderx_ocx,
1191 						msix_ent[msix->entry]);
1192 	irqreturn_t ret = IRQ_NONE;
1193 	unsigned long tail;
1194 	struct ocx_link_err_ctx *ctx;
1195 
1196 	char *msg;
1197 	char *other;
1198 
1199 	msg = kmalloc(OCX_MESSAGE_SIZE, GFP_KERNEL);
1200 	other = kmalloc(OCX_OTHER_SIZE, GFP_KERNEL);
1201 
1202 	if (!msg || !other)
1203 		goto err_free;
1204 
1205 	while (CIRC_CNT(ocx->link_ring_head, ocx->link_ring_tail,
1206 			ARRAY_SIZE(ocx->link_err_ctx))) {
1207 		tail = ring_pos(ocx->link_ring_head,
1208 				ARRAY_SIZE(ocx->link_err_ctx));
1209 
1210 		ctx = &ocx->link_err_ctx[tail];
1211 
1212 		snprintf(msg, OCX_MESSAGE_SIZE,
1213 			 "%s: OCX_COM_LINK_INT[%d]: %016llx",
1214 			 ocx->edac_dev->ctl_name,
1215 			 ctx->link, ctx->reg_com_link_int);
1216 
1217 		decode_register(other, OCX_OTHER_SIZE,
1218 				ocx_com_link_errors, ctx->reg_com_link_int);
1219 
1220 		strncat(msg, other, OCX_MESSAGE_SIZE);
1221 
1222 		if (ctx->reg_com_link_int & OCX_COM_LINK_INT_UE)
1223 			edac_device_handle_ue(ocx->edac_dev, 0, 0, msg);
1224 		else if (ctx->reg_com_link_int & OCX_COM_LINK_INT_CE)
1225 			edac_device_handle_ce(ocx->edac_dev, 0, 0, msg);
1226 
1227 		ocx->link_ring_tail++;
1228 	}
1229 
1230 	ret = IRQ_HANDLED;
1231 err_free:
1232 	kfree(other);
1233 	kfree(msg);
1234 
1235 	return ret;
1236 }
1237 
/*
 * Instantiate DEBUGFS_REG_ATTR for the "ocx" device type.  Each use below
 * provides the debugfs_<_name> object that ocx_dfs_ents[] points at.
 */
#define OCX_DEBUGFS_ATTR(_name, _reg)	DEBUGFS_REG_ATTR(ocx, _name, _reg)

/* TLK ECC control registers, indices 0..2 */
OCX_DEBUGFS_ATTR(tlk0_ecc_ctl, OCX_TLKX_ECC_CTL(0));
OCX_DEBUGFS_ATTR(tlk1_ecc_ctl, OCX_TLKX_ECC_CTL(1));
OCX_DEBUGFS_ATTR(tlk2_ecc_ctl, OCX_TLKX_ECC_CTL(2));

/* RLK ECC control registers, indices 0..2 */
OCX_DEBUGFS_ATTR(rlk0_ecc_ctl, OCX_RLKX_ECC_CTL(0));
OCX_DEBUGFS_ATTR(rlk1_ecc_ctl, OCX_RLKX_ECC_CTL(1));
OCX_DEBUGFS_ATTR(rlk2_ecc_ctl, OCX_RLKX_ECC_CTL(2));

/* W1S aliases of the per-link interrupt registers, links 0..2 */
OCX_DEBUGFS_ATTR(com_link0_int, OCX_COM_LINKX_INT_W1S(0));
OCX_DEBUGFS_ATTR(com_link1_int, OCX_COM_LINKX_INT_W1S(1));
OCX_DEBUGFS_ATTR(com_link2_int, OCX_COM_LINKX_INT_W1S(2));

/* OCX_LNE_BAD_CNT registers of lanes 0..23 */
OCX_DEBUGFS_ATTR(lne00_badcnt, OCX_LNE_BAD_CNT(0));
OCX_DEBUGFS_ATTR(lne01_badcnt, OCX_LNE_BAD_CNT(1));
OCX_DEBUGFS_ATTR(lne02_badcnt, OCX_LNE_BAD_CNT(2));
OCX_DEBUGFS_ATTR(lne03_badcnt, OCX_LNE_BAD_CNT(3));
OCX_DEBUGFS_ATTR(lne04_badcnt, OCX_LNE_BAD_CNT(4));
OCX_DEBUGFS_ATTR(lne05_badcnt, OCX_LNE_BAD_CNT(5));
OCX_DEBUGFS_ATTR(lne06_badcnt, OCX_LNE_BAD_CNT(6));
OCX_DEBUGFS_ATTR(lne07_badcnt, OCX_LNE_BAD_CNT(7));

OCX_DEBUGFS_ATTR(lne08_badcnt, OCX_LNE_BAD_CNT(8));
OCX_DEBUGFS_ATTR(lne09_badcnt, OCX_LNE_BAD_CNT(9));
OCX_DEBUGFS_ATTR(lne10_badcnt, OCX_LNE_BAD_CNT(10));
OCX_DEBUGFS_ATTR(lne11_badcnt, OCX_LNE_BAD_CNT(11));
OCX_DEBUGFS_ATTR(lne12_badcnt, OCX_LNE_BAD_CNT(12));
OCX_DEBUGFS_ATTR(lne13_badcnt, OCX_LNE_BAD_CNT(13));
OCX_DEBUGFS_ATTR(lne14_badcnt, OCX_LNE_BAD_CNT(14));
OCX_DEBUGFS_ATTR(lne15_badcnt, OCX_LNE_BAD_CNT(15));

OCX_DEBUGFS_ATTR(lne16_badcnt, OCX_LNE_BAD_CNT(16));
OCX_DEBUGFS_ATTR(lne17_badcnt, OCX_LNE_BAD_CNT(17));
OCX_DEBUGFS_ATTR(lne18_badcnt, OCX_LNE_BAD_CNT(18));
OCX_DEBUGFS_ATTR(lne19_badcnt, OCX_LNE_BAD_CNT(19));
OCX_DEBUGFS_ATTR(lne20_badcnt, OCX_LNE_BAD_CNT(20));
OCX_DEBUGFS_ATTR(lne21_badcnt, OCX_LNE_BAD_CNT(21));
OCX_DEBUGFS_ATTR(lne22_badcnt, OCX_LNE_BAD_CNT(22));
OCX_DEBUGFS_ATTR(lne23_badcnt, OCX_LNE_BAD_CNT(23));

/* W1S alias of the common interrupt register */
OCX_DEBUGFS_ATTR(com_int, OCX_COM_INT_W1S);
1280 
1281 struct debugfs_entry *ocx_dfs_ents[] = {
1282 	&debugfs_tlk0_ecc_ctl,
1283 	&debugfs_tlk1_ecc_ctl,
1284 	&debugfs_tlk2_ecc_ctl,
1285 
1286 	&debugfs_rlk0_ecc_ctl,
1287 	&debugfs_rlk1_ecc_ctl,
1288 	&debugfs_rlk2_ecc_ctl,
1289 
1290 	&debugfs_com_link0_int,
1291 	&debugfs_com_link1_int,
1292 	&debugfs_com_link2_int,
1293 
1294 	&debugfs_lne00_badcnt,
1295 	&debugfs_lne01_badcnt,
1296 	&debugfs_lne02_badcnt,
1297 	&debugfs_lne03_badcnt,
1298 	&debugfs_lne04_badcnt,
1299 	&debugfs_lne05_badcnt,
1300 	&debugfs_lne06_badcnt,
1301 	&debugfs_lne07_badcnt,
1302 	&debugfs_lne08_badcnt,
1303 	&debugfs_lne09_badcnt,
1304 	&debugfs_lne10_badcnt,
1305 	&debugfs_lne11_badcnt,
1306 	&debugfs_lne12_badcnt,
1307 	&debugfs_lne13_badcnt,
1308 	&debugfs_lne14_badcnt,
1309 	&debugfs_lne15_badcnt,
1310 	&debugfs_lne16_badcnt,
1311 	&debugfs_lne17_badcnt,
1312 	&debugfs_lne18_badcnt,
1313 	&debugfs_lne19_badcnt,
1314 	&debugfs_lne20_badcnt,
1315 	&debugfs_lne21_badcnt,
1316 	&debugfs_lne22_badcnt,
1317 	&debugfs_lne23_badcnt,
1318 
1319 	&debugfs_com_int,
1320 };
1321 
/* PCI IDs handled by the OCX driver; the zeroed entry ends the table */
static const struct pci_device_id thunderx_ocx_pci_tbl[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_OCX) },
	{ 0, },
};
1326 
thunderx_ocx_clearstats(struct thunderx_ocx * ocx)1327 static void thunderx_ocx_clearstats(struct thunderx_ocx *ocx)
1328 {
1329 	int lane, stat, cfg;
1330 
1331 	for (lane = 0; lane < OCX_RX_LANES; lane++) {
1332 		cfg = readq(ocx->regs + OCX_LNE_CFG(lane));
1333 		cfg |= OCX_LNE_CFG_RX_STAT_RDCLR;
1334 		cfg &= ~OCX_LNE_CFG_RX_STAT_ENA;
1335 		writeq(cfg, ocx->regs + OCX_LNE_CFG(lane));
1336 
1337 		for (stat = 0; stat < OCX_RX_LANE_STATS; stat++)
1338 			readq(ocx->regs + OCX_LNE_STAT(lane, stat));
1339 	}
1340 }
1341 
thunderx_ocx_probe(struct pci_dev * pdev,const struct pci_device_id * id)1342 static int thunderx_ocx_probe(struct pci_dev *pdev,
1343 			      const struct pci_device_id *id)
1344 {
1345 	struct thunderx_ocx *ocx;
1346 	struct edac_device_ctl_info *edac_dev;
1347 	char name[32];
1348 	int idx;
1349 	int i;
1350 	int ret;
1351 	u64 reg;
1352 
1353 	ret = pcim_enable_device(pdev);
1354 	if (ret) {
1355 		dev_err(&pdev->dev, "Cannot enable PCI device: %d\n", ret);
1356 		return ret;
1357 	}
1358 
1359 	ret = pcim_iomap_regions(pdev, BIT(0), "thunderx_ocx");
1360 	if (ret) {
1361 		dev_err(&pdev->dev, "Cannot map PCI resources: %d\n", ret);
1362 		return ret;
1363 	}
1364 
1365 	idx = edac_device_alloc_index();
1366 	snprintf(name, sizeof(name), "OCX%d", idx);
1367 	edac_dev = edac_device_alloc_ctl_info(sizeof(struct thunderx_ocx),
1368 					      name, 1, "CCPI", 1,
1369 					      0, NULL, 0, idx);
1370 	if (!edac_dev) {
1371 		dev_err(&pdev->dev, "Cannot allocate EDAC device: %d\n", ret);
1372 		return -ENOMEM;
1373 	}
1374 	ocx = edac_dev->pvt_info;
1375 	ocx->edac_dev = edac_dev;
1376 	ocx->com_ring_head = 0;
1377 	ocx->com_ring_tail = 0;
1378 	ocx->link_ring_head = 0;
1379 	ocx->link_ring_tail = 0;
1380 
1381 	ocx->regs = pcim_iomap_table(pdev)[0];
1382 	if (!ocx->regs) {
1383 		dev_err(&pdev->dev, "Cannot map PCI resources: %d\n", ret);
1384 		ret = -ENODEV;
1385 		goto err_free;
1386 	}
1387 
1388 	ocx->pdev = pdev;
1389 
1390 	for (i = 0; i < OCX_INTS; i++) {
1391 		ocx->msix_ent[i].entry = i;
1392 		ocx->msix_ent[i].vector = 0;
1393 	}
1394 
1395 	ret = pci_enable_msix_exact(pdev, ocx->msix_ent, OCX_INTS);
1396 	if (ret) {
1397 		dev_err(&pdev->dev, "Cannot enable interrupt: %d\n", ret);
1398 		goto err_free;
1399 	}
1400 
1401 	for (i = 0; i < OCX_INTS; i++) {
1402 		ret = devm_request_threaded_irq(&pdev->dev,
1403 						ocx->msix_ent[i].vector,
1404 						(i == 3) ?
1405 						 thunderx_ocx_com_isr :
1406 						 thunderx_ocx_lnk_isr,
1407 						(i == 3) ?
1408 						 thunderx_ocx_com_threaded_isr :
1409 						 thunderx_ocx_lnk_threaded_isr,
1410 						0, "[EDAC] ThunderX OCX",
1411 						&ocx->msix_ent[i]);
1412 		if (ret)
1413 			goto err_free;
1414 	}
1415 
1416 	edac_dev->dev = &pdev->dev;
1417 	edac_dev->dev_name = dev_name(&pdev->dev);
1418 	edac_dev->mod_name = "thunderx-ocx";
1419 	edac_dev->ctl_name = "thunderx-ocx";
1420 
1421 	ret = edac_device_add_device(edac_dev);
1422 	if (ret) {
1423 		dev_err(&pdev->dev, "Cannot add EDAC device: %d\n", ret);
1424 		goto err_free;
1425 	}
1426 
1427 	if (IS_ENABLED(CONFIG_EDAC_DEBUG)) {
1428 		ocx->debugfs = edac_debugfs_create_dir(pdev->dev.kobj.name);
1429 
1430 		ret = thunderx_create_debugfs_nodes(ocx->debugfs,
1431 						    ocx_dfs_ents,
1432 						    ocx,
1433 						    ARRAY_SIZE(ocx_dfs_ents));
1434 		if (ret != ARRAY_SIZE(ocx_dfs_ents)) {
1435 			dev_warn(&pdev->dev, "Error creating debugfs entries: %d%s\n",
1436 				 ret, ret >= 0 ? " created" : "");
1437 		}
1438 	}
1439 
1440 	pci_set_drvdata(pdev, edac_dev);
1441 
1442 	thunderx_ocx_clearstats(ocx);
1443 
1444 	for (i = 0; i < OCX_RX_LANES; i++) {
1445 		writeq(OCX_LNE_INT_ENA_ALL,
1446 		       ocx->regs + OCX_LNE_INT_EN(i));
1447 
1448 		reg = readq(ocx->regs + OCX_LNE_INT(i));
1449 		writeq(reg, ocx->regs + OCX_LNE_INT(i));
1450 
1451 	}
1452 
1453 	for (i = 0; i < OCX_LINK_INTS; i++) {
1454 		reg = readq(ocx->regs + OCX_COM_LINKX_INT(i));
1455 		writeq(reg, ocx->regs + OCX_COM_LINKX_INT(i));
1456 
1457 		writeq(OCX_COM_LINKX_INT_ENA_ALL,
1458 		       ocx->regs + OCX_COM_LINKX_INT_ENA_W1S(i));
1459 	}
1460 
1461 	reg = readq(ocx->regs + OCX_COM_INT);
1462 	writeq(reg, ocx->regs + OCX_COM_INT);
1463 
1464 	writeq(OCX_COM_INT_ENA_ALL, ocx->regs + OCX_COM_INT_ENA_W1S);
1465 
1466 	return 0;
1467 err_free:
1468 	edac_device_free_ctl_info(edac_dev);
1469 
1470 	return ret;
1471 }
1472 
thunderx_ocx_remove(struct pci_dev * pdev)1473 static void thunderx_ocx_remove(struct pci_dev *pdev)
1474 {
1475 	struct edac_device_ctl_info *edac_dev = pci_get_drvdata(pdev);
1476 	struct thunderx_ocx *ocx = edac_dev->pvt_info;
1477 	int i;
1478 
1479 	writeq(OCX_COM_INT_ENA_ALL, ocx->regs + OCX_COM_INT_ENA_W1C);
1480 
1481 	for (i = 0; i < OCX_INTS; i++) {
1482 		writeq(OCX_COM_LINKX_INT_ENA_ALL,
1483 		       ocx->regs + OCX_COM_LINKX_INT_ENA_W1C(i));
1484 	}
1485 
1486 	edac_debugfs_remove_recursive(ocx->debugfs);
1487 
1488 	edac_device_del_device(&pdev->dev);
1489 	edac_device_free_ctl_info(edac_dev);
1490 }
1491 
MODULE_DEVICE_TABLE(pci, thunderx_ocx_pci_tbl);

/* PCI driver for the OCX block; registered from thunderx_edac_init() */
static struct pci_driver thunderx_ocx_driver = {
	.name     = "thunderx_ocx_edac",
	.probe    = thunderx_ocx_probe,
	.remove   = thunderx_ocx_remove,
	.id_table = thunderx_ocx_pci_tbl,
};
1500 
1501 /*---------------------- L2C driver ---------------------------------*/
1502 
/* PCI device IDs of the three L2C sub-blocks handled by the L2C driver */
#define PCI_DEVICE_ID_THUNDER_L2C_TAD 0xa02e
#define PCI_DEVICE_ID_THUNDER_L2C_CBC 0xa02f
#define PCI_DEVICE_ID_THUNDER_L2C_MCI 0xa030

/* L2C TAD interrupt status register, W1C and W1S aliases */
#define L2C_TAD_INT_W1C		0x40000
#define L2C_TAD_INT_W1S		0x40008

/* L2C TAD interrupt enable register, W1C and W1S aliases */
#define L2C_TAD_INT_ENA_W1C	0x40020
#define L2C_TAD_INT_ENA_W1S	0x40028


/* Individual L2C_TAD_INT bits; l2_tad_errors[] holds their descriptions */
#define L2C_TAD_INT_L2DDBE	 BIT(1)
#define L2C_TAD_INT_SBFSBE	 BIT(2)
#define L2C_TAD_INT_SBFDBE	 BIT(3)
#define L2C_TAD_INT_FBFSBE	 BIT(4)
#define L2C_TAD_INT_FBFDBE	 BIT(5)
#define L2C_TAD_INT_TAGDBE	 BIT(9)
#define L2C_TAD_INT_RDDISLMC	 BIT(15)
#define L2C_TAD_INT_WRDISLMC	 BIT(16)
#define L2C_TAD_INT_LFBTO	 BIT(17)
#define L2C_TAD_INT_GSYNCTO	 BIT(18)
#define L2C_TAD_INT_RTGSBE	 BIT(32)
#define L2C_TAD_INT_RTGDBE	 BIT(33)
#define L2C_TAD_INT_RDDISOCI	 BIT(34)
#define L2C_TAD_INT_WRDISOCI	 BIT(35)

/* Bits for which thunderx_l2c_tad_isr() captures L2C_TAD_TQD_ERR */
#define L2C_TAD_INT_ECC		(L2C_TAD_INT_L2DDBE | \
				 L2C_TAD_INT_SBFSBE | L2C_TAD_INT_SBFDBE | \
				 L2C_TAD_INT_FBFSBE | L2C_TAD_INT_FBFDBE)

/* Bits reported as corrected errors by the threaded handler */
#define L2C_TAD_INT_CE          (L2C_TAD_INT_SBFSBE | \
				 L2C_TAD_INT_FBFSBE)

/* Bits reported as uncorrected errors by the threaded handler */
#define L2C_TAD_INT_UE          (L2C_TAD_INT_L2DDBE | \
				 L2C_TAD_INT_SBFDBE | \
				 L2C_TAD_INT_FBFDBE | \
				 L2C_TAD_INT_TAGDBE | \
				 L2C_TAD_INT_RTGDBE | \
				 L2C_TAD_INT_WRDISOCI | \
				 L2C_TAD_INT_RDDISOCI | \
				 L2C_TAD_INT_WRDISLMC | \
				 L2C_TAD_INT_RDDISLMC | \
				 L2C_TAD_INT_LFBTO    | \
				 L2C_TAD_INT_GSYNCTO)
1547 
/*
 * Descriptions of the L2C_TAD_INT bits, used with decode_register() by
 * thunderx_l2c_threaded_isr().  The all-zero entry terminates the table.
 */
static const struct error_descr l2_tad_errors[] = {
	{
		.type  = ERR_CORRECTED,
		.mask  = L2C_TAD_INT_SBFSBE,
		.descr = "SBF single-bit error",
	},
	{
		.type  = ERR_CORRECTED,
		.mask  = L2C_TAD_INT_FBFSBE,
		.descr = "FBF single-bit error",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = L2C_TAD_INT_L2DDBE,
		.descr = "L2D double-bit error",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = L2C_TAD_INT_SBFDBE,
		.descr = "SBF double-bit error",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = L2C_TAD_INT_FBFDBE,
		.descr = "FBF double-bit error",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = L2C_TAD_INT_TAGDBE,
		.descr = "TAG double-bit error",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = L2C_TAD_INT_RTGDBE,
		.descr = "RTG double-bit error",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = L2C_TAD_INT_WRDISOCI,
		.descr = "Write to a disabled CCPI",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = L2C_TAD_INT_RDDISOCI,
		.descr = "Read from a disabled CCPI",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = L2C_TAD_INT_WRDISLMC,
		.descr = "Write to a disabled LMC",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = L2C_TAD_INT_RDDISLMC,
		.descr = "Read from a disabled LMC",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = L2C_TAD_INT_LFBTO,
		.descr = "LFB entry timeout",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = L2C_TAD_INT_GSYNCTO,
		.descr = "Global sync CCPI timeout",
	},
	{0, 0, NULL},
};
1616 
/* Bit groups used to pick the extra register captured by the TAD handler */
#define L2C_TAD_INT_TAG		(L2C_TAD_INT_TAGDBE)

#define L2C_TAD_INT_RTG		(L2C_TAD_INT_RTGDBE)

#define L2C_TAD_INT_DISLMC	(L2C_TAD_INT_WRDISLMC | L2C_TAD_INT_RDDISLMC)

#define L2C_TAD_INT_DISOCI	(L2C_TAD_INT_WRDISOCI | L2C_TAD_INT_RDDISOCI)

/*
 * Mask written to the TAD interrupt enable register on probe/remove.
 * NOTE(review): L2C_TAD_INT_GSYNCTO is not part of it - confirm intended.
 */
#define L2C_TAD_INT_ENA_ALL	(L2C_TAD_INT_ECC | L2C_TAD_INT_TAG | \
				 L2C_TAD_INT_RTG | \
				 L2C_TAD_INT_DISLMC | L2C_TAD_INT_DISOCI | \
				 L2C_TAD_INT_LFBTO)

/* Offsets of further L2C TAD registers */
#define L2C_TAD_TIMETWO		0x50000
#define L2C_TAD_TIMEOUT		0x50100
#define L2C_TAD_ERR		0x60000
#define L2C_TAD_TQD_ERR		0x60100
#define L2C_TAD_TTG_ERR		0x60200


/* L2C CBC interrupt status register, W1C alias */
#define L2C_CBC_INT_W1C		0x60000

/* L2C_CBC_INT bits and the groups used by thunderx_l2c_cbc_isr() */
#define L2C_CBC_INT_RSDSBE	 BIT(0)
#define L2C_CBC_INT_RSDDBE	 BIT(1)

#define L2C_CBC_INT_RSD		 (L2C_CBC_INT_RSDSBE | L2C_CBC_INT_RSDDBE)

#define L2C_CBC_INT_MIBSBE	 BIT(4)
#define L2C_CBC_INT_MIBDBE	 BIT(5)

#define L2C_CBC_INT_MIB		 (L2C_CBC_INT_MIBSBE | L2C_CBC_INT_MIBDBE)

#define L2C_CBC_INT_IORDDISOCI	 BIT(6)
#define L2C_CBC_INT_IOWRDISOCI	 BIT(7)

#define L2C_CBC_INT_IODISOCI	 (L2C_CBC_INT_IORDDISOCI | \
				  L2C_CBC_INT_IOWRDISOCI)

/* Bits reported as corrected / uncorrected by the threaded handler */
#define L2C_CBC_INT_CE		 (L2C_CBC_INT_RSDSBE | L2C_CBC_INT_MIBSBE)
#define L2C_CBC_INT_UE		 (L2C_CBC_INT_RSDDBE | L2C_CBC_INT_MIBDBE)
1658 
/*
 * Descriptions of the L2C_CBC_INT bits, used with decode_register() by
 * thunderx_l2c_threaded_isr().  The all-zero entry terminates the table.
 */
static const struct error_descr l2_cbc_errors[] = {
	{
		.type  = ERR_CORRECTED,
		.mask  = L2C_CBC_INT_RSDSBE,
		.descr = "RSD single-bit error",
	},
	{
		.type  = ERR_CORRECTED,
		.mask  = L2C_CBC_INT_MIBSBE,
		.descr = "MIB single-bit error",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = L2C_CBC_INT_RSDDBE,
		.descr = "RSD double-bit error",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = L2C_CBC_INT_MIBDBE,
		.descr = "MIB double-bit error",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = L2C_CBC_INT_IORDDISOCI,
		.descr = "Read from a disabled CCPI",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = L2C_CBC_INT_IOWRDISOCI,
		.descr = "Write to a disabled CCPI",
	},
	{0, 0, NULL},
};
1692 
/* L2C CBC interrupt W1S alias and interrupt enable W1C alias */
#define L2C_CBC_INT_W1S		0x60008
#define L2C_CBC_INT_ENA_W1C	0x60020

/* Mask written to the CBC interrupt enable register on probe/remove */
#define L2C_CBC_INT_ENA_ALL	 (L2C_CBC_INT_RSD | L2C_CBC_INT_MIB | \
				  L2C_CBC_INT_IODISOCI)

#define L2C_CBC_INT_ENA_W1S	0x60028

/* Offsets of the CBC error detail registers */
#define L2C_CBC_IODISOCIERR	0x80008
#define L2C_CBC_IOCERR		0x80010
#define L2C_CBC_RSDERR		0x80018
#define L2C_CBC_MIBERR		0x80020


/* L2C MCI interrupt status register, W1C alias, and its bits */
#define L2C_MCI_INT_W1C		0x0

#define L2C_MCI_INT_VBFSBE	 BIT(0)
#define L2C_MCI_INT_VBFDBE	 BIT(1)
1711 
/*
 * Descriptions of the L2C_MCI_INT bits, used with decode_register() by
 * thunderx_l2c_threaded_isr().  The all-zero entry terminates the table.
 */
static const struct error_descr l2_mci_errors[] = {
	{
		.type  = ERR_CORRECTED,
		.mask  = L2C_MCI_INT_VBFSBE,
		.descr = "VBF single-bit error",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = L2C_MCI_INT_VBFDBE,
		.descr = "VBF double-bit error",
	},
	{0, 0, NULL},
};
1725 
/* L2C MCI interrupt W1S alias and interrupt enable W1C alias */
#define L2C_MCI_INT_W1S		0x8
#define L2C_MCI_INT_ENA_W1C	0x20

/* Mask written to the MCI interrupt enable register on probe/remove */
#define L2C_MCI_INT_ENA_ALL	 (L2C_MCI_INT_VBFSBE | L2C_MCI_INT_VBFDBE)

#define L2C_MCI_INT_ENA_W1S	0x28

/* Offset of the MCI error detail register */
#define L2C_MCI_ERR		0x10000

/* Size of the buffer holding one complete L2C error message */
#define L2C_MESSAGE_SIZE	SZ_1K
/* Size of the decode scratch buffer: 50 bytes per l2_tad_errors entry */
#define L2C_OTHER_SIZE		(50 * ARRAY_SIZE(l2_tad_errors))
1737 
/*
 * One ring entry for an L2C interrupt: filled by the TAD/CBC/MCI hard IRQ
 * handlers and reported by thunderx_l2c_threaded_isr().
 */
struct l2c_err_ctx {
	/* name printed in front of reg_ext; points at a string literal */
	char *reg_ext_name;
	/* value read from the block's interrupt status register */
	u64  reg_int;
	/* value read from the matching error detail register */
	u64  reg_ext;
};
1743 
/*
 * Per-device state of the L2C EDAC driver, shared by the TAD, CBC and MCI
 * variants.  It lives in the pvt_info area of the EDAC control structure
 * allocated by thunderx_l2c_probe().
 */
struct thunderx_l2c {
	/* mapping of PCI BAR 0 */
	void __iomem *regs;
	/* owning PCI device; its device ID selects the variant */
	struct pci_dev *pdev;
	/* back pointer to the owning EDAC control structure */
	struct edac_device_ctl_info *edac_dev;

	/* debugfs directory created when CONFIG_EDAC_DEBUG is enabled */
	struct dentry *debugfs;

	/* NOTE(review): not referenced in the visible part of the file */
	int index;

	/* the single MSI-X entry; also the cookie of the interrupt handlers */
	struct msix_entry msix_ent;

	/* ring of register snapshots handed from hard IRQ to IRQ thread */
	struct l2c_err_ctx err_ctx[RING_ENTRIES];
	/* advanced by the hard IRQ handler */
	unsigned long ring_head;
	/* advanced by the IRQ thread */
	unsigned long ring_tail;
};
1759 
thunderx_l2c_tad_isr(int irq,void * irq_id)1760 static irqreturn_t thunderx_l2c_tad_isr(int irq, void *irq_id)
1761 {
1762 	struct msix_entry *msix = irq_id;
1763 	struct thunderx_l2c *tad = container_of(msix, struct thunderx_l2c,
1764 						msix_ent);
1765 
1766 	unsigned long head = ring_pos(tad->ring_head, ARRAY_SIZE(tad->err_ctx));
1767 	struct l2c_err_ctx *ctx = &tad->err_ctx[head];
1768 
1769 	ctx->reg_int = readq(tad->regs + L2C_TAD_INT_W1C);
1770 
1771 	if (ctx->reg_int & L2C_TAD_INT_ECC) {
1772 		ctx->reg_ext_name = "TQD_ERR";
1773 		ctx->reg_ext = readq(tad->regs + L2C_TAD_TQD_ERR);
1774 	} else if (ctx->reg_int & L2C_TAD_INT_TAG) {
1775 		ctx->reg_ext_name = "TTG_ERR";
1776 		ctx->reg_ext = readq(tad->regs + L2C_TAD_TTG_ERR);
1777 	} else if (ctx->reg_int & L2C_TAD_INT_LFBTO) {
1778 		ctx->reg_ext_name = "TIMEOUT";
1779 		ctx->reg_ext = readq(tad->regs + L2C_TAD_TIMEOUT);
1780 	} else if (ctx->reg_int & L2C_TAD_INT_DISOCI) {
1781 		ctx->reg_ext_name = "ERR";
1782 		ctx->reg_ext = readq(tad->regs + L2C_TAD_ERR);
1783 	}
1784 
1785 	writeq(ctx->reg_int, tad->regs + L2C_TAD_INT_W1C);
1786 
1787 	tad->ring_head++;
1788 
1789 	return IRQ_WAKE_THREAD;
1790 }
1791 
thunderx_l2c_cbc_isr(int irq,void * irq_id)1792 static irqreturn_t thunderx_l2c_cbc_isr(int irq, void *irq_id)
1793 {
1794 	struct msix_entry *msix = irq_id;
1795 	struct thunderx_l2c *cbc = container_of(msix, struct thunderx_l2c,
1796 						msix_ent);
1797 
1798 	unsigned long head = ring_pos(cbc->ring_head, ARRAY_SIZE(cbc->err_ctx));
1799 	struct l2c_err_ctx *ctx = &cbc->err_ctx[head];
1800 
1801 	ctx->reg_int = readq(cbc->regs + L2C_CBC_INT_W1C);
1802 
1803 	if (ctx->reg_int & L2C_CBC_INT_RSD) {
1804 		ctx->reg_ext_name = "RSDERR";
1805 		ctx->reg_ext = readq(cbc->regs + L2C_CBC_RSDERR);
1806 	} else if (ctx->reg_int & L2C_CBC_INT_MIB) {
1807 		ctx->reg_ext_name = "MIBERR";
1808 		ctx->reg_ext = readq(cbc->regs + L2C_CBC_MIBERR);
1809 	} else if (ctx->reg_int & L2C_CBC_INT_IODISOCI) {
1810 		ctx->reg_ext_name = "IODISOCIERR";
1811 		ctx->reg_ext = readq(cbc->regs + L2C_CBC_IODISOCIERR);
1812 	}
1813 
1814 	writeq(ctx->reg_int, cbc->regs + L2C_CBC_INT_W1C);
1815 
1816 	cbc->ring_head++;
1817 
1818 	return IRQ_WAKE_THREAD;
1819 }
1820 
thunderx_l2c_mci_isr(int irq,void * irq_id)1821 static irqreturn_t thunderx_l2c_mci_isr(int irq, void *irq_id)
1822 {
1823 	struct msix_entry *msix = irq_id;
1824 	struct thunderx_l2c *mci = container_of(msix, struct thunderx_l2c,
1825 						msix_ent);
1826 
1827 	unsigned long head = ring_pos(mci->ring_head, ARRAY_SIZE(mci->err_ctx));
1828 	struct l2c_err_ctx *ctx = &mci->err_ctx[head];
1829 
1830 	ctx->reg_int = readq(mci->regs + L2C_MCI_INT_W1C);
1831 	ctx->reg_ext = readq(mci->regs + L2C_MCI_ERR);
1832 
1833 	writeq(ctx->reg_int, mci->regs + L2C_MCI_INT_W1C);
1834 
1835 	ctx->reg_ext_name = "ERR";
1836 
1837 	mci->ring_head++;
1838 
1839 	return IRQ_WAKE_THREAD;
1840 }
1841 
thunderx_l2c_threaded_isr(int irq,void * irq_id)1842 static irqreturn_t thunderx_l2c_threaded_isr(int irq, void *irq_id)
1843 {
1844 	struct msix_entry *msix = irq_id;
1845 	struct thunderx_l2c *l2c = container_of(msix, struct thunderx_l2c,
1846 						msix_ent);
1847 
1848 	unsigned long tail = ring_pos(l2c->ring_tail, ARRAY_SIZE(l2c->err_ctx));
1849 	struct l2c_err_ctx *ctx = &l2c->err_ctx[tail];
1850 	irqreturn_t ret = IRQ_NONE;
1851 
1852 	u64 mask_ue, mask_ce;
1853 	const struct error_descr *l2_errors;
1854 	char *reg_int_name;
1855 
1856 	char *msg;
1857 	char *other;
1858 
1859 	msg = kmalloc(OCX_MESSAGE_SIZE, GFP_KERNEL);
1860 	other = kmalloc(OCX_OTHER_SIZE, GFP_KERNEL);
1861 
1862 	if (!msg || !other)
1863 		goto err_free;
1864 
1865 	switch (l2c->pdev->device) {
1866 	case PCI_DEVICE_ID_THUNDER_L2C_TAD:
1867 		reg_int_name = "L2C_TAD_INT";
1868 		mask_ue = L2C_TAD_INT_UE;
1869 		mask_ce = L2C_TAD_INT_CE;
1870 		l2_errors = l2_tad_errors;
1871 		break;
1872 	case PCI_DEVICE_ID_THUNDER_L2C_CBC:
1873 		reg_int_name = "L2C_CBC_INT";
1874 		mask_ue = L2C_CBC_INT_UE;
1875 		mask_ce = L2C_CBC_INT_CE;
1876 		l2_errors = l2_cbc_errors;
1877 		break;
1878 	case PCI_DEVICE_ID_THUNDER_L2C_MCI:
1879 		reg_int_name = "L2C_MCI_INT";
1880 		mask_ue = L2C_MCI_INT_VBFDBE;
1881 		mask_ce = L2C_MCI_INT_VBFSBE;
1882 		l2_errors = l2_mci_errors;
1883 		break;
1884 	default:
1885 		dev_err(&l2c->pdev->dev, "Unsupported device: %04x\n",
1886 			l2c->pdev->device);
1887 		goto err_free;
1888 	}
1889 
1890 	while (CIRC_CNT(l2c->ring_head, l2c->ring_tail,
1891 			ARRAY_SIZE(l2c->err_ctx))) {
1892 		snprintf(msg, L2C_MESSAGE_SIZE,
1893 			 "%s: %s: %016llx, %s: %016llx",
1894 			 l2c->edac_dev->ctl_name, reg_int_name, ctx->reg_int,
1895 			 ctx->reg_ext_name, ctx->reg_ext);
1896 
1897 		decode_register(other, L2C_OTHER_SIZE, l2_errors, ctx->reg_int);
1898 
1899 		strncat(msg, other, L2C_MESSAGE_SIZE);
1900 
1901 		if (ctx->reg_int & mask_ue)
1902 			edac_device_handle_ue(l2c->edac_dev, 0, 0, msg);
1903 		else if (ctx->reg_int & mask_ce)
1904 			edac_device_handle_ce(l2c->edac_dev, 0, 0, msg);
1905 
1906 		l2c->ring_tail++;
1907 	}
1908 
1909 	ret = IRQ_HANDLED;
1910 
1911 err_free:
1912 	kfree(other);
1913 	kfree(msg);
1914 
1915 	return ret;
1916 }
1917 
1918 #define L2C_DEBUGFS_ATTR(_name, _reg)	DEBUGFS_REG_ATTR(l2c, _name, _reg)
1919 
1920 L2C_DEBUGFS_ATTR(tad_int, L2C_TAD_INT_W1S);
1921 
1922 struct debugfs_entry *l2c_tad_dfs_ents[] = {
1923 	&debugfs_tad_int,
1924 };
1925 
1926 L2C_DEBUGFS_ATTR(cbc_int, L2C_CBC_INT_W1S);
1927 
1928 struct debugfs_entry *l2c_cbc_dfs_ents[] = {
1929 	&debugfs_cbc_int,
1930 };
1931 
1932 L2C_DEBUGFS_ATTR(mci_int, L2C_MCI_INT_W1S);
1933 
1934 struct debugfs_entry *l2c_mci_dfs_ents[] = {
1935 	&debugfs_mci_int,
1936 };
1937 
/* PCI IDs handled by the L2C driver; the zeroed entry ends the table */
static const struct pci_device_id thunderx_l2c_pci_tbl[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_L2C_TAD), },
	{ PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_L2C_CBC), },
	{ PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_L2C_MCI), },
	{ 0, },
};
1944 
thunderx_l2c_probe(struct pci_dev * pdev,const struct pci_device_id * id)1945 static int thunderx_l2c_probe(struct pci_dev *pdev,
1946 			      const struct pci_device_id *id)
1947 {
1948 	struct thunderx_l2c *l2c;
1949 	struct edac_device_ctl_info *edac_dev;
1950 	struct debugfs_entry **l2c_devattr;
1951 	size_t dfs_entries;
1952 	irqreturn_t (*thunderx_l2c_isr)(int, void *) = NULL;
1953 	char name[32];
1954 	const char *fmt;
1955 	u64 reg_en_offs, reg_en_mask;
1956 	int idx;
1957 	int ret;
1958 
1959 	ret = pcim_enable_device(pdev);
1960 	if (ret) {
1961 		dev_err(&pdev->dev, "Cannot enable PCI device: %d\n", ret);
1962 		return ret;
1963 	}
1964 
1965 	ret = pcim_iomap_regions(pdev, BIT(0), "thunderx_l2c");
1966 	if (ret) {
1967 		dev_err(&pdev->dev, "Cannot map PCI resources: %d\n", ret);
1968 		return ret;
1969 	}
1970 
1971 	switch (pdev->device) {
1972 	case PCI_DEVICE_ID_THUNDER_L2C_TAD:
1973 		thunderx_l2c_isr = thunderx_l2c_tad_isr;
1974 		l2c_devattr = l2c_tad_dfs_ents;
1975 		dfs_entries = ARRAY_SIZE(l2c_tad_dfs_ents);
1976 		fmt = "L2C-TAD%d";
1977 		reg_en_offs = L2C_TAD_INT_ENA_W1S;
1978 		reg_en_mask = L2C_TAD_INT_ENA_ALL;
1979 		break;
1980 	case PCI_DEVICE_ID_THUNDER_L2C_CBC:
1981 		thunderx_l2c_isr = thunderx_l2c_cbc_isr;
1982 		l2c_devattr = l2c_cbc_dfs_ents;
1983 		dfs_entries = ARRAY_SIZE(l2c_cbc_dfs_ents);
1984 		fmt = "L2C-CBC%d";
1985 		reg_en_offs = L2C_CBC_INT_ENA_W1S;
1986 		reg_en_mask = L2C_CBC_INT_ENA_ALL;
1987 		break;
1988 	case PCI_DEVICE_ID_THUNDER_L2C_MCI:
1989 		thunderx_l2c_isr = thunderx_l2c_mci_isr;
1990 		l2c_devattr = l2c_mci_dfs_ents;
1991 		dfs_entries = ARRAY_SIZE(l2c_mci_dfs_ents);
1992 		fmt = "L2C-MCI%d";
1993 		reg_en_offs = L2C_MCI_INT_ENA_W1S;
1994 		reg_en_mask = L2C_MCI_INT_ENA_ALL;
1995 		break;
1996 	default:
1997 		//Should never ever get here
1998 		dev_err(&pdev->dev, "Unsupported PCI device: %04x\n",
1999 			pdev->device);
2000 		return -EINVAL;
2001 	}
2002 
2003 	idx = edac_device_alloc_index();
2004 	snprintf(name, sizeof(name), fmt, idx);
2005 
2006 	edac_dev = edac_device_alloc_ctl_info(sizeof(struct thunderx_l2c),
2007 					      name, 1, "L2C", 1, 0,
2008 					      NULL, 0, idx);
2009 	if (!edac_dev) {
2010 		dev_err(&pdev->dev, "Cannot allocate EDAC device\n");
2011 		return -ENOMEM;
2012 	}
2013 
2014 	l2c = edac_dev->pvt_info;
2015 	l2c->edac_dev = edac_dev;
2016 
2017 	l2c->regs = pcim_iomap_table(pdev)[0];
2018 	if (!l2c->regs) {
2019 		dev_err(&pdev->dev, "Cannot map PCI resources\n");
2020 		ret = -ENODEV;
2021 		goto err_free;
2022 	}
2023 
2024 	l2c->pdev = pdev;
2025 
2026 	l2c->ring_head = 0;
2027 	l2c->ring_tail = 0;
2028 
2029 	l2c->msix_ent.entry = 0;
2030 	l2c->msix_ent.vector = 0;
2031 
2032 	ret = pci_enable_msix_exact(pdev, &l2c->msix_ent, 1);
2033 	if (ret) {
2034 		dev_err(&pdev->dev, "Cannot enable interrupt: %d\n", ret);
2035 		goto err_free;
2036 	}
2037 
2038 	ret = devm_request_threaded_irq(&pdev->dev, l2c->msix_ent.vector,
2039 					thunderx_l2c_isr,
2040 					thunderx_l2c_threaded_isr,
2041 					0, "[EDAC] ThunderX L2C",
2042 					&l2c->msix_ent);
2043 	if (ret)
2044 		goto err_free;
2045 
2046 	edac_dev->dev = &pdev->dev;
2047 	edac_dev->dev_name = dev_name(&pdev->dev);
2048 	edac_dev->mod_name = "thunderx-l2c";
2049 	edac_dev->ctl_name = "thunderx-l2c";
2050 
2051 	ret = edac_device_add_device(edac_dev);
2052 	if (ret) {
2053 		dev_err(&pdev->dev, "Cannot add EDAC device: %d\n", ret);
2054 		goto err_free;
2055 	}
2056 
2057 	if (IS_ENABLED(CONFIG_EDAC_DEBUG)) {
2058 		l2c->debugfs = edac_debugfs_create_dir(pdev->dev.kobj.name);
2059 
2060 		ret = thunderx_create_debugfs_nodes(l2c->debugfs, l2c_devattr,
2061 					      l2c, dfs_entries);
2062 
2063 		if (ret != dfs_entries) {
2064 			dev_warn(&pdev->dev, "Error creating debugfs entries: %d%s\n",
2065 				 ret, ret >= 0 ? " created" : "");
2066 		}
2067 	}
2068 
2069 	pci_set_drvdata(pdev, edac_dev);
2070 
2071 	writeq(reg_en_mask, l2c->regs + reg_en_offs);
2072 
2073 	return 0;
2074 
2075 err_free:
2076 	edac_device_free_ctl_info(edac_dev);
2077 
2078 	return ret;
2079 }
2080 
thunderx_l2c_remove(struct pci_dev * pdev)2081 static void thunderx_l2c_remove(struct pci_dev *pdev)
2082 {
2083 	struct edac_device_ctl_info *edac_dev = pci_get_drvdata(pdev);
2084 	struct thunderx_l2c *l2c = edac_dev->pvt_info;
2085 
2086 	switch (pdev->device) {
2087 	case PCI_DEVICE_ID_THUNDER_L2C_TAD:
2088 		writeq(L2C_TAD_INT_ENA_ALL, l2c->regs + L2C_TAD_INT_ENA_W1C);
2089 		break;
2090 	case PCI_DEVICE_ID_THUNDER_L2C_CBC:
2091 		writeq(L2C_CBC_INT_ENA_ALL, l2c->regs + L2C_CBC_INT_ENA_W1C);
2092 		break;
2093 	case PCI_DEVICE_ID_THUNDER_L2C_MCI:
2094 		writeq(L2C_MCI_INT_ENA_ALL, l2c->regs + L2C_MCI_INT_ENA_W1C);
2095 		break;
2096 	}
2097 
2098 	edac_debugfs_remove_recursive(l2c->debugfs);
2099 
2100 	edac_device_del_device(&pdev->dev);
2101 	edac_device_free_ctl_info(edac_dev);
2102 }
2103 
MODULE_DEVICE_TABLE(pci, thunderx_l2c_pci_tbl);

/* PCI driver for the L2C blocks; registered from thunderx_edac_init() */
static struct pci_driver thunderx_l2c_driver = {
	.name     = "thunderx_l2c_edac",
	.probe    = thunderx_l2c_probe,
	.remove   = thunderx_l2c_remove,
	.id_table = thunderx_l2c_pci_tbl,
};
2112 
thunderx_edac_init(void)2113 static int __init thunderx_edac_init(void)
2114 {
2115 	int rc = 0;
2116 
2117 	rc = pci_register_driver(&thunderx_lmc_driver);
2118 	if (rc)
2119 		return rc;
2120 
2121 	rc = pci_register_driver(&thunderx_ocx_driver);
2122 	if (rc)
2123 		goto err_lmc;
2124 
2125 	rc = pci_register_driver(&thunderx_l2c_driver);
2126 	if (rc)
2127 		goto err_ocx;
2128 
2129 	return rc;
2130 err_ocx:
2131 	pci_unregister_driver(&thunderx_ocx_driver);
2132 err_lmc:
2133 	pci_unregister_driver(&thunderx_lmc_driver);
2134 
2135 	return rc;
2136 }
2137 
thunderx_edac_exit(void)2138 static void __exit thunderx_edac_exit(void)
2139 {
2140 	pci_unregister_driver(&thunderx_l2c_driver);
2141 	pci_unregister_driver(&thunderx_ocx_driver);
2142 	pci_unregister_driver(&thunderx_lmc_driver);
2143 
2144 }
2145 
/* Module entry/exit hooks and metadata */
module_init(thunderx_edac_init);
module_exit(thunderx_edac_exit);

MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Cavium, Inc.");
MODULE_DESCRIPTION("EDAC Driver for Cavium ThunderX");
2152