[linux-yocto] [PATCH 08/35] Checking in AXM5500 EDAC driver support

Daniel Dragomir daniel.dragomir at windriver.com
Thu Nov 13 09:19:35 PST 2014


From: SangeethaRao <sangeetha.rao at lsi.com>

Signed-off-by: SangeethaRao <sangeetha.rao at lsi.com>
---
 arch/arm/boot/dts/axm5516-amarillo.dts |   4 +
 arch/arm/boot/dts/axm55xx.dtsi         |  11 +-
 drivers/edac/Kconfig                   |   5 +
 drivers/edac/Makefile                  |   2 +-
 drivers/edac/axxia_edac.c              | 405 +++++++++++++++++++++++++++++++++
 5 files changed, 425 insertions(+), 2 deletions(-)
 create mode 100644 drivers/edac/axxia_edac.c

diff --git a/arch/arm/boot/dts/axm5516-amarillo.dts b/arch/arm/boot/dts/axm5516-amarillo.dts
index ac49364..631974b 100644
--- a/arch/arm/boot/dts/axm5516-amarillo.dts
+++ b/arch/arm/boot/dts/axm5516-amarillo.dts
@@ -26,6 +26,10 @@
 	};
 };
 
+&edac {
+        status = "okay";
+};
+
 &femac {
 	status = "okay";
 	mdio-clock-offset = <0x1c>;
diff --git a/arch/arm/boot/dts/axm55xx.dtsi b/arch/arm/boot/dts/axm55xx.dtsi
index 5ec77f8..0f868e6 100644
--- a/arch/arm/boot/dts/axm55xx.dtsi
+++ b/arch/arm/boot/dts/axm55xx.dtsi
@@ -65,7 +65,16 @@
 			     <1 11 0xf08>,
 			     <1 10 0xf08>;
 	};
-
+        edac: edac0@0x1000 {
+                compatible = "lsi,edac";
+                reg = <0 0x00220000 0 0x1000>,
+                        <0 0x000f0000 0 0x1000>,
+                        <0 0x01e00020 0 0x1000>;
+                interrupts = <0 160 4>,
+                        <0 161 4>;
+                device_type = "edac";
+                status = "disabled";
+        };
 	sm0: sm0@00220000 {
 		compatible = "lsi,smmon";
 		reg = <0 0x00220000 0 0x1000>;
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index e443f2c1..6e829fe 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -368,4 +368,9 @@ config EDAC_OCTEON_PCI
 	  Support for error detection and correction on the
 	  Cavium Octeon family of SOCs.
 
+config EDAC_AXXIA
+	tristate "AXXIA EDAC Controller"
+	help
+	  Support for error detection on AXXIA AXM55xx devices
+
 endif # EDAC
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index 4154ed6..72e26c4 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -15,7 +15,7 @@ edac_core-y	+= edac_module.o edac_device_sysfs.o
 ifdef CONFIG_PCI
 edac_core-y	+= edac_pci.o edac_pci_sysfs.o
 endif
-
+obj-$(CONFIG_EDAC_AXXIA)		+= axxia_edac.o
 obj-$(CONFIG_EDAC_GHES)			+= ghes_edac.o
 obj-$(CONFIG_EDAC_MCE_INJ)		+= mce_amd_inj.o
 
diff --git a/drivers/edac/axxia_edac.c b/drivers/edac/axxia_edac.c
new file mode 100644
index 0000000..ee62e3e
--- /dev/null
+++ b/drivers/edac/axxia_edac.c
@@ -0,0 +1,405 @@
+/*
+* This file is subject to the terms and conditions of the GNU General Public
+* License.  See the file "COPYING" in the main directory of this archive
+* for more details.
+*
+* Copyright (C) 2012 Cavium, Inc.
+*
+* Copyright (C) 2009 Wind River Systems,
+*   written by Ralf Baechle <ralf at linux-mips.org>
+*/
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+#include <linux/edac.h>
+#include <mach/ncr.h>
+#include <linux/of_platform.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/irq.h>
+#include <linux/platform_device.h>
+#include <linux/reboot.h>
+
+#include "edac_core.h"
+#include "edac_module.h"
+
+#define LSI_EDAC_MOD_STR     "lsi_edac"
+#define CORES_PER_CLUSTER 4
+
+/* Private structure for common edac device */
+struct lsi_edac_dev_info {
+	void __iomem *vbase;
+	struct platform_device *pdev;
+	char *ctl_name;
+	char *blk_name;
+	int edac_idx;
+	u32 sm0_region;
+	u32 sm1_region;
+	u32 dickens_L3;
+	struct edac_device_ctl_info *edac_dev;
+	void (*init)(struct lsi_edac_dev_info *dev_info);
+	void (*exit)(struct lsi_edac_dev_info *dev_info);
+	void (*check)(struct edac_device_ctl_info *edac_dev);
+};
+
+static void lsi_error_init(struct lsi_edac_dev_info *dev_info)
+{
+}
+
+static void lsi_error_exit(struct lsi_edac_dev_info *dev_info)
+{
+}
+
+/*
+ * Log the CPUMERRSR (64-bit cp15 read, opc1 0, c15) of the calling CPU.
+ */
+void log_cpumerrsr(void *edac)
+{
+	struct edac_device_ctl_info *edac_dev = edac;
+	u32 tmp1, tmp2, count0, count1;
+	bool fatal;
+	int i;
+
+	asm volatile("mrrc\tp15, 0, %0, %1, c15" : "=r"(tmp1), "=r"(tmp2));
+	/* Latch the uncorrectable flag: tmp2 is zeroed by the clear below */
+	fatal = tmp2 & 0x80000000;
+	if (tmp1 & 0x80000000) {
+		count0 = (tmp2) & 0x000000ff;
+		count1 = ((tmp2) & 0x0000ff00) >> 8;
+		/* increment correctable error counts */
+		for (i = 0; i < count0+count1; i++)
+			edac_device_handle_ce(edac_dev, 0,
+				smp_processor_id(), edac_dev->ctl_name);
+		/* Clear the valid bit */
+		tmp1 = 0x80000000;
+		tmp2 = 0;
+		asm volatile("mcrr\tp15, 0, %0, %1, c15" : : "r"(tmp1),
+			"r"(tmp2));
+	}
+	if (fatal) {
+		pr_info("CPU uncorrectable error\n");
+		machine_restart(NULL);
+	}
+}
+
+
+/* Check for CPU Errors */
+static void lsi_cpu_error_check(struct edac_device_ctl_info *edac_dev)
+{
+	/* execute on current cpu */
+	log_cpumerrsr(edac_dev);
+
+	/* send ipis to execute on other cpus */
+	smp_call_function(log_cpumerrsr, edac_dev, 1);
+
+}
+
+/*
+ * Log the L2MERRSR (64-bit cp15 read, opc1 1, c15) seen by the calling CPU.
+ */
+void log_l2merrsr(void *edac)
+{
+	struct edac_device_ctl_info *edac_dev = edac;
+	u32 tmp1, tmp2, count0, count1;
+	bool fatal;
+	int i;
+
+	asm volatile("mrrc\tp15, 1, %0, %1, c15" : "=r"(tmp1), "=r"(tmp2));
+	/* Latch the uncorrectable flag: tmp2 is zeroed by the clear below */
+	fatal = tmp2 & 0x80000000;
+	if (tmp1 & 0x80000000) {
+		count0 = (tmp2) & 0x000000ff;
+		count1 = ((tmp2) & 0x0000ff00) >> 8;
+		/* increment correctable error counts, one instance per cluster */
+		for (i = 0; i < count0+count1; i++)
+			edac_device_handle_ce(edac_dev, 0,
+				smp_processor_id()/CORES_PER_CLUSTER,
+				edac_dev->ctl_name);
+		/* Clear the valid bit */
+		tmp1 = 0x80000000;
+		tmp2 = 0;
+		asm volatile("mcrr\tp15, 1, %0, %1, c15" : : "r"(tmp1),
+			"r"(tmp2));
+	}
+	if (fatal) {
+		pr_info("L2 uncorrectable error\n");
+		machine_restart(NULL);
+	}
+}
+
+/* Check for L2 Errors */
+static void lsi_l2_error_check(struct edac_device_ctl_info *edac_dev)
+{
+	/* 4 cores per cluster */
+	int nr_cluster_ids = ((nr_cpu_ids - 1) / CORES_PER_CLUSTER) + 1;
+	int i, j, cpu;
+
+	/* execute on current cpu */
+	log_l2merrsr(edac_dev);
+
+	for (i = 0; i < nr_cluster_ids; i++) {
+		/* No need to run on local cluster. */
+		if (i == (smp_processor_id() / CORES_PER_CLUSTER))
+			continue;
+		/*
+		 * Have some core in each cluster execute this,
+		 * Start with the first core on that cluster.
+		 */
+		cpu = i * CORES_PER_CLUSTER;
+		for (j = cpu; j < cpu + CORES_PER_CLUSTER; j++) {
+			if (cpu_online(j)) {
+				smp_call_function_single(j, log_l2merrsr,
+					edac_dev, 1);
+				break;
+			}
+		}
+	}
+}
+
+/* Check for L3 Errors on each of the 8 instances at dickens_L3 + N */
+static void lsi_l3_error_check(struct edac_device_ctl_info *edac_dev)
+{
+	unsigned long regVal1, regVal2, clearVal;
+	unsigned count = 0;
+	int i, instance;
+	struct lsi_edac_dev_info *dev_info = edac_dev->pvt_info;
+
+	for (instance = 0; instance < 8; instance++) {
+		/* Pre-zero in case ncr_read() fails and leaves these unset */
+		regVal1 = regVal2 = 0;
+		ncr_read(dev_info->dickens_L3+instance, 0x400, 4, &regVal1);
+		ncr_read(dev_info->dickens_L3+instance, 0x404, 4, &regVal2);
+		/* First error valid */
+		if (regVal2 & 0x40000000) {
+			if (regVal2 & 0x30000000) {
+				/* Fatal error */
+				pr_info("L3 uncorrectable error\n");
+				machine_restart(NULL);
+			}
+			count = (regVal2 & 0x07fff800) >> 11;
+			for (i = 0; i < count; i++)
+				edac_device_handle_ce(edac_dev, 0,
+					instance, edac_dev->ctl_name);
+			clearVal = 0x48000000;
+			/* clear the valid bit in the region that was read */
+			ncr_write(dev_info->dickens_L3+instance,
+				0x484, 4, &clearVal);
+		}
+	}
+}
+
+/*
+ * Check for SysMem Errors; a status word that is never filled in reads 0.
+ */
+static void lsi_sm_error_check(struct edac_device_ctl_info *edac_dev)
+{
+	unsigned long sm0_regVal = 0, sm1_regVal = 0, clearVal;
+	struct lsi_edac_dev_info *dev_info = edac_dev->pvt_info;
+
+	/* SM0 is instance 0 */
+	ncr_read(dev_info->sm0_region, 0x410, 4, &sm0_regVal);
+	if (sm0_regVal & 0x8) {
+		/* single bit and multiple bit correctable errors */
+		edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
+		/* Clear bits */
+		clearVal = 0x8;
+		ncr_write(dev_info->sm0_region, 0x548, 4, &clearVal);
+	}
+	if (sm0_regVal & 0x40) {
+		/* single bit and multiple bit uncorrectable errors */
+		pr_info("SM0 uncorrectable error\n");
+		machine_restart(NULL);
+	}
+
+	/* SM1 is instance 1 */
+	ncr_read(dev_info->sm1_region, 0x410, 4, &sm1_regVal);
+	if (sm1_regVal & 0x8) {
+		/* single bit and multiple bit correctable errors */
+		edac_device_handle_ce(edac_dev, 0, 1, edac_dev->ctl_name);
+		/* Clear bits */
+		clearVal = 0x8;
+		ncr_write(dev_info->sm1_region, 0x548, 4, &clearVal);
+	}
+	if (sm1_regVal & 0x40) {
+		/* single bit and multiple bit uncorrectable errors */
+		pr_info("SM1 uncorrectable error\n");
+		machine_restart(NULL);
+	}
+}
+
+
+static struct lsi_edac_dev_info lsi_edac_devs[] = {
+	{
+		.ctl_name = "LSI_CPU",
+		.blk_name = "cpumerrsr",
+		.init = lsi_error_init,
+		.exit = lsi_error_exit,
+		.check = lsi_cpu_error_check
+	},
+	{
+		.ctl_name = "LSI_L2",
+		.blk_name = "l2merrsr",
+		.init = lsi_error_init,
+		.exit = lsi_error_exit,
+		.check = lsi_l2_error_check
+	},
+	{
+		.ctl_name = "LSI_L3",
+		.blk_name = "l3merrsr",
+		.init = lsi_error_init,
+		.exit = lsi_error_exit,
+		.check = lsi_l3_error_check
+	},
+	{
+		.ctl_name = "LSI_SM",
+		.blk_name = "ECC",
+		.init = lsi_error_init,
+		.exit = lsi_error_exit,
+		.check = lsi_sm_error_check
+	},
+	{0} /* Terminated by NULL */
+};
+
+
+
+/* Register a platform and EDAC device for each lsi_edac_devs[] entry. */
+static void lsi_add_edac_devices(struct platform_device *pdev)
+{
+	struct lsi_edac_dev_info *dev_info;
+	/* 4 cores per cluster */
+	int nr_cluster_ids = ((nr_cpu_ids - 1) / CORES_PER_CLUSTER) + 1;
+	struct resource *io0, *io1, *io2;
+
+	for (dev_info = &lsi_edac_devs[0]; dev_info->init; dev_info++) {
+		dev_info->pdev = platform_device_register_simple(
+		dev_info->ctl_name, 0, NULL, 0);
+		if (IS_ERR(dev_info->pdev)) {
+			pr_info("Can't register platform device for %s\n",
+				dev_info->ctl_name);
+			continue;
+		}
+		/*
+		 * Don't have to allocate private structure but
+		 * make use of lsi_edac_devs[] instead.
+		 */
+		dev_info->edac_idx = edac_device_alloc_index();
+
+		if (strcmp(dev_info->ctl_name, "LSI_SM") == 0) {
+			io0 = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+			if (!io0) {
+				dev_err(&pdev->dev, "Unable to get mem resource\n");
+				goto err1;
+			}
+			io1 = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+			if (!io1) {
+				dev_err(&pdev->dev, "Unable to get mem resource\n");
+				goto err1;
+			}
+
+			dev_info->sm0_region = io0->start;
+			dev_info->sm1_region = io1->start;
+
+			dev_info->edac_dev =
+			edac_device_alloc_ctl_info(0, dev_info->ctl_name,
+			1, dev_info->blk_name, 2, 0,
+				NULL, 0, dev_info->edac_idx);
+
+		} else if (strcmp(dev_info->ctl_name, "LSI_L3") == 0) {
+			io2 = platform_get_resource(pdev, IORESOURCE_MEM, 2);
+			if (!io2) {
+				dev_err(&pdev->dev, "Unable to get mem resource\n");
+				goto err1;
+			}
+
+			dev_info->dickens_L3 = io2->start;
+			dev_info->edac_dev =
+			edac_device_alloc_ctl_info(0, dev_info->ctl_name,
+			1, dev_info->blk_name, 8, 0, NULL, 0,
+			dev_info->edac_idx);
+		} else if (strcmp(dev_info->ctl_name, "LSI_CPU") == 0) {
+			dev_info->edac_dev =
+			edac_device_alloc_ctl_info(0, dev_info->ctl_name,
+			1, dev_info->blk_name, num_possible_cpus(), 0, NULL,
+			0, dev_info->edac_idx);
+		} else if (strcmp(dev_info->ctl_name, "LSI_L2") == 0) {
+			dev_info->edac_dev =
+			edac_device_alloc_ctl_info(0, dev_info->ctl_name,
+				1, dev_info->blk_name, nr_cluster_ids, 0, NULL,
+				0, dev_info->edac_idx);
+		} else {
+			dev_info->edac_dev =
+			edac_device_alloc_ctl_info(0, dev_info->ctl_name,
+			1, dev_info->blk_name, 1, 0,
+			NULL, 0, dev_info->edac_idx);
+		}
+		if (!dev_info->edac_dev) {
+			pr_info("No memory for edac device\n");
+			goto err1;
+		}
+
+		dev_info->edac_dev->pvt_info = dev_info;
+		dev_info->edac_dev->dev = &dev_info->pdev->dev;
+		dev_info->edac_dev->ctl_name = dev_info->ctl_name;
+		dev_info->edac_dev->mod_name = LSI_EDAC_MOD_STR;
+		dev_info->edac_dev->dev_name = dev_name(&dev_info->pdev->dev);
+
+		if (edac_op_state == EDAC_OPSTATE_POLL)
+			dev_info->edac_dev->edac_check = dev_info->check;
+
+		if (dev_info->init)
+			dev_info->init(dev_info);
+
+		if (edac_device_add_device(dev_info->edac_dev) > 0) {
+			pr_info("Unable to add edac device for %s\n",
+					dev_info->ctl_name);
+			goto err2;
+		}
+		pr_info("Successfully added edac device for %s\n",
+				dev_info->ctl_name);
+		continue;
+		/* err2 needs an allocated edac_dev; err1 only undoes pdev */
+err2:
+		if (dev_info->exit)
+			dev_info->exit(dev_info);
+		edac_device_free_ctl_info(dev_info->edac_dev);
+err1:
+		platform_device_unregister(dev_info->pdev);
+	}
+}
+
+
+static int lsi_edac_probe(struct platform_device *pdev)
+{
+	edac_op_state = EDAC_OPSTATE_POLL;
+	lsi_add_edac_devices(pdev);
+	return 0;
+}
+
+static int lsi_edac_remove(struct platform_device *pdev)
+{
+	return 0;
+}
+
+/* Devicetree nodes this driver binds to */
+static const struct of_device_id lsi_edac_match[] = {
+	{ .type = "edac", .compatible = "lsi,edac" },
+	{},
+};
+/* Export the table so the module can be autoloaded by OF alias */
+MODULE_DEVICE_TABLE(of, lsi_edac_match);
+
+static struct platform_driver lsi_edac_driver = {
+	.probe = lsi_edac_probe,
+	.remove = lsi_edac_remove,
+	.driver = {
+		.name = "lsi_edac",
+		.of_match_table = lsi_edac_match,
+	}
+};
+
+module_platform_driver(lsi_edac_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Sangeetha Rao <sangeetha.rao at avagotech.com>");
-- 
1.8.1.4



More information about the linux-yocto mailing list