[linux-yocto] [PATCH 35/35] arch/arm/mach-axxia: fixed hang in preempt kernel

Daniel Dragomir daniel.dragomir at windriver.com
Thu Nov 13 09:20:02 PST 2014


From: John Jacques <john.jacques at lsi.com>

Fixed the hang in the affinity code: when clearing affinity, the CPU
selection used the wrong cluster mask, so the affinity could be moved
to a CPU that was not powered up.
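
A minimal standalone sketch of the corrected cluster-mask computation
(CORES_PER_CLUSTER is 4 on Axxia; the CPU number is illustrative):

	#include <stdio.h>

	#define CORES_PER_CLUSTER 4

	int main(void)
	{
		unsigned int oldcpu = 6; /* e.g. physical CPU 6, cluster 1 */

		/* old (wrong): shifts by the cluster number */
		unsigned int bad  = 0xf << (oldcpu / CORES_PER_CLUSTER);
		/* new: shifts by cluster * 4, covering that cluster's 4 CPUs */
		unsigned int good = 0xf << ((oldcpu / CORES_PER_CLUSTER) * 4);

		printf("bad=0x%x good=0x%x\n", bad, good); /* bad=0x1e good=0xf0 */
		return 0;
	}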

Fixed the intermittent failure to power up a cluster by initializing
the L2CTLR, L2ACTLR and ACTLR2 registers. Also updated the GIC power-on
sequence to enable only the current CPU when powering up a GIC cluster.

Signed-off-by: John Jacques <john.jacques at lsi.com>
---
 arch/arm/mach-axxia/axxia-gic.c            | 242 ++++++++++++++++-------------
 arch/arm/mach-axxia/hotplug.c              |  30 ++--
 arch/arm/mach-axxia/lsi_power_management.c | 189 +++++++++++++---------
 arch/arm/mach-axxia/lsi_power_management.h |   9 +-
 arch/arm/mach-axxia/platsmp.c              |  36 ++---
 5 files changed, 282 insertions(+), 224 deletions(-)

diff --git a/arch/arm/mach-axxia/axxia-gic.c b/arch/arm/mach-axxia/axxia-gic.c
index 6345a99..34764b0 100644
--- a/arch/arm/mach-axxia/axxia-gic.c
+++ b/arch/arm/mach-axxia/axxia-gic.c
@@ -228,22 +228,23 @@ static void axxia_gic_flush_affinity_queue(struct work_struct *dummy)
 	void *qdata;
 	struct gic_rpc_data *rpc_data;
 
-
 	while (axxia_get_item(&axxia_circ_q, &qdata) != -1) {
 		rpc_data = (struct gic_rpc_data *) qdata;
 		if (rpc_data->func_mask == SET_AFFINITY) {
-			smp_call_function_single(rpc_data->cpu,
-					gic_set_affinity_remote,
-					qdata, 1);
-
+			if (cpu_online(rpc_data->cpu)) {
+				smp_call_function_single(rpc_data->cpu, gic_set_affinity_remote,
+						qdata, 1);
+			}
 		} else if (rpc_data->func_mask == CLR_AFFINITY) {
-
-			smp_call_function_single(rpc_data->cpu,
-					gic_clr_affinity_remote,
-					qdata, 1);
+			if (cpu_online(rpc_data->cpu)) {
+				smp_call_function_single(rpc_data->cpu, gic_clr_affinity_remote,
+						qdata, 1);
+			}
 		}
 		kfree(qdata);
 	}
 }
 
 /*
@@ -482,35 +483,45 @@ static int gic_retrigger(struct irq_data *d)
 	return -ENXIO;
 }
 
-static int _gic_clear_affinity(struct irq_data *d, u32 cpu, bool update_enable)
+static void gic_set_irq_target(void __iomem *dist_base,
+		u32 irqid, u32 cpu, bool set)
 {
-
-	void __iomem *reg = gic_dist_base(d) + GIC_DIST_TARGET + (gic_irq(d) & ~3);
-	unsigned int shift = (gic_irq(d) % 4) * 8;
+	void __iomem *reg;
+	unsigned int shift;
 	u32 val;
 	u32 mask = 0;
-	u32 enable_mask, enable_offset;
+	u32 bit;
 
+	reg = dist_base + GIC_DIST_TARGET + (irqid & ~3);
+	shift = (irqid % 4) * 8;
 	mask = 0xff << shift;
 
-	enable_mask = 1 << (gic_irq(d) % 32);
-	enable_offset = 4 * (gic_irq(d) / 32);
+	val = readl_relaxed(reg) & ~mask;
+
+	if (!set) {
+		/* Clear affinity, mask IRQ. */
+		writel_relaxed(val, reg);
+	} else {
+		bit = 1 << ((cpu_logical_map(cpu) % CORES_PER_CLUSTER) + shift);
+		writel_relaxed(val | bit, reg);
+	}
+}
+
+static int _gic_clear_affinity(struct irq_data *d, u32 cpu, bool update_enable)
+{
+	u32 enable_mask, enable_offset;
 
 	raw_spin_lock(&irq_controller_lock);
 
-	val = readl_relaxed(reg) & ~mask;
-	/* Clear affinity, mask IRQ. */
-	writel_relaxed(val, reg);
+	gic_set_irq_target(gic_dist_base(d), gic_irq(d), cpu, false);
 
 	if (update_enable) {
-
-		writel_relaxed(enable_mask,
-				gic_data_dist_base(&gic_data) + GIC_DIST_PENDING_CLEAR + enable_offset);
-		writel_relaxed(enable_mask,
-				gic_data_dist_base(&gic_data) + GIC_DIST_ACTIVE_CLEAR + enable_offset);
+		enable_mask = 1 << (gic_irq(d) % 32);
+		enable_offset = 4 * (gic_irq(d) / 32);
 		writel_relaxed(enable_mask,
 				gic_data_dist_base(&gic_data) + GIC_DIST_ENABLE_CLEAR + enable_offset);
-
 	}
 
 	raw_spin_unlock(&irq_controller_lock);
@@ -523,33 +534,17 @@ static int _gic_set_affinity(struct irq_data *d,
 			     u32 cpu,
 			     bool update_enable)
 {
-	void __iomem *reg = gic_dist_base(d) + GIC_DIST_TARGET + (gic_irq(d) & ~3);
-	unsigned int shift = (gic_irq(d) % 4) * 8;
-	u32 val;
-	u32 mask = 0;
-	u32 bit;
 	u32 enable_mask, enable_offset;
 
-	/*
-	 * Normalize the cpu number as seen by Linux (0-15) to a
-	 * number as seen by a cluster (0-3).
-	 */
-	bit = 1 << ((cpu_logical_map(cpu) % CORES_PER_CLUSTER) + shift);
-	mask = 0xff << shift;
-
-	enable_mask = 1 << (gic_irq(d) % 32);
-	enable_offset = 4 * (gic_irq(d) / 32);
-
 	raw_spin_lock(&irq_controller_lock);
 
-	val = readl_relaxed(reg) & ~mask;
-	/* Set affinity, mask IRQ. */
-	writel_relaxed(val | bit, reg);
+	gic_set_irq_target(gic_dist_base(d), gic_irq(d), cpu, true);
 
 	if (update_enable) {
+		enable_mask = 1 << (gic_irq(d) % 32);
+		enable_offset = 4 * (gic_irq(d) / 32);
 		writel_relaxed(enable_mask,
-			gic_data_dist_base(&gic_data) + GIC_DIST_ENABLE_SET
-					+ enable_offset);
+				gic_data_dist_base(&gic_data) + GIC_DIST_ENABLE_SET + enable_offset);
 	}
 
 	raw_spin_unlock(&irq_controller_lock);
@@ -580,11 +575,11 @@ static int gic_set_affinity(struct irq_data *d,
 			    const struct cpumask *mask_val,
 			    bool force)
 {
-	u32 pcpu = cpu_logical_map(smp_processor_id());
-	unsigned int irqid = gic_irq(d);
-	struct cpumask *affinity_mask = (struct cpumask *)mask_val;
+	u32 pcpu;
+	unsigned int irqid;
+	struct cpumask *affinity_mask;
 	u32 mask;
-	u32 oldcpu = irq_cpuid[irqid];
+	u32 oldcpu;
 	struct gic_rpc_data *gic_rpc_ptr;
 	int rval;
 	bool new_same_core = false;
@@ -597,6 +592,12 @@ static int gic_set_affinity(struct irq_data *d,
 
 	BUG_ON(!irqs_disabled());
 
+	pcpu = cpu_logical_map(smp_processor_id());
+	irqid = gic_irq(d);
+	affinity_mask = (struct cpumask *)mask_val;
+	oldcpu = irq_cpuid[irqid];
+
 	if (irqid >= MAX_GIC_INTERRUPTS)
 		return -EINVAL;
 
@@ -604,7 +605,6 @@ static int gic_set_affinity(struct irq_data *d,
 	if ((irqid >= IPI0_CPU0) && (irqid < MAX_AXM_IPI_NUM))
 		return IRQ_SET_MASK_OK;
 
-
 	if (force)
 		add_cpu = cpumask_any(cpu_online_mask);
 	else
@@ -650,25 +650,21 @@ static int gic_set_affinity(struct irq_data *d,
 		}
 	}
 
-	mutex_lock(&affinity_lock);
-
-	/* Update Axxia IRQ affinity table with the new physical CPU number. */
-	irq_cpuid[irqid] = cpu_logical_map(add_cpu);
 
 	/*
 	 * We clear first to make sure the affinity mask always has a bit set,
 	 * especially when the two cpus are in the same cluster.
 	 */
 	if (irqid != IRQ_PMU) {
-
 		if (clear_needed == AFFINITY_CLEAR_LOCAL) {
 
 			_gic_clear_affinity(d, del_cpu, update_enable);
 
 		} else if (clear_needed == AFFINITY_CLEAR_OTHER_CLUSTER) {
 
-			mask = 0xf << (oldcpu / CORES_PER_CLUSTER);
-			del_cpu = cpumask_any_and((struct cpumask *)&mask, cpu_online_mask);
+			mask = 0xf << ((oldcpu / CORES_PER_CLUSTER) * 4);
+			del_cpu = cpumask_any_and((struct cpumask *)&mask,
+					cpu_online_mask);
 
 			if (del_cpu < nr_cpu_ids) {
 
@@ -685,19 +681,23 @@ static int gic_set_affinity(struct irq_data *d,
 				gic_rpc_ptr->oldcpu = oldcpu;
 				gic_rpc_ptr->d = d;
 				gic_rpc_ptr->update_enable = update_enable;
+				get_cpu();
 				rval = axxia_put_item(&axxia_circ_q, (void *) gic_rpc_ptr);
+				put_cpu();
 				if (rval) {
 					pr_err(
 							"ERROR: failed to add CLR_AFFINITY request for cpu: %d\n",
 							del_cpu);
 					kfree((void *) gic_rpc_ptr);
+					mutex_unlock(&affinity_lock);
+					return rval;
 				}
 				schedule_work_on(0, &axxia_gic_affinity_work);
-			}
+			} else {
+				pr_err("ERROR: no CPUs left\n");
+			}
 		}
 	}
 
-
 	if (set_needed == AFFINITY_SET_LOCAL) {
 
 		_gic_set_affinity(d, add_cpu, update_enable);
@@ -716,19 +716,22 @@ static int gic_set_affinity(struct irq_data *d,
 		gic_rpc_ptr->cpu = add_cpu;
 		gic_rpc_ptr->update_enable = update_enable;
 		gic_rpc_ptr->d = d;
+		get_cpu();
 		rval = axxia_put_item(&axxia_circ_q, (void *) gic_rpc_ptr);
+		put_cpu();
 		if (rval) {
 			pr_err("ERROR: failed to add SET_AFFINITY request for cpu: %d\n",
 					add_cpu);
 			kfree((void *) gic_rpc_ptr);
+			mutex_unlock(&affinity_lock);
+			return rval;
 		}
 		schedule_work_on(0, &axxia_gic_affinity_work);
 
 	}
 
-
-	mutex_unlock(&affinity_lock);
-
+	/* Update Axxia IRQ affinity table with the new physical CPU number. */
+	irq_cpuid[irqid] = cpu_logical_map(add_cpu);
 
 	return IRQ_SET_MASK_OK;
 }
@@ -1206,10 +1209,9 @@ static void __init gic_axxia_init(struct gic_chip_data *gic)
 		writel_relaxed(cpumask, ipi_mask_reg_base + 0x40 + i * 4);
 }
 
-static void __cpuinit gic_dist_init(struct gic_chip_data *gic)
+static void gic_dist_init(struct gic_chip_data *gic)
 {
 	unsigned int i;
-	u32 cpumask;
 	unsigned int gic_irqs = gic->gic_irqs;
 	void __iomem *base = gic_data_dist_base(gic);
 	u32 cpu = cpu_logical_map(smp_processor_id());
@@ -1220,17 +1222,17 @@ static void __cpuinit gic_dist_init(struct gic_chip_data *gic)
 	u32 enableoff;
 	u32 val;
 	u32 this_cluster = get_cluster_id();
+	u32 powered_on;
+	u32 ccpu;
 
 	/* Initialize the distributor interface once per CPU cluster */
 	if ((test_and_set_bit(get_cluster_id(), &gic->dist_init_done)) && (!cluster_power_up[this_cluster]))
 		return;
 
-	cpumask = 1 << cpu;
-	cpumask |= cpumask << 8;
-	cpumask |= cpumask << 16;
-
 	writel_relaxed(0, base + GIC_DIST_CTRL);
 
+	/* ==================== CONFIG IRQS ==================== */
+
 	/*
 	 * Set all global interrupts to be level triggered, active low.
 	 */
@@ -1238,13 +1240,23 @@ static void __cpuinit gic_dist_init(struct gic_chip_data *gic)
 		writel_relaxed(0, base + GIC_DIST_CONFIG + i * 4 / 16);
 
 	/*
-	 * Set all global interrupts to this CPU only.
-	 * (Only do this for the first core on cluster 0).
+	 * Set Axxia IPI interrupts to be edge triggered.
 	 */
-	if (cpu == 0)
-		for (i = 32; i < gic_irqs; i += 4)
-			writel_relaxed(cpumask,
-				       base + GIC_DIST_TARGET + i * 4 / 4);
+	for (i = IPI0_CPU0; i < MAX_AXM_IPI_NUM; i++) {
+		confmask = 0x2 << ((i % 16) * 2);
+		confoff = (i / 16) * 4;
+		val = readl_relaxed(base + GIC_DIST_CONFIG + confoff);
+		val |= confmask;
+		writel_relaxed(val, base + GIC_DIST_CONFIG + confoff);
+	}
+
+	/* ==================== PRIORITY ==================== */
+	/*
+	 * Set priority on PPI and SGI interrupts
+	 */
+	for (i = 0; i < 32; i += 4)
+		writel_relaxed(0xa0a0a0a0,
+				base + GIC_DIST_PRI + i * 4 / 4);
 
 	/*
 	 * Set priority on all global interrupts.
@@ -1252,52 +1264,43 @@ static void __cpuinit gic_dist_init(struct gic_chip_data *gic)
 	for (i = 32; i < gic_irqs; i += 4)
 		writel_relaxed(0xa0a0a0a0, base + GIC_DIST_PRI + i * 4 / 4);
 
+
+	/* ==================== TARGET ==================== */
 	/*
-	 * Disable all interrupts.  Leave the PPI and SGIs alone
-	 * as these enables are banked registers.
+	 * Set all global interrupts to this CPU only.
+	 * (Only do this for the first core on cluster 0).
 	 */
-	for (i = 32; i < gic_irqs; i += 32) {
-		writel_relaxed(0xffffffff,
-			       base + GIC_DIST_ACTIVE_CLEAR + i * 4 / 32);
-		writel_relaxed(0xffffffff,
-			       base + GIC_DIST_PENDING_CLEAR + i * 4 / 32);
-		writel_relaxed(0xffffffff,
-			       base + GIC_DIST_ENABLE_CLEAR + i * 4 / 32);
-	}
+	if (cpu == 0)
+		for (i = 32; i < gic_irqs; i += 4)
+			writel_relaxed(0x01010101, base + GIC_DIST_TARGET + i * 4 / 4);
 
 	/*
 	 * Set Axxia IPI interrupts for all CPUs in this cluster.
 	 */
+	powered_on = (~pm_cpu_powered_down) & 0xFFFF;
 	for (i = IPI0_CPU0; i < MAX_AXM_IPI_NUM; i++) {
 		cpumask_8 = 1 << ((i - IPI0_CPU0) % 4);
-		writeb_relaxed(cpumask_8, base + GIC_DIST_TARGET + i);
-	}
-
-	/*
-	 * Set the PMU IRQ to the first cpu in this cluster.
-	 */
-	writeb_relaxed(0x01, base + GIC_DIST_TARGET + IRQ_PMU);
-
-	/*
-	 * Set Axxia IPI interrupts to be edge triggered.
-	 */
-	for (i = IPI0_CPU0; i < MAX_AXM_IPI_NUM; i++) {
-		confmask = 0x2 << ((i % 16) * 2);
-		confoff = (i / 16) * 4;
-		val = readl_relaxed(base + GIC_DIST_CONFIG + confoff);
-		val |= confmask;
-		writel_relaxed(val, base + GIC_DIST_CONFIG + confoff);
+		ccpu = (this_cluster * 4) + ((i - IPI0_CPU0) % CORES_PER_CLUSTER);
+		if ((1 << ccpu) & powered_on)
+			writeb_relaxed(cpumask_8, base + GIC_DIST_TARGET + i);
+		else
+			writeb_relaxed(0x00, base + GIC_DIST_TARGET + i);
 	}
 
+	/* ==================== ENABLE IRQS ==================== */
 	/*
 	 * Do the initial enable of the Axxia IPI interrupts here.
 	 * NOTE: Writing a 0 to this register has no effect, so
 	 * no need to read and OR in bits, just writing is OK.
 	 */
+
+	powered_on = (~pm_cpu_powered_down) & 0xFFFF;
 	for (i = IPI0_CPU0; i < MAX_AXM_IPI_NUM; i++) {
 		enablemask = 1 << (i % 32);
 		enableoff = (i / 32) * 4;
-		writel_relaxed(enablemask, base + GIC_DIST_ENABLE_SET + enableoff);
+		ccpu = (this_cluster * 4) + ((i - IPI0_CPU0) % CORES_PER_CLUSTER);
+		if ((1 << ccpu) & powered_on)
+			writel_relaxed(enablemask, base + GIC_DIST_ENABLE_SET + enableoff);
 	}
 
 	/*
@@ -1307,32 +1310,47 @@ static void __cpuinit gic_dist_init(struct gic_chip_data *gic)
 	enableoff = (IRQ_PMU / 32) * 4;
 	writel_relaxed(enablemask, base + GIC_DIST_ENABLE_SET + enableoff);
 
+
 	writel_relaxed(1, base + GIC_DIST_CTRL);
 }
 
-static void __cpuinit gic_cpu_init(struct gic_chip_data *gic)
+static void gic_cpu_init(struct gic_chip_data *gic)
 {
 	void __iomem *dist_base = gic_data_dist_base(gic);
 	void __iomem *base = gic_data_cpu_base(gic);
 	int i;
-
+	u32 enablemask;
+	u32 enableoff;
+	u32 ccpu;
+	u32 cpu = smp_processor_id();
+	u32 cluster = cpu / CORES_PER_CLUSTER;
+	u32 cpumask_8;
 
 	/*
 	 * Deal with the banked PPI and SGI interrupts - disable all
 	 * PPI interrupts, and also all SGI interrupts (we don't use
 	 * SGIs in the Axxia).
 	 */
-
 	writel_relaxed(0xffffffff, dist_base + GIC_DIST_ENABLE_CLEAR);
 
-	/*
-	 * Set priority on PPI and SGI interrupts
-	 */
-	for (i = 0; i < 32; i += 4)
-		writel_relaxed(0xa0a0a0a0,
-			       dist_base + GIC_DIST_PRI + i * 4 / 4);
+	if (!cluster_power_up[cluster]) {
+		writel_relaxed(0, dist_base + GIC_DIST_CTRL);
+		for (i = IPI0_CPU0; i < MAX_AXM_IPI_NUM; i++) {
+			cpumask_8 = 1 << ((i - IPI0_CPU0) % 4);
+			enablemask = 1 << (i % 32);
+			enableoff = (i / 32) * 4;
+			ccpu = (cluster * 4) + ((i - IPI0_CPU0) % CORES_PER_CLUSTER);
+			if (ccpu == cpu) {
+				writeb_relaxed(cpumask_8, dist_base + GIC_DIST_TARGET + i);
+				writel_relaxed(enablemask, dist_base + GIC_DIST_ENABLE_SET + enableoff);
+			}
+		}
+		writel_relaxed(1, dist_base + GIC_DIST_CTRL);
+	}
 
 	writel_relaxed(0xf0, base + GIC_CPU_PRIMASK);
+
 	writel_relaxed(1, base + GIC_CPU_CTRL);
 }
 
@@ -1526,7 +1544,7 @@ void __init axxia_gic_init_bases(int irq_start,
 
 }
 
-void __cpuinit axxia_gic_secondary_init(void)
+void axxia_gic_secondary_init(void)
 {
 	struct gic_chip_data *gic = &gic_data;
 
diff --git a/arch/arm/mach-axxia/hotplug.c b/arch/arm/mach-axxia/hotplug.c
index d44fbb3..5def7c3 100644
--- a/arch/arm/mach-axxia/hotplug.c
+++ b/arch/arm/mach-axxia/hotplug.c
@@ -85,13 +85,19 @@ static inline void pm_L2_logical_shutdown(u32 cpu)
 			: "Ir" (0x400)
 			: "cc");
 
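+	/* Set ACTLR2 bit 0; pm_cpu_logical_powerup() clears it again on power up. */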
+	asm volatile(
+	"	mrc		p15, 1, %0, c15, c0, 4\n"
+	"	orr	%0, %0, %1\n"
+	"	mcr		p15, 1, %0, c15, c0, 4\n"
+	: "=&r" (val)
+	: "Ir" (0x1)
+	: "cc");
+
 	isb();
 	dsb();
 
-	/* Clear and invalidate all L1 and L2 data cache */
 	flush_cache_all();
 
-
 	/* Turn the DBG Double Lock quiet */
 	asm volatile(
 			/*
@@ -163,6 +169,8 @@ static inline void cpu_leave_lowpower(void)
 			: "=&r" (v)
 			: "Ir" (CR_C), "Ir" (0x40)
 			: "cc");
+	isb();
+	dsb();
 }
 
 static void __ref platform_do_lowpower(unsigned int cpu, int *spurious)
@@ -211,7 +219,9 @@ int axxia_platform_cpu_kill(unsigned int cpu)
 {
 
 #ifdef CONFIG_HOTPLUG_CPU_COMPLETE_POWER_DOWN
+	get_cpu();
 	pm_cpu_shutdown(cpu);
+	put_cpu();
 #endif
 	return 1;
 }
@@ -227,21 +237,6 @@ void axxia_platform_cpu_die(unsigned int cpu)
 #ifdef CONFIG_HOTPLUG_CPU_COMPLETE_POWER_DOWN
 	bool last_cpu;
 
-	int timeout;
-	timeout = 30;
-
-	/* make sure no migrations are happening */
-	while (!axxia_is_empty(&axxia_circ_q)) {
-
-		if (timeout-- == 0)
-			break;
-
-		mdelay(1);
-	}
-
-	if (timeout == 0)
-		pr_err("ERROR: tried to shut down and Q was still full\n");
-
 	last_cpu = pm_cpu_last_of_cluster(cpu);
 	if (last_cpu)
 		pm_L2_logical_shutdown(cpu);
@@ -251,7 +246,6 @@ void axxia_platform_cpu_die(unsigned int cpu)
 	for (;;)
 		wfi();
 
-
 #else /* CPU low power mode */
 
 	int spurious = 0;
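
The get_cpu()/put_cpu() pairs added above (and around the queue
operations in axxia-gic.c) disable preemption so the caller cannot
migrate to another CPU mid-sequence, which is the heart of the
PREEMPT-kernel hang fix. A minimal sketch of the pattern (hypothetical
helper name, not from the patch):

	#include <linux/smp.h>

	static void example_no_migrate_section(void)
	{
		int cpu = get_cpu();	/* disables preemption, returns current CPU */

		/* ... work that must stay on 'cpu', e.g. queuing an RPC
		 * or calling pm_cpu_shutdown() as this patch does ... */

		put_cpu();		/* re-enables preemption */
	}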
diff --git a/arch/arm/mach-axxia/lsi_power_management.c b/arch/arm/mach-axxia/lsi_power_management.c
index 3ae4afc..6deea07 100644
--- a/arch/arm/mach-axxia/lsi_power_management.c
+++ b/arch/arm/mach-axxia/lsi_power_management.c
@@ -53,6 +53,13 @@ PORESET_CLUSTER1,
 PORESET_CLUSTER2,
 PORESET_CLUSTER3 };
 
+static const u32 cluster_to_mask[MAX_CLUSTER] = {
+		IPI0_MASK,
+		IPI1_MASK,
+		IPI2_MASK,
+		IPI3_MASK
+};
+
 static const u32 ipi_register[MAX_IPI] = {
 		NCP_SYSCON_MASK_IPI0,
 		NCP_SYSCON_MASK_IPI1,
@@ -92,6 +99,7 @@ u32 pm_cpu_powered_down;
 
 /*======================= LOCAL FUNCTIONS ==============================*/
 static void pm_set_bits_syscon_register(u32 reg, u32 data);
+static void pm_or_bits_syscon_register(u32 reg, u32 data);
 static void pm_clear_bits_syscon_register(u32 reg, u32 data);
 static bool pm_test_for_bit_with_timeout(u32 reg, u32 bit);
 static bool pm_wait_for_bit_clear_with_timeout(u32 reg,
@@ -237,6 +245,11 @@ bool pm_cpu_last_of_cluster(u32 cpu)
 
 static void pm_set_bits_syscon_register(u32 reg, u32 data)
 {
+	writel(data, syscon + reg);
+}
+
+static void pm_or_bits_syscon_register(u32 reg, u32 data)
+{
 	u32 tmp;
 
 	tmp = readl(syscon + reg);
@@ -244,6 +257,7 @@ static void pm_set_bits_syscon_register(u32 reg, u32 data)
 	writel(tmp, syscon + reg);
 }
 
 static void pm_clear_bits_syscon_register(u32 reg, u32 data)
 {
 	u32 tmp;
@@ -419,23 +433,25 @@ dickens_power_up:
 	return rval;
 }
 
-static int pm_enable_ipi_interrupts(u32 cpu)
+static void pm_disable_ipi_interrupts(u32 cpu)
+{
+	pm_clear_bits_syscon_register(ipi_register[cpu], IPI_IRQ_MASK);
+}
+
+static void pm_enable_ipi_interrupts(u32 cpu)
 {
 
 	u32 i;
-	u32 cpumask = 1 << cpu;
 	u32 powered_on_cpu = (~(pm_cpu_powered_down) & IPI_IRQ_MASK);
 
-	/* Enable the CPU IPI */
 	pm_set_bits_syscon_register(ipi_register[cpu], powered_on_cpu);
 
-	for (i = 0; i < MAX_IPI; i++) {
+	for (i = 0; i < MAX_CPUS; i++) {
 		if ((1 << i) & powered_on_cpu)
-			pm_set_bits_syscon_register(ipi_register[i], cpumask);
+			pm_or_bits_syscon_register(ipi_register[i], (1 << cpu));
 	}
 
-	return 0;
-
 }
 
 void pm_init_syscon(void)
@@ -483,7 +499,7 @@ void pm_cpu_shutdown(u32 cpu)
 	if (last_cpu) {
 
 		/* Disable all the interrupts to the cluster gic */
-		pm_set_bits_syscon_register(NCP_SYSCON_GIC_DISABLE, cluster_mask);
+		pm_or_bits_syscon_register(NCP_SYSCON_GIC_DISABLE, cluster_mask);
 
 		/* Remove the cluster from the Dickens coherency domain */
 		pm_dickens_logical_shutdown(cluster);
@@ -501,7 +517,7 @@ void pm_cpu_shutdown(u32 cpu)
 		}
 
 		/* Turn off the ACE */
-		pm_set_bits_syscon_register(NCP_SYSCON_PWR_ACEPWRDNRQ, cluster_mask);
+		pm_or_bits_syscon_register(NCP_SYSCON_PWR_ACEPWRDNRQ, cluster_mask);
 
 		/* Wait for ACE to complete power off */
 		success = pm_wait_for_bit_clear_with_timeout(NCP_SYSCON_PWR_NACEPWRDNACK, cluster);
@@ -512,7 +528,7 @@ void pm_cpu_shutdown(u32 cpu)
 		}
 
 		/* Isolate the cluster */
-		pm_set_bits_syscon_register(NCP_SYSCON_PWR_ISOLATEL2MISC, cluster_mask);
+		pm_or_bits_syscon_register(NCP_SYSCON_PWR_ISOLATEL2MISC, cluster_mask);
 
 		/* Wait for WFI L2 to go to standby */
 		success = pm_test_for_bit_with_timeout(NCP_SYSCON_PWR_STANDBYWFIL2, cluster);
@@ -555,21 +571,10 @@ int pm_cpu_powerup(u32 cpu)
 	u32 reqcpu = cpu_logical_map(cpu);
 	u32 cluster = reqcpu / CORES_PER_CLUSTER;
 	u32 cluster_mask = (0x01 << cluster);
-	u32 timeout;
 
 	pm_init_syscon();
 
 	/*
-	 * The key value has to be written before the CPU RST can be written.
-	 */
-	pm_set_bits_syscon_register(NCP_SYSCON_KEY, VALID_KEY_VALUE);
-	pm_set_bits_syscon_register(NCP_SYSCON_PWRUP_CPU_RST, cpu_mask);
-
-	/* Hold the CPU in reset */
-	pm_set_bits_syscon_register(NCP_SYSCON_KEY, VALID_KEY_VALUE);
-	pm_set_bits_syscon_register(NCP_SYSCON_HOLD_CPU, cpu_mask);
-
-	/*
 	 * Is this the first cpu of a cluster to come back on?
 	 * Then power up the L2 cache.
 	 */
@@ -581,11 +586,18 @@ int pm_cpu_powerup(u32 cpu)
 			pr_err("CPU: Failed the logical L2 power up\n");
 			goto pm_power_up;
 		}
-		pm_clear_bits_syscon_register(NCP_SYSCON_GIC_DISABLE, cluster_mask);
 		cluster_power_up[cluster] = true;
+		pm_clear_bits_syscon_register(NCP_SYSCON_GIC_DISABLE, cluster_mask);
+	} else {
+		/* Set the CPU into reset */
+		pm_set_bits_syscon_register(NCP_SYSCON_KEY, VALID_KEY_VALUE);
+		pm_or_bits_syscon_register(NCP_SYSCON_PWRUP_CPU_RST, cpu_mask);
 
 	}
 
 	/*
 	 * Power up the CPU
 	 */
@@ -595,22 +607,6 @@ int pm_cpu_powerup(u32 cpu)
 		goto pm_power_up;
 	}
 
-	timeout = 30;
-
-	/* wait max 10 ms until cpuX is on */
-	while (!pm_cpu_active(cpu)) {
-
-		if (timeout-- == 0)
-			break;
-
-		mdelay(1);
-	}
-
-	if (timeout == 0) {
-		rval =  -ETIMEDOUT;
-		goto pm_power_up;
-	}
-
 	/*
 	 * The key value must be written before the CPU RST can be written.
 	 */
@@ -618,12 +614,6 @@ int pm_cpu_powerup(u32 cpu)
 	pm_clear_bits_syscon_register(NCP_SYSCON_PWRUP_CPU_RST,	cpu_mask);
 
 	/*
-	 * The key value must be written before HOLD CPU can be written.
-	 */
-	pm_set_bits_syscon_register(NCP_SYSCON_KEY, VALID_KEY_VALUE);
-	pm_clear_bits_syscon_register(NCP_SYSCON_HOLD_CPU, cpu_mask);
-
-	/*
 	 * Clear the powered down mask
 	 */
 	pm_cpu_powered_down &= ~(1 << cpu);
@@ -631,8 +621,9 @@ int pm_cpu_powerup(u32 cpu)
 	/* Enable the CPU IPI */
 	pm_enable_ipi_interrupts(cpu);
 
-pm_power_up:
 
+pm_power_up:
 	iounmap(syscon);
 	return rval;
 }
@@ -654,19 +645,61 @@ inline void pm_cpu_logical_powerup(void)
 	"	mrc	p15, 0, %0, c1, c0, 0\n"
 	"	orr	%0, %0, %2\n"
 	"	mcr	p15, 0, %0, c1, c0, 0\n"
-	"	mrc	p15, 0, %0, c1, c0, 1\n"
-	"	orr	%0, %0, %3\n"
-	"	mcr	p15, 0, %0, c1, c0, 1\n"
 	  : "=&r" (v)
-	  : "Ir" (CR_C), "Ir" (CR_I), "Ir" (0x40)
+	  : "Ir" (CR_C), "Ir" (CR_I)
 	  : "cc");
 
+	/*
+	 * Initialize the ACTLR2 register (all cores).
+	 */
+
 	asm volatile(
-	"       mrc     p15, 1, %0, c9, c0, 2\n"
+	"	mrc		p15, 1, %0, c15, c0, 4\n"
+	"	bic	%0, %0, %1\n"
+	"	mcr		p15, 1, %0, c15, c0, 4\n"
 	: "=&r" (v)
 	: "Ir" (0x1)
 	: "cc");
 
+	isb();
+	dsb();
+}
+
+inline void pm_cluster_logical_powerup(void)
+{
+	unsigned int v;
+
+	/*
+	 * Initialize the L2CTLR register (primary core in each cluster).
+	 */
+	asm volatile(
+	"	mrc	p15, 1, %0, c9, c0, 2\n"
+	"	orr	%0, %0, %1\n"
+	"	orr	%0, %0, %2\n"
+	"	mcr	p15, 1, %0, c9, c0, 2"
+	  : "=&r" (v)
+	  : "Ir" (0x01), "Ir" (0x1 << 21)
+	  : "cc");
+	isb();
+	dsb();
+
+	/*
+	 * Initialize the L2ACTLR register (primary core in each cluster).
+	 */
+	asm volatile(
+	"	mrc	p15, 1, r0, c15, c0, 0\n"
+	"	orr	%0, %0, %1\n"
+	"	orr	%0, %0, %2\n"
+	"	orr	%0, %0, %3\n"
+	"	orr	%0, %0, %4\n"
+	"	orr	%0, %0, %5\n"
+	"	mcr	p15, 1, %0, c15, c0, 0"
+	  : "=&r" (v)
+	  : "Ir" (0x1 << 3), "Ir" (0x1 << 7), "Ir" (0x1 << 12), "Ir" (0x1 << 13), "Ir" (0x1 << 14)
+	  : "cc");
+	isb();
+	dsb();
 }
 
 static int pm_cpu_physical_isolation_and_power_down(int cpu)
@@ -678,7 +711,7 @@ static int pm_cpu_physical_isolation_and_power_down(int cpu)
 	u32 mask = (0x01 << cpu);
 
 	/* Disable the CPU IPI */
-	pm_clear_bits_syscon_register(ipi_register[cpu], IPI_IRQ_MASK);
+	pm_disable_ipi_interrupts(cpu);
 
 	/* Initiate power down of the CPU's HS Rams */
 	pm_clear_bits_syscon_register(NCP_SYSCON_PWR_PWRUPCPURAM, mask);
@@ -692,12 +725,12 @@ static int pm_cpu_physical_isolation_and_power_down(int cpu)
 	}
 
 	/* Activate the CPU's isolation clamps */
-	pm_set_bits_syscon_register(NCP_SYSCON_PWR_ISOLATECPU, mask);
+	pm_or_bits_syscon_register(NCP_SYSCON_PWR_ISOLATECPU, mask);
 
 	/* Initiate power down of the CPU logic */
 	pm_clear_bits_syscon_register(NCP_SYSCON_PWR_PWRUPCPUSTG2, mask);
 
-	udelay(10);
+	udelay(16);
 
 	/* Continue power down of the CPU logic */
 	pm_clear_bits_syscon_register(NCP_SYSCON_PWR_PWRUPCPUSTG1, mask);
@@ -722,7 +755,7 @@ static int pm_cpu_physical_connection_and_power_up(int cpu)
 	u32 mask = (0x01 << cpu);
 
 	/* Initiate power up of the CPU */
-	pm_set_bits_syscon_register(NCP_SYSCON_PWR_PWRUPCPUSTG1, mask);
+	pm_or_bits_syscon_register(NCP_SYSCON_PWR_PWRUPCPUSTG1, mask);
 
 	/* Wait until CPU logic power up is complete */
 	success = pm_wait_for_bit_clear_with_timeout(NCP_SYSCON_PWR_NPWRUPCPUSTG1_ACK, cpu);
@@ -733,12 +766,12 @@ static int pm_cpu_physical_connection_and_power_up(int cpu)
 	}
 
 	/* Continue stage 2 power up of the CPU*/
-	pm_set_bits_syscon_register(NCP_SYSCON_PWR_PWRUPCPUSTG2, mask);
+	pm_or_bits_syscon_register(NCP_SYSCON_PWR_PWRUPCPUSTG2, mask);
 
-	udelay(20);
+	udelay(16);
 
 	/* Initiate power up of HS Rams */
-	pm_set_bits_syscon_register(NCP_SYSCON_PWR_PWRUPCPURAM, mask);
+	pm_or_bits_syscon_register(NCP_SYSCON_PWR_PWRUPCPURAM, mask);
 
 	/* Wait until the RAM power up is complete */
 	success = pm_wait_for_bit_clear_with_timeout(NCP_SYSCON_PWR_NPWRUPCPURAM_ACK, cpu);
@@ -751,6 +784,8 @@ static int pm_cpu_physical_connection_and_power_up(int cpu)
 	/* Release the CPU's isolation clamps */
 	pm_clear_bits_syscon_register(NCP_SYSCON_PWR_ISOLATECPU, mask);
 
+	udelay(16);
+
 power_up_cleanup:
 
 
@@ -765,7 +800,7 @@ static void pm_L2_isolation_and_power_down(int cluster)
 	u32 mask = (0x1 << cluster);
 
 	/* Enable the chip select for the cluster */
-	pm_set_bits_syscon_register(NCP_SYSCON_PWR_CHIPSELECTEN, mask);
+	pm_or_bits_syscon_register(NCP_SYSCON_PWR_CHIPSELECTEN, mask);
 
 	/* Disable the hsram */
 	pm_clear_bits_syscon_register(NCP_SYSCON_PWR_PWRUPL2HSRAM, mask);
@@ -900,7 +935,7 @@ static int pm_L2_physical_connection_and_power_up(u32 cluster)
 	int rval = 0;
 
 	/* Power up stage 1 */
-	pm_set_bits_syscon_register(NCP_SYSCON_PWR_PWRUPL2LGCSTG1, mask);
+	pm_or_bits_syscon_register(NCP_SYSCON_PWR_PWRUPL2LGCSTG1, mask);
 
 	/* Wait for the stage 1 power up to complete */
 	success = pm_wait_for_bit_clear_with_timeout(NCP_SYSCON_PWR_NPWRUPL2LGCSTG1_ACK, cluster);
@@ -911,13 +946,13 @@ static int pm_L2_physical_connection_and_power_up(u32 cluster)
 	}
 
 	/* Power on stage 2 */
-	pm_set_bits_syscon_register(NCP_SYSCON_PWR_PWRUPL2LGCSTG2, mask);
+	pm_or_bits_syscon_register(NCP_SYSCON_PWR_PWRUPL2LGCSTG2, mask);
 
 	/* Set the chip select */
-	pm_set_bits_syscon_register(NCP_SYSCON_PWR_CHIPSELECTEN, mask);
+	pm_or_bits_syscon_register(NCP_SYSCON_PWR_CHIPSELECTEN, mask);
 
 	/* Power up the snoop ram */
-	pm_set_bits_syscon_register(NCP_SYSCON_PWR_PWRUPL2HSRAM, mask);
+	pm_or_bits_syscon_register(NCP_SYSCON_PWR_PWRUPL2HSRAM, mask);
 
 	/* Wait for the HS RAM power up to complete */
 	success = pm_wait_for_bit_clear_with_timeout(NCP_SYSCON_PWR_NPWRUPL2HSRAM_ACK, cluster);
@@ -961,7 +996,7 @@ static int pm_L2_physical_connection_and_power_up(u32 cluster)
 
 		pm_set_bits_syscon_register(syscon,
 				NCP_SYSCON_PWR_PWRUPL21RAM_PWRUPL2RAM1, RAM_BANK0_MASK);
 		udelay(20);
 		pm_set_bits_syscon_register(syscon,
 				NCP_SYSCON_PWR_PWRUPL21RAM_PWRUPL2RAM1, RAM_BANK1_LS_MASK);
 		pm_set_bits_syscon_register(syscon,
@@ -1059,28 +1094,37 @@ static int pm_L2_logical_powerup(u32 cluster, u32 cpu)
 
 	u32 mask = (0x1 << cluster);
 	int rval = 0;
+	u32 cluster_mask;
+
+	if (cluster == 0)
+		cluster_mask = 0xe;
+	else
+		cluster_mask = 0xf << (cluster * 4);
 
 	/* put the cluster into a cpu hold */
-	pm_set_bits_syscon_register(NCP_SYSCON_RESET_AXIS,
+	pm_or_bits_syscon_register(NCP_SYSCON_RESET_AXIS,
 			cluster_to_poreset[cluster]);
 
-	/* Allow the L2 to be reset */
-	pm_clear_bits_syscon_register(NCP_SYSCON_LRSTDISABLE, mask);
+	/*
+	 * The key value has to be written before the CPU RST can be written.
+	 */
+	pm_set_bits_syscon_register(NCP_SYSCON_KEY, VALID_KEY_VALUE);
+	pm_or_bits_syscon_register(NCP_SYSCON_PWRUP_CPU_RST, cluster_mask);
 
 	/* Hold the chip debug cluster */
 	pm_set_bits_syscon_register(NCP_SYSCON_KEY, VALID_KEY_VALUE);
-	pm_set_bits_syscon_register(NCP_SYSCON_HOLD_DBG, mask);
+	pm_or_bits_syscon_register(NCP_SYSCON_HOLD_DBG, mask);
 
 	/* Hold the L2 cluster */
 	pm_set_bits_syscon_register(NCP_SYSCON_KEY, VALID_KEY_VALUE);
-	pm_set_bits_syscon_register(NCP_SYSCON_HOLD_L2, mask);
+	pm_or_bits_syscon_register(NCP_SYSCON_HOLD_L2, mask);
 
 	/* Cluster physical power up */
 	rval = pm_L2_physical_connection_and_power_up(cluster);
 	if (rval)
 		goto exit_pm_L2_logical_powerup;
 
-
 	udelay(16);
 
 	/* take the cluster out of a cpu hold */
@@ -1090,7 +1134,7 @@ static int pm_L2_logical_powerup(u32 cluster, u32 cpu)
 	udelay(64);
 
 	/* Enable the system counter */
-	pm_set_bits_syscon_register(NCP_SYSCON_PWR_CSYSREQ_CNT, mask);
+	pm_or_bits_syscon_register(NCP_SYSCON_PWR_CSYSREQ_CNT, mask);
 
 	/* Release the L2 cluster */
 	pm_set_bits_syscon_register(NCP_SYSCON_KEY, VALID_KEY_VALUE);
@@ -1108,11 +1152,6 @@ static int pm_L2_logical_powerup(u32 cluster, u32 cpu)
 	/* start L2 */
 	pm_clear_bits_syscon_register(NCP_SYSCON_PWR_ACINACTM, mask);
 
-	/* Disable the L2 reset */
-	pm_set_bits_syscon_register(NCP_SYSCON_LRSTDISABLE, mask);
-
-	udelay(64);
-
 exit_pm_L2_logical_powerup:
 
 	return rval;
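
With this change pm_set_bits_syscon_register() becomes a plain write
and the read-modify-write behaviour moves to the new
pm_or_bits_syscon_register(); call sites that must preserve other
clusters' bits were switched to the OR variant. A standalone sketch of
the difference (an ordinary variable stands in for the syscon MMIO
register):

	#include <stdio.h>

	static unsigned int fake_reg = 0x5;	/* stands in for a syscon register */

	static void set_bits(unsigned int data)	/* new pm_set_bits: direct write */
	{
		fake_reg = data;
	}

	static void or_bits(unsigned int data)	/* pm_or_bits: read-OR-write */
	{
		fake_reg |= data;
	}

	int main(void)
	{
		or_bits(0x2);			/* preserves existing bits -> 0x7 */
		printf("after or:  0x%x\n", fake_reg);
		set_bits(0x2);			/* clobbers them -> 0x2 */
		printf("after set: 0x%x\n", fake_reg);
		return 0;
	}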
diff --git a/arch/arm/mach-axxia/lsi_power_management.h b/arch/arm/mach-axxia/lsi_power_management.h
index 4de6bd7..ef70af3 100644
--- a/arch/arm/mach-axxia/lsi_power_management.h
+++ b/arch/arm/mach-axxia/lsi_power_management.h
@@ -150,12 +150,19 @@
 #define		PORESET_CLUSTER2		(0x40000)
 #define		PORESET_CLUSTER3		(0x80000)
 
+/* IPI Masks */
+#define		IPI0_MASK				(0x1111)
+#define		IPI1_MASK				(0x2222)
+#define		IPI2_MASK				(0x4444)
+#define		IPI3_MASK				(0x8888)
+
 /* SYSCON KEY Value */
 #define VALID_KEY_VALUE			(0xAB)
 
 #define MAX_NUM_CLUSTERS    (4)
 #define CORES_PER_CLUSTER   (4)
 #define MAX_IPI				(19)
+#define MAX_CPUS			(MAX_NUM_CLUSTERS * CORES_PER_CLUSTER)
 
 typedef struct {
 	u32 cpu;
@@ -174,9 +181,9 @@ bool pm_cpu_last_of_cluster(u32 cpu);
 void pm_dump_dickens(void);
 void pm_init_cpu(u32 cpu);
 void pm_cpu_logical_powerup(void);
+void pm_cluster_logical_powerup(void);
 bool pm_cpu_active(u32 cpu);
 void pm_init_syscon(void);
-
 extern bool pm_in_progress[];
 extern bool cluster_power_up[];
 extern u32 pm_cpu_powered_down;
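
The IPIn_MASK values added above follow a regular pattern: IPIn_MASK
sets bit position n within each 4-bit (per-cluster) group of the
16-bit CPU mask. A quick standalone check that reproduces the four
constants:

	#include <stdio.h>

	int main(void)
	{
		unsigned int n, i;

		/* Rebuild IPI0..IPI3 masks: bit n of every 4-bit group. */
		for (n = 0; n < 4; n++) {
			unsigned int mask = 0;

			for (i = 0; i < 16; i += 4)
				mask |= 1u << (i + n);
			printf("IPI%u_MASK = 0x%04x\n", n, mask);
		}
		return 0;
	}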
diff --git a/arch/arm/mach-axxia/platsmp.c b/arch/arm/mach-axxia/platsmp.c
index 5781a3c..2804fce 100644
--- a/arch/arm/mach-axxia/platsmp.c
+++ b/arch/arm/mach-axxia/platsmp.c
@@ -24,11 +24,13 @@
 
 #include "axxia.h"
 #include "lsi_power_management.h"
+#include "axxia_circular_queue.h"
 #include <mach/axxia-gic.h>
 
 extern void axxia_secondary_startup(void);
 extern void axxia_cpu_power_management_gic_control(u32 cpu, bool enable);
 extern void axxia_dist_power_management_gic_control(bool enable);
+extern struct circular_queue_t axxia_circ_q;
 
 #define SYSCON_PHYS_ADDR 0x002010030000ULL
 
@@ -62,7 +64,7 @@ static void __init check_fixup_sev(void __iomem *syscon)
 	pr_info("axxia: Cross-cluster SEV fixup: %s\n", wfe_fixup ? "yes" : "no");
 }
 
-static void __cpuinit do_fixup_sev(void)
+static void do_fixup_sev(void)
 {
 	u32 tmp;
 
@@ -79,7 +81,7 @@ static void __cpuinit do_fixup_sev(void)
  * observers, irrespective of whether they're taking part in coherency
  * or not.  This is necessary for the hotplug code to work reliably.
  */
-static void __cpuinit write_pen_release(int val)
+static void write_pen_release(int val)
 {
 	pen_release = val;
 	smp_wmb();
@@ -89,7 +91,7 @@ static void __cpuinit write_pen_release(int val)
 
 static DEFINE_RAW_SPINLOCK(boot_lock);
 
-void __cpuinit axxia_secondary_init(unsigned int cpu)
+void axxia_secondary_init(unsigned int cpu)
 {
 	int phys_cpu;
 	int phys_cluster;
@@ -100,7 +102,7 @@ void __cpuinit axxia_secondary_init(unsigned int cpu)
 	/*
 	 * Only execute this when powering up a cpu for hotplug.
 	 */
-	if (!pm_in_progress[cpu]) {
+	if (!pm_in_progress[phys_cpu]) {
 		/* Fixup for cross-cluster SEV */
 		do_fixup_sev();
 
@@ -108,16 +110,16 @@ void __cpuinit axxia_secondary_init(unsigned int cpu)
 	} else {
 
 #ifdef CONFIG_HOTPLUG_CPU_COMPLETE_POWER_DOWN
+		if (cluster_power_up[phys_cluster])
+			pm_cluster_logical_powerup();
 		pm_cpu_logical_powerup();
-		mdelay(16);
 #endif
-
+		get_cpu();
 		axxia_gic_secondary_init();
+		put_cpu();
 
 #ifdef CONFIG_HOTPLUG_CPU_COMPLETE_POWER_DOWN
-		pm_cpu_logical_powerup();
-		if (cluster_power_up[phys_cluster])
-			cluster_power_up[phys_cluster] = false;
+		cluster_power_up[phys_cluster] = false;
 		pm_in_progress[phys_cpu] = false;
 #endif
 	}
@@ -135,13 +137,14 @@ void __cpuinit axxia_secondary_init(unsigned int cpu)
 	_raw_spin_unlock(&boot_lock);
 }
 
-int __cpuinit axxia_boot_secondary(unsigned int cpu, struct task_struct *idle)
+int axxia_boot_secondary(unsigned int cpu, struct task_struct *idle)
 {
 
 	int phys_cpu, cluster;
 	unsigned long timeout;
 	unsigned long powered_down_cpu;
-	int rVal = 0;
+	u32 i;
 
 
 	/*
@@ -156,12 +159,7 @@ int __cpuinit axxia_boot_secondary(unsigned int cpu, struct task_struct *idle)
 
 	if (powered_down_cpu & (1 << phys_cpu)) {
 		pm_in_progress[phys_cpu] = true;
-
-		rVal = pm_cpu_powerup(phys_cpu);
-		if (rVal) {
-			_raw_spin_unlock(&boot_lock);
-			return rVal;
-		}
+		pm_cpu_powerup(phys_cpu);
 	}
 
 	/*
@@ -200,7 +198,9 @@ int __cpuinit axxia_boot_secondary(unsigned int cpu, struct task_struct *idle)
 		if (pen_release == -1)
 			break;
 
-		udelay(10);
+		/* Brief pause between polls; cpu_relax() keeps the
+		 * loop from being optimized away */
+		for (i = 0; i < 10; i++)
+			cpu_relax();
 	}
 
 	/*
-- 
1.8.1.4


