1 /*
2  *  Copyright (C) 2009,2010,2011 Imagination Technologies Ltd.
3  *
4  *  Copyright (C) 2002 ARM Limited, All Rights Reserved.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 as
8  * published by the Free Software Foundation.
9  */
10 #include <linux/atomic.h>
11 #include <linux/completion.h>
12 #include <linux/delay.h>
13 #include <linux/init.h>
14 #include <linux/spinlock.h>
15 #include <linux/sched.h>
16 #include <linux/interrupt.h>
17 #include <linux/cache.h>
18 #include <linux/profile.h>
19 #include <linux/errno.h>
20 #include <linux/mm.h>
21 #include <linux/err.h>
22 #include <linux/cpu.h>
23 #include <linux/smp.h>
24 #include <linux/seq_file.h>
25 #include <linux/irq.h>
26 #include <linux/bootmem.h>
27 
28 #include <asm/cacheflush.h>
29 #include <asm/cachepart.h>
30 #include <asm/core_reg.h>
31 #include <asm/cpu.h>
32 #include <asm/global_lock.h>
33 #include <asm/metag_mem.h>
34 #include <asm/mmu_context.h>
35 #include <asm/pgtable.h>
36 #include <asm/pgalloc.h>
37 #include <asm/processor.h>
38 #include <asm/setup.h>
39 #include <asm/tlbflush.h>
40 #include <asm/hwthread.h>
41 #include <asm/traps.h>
42 
/*
 * Address of the data / instruction cache partition (*CPART*) register
 * belonging to hardware thread n.
 */
#define SYSC_DCPART(n)	(SYSC_DCPART0 + SYSC_xCPARTn_STRIDE * (n))
#define SYSC_ICPART(n)	(SYSC_ICPART0 + SYSC_xCPARTn_STRIDE * (n))

/* Per-CPU TBI pointer; declared here, written by the CPU setup code below. */
DECLARE_PER_CPU(PTBI, pTBI);

/*
 * Stack page of the idle task for the secondary CPU currently being booted.
 * __cpu_up() sets it before starting the thread and resets it to NULL
 * afterwards.
 */
void *secondary_data_stack;

/*
 * structures for inter-processor calls
 * - A collection of single bit ipi messages.
 *
 * @lock:	taken around every access to @bits.
 * @ipi_count:	incremented on each call to do_IPI(); reported by
 *		show_ipi_list().
 * @bits:	one pending bit per ipi_msg_type, set by send_ipi_message()
 *		and cleared by do_IPI().
 */
struct ipi_data {
	spinlock_t lock;
	unsigned long ipi_count;
	unsigned long bits;
};

/* One ipi_data instance per CPU, with its lock initialised unlocked. */
static DEFINE_PER_CPU(struct ipi_data, ipi_data) = {
	.lock	= __SPIN_LOCK_UNLOCKED(ipi_data.lock),
};

/* Held by boot_secondary() while it programs a secondary thread's registers. */
static DEFINE_SPINLOCK(boot_lock);

/*
 * Completed by secondary_start_kernel() once the new CPU is marked online;
 * __cpu_up() waits on it.
 */
static DECLARE_COMPLETION(cpu_running);
67 
68 /*
69  * "thread" is assumed to be a valid Meta hardware thread ID.
70  */
boot_secondary(unsigned int thread,struct task_struct * idle)71 static int boot_secondary(unsigned int thread, struct task_struct *idle)
72 {
73 	u32 val;
74 
75 	/*
76 	 * set synchronisation state between this boot processor
77 	 * and the secondary one
78 	 */
79 	spin_lock(&boot_lock);
80 
81 	core_reg_write(TXUPC_ID, 0, thread, (unsigned int)secondary_startup);
82 	core_reg_write(TXUPC_ID, 1, thread, 0);
83 
84 	/*
85 	 * Give the thread privilege (PSTAT) and clear potentially problematic
86 	 * bits in the process (namely ISTAT, CBMarker, CBMarkerI, LSM_STEP).
87 	 */
88 	core_reg_write(TXUCT_ID, TXSTATUS_REGNUM, thread, TXSTATUS_PSTAT_BIT);
89 
90 	/* Clear the minim enable bit. */
91 	val = core_reg_read(TXUCT_ID, TXPRIVEXT_REGNUM, thread);
92 	core_reg_write(TXUCT_ID, TXPRIVEXT_REGNUM, thread, val & ~0x80);
93 
94 	/*
95 	 * set the ThreadEnable bit (0x1) in the TXENABLE register
96 	 * for the specified thread - off it goes!
97 	 */
98 	val = core_reg_read(TXUCT_ID, TXENABLE_REGNUM, thread);
99 	core_reg_write(TXUCT_ID, TXENABLE_REGNUM, thread, val | 0x1);
100 
101 	/*
102 	 * now the secondary core is starting up let it run its
103 	 * calibrations, then wait for it to finish
104 	 */
105 	spin_unlock(&boot_lock);
106 
107 	return 0;
108 }
109 
110 /**
111  * describe_cachepart_change: describe a change to cache partitions.
112  * @thread:	Hardware thread number.
113  * @label:	Label of cache type, e.g. "dcache" or "icache".
114  * @sz:		Total size of the cache.
115  * @old:	Old cache partition configuration (*CPART* register).
116  * @new:	New cache partition configuration (*CPART* register).
117  *
118  * If the cache partition has changed, prints a message to the log describing
119  * those changes.
120  */
describe_cachepart_change(unsigned int thread,const char * label,unsigned int sz,unsigned int old,unsigned int new)121 static void describe_cachepart_change(unsigned int thread, const char *label,
122 				      unsigned int sz, unsigned int old,
123 				      unsigned int new)
124 {
125 	unsigned int lor1, land1, gor1, gand1;
126 	unsigned int lor2, land2, gor2, gand2;
127 	unsigned int diff = old ^ new;
128 
129 	if (!diff)
130 		return;
131 
132 	pr_info("Thread %d: %s partition changed:", thread, label);
133 	if (diff & (SYSC_xCPARTL_OR_BITS | SYSC_xCPARTL_AND_BITS)) {
134 		lor1   = (old & SYSC_xCPARTL_OR_BITS)  >> SYSC_xCPARTL_OR_S;
135 		lor2   = (new & SYSC_xCPARTL_OR_BITS)  >> SYSC_xCPARTL_OR_S;
136 		land1  = (old & SYSC_xCPARTL_AND_BITS) >> SYSC_xCPARTL_AND_S;
137 		land2  = (new & SYSC_xCPARTL_AND_BITS) >> SYSC_xCPARTL_AND_S;
138 		pr_cont(" L:%#x+%#x->%#x+%#x",
139 			(lor1 * sz) >> 4,
140 			((land1 + 1) * sz) >> 4,
141 			(lor2 * sz) >> 4,
142 			((land2 + 1) * sz) >> 4);
143 	}
144 	if (diff & (SYSC_xCPARTG_OR_BITS | SYSC_xCPARTG_AND_BITS)) {
145 		gor1   = (old & SYSC_xCPARTG_OR_BITS)  >> SYSC_xCPARTG_OR_S;
146 		gor2   = (new & SYSC_xCPARTG_OR_BITS)  >> SYSC_xCPARTG_OR_S;
147 		gand1  = (old & SYSC_xCPARTG_AND_BITS) >> SYSC_xCPARTG_AND_S;
148 		gand2  = (new & SYSC_xCPARTG_AND_BITS) >> SYSC_xCPARTG_AND_S;
149 		pr_cont(" G:%#x+%#x->%#x+%#x",
150 			(gor1 * sz) >> 4,
151 			((gand1 + 1) * sz) >> 4,
152 			(gor2 * sz) >> 4,
153 			((gand2 + 1) * sz) >> 4);
154 	}
155 	if (diff & SYSC_CWRMODE_BIT)
156 		pr_cont(" %sWR",
157 			(new & SYSC_CWRMODE_BIT) ? "+" : "-");
158 	if (diff & SYSC_DCPART_GCON_BIT)
159 		pr_cont(" %sGCOn",
160 			(new & SYSC_DCPART_GCON_BIT) ? "+" : "-");
161 	pr_cont("\n");
162 }
163 
/**
 * setup_smp_cache: ensure cache coherency for new SMP thread.
 * @thread:	New hardware thread number.
 *
 * Ensures that coherency is enabled and that the threads share the same cache
 * partitions: the calling thread's dcache and icache partition registers are
 * copied to @thread, and any resulting change is logged.
 */
static void setup_smp_cache(unsigned int thread)
{
	unsigned int this_thread, lflags;
	unsigned int dcsz, dcpart_this, dcpart_old, dcpart_new;
	unsigned int icsz, icpart_old, icpart_new;

	/*
	 * Copy over the current thread's cache partition configuration to the
	 * new thread so that they share cache partitions.
	 */
	__global_lock2(lflags);
	this_thread = hard_processor_id();
	/* Share dcache partition */
	dcpart_this = metag_in32(SYSC_DCPART(this_thread));
	dcpart_old = metag_in32(SYSC_DCPART(thread));
	dcpart_new = dcpart_this;
#if PAGE_OFFSET < LINGLOBAL_BASE
	/*
	 * For the local data cache to be coherent the threads must also have
	 * GCOn enabled.
	 */
	dcpart_new |= SYSC_DCPART_GCON_BIT;
	metag_out32(dcpart_new, SYSC_DCPART(this_thread));
#endif
	metag_out32(dcpart_new, SYSC_DCPART(thread));
	/* Share icache partition too */
	icpart_new = metag_in32(SYSC_ICPART(this_thread));
	icpart_old = metag_in32(SYSC_ICPART(thread));
	metag_out32(icpart_new, SYSC_ICPART(thread));
	__global_unlock2(lflags);

	/*
	 * Log if the cache partitions were altered so the user is aware of any
	 * potential unintentional cache wastage.
	 */
	dcsz = get_dcache_size();
	/* The icache partition sizes must be scaled by the icache size. */
	icsz = get_icache_size();
	describe_cachepart_change(this_thread, "dcache", dcsz,
				  dcpart_this, dcpart_new);
	describe_cachepart_change(thread, "dcache", dcsz,
				  dcpart_old, dcpart_new);
	describe_cachepart_change(thread, "icache", icsz,
				  icpart_old, icpart_new);
}
215 
__cpu_up(unsigned int cpu,struct task_struct * idle)216 int __cpu_up(unsigned int cpu, struct task_struct *idle)
217 {
218 	unsigned int thread = cpu_2_hwthread_id[cpu];
219 	int ret;
220 
221 	load_pgd(swapper_pg_dir, thread);
222 
223 	flush_tlb_all();
224 
225 	setup_smp_cache(thread);
226 
227 	/*
228 	 * Tell the secondary CPU where to find its idle thread's stack.
229 	 */
230 	secondary_data_stack = task_stack_page(idle);
231 
232 	wmb();
233 
234 	/*
235 	 * Now bring the CPU into our world.
236 	 */
237 	ret = boot_secondary(thread, idle);
238 	if (ret == 0) {
239 		/*
240 		 * CPU was successfully started, wait for it
241 		 * to come online or time out.
242 		 */
243 		wait_for_completion_timeout(&cpu_running,
244 					    msecs_to_jiffies(1000));
245 
246 		if (!cpu_online(cpu))
247 			ret = -EIO;
248 	}
249 
250 	secondary_data_stack = NULL;
251 
252 	if (ret) {
253 		pr_crit("CPU%u: processor failed to boot\n", cpu);
254 
255 		/*
256 		 * FIXME: We need to clean up the new idle thread. --rmk
257 		 */
258 	}
259 
260 	return ret;
261 }
262 
263 #ifdef CONFIG_HOTPLUG_CPU
264 
265 /*
266  * __cpu_disable runs on the processor to be shutdown.
267  */
__cpu_disable(void)268 int __cpu_disable(void)
269 {
270 	unsigned int cpu = smp_processor_id();
271 
272 	/*
273 	 * Take this CPU offline.  Once we clear this, we can't return,
274 	 * and we must not schedule until we're ready to give up the cpu.
275 	 */
276 	set_cpu_online(cpu, false);
277 
278 	/*
279 	 * OK - migrate IRQs away from this CPU
280 	 */
281 	migrate_irqs();
282 
283 	/*
284 	 * Flush user cache and TLB mappings, and then remove this CPU
285 	 * from the vm mask set of all processes.
286 	 */
287 	flush_cache_all();
288 	local_flush_tlb_all();
289 
290 	clear_tasks_mm_cpumask(cpu);
291 
292 	return 0;
293 }
294 
/*
 * Called on the thread which is asking for a CPU to be shutdown.
 * Waits (cpu_wait_death() with a timeout argument of 1) until the shutdown
 * has completed, and logs an error if it has not.
 */
void __cpu_die(unsigned int cpu)
{
	if (cpu_wait_death(cpu, 1))
		return;

	pr_err("CPU%u: unable to kill\n", cpu);
}
304 
/*
 * Called from the idle thread for the CPU which has been shutdown.
 *
 * Note that we do not return from this function. If this cpu is
 * brought online again it will need to run secondary_startup().
 */
void cpu_die(void)
{
	local_irq_disable();
	idle_task_exit();

	/* Report the death to the CPU waiting in cpu_wait_death(); result unused. */
	(void)cpu_report_death();

	/*
	 * XOR of a register with itself is zero, so this writes 0 to
	 * TXENABLE. Presumably that clears the ThreadEnable bit which
	 * boot_secondary() sets to start a thread, halting this one.
	 */
	asm ("XOR	TXENABLE, D0Re0,D0Re0\n");
}
320 #endif /* CONFIG_HOTPLUG_CPU */
321 
322 /*
323  * Called by both boot and secondaries to move global data into
324  * per-processor storage.
325  */
smp_store_cpu_info(unsigned int cpuid)326 void smp_store_cpu_info(unsigned int cpuid)
327 {
328 	struct cpuinfo_metag *cpu_info = &per_cpu(cpu_data, cpuid);
329 
330 	cpu_info->loops_per_jiffy = loops_per_jiffy;
331 }
332 
333 /*
334  * This is the secondary CPU boot entry.  We're using this CPUs
335  * idle thread stack and the global page tables.
336  */
secondary_start_kernel(void)337 asmlinkage void secondary_start_kernel(void)
338 {
339 	struct mm_struct *mm = &init_mm;
340 	unsigned int cpu = smp_processor_id();
341 
342 	/*
343 	 * All kernel threads share the same mm context; grab a
344 	 * reference and switch to it.
345 	 */
346 	atomic_inc(&mm->mm_users);
347 	atomic_inc(&mm->mm_count);
348 	current->active_mm = mm;
349 	cpumask_set_cpu(cpu, mm_cpumask(mm));
350 	enter_lazy_tlb(mm, current);
351 	local_flush_tlb_all();
352 
353 	/*
354 	 * TODO: Some day it might be useful for each Linux CPU to
355 	 * have its own TBI structure. That would allow each Linux CPU
356 	 * to run different interrupt handlers for the same IRQ
357 	 * number.
358 	 *
359 	 * For now, simply copying the pointer to the boot CPU's TBI
360 	 * structure is sufficient because we always want to run the
361 	 * same interrupt handler whatever CPU takes the interrupt.
362 	 */
363 	per_cpu(pTBI, cpu) = __TBI(TBID_ISTAT_BIT);
364 
365 	if (!per_cpu(pTBI, cpu))
366 		panic("No TBI found!");
367 
368 	per_cpu_trap_init(cpu);
369 
370 	preempt_disable();
371 
372 	setup_priv();
373 
374 	notify_cpu_starting(cpu);
375 
376 	pr_info("CPU%u (thread %u): Booted secondary processor\n",
377 		cpu, cpu_2_hwthread_id[cpu]);
378 
379 	calibrate_delay();
380 	smp_store_cpu_info(cpu);
381 
382 	/*
383 	 * OK, now it's safe to let the boot CPU continue
384 	 */
385 	set_cpu_online(cpu, true);
386 	complete(&cpu_running);
387 
388 	/*
389 	 * Enable local interrupts.
390 	 */
391 	tbi_startup_interrupt(TBID_SIGNUM_TRT);
392 	local_irq_enable();
393 
394 	/*
395 	 * OK, it's off to the idle thread for us
396 	 */
397 	cpu_startup_entry(CPUHP_ONLINE);
398 }
399 
smp_cpus_done(unsigned int max_cpus)400 void __init smp_cpus_done(unsigned int max_cpus)
401 {
402 	int cpu;
403 	unsigned long bogosum = 0;
404 
405 	for_each_online_cpu(cpu)
406 		bogosum += per_cpu(cpu_data, cpu).loops_per_jiffy;
407 
408 	pr_info("SMP: Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
409 		num_online_cpus(),
410 		bogosum / (500000/HZ),
411 		(bogosum / (5000/HZ)) % 100);
412 }
413 
smp_prepare_cpus(unsigned int max_cpus)414 void __init smp_prepare_cpus(unsigned int max_cpus)
415 {
416 	unsigned int cpu = smp_processor_id();
417 
418 	init_new_context(current, &init_mm);
419 	current_thread_info()->cpu = cpu;
420 
421 	smp_store_cpu_info(cpu);
422 	init_cpu_present(cpu_possible_mask);
423 }
424 
smp_prepare_boot_cpu(void)425 void __init smp_prepare_boot_cpu(void)
426 {
427 	unsigned int cpu = smp_processor_id();
428 
429 	per_cpu(pTBI, cpu) = __TBI(TBID_ISTAT_BIT);
430 
431 	if (!per_cpu(pTBI, cpu))
432 		panic("No TBI found!");
433 }
434 
/* Forward declaration; the definition appears later in this file. */
static void smp_cross_call(cpumask_t callmap, enum ipi_msg_type msg);
436 
send_ipi_message(const struct cpumask * mask,enum ipi_msg_type msg)437 static void send_ipi_message(const struct cpumask *mask, enum ipi_msg_type msg)
438 {
439 	unsigned long flags;
440 	unsigned int cpu;
441 	cpumask_t map;
442 
443 	cpumask_clear(&map);
444 	local_irq_save(flags);
445 
446 	for_each_cpu(cpu, mask) {
447 		struct ipi_data *ipi = &per_cpu(ipi_data, cpu);
448 
449 		spin_lock(&ipi->lock);
450 
451 		/*
452 		 * KICK interrupts are queued in hardware so we'll get
453 		 * multiple interrupts if we call smp_cross_call()
454 		 * multiple times for one msg. The problem is that we
455 		 * only have one bit for each message - we can't queue
456 		 * them in software.
457 		 *
458 		 * The first time through ipi_handler() we'll clear
459 		 * the msg bit, having done all the work. But when we
460 		 * return we'll get _another_ interrupt (and another,
461 		 * and another until we've handled all the queued
462 		 * KICKs). Running ipi_handler() when there's no work
463 		 * to do is bad because that's how kick handler
464 		 * chaining detects who the KICK was intended for.
465 		 * See arch/metag/kernel/kick.c for more details.
466 		 *
467 		 * So only add 'cpu' to 'map' if we haven't already
468 		 * queued a KICK interrupt for 'msg'.
469 		 */
470 		if (!(ipi->bits & (1 << msg))) {
471 			ipi->bits |= 1 << msg;
472 			cpumask_set_cpu(cpu, &map);
473 		}
474 
475 		spin_unlock(&ipi->lock);
476 	}
477 
478 	/*
479 	 * Call the platform specific cross-CPU call function.
480 	 */
481 	smp_cross_call(map, msg);
482 
483 	local_irq_restore(flags);
484 }
485 
/* Send an IPI_CALL_FUNC message to every CPU in "mask". */
void arch_send_call_function_ipi_mask(const struct cpumask *mask)
{
	send_ipi_message(mask, IPI_CALL_FUNC);
}
490 
/* Send an IPI_CALL_FUNC message to the single CPU "cpu". */
void arch_send_call_function_single_ipi(int cpu)
{
	send_ipi_message(cpumask_of(cpu), IPI_CALL_FUNC);
}
495 
show_ipi_list(struct seq_file * p)496 void show_ipi_list(struct seq_file *p)
497 {
498 	unsigned int cpu;
499 
500 	seq_puts(p, "IPI:");
501 
502 	for_each_present_cpu(cpu)
503 		seq_printf(p, " %10lu", per_cpu(ipi_data, cpu).ipi_count);
504 
505 	seq_putc(p, '\n');
506 }
507 
/* Held by stop_this_cpu() around its "stopping" message and stack dump. */
static DEFINE_SPINLOCK(stop_lock);
509 
510 /*
511  * Main handler for inter-processor interrupts
512  *
513  * For Meta, the ipimask now only identifies a single
514  * category of IPI (Bit 1 IPIs have been replaced by a
515  * different mechanism):
516  *
517  *  Bit 0 - Inter-processor function call
518  */
do_IPI(void)519 static int do_IPI(void)
520 {
521 	unsigned int cpu = smp_processor_id();
522 	struct ipi_data *ipi = &per_cpu(ipi_data, cpu);
523 	unsigned long msgs, nextmsg;
524 	int handled = 0;
525 
526 	ipi->ipi_count++;
527 
528 	spin_lock(&ipi->lock);
529 	msgs = ipi->bits;
530 	nextmsg = msgs & -msgs;
531 	ipi->bits &= ~nextmsg;
532 	spin_unlock(&ipi->lock);
533 
534 	if (nextmsg) {
535 		handled = 1;
536 
537 		nextmsg = ffz(~nextmsg);
538 		switch (nextmsg) {
539 		case IPI_RESCHEDULE:
540 			scheduler_ipi();
541 			break;
542 
543 		case IPI_CALL_FUNC:
544 			generic_smp_call_function_interrupt();
545 			break;
546 
547 		default:
548 			pr_crit("CPU%u: Unknown IPI message 0x%lx\n",
549 				cpu, nextmsg);
550 			break;
551 		}
552 	}
553 
554 	return handled;
555 }
556 
/* Send an IPI_RESCHEDULE message to the single CPU "cpu". */
void smp_send_reschedule(int cpu)
{
	send_ipi_message(cpumask_of(cpu), IPI_RESCHEDULE);
}
561 
stop_this_cpu(void * data)562 static void stop_this_cpu(void *data)
563 {
564 	unsigned int cpu = smp_processor_id();
565 
566 	if (system_state == SYSTEM_BOOTING ||
567 	    system_state == SYSTEM_RUNNING) {
568 		spin_lock(&stop_lock);
569 		pr_crit("CPU%u: stopping\n", cpu);
570 		dump_stack();
571 		spin_unlock(&stop_lock);
572 	}
573 
574 	set_cpu_online(cpu, false);
575 
576 	local_irq_disable();
577 
578 	hard_processor_halt(HALT_OK);
579 }
580 
/*
 * Ask the other CPUs to run stop_this_cpu() via smp_call_function(), with
 * a final argument of 0 (no waiting for completion).
 */
void smp_send_stop(void)
{
	smp_call_function(stop_this_cpu, NULL, 0);
}
585 
/*
 * Profiling timer setup is not supported here: "multiplier" is ignored
 * and -EINVAL is always returned.
 */
int setup_profiling_timer(unsigned int multiplier)
{
	(void)multiplier;

	return -EINVAL;
}
593 
594 /*
595  * We use KICKs for inter-processor interrupts.
596  *
597  * For every CPU in "callmap" the IPI data must already have been
598  * stored in that CPU's "ipi_data" member prior to calling this
599  * function.
600  */
kick_raise_softirq(cpumask_t callmap,unsigned int irq)601 static void kick_raise_softirq(cpumask_t callmap, unsigned int irq)
602 {
603 	int cpu;
604 
605 	for_each_cpu(cpu, &callmap) {
606 		unsigned int thread;
607 
608 		thread = cpu_2_hwthread_id[cpu];
609 
610 		BUG_ON(thread == BAD_HWTHREAD_ID);
611 
612 		metag_out32(1, T0KICKI + (thread * TnXKICK_STRIDE));
613 	}
614 }
615 
/*
 * Kick handler installed through ipi_irq. Runs do_IPI() and reports via
 * *handled whether an IPI message was pending (1) or not (0). Returns
 * State unchanged; the remaining parameters are not used.
 */
static TBIRES ipi_handler(TBIRES State, int SigNum, int Triggers,
		   int Inst, PTBI pTBI, int *handled)
{
	*handled = do_IPI();

	return State;
}
623 
/* Kick handler descriptor, registered by smp_init_cpus() with kick_register_func(). */
static struct kick_irq_handler ipi_irq = {
	.func = ipi_handler,
};
627 
/*
 * Raise a KICK on every CPU in "callmap". "msg" is not used here; the
 * message itself has already been recorded in each CPU's ipi_data by the
 * caller. The irq argument passed on (1) is ignored by kick_raise_softirq().
 */
static void smp_cross_call(cpumask_t callmap, enum ipi_msg_type msg)
{
	kick_raise_softirq(callmap, 1);
}
632 
get_core_count(void)633 static inline unsigned int get_core_count(void)
634 {
635 	int i;
636 	unsigned int ret = 0;
637 
638 	for (i = 0; i < CONFIG_NR_CPUS; i++) {
639 		if (core_reg_read(TXUCT_ID, TXENABLE_REGNUM, i))
640 			ret++;
641 	}
642 
643 	return ret;
644 }
645 
646 /*
647  * Initialise the CPU possible map early - this describes the CPUs
648  * which may be present or become present in the system.
649  */
smp_init_cpus(void)650 void __init smp_init_cpus(void)
651 {
652 	unsigned int i, ncores = get_core_count();
653 
654 	/* If no hwthread_map early param was set use default mapping */
655 	for (i = 0; i < NR_CPUS; i++)
656 		if (cpu_2_hwthread_id[i] == BAD_HWTHREAD_ID) {
657 			cpu_2_hwthread_id[i] = i;
658 			hwthread_id_2_cpu[i] = i;
659 		}
660 
661 	for (i = 0; i < ncores; i++)
662 		set_cpu_possible(i, true);
663 
664 	kick_register_func(&ipi_irq);
665 }
666