1/*
2 *  Copyright (C) 2009,2010,2011 Imagination Technologies Ltd.
3 *
4 *  Copyright (C) 2002 ARM Limited, All Rights Reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10#include <linux/atomic.h>
11#include <linux/completion.h>
12#include <linux/delay.h>
13#include <linux/init.h>
14#include <linux/spinlock.h>
15#include <linux/sched.h>
16#include <linux/interrupt.h>
17#include <linux/cache.h>
18#include <linux/profile.h>
19#include <linux/errno.h>
20#include <linux/mm.h>
21#include <linux/err.h>
22#include <linux/cpu.h>
23#include <linux/smp.h>
24#include <linux/seq_file.h>
25#include <linux/irq.h>
26#include <linux/bootmem.h>
27
28#include <asm/cacheflush.h>
29#include <asm/cachepart.h>
30#include <asm/core_reg.h>
31#include <asm/cpu.h>
32#include <asm/global_lock.h>
33#include <asm/metag_mem.h>
34#include <asm/mmu_context.h>
35#include <asm/pgtable.h>
36#include <asm/pgalloc.h>
37#include <asm/processor.h>
38#include <asm/setup.h>
39#include <asm/tlbflush.h>
40#include <asm/hwthread.h>
41#include <asm/traps.h>
42
/*
 * Address of the data (DCPART) / instruction (ICPART) cache partition
 * register for hardware thread n: the SYSC_DCPART0 / SYSC_ICPART0 base
 * plus n strides of SYSC_xCPARTn_STRIDE.
 */
#define SYSC_DCPART(n)	(SYSC_DCPART0 + SYSC_xCPARTn_STRIDE * (n))
#define SYSC_ICPART(n)	(SYSC_ICPART0 + SYSC_xCPARTn_STRIDE * (n))
45
/* Per-CPU TBI pointer; only declared here, the definition is not in this file. */
DECLARE_PER_CPU(PTBI, pTBI);

/*
 * Stack page of the idle task for the secondary CPU currently being
 * booted. __cpu_up() sets it before starting the thread and resets it to
 * NULL once the boot attempt has finished.
 */
void *secondary_data_stack;
49
/*
 * structures for inter-processor calls
 * - A collection of single bit ipi messages.
 */
struct ipi_data {
	spinlock_t lock;		/* taken around every access to 'bits' */
	unsigned long ipi_count;	/* bumped once per do_IPI() invocation */
	unsigned long bits;		/* pending messages, bit N <=> msg type N */
};

/* One ipi_data per CPU; the lock starts out unlocked. */
static DEFINE_PER_CPU(struct ipi_data, ipi_data) = {
	.lock	= __SPIN_LOCK_UNLOCKED(ipi_data.lock),
};
63
/* Held by boot_secondary() while it programs a secondary thread's registers. */
static DEFINE_SPINLOCK(boot_lock);

/*
 * Signalled by secondary_start_kernel() after the new CPU has marked itself
 * online; __cpu_up() waits on it with a timeout.
 */
static DECLARE_COMPLETION(cpu_running);
67
68/*
69 * "thread" is assumed to be a valid Meta hardware thread ID.
70 */
71static int boot_secondary(unsigned int thread, struct task_struct *idle)
72{
73	u32 val;
74
75	/*
76	 * set synchronisation state between this boot processor
77	 * and the secondary one
78	 */
79	spin_lock(&boot_lock);
80
81	core_reg_write(TXUPC_ID, 0, thread, (unsigned int)secondary_startup);
82	core_reg_write(TXUPC_ID, 1, thread, 0);
83
84	/*
85	 * Give the thread privilege (PSTAT) and clear potentially problematic
86	 * bits in the process (namely ISTAT, CBMarker, CBMarkerI, LSM_STEP).
87	 */
88	core_reg_write(TXUCT_ID, TXSTATUS_REGNUM, thread, TXSTATUS_PSTAT_BIT);
89
90	/* Clear the minim enable bit. */
91	val = core_reg_read(TXUCT_ID, TXPRIVEXT_REGNUM, thread);
92	core_reg_write(TXUCT_ID, TXPRIVEXT_REGNUM, thread, val & ~0x80);
93
94	/*
95	 * set the ThreadEnable bit (0x1) in the TXENABLE register
96	 * for the specified thread - off it goes!
97	 */
98	val = core_reg_read(TXUCT_ID, TXENABLE_REGNUM, thread);
99	core_reg_write(TXUCT_ID, TXENABLE_REGNUM, thread, val | 0x1);
100
101	/*
102	 * now the secondary core is starting up let it run its
103	 * calibrations, then wait for it to finish
104	 */
105	spin_unlock(&boot_lock);
106
107	return 0;
108}
109
110/**
111 * describe_cachepart_change: describe a change to cache partitions.
112 * @thread:	Hardware thread number.
113 * @label:	Label of cache type, e.g. "dcache" or "icache".
114 * @sz:		Total size of the cache.
115 * @old:	Old cache partition configuration (*CPART* register).
116 * @new:	New cache partition configuration (*CPART* register).
117 *
118 * If the cache partition has changed, prints a message to the log describing
119 * those changes.
120 */
121static void describe_cachepart_change(unsigned int thread, const char *label,
122				      unsigned int sz, unsigned int old,
123				      unsigned int new)
124{
125	unsigned int lor1, land1, gor1, gand1;
126	unsigned int lor2, land2, gor2, gand2;
127	unsigned int diff = old ^ new;
128
129	if (!diff)
130		return;
131
132	pr_info("Thread %d: %s partition changed:", thread, label);
133	if (diff & (SYSC_xCPARTL_OR_BITS | SYSC_xCPARTL_AND_BITS)) {
134		lor1   = (old & SYSC_xCPARTL_OR_BITS)  >> SYSC_xCPARTL_OR_S;
135		lor2   = (new & SYSC_xCPARTL_OR_BITS)  >> SYSC_xCPARTL_OR_S;
136		land1  = (old & SYSC_xCPARTL_AND_BITS) >> SYSC_xCPARTL_AND_S;
137		land2  = (new & SYSC_xCPARTL_AND_BITS) >> SYSC_xCPARTL_AND_S;
138		pr_cont(" L:%#x+%#x->%#x+%#x",
139			(lor1 * sz) >> 4,
140			((land1 + 1) * sz) >> 4,
141			(lor2 * sz) >> 4,
142			((land2 + 1) * sz) >> 4);
143	}
144	if (diff & (SYSC_xCPARTG_OR_BITS | SYSC_xCPARTG_AND_BITS)) {
145		gor1   = (old & SYSC_xCPARTG_OR_BITS)  >> SYSC_xCPARTG_OR_S;
146		gor2   = (new & SYSC_xCPARTG_OR_BITS)  >> SYSC_xCPARTG_OR_S;
147		gand1  = (old & SYSC_xCPARTG_AND_BITS) >> SYSC_xCPARTG_AND_S;
148		gand2  = (new & SYSC_xCPARTG_AND_BITS) >> SYSC_xCPARTG_AND_S;
149		pr_cont(" G:%#x+%#x->%#x+%#x",
150			(gor1 * sz) >> 4,
151			((gand1 + 1) * sz) >> 4,
152			(gor2 * sz) >> 4,
153			((gand2 + 1) * sz) >> 4);
154	}
155	if (diff & SYSC_CWRMODE_BIT)
156		pr_cont(" %sWR",
157			(new & SYSC_CWRMODE_BIT) ? "+" : "-");
158	if (diff & SYSC_DCPART_GCON_BIT)
159		pr_cont(" %sGCOn",
160			(new & SYSC_DCPART_GCON_BIT) ? "+" : "-");
161	pr_cont("\n");
162}
163
/**
 * setup_smp_cache: ensure cache coherency for new SMP thread.
 * @thread:	New hardware thread number.
 *
 * Ensures that coherency is enabled and that the threads share the same cache
 * partitions: the calling thread's dcache and icache partition registers are
 * copied to @thread (with GCOn forced on for both threads when
 * PAGE_OFFSET < LINGLOBAL_BASE). Any register that ended up with a different
 * value is reported to the log.
 */
static void setup_smp_cache(unsigned int thread)
{
	unsigned int this_thread, lflags;
	unsigned int dcsz, dcpart_this, dcpart_old, dcpart_new;
	unsigned int icsz, icpart_old, icpart_new;

	/*
	 * Copy over the current thread's cache partition configuration to the
	 * new thread so that they share cache partitions.
	 */
	__global_lock2(lflags);
	this_thread = hard_processor_id();
	/* Share dcache partition */
	dcpart_this = metag_in32(SYSC_DCPART(this_thread));
	dcpart_old = metag_in32(SYSC_DCPART(thread));
	dcpart_new = dcpart_this;
#if PAGE_OFFSET < LINGLOBAL_BASE
	/*
	 * For the local data cache to be coherent the threads must also have
	 * GCOn enabled.
	 */
	dcpart_new |= SYSC_DCPART_GCON_BIT;
	metag_out32(dcpart_new, SYSC_DCPART(this_thread));
#endif
	metag_out32(dcpart_new, SYSC_DCPART(thread));
	/* Share icache partition too */
	icpart_new = metag_in32(SYSC_ICPART(this_thread));
	icpart_old = metag_in32(SYSC_ICPART(thread));
	metag_out32(icpart_new, SYSC_ICPART(thread));
	__global_unlock2(lflags);

	/*
	 * Log if the cache partitions were altered so the user is aware of any
	 * potential unintentional cache wastage.
	 *
	 * Each cache type must be scaled by its own size: the icache report
	 * uses the icache size, not the dcache size.
	 */
	dcsz = get_dcache_size();
	icsz = get_icache_size();
	/* Only differs for the calling thread when GCOn had to be added. */
	describe_cachepart_change(this_thread, "dcache", dcsz,
				  dcpart_this, dcpart_new);
	describe_cachepart_change(thread, "dcache", dcsz,
				  dcpart_old, dcpart_new);
	describe_cachepart_change(thread, "icache", icsz,
				  icpart_old, icpart_new);
}
215
216int __cpu_up(unsigned int cpu, struct task_struct *idle)
217{
218	unsigned int thread = cpu_2_hwthread_id[cpu];
219	int ret;
220
221	load_pgd(swapper_pg_dir, thread);
222
223	flush_tlb_all();
224
225	setup_smp_cache(thread);
226
227	/*
228	 * Tell the secondary CPU where to find its idle thread's stack.
229	 */
230	secondary_data_stack = task_stack_page(idle);
231
232	wmb();
233
234	/*
235	 * Now bring the CPU into our world.
236	 */
237	ret = boot_secondary(thread, idle);
238	if (ret == 0) {
239		/*
240		 * CPU was successfully started, wait for it
241		 * to come online or time out.
242		 */
243		wait_for_completion_timeout(&cpu_running,
244					    msecs_to_jiffies(1000));
245
246		if (!cpu_online(cpu))
247			ret = -EIO;
248	}
249
250	secondary_data_stack = NULL;
251
252	if (ret) {
253		pr_crit("CPU%u: processor failed to boot\n", cpu);
254
255		/*
256		 * FIXME: We need to clean up the new idle thread. --rmk
257		 */
258	}
259
260	return ret;
261}
262
263#ifdef CONFIG_HOTPLUG_CPU
264
265/*
266 * __cpu_disable runs on the processor to be shutdown.
267 */
268int __cpu_disable(void)
269{
270	unsigned int cpu = smp_processor_id();
271
272	/*
273	 * Take this CPU offline.  Once we clear this, we can't return,
274	 * and we must not schedule until we're ready to give up the cpu.
275	 */
276	set_cpu_online(cpu, false);
277
278	/*
279	 * OK - migrate IRQs away from this CPU
280	 */
281	migrate_irqs();
282
283	/*
284	 * Flush user cache and TLB mappings, and then remove this CPU
285	 * from the vm mask set of all processes.
286	 */
287	flush_cache_all();
288	local_flush_tlb_all();
289
290	clear_tasks_mm_cpumask(cpu);
291
292	return 0;
293}
294
/*
 * Runs on the thread that requested the shutdown of "cpu": waits for the
 * dying CPU to report its death and logs an error if that does not happen
 * within the timeout passed to cpu_wait_death().
 */
void __cpu_die(unsigned int cpu)
{
	if (cpu_wait_death(cpu, 1))
		return;

	pr_err("CPU%u: unable to kill\n", cpu);
}
304
/*
 * Called from the idle thread for the CPU which has been shutdown.
 *
 * Disables local interrupts, releases the idle task's mm state via
 * idle_task_exit(), reports the death to the waiter in __cpu_die() and
 * finally writes zero to this thread's TXENABLE register.
 *
 * Note that we do not return from this function. If this cpu is
 * brought online again it will need to run secondary_startup().
 */
void cpu_die(void)
{
	local_irq_disable();
	idle_task_exit();

	/* Result deliberately ignored: nothing more can be done from here. */
	(void)cpu_report_death();

	/* x XOR x == 0, so this stores 0 into TXENABLE whatever D0Re0 holds. */
	asm ("XOR	TXENABLE, D0Re0,D0Re0\n");
}
320#endif /* CONFIG_HOTPLUG_CPU */
321
322/*
323 * Called by both boot and secondaries to move global data into
324 * per-processor storage.
325 */
326void smp_store_cpu_info(unsigned int cpuid)
327{
328	struct cpuinfo_metag *cpu_info = &per_cpu(cpu_data, cpuid);
329
330	cpu_info->loops_per_jiffy = loops_per_jiffy;
331}
332
/*
 * This is the secondary CPU boot entry.  We're using this CPUs
 * idle thread stack and the global page tables.
 *
 * Sequence: attach to init_mm, set up this CPU's TBI pointer and traps,
 * calibrate the delay loop, mark the CPU online and wake the waiter in
 * __cpu_up(), enable interrupts, then enter the idle loop through
 * cpu_startup_entry().
 */
asmlinkage void secondary_start_kernel(void)
{
	struct mm_struct *mm = &init_mm;
	unsigned int cpu = smp_processor_id();

	/*
	 * All kernel threads share the same mm context; grab a
	 * reference and switch to it.
	 */
	atomic_inc(&mm->mm_users);
	atomic_inc(&mm->mm_count);
	current->active_mm = mm;
	cpumask_set_cpu(cpu, mm_cpumask(mm));
	enter_lazy_tlb(mm, current);
	local_flush_tlb_all();

	/*
	 * TODO: Some day it might be useful for each Linux CPU to
	 * have its own TBI structure. That would allow each Linux CPU
	 * to run different interrupt handlers for the same IRQ
	 * number.
	 *
	 * For now, simply copying the pointer to the boot CPU's TBI
	 * structure is sufficient because we always want to run the
	 * same interrupt handler whatever CPU takes the interrupt.
	 */
	per_cpu(pTBI, cpu) = __TBI(TBID_ISTAT_BIT);

	/* Without a TBI there is no way to handle interrupts: give up. */
	if (!per_cpu(pTBI, cpu))
		panic("No TBI found!");

	per_cpu_trap_init(cpu);

	preempt_disable();

	setup_priv();

	notify_cpu_starting(cpu);

	pr_info("CPU%u (thread %u): Booted secondary processor\n",
		cpu, cpu_2_hwthread_id[cpu]);

	/* Calibrate first so the stored cpu info holds this CPU's result. */
	calibrate_delay();
	smp_store_cpu_info(cpu);

	/*
	 * OK, now it's safe to let the boot CPU continue
	 *
	 * The online bit is set before complete() because __cpu_up() checks
	 * cpu_online() as soon as its wait returns.
	 */
	set_cpu_online(cpu, true);
	complete(&cpu_running);

	/*
	 * Enable local interrupts.
	 */
	tbi_startup_interrupt(TBID_SIGNUM_TRT);
	local_irq_enable();

	/*
	 * OK, it's off to the idle thread for us
	 */
	cpu_startup_entry(CPUHP_ONLINE);
}
399
400void __init smp_cpus_done(unsigned int max_cpus)
401{
402	int cpu;
403	unsigned long bogosum = 0;
404
405	for_each_online_cpu(cpu)
406		bogosum += per_cpu(cpu_data, cpu).loops_per_jiffy;
407
408	pr_info("SMP: Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
409		num_online_cpus(),
410		bogosum / (500000/HZ),
411		(bogosum / (5000/HZ)) % 100);
412}
413
414void __init smp_prepare_cpus(unsigned int max_cpus)
415{
416	unsigned int cpu = smp_processor_id();
417
418	init_new_context(current, &init_mm);
419	current_thread_info()->cpu = cpu;
420
421	smp_store_cpu_info(cpu);
422	init_cpu_present(cpu_possible_mask);
423}
424
425void __init smp_prepare_boot_cpu(void)
426{
427	unsigned int cpu = smp_processor_id();
428
429	per_cpu(pTBI, cpu) = __TBI(TBID_ISTAT_BIT);
430
431	if (!per_cpu(pTBI, cpu))
432		panic("No TBI found!");
433}
434
435static void smp_cross_call(cpumask_t callmap, enum ipi_msg_type msg);
436
437static void send_ipi_message(const struct cpumask *mask, enum ipi_msg_type msg)
438{
439	unsigned long flags;
440	unsigned int cpu;
441	cpumask_t map;
442
443	cpumask_clear(&map);
444	local_irq_save(flags);
445
446	for_each_cpu(cpu, mask) {
447		struct ipi_data *ipi = &per_cpu(ipi_data, cpu);
448
449		spin_lock(&ipi->lock);
450
451		/*
452		 * KICK interrupts are queued in hardware so we'll get
453		 * multiple interrupts if we call smp_cross_call()
454		 * multiple times for one msg. The problem is that we
455		 * only have one bit for each message - we can't queue
456		 * them in software.
457		 *
458		 * The first time through ipi_handler() we'll clear
459		 * the msg bit, having done all the work. But when we
460		 * return we'll get _another_ interrupt (and another,
461		 * and another until we've handled all the queued
462		 * KICKs). Running ipi_handler() when there's no work
463		 * to do is bad because that's how kick handler
464		 * chaining detects who the KICK was intended for.
465		 * See arch/metag/kernel/kick.c for more details.
466		 *
467		 * So only add 'cpu' to 'map' if we haven't already
468		 * queued a KICK interrupt for 'msg'.
469		 */
470		if (!(ipi->bits & (1 << msg))) {
471			ipi->bits |= 1 << msg;
472			cpumask_set_cpu(cpu, &map);
473		}
474
475		spin_unlock(&ipi->lock);
476	}
477
478	/*
479	 * Call the platform specific cross-CPU call function.
480	 */
481	smp_cross_call(map, msg);
482
483	local_irq_restore(flags);
484}
485
/* Send the IPI_CALL_FUNC message to every CPU in "mask". */
void arch_send_call_function_ipi_mask(const struct cpumask *mask)
{
	send_ipi_message(mask, IPI_CALL_FUNC);
}
490
491void arch_send_call_function_single_ipi(int cpu)
492{
493	send_ipi_message(cpumask_of(cpu), IPI_CALL_FUNC);
494}
495
496void show_ipi_list(struct seq_file *p)
497{
498	unsigned int cpu;
499
500	seq_puts(p, "IPI:");
501
502	for_each_present_cpu(cpu)
503		seq_printf(p, " %10lu", per_cpu(ipi_data, cpu).ipi_count);
504
505	seq_putc(p, '\n');
506}
507
/* Keeps the "stopping" message and stack dump of stop_this_cpu() together. */
static DEFINE_SPINLOCK(stop_lock);
509
510/*
511 * Main handler for inter-processor interrupts
512 *
513 * For Meta, the ipimask now only identifies a single
514 * category of IPI (Bit 1 IPIs have been replaced by a
515 * different mechanism):
516 *
517 *  Bit 0 - Inter-processor function call
518 */
519static int do_IPI(void)
520{
521	unsigned int cpu = smp_processor_id();
522	struct ipi_data *ipi = &per_cpu(ipi_data, cpu);
523	unsigned long msgs, nextmsg;
524	int handled = 0;
525
526	ipi->ipi_count++;
527
528	spin_lock(&ipi->lock);
529	msgs = ipi->bits;
530	nextmsg = msgs & -msgs;
531	ipi->bits &= ~nextmsg;
532	spin_unlock(&ipi->lock);
533
534	if (nextmsg) {
535		handled = 1;
536
537		nextmsg = ffz(~nextmsg);
538		switch (nextmsg) {
539		case IPI_RESCHEDULE:
540			scheduler_ipi();
541			break;
542
543		case IPI_CALL_FUNC:
544			generic_smp_call_function_interrupt();
545			break;
546
547		default:
548			pr_crit("CPU%u: Unknown IPI message 0x%lx\n",
549				cpu, nextmsg);
550			break;
551		}
552	}
553
554	return handled;
555}
556
557void smp_send_reschedule(int cpu)
558{
559	send_ipi_message(cpumask_of(cpu), IPI_RESCHEDULE);
560}
561
562static void stop_this_cpu(void *data)
563{
564	unsigned int cpu = smp_processor_id();
565
566	if (system_state == SYSTEM_BOOTING ||
567	    system_state == SYSTEM_RUNNING) {
568		spin_lock(&stop_lock);
569		pr_crit("CPU%u: stopping\n", cpu);
570		dump_stack();
571		spin_unlock(&stop_lock);
572	}
573
574	set_cpu_online(cpu, false);
575
576	local_irq_disable();
577
578	hard_processor_halt(HALT_OK);
579}
580
/*
 * Request stop_this_cpu() via smp_call_function(); the final argument
 * (wait) is 0, so this does not wait for the handlers to complete.
 */
void smp_send_stop(void)
{
	smp_call_function(stop_this_cpu, NULL, 0);
}
585
/*
 * not supported here
 *
 * Always fails with -EINVAL; "multiplier" is ignored.
 */
int setup_profiling_timer(unsigned int multiplier)
{
	return -EINVAL;
}
593
594/*
595 * We use KICKs for inter-processor interrupts.
596 *
597 * For every CPU in "callmap" the IPI data must already have been
598 * stored in that CPU's "ipi_data" member prior to calling this
599 * function.
600 */
601static void kick_raise_softirq(cpumask_t callmap, unsigned int irq)
602{
603	int cpu;
604
605	for_each_cpu(cpu, &callmap) {
606		unsigned int thread;
607
608		thread = cpu_2_hwthread_id[cpu];
609
610		BUG_ON(thread == BAD_HWTHREAD_ID);
611
612		metag_out32(1, T0KICKI + (thread * TnXKICK_STRIDE));
613	}
614}
615
/*
 * Kick handler callback for IPIs: runs do_IPI() and stores its result in
 * *handled (1 if an IPI message was pending for this CPU, 0 otherwise).
 * State is returned unchanged; the remaining arguments are not used.
 */
static TBIRES ipi_handler(TBIRES State, int SigNum, int Triggers,
		   int Inst, PTBI pTBI, int *handled)
{
	*handled = do_IPI();

	return State;
}
623
/* Kick handler descriptor registered by smp_init_cpus(). */
static struct kick_irq_handler ipi_irq = {
	.func = ipi_handler,
};
627
/*
 * Deliver an IPI to every CPU in "callmap" by raising a KICK on each.
 * "msg" is not used here: the message itself has already been recorded in
 * the targets' ipi_data by send_ipi_message().
 */
static void smp_cross_call(cpumask_t callmap, enum ipi_msg_type msg)
{
	kick_raise_softirq(callmap, 1);
}
632
633static inline unsigned int get_core_count(void)
634{
635	int i;
636	unsigned int ret = 0;
637
638	for (i = 0; i < CONFIG_NR_CPUS; i++) {
639		if (core_reg_read(TXUCT_ID, TXENABLE_REGNUM, i))
640			ret++;
641	}
642
643	return ret;
644}
645
646/*
647 * Initialise the CPU possible map early - this describes the CPUs
648 * which may be present or become present in the system.
649 */
650void __init smp_init_cpus(void)
651{
652	unsigned int i, ncores = get_core_count();
653
654	/* If no hwthread_map early param was set use default mapping */
655	for (i = 0; i < NR_CPUS; i++)
656		if (cpu_2_hwthread_id[i] == BAD_HWTHREAD_ID) {
657			cpu_2_hwthread_id[i] = i;
658			hwthread_id_2_cpu[i] = i;
659		}
660
661	for (i = 0; i < ncores; i++)
662		set_cpu_possible(i, true);
663
664	kick_register_func(&ipi_irq);
665}
666