1/*
2 * Local APIC related interfaces to support IOAPIC, MSI, HT_IRQ etc.
3 *
4 * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo
5 *	Moved from arch/x86/kernel/apic/io_apic.c.
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11#include <linux/interrupt.h>
12#include <linux/init.h>
13#include <linux/compiler.h>
14#include <linux/irqdomain.h>
15#include <linux/slab.h>
16#include <asm/hw_irq.h>
17#include <asm/apic.h>
18#include <asm/i8259.h>
19#include <asm/desc.h>
20#include <asm/irq_remapping.h>
21
/*
 * Taken in this file around vector assignment and clearing, the per-cpu
 * vector_irq setup for a new cpu, and retriggering an irq via IPI.
 */
static DEFINE_RAW_SPINLOCK(vector_lock);
23
24void lock_vector_lock(void)
25{
26	/* Used to the online set of cpus does not change
27	 * during assign_irq_vector.
28	 */
29	raw_spin_lock(&vector_lock);
30}
31
32void unlock_vector_lock(void)
33{
34	raw_spin_unlock(&vector_lock);
35}
36
/*
 * Return the irq_cfg stored as chip data for @irq, i.e. whatever
 * irq_get_chip_data() hands back for that irq number.
 */
struct irq_cfg *irq_cfg(unsigned int irq)
{
	struct irq_cfg *cfg = irq_get_chip_data(irq);

	return cfg;
}
41
42struct irq_cfg *irqd_cfg(struct irq_data *irq_data)
43{
44	return irq_data->chip_data;
45}
46
47static struct irq_cfg *alloc_irq_cfg(unsigned int irq, int node)
48{
49	struct irq_cfg *cfg;
50
51	cfg = kzalloc_node(sizeof(*cfg), GFP_KERNEL, node);
52	if (!cfg)
53		return NULL;
54	if (!zalloc_cpumask_var_node(&cfg->domain, GFP_KERNEL, node))
55		goto out_cfg;
56	if (!zalloc_cpumask_var_node(&cfg->old_domain, GFP_KERNEL, node))
57		goto out_domain;
58#ifdef	CONFIG_X86_IO_APIC
59	INIT_LIST_HEAD(&cfg->irq_2_pin);
60#endif
61	return cfg;
62out_domain:
63	free_cpumask_var(cfg->domain);
64out_cfg:
65	kfree(cfg);
66	return NULL;
67}
68
69struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node)
70{
71	int res = irq_alloc_desc_at(at, node);
72	struct irq_cfg *cfg;
73
74	if (res < 0) {
75		if (res != -EEXIST)
76			return NULL;
77		cfg = irq_cfg(at);
78		if (cfg)
79			return cfg;
80	}
81
82	cfg = alloc_irq_cfg(at, node);
83	if (cfg)
84		irq_set_chip_data(at, cfg);
85	else
86		irq_free_desc(at);
87	return cfg;
88}
89
90static void free_irq_cfg(unsigned int at, struct irq_cfg *cfg)
91{
92	if (!cfg)
93		return;
94	irq_set_chip_data(at, NULL);
95	free_cpumask_var(cfg->domain);
96	free_cpumask_var(cfg->old_domain);
97	kfree(cfg);
98}
99
100static int
101__assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
102{
103	/*
104	 * NOTE! The local APIC isn't very good at handling
105	 * multiple interrupts at the same interrupt level.
106	 * As the interrupt level is determined by taking the
107	 * vector number and shifting that right by 4, we
108	 * want to spread these out a bit so that they don't
109	 * all fall in the same interrupt level.
110	 *
111	 * Also, we've got to be careful not to trash gate
112	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
113	 */
114	static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START;
115	static int current_offset = VECTOR_OFFSET_START % 16;
116	int cpu, err;
117	cpumask_var_t tmp_mask;
118
119	if (cfg->move_in_progress)
120		return -EBUSY;
121
122	if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
123		return -ENOMEM;
124
125	/* Only try and allocate irqs on cpus that are present */
126	err = -ENOSPC;
127	cpumask_clear(cfg->old_domain);
128	cpu = cpumask_first_and(mask, cpu_online_mask);
129	while (cpu < nr_cpu_ids) {
130		int new_cpu, vector, offset;
131
132		apic->vector_allocation_domain(cpu, tmp_mask, mask);
133
134		if (cpumask_subset(tmp_mask, cfg->domain)) {
135			err = 0;
136			if (cpumask_equal(tmp_mask, cfg->domain))
137				break;
138			/*
139			 * New cpumask using the vector is a proper subset of
140			 * the current in use mask. So cleanup the vector
141			 * allocation for the members that are not used anymore.
142			 */
143			cpumask_andnot(cfg->old_domain, cfg->domain, tmp_mask);
144			cfg->move_in_progress =
145			   cpumask_intersects(cfg->old_domain, cpu_online_mask);
146			cpumask_and(cfg->domain, cfg->domain, tmp_mask);
147			break;
148		}
149
150		vector = current_vector;
151		offset = current_offset;
152next:
153		vector += 16;
154		if (vector >= first_system_vector) {
155			offset = (offset + 1) % 16;
156			vector = FIRST_EXTERNAL_VECTOR + offset;
157		}
158
159		if (unlikely(current_vector == vector)) {
160			cpumask_or(cfg->old_domain, cfg->old_domain, tmp_mask);
161			cpumask_andnot(tmp_mask, mask, cfg->old_domain);
162			cpu = cpumask_first_and(tmp_mask, cpu_online_mask);
163			continue;
164		}
165
166		if (test_bit(vector, used_vectors))
167			goto next;
168
169		for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask) {
170			if (per_cpu(vector_irq, new_cpu)[vector] >
171			    VECTOR_UNDEFINED)
172				goto next;
173		}
174		/* Found one! */
175		current_vector = vector;
176		current_offset = offset;
177		if (cfg->vector) {
178			cpumask_copy(cfg->old_domain, cfg->domain);
179			cfg->move_in_progress =
180			   cpumask_intersects(cfg->old_domain, cpu_online_mask);
181		}
182		for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask)
183			per_cpu(vector_irq, new_cpu)[vector] = irq;
184		cfg->vector = vector;
185		cpumask_copy(cfg->domain, tmp_mask);
186		err = 0;
187		break;
188	}
189	free_cpumask_var(tmp_mask);
190
191	return err;
192}
193
194int assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
195{
196	int err;
197	unsigned long flags;
198
199	raw_spin_lock_irqsave(&vector_lock, flags);
200	err = __assign_irq_vector(irq, cfg, mask);
201	raw_spin_unlock_irqrestore(&vector_lock, flags);
202	return err;
203}
204
205void clear_irq_vector(int irq, struct irq_cfg *cfg)
206{
207	int cpu, vector;
208	unsigned long flags;
209
210	raw_spin_lock_irqsave(&vector_lock, flags);
211	BUG_ON(!cfg->vector);
212
213	vector = cfg->vector;
214	for_each_cpu_and(cpu, cfg->domain, cpu_online_mask)
215		per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED;
216
217	cfg->vector = 0;
218	cpumask_clear(cfg->domain);
219
220	if (likely(!cfg->move_in_progress)) {
221		raw_spin_unlock_irqrestore(&vector_lock, flags);
222		return;
223	}
224
225	for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) {
226		for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS;
227		     vector++) {
228			if (per_cpu(vector_irq, cpu)[vector] != irq)
229				continue;
230			per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED;
231			break;
232		}
233	}
234	cfg->move_in_progress = 0;
235	raw_spin_unlock_irqrestore(&vector_lock, flags);
236}
237
238int __init arch_probe_nr_irqs(void)
239{
240	int nr;
241
242	if (nr_irqs > (NR_VECTORS * nr_cpu_ids))
243		nr_irqs = NR_VECTORS * nr_cpu_ids;
244
245	nr = (gsi_top + nr_legacy_irqs()) + 8 * nr_cpu_ids;
246#if defined(CONFIG_PCI_MSI) || defined(CONFIG_HT_IRQ)
247	/*
248	 * for MSI and HT dyn irq
249	 */
250	if (gsi_top <= NR_IRQS_LEGACY)
251		nr +=  8 * nr_cpu_ids;
252	else
253		nr += gsi_top * 16;
254#endif
255	if (nr < nr_irqs)
256		nr_irqs = nr;
257
258	return nr_legacy_irqs();
259}
260
261int __init arch_early_irq_init(void)
262{
263	return arch_early_ioapic_init();
264}
265
266static void __setup_vector_irq(int cpu)
267{
268	/* Initialize vector_irq on a new cpu */
269	int irq, vector;
270	struct irq_cfg *cfg;
271
272	/*
273	 * vector_lock will make sure that we don't run into irq vector
274	 * assignments that might be happening on another cpu in parallel,
275	 * while we setup our initial vector to irq mappings.
276	 */
277	raw_spin_lock(&vector_lock);
278	/* Mark the inuse vectors */
279	for_each_active_irq(irq) {
280		cfg = irq_cfg(irq);
281		if (!cfg)
282			continue;
283
284		if (!cpumask_test_cpu(cpu, cfg->domain))
285			continue;
286		vector = cfg->vector;
287		per_cpu(vector_irq, cpu)[vector] = irq;
288	}
289	/* Mark the free vectors */
290	for (vector = 0; vector < NR_VECTORS; ++vector) {
291		irq = per_cpu(vector_irq, cpu)[vector];
292		if (irq <= VECTOR_UNDEFINED)
293			continue;
294
295		cfg = irq_cfg(irq);
296		if (!cpumask_test_cpu(cpu, cfg->domain))
297			per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED;
298	}
299	raw_spin_unlock(&vector_lock);
300}
301
302/*
303 * Setup the vector to irq mappings.
304 */
305void setup_vector_irq(int cpu)
306{
307	int irq;
308
309	/*
310	 * On most of the platforms, legacy PIC delivers the interrupts on the
311	 * boot cpu. But there are certain platforms where PIC interrupts are
312	 * delivered to multiple cpu's. If the legacy IRQ is handled by the
313	 * legacy PIC, for the new cpu that is coming online, setup the static
314	 * legacy vector to irq mapping:
315	 */
316	for (irq = 0; irq < nr_legacy_irqs(); irq++)
317		per_cpu(vector_irq, cpu)[IRQ0_VECTOR + irq] = irq;
318
319	__setup_vector_irq(cpu);
320}
321
322int apic_retrigger_irq(struct irq_data *data)
323{
324	struct irq_cfg *cfg = irqd_cfg(data);
325	unsigned long flags;
326	int cpu;
327
328	raw_spin_lock_irqsave(&vector_lock, flags);
329	cpu = cpumask_first_and(cfg->domain, cpu_online_mask);
330	apic->send_IPI_mask(cpumask_of(cpu), cfg->vector);
331	raw_spin_unlock_irqrestore(&vector_lock, flags);
332
333	return 1;
334}
335
/*
 * Ack handler for edge interrupts: finish a pending vector move for this
 * irq, let the core carry out a pending irq move, then ack the local
 * APIC.
 */
void apic_ack_edge(struct irq_data *data)
{
	struct irq_cfg *cfg = irqd_cfg(data);

	irq_complete_move(cfg);
	irq_move_irq(data);
	ack_APIC_irq();
}
342
343/*
344 * Either sets data->affinity to a valid value, and returns
345 * ->cpu_mask_to_apicid of that in dest_id, or returns -1 and
346 * leaves data->affinity untouched.
347 */
348int apic_set_affinity(struct irq_data *data, const struct cpumask *mask,
349		      unsigned int *dest_id)
350{
351	struct irq_cfg *cfg = irqd_cfg(data);
352	unsigned int irq = data->irq;
353	int err;
354
355	if (!config_enabled(CONFIG_SMP))
356		return -EPERM;
357
358	if (!cpumask_intersects(mask, cpu_online_mask))
359		return -EINVAL;
360
361	err = assign_irq_vector(irq, cfg, mask);
362	if (err)
363		return err;
364
365	err = apic->cpu_mask_to_apicid_and(mask, cfg->domain, dest_id);
366	if (err) {
367		if (assign_irq_vector(irq, cfg, data->affinity))
368			pr_err("Failed to recover vector for irq %d\n", irq);
369		return err;
370	}
371
372	cpumask_copy(data->affinity, mask);
373
374	return 0;
375}
376
377#ifdef CONFIG_SMP
378void send_cleanup_vector(struct irq_cfg *cfg)
379{
380	cpumask_var_t cleanup_mask;
381
382	if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
383		unsigned int i;
384
385		for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
386			apic->send_IPI_mask(cpumask_of(i),
387					    IRQ_MOVE_CLEANUP_VECTOR);
388	} else {
389		cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
390		apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
391		free_cpumask_var(cleanup_mask);
392	}
393	cfg->move_in_progress = 0;
394}
395
396asmlinkage __visible void smp_irq_move_cleanup_interrupt(void)
397{
398	unsigned vector, me;
399
400	ack_APIC_irq();
401	irq_enter();
402	exit_idle();
403
404	me = smp_processor_id();
405	for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
406		int irq;
407		unsigned int irr;
408		struct irq_desc *desc;
409		struct irq_cfg *cfg;
410
411		irq = __this_cpu_read(vector_irq[vector]);
412
413		if (irq <= VECTOR_UNDEFINED)
414			continue;
415
416		desc = irq_to_desc(irq);
417		if (!desc)
418			continue;
419
420		cfg = irq_cfg(irq);
421		if (!cfg)
422			continue;
423
424		raw_spin_lock(&desc->lock);
425
426		/*
427		 * Check if the irq migration is in progress. If so, we
428		 * haven't received the cleanup request yet for this irq.
429		 */
430		if (cfg->move_in_progress)
431			goto unlock;
432
433		if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
434			goto unlock;
435
436		irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
437		/*
438		 * Check if the vector that needs to be cleanedup is
439		 * registered at the cpu's IRR. If so, then this is not
440		 * the best time to clean it up. Lets clean it up in the
441		 * next attempt by sending another IRQ_MOVE_CLEANUP_VECTOR
442		 * to myself.
443		 */
444		if (irr  & (1 << (vector % 32))) {
445			apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR);
446			goto unlock;
447		}
448		__this_cpu_write(vector_irq[vector], VECTOR_UNDEFINED);
449unlock:
450		raw_spin_unlock(&desc->lock);
451	}
452
453	irq_exit();
454}
455
456static void __irq_complete_move(struct irq_cfg *cfg, unsigned vector)
457{
458	unsigned me;
459
460	if (likely(!cfg->move_in_progress))
461		return;
462
463	me = smp_processor_id();
464
465	if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
466		send_cleanup_vector(cfg);
467}
468
469void irq_complete_move(struct irq_cfg *cfg)
470{
471	__irq_complete_move(cfg, ~get_irq_regs()->orig_ax);
472}
473
474void irq_force_complete_move(int irq)
475{
476	struct irq_cfg *cfg = irq_cfg(irq);
477
478	if (!cfg)
479		return;
480
481	__irq_complete_move(cfg, cfg->vector);
482}
483#endif
484
485/*
486 * Dynamic irq allocate and deallocation. Should be replaced by irq domains!
487 */
488int arch_setup_hwirq(unsigned int irq, int node)
489{
490	struct irq_cfg *cfg;
491	unsigned long flags;
492	int ret;
493
494	cfg = alloc_irq_cfg(irq, node);
495	if (!cfg)
496		return -ENOMEM;
497
498	raw_spin_lock_irqsave(&vector_lock, flags);
499	ret = __assign_irq_vector(irq, cfg, apic->target_cpus());
500	raw_spin_unlock_irqrestore(&vector_lock, flags);
501
502	if (!ret)
503		irq_set_chip_data(irq, cfg);
504	else
505		free_irq_cfg(irq, cfg);
506	return ret;
507}
508
/*
 * Undo arch_setup_hwirq() for @irq: release its remapping state, give
 * back its vector and free its irq_cfg.
 */
void arch_teardown_hwirq(unsigned int irq)
{
	struct irq_cfg *cfg;

	cfg = irq_cfg(irq);

	free_remapped_irq(irq);
	clear_irq_vector(irq, cfg);
	free_irq_cfg(irq, cfg);
}
517
518static void __init print_APIC_field(int base)
519{
520	int i;
521
522	printk(KERN_DEBUG);
523
524	for (i = 0; i < 8; i++)
525		pr_cont("%08x", apic_read(base + i*0x10));
526
527	pr_cont("\n");
528}
529
530static void __init print_local_APIC(void *dummy)
531{
532	unsigned int i, v, ver, maxlvt;
533	u64 icr;
534
535	pr_debug("printing local APIC contents on CPU#%d/%d:\n",
536		 smp_processor_id(), hard_smp_processor_id());
537	v = apic_read(APIC_ID);
538	pr_info("... APIC ID:      %08x (%01x)\n", v, read_apic_id());
539	v = apic_read(APIC_LVR);
540	pr_info("... APIC VERSION: %08x\n", v);
541	ver = GET_APIC_VERSION(v);
542	maxlvt = lapic_get_maxlvt();
543
544	v = apic_read(APIC_TASKPRI);
545	pr_debug("... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
546
547	/* !82489DX */
548	if (APIC_INTEGRATED(ver)) {
549		if (!APIC_XAPIC(ver)) {
550			v = apic_read(APIC_ARBPRI);
551			pr_debug("... APIC ARBPRI: %08x (%02x)\n",
552				 v, v & APIC_ARBPRI_MASK);
553		}
554		v = apic_read(APIC_PROCPRI);
555		pr_debug("... APIC PROCPRI: %08x\n", v);
556	}
557
558	/*
559	 * Remote read supported only in the 82489DX and local APIC for
560	 * Pentium processors.
561	 */
562	if (!APIC_INTEGRATED(ver) || maxlvt == 3) {
563		v = apic_read(APIC_RRR);
564		pr_debug("... APIC RRR: %08x\n", v);
565	}
566
567	v = apic_read(APIC_LDR);
568	pr_debug("... APIC LDR: %08x\n", v);
569	if (!x2apic_enabled()) {
570		v = apic_read(APIC_DFR);
571		pr_debug("... APIC DFR: %08x\n", v);
572	}
573	v = apic_read(APIC_SPIV);
574	pr_debug("... APIC SPIV: %08x\n", v);
575
576	pr_debug("... APIC ISR field:\n");
577	print_APIC_field(APIC_ISR);
578	pr_debug("... APIC TMR field:\n");
579	print_APIC_field(APIC_TMR);
580	pr_debug("... APIC IRR field:\n");
581	print_APIC_field(APIC_IRR);
582
583	/* !82489DX */
584	if (APIC_INTEGRATED(ver)) {
585		/* Due to the Pentium erratum 3AP. */
586		if (maxlvt > 3)
587			apic_write(APIC_ESR, 0);
588
589		v = apic_read(APIC_ESR);
590		pr_debug("... APIC ESR: %08x\n", v);
591	}
592
593	icr = apic_icr_read();
594	pr_debug("... APIC ICR: %08x\n", (u32)icr);
595	pr_debug("... APIC ICR2: %08x\n", (u32)(icr >> 32));
596
597	v = apic_read(APIC_LVTT);
598	pr_debug("... APIC LVTT: %08x\n", v);
599
600	if (maxlvt > 3) {
601		/* PC is LVT#4. */
602		v = apic_read(APIC_LVTPC);
603		pr_debug("... APIC LVTPC: %08x\n", v);
604	}
605	v = apic_read(APIC_LVT0);
606	pr_debug("... APIC LVT0: %08x\n", v);
607	v = apic_read(APIC_LVT1);
608	pr_debug("... APIC LVT1: %08x\n", v);
609
610	if (maxlvt > 2) {
611		/* ERR is LVT#3. */
612		v = apic_read(APIC_LVTERR);
613		pr_debug("... APIC LVTERR: %08x\n", v);
614	}
615
616	v = apic_read(APIC_TMICT);
617	pr_debug("... APIC TMICT: %08x\n", v);
618	v = apic_read(APIC_TMCCT);
619	pr_debug("... APIC TMCCT: %08x\n", v);
620	v = apic_read(APIC_TDCR);
621	pr_debug("... APIC TDCR: %08x\n", v);
622
623	if (boot_cpu_has(X86_FEATURE_EXTAPIC)) {
624		v = apic_read(APIC_EFEAT);
625		maxlvt = (v >> 16) & 0xff;
626		pr_debug("... APIC EFEAT: %08x\n", v);
627		v = apic_read(APIC_ECTRL);
628		pr_debug("... APIC ECTRL: %08x\n", v);
629		for (i = 0; i < maxlvt; i++) {
630			v = apic_read(APIC_EILVTn(i));
631			pr_debug("... APIC EILVT%d: %08x\n", i, v);
632		}
633	}
634	pr_cont("\n");
635}
636
637static void __init print_local_APICs(int maxcpu)
638{
639	int cpu;
640
641	if (!maxcpu)
642		return;
643
644	preempt_disable();
645	for_each_online_cpu(cpu) {
646		if (cpu >= maxcpu)
647			break;
648		smp_call_function_single(cpu, print_local_APIC, NULL, 1);
649	}
650	preempt_enable();
651}
652
653static void __init print_PIC(void)
654{
655	unsigned int v;
656	unsigned long flags;
657
658	if (!nr_legacy_irqs())
659		return;
660
661	pr_debug("\nprinting PIC contents\n");
662
663	raw_spin_lock_irqsave(&i8259A_lock, flags);
664
665	v = inb(0xa1) << 8 | inb(0x21);
666	pr_debug("... PIC  IMR: %04x\n", v);
667
668	v = inb(0xa0) << 8 | inb(0x20);
669	pr_debug("... PIC  IRR: %04x\n", v);
670
671	outb(0x0b, 0xa0);
672	outb(0x0b, 0x20);
673	v = inb(0xa0) << 8 | inb(0x20);
674	outb(0x0a, 0xa0);
675	outb(0x0a, 0x20);
676
677	raw_spin_unlock_irqrestore(&i8259A_lock, flags);
678
679	pr_debug("... PIC  ISR: %04x\n", v);
680
681	v = inb(0x4d1) << 8 | inb(0x4d0);
682	pr_debug("... PIC ELCR: %04x\n", v);
683}
684
685static int show_lapic __initdata = 1;
686static __init int setup_show_lapic(char *arg)
687{
688	int num = -1;
689
690	if (strcmp(arg, "all") == 0) {
691		show_lapic = CONFIG_NR_CPUS;
692	} else {
693		get_option(&arg, &num);
694		if (num >= 0)
695			show_lapic = num;
696	}
697
698	return 1;
699}
700__setup("show_lapic=", setup_show_lapic);
701
702static int __init print_ICs(void)
703{
704	if (apic_verbosity == APIC_QUIET)
705		return 0;
706
707	print_PIC();
708
709	/* don't print out if apic is not there */
710	if (!cpu_has_apic && !apic_from_smp_config())
711		return 0;
712
713	print_local_APICs(show_lapic);
714	print_IO_APICs();
715
716	return 0;
717}
718
719late_initcall(print_ICs);
720