1/*
2 * linux/kernel/time/tick-broadcast.c
3 *
4 * This file contains functions which emulate a local clock-event
5 * device via a broadcast event source.
6 *
7 * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
8 * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
9 * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
10 *
11 * This code is licenced under the GPL version 2. For details see
12 * kernel-base/COPYING.
13 */
14#include <linux/cpu.h>
15#include <linux/err.h>
16#include <linux/hrtimer.h>
17#include <linux/interrupt.h>
18#include <linux/percpu.h>
19#include <linux/profile.h>
20#include <linux/sched.h>
21#include <linux/smp.h>
22#include <linux/module.h>
23
24#include "tick-internal.h"
25
26/*
27 * Broadcast support for broken x86 hardware, where the local apic
28 * timer stops in C3 state.
29 */
30
31static struct tick_device tick_broadcast_device;
32static cpumask_var_t tick_broadcast_mask;
33static cpumask_var_t tick_broadcast_on;
34static cpumask_var_t tmpmask;
35static DEFINE_RAW_SPINLOCK(tick_broadcast_lock);
36static int tick_broadcast_forced;
37
38#ifdef CONFIG_TICK_ONESHOT
39static void tick_broadcast_clear_oneshot(int cpu);
40static void tick_resume_broadcast_oneshot(struct clock_event_device *bc);
41#else
42static inline void tick_broadcast_clear_oneshot(int cpu) { }
43static inline void tick_resume_broadcast_oneshot(struct clock_event_device *bc) { }
44#endif
45
46/*
47 * Debugging: see timer_list.c
48 */
49struct tick_device *tick_get_broadcast_device(void)
50{
51	return &tick_broadcast_device;
52}
53
54struct cpumask *tick_get_broadcast_mask(void)
55{
56	return tick_broadcast_mask;
57}
58
/*
 * Put the broadcast device @bc into periodic mode. A NULL @bc (no
 * broadcast device installed) is silently ignored.
 */
static void tick_broadcast_start_periodic(struct clock_event_device *bc)
{
	if (!bc)
		return;

	tick_setup_periodic(bc, 1);
}
67
68/*
69 * Check, if the device can be utilized as broadcast device:
70 */
71static bool tick_check_broadcast_device(struct clock_event_device *curdev,
72					struct clock_event_device *newdev)
73{
74	if ((newdev->features & CLOCK_EVT_FEAT_DUMMY) ||
75	    (newdev->features & CLOCK_EVT_FEAT_PERCPU) ||
76	    (newdev->features & CLOCK_EVT_FEAT_C3STOP))
77		return false;
78
79	if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT &&
80	    !(newdev->features & CLOCK_EVT_FEAT_ONESHOT))
81		return false;
82
83	return !curdev || newdev->rating > curdev->rating;
84}
85
86/*
87 * Conditionally install/replace broadcast device
88 */
89void tick_install_broadcast_device(struct clock_event_device *dev)
90{
91	struct clock_event_device *cur = tick_broadcast_device.evtdev;
92
93	if (!tick_check_broadcast_device(cur, dev))
94		return;
95
96	if (!try_module_get(dev->owner))
97		return;
98
99	clockevents_exchange_device(cur, dev);
100	if (cur)
101		cur->event_handler = clockevents_handle_noop;
102	tick_broadcast_device.evtdev = dev;
103	if (!cpumask_empty(tick_broadcast_mask))
104		tick_broadcast_start_periodic(dev);
105	/*
106	 * Inform all cpus about this. We might be in a situation
107	 * where we did not switch to oneshot mode because the per cpu
108	 * devices are affected by CLOCK_EVT_FEAT_C3STOP and the lack
109	 * of a oneshot capable broadcast device. Without that
110	 * notification the systems stays stuck in periodic mode
111	 * forever.
112	 */
113	if (dev->features & CLOCK_EVT_FEAT_ONESHOT)
114		tick_clock_notify();
115}
116
117/*
118 * Check, if the device is the broadcast device
119 */
120int tick_is_broadcast_device(struct clock_event_device *dev)
121{
122	return (dev && tick_broadcast_device.evtdev == dev);
123}
124
125int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq)
126{
127	int ret = -ENODEV;
128
129	if (tick_is_broadcast_device(dev)) {
130		raw_spin_lock(&tick_broadcast_lock);
131		ret = __clockevents_update_freq(dev, freq);
132		raw_spin_unlock(&tick_broadcast_lock);
133	}
134	return ret;
135}
136
137
/*
 * Broadcast function of last resort, installed when a device needs
 * broadcast but no real broadcast function exists. It cannot deliver
 * anything; @mask is ignored and the failure is reported once.
 */
static void err_broadcast(const struct cpumask *mask)
{
	pr_crit_once("Failed to broadcast timer tick. Some CPUs may be unresponsive.\n");
}
142
143static void tick_device_setup_broadcast_func(struct clock_event_device *dev)
144{
145	if (!dev->broadcast)
146		dev->broadcast = tick_broadcast;
147	if (!dev->broadcast) {
148		pr_warn_once("%s depends on broadcast, but no broadcast function available\n",
149			     dev->name);
150		dev->broadcast = err_broadcast;
151	}
152}
153
154/*
155 * Check, if the device is disfunctional and a place holder, which
156 * needs to be handled by the broadcast device.
157 */
158int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
159{
160	struct clock_event_device *bc = tick_broadcast_device.evtdev;
161	unsigned long flags;
162	int ret;
163
164	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
165
166	/*
167	 * Devices might be registered with both periodic and oneshot
168	 * mode disabled. This signals, that the device needs to be
169	 * operated from the broadcast device and is a placeholder for
170	 * the cpu local device.
171	 */
172	if (!tick_device_is_functional(dev)) {
173		dev->event_handler = tick_handle_periodic;
174		tick_device_setup_broadcast_func(dev);
175		cpumask_set_cpu(cpu, tick_broadcast_mask);
176		if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
177			tick_broadcast_start_periodic(bc);
178		else
179			tick_broadcast_setup_oneshot(bc);
180		ret = 1;
181	} else {
182		/*
183		 * Clear the broadcast bit for this cpu if the
184		 * device is not power state affected.
185		 */
186		if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
187			cpumask_clear_cpu(cpu, tick_broadcast_mask);
188		else
189			tick_device_setup_broadcast_func(dev);
190
191		/*
192		 * Clear the broadcast bit if the CPU is not in
193		 * periodic broadcast on state.
194		 */
195		if (!cpumask_test_cpu(cpu, tick_broadcast_on))
196			cpumask_clear_cpu(cpu, tick_broadcast_mask);
197
198		switch (tick_broadcast_device.mode) {
199		case TICKDEV_MODE_ONESHOT:
200			/*
201			 * If the system is in oneshot mode we can
202			 * unconditionally clear the oneshot mask bit,
203			 * because the CPU is running and therefore
204			 * not in an idle state which causes the power
205			 * state affected device to stop. Let the
206			 * caller initialize the device.
207			 */
208			tick_broadcast_clear_oneshot(cpu);
209			ret = 0;
210			break;
211
212		case TICKDEV_MODE_PERIODIC:
213			/*
214			 * If the system is in periodic mode, check
215			 * whether the broadcast device can be
216			 * switched off now.
217			 */
218			if (cpumask_empty(tick_broadcast_mask) && bc)
219				clockevents_shutdown(bc);
220			/*
221			 * If we kept the cpu in the broadcast mask,
222			 * tell the caller to leave the per cpu device
223			 * in shutdown state. The periodic interrupt
224			 * is delivered by the broadcast device.
225			 */
226			ret = cpumask_test_cpu(cpu, tick_broadcast_mask);
227			break;
228		default:
229			/* Nothing to do */
230			ret = 0;
231			break;
232		}
233	}
234	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
235	return ret;
236}
237
238#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
239int tick_receive_broadcast(void)
240{
241	struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
242	struct clock_event_device *evt = td->evtdev;
243
244	if (!evt)
245		return -ENODEV;
246
247	if (!evt->event_handler)
248		return -EINVAL;
249
250	evt->event_handler(evt);
251	return 0;
252}
253#endif
254
255/*
256 * Broadcast the event to the cpus, which are set in the mask (mangled).
257 */
258static void tick_do_broadcast(struct cpumask *mask)
259{
260	int cpu = smp_processor_id();
261	struct tick_device *td;
262
263	/*
264	 * Check, if the current cpu is in the mask
265	 */
266	if (cpumask_test_cpu(cpu, mask)) {
267		cpumask_clear_cpu(cpu, mask);
268		td = &per_cpu(tick_cpu_device, cpu);
269		td->evtdev->event_handler(td->evtdev);
270	}
271
272	if (!cpumask_empty(mask)) {
273		/*
274		 * It might be necessary to actually check whether the devices
275		 * have different broadcast functions. For now, just use the
276		 * one of the first device. This works as long as we have this
277		 * misfeature only on x86 (lapic)
278		 */
279		td = &per_cpu(tick_cpu_device, cpumask_first(mask));
280		td->evtdev->broadcast(mask);
281	}
282}
283
284/*
285 * Periodic broadcast:
286 * - invoke the broadcast handlers
287 */
288static void tick_do_periodic_broadcast(void)
289{
290	cpumask_and(tmpmask, cpu_online_mask, tick_broadcast_mask);
291	tick_do_broadcast(tmpmask);
292}
293
294/*
295 * Event handler for periodic broadcast ticks
296 */
297static void tick_handle_periodic_broadcast(struct clock_event_device *dev)
298{
299	ktime_t next;
300
301	raw_spin_lock(&tick_broadcast_lock);
302
303	tick_do_periodic_broadcast();
304
305	/*
306	 * The device is in periodic mode. No reprogramming necessary:
307	 */
308	if (dev->state == CLOCK_EVT_STATE_PERIODIC)
309		goto unlock;
310
311	/*
312	 * Setup the next period for devices, which do not have
313	 * periodic mode. We read dev->next_event first and add to it
314	 * when the event already expired. clockevents_program_event()
315	 * sets dev->next_event only when the event is really
316	 * programmed to the device.
317	 */
318	for (next = dev->next_event; ;) {
319		next = ktime_add(next, tick_period);
320
321		if (!clockevents_program_event(dev, next, false))
322			goto unlock;
323		tick_do_periodic_broadcast();
324	}
325unlock:
326	raw_spin_unlock(&tick_broadcast_lock);
327}
328
329/**
330 * tick_broadcast_control - Enable/disable or force broadcast mode
331 * @mode:	The selected broadcast mode
332 *
333 * Called when the system enters a state where affected tick devices
334 * might stop. Note: TICK_BROADCAST_FORCE cannot be undone.
335 *
336 * Called with interrupts disabled, so clockevents_lock is not
337 * required here because the local clock event device cannot go away
338 * under us.
339 */
340void tick_broadcast_control(enum tick_broadcast_mode mode)
341{
342	struct clock_event_device *bc, *dev;
343	struct tick_device *td;
344	int cpu, bc_stopped;
345
346	td = this_cpu_ptr(&tick_cpu_device);
347	dev = td->evtdev;
348
349	/*
350	 * Is the device not affected by the powerstate ?
351	 */
352	if (!dev || !(dev->features & CLOCK_EVT_FEAT_C3STOP))
353		return;
354
355	if (!tick_device_is_functional(dev))
356		return;
357
358	raw_spin_lock(&tick_broadcast_lock);
359	cpu = smp_processor_id();
360	bc = tick_broadcast_device.evtdev;
361	bc_stopped = cpumask_empty(tick_broadcast_mask);
362
363	switch (mode) {
364	case TICK_BROADCAST_FORCE:
365		tick_broadcast_forced = 1;
366	case TICK_BROADCAST_ON:
367		cpumask_set_cpu(cpu, tick_broadcast_on);
368		if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) {
369			if (tick_broadcast_device.mode ==
370			    TICKDEV_MODE_PERIODIC)
371				clockevents_shutdown(dev);
372		}
373		break;
374
375	case TICK_BROADCAST_OFF:
376		if (tick_broadcast_forced)
377			break;
378		cpumask_clear_cpu(cpu, tick_broadcast_on);
379		if (!tick_device_is_functional(dev))
380			break;
381		if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_mask)) {
382			if (tick_broadcast_device.mode ==
383			    TICKDEV_MODE_PERIODIC)
384				tick_setup_periodic(dev, 0);
385		}
386		break;
387	}
388
389	if (cpumask_empty(tick_broadcast_mask)) {
390		if (!bc_stopped)
391			clockevents_shutdown(bc);
392	} else if (bc_stopped) {
393		if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
394			tick_broadcast_start_periodic(bc);
395		else
396			tick_broadcast_setup_oneshot(bc);
397	}
398	raw_spin_unlock(&tick_broadcast_lock);
399}
400EXPORT_SYMBOL_GPL(tick_broadcast_control);
401
402/*
403 * Set the periodic handler depending on broadcast on/off
404 */
405void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast)
406{
407	if (!broadcast)
408		dev->event_handler = tick_handle_periodic;
409	else
410		dev->event_handler = tick_handle_periodic_broadcast;
411}
412
413#ifdef CONFIG_HOTPLUG_CPU
414/*
415 * Remove a CPU from broadcasting
416 */
417void tick_shutdown_broadcast(unsigned int cpu)
418{
419	struct clock_event_device *bc;
420	unsigned long flags;
421
422	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
423
424	bc = tick_broadcast_device.evtdev;
425	cpumask_clear_cpu(cpu, tick_broadcast_mask);
426	cpumask_clear_cpu(cpu, tick_broadcast_on);
427
428	if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) {
429		if (bc && cpumask_empty(tick_broadcast_mask))
430			clockevents_shutdown(bc);
431	}
432
433	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
434}
435#endif
436
437void tick_suspend_broadcast(void)
438{
439	struct clock_event_device *bc;
440	unsigned long flags;
441
442	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
443
444	bc = tick_broadcast_device.evtdev;
445	if (bc)
446		clockevents_shutdown(bc);
447
448	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
449}
450
451/*
452 * This is called from tick_resume_local() on a resuming CPU. That's
453 * called from the core resume function, tick_unfreeze() and the magic XEN
454 * resume hackery.
455 *
456 * In none of these cases the broadcast device mode can change and the
457 * bit of the resuming CPU in the broadcast mask is safe as well.
458 */
459bool tick_resume_check_broadcast(void)
460{
461	if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT)
462		return false;
463	else
464		return cpumask_test_cpu(smp_processor_id(), tick_broadcast_mask);
465}
466
467void tick_resume_broadcast(void)
468{
469	struct clock_event_device *bc;
470	unsigned long flags;
471
472	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
473
474	bc = tick_broadcast_device.evtdev;
475
476	if (bc) {
477		clockevents_tick_resume(bc);
478
479		switch (tick_broadcast_device.mode) {
480		case TICKDEV_MODE_PERIODIC:
481			if (!cpumask_empty(tick_broadcast_mask))
482				tick_broadcast_start_periodic(bc);
483			break;
484		case TICKDEV_MODE_ONESHOT:
485			if (!cpumask_empty(tick_broadcast_mask))
486				tick_resume_broadcast_oneshot(bc);
487			break;
488		}
489	}
490	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
491}
492
493#ifdef CONFIG_TICK_ONESHOT
494
495static cpumask_var_t tick_broadcast_oneshot_mask;
496static cpumask_var_t tick_broadcast_pending_mask;
497static cpumask_var_t tick_broadcast_force_mask;
498
499/*
500 * Exposed for debugging: see timer_list.c
501 */
502struct cpumask *tick_get_broadcast_oneshot_mask(void)
503{
504	return tick_broadcast_oneshot_mask;
505}
506
507/*
508 * Called before going idle with interrupts disabled. Checks whether a
509 * broadcast event from the other core is about to happen. We detected
510 * that in tick_broadcast_oneshot_control(). The callsite can use this
511 * to avoid a deep idle transition as we are about to get the
512 * broadcast IPI right away.
513 */
514int tick_check_broadcast_expired(void)
515{
516	return cpumask_test_cpu(smp_processor_id(), tick_broadcast_force_mask);
517}
518
519/*
520 * Set broadcast interrupt affinity
521 */
522static void tick_broadcast_set_affinity(struct clock_event_device *bc,
523					const struct cpumask *cpumask)
524{
525	if (!(bc->features & CLOCK_EVT_FEAT_DYNIRQ))
526		return;
527
528	if (cpumask_equal(bc->cpumask, cpumask))
529		return;
530
531	bc->cpumask = cpumask;
532	irq_set_affinity(bc->irq, bc->cpumask);
533}
534
535static int tick_broadcast_set_event(struct clock_event_device *bc, int cpu,
536				    ktime_t expires, int force)
537{
538	int ret;
539
540	if (bc->state != CLOCK_EVT_STATE_ONESHOT)
541		clockevents_set_state(bc, CLOCK_EVT_STATE_ONESHOT);
542
543	ret = clockevents_program_event(bc, expires, force);
544	if (!ret)
545		tick_broadcast_set_affinity(bc, cpumask_of(cpu));
546	return ret;
547}
548
549static void tick_resume_broadcast_oneshot(struct clock_event_device *bc)
550{
551	clockevents_set_state(bc, CLOCK_EVT_STATE_ONESHOT);
552}
553
554/*
555 * Called from irq_enter() when idle was interrupted to reenable the
556 * per cpu device.
557 */
558void tick_check_oneshot_broadcast_this_cpu(void)
559{
560	if (cpumask_test_cpu(smp_processor_id(), tick_broadcast_oneshot_mask)) {
561		struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
562
563		/*
564		 * We might be in the middle of switching over from
565		 * periodic to oneshot. If the CPU has not yet
566		 * switched over, leave the device alone.
567		 */
568		if (td->mode == TICKDEV_MODE_ONESHOT) {
569			clockevents_set_state(td->evtdev,
570					      CLOCK_EVT_STATE_ONESHOT);
571		}
572	}
573}
574
575/*
576 * Handle oneshot mode broadcasting
577 */
578static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
579{
580	struct tick_device *td;
581	ktime_t now, next_event;
582	int cpu, next_cpu = 0;
583
584	raw_spin_lock(&tick_broadcast_lock);
585again:
586	dev->next_event.tv64 = KTIME_MAX;
587	next_event.tv64 = KTIME_MAX;
588	cpumask_clear(tmpmask);
589	now = ktime_get();
590	/* Find all expired events */
591	for_each_cpu(cpu, tick_broadcast_oneshot_mask) {
592		td = &per_cpu(tick_cpu_device, cpu);
593		if (td->evtdev->next_event.tv64 <= now.tv64) {
594			cpumask_set_cpu(cpu, tmpmask);
595			/*
596			 * Mark the remote cpu in the pending mask, so
597			 * it can avoid reprogramming the cpu local
598			 * timer in tick_broadcast_oneshot_control().
599			 */
600			cpumask_set_cpu(cpu, tick_broadcast_pending_mask);
601		} else if (td->evtdev->next_event.tv64 < next_event.tv64) {
602			next_event.tv64 = td->evtdev->next_event.tv64;
603			next_cpu = cpu;
604		}
605	}
606
607	/*
608	 * Remove the current cpu from the pending mask. The event is
609	 * delivered immediately in tick_do_broadcast() !
610	 */
611	cpumask_clear_cpu(smp_processor_id(), tick_broadcast_pending_mask);
612
613	/* Take care of enforced broadcast requests */
614	cpumask_or(tmpmask, tmpmask, tick_broadcast_force_mask);
615	cpumask_clear(tick_broadcast_force_mask);
616
617	/*
618	 * Sanity check. Catch the case where we try to broadcast to
619	 * offline cpus.
620	 */
621	if (WARN_ON_ONCE(!cpumask_subset(tmpmask, cpu_online_mask)))
622		cpumask_and(tmpmask, tmpmask, cpu_online_mask);
623
624	/*
625	 * Wakeup the cpus which have an expired event.
626	 */
627	tick_do_broadcast(tmpmask);
628
629	/*
630	 * Two reasons for reprogram:
631	 *
632	 * - The global event did not expire any CPU local
633	 * events. This happens in dyntick mode, as the maximum PIT
634	 * delta is quite small.
635	 *
636	 * - There are pending events on sleeping CPUs which were not
637	 * in the event mask
638	 */
639	if (next_event.tv64 != KTIME_MAX) {
640		/*
641		 * Rearm the broadcast device. If event expired,
642		 * repeat the above
643		 */
644		if (tick_broadcast_set_event(dev, next_cpu, next_event, 0))
645			goto again;
646	}
647	raw_spin_unlock(&tick_broadcast_lock);
648}
649
650static int broadcast_needs_cpu(struct clock_event_device *bc, int cpu)
651{
652	if (!(bc->features & CLOCK_EVT_FEAT_HRTIMER))
653		return 0;
654	if (bc->next_event.tv64 == KTIME_MAX)
655		return 0;
656	return bc->bound_on == cpu ? -EBUSY : 0;
657}
658
659static void broadcast_shutdown_local(struct clock_event_device *bc,
660				     struct clock_event_device *dev)
661{
662	/*
663	 * For hrtimer based broadcasting we cannot shutdown the cpu
664	 * local device if our own event is the first one to expire or
665	 * if we own the broadcast timer.
666	 */
667	if (bc->features & CLOCK_EVT_FEAT_HRTIMER) {
668		if (broadcast_needs_cpu(bc, smp_processor_id()))
669			return;
670		if (dev->next_event.tv64 < bc->next_event.tv64)
671			return;
672	}
673	clockevents_set_state(dev, CLOCK_EVT_STATE_SHUTDOWN);
674}
675
676/**
677 * tick_broadcast_oneshot_control - Enter/exit broadcast oneshot mode
678 * @state:	The target state (enter/exit)
679 *
680 * The system enters/leaves a state, where affected devices might stop
681 * Returns 0 on success, -EBUSY if the cpu is used to broadcast wakeups.
682 *
683 * Called with interrupts disabled, so clockevents_lock is not
684 * required here because the local clock event device cannot go away
685 * under us.
686 */
687int tick_broadcast_oneshot_control(enum tick_broadcast_state state)
688{
689	struct clock_event_device *bc, *dev;
690	struct tick_device *td;
691	int cpu, ret = 0;
692	ktime_t now;
693
694	/*
695	 * Periodic mode does not care about the enter/exit of power
696	 * states
697	 */
698	if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
699		return 0;
700
701	/*
702	 * We are called with preemtion disabled from the depth of the
703	 * idle code, so we can't be moved away.
704	 */
705	td = this_cpu_ptr(&tick_cpu_device);
706	dev = td->evtdev;
707
708	if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
709		return 0;
710
711	raw_spin_lock(&tick_broadcast_lock);
712	bc = tick_broadcast_device.evtdev;
713	cpu = smp_processor_id();
714
715	if (state == TICK_BROADCAST_ENTER) {
716		if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) {
717			WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask));
718			broadcast_shutdown_local(bc, dev);
719			/*
720			 * We only reprogram the broadcast timer if we
721			 * did not mark ourself in the force mask and
722			 * if the cpu local event is earlier than the
723			 * broadcast event. If the current CPU is in
724			 * the force mask, then we are going to be
725			 * woken by the IPI right away.
726			 */
727			if (!cpumask_test_cpu(cpu, tick_broadcast_force_mask) &&
728			    dev->next_event.tv64 < bc->next_event.tv64)
729				tick_broadcast_set_event(bc, cpu, dev->next_event, 1);
730		}
731		/*
732		 * If the current CPU owns the hrtimer broadcast
733		 * mechanism, it cannot go deep idle and we remove the
734		 * CPU from the broadcast mask. We don't have to go
735		 * through the EXIT path as the local timer is not
736		 * shutdown.
737		 */
738		ret = broadcast_needs_cpu(bc, cpu);
739		if (ret)
740			cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
741	} else {
742		if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) {
743			clockevents_set_state(dev, CLOCK_EVT_STATE_ONESHOT);
744			/*
745			 * The cpu which was handling the broadcast
746			 * timer marked this cpu in the broadcast
747			 * pending mask and fired the broadcast
748			 * IPI. So we are going to handle the expired
749			 * event anyway via the broadcast IPI
750			 * handler. No need to reprogram the timer
751			 * with an already expired event.
752			 */
753			if (cpumask_test_and_clear_cpu(cpu,
754				       tick_broadcast_pending_mask))
755				goto out;
756
757			/*
758			 * Bail out if there is no next event.
759			 */
760			if (dev->next_event.tv64 == KTIME_MAX)
761				goto out;
762			/*
763			 * If the pending bit is not set, then we are
764			 * either the CPU handling the broadcast
765			 * interrupt or we got woken by something else.
766			 *
767			 * We are not longer in the broadcast mask, so
768			 * if the cpu local expiry time is already
769			 * reached, we would reprogram the cpu local
770			 * timer with an already expired event.
771			 *
772			 * This can lead to a ping-pong when we return
773			 * to idle and therefor rearm the broadcast
774			 * timer before the cpu local timer was able
775			 * to fire. This happens because the forced
776			 * reprogramming makes sure that the event
777			 * will happen in the future and depending on
778			 * the min_delta setting this might be far
779			 * enough out that the ping-pong starts.
780			 *
781			 * If the cpu local next_event has expired
782			 * then we know that the broadcast timer
783			 * next_event has expired as well and
784			 * broadcast is about to be handled. So we
785			 * avoid reprogramming and enforce that the
786			 * broadcast handler, which did not run yet,
787			 * will invoke the cpu local handler.
788			 *
789			 * We cannot call the handler directly from
790			 * here, because we might be in a NOHZ phase
791			 * and we did not go through the irq_enter()
792			 * nohz fixups.
793			 */
794			now = ktime_get();
795			if (dev->next_event.tv64 <= now.tv64) {
796				cpumask_set_cpu(cpu, tick_broadcast_force_mask);
797				goto out;
798			}
799			/*
800			 * We got woken by something else. Reprogram
801			 * the cpu local timer device.
802			 */
803			tick_program_event(dev->next_event, 1);
804		}
805	}
806out:
807	raw_spin_unlock(&tick_broadcast_lock);
808	return ret;
809}
810EXPORT_SYMBOL_GPL(tick_broadcast_oneshot_control);
811
812/*
813 * Reset the one shot broadcast for a cpu
814 *
815 * Called with tick_broadcast_lock held
816 */
817static void tick_broadcast_clear_oneshot(int cpu)
818{
819	cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
820	cpumask_clear_cpu(cpu, tick_broadcast_pending_mask);
821}
822
823static void tick_broadcast_init_next_event(struct cpumask *mask,
824					   ktime_t expires)
825{
826	struct tick_device *td;
827	int cpu;
828
829	for_each_cpu(cpu, mask) {
830		td = &per_cpu(tick_cpu_device, cpu);
831		if (td->evtdev)
832			td->evtdev->next_event = expires;
833	}
834}
835
836/**
837 * tick_broadcast_setup_oneshot - setup the broadcast device
838 */
839void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
840{
841	int cpu = smp_processor_id();
842
843	/* Set it up only once ! */
844	if (bc->event_handler != tick_handle_oneshot_broadcast) {
845		int was_periodic = bc->state == CLOCK_EVT_STATE_PERIODIC;
846
847		bc->event_handler = tick_handle_oneshot_broadcast;
848
849		/*
850		 * We must be careful here. There might be other CPUs
851		 * waiting for periodic broadcast. We need to set the
852		 * oneshot_mask bits for those and program the
853		 * broadcast device to fire.
854		 */
855		cpumask_copy(tmpmask, tick_broadcast_mask);
856		cpumask_clear_cpu(cpu, tmpmask);
857		cpumask_or(tick_broadcast_oneshot_mask,
858			   tick_broadcast_oneshot_mask, tmpmask);
859
860		if (was_periodic && !cpumask_empty(tmpmask)) {
861			clockevents_set_state(bc, CLOCK_EVT_STATE_ONESHOT);
862			tick_broadcast_init_next_event(tmpmask,
863						       tick_next_period);
864			tick_broadcast_set_event(bc, cpu, tick_next_period, 1);
865		} else
866			bc->next_event.tv64 = KTIME_MAX;
867	} else {
868		/*
869		 * The first cpu which switches to oneshot mode sets
870		 * the bit for all other cpus which are in the general
871		 * (periodic) broadcast mask. So the bit is set and
872		 * would prevent the first broadcast enter after this
873		 * to program the bc device.
874		 */
875		tick_broadcast_clear_oneshot(cpu);
876	}
877}
878
879/*
880 * Select oneshot operating mode for the broadcast device
881 */
882void tick_broadcast_switch_to_oneshot(void)
883{
884	struct clock_event_device *bc;
885	unsigned long flags;
886
887	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
888
889	tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT;
890	bc = tick_broadcast_device.evtdev;
891	if (bc)
892		tick_broadcast_setup_oneshot(bc);
893
894	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
895}
896
897#ifdef CONFIG_HOTPLUG_CPU
898void hotplug_cpu__broadcast_tick_pull(int deadcpu)
899{
900	struct clock_event_device *bc;
901	unsigned long flags;
902
903	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
904	bc = tick_broadcast_device.evtdev;
905
906	if (bc && broadcast_needs_cpu(bc, deadcpu)) {
907		/* This moves the broadcast assignment to this CPU: */
908		clockevents_program_event(bc, bc->next_event, 1);
909	}
910	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
911}
912
913/*
914 * Remove a dead CPU from broadcasting
915 */
916void tick_shutdown_broadcast_oneshot(unsigned int cpu)
917{
918	unsigned long flags;
919
920	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
921
922	/*
923	 * Clear the broadcast masks for the dead cpu, but do not stop
924	 * the broadcast device!
925	 */
926	cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
927	cpumask_clear_cpu(cpu, tick_broadcast_pending_mask);
928	cpumask_clear_cpu(cpu, tick_broadcast_force_mask);
929
930	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
931}
932#endif
933
934/*
935 * Check, whether the broadcast device is in one shot mode
936 */
937int tick_broadcast_oneshot_active(void)
938{
939	return tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT;
940}
941
942/*
943 * Check whether the broadcast device supports oneshot.
944 */
945bool tick_broadcast_oneshot_available(void)
946{
947	struct clock_event_device *bc = tick_broadcast_device.evtdev;
948
949	return bc ? bc->features & CLOCK_EVT_FEAT_ONESHOT : false;
950}
951
952#endif
953
954void __init tick_broadcast_init(void)
955{
956	zalloc_cpumask_var(&tick_broadcast_mask, GFP_NOWAIT);
957	zalloc_cpumask_var(&tick_broadcast_on, GFP_NOWAIT);
958	zalloc_cpumask_var(&tmpmask, GFP_NOWAIT);
959#ifdef CONFIG_TICK_ONESHOT
960	zalloc_cpumask_var(&tick_broadcast_oneshot_mask, GFP_NOWAIT);
961	zalloc_cpumask_var(&tick_broadcast_pending_mask, GFP_NOWAIT);
962	zalloc_cpumask_var(&tick_broadcast_force_mask, GFP_NOWAIT);
963#endif
964}
965