This source file includes following definitions.
- posix_cputimers_group_init
- update_rlimit_cpu
- lookup_task
- __get_task_for_clock
- get_task_for_clock
- get_task_for_clock_get
- validate_clock_permissions
- bump_cpu_timer
- expiry_cache_is_inactive
- posix_cpu_clock_getres
- posix_cpu_clock_set
- cpu_clock_sample
- store_samples
- task_sample_cputime
- proc_sample_cputime_atomic
- __update_gt_cputime
- update_gt_cputime
- thread_group_sample_cputime
- thread_group_start_cputime
- __thread_group_cputime
- cpu_clock_sample_group
- posix_cpu_clock_get
- posix_cpu_timer_create
- posix_cpu_timer_del
- cleanup_timerqueue
- cleanup_timers
- posix_cpu_timers_exit
- posix_cpu_timers_exit_group
- arm_timer
- cpu_timer_fire
- posix_cpu_timer_set
- posix_cpu_timer_get
- collect_timerqueue
- collect_posix_cputimers
- check_dl_overrun
- check_rlimit
- check_thread_timers
- stop_process_timers
- check_cpu_itimer
- check_process_timers
- posix_cpu_timer_rearm
- task_cputimers_expired
- fastpath_timer_check
- run_posix_cpu_timers
- set_process_cpu_timer
- do_cpu_nanosleep
- posix_cpu_nsleep
- posix_cpu_nsleep_restart
- process_cpu_clock_getres
- process_cpu_clock_get
- process_cpu_timer_create
- process_cpu_nsleep
- thread_cpu_clock_getres
- thread_cpu_clock_get
- thread_cpu_timer_create
1
2
3
4
5
6 #include <linux/sched/signal.h>
7 #include <linux/sched/cputime.h>
8 #include <linux/posix-timers.h>
9 #include <linux/errno.h>
10 #include <linux/math64.h>
11 #include <linux/uaccess.h>
12 #include <linux/kernel_stat.h>
13 #include <trace/events/timer.h>
14 #include <linux/tick.h>
15 #include <linux/workqueue.h>
16 #include <linux/compat.h>
17 #include <linux/sched/deadline.h>
18
19 #include "posix-timers.h"
20
21 static void posix_cpu_timer_rearm(struct k_itimer *timer);
22
23 void posix_cputimers_group_init(struct posix_cputimers *pct, u64 cpu_limit)
24 {
25 posix_cputimers_init(pct);
26 if (cpu_limit != RLIM_INFINITY) {
27 pct->bases[CPUCLOCK_PROF].nextevt = cpu_limit * NSEC_PER_SEC;
28 pct->timers_active = true;
29 }
30 }
31
32
33
34
35
36
37
38 void update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new)
39 {
40 u64 nsecs = rlim_new * NSEC_PER_SEC;
41
42 spin_lock_irq(&task->sighand->siglock);
43 set_process_cpu_timer(task, CPUCLOCK_PROF, &nsecs, NULL);
44 spin_unlock_irq(&task->sighand->siglock);
45 }
46
47
48
49
50 static struct task_struct *lookup_task(const pid_t pid, bool thread,
51 bool gettime)
52 {
53 struct task_struct *p;
54
55
56
57
58
59 if (!pid)
60 return thread ? current : current->group_leader;
61
62 p = find_task_by_vpid(pid);
63 if (!p)
64 return p;
65
66 if (thread)
67 return same_thread_group(p, current) ? p : NULL;
68
69 if (gettime) {
70
71
72
73
74
75
76
77
78
79 return (p == current || thread_group_leader(p)) ? p : NULL;
80 }
81
82
83
84
85 return has_group_leader_pid(p) ? p : NULL;
86 }
87
88 static struct task_struct *__get_task_for_clock(const clockid_t clock,
89 bool getref, bool gettime)
90 {
91 const bool thread = !!CPUCLOCK_PERTHREAD(clock);
92 const pid_t pid = CPUCLOCK_PID(clock);
93 struct task_struct *p;
94
95 if (CPUCLOCK_WHICH(clock) >= CPUCLOCK_MAX)
96 return NULL;
97
98 rcu_read_lock();
99 p = lookup_task(pid, thread, gettime);
100 if (p && getref)
101 get_task_struct(p);
102 rcu_read_unlock();
103 return p;
104 }
105
/* Look up the clock's task and take a reference (strict lookup rules). */
static inline struct task_struct *get_task_for_clock(const clockid_t clock)
{
	return __get_task_for_clock(clock, true, false);
}
110
/* Like get_task_for_clock(), but with the relaxed clock_gettime() rules. */
static inline struct task_struct *get_task_for_clock_get(const clockid_t clock)
{
	return __get_task_for_clock(clock, true, true);
}
115
116 static inline int validate_clock_permissions(const clockid_t clock)
117 {
118 return __get_task_for_clock(clock, false, false) ? 0 : -EINVAL;
119 }
120
121
122
123
124
/*
 * Advance a periodic timer's expiry past "now", counting the missed
 * periods into it_overrun.  Returns the (possibly updated) expiry time.
 */
static u64 bump_cpu_timer(struct k_itimer *timer, u64 now)
{
	u64 delta, incr, expires = timer->it.cpu.node.expires;
	int i;

	/* One-shot timers are never bumped. */
	if (!timer->it_interval)
		return expires;

	/* Not expired yet: nothing to do. */
	if (now < expires)
		return expires;

	incr = timer->it_interval;
	delta = now + incr - expires;

	/* Double incr until it covers delta, so the catch-up is O(log n). */
	for (i = 0; incr < delta - incr; i++)
		incr = incr << 1;

	/* Walk back down, subtracting the largest power-of-two multiples. */
	for (; i >= 0; incr >>= 1, i--) {
		if (delta < incr)
			continue;

		timer->it.cpu.node.expires += incr;
		/* 2^i intervals were skipped at this step. */
		timer->it_overrun += 1LL << i;
		delta -= incr;
	}
	return timer->it.cpu.node.expires;
}
153
154
155 static inline bool expiry_cache_is_inactive(const struct posix_cputimers *pct)
156 {
157 return !(~pct->bases[CPUCLOCK_PROF].nextevt |
158 ~pct->bases[CPUCLOCK_VIRT].nextevt |
159 ~pct->bases[CPUCLOCK_SCHED].nextevt);
160 }
161
162 static int
163 posix_cpu_clock_getres(const clockid_t which_clock, struct timespec64 *tp)
164 {
165 int error = validate_clock_permissions(which_clock);
166
167 if (!error) {
168 tp->tv_sec = 0;
169 tp->tv_nsec = ((NSEC_PER_SEC + HZ - 1) / HZ);
170 if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
171
172
173
174
175
176 tp->tv_nsec = 1;
177 }
178 }
179 return error;
180 }
181
182 static int
183 posix_cpu_clock_set(const clockid_t clock, const struct timespec64 *tp)
184 {
185 int error = validate_clock_permissions(clock);
186
187
188
189
190
191 return error ? : -EPERM;
192 }
193
194
195
196
197 static u64 cpu_clock_sample(const clockid_t clkid, struct task_struct *p)
198 {
199 u64 utime, stime;
200
201 if (clkid == CPUCLOCK_SCHED)
202 return task_sched_runtime(p);
203
204 task_cputime(p, &utime, &stime);
205
206 switch (clkid) {
207 case CPUCLOCK_PROF:
208 return utime + stime;
209 case CPUCLOCK_VIRT:
210 return utime;
211 default:
212 WARN_ON_ONCE(1);
213 }
214 return 0;
215 }
216
217 static inline void store_samples(u64 *samples, u64 stime, u64 utime, u64 rtime)
218 {
219 samples[CPUCLOCK_PROF] = stime + utime;
220 samples[CPUCLOCK_VIRT] = utime;
221 samples[CPUCLOCK_SCHED] = rtime;
222 }
223
224 static void task_sample_cputime(struct task_struct *p, u64 *samples)
225 {
226 u64 stime, utime;
227
228 task_cputime(p, &utime, &stime);
229 store_samples(samples, stime, utime, p->se.sum_exec_runtime);
230 }
231
232 static void proc_sample_cputime_atomic(struct task_cputime_atomic *at,
233 u64 *samples)
234 {
235 u64 stime, utime, rtime;
236
237 utime = atomic64_read(&at->utime);
238 stime = atomic64_read(&at->stime);
239 rtime = atomic64_read(&at->sum_exec_runtime);
240 store_samples(samples, stime, utime, rtime);
241 }
242
243
244
245
246
247 static inline void __update_gt_cputime(atomic64_t *cputime, u64 sum_cputime)
248 {
249 u64 curr_cputime;
250 retry:
251 curr_cputime = atomic64_read(cputime);
252 if (sum_cputime > curr_cputime) {
253 if (atomic64_cmpxchg(cputime, curr_cputime, sum_cputime) != curr_cputime)
254 goto retry;
255 }
256 }
257
/* Fold each field of *sum into the atomic aggregate, never going backwards. */
static void update_gt_cputime(struct task_cputime_atomic *cputime_atomic,
			      struct task_cputime *sum)
{
	__update_gt_cputime(&cputime_atomic->utime, sum->utime);
	__update_gt_cputime(&cputime_atomic->stime, sum->stime);
	__update_gt_cputime(&cputime_atomic->sum_exec_runtime, sum->sum_exec_runtime);
}
265
266
267
268
269
270
271
272
273
274
275
276
277 void thread_group_sample_cputime(struct task_struct *tsk, u64 *samples)
278 {
279 struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
280 struct posix_cputimers *pct = &tsk->signal->posix_cputimers;
281
282 WARN_ON_ONCE(!pct->timers_active);
283
284 proc_sample_cputime_atomic(&cputimer->cputime_atomic, samples);
285 }
286
287
288
289
290
291
292
293
294
295
296
297
298
/*
 * Enable process-wide cputime accounting if it is not running yet and
 * take a sample.  Expected to be called under sighand->siglock (write
 * side of timers_active).
 */
static void thread_group_start_cputime(struct task_struct *tsk, u64 *samples)
{
	struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
	struct posix_cputimers *pct = &tsk->signal->posix_cputimers;

	/* timers_active is read locklessly elsewhere, hence READ_ONCE. */
	if (!READ_ONCE(pct->timers_active)) {
		struct task_cputime sum;

		/*
		 * Accounting was off, so the atomic accumulator may be
		 * behind: sync it with a full thread-group summation
		 * before anyone relies on it.
		 */
		thread_group_cputime(tsk, &sum);
		update_gt_cputime(&cputimer->cputime_atomic, &sum);

		/*
		 * Set timers_active only after the accumulator is synced.
		 * WRITE_ONCE pairs with the lockless READ_ONCE readers; no
		 * stronger barrier is used here — update_gt_cputime() is
		 * written to tolerate concurrent updates.
		 */
		WRITE_ONCE(pct->timers_active, true);
	}
	proc_sample_cputime_atomic(&cputimer->cputime_atomic, samples);
}
327
328 static void __thread_group_cputime(struct task_struct *tsk, u64 *samples)
329 {
330 struct task_cputime ct;
331
332 thread_group_cputime(tsk, &ct);
333 store_samples(samples, ct.stime, ct.utime, ct.sum_exec_runtime);
334 }
335
336
337
338
339
340
341
342
343 static u64 cpu_clock_sample_group(const clockid_t clkid, struct task_struct *p,
344 bool start)
345 {
346 struct thread_group_cputimer *cputimer = &p->signal->cputimer;
347 struct posix_cputimers *pct = &p->signal->posix_cputimers;
348 u64 samples[CPUCLOCK_MAX];
349
350 if (!READ_ONCE(pct->timers_active)) {
351 if (start)
352 thread_group_start_cputime(p, samples);
353 else
354 __thread_group_cputime(p, samples);
355 } else {
356 proc_sample_cputime_atomic(&cputimer->cputime_atomic, samples);
357 }
358
359 return samples[clkid];
360 }
361
362 static int posix_cpu_clock_get(const clockid_t clock, struct timespec64 *tp)
363 {
364 const clockid_t clkid = CPUCLOCK_WHICH(clock);
365 struct task_struct *tsk;
366 u64 t;
367
368 tsk = get_task_for_clock_get(clock);
369 if (!tsk)
370 return -EINVAL;
371
372 if (CPUCLOCK_PERTHREAD(clock))
373 t = cpu_clock_sample(clkid, tsk);
374 else
375 t = cpu_clock_sample_group(clkid, tsk, false);
376 put_task_struct(tsk);
377
378 *tp = ns_to_timespec64(t);
379 return 0;
380 }
381
382
383
384
385
386
/*
 * Validate the clockid and attach the target task to a new CPU timer.
 * The task reference taken here is dropped in posix_cpu_timer_del().
 */
static int posix_cpu_timer_create(struct k_itimer *new_timer)
{
	struct task_struct *p = get_task_for_clock(new_timer->it_clock);

	if (!p)
		return -EINVAL;

	new_timer->kclock = &clock_posix_cpu;
	timerqueue_init(&new_timer->it.cpu.node);
	new_timer->it.cpu.task = p;
	return 0;
}
399
400
401
402
403
404
405
/*
 * Detach a CPU timer from its queue and drop the task reference taken
 * at creation.  Returns TIMER_RETRY when the timer is currently firing
 * and the caller must retry after dropping the timer lock.
 */
static int posix_cpu_timer_del(struct k_itimer *timer)
{
	struct cpu_timer *ctmr = &timer->it.cpu;
	struct task_struct *p = ctmr->task;
	struct sighand_struct *sighand;
	unsigned long flags;
	int ret = 0;

	if (WARN_ON_ONCE(!p))
		return -EINVAL;

	/* Serialize against exit/exec releasing the sighand. */
	sighand = lock_task_sighand(p, &flags);
	if (unlikely(sighand == NULL)) {
		/*
		 * The process was reaped; exit-time cleanup must already
		 * have dequeued this timer, so it cannot still be queued.
		 */
		WARN_ON_ONCE(ctmr->head || timerqueue_node_queued(&ctmr->node));
	} else {
		if (timer->it.cpu.firing)
			ret = TIMER_RETRY;	/* expiry path owns it right now */
		else
			cpu_timer_dequeue(ctmr);

		unlock_task_sighand(p, &flags);
	}

	/* Only drop the creation-time reference on definitive deletion. */
	if (!ret)
		put_task_struct(p);

	return ret;
}
442
443 static void cleanup_timerqueue(struct timerqueue_head *head)
444 {
445 struct timerqueue_node *node;
446 struct cpu_timer *ctmr;
447
448 while ((node = timerqueue_getnext(head))) {
449 timerqueue_del(head, node);
450 ctmr = container_of(node, struct cpu_timer, node);
451 ctmr->head = NULL;
452 }
453 }
454
455
456
457
458
459
460
461
462 static void cleanup_timers(struct posix_cputimers *pct)
463 {
464 cleanup_timerqueue(&pct->bases[CPUCLOCK_PROF].tqhead);
465 cleanup_timerqueue(&pct->bases[CPUCLOCK_VIRT].tqhead);
466 cleanup_timerqueue(&pct->bases[CPUCLOCK_SCHED].tqhead);
467 }
468
469
470
471
472
473
/* Exit-time cleanup of a thread's per-thread CPU timers. */
void posix_cpu_timers_exit(struct task_struct *tsk)
{
	cleanup_timers(&tsk->posix_cputimers);
}
/* Exit-time cleanup of the process-wide CPU timers. */
void posix_cpu_timers_exit_group(struct task_struct *tsk)
{
	cleanup_timers(&tsk->signal->posix_cputimers);
}
482
483
484
485
486
/*
 * Queue a timer on its clock base and update the base's cached
 * next-expiry if this timer becomes the earliest one.  Caller holds
 * sighand->siglock.
 */
static void arm_timer(struct k_itimer *timer)
{
	int clkidx = CPUCLOCK_WHICH(timer->it_clock);
	struct cpu_timer *ctmr = &timer->it.cpu;
	u64 newexp = cpu_timer_getexpires(ctmr);
	struct task_struct *p = ctmr->task;
	struct posix_cputimer_base *base;

	/* Pick the per-thread or the process-wide clock base. */
	if (CPUCLOCK_PERTHREAD(timer->it_clock))
		base = p->posix_cputimers.bases + clkidx;
	else
		base = p->signal->posix_cputimers.bases + clkidx;

	/* Not the new head of the queue: cached expiry is still valid. */
	if (!cpu_timer_enqueue(&base->tqhead, ctmr))
		return;

	/*
	 * This is the earliest timer on the base now; pull the cached
	 * expiry forward so the expiry fast path will notice it.
	 */
	if (newexp < base->nextevt)
		base->nextevt = newexp;

	/* Keep the tick alive for this task/process (NOHZ dependency). */
	if (CPUCLOCK_PERTHREAD(timer->it_clock))
		tick_dep_set_task(p, TICK_DEP_BIT_POSIX_TIMER);
	else
		tick_dep_set_signal(p->signal, TICK_DEP_BIT_POSIX_TIMER);
}
517
518
519
520
/*
 * Deliver the expiry of a CPU timer.  Called with the timer locked, so
 * no concurrent firing is possible.
 */
static void cpu_timer_fire(struct k_itimer *timer)
{
	struct cpu_timer *ctmr = &timer->it.cpu;

	if ((timer->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) {
		/* SIGEV_NONE: no notification, just disarm. */
		cpu_timer_setexpires(ctmr, 0);
	} else if (unlikely(timer->sigq == NULL)) {
		/*
		 * No sigqueue: this is a do_cpu_nanosleep() on-stack
		 * timer — wake the sleeper instead of sending a signal.
		 */
		wake_up_process(timer->it_process);
		cpu_timer_setexpires(ctmr, 0);
	} else if (!timer->it_interval) {
		/* One-shot: deliver the signal and disarm. */
		posix_timer_event(timer, 0);
		cpu_timer_setexpires(ctmr, 0);
	} else if (posix_timer_event(timer, ++timer->it_requeue_pending)) {
		/*
		 * The signal was already queued (overrun): rearm for the
		 * next period and account the missed delivery.
		 */
		posix_cpu_timer_rearm(timer);
		++timer->it_requeue_pending;
	}
}
554
555
556
557
558
559
560
/*
 * Guts of timer_settime() for CPU timers.  Called with the timer locked
 * and interrupts disabled.  Returns TIMER_RETRY when colliding with the
 * timer firing; the caller must drop the timer lock and retry.
 */
static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
			       struct itimerspec64 *new, struct itimerspec64 *old)
{
	clockid_t clkid = CPUCLOCK_WHICH(timer->it_clock);
	u64 old_expires, new_expires, old_incr, val;
	struct cpu_timer *ctmr = &timer->it.cpu;
	struct task_struct *p = ctmr->task;
	struct sighand_struct *sighand;
	unsigned long flags;
	int ret = 0;

	if (WARN_ON_ONCE(!p))
		return -EINVAL;

	/*
	 * Convert via ktime: this clamps to KTIME_MAX and avoids
	 * multiplication overflow for huge timespec values.
	 */
	new_expires = ktime_to_ns(timespec64_to_ktime(new->it_value));

	/* Serialize against exit/exec releasing the sighand. */
	sighand = lock_task_sighand(p, &flags);
	/* Target was reaped: no information about it is available. */
	if (unlikely(sighand == NULL))
		return -ESRCH;

	/* Disarm the old timer after extracting its state. */
	old_incr = timer->it_interval;
	old_expires = cpu_timer_getexpires(ctmr);

	if (unlikely(timer->it.cpu.firing)) {
		/* firing = -1 tells the expiry path to skip delivery. */
		timer->it.cpu.firing = -1;
		ret = TIMER_RETRY;
	} else {
		cpu_timer_dequeue(ctmr);
	}

	/*
	 * Sample the clock now: needed to convert between relative and
	 * absolute expiry values (both directions), and to decide below
	 * whether an absolute expiry has already passed.
	 */
	if (CPUCLOCK_PERTHREAD(timer->it_clock))
		val = cpu_clock_sample(clkid, p);
	else
		val = cpu_clock_sample_group(clkid, p, true);

	if (old) {
		if (old_expires == 0) {
			/* Timer was disarmed. */
			old->it_value.tv_sec = 0;
			old->it_value.tv_nsec = 0;
		} else {
			/*
			 * Bump the timer in case it already overran: the
			 * pending notification is swallowed here, but the
			 * overrun is reported via the bumped it_overrun.
			 */
			u64 exp = bump_cpu_timer(timer, val);

			if (val < exp) {
				old_expires = exp - val;
				old->it_value = ns_to_timespec64(old_expires);
			} else {
				/* Already expired: report 1ns remaining. */
				old->it_value.tv_nsec = 1;
				old->it_value.tv_sec = 0;
			}
		}
	}

	if (unlikely(ret)) {
		/*
		 * Collided with the timer firing: punt after filling in
		 * the old value.  The firing was disabled above via
		 * firing = -1.
		 */
		unlock_task_sighand(p, &flags);
		goto out;
	}

	/* Convert a relative expiry to absolute clock time. */
	if (new_expires != 0 && !(timer_flags & TIMER_ABSTIME)) {
		new_expires += val;
	}

	/*
	 * Install the new expiry (or zero to disarm).  A timer whose
	 * expiry already passed is not queued; it fires below instead.
	 */
	cpu_timer_setexpires(ctmr, new_expires);
	if (new_expires != 0 && val < new_expires) {
		arm_timer(timer);
	}

	unlock_task_sighand(p, &flags);
	/* Install the new reload interval. */
	timer->it_interval = timespec64_to_ktime(new->it_interval);

	/*
	 * Acts as a modification timestamp: any in-flight automatic
	 * reload sees REQUEUE_PENDING cleared and punts.
	 */
	timer->it_requeue_pending = (timer->it_requeue_pending + 2) &
		~REQUEUE_PENDING;
	timer->it_overrun_last = 0;
	timer->it_overrun = -1;

	if (new_expires != 0 && !(val < new_expires)) {
		/*
		 * The requested time already passed: notify immediately,
		 * even if the clock never accumulates more time.
		 */
		cpu_timer_fire(timer);
	}

	ret = 0;
 out:
	if (old)
		old->it_interval = ns_to_timespec64(old_incr);

	return ret;
}
701
/*
 * timer_gettime() for CPU timers: report the remaining time and the
 * reload interval.  Called with the timer locked.
 */
static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec64 *itp)
{
	clockid_t clkid = CPUCLOCK_WHICH(timer->it_clock);
	struct cpu_timer *ctmr = &timer->it.cpu;
	u64 now, expires = cpu_timer_getexpires(ctmr);
	struct task_struct *p = ctmr->task;

	if (WARN_ON_ONCE(!p))
		return;

	/* Easy part: the reload interval. */
	itp->it_interval = ktime_to_timespec64(timer->it_interval);

	/* Disarmed timer: remaining value stays zero. */
	if (!expires)
		return;

	/* Sample the clock to compute the remaining time. */
	if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
		now = cpu_clock_sample(clkid, p);
	} else {
		struct sighand_struct *sighand;
		unsigned long flags;

		/* Serialize against exit/exec releasing the sighand. */
		sighand = lock_task_sighand(p, &flags);
		if (unlikely(sighand == NULL)) {
			/*
			 * The process was reaped; no sample can be taken
			 * any more.  Disarm the timer and report zero.
			 */
			cpu_timer_setexpires(ctmr, 0);
			return;
		} else {
			now = cpu_clock_sample_group(clkid, p, false);
			unlock_task_sighand(p, &flags);
		}
	}

	if (now < expires) {
		itp->it_value = ns_to_timespec64(expires - now);
	} else {
		/*
		 * Expired but the firing hasn't happened yet: report it
		 * as just about to expire (1ns remaining).
		 */
		itp->it_value.tv_nsec = 1;
		itp->it_value.tv_sec = 0;
	}
}
760
761 #define MAX_COLLECTED 20
762
763 static u64 collect_timerqueue(struct timerqueue_head *head,
764 struct list_head *firing, u64 now)
765 {
766 struct timerqueue_node *next;
767 int i = 0;
768
769 while ((next = timerqueue_getnext(head))) {
770 struct cpu_timer *ctmr;
771 u64 expires;
772
773 ctmr = container_of(next, struct cpu_timer, node);
774 expires = cpu_timer_getexpires(ctmr);
775
776 if (++i == MAX_COLLECTED || now < expires)
777 return expires;
778
779 ctmr->firing = 1;
780 cpu_timer_dequeue(ctmr);
781 list_add_tail(&ctmr->elist, firing);
782 }
783
784 return U64_MAX;
785 }
786
787 static void collect_posix_cputimers(struct posix_cputimers *pct, u64 *samples,
788 struct list_head *firing)
789 {
790 struct posix_cputimer_base *base = pct->bases;
791 int i;
792
793 for (i = 0; i < CPUCLOCK_MAX; i++, base++) {
794 base->nextevt = collect_timerqueue(&base->tqhead, firing,
795 samples[i]);
796 }
797 }
798
799 static inline void check_dl_overrun(struct task_struct *tsk)
800 {
801 if (tsk->dl.dl_overrun) {
802 tsk->dl.dl_overrun = 0;
803 __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
804 }
805 }
806
/*
 * If @time has reached @limit, log the watchdog message (when enabled)
 * and send @signo to the current task.  Returns true when the limit was
 * hit, false otherwise.
 */
static bool check_rlimit(u64 time, u64 limit, int signo, bool rt, bool hard)
{
	if (time < limit)
		return false;

	if (print_fatal_signals) {
		pr_info("%s Watchdog Timeout (%s): %s[%d]\n",
			rt ? "RT" : "CPU", hard ? "hard" : "soft",
			current->comm, task_pid_nr(current));
	}
	__group_send_sig_info(signo, SEND_SIG_PRIV, current);
	return true;
}
820
821
822
823
824
825
/*
 * Check for per-thread CPU timers that have fired and move them onto
 * the @firing list, updating the per-thread expiry cache on the way.
 * Also polices RLIMIT_RTTIME for realtime tasks.
 */
static void check_thread_timers(struct task_struct *tsk,
				struct list_head *firing)
{
	struct posix_cputimers *pct = &tsk->posix_cputimers;
	u64 samples[CPUCLOCK_MAX];
	unsigned long soft;

	if (dl_task(tsk))
		check_dl_overrun(tsk);

	/* No armed per-thread timer: nothing to collect. */
	if (expiry_cache_is_inactive(pct))
		return;

	task_sample_cputime(tsk, samples);
	collect_posix_cputimers(pct, samples, firing);

	/* Check the special case RLIMIT_RTTIME watchdog. */
	soft = task_rlimit(tsk, RLIMIT_RTTIME);
	if (soft != RLIM_INFINITY) {
		/* rt.timeout counts ticks; RLIMIT_RTTIME is in usec. */
		unsigned long rttime = tsk->rt.timeout * (USEC_PER_SEC / HZ);
		unsigned long hard = task_rlimit_max(tsk, RLIMIT_RTTIME);

		/* At the hard limit, send SIGKILL; no further checking. */
		if (hard != RLIM_INFINITY &&
		    check_rlimit(rttime, hard, SIGKILL, true, true))
			return;

		/* At the soft limit, send a SIGXCPU every second. */
		if (check_rlimit(rttime, soft, SIGXCPU, true, false)) {
			soft += USEC_PER_SEC;
			tsk->signal->rlim[RLIMIT_RTTIME].rlim_cur = soft;
		}
	}

	/* All timers drained: the tick dependency can be dropped. */
	if (expiry_cache_is_inactive(pct))
		tick_dep_clear_task(tsk, TICK_DEP_BIT_POSIX_TIMER);
}
866
867 static inline void stop_process_timers(struct signal_struct *sig)
868 {
869 struct posix_cputimers *pct = &sig->posix_cputimers;
870
871
872 WRITE_ONCE(pct->timers_active, false);
873 tick_dep_clear_signal(sig, TICK_DEP_BIT_POSIX_TIMER);
874 }
875
/*
 * Check one legacy itimer (ITIMER_PROF/ITIMER_VIRTUAL) against the
 * current sample: deliver @signo on expiry, reload if periodic, and
 * fold the earliest pending expiry into *expires.
 */
static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it,
			     u64 *expires, u64 cur_time, int signo)
{
	/* Disarmed: nothing to do. */
	if (!it->expires)
		return;

	if (cur_time >= it->expires) {
		/* Periodic timers reload; one-shot timers disarm. */
		if (it->incr)
			it->expires += it->incr;
		else
			it->expires = 0;

		trace_itimer_expire(signo == SIGPROF ?
				    ITIMER_PROF : ITIMER_VIRTUAL,
				    task_tgid(tsk), cur_time);
		__group_send_sig_info(signo, SEND_SIG_PRIV, tsk);
	}

	/* Propagate the (possibly reloaded) expiry into the cache. */
	if (it->expires && it->expires < *expires)
		*expires = it->expires;
}
897
898
899
900
901
902
/*
 * Check for process-wide CPU timers that have fired and move them onto
 * the @firing list.  Also handles the legacy itimers and RLIMIT_CPU.
 * Called with sighand->siglock held.
 */
static void check_process_timers(struct task_struct *tsk,
				 struct list_head *firing)
{
	struct signal_struct *const sig = tsk->signal;
	struct posix_cputimers *pct = &sig->posix_cputimers;
	u64 samples[CPUCLOCK_MAX];
	unsigned long soft;

	/*
	 * Nothing to do if group accounting is off, or if another thread
	 * is already running this expiry pass (expiry_active).
	 */
	if (!READ_ONCE(pct->timers_active) || pct->expiry_active)
		return;

	/*
	 * Flag that this thread is handling process timers.  Writes to
	 * expiry_active are serialized by sighand->siglock.
	 */
	pct->expiry_active = true;

	/* Accounting is active, so the atomic accumulator is current. */
	proc_sample_cputime_atomic(&sig->cputimer.cputime_atomic, samples);
	collect_posix_cputimers(pct, samples, firing);

	/* Legacy ITIMER_PROF / ITIMER_VIRTUAL. */
	check_cpu_itimer(tsk, &sig->it[CPUCLOCK_PROF],
			 &pct->bases[CPUCLOCK_PROF].nextevt,
			 samples[CPUCLOCK_PROF], SIGPROF);
	check_cpu_itimer(tsk, &sig->it[CPUCLOCK_VIRT],
			 &pct->bases[CPUCLOCK_VIRT].nextevt,
			 samples[CPUCLOCK_VIRT], SIGVTALRM);

	/* RLIMIT_CPU, expressed in seconds of profiled time. */
	soft = task_rlimit(tsk, RLIMIT_CPU);
	if (soft != RLIM_INFINITY) {
		unsigned long hard = task_rlimit_max(tsk, RLIMIT_CPU);
		u64 ptime = samples[CPUCLOCK_PROF];
		u64 softns = (u64)soft * NSEC_PER_SEC;
		u64 hardns = (u64)hard * NSEC_PER_SEC;

		/* At the hard limit, send SIGKILL; no further checking. */
		if (hard != RLIM_INFINITY &&
		    check_rlimit(ptime, hardns, SIGKILL, false, true))
			return;

		/* At the soft limit, send SIGXCPU and bump it by a second. */
		if (check_rlimit(ptime, softns, SIGXCPU, false, false)) {
			sig->rlim[RLIMIT_CPU].rlim_cur = soft + 1;
			softns += NSEC_PER_SEC;
		}

		/* The RLIMIT_CPU deadline shares the PROF expiry cache. */
		if (softns < pct->bases[CPUCLOCK_PROF].nextevt)
			pct->bases[CPUCLOCK_PROF].nextevt = softns;
	}

	/* Everything drained: stop group accounting entirely. */
	if (expiry_cache_is_inactive(pct))
		stop_process_timers(sig);

	pct->expiry_active = false;
}
971
972
973
974
975
/*
 * Called from the signal delivery code when the last timer signal was
 * delivered and the periodic timer has to be reloaded and rearmed.
 */
static void posix_cpu_timer_rearm(struct k_itimer *timer)
{
	clockid_t clkid = CPUCLOCK_WHICH(timer->it_clock);
	struct cpu_timer *ctmr = &timer->it.cpu;
	struct task_struct *p = ctmr->task;
	struct sighand_struct *sighand;
	unsigned long flags;
	u64 now;

	if (WARN_ON_ONCE(!p))
		return;

	/* Take a fresh sample and bump the expiry past it. */
	if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
		now = cpu_clock_sample(clkid, p);
		bump_cpu_timer(timer, now);
		/* An exiting thread never accrues more time: don't rearm. */
		if (unlikely(p->exit_state))
			return;

		/* Protect the timer queue manipulation in arm_timer(). */
		sighand = lock_task_sighand(p, &flags);
		if (!sighand)
			return;
	} else {
		/*
		 * The lock also protects the group sample against a
		 * concurrent thread_group_cputime() summation.
		 */
		sighand = lock_task_sighand(p, &flags);
		if (unlikely(sighand == NULL)) {
			/*
			 * The process was reaped; no sample possible.
			 * Disarm the timer for good.
			 */
			cpu_timer_setexpires(ctmr, 0);
			return;
		} else if (unlikely(p->exit_state) && thread_group_empty(p)) {
			/* The whole process is dying: no need to rearm. */
			goto unlock;
		}
		now = cpu_clock_sample_group(clkid, p, true);
		bump_cpu_timer(timer, now);
		/* Keep sighand locked for arm_timer() below. */
	}

	/* Requeue with the bumped expiry. */
	arm_timer(timer);
unlock:
	unlock_task_sighand(p, &flags);
}
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040 static inline bool
1041 task_cputimers_expired(const u64 *samples, struct posix_cputimers *pct)
1042 {
1043 int i;
1044
1045 for (i = 0; i < CPUCLOCK_MAX; i++) {
1046 if (samples[i] >= pct->bases[i].nextevt)
1047 return true;
1048 }
1049 return false;
1050 }
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
/*
 * Cheap, lockless check whether any per-thread or process-wide CPU
 * timer might have expired, using only the cached next-event times and
 * the atomic accumulator.  Returns true when the slow path (with
 * sighand lock) needs to run.
 */
static inline bool fastpath_timer_check(struct task_struct *tsk)
{
	struct posix_cputimers *pct = &tsk->posix_cputimers;
	struct signal_struct *sig;

	/* Per-thread timers: compare fresh samples to the cache. */
	if (!expiry_cache_is_inactive(pct)) {
		u64 samples[CPUCLOCK_MAX];

		task_sample_cputime(tsk, samples);
		if (task_cputimers_expired(samples, pct))
			return true;
	}

	sig = tsk->signal;
	pct = &sig->posix_cputimers;

	/*
	 * Process-wide timers: only meaningful while group accounting is
	 * running, and skipped when another thread already has the
	 * expiry pass in flight (expiry_active).  Both flags are read
	 * locklessly with READ_ONCE; stale values merely delay or
	 * duplicate the slow-path check, they cannot corrupt state.
	 */
	if (READ_ONCE(pct->timers_active) && !READ_ONCE(pct->expiry_active)) {
		u64 samples[CPUCLOCK_MAX];

		proc_sample_cputime_atomic(&sig->cputimer.cputime_atomic,
					   samples);

		if (task_cputimers_expired(samples, pct))
			return true;
	}

	/* SCHED_DEADLINE overrun also forces the slow path. */
	if (dl_task(tsk) && tsk->dl.dl_overrun)
		return true;

	return false;
}
1107
1108
1109
1110
1111
1112
/*
 * Called from the timer interrupt handler for the current task: collect
 * all expired CPU timers and fire them.  Interrupts are disabled.
 */
void run_posix_cpu_timers(void)
{
	struct task_struct *tsk = current;
	struct k_itimer *timer, *next;
	unsigned long flags;
	LIST_HEAD(firing);

	lockdep_assert_irqs_disabled();

	/* Lockless fast path: bail out if nothing can have expired. */
	if (!fastpath_timer_check(tsk))
		return;

	/* Task is exiting if the sighand lock cannot be taken. */
	if (!lock_task_sighand(tsk, &flags))
		return;

	/*
	 * Collect expired per-thread and process-wide timers onto the
	 * local firing list; each is marked firing and dequeued.
	 */
	check_thread_timers(tsk, &firing);

	check_process_timers(tsk, &firing);

	/*
	 * The firing flag (set under sighand->siglock) keeps
	 * posix_cpu_timer_set()/del() from touching a timer on the
	 * firing list after we drop the lock: they see firing != 0 and
	 * return TIMER_RETRY (set() additionally flips it to -1 to
	 * cancel the delivery below).
	 */
	unlock_task_sighand(tsk, &flags);

	/*
	 * Fire each collected timer under its own timer lock.  Entries
	 * cannot vanish from the list: removal requires the timer lock,
	 * which we hold while handling each entry.
	 */
	list_for_each_entry_safe(timer, next, &firing, it.cpu.elist) {
		int cpu_firing;

		spin_lock(&timer->it_lock);
		list_del_init(&timer->it.cpu.elist);
		cpu_firing = timer->it.cpu.firing;
		timer->it.cpu.firing = 0;

		/*
		 * A negative firing value means the timer was modified
		 * concurrently and the delivery must be skipped.
		 */
		if (likely(cpu_firing >= 0))
			cpu_timer_fire(timer);
		spin_unlock(&timer->it_lock);
	}
}
1173
1174
1175
1176
1177
/*
 * Set one of the process-wide special-case CPU timers (legacy itimers
 * or RLIMIT_CPU).  Caller holds tsk->sighand->siglock.  *newval is
 * relative on entry and converted to absolute when *oldval is supplied;
 * *oldval is converted from absolute to remaining-relative.
 */
void set_process_cpu_timer(struct task_struct *tsk, unsigned int clkid,
			   u64 *newval, u64 *oldval)
{
	u64 now, *nextevt;

	/* Only PROF and VIRT have itimer/rlimit semantics. */
	if (WARN_ON_ONCE(clkid >= CPUCLOCK_SCHED))
		return;

	nextevt = &tsk->signal->posix_cputimers.bases[clkid].nextevt;
	now = cpu_clock_sample_group(clkid, tsk, true);

	if (oldval) {
		/*
		 * Itimer path: report the remaining time in *oldval and
		 * make the new (relative) value absolute.
		 */
		if (*oldval) {
			if (*oldval <= now) {
				/* Just about to fire: report one tick left. */
				*oldval = TICK_NSEC;
			} else {
				*oldval -= now;
			}
		}

		/* A zero new value disarms; don't touch the cache. */
		if (!*newval)
			return;
		*newval += now;
	}

	/*
	 * Pull the expiry cache forward if this becomes the earliest
	 * event.  Note: CPUCLOCK_PROF's cache also serves RLIMIT_CPU.
	 */
	if (*newval < *nextevt)
		*nextevt = *newval;

	tick_dep_set_signal(tsk->signal, TICK_DEP_BIT_POSIX_TIMER);
}
1218
1219 static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
1220 const struct timespec64 *rqtp)
1221 {
1222 struct itimerspec64 it;
1223 struct k_itimer timer;
1224 u64 expires;
1225 int error;
1226
1227
1228
1229
1230 memset(&timer, 0, sizeof timer);
1231 spin_lock_init(&timer.it_lock);
1232 timer.it_clock = which_clock;
1233 timer.it_overrun = -1;
1234 error = posix_cpu_timer_create(&timer);
1235 timer.it_process = current;
1236
1237 if (!error) {
1238 static struct itimerspec64 zero_it;
1239 struct restart_block *restart;
1240
1241 memset(&it, 0, sizeof(it));
1242 it.it_value = *rqtp;
1243
1244 spin_lock_irq(&timer.it_lock);
1245 error = posix_cpu_timer_set(&timer, flags, &it, NULL);
1246 if (error) {
1247 spin_unlock_irq(&timer.it_lock);
1248 return error;
1249 }
1250
1251 while (!signal_pending(current)) {
1252 if (!cpu_timer_getexpires(&timer.it.cpu)) {
1253
1254
1255
1256
1257 posix_cpu_timer_del(&timer);
1258 spin_unlock_irq(&timer.it_lock);
1259 return 0;
1260 }
1261
1262
1263
1264
1265 __set_current_state(TASK_INTERRUPTIBLE);
1266 spin_unlock_irq(&timer.it_lock);
1267 schedule();
1268 spin_lock_irq(&timer.it_lock);
1269 }
1270
1271
1272
1273
1274 expires = cpu_timer_getexpires(&timer.it.cpu);
1275 error = posix_cpu_timer_set(&timer, 0, &zero_it, &it);
1276 if (!error) {
1277
1278
1279
1280 posix_cpu_timer_del(&timer);
1281 }
1282 spin_unlock_irq(&timer.it_lock);
1283
1284 while (error == TIMER_RETRY) {
1285
1286
1287
1288
1289
1290 spin_lock_irq(&timer.it_lock);
1291 error = posix_cpu_timer_del(&timer);
1292 spin_unlock_irq(&timer.it_lock);
1293 }
1294
1295 if ((it.it_value.tv_sec | it.it_value.tv_nsec) == 0) {
1296
1297
1298
1299 return 0;
1300 }
1301
1302 error = -ERESTART_RESTARTBLOCK;
1303
1304
1305
1306 restart = ¤t->restart_block;
1307 restart->nanosleep.expires = expires;
1308 if (restart->nanosleep.type != TT_NONE)
1309 error = nanosleep_copyout(restart, &it.it_value);
1310 }
1311
1312 return error;
1313 }
1314
1315 static long posix_cpu_nsleep_restart(struct restart_block *restart_block);
1316
1317 static int posix_cpu_nsleep(const clockid_t which_clock, int flags,
1318 const struct timespec64 *rqtp)
1319 {
1320 struct restart_block *restart_block = ¤t->restart_block;
1321 int error;
1322
1323
1324
1325
1326 if (CPUCLOCK_PERTHREAD(which_clock) &&
1327 (CPUCLOCK_PID(which_clock) == 0 ||
1328 CPUCLOCK_PID(which_clock) == task_pid_vnr(current)))
1329 return -EINVAL;
1330
1331 error = do_cpu_nanosleep(which_clock, flags, rqtp);
1332
1333 if (error == -ERESTART_RESTARTBLOCK) {
1334
1335 if (flags & TIMER_ABSTIME)
1336 return -ERESTARTNOHAND;
1337
1338 restart_block->fn = posix_cpu_nsleep_restart;
1339 restart_block->nanosleep.clockid = which_clock;
1340 }
1341 return error;
1342 }
1343
1344 static long posix_cpu_nsleep_restart(struct restart_block *restart_block)
1345 {
1346 clockid_t which_clock = restart_block->nanosleep.clockid;
1347 struct timespec64 t;
1348
1349 t = ns_to_timespec64(restart_block->nanosleep.expires);
1350
1351 return do_cpu_nanosleep(which_clock, TIMER_ABSTIME, &t);
1352 }
1353
/*
 * Fixed clockids for CLOCK_PROCESS_CPUTIME_ID / CLOCK_THREAD_CPUTIME_ID:
 * pid 0 (the caller) with the SCHED clock.
 */
#define PROCESS_CLOCK	make_process_cpuclock(0, CPUCLOCK_SCHED)
#define THREAD_CLOCK	make_thread_cpuclock(0, CPUCLOCK_SCHED)

/* Thin forwarders binding the generic CPU-clock ops to PROCESS_CLOCK. */
static int process_cpu_clock_getres(const clockid_t which_clock,
				    struct timespec64 *tp)
{
	return posix_cpu_clock_getres(PROCESS_CLOCK, tp);
}
static int process_cpu_clock_get(const clockid_t which_clock,
				 struct timespec64 *tp)
{
	return posix_cpu_clock_get(PROCESS_CLOCK, tp);
}
static int process_cpu_timer_create(struct k_itimer *timer)
{
	timer->it_clock = PROCESS_CLOCK;
	return posix_cpu_timer_create(timer);
}
static int process_cpu_nsleep(const clockid_t which_clock, int flags,
			      const struct timespec64 *rqtp)
{
	return posix_cpu_nsleep(PROCESS_CLOCK, flags, rqtp);
}
/* Thin forwarders binding the generic CPU-clock ops to THREAD_CLOCK. */
static int thread_cpu_clock_getres(const clockid_t which_clock,
				   struct timespec64 *tp)
{
	return posix_cpu_clock_getres(THREAD_CLOCK, tp);
}
static int thread_cpu_clock_get(const clockid_t which_clock,
				struct timespec64 *tp)
{
	return posix_cpu_clock_get(THREAD_CLOCK, tp);
}
static int thread_cpu_timer_create(struct k_itimer *timer)
{
	timer->it_clock = THREAD_CLOCK;
	return posix_cpu_timer_create(timer);
}
1392
/* Full operations vector for dynamically-encoded CPU clockids. */
const struct k_clock clock_posix_cpu = {
	.clock_getres	= posix_cpu_clock_getres,
	.clock_set	= posix_cpu_clock_set,
	.clock_get	= posix_cpu_clock_get,
	.timer_create	= posix_cpu_timer_create,
	.nsleep		= posix_cpu_nsleep,
	.timer_set	= posix_cpu_timer_set,
	.timer_del	= posix_cpu_timer_del,
	.timer_get	= posix_cpu_timer_get,
	.timer_rearm	= posix_cpu_timer_rearm,
};

/* CLOCK_PROCESS_CPUTIME_ID: no clock_set, no per-timer get/del here. */
const struct k_clock clock_process = {
	.clock_getres	= process_cpu_clock_getres,
	.clock_get	= process_cpu_clock_get,
	.timer_create	= process_cpu_timer_create,
	.nsleep		= process_cpu_nsleep,
};

/* CLOCK_THREAD_CPUTIME_ID: sleeping on the own thread clock is invalid. */
const struct k_clock clock_thread = {
	.clock_getres	= thread_cpu_clock_getres,
	.clock_get	= thread_cpu_clock_get,
	.timer_create	= thread_cpu_timer_create,
};