This source file includes the following definitions:
- setup_fail_futex
- should_fail_futex
- fail_futex_debugfs
- should_fail_futex
- compat_exit_robust_list
- futex_get_mm
- hb_waiters_inc
- hb_waiters_dec
- hb_waiters_pending
- hash_futex
- match_futex
- get_futex_key_refs
- drop_futex_key_refs
- futex_setup_timer
- get_inode_sequence_number
- get_futex_key
- put_futex_key
- fault_in_user_writeable
- futex_top_waiter
- cmpxchg_futex_value_locked
- get_futex_value_locked
- refill_pi_state_cache
- alloc_pi_state
- get_pi_state
- put_pi_state
- exit_pi_state_list
- exit_pi_state_list
- attach_to_pi_state
- wait_for_owner_exiting
- handle_exit_race
- attach_to_pi_owner
- lookup_pi_state
- lock_pi_update_atomic
- futex_lock_pi_atomic
- __unqueue_futex
- mark_wake_futex
- wake_futex_pi
- double_lock_hb
- double_unlock_hb
- futex_wake
- futex_atomic_op_inuser
- futex_wake_op
- requeue_futex
- requeue_pi_wake_futex
- futex_proxy_trylock_atomic
- futex_requeue
- queue_lock
- queue_unlock
- __queue_me
- queue_me
- unqueue_me
- unqueue_me_pi
- fixup_pi_state_owner
- fixup_owner
- futex_wait_queue_me
- futex_wait_setup
- futex_wait
- futex_wait_restart
- futex_lock_pi
- futex_unlock_pi
- handle_early_requeue_pi_wakeup
- futex_wait_requeue_pi
- SYSCALL_DEFINE2
- SYSCALL_DEFINE3
- handle_futex_death
- fetch_robust_entry
- exit_robust_list
- futex_cleanup
- futex_exit_recursive
- futex_cleanup_begin
- futex_cleanup_end
- futex_exec_release
- futex_exit_release
- do_futex
- SYSCALL_DEFINE6
- compat_fetch_robust_entry
- futex_uaddr
- compat_exit_robust_list
- COMPAT_SYSCALL_DEFINE2
- COMPAT_SYSCALL_DEFINE3
- SYSCALL_DEFINE6
- futex_detect_cmpxchg
- futex_init

#include <linux/compat.h>
#include <linux/slab.h>
#include <linux/poll.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/jhash.h>
#include <linux/init.h>
#include <linux/futex.h>
#include <linux/mount.h>
#include <linux/pagemap.h>
#include <linux/syscalls.h>
#include <linux/signal.h>
#include <linux/export.h>
#include <linux/magic.h>
#include <linux/pid.h>
#include <linux/nsproxy.h>
#include <linux/ptrace.h>
#include <linux/sched/rt.h>
#include <linux/sched/wake_q.h>
#include <linux/sched/mm.h>
#include <linux/hugetlb.h>
#include <linux/freezer.h>
#include <linux/memblock.h>
#include <linux/fault-inject.h>
#include <linux/refcount.h>

#include <asm/futex.h>

#include "locking/rtmutex_common.h"
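
/*
 * Basic futex operation and ordering guarantees:
 *
 * A waiter reads the futex value in user space, computes the hash bucket,
 * takes the hash bucket lock, re-reads the futex value and, if it still
 * matches the expected value, queues itself on the bucket's plist and
 * sleeps.
 *
 * A waker changes the futex value in user space, computes the hash bucket
 * and, after checking that there are queued waiters, takes the bucket lock
 * and wakes the matching waiters.
 *
 * The waiter side increments hb->waiters (with a full barrier, see
 * hb_waiters_inc()) before re-reading the user space value; the waker side
 * checks hb->waiters after it has written the user space value. This
 * pairing guarantees that either the waiter observes the changed value and
 * aborts, or the waker observes the incremented waiter count and takes the
 * bucket lock to perform the wakeup.
 */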

#ifdef CONFIG_HAVE_FUTEX_CMPXCHG
#define futex_cmpxchg_enabled 1
#else
static int __read_mostly futex_cmpxchg_enabled;
#endif

#ifdef CONFIG_MMU
# define FLAGS_SHARED		0x01
#else
# define FLAGS_SHARED		0x00
#endif
#define FLAGS_CLOCKRT		0x02
#define FLAGS_HAS_TIMEOUT	0x04
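
/*
 * Priority Inheritance state:
 *
 * @list:	list of 'owned' pi_state instances; these have to be cleaned
 *		up if the owner task exits prematurely
 * @pi_mutex:	the rtmutex implementing the PI chain for this futex
 * @owner:	the task which owns the futex in user space
 * @key:	the futex key this state belongs to
 */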
struct futex_pi_state {
	struct list_head list;

	struct rt_mutex pi_mutex;

	struct task_struct *owner;
	refcount_t refcount;

	union futex_key key;
} __randomize_layout;
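
/**
 * struct futex_q - The hashed futex queue entry, one per waiting task
 * @list:		priority-sorted list of tasks waiting on this futex
 * @task:		the task waiting on the futex
 * @lock_ptr:		the hash bucket lock
 * @key:		the key the futex is hashed on
 * @pi_state:		optional priority inheritance state
 * @rt_waiter:		rt_waiter storage for use with requeue_pi
 * @requeue_pi_key:	the requeue_pi target futex key
 * @bitset:		bitset for the optional bitmasked wakeup
 *
 * We use this hashed waitqueue, instead of a normal wait_queue_entry_t, so
 * we can wake only the relevant ones (hashed queues may be shared).
 *
 * A futex_q has a woken state, just like tasks have TASK_RUNNING.
 * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0.
 * The order of wakeup is always to make the first condition true, then
 * the second.
 */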
struct futex_q {
	struct plist_node list;

	struct task_struct *task;
	spinlock_t *lock_ptr;
	union futex_key key;
	struct futex_pi_state *pi_state;
	struct rt_mutex_waiter *rt_waiter;
	union futex_key *requeue_pi_key;
	u32 bitset;
} __randomize_layout;

static const struct futex_q futex_q_init = {
	.key = FUTEX_KEY_INIT,
	.bitset = FUTEX_BITSET_MATCH_ANY
};
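
/*
 * Hash buckets are shared by all the futex_keys that hash to the same
 * location.  Each key may have multiple futex_q structures, one for each
 * task waiting on a futex.
 */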
struct futex_hash_bucket {
	atomic_t waiters;
	spinlock_t lock;
	struct plist_head chain;
} ____cacheline_aligned_in_smp;
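
/*
 * The base of the bucket array and its size are always used together
 * (after initialization only in hash_futex()), so ensure that they
 * reside in the same cacheline.
 */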
static struct {
	struct futex_hash_bucket *queues;
	unsigned long hashsize;
} __futex_data __read_mostly __aligned(2*sizeof(long));
#define futex_queues	(__futex_data.queues)
#define futex_hashsize	(__futex_data.hashsize)
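
/*
 * Fault injections for futexes.
 *
 * Enabled at boot via the "fail_futex=" parameter (standard fault_attr
 * format) and tunable at runtime under /sys/kernel/debug/fail_futex/ when
 * CONFIG_FAULT_INJECTION_DEBUG_FS is set.
 */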
#ifdef CONFIG_FAIL_FUTEX

static struct {
	struct fault_attr attr;

	bool ignore_private;
} fail_futex = {
	.attr = FAULT_ATTR_INITIALIZER,
	.ignore_private = false,
};

static int __init setup_fail_futex(char *str)
{
	return setup_fault_attr(&fail_futex.attr, str);
}
__setup("fail_futex=", setup_fail_futex);

static bool should_fail_futex(bool fshared)
{
	if (fail_futex.ignore_private && !fshared)
		return false;

	return should_fail(&fail_futex.attr, 1);
}

#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS

static int __init fail_futex_debugfs(void)
{
	umode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
	struct dentry *dir;

	dir = fault_create_debugfs_attr("fail_futex", NULL,
					&fail_futex.attr);
	if (IS_ERR(dir))
		return PTR_ERR(dir);

	debugfs_create_bool("ignore-private", mode, dir,
			    &fail_futex.ignore_private);
	return 0;
}

late_initcall(fail_futex_debugfs);

#endif

#else
static inline bool should_fail_futex(bool fshared)
{
	return false;
}
#endif

#ifdef CONFIG_COMPAT
static void compat_exit_robust_list(struct task_struct *curr);
#else
static inline void compat_exit_robust_list(struct task_struct *curr) { }
#endif

static inline void futex_get_mm(union futex_key *key)
{
	mmgrab(key->private.mm);
	/*
	 * Ensure futex_get_mm() implies a full barrier such that
	 * get_futex_key() implies a full barrier. This is relied upon
	 * as smp_mb(); (B), see the ordering comment above.
	 */
	smp_mb__after_atomic();
}

/*
 * Reflects a new waiter being added to the waitqueue.
 */
static inline void hb_waiters_inc(struct futex_hash_bucket *hb)
{
#ifdef CONFIG_SMP
	atomic_inc(&hb->waiters);
	/*
	 * Full barrier (A), see the ordering comment above.
	 */
	smp_mb__after_atomic();
#endif
}

/*
 * Reflects a waiter being removed from the waitqueue by wakeup
 * paths.
 */
static inline void hb_waiters_dec(struct futex_hash_bucket *hb)
{
#ifdef CONFIG_SMP
	atomic_dec(&hb->waiters);
#endif
}

static inline int hb_waiters_pending(struct futex_hash_bucket *hb)
{
#ifdef CONFIG_SMP
	return atomic_read(&hb->waiters);
#else
	return 1;
#endif
}
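
/**
 * hash_futex - Return the hash bucket in the global hash
 * @key:	Pointer to the futex key for which the hash is calculated
 *
 * We hash on the keys returned from get_futex_key (see below) and return the
 * corresponding hash bucket in the global hash.
 */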
static struct futex_hash_bucket *hash_futex(union futex_key *key)
{
	u32 hash = jhash2((u32 *)key, offsetof(typeof(*key), both.offset) / 4,
			  key->both.offset);

	return &futex_queues[hash & (futex_hashsize - 1)];
}
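
/**
 * match_futex - Check whether two futex keys are equal
 * @key1:	Pointer to key1
 * @key2:	Pointer to key2
 *
 * Return 1 if two futex_keys are equal, 0 otherwise.
 */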
static inline int match_futex(union futex_key *key1, union futex_key *key2)
{
	return (key1 && key2
		&& key1->both.word == key2->both.word
		&& key1->both.ptr == key2->both.ptr
		&& key1->both.offset == key2->both.offset);
}
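
/*
 * Take a reference to the resource addressed by a key.
 * Can be called while holding spinlocks.
 */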
static void get_futex_key_refs(union futex_key *key)
{
	if (!key->both.ptr)
		return;

	/*
	 * On MMU less systems futexes are always "private" as there is no per
	 * process address space. We need the smp barrier only for ordering
	 * against waiters, see the comment above.
	 */
	if (!IS_ENABLED(CONFIG_MMU)) {
		smp_mb(); /* explicit smp_mb(); (B) */
		return;
	}

	switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
	case FUT_OFF_INODE:
		smp_mb();		/* explicit smp_mb(); (B) */
		break;
	case FUT_OFF_MMSHARED:
		futex_get_mm(key); /* implies smp_mb(); (B) */
		break;
	default:
		/*
		 * Private futexes do not hold a reference on an inode or
		 * mm, therefore the only purpose of calling
		 * get_futex_key_refs() is because we need the barrier for
		 * the lockless waiter check.
		 */
		smp_mb(); /* explicit smp_mb(); (B) */
	}
}

/*
 * Drop a reference to the resource addressed by a key.
 * The hash bucket spinlock must not be held. This is
 * a no-op for private futexes, see comment in the get
 * counterpart.
 */
static void drop_futex_key_refs(union futex_key *key)
{
	if (!key->both.ptr) {
		/* If we're here then we tried to put a key we failed to get */
		WARN_ON_ONCE(1);
		return;
	}

	if (!IS_ENABLED(CONFIG_MMU))
		return;

	switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
	case FUT_OFF_INODE:
		break;
	case FUT_OFF_MMSHARED:
		mmdrop(key->private.mm);
		break;
	}
}

enum futex_access {
	FUTEX_READ,
	FUTEX_WRITE
};
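
/**
 * futex_setup_timer - set up the sleeping hrtimer.
 * @time:	ptr to the given timeout value
 * @timeout:	the hrtimer_sleeper structure to be set up
 * @flags:	futex flags
 * @range_ns:	optional range in ns
 *
 * Return: Initialized hrtimer_sleeper structure or NULL if no timeout
 *	   value given
 */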
static inline struct hrtimer_sleeper *
futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
		  int flags, u64 range_ns)
{
	if (!time)
		return NULL;

	hrtimer_init_sleeper_on_stack(timeout, (flags & FLAGS_CLOCKRT) ?
				      CLOCK_REALTIME : CLOCK_MONOTONIC,
				      HRTIMER_MODE_ABS);
	/*
	 * If range_ns is 0, calling hrtimer_set_expires_range_ns() is
	 * effectively the same as calling hrtimer_set_expires().
	 */
	hrtimer_set_expires_range_ns(&timeout->timer, *time, range_ns);

	return timeout;
}
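
/*
 * Generate a machine wide unique identifier for this inode.
 *
 * This relies on u64 not wrapping in the life-time of the machine; which
 * with 1ns resolution means almost 585 years.
 *
 * This further relies on the fact that a well formed program will not unmap
 * the file while it has a (shared) futex waiting on it. This mapping will
 * have a file reference which pins the mount and inode.
 *
 * If for some reason an inode gets evicted and read back in again, it will
 * get a new sequence number and will _NOT_ match, even though it is the
 * exact same file.
 *
 * It is important that match_futex() will never have a false-positive, esp.
 * for PI futexes that can mess up the state. The above argues that
 * false-negatives are only possible for malformed programs.
 */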
static u64 get_inode_sequence_number(struct inode *inode)
{
	static atomic64_t i_seq;
	u64 old;

	/* Does the inode already have a sequence number? */
	old = atomic64_read(&inode->i_sequence);
	if (likely(old))
		return old;

	for (;;) {
		u64 new = atomic64_add_return(1, &i_seq);
		if (WARN_ON_ONCE(!new))
			continue;

		old = atomic64_cmpxchg_relaxed(&inode->i_sequence, 0, new);
		if (old)
			return old;
		return new;
	}
}
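
/**
 * get_futex_key() - Get parameters which are the keys for a futex
 * @uaddr:	virtual address of the futex
 * @fshared:	0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED
 * @key:	address where result is stored.
 * @rw:		mapping needs to be read/write (values: FUTEX_READ,
 *		FUTEX_WRITE)
 *
 * Return: a negative error code or 0
 *
 * The key words are stored in @key on success.
 *
 * For shared mappings (when @fshared), the key is:
 *   ( inode->i_sequence, page->index, offset_within_page )
 * [ also see get_inode_sequence_number() ]
 *
 * For private mappings (or when !@fshared), the key is:
 *   ( current->mm, address, 0 )
 *
 * This allows (cross process, where applicable) identification of the futex
 * without keeping the page pinned for the duration of the FUTEX_WAIT.
 *
 * lock_page() might sleep, the caller should not hold a spinlock.
 */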
static int
get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, enum futex_access rw)
{
	unsigned long address = (unsigned long)uaddr;
	struct mm_struct *mm = current->mm;
	struct page *page, *tail;
	struct address_space *mapping;
	int err, ro = 0;

	/*
	 * The futex address must be "naturally" aligned.
	 */
	key->both.offset = address % PAGE_SIZE;
	if (unlikely((address % sizeof(u32)) != 0))
		return -EINVAL;
	address -= key->both.offset;

	if (unlikely(!access_ok(uaddr, sizeof(u32))))
		return -EFAULT;

	if (unlikely(should_fail_futex(fshared)))
		return -EFAULT;

	/*
	 * PROCESS_PRIVATE futexes are fast.
	 * As the mm cannot disappear under us and the 'key' only needs
	 * virtual address, we dont even have to find the underlying vma.
	 * Note : We do have to check 'uaddr' is a valid user address,
	 *        but access_ok() should be faster than find_vma()
	 */
	if (!fshared) {
		key->private.mm = mm;
		key->private.address = address;
		get_futex_key_refs(key);  /* implies smp_mb(); (B) */
		return 0;
	}

again:
	/* Ignore any VERIFY_READ mapping (futex common case) */
	if (unlikely(should_fail_futex(fshared)))
		return -EFAULT;

	err = get_user_pages_fast(address, 1, FOLL_WRITE, &page);
	/*
	 * If write access is not required (eg. FUTEX_WAIT), try
	 * and get read-only access.
	 */
	if (err == -EFAULT && rw == FUTEX_READ) {
		err = get_user_pages_fast(address, 1, 0, &page);
		ro = 1;
	}
	if (err < 0)
		return err;
	else
		err = 0;

	/*
	 * The treatment of mapping from this point on is critical. The page
	 * lock protects many things but in this context the page lock
	 * stabilizes mapping, prevents inode freeing in the shared
	 * file-backed region case and guards against movement to swap cache.
	 *
	 * Mapping checks require the head page for any compound page so the
	 * head page and mapping is looked up now. For anonymous pages, it
	 * does not matter if the page splits in the future as the key is
	 * based on the address. For filesystem-backed pages, the tail is
	 * required as the index of the page determines the key.
	 */
	tail = page;
	page = compound_head(page);
	mapping = READ_ONCE(page->mapping);

	/*
	 * If page->mapping is NULL, then it cannot be a PageAnon page; but
	 * it might be the ZERO_PAGE, in the gate area, in a special mapping,
	 * or a good file page that was truncated or invalidated before we
	 * got the page lock (all cases which we are happy to fail).
	 *
	 * The case we do have to guard against is when memory pressure made
	 * shmem_writepage move it from filecache to swapcache beneath us:
	 * an unlikely race, but we do need to retry for page->mapping.
	 */
	if (unlikely(!mapping)) {
		int shmem_swizzled;

		/*
		 * Page lock is required to identify which special case above
		 * applies. If this is really a shmem page then the page lock
		 * will prevent unexpected transitions.
		 */
		lock_page(page);
		shmem_swizzled = PageSwapCache(page) || page->mapping;
		unlock_page(page);
		put_page(page);

		if (shmem_swizzled)
			goto again;

		return -EFAULT;
	}

	/*
	 * Private mappings are handled in a simple way.
	 *
	 * If the futex key is stored on an anonymous page, then the associated
	 * object is the mm which is implicitly pinned by the calling process.
	 *
	 * NOTE: When userspace waits on a MAP_SHARED mapping, even if
	 * it's a read-only handle, it's expected that futexes attach to
	 * the object not the particular process.
	 */
	if (PageAnon(page)) {
		/*
		 * A RO anonymous page will never change and thus doesn't make
		 * sense for futex operations.
		 */
		if (unlikely(should_fail_futex(fshared)) || ro) {
			err = -EFAULT;
			goto out;
		}

		key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */
		key->private.mm = mm;
		key->private.address = address;

	} else {
		struct inode *inode;

		/*
		 * The associated futex object in this case is the inode and
		 * the page->mapping must be traversed. Ordinarily this should
		 * be stabilised under page lock but it's not strictly
		 * necessary in this case as we just want to pin the inode, not
		 * update the radix tree or anything like that.
		 *
		 * The RCU read lock is taken as the inode is finally freed
		 * under RCU. If the mapping still matches expectations then the
		 * mapping->host can be safely accessed as being a valid inode.
		 */
		rcu_read_lock();

		if (READ_ONCE(page->mapping) != mapping) {
			rcu_read_unlock();
			put_page(page);

			goto again;
		}

		inode = READ_ONCE(mapping->host);
		if (!inode) {
			rcu_read_unlock();
			put_page(page);

			goto again;
		}

		key->both.offset |= FUT_OFF_INODE; /* inode-based key */
		key->shared.i_seq = get_inode_sequence_number(inode);
		key->shared.pgoff = basepage_index(tail);
		rcu_read_unlock();
	}

	get_futex_key_refs(key); /* implies smp_mb(); (B) */

out:
	put_page(page);
	return err;
}

static inline void put_futex_key(union futex_key *key)
{
	drop_futex_key_refs(key);
}
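
/**
 * fault_in_user_writeable() - Fault in user address and verify RW access
 * @uaddr:	pointer to faulting user space address
 *
 * Slow path to fixup the fault we just took in the atomic write
 * access to @uaddr.
 *
 * We have no generic implementation of a non-destructive write to the
 * user address. We know that we faulted in the atomic pagefault
 * disabled section so we can as well avoid the #PF overhead by
 * calling get_user_pages() right away.
 */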
static int fault_in_user_writeable(u32 __user *uaddr)
{
	struct mm_struct *mm = current->mm;
	int ret;

	down_read(&mm->mmap_sem);
	ret = fixup_user_fault(current, mm, (unsigned long)uaddr,
			       FAULT_FLAG_WRITE, NULL);
	up_read(&mm->mmap_sem);

	return ret < 0 ? ret : 0;
}
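
/**
 * futex_top_waiter() - Return the highest priority waiter on a futex
 * @hb:		the hash bucket the futex_q's reside in
 * @key:	the futex key (to distinguish it from other futex futex_q's)
 *
 * Must be called with the hb lock held.
 */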
static struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb,
					union futex_key *key)
{
	struct futex_q *this;

	plist_for_each_entry(this, &hb->chain, list) {
		if (match_futex(&this->key, key))
			return this;
	}
	return NULL;
}

static int cmpxchg_futex_value_locked(u32 *curval, u32 __user *uaddr,
				      u32 uval, u32 newval)
{
	int ret;

	pagefault_disable();
	ret = futex_atomic_cmpxchg_inatomic(curval, uaddr, uval, newval);
	pagefault_enable();

	return ret;
}

static int get_futex_value_locked(u32 *dest, u32 __user *from)
{
	int ret;

	pagefault_disable();
	ret = __get_user(*dest, from);
	pagefault_enable();

	return ret ? -EFAULT : 0;
}
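
/*
 * PI code:
 */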
static int refill_pi_state_cache(void)
{
	struct futex_pi_state *pi_state;

	if (likely(current->pi_state_cache))
		return 0;

	pi_state = kzalloc(sizeof(*pi_state), GFP_KERNEL);

	if (!pi_state)
		return -ENOMEM;

	INIT_LIST_HEAD(&pi_state->list);
	/* pi_mutex gets initialized later */
	pi_state->owner = NULL;
	refcount_set(&pi_state->refcount, 1);
	pi_state->key = FUTEX_KEY_INIT;

	current->pi_state_cache = pi_state;

	return 0;
}

static struct futex_pi_state *alloc_pi_state(void)
{
	struct futex_pi_state *pi_state = current->pi_state_cache;

	WARN_ON(!pi_state);
	current->pi_state_cache = NULL;

	return pi_state;
}

static void get_pi_state(struct futex_pi_state *pi_state)
{
	WARN_ON_ONCE(!refcount_inc_not_zero(&pi_state->refcount));
}

/*
 * Drops a reference to the pi_state object and frees or caches it
 * when the last reference is gone.
 */
static void put_pi_state(struct futex_pi_state *pi_state)
{
	if (!pi_state)
		return;

	if (!refcount_dec_and_test(&pi_state->refcount))
		return;

	/*
	 * If pi_state->owner is NULL, the owner is most probably dying
	 * and has cleaned up the pi_state already
	 */
	if (pi_state->owner) {
		struct task_struct *owner;

		raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
		owner = pi_state->owner;
		if (owner) {
			raw_spin_lock(&owner->pi_lock);
			list_del_init(&pi_state->list);
			raw_spin_unlock(&owner->pi_lock);
		}
		rt_mutex_proxy_unlock(&pi_state->pi_mutex, owner);
		raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
	}

	if (current->pi_state_cache) {
		kfree(pi_state);
	} else {
		/*
		 * pi_state->list is already empty.
		 * clear pi_state->owner.
		 * refcount is at 0 - put it back to 1.
		 */
		pi_state->owner = NULL;
		refcount_set(&pi_state->refcount, 1);
		current->pi_state_cache = pi_state;
	}
}
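
/*
 * This task is holding PI mutexes at exit time => bad.
 * Kernel cleans up PI-state, but userspace is likely hosed.
 * (Robust-futex cleanup is separate and might save the day for userspace.)
 */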
#ifdef CONFIG_FUTEX_PI

static void exit_pi_state_list(struct task_struct *curr)
{
	struct list_head *next, *head = &curr->pi_state_list;
	struct futex_pi_state *pi_state;
	struct futex_hash_bucket *hb;
	union futex_key key = FUTEX_KEY_INIT;

	if (!futex_cmpxchg_enabled)
		return;
	/*
	 * We are a ZOMBIE and nobody can enqueue itself on
	 * pi_state_list anymore, but we have to be careful
	 * versus waiters unqueueing themselves:
	 */
	raw_spin_lock_irq(&curr->pi_lock);
	while (!list_empty(head)) {
		next = head->next;
		pi_state = list_entry(next, struct futex_pi_state, list);
		key = pi_state->key;
		hb = hash_futex(&key);

		/*
		 * We can race against put_pi_state() removing itself from the
		 * list (a waiter going away). put_pi_state() will first
		 * decrement the reference count and then modify the list, so
		 * its possible to see the list entry but fail this reference
		 * acquire.
		 *
		 * In that case; drop the locks to let put_pi_state() make
		 * progress and retry the loop.
		 */
		if (!refcount_inc_not_zero(&pi_state->refcount)) {
			raw_spin_unlock_irq(&curr->pi_lock);
			cpu_relax();
			raw_spin_lock_irq(&curr->pi_lock);
			continue;
		}
		raw_spin_unlock_irq(&curr->pi_lock);

		spin_lock(&hb->lock);
		raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
		raw_spin_lock(&curr->pi_lock);
		/*
		 * We dropped the pi-lock, so re-check whether this
		 * task still owns the PI-state:
		 */
		if (head->next != next) {
			/* retain curr->pi_lock for the loop invariant */
			raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
			spin_unlock(&hb->lock);
			put_pi_state(pi_state);
			continue;
		}

		WARN_ON(pi_state->owner != curr);
		WARN_ON(list_empty(&pi_state->list));
		list_del_init(&pi_state->list);
		pi_state->owner = NULL;

		raw_spin_unlock(&curr->pi_lock);
		raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
		spin_unlock(&hb->lock);

		rt_mutex_futex_unlock(&pi_state->pi_mutex);
		put_pi_state(pi_state);

		raw_spin_lock_irq(&curr->pi_lock);
	}
	raw_spin_unlock_irq(&curr->pi_lock);
}
#else
static inline void exit_pi_state_list(struct task_struct *curr) { }
#endif
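
/*
 * Validation of a pi_state against the user space futex value:
 *
 * The TID stored in the user space value and pi_state->owner must be
 * consistent. The exceptions are stale OWNER_DIED states where the owner
 * has already been cleaned up (pi_state->owner == NULL with user TID == 0)
 * and a pending owner change with FUTEX_OWNER_DIED set and user TID == 0,
 * where the first waiter takes over. Anything else is a corrupted state
 * and is rejected with -EINVAL.
 *
 * Serialization and lifetime rules:
 *
 * hb->lock:
 *	hb -> futex_q, relation
 *	futex_q -> pi_state, relation
 *
 * pi_mutex->wait_lock:
 *	{uval, pi_state}
 *	(and pi_mutex 'obviously')
 *
 * p->pi_lock:
 *	p->pi_state_list -> pi_state->list, relation
 *
 * pi_state->refcount:
 *	pi_state lifetime
 *
 * Lock order:
 *   hb->lock
 *     pi_mutex->wait_lock
 *       p->pi_lock
 */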
static int attach_to_pi_state(u32 __user *uaddr, u32 uval,
			      struct futex_pi_state *pi_state,
			      struct futex_pi_state **ps)
{
	pid_t pid = uval & FUTEX_TID_MASK;
	u32 uval2;
	int ret;

	/*
	 * Userspace might have messed up non-PI and PI futexes
	 */
	if (unlikely(!pi_state))
		return -EINVAL;

	/*
	 * We get here with hb->lock held, and having found a
	 * futex_top_waiter(). This means that futex_lock_pi() of said futex_q
	 * has dropped the hb->lock in between queue_me() and unqueue_me_pi(),
	 * which in turn means that futex_lock_pi() still has a reference on
	 * our pi_state.
	 */
	WARN_ON(!refcount_read(&pi_state->refcount));

	/*
	 * Now that we have a pi_state, we can acquire wait_lock
	 * and do the state validation.
	 */
	raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);

	/*
	 * Since {uval, pi_state} is serialized by wait_lock, and our current
	 * uval was read without holding it, it can have changed. Verify it
	 * still is what we expect it to be, otherwise retry the entire
	 * operation.
	 */
	if (get_futex_value_locked(&uval2, uaddr))
		goto out_efault;

	if (uval != uval2)
		goto out_eagain;

	/*
	 * Handle the owner died case:
	 */
	if (uval & FUTEX_OWNER_DIED) {
		/*
		 * exit_pi_state_list sets owner to NULL and wakes the
		 * topmost waiter. The task which acquires the
		 * pi_state->rt_mutex will fixup owner.
		 */
		if (!pi_state->owner) {
			/*
			 * No pi state owner, but the user space TID
			 * is not 0. Inconsistent state.
			 */
			if (pid)
				goto out_einval;
			/*
			 * Take a ref on the state and return success.
			 */
			goto out_attach;
		}

		/*
		 * If TID is 0, then either the dying owner has not
		 * yet executed exit_pi_state_list() or some waiter
		 * acquired the rtmutex in the pi state, but did not
		 * yet fixup the TID in user space.
		 *
		 * Take a ref on the state and return success.
		 */
		if (!pid)
			goto out_attach;
	} else {
		/*
		 * If the owner died bit is not set, then the pi_state
		 * must have an owner.
		 */
		if (!pi_state->owner)
			goto out_einval;
	}

	/*
	 * Bail out if user space manipulated the futex value. If pi
	 * state exists then the owner TID must be the same as the
	 * user space TID.
	 */
	if (pid != task_pid_vnr(pi_state->owner))
		goto out_einval;

out_attach:
	get_pi_state(pi_state);
	raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
	*ps = pi_state;
	return 0;

out_einval:
	ret = -EINVAL;
	goto out_error;

out_eagain:
	ret = -EAGAIN;
	goto out_error;

out_efault:
	ret = -EFAULT;
	goto out_error;

out_error:
	raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
	return ret;
}
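
/**
 * wait_for_owner_exiting - Block until the owner has exited
 * @ret: owner's current futex lock status
 * @exiting:	Pointer to the exiting task
 *
 * Caller must hold a refcount on @exiting.
 */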
static void wait_for_owner_exiting(int ret, struct task_struct *exiting)
{
	if (ret != -EBUSY) {
		WARN_ON_ONCE(exiting);
		return;
	}

	if (WARN_ON_ONCE(ret == -EBUSY && !exiting))
		return;

	mutex_lock(&exiting->futex_exit_mutex);
	/*
	 * No point in doing state checking here. If the waiter got here
	 * while the task was in exec()->exec_futex_release() then it can
	 * have any FUTEX_STATE_* value when the waiter has acquired the
	 * mutex. OK, if running, EXITING or DEAD if it reached exit()
	 * already. Highly unlikely and not a problem. Just one more round
	 * trip through the futex code.
	 *
	 * The mutex serializes against the concurrent exit.
	 */
	mutex_unlock(&exiting->futex_exit_mutex);

	put_task_struct(exiting);
}
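
/*
 * handle_exit_race() - Handle the race against the exiting @tsk
 *
 * If the owner task is not yet FUTEX_STATE_DEAD return -EBUSY so the
 * caller can wait for the exit to complete. Otherwise, re-read the user
 * space value: if it changed, the exiting task's robust list cleanup has
 * fiddled with it and the caller must retry (-EAGAIN); if it is unchanged,
 * the cleanup is done and there is no owner to attach to (-ESRCH).
 */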
static int handle_exit_race(u32 __user *uaddr, u32 uval,
			    struct task_struct *tsk)
{
	u32 uval2;

	/*
	 * If the futex exit state is not yet FUTEX_STATE_DEAD, tell the
	 * caller that the alleged owner is busy.
	 */
	if (tsk && tsk->futex_state != FUTEX_STATE_DEAD)
		return -EBUSY;

	/*
	 * Reread the user space value to handle the race against the
	 * exiting task's robust list cleanup: the exit code sets the
	 * OWNER_DIED bit after we read the value, so returning -ESRCH
	 * unconditionally would be wrong when the user space value was
	 * changed by the exiting task.
	 */
	if (get_futex_value_locked(&uval2, uaddr))
		return -EFAULT;

	/* If the user space value has changed, try again. */
	if (uval2 != uval)
		return -EAGAIN;

	/*
	 * The exiting task did not have a robust list, the robust list was
	 * corrupted or the user space value in *uaddr is simply bogus.
	 * Give up and tell user space.
	 */
	return -ESRCH;
}
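
/*
 * Lookup the task for the TID provided from user space and attach to
 * it after doing proper sanity checks.
 */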
static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key,
			      struct futex_pi_state **ps,
			      struct task_struct **exiting)
{
	pid_t pid = uval & FUTEX_TID_MASK;
	struct futex_pi_state *pi_state;
	struct task_struct *p;

	/*
	 * We are the first waiter - try to look up the real owner and attach
	 * the new pi_state to it, but bail out when TID = 0
	 *
	 * The !pid check is paranoid. None of the call sites should end up
	 * with pid == 0, but better safe than sorry. Let the caller retry.
	 */
	if (!pid)
		return -EAGAIN;
	p = find_get_task_by_vpid(pid);
	if (!p)
		return handle_exit_race(uaddr, uval, NULL);

	if (unlikely(p->flags & PF_KTHREAD)) {
		put_task_struct(p);
		return -EPERM;
	}

	/*
	 * We need to look at the task state to figure out, whether the
	 * task is exiting. To protect against the change of the task state
	 * in futex_exit_release(), we do this protected by p->pi_lock:
	 */
	raw_spin_lock_irq(&p->pi_lock);
	if (unlikely(p->futex_state != FUTEX_STATE_OK)) {
		/*
		 * The task is on the way out. When the futex state is
		 * FUTEX_STATE_DEAD, we know that the task has finished
		 * the cleanup:
		 */
		int ret = handle_exit_race(uaddr, uval, p);

		raw_spin_unlock_irq(&p->pi_lock);
		/*
		 * If the owner task is between FUTEX_STATE_EXITING and
		 * FUTEX_STATE_DEAD then store the task pointer and keep
		 * the reference on the task struct. The calling code will
		 * drop all locks, wait for the task to reach
		 * FUTEX_STATE_DEAD and then drop the refcount. This is
		 * required to prevent a live lock when the current task
		 * preempted the exiting task between the two states.
		 */
		if (ret == -EBUSY)
			*exiting = p;
		else
			put_task_struct(p);
		return ret;
	}

	/*
	 * No existing pi state. First waiter.
	 *
	 * This creates pi_state, we have hb->lock held, this means nothing can
	 * observe this state, wait_lock is irrelevant.
	 */
	pi_state = alloc_pi_state();

	/*
	 * Initialize the pi_mutex in locked state and make @p
	 * the owner of it:
	 */
	rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p);

	/* Store the key for possible exit cleanups: */
	pi_state->key = *key;

	WARN_ON(!list_empty(&pi_state->list));
	list_add(&pi_state->list, &p->pi_state_list);
	/*
	 * Assignment without holding pi_state->pi_mutex.wait_lock is safe
	 * because there is no concurrency as the object is not published yet.
	 */
	pi_state->owner = p;
	raw_spin_unlock_irq(&p->pi_lock);

	put_task_struct(p);

	*ps = pi_state;

	return 0;
}

static int lookup_pi_state(u32 __user *uaddr, u32 uval,
			   struct futex_hash_bucket *hb,
			   union futex_key *key, struct futex_pi_state **ps,
			   struct task_struct **exiting)
{
	struct futex_q *top_waiter = futex_top_waiter(hb, key);

	/*
	 * If there is a waiter on that futex, validate it and
	 * attach to the pi_state when the validation succeeds.
	 */
	if (top_waiter)
		return attach_to_pi_state(uaddr, uval, top_waiter->pi_state, ps);

	/*
	 * We are the first waiter - try to look up the owner based on
	 * @uval and attach to it.
	 */
	return attach_to_pi_owner(uaddr, uval, key, ps, exiting);
}

static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
{
	int err;
	u32 uninitialized_var(curval);

	if (unlikely(should_fail_futex(true)))
		return -EFAULT;

	err = cmpxchg_futex_value_locked(&curval, uaddr, uval, newval);
	if (unlikely(err))
		return err;

	/* If user space value changed, let the caller retry */
	return curval != uval ? -EAGAIN : 0;
}
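
/**
 * futex_lock_pi_atomic() - Atomic work required to acquire a pi aware futex
 * @uaddr:		the pi futex user address
 * @hb:			the pi futex hash bucket
 * @key:		the futex key associated with uaddr and hb
 * @ps:			the pi_state pointer where we store the result of the
 *			lookup
 * @task:		the task to perform the atomic lock work for.  This will
 *			be "current" except in the case of requeue pi.
 * @exiting:		Pointer to store the task pointer of the exiting task
 * @set_waiters:	force setting the FUTEX_WAITERS bit (1) or not (0)
 *
 * Return:
 *  -  0 - ready to wait;
 *  -  1 - acquired the lock;
 *  - <0 - error
 *
 * The hb->lock and futex_key refs shall be held by the caller.
 */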
static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
				union futex_key *key,
				struct futex_pi_state **ps,
				struct task_struct *task,
				struct task_struct **exiting,
				int set_waiters)
{
	u32 uval, newval, vpid = task_pid_vnr(task);
	struct futex_q *top_waiter;
	int ret;

	/*
	 * Read the user space value first so we can validate a few
	 * things before proceeding further.
	 */
	if (get_futex_value_locked(&uval, uaddr))
		return -EFAULT;

	if (unlikely(should_fail_futex(true)))
		return -EFAULT;

	/*
	 * Detect deadlocks.
	 */
	if (unlikely((uval & FUTEX_TID_MASK) == vpid))
		return -EDEADLK;

	if (unlikely(should_fail_futex(true)))
		return -EDEADLK;

	/*
	 * Lookup existing state first. If it exists, try to attach to
	 * its pi_state.
	 */
	top_waiter = futex_top_waiter(hb, key);
	if (top_waiter)
		return attach_to_pi_state(uaddr, uval, top_waiter->pi_state, ps);

	/*
	 * No waiter and user TID is 0. We are here because the
	 * waiters or the owner died bit is set or called from
	 * requeue_cmp_pi or for whatever reason something took the
	 * syscall.
	 */
	if (!(uval & FUTEX_TID_MASK)) {
		/*
		 * We take over the futex. No other waiters and the user space
		 * TID is 0. We preserve the owner died bit.
		 */
		newval = uval & FUTEX_OWNER_DIED;
		newval |= vpid;

		/* The futex requeue_pi code can enforce the waiters bit */
		if (set_waiters)
			newval |= FUTEX_WAITERS;

		ret = lock_pi_update_atomic(uaddr, uval, newval);
		/* If the take over worked, return 1 */
		return ret < 0 ? ret : 1;
	}

	/*
	 * First waiter. Set the waiters bit before attaching ourself to
	 * the owner. If owner tries to unlock, it will be forced into
	 * the kernel and blocked on hb->lock.
	 */
	newval = uval | FUTEX_WAITERS;
	ret = lock_pi_update_atomic(uaddr, uval, newval);
	if (ret)
		return ret;
	/*
	 * If the update of the user space value succeeded, we try to
	 * attach to the owner. If that fails, no harm done, we only
	 * set the FUTEX_WAITERS bit in the user space variable.
	 */
	return attach_to_pi_owner(uaddr, newval, key, ps, exiting);
}
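
/**
 * __unqueue_futex() - Remove the futex_q from its futex_hash_bucket
 * @q:	The futex_q to unqueue
 *
 * The q->lock_ptr must not be NULL and must be held by the caller.
 */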
static void __unqueue_futex(struct futex_q *q)
{
	struct futex_hash_bucket *hb;

	if (WARN_ON_SMP(!q->lock_ptr) || WARN_ON(plist_node_empty(&q->list)))
		return;
	lockdep_assert_held(q->lock_ptr);

	hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock);
	plist_del(&q->list, &hb->chain);
	hb_waiters_dec(hb);
}
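
/*
 * The hash bucket lock must be held when this is called.
 * Afterwards, the futex_q must not be accessed. Callers
 * must ensure to later call wake_up_q() for the actual
 * wakeups to occur.
 */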
static void mark_wake_futex(struct wake_q_head *wake_q, struct futex_q *q)
{
	struct task_struct *p = q->task;

	if (WARN(q->pi_state || q->rt_waiter, "refusing to wake PI futex\n"))
		return;

	get_task_struct(p);
	__unqueue_futex(q);
	/*
	 * The waiting task can free the futex_q as soon as q->lock_ptr = NULL
	 * is written, without taking any locks. This is possible in the event
	 * of a spurious wakeup, for example. A memory barrier is required here
	 * to prevent the following store to lock_ptr from getting ahead of the
	 * plist_del in __unqueue_futex().
	 */
	smp_store_release(&q->lock_ptr, NULL);

	/*
	 * Queue the task for later wakeup for after we've released
	 * the hb->lock.
	 */
	wake_q_add_safe(wake_q, p);
}
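
/*
 * Caller must hold a reference on @pi_state.
 */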
static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_state)
{
	u32 uninitialized_var(curval), newval;
	struct task_struct *new_owner;
	bool postunlock = false;
	DEFINE_WAKE_Q(wake_q);
	int ret = 0;

	new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);
	if (WARN_ON_ONCE(!new_owner)) {
		/*
		 * As per the comment in futex_unlock_pi() this should not
		 * happen.
		 *
		 * When this happens, give up our locks and try again, giving
		 * the futex_lock_pi() instance time to complete, either by
		 * waiting on the rtmutex or removing itself from the futex
		 * queue.
		 */
		ret = -EAGAIN;
		goto out_unlock;
	}

	/*
	 * We pass it to the next owner. The WAITERS bit is always kept
	 * enabled while there is PI state around. We cleanup the owner
	 * died bit, because we are the owner.
	 */
	newval = FUTEX_WAITERS | task_pid_vnr(new_owner);

	/* An injected fault must abort before touching the user space value */
	if (unlikely(should_fail_futex(true))) {
		ret = -EFAULT;
		goto out_unlock;
	}

	ret = cmpxchg_futex_value_locked(&curval, uaddr, uval, newval);
	if (!ret && (curval != uval)) {
		/*
		 * If a unconditional UNLOCK_PI operation (user space did not
		 * try the TID->0 transition) raced with a waiter setting the
		 * FUTEX_WAITERS flag between get_user() and locking the hash
		 * bucket lock, retry the operation.
		 */
		if ((FUTEX_TID_MASK & curval) == uval)
			ret = -EAGAIN;
		else
			ret = -EINVAL;
	}

	if (ret)
		goto out_unlock;

	/*
	 * This is a point of no return; once we modify the uval there is no
	 * going back and subsequent operations must not fail.
	 */

	raw_spin_lock(&pi_state->owner->pi_lock);
	WARN_ON(list_empty(&pi_state->list));
	list_del_init(&pi_state->list);
	raw_spin_unlock(&pi_state->owner->pi_lock);

	raw_spin_lock(&new_owner->pi_lock);
	WARN_ON(!list_empty(&pi_state->list));
	list_add(&pi_state->list, &new_owner->pi_state_list);
	pi_state->owner = new_owner;
	raw_spin_unlock(&new_owner->pi_lock);

	postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q);

out_unlock:
	raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);

	if (postunlock)
		rt_mutex_postunlock(&wake_q);

	return ret;
}
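
/*
 * Express the locking dependencies for lockdep:
 */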
static inline void
double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
{
	if (hb1 <= hb2) {
		spin_lock(&hb1->lock);
		if (hb1 < hb2)
			spin_lock_nested(&hb2->lock, SINGLE_DEPTH_NESTING);
	} else {
		spin_lock(&hb2->lock);
		spin_lock_nested(&hb1->lock, SINGLE_DEPTH_NESTING);
	}
}

static inline void
double_unlock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
{
	spin_unlock(&hb1->lock);
	if (hb1 != hb2)
		spin_unlock(&hb2->lock);
}
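
/*
 * Wake up waiters matching bitset queued on this futex (uaddr).
 */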
static int
futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
{
	struct futex_hash_bucket *hb;
	struct futex_q *this, *next;
	union futex_key key = FUTEX_KEY_INIT;
	int ret;
	DEFINE_WAKE_Q(wake_q);

	if (!bitset)
		return -EINVAL;

	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, FUTEX_READ);
	if (unlikely(ret != 0))
		goto out;

	hb = hash_futex(&key);

	/* Make sure we really have tasks to wakeup */
	if (!hb_waiters_pending(hb))
		goto out_put_key;

	spin_lock(&hb->lock);

	plist_for_each_entry_safe(this, next, &hb->chain, list) {
		if (match_futex (&this->key, &key)) {
			if (this->pi_state || this->rt_waiter) {
				ret = -EINVAL;
				break;
			}

			/* Check if one of the bits is set in both bitsets */
			if (!(this->bitset & bitset))
				continue;

			mark_wake_futex(&wake_q, this);
			if (++ret >= nr_wake)
				break;
		}
	}

	spin_unlock(&hb->lock);
	wake_up_q(&wake_q);
out_put_key:
	put_futex_key(&key);
out:
	return ret;
}

static int futex_atomic_op_inuser(unsigned int encoded_op, u32 __user *uaddr)
{
	unsigned int op =	  (encoded_op & 0x70000000) >> 28;
	unsigned int cmp =	  (encoded_op & 0x0f000000) >> 24;
	int oparg = sign_extend32((encoded_op & 0x00fff000) >> 12, 11);
	int cmparg = sign_extend32(encoded_op & 0x00000fff, 11);
	int oldval, ret;

	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) {
		if (oparg < 0 || oparg > 31) {
			char comm[sizeof(current->comm)];
			/*
			 * kill this print and return -EINVAL when userspace
			 * is sane again
			 */
			pr_info_ratelimited("futex_wake_op: %s tries to shift op by %d; fix this program\n",
					get_task_comm(comm, current), oparg);
			oparg &= 31;
		}
		oparg = 1 << oparg;
	}

	if (!access_ok(uaddr, sizeof(u32)))
		return -EFAULT;

	ret = arch_futex_atomic_op_inuser(op, oparg, &oldval, uaddr);
	if (ret)
		return ret;

	switch (cmp) {
	case FUTEX_OP_CMP_EQ:
		return oldval == cmparg;
	case FUTEX_OP_CMP_NE:
		return oldval != cmparg;
	case FUTEX_OP_CMP_LT:
		return oldval < cmparg;
	case FUTEX_OP_CMP_GE:
		return oldval >= cmparg;
	case FUTEX_OP_CMP_LE:
		return oldval <= cmparg;
	case FUTEX_OP_CMP_GT:
		return oldval > cmparg;
	default:
		return -ENOSYS;
	}
}
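
/*
 * Wake up all waiters hashed on the physical page that is mapped
 * to this virtual address:
 */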
static int
futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
	      int nr_wake, int nr_wake2, int op)
{
	union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
	struct futex_hash_bucket *hb1, *hb2;
	struct futex_q *this, *next;
	int ret, op_ret;
	DEFINE_WAKE_Q(wake_q);

retry:
	ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, FUTEX_READ);
	if (unlikely(ret != 0))
		goto out;
	ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, FUTEX_WRITE);
	if (unlikely(ret != 0))
		goto out_put_key1;

	hb1 = hash_futex(&key1);
	hb2 = hash_futex(&key2);

retry_private:
	double_lock_hb(hb1, hb2);
	op_ret = futex_atomic_op_inuser(op, uaddr2);
	if (unlikely(op_ret < 0)) {
		double_unlock_hb(hb1, hb2);

		if (!IS_ENABLED(CONFIG_MMU) ||
		    unlikely(op_ret != -EFAULT && op_ret != -EAGAIN)) {
			/*
			 * we don't get EFAULT from MMU faults if we don't have
			 * an MMU, but we might get them from range checking
			 */
			ret = op_ret;
			goto out_put_keys;
		}

		if (op_ret == -EFAULT) {
			ret = fault_in_user_writeable(uaddr2);
			if (ret)
				goto out_put_keys;
		}

		if (!(flags & FLAGS_SHARED)) {
			cond_resched();
			goto retry_private;
		}

		put_futex_key(&key2);
		put_futex_key(&key1);
		cond_resched();
		goto retry;
	}

	plist_for_each_entry_safe(this, next, &hb1->chain, list) {
		if (match_futex (&this->key, &key1)) {
			if (this->pi_state || this->rt_waiter) {
				ret = -EINVAL;
				goto out_unlock;
			}
			mark_wake_futex(&wake_q, this);
			if (++ret >= nr_wake)
				break;
		}
	}

	if (op_ret > 0) {
		op_ret = 0;
		plist_for_each_entry_safe(this, next, &hb2->chain, list) {
			if (match_futex (&this->key, &key2)) {
				if (this->pi_state || this->rt_waiter) {
					ret = -EINVAL;
					goto out_unlock;
				}
				mark_wake_futex(&wake_q, this);
				if (++op_ret >= nr_wake2)
					break;
			}
		}
		ret += op_ret;
	}

out_unlock:
	double_unlock_hb(hb1, hb2);
	wake_up_q(&wake_q);
out_put_keys:
	put_futex_key(&key2);
out_put_key1:
	put_futex_key(&key1);
out:
	return ret;
}
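
/**
 * requeue_futex() - Requeue a futex_q from one hb to another
 * @q:		the futex_q to requeue
 * @hb1:	the source hash_bucket
 * @hb2:	the target hash_bucket
 * @key2:	the new key for the requeued futex_q
 */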
static inline
void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
		   struct futex_hash_bucket *hb2, union futex_key *key2)
{

	/*
	 * If key1 and key2 hash to the same bucket, no need to
	 * requeue.
	 */
	if (likely(&hb1->chain != &hb2->chain)) {
		plist_del(&q->list, &hb1->chain);
		hb_waiters_dec(hb1);
		hb_waiters_inc(hb2);
		plist_add(&q->list, &hb2->chain);
		q->lock_ptr = &hb2->lock;
	}
	get_futex_key_refs(key2);
	q->key = *key2;
}
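
/**
 * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue
 * @q:		the futex_q
 * @key:	the key of the requeue target futex
 * @hb:		the hash_bucket of the requeue target futex
 *
 * During futex_requeue, with requeue_pi=1, it is possible to acquire the
 * target futex if it is uncontended or via a lock steal.  Set the futex_q key
 * to the requeue target futex so the waiter can detect the wakeup on the
 * right futex, but remove it from the hb and NULL the rt_waiter so it can
 * detect atomic lock acquisition.  Set the q->lock_ptr to the requeue target
 * hb->lock to protect access to the pi_state to fixup the owner later.
 * Must be called with both q->lock_ptr and hb->lock held.
 */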
static inline
void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
			   struct futex_hash_bucket *hb)
{
	get_futex_key_refs(key);
	q->key = *key;

	__unqueue_futex(q);

	WARN_ON(!q->rt_waiter);
	q->rt_waiter = NULL;

	q->lock_ptr = &hb->lock;

	wake_up_state(q->task, TASK_NORMAL);
}
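
/**
 * futex_proxy_trylock_atomic() - Attempt an atomic lock for the top waiter
 * @pifutex:		the user address of the to futex
 * @hb1:		the from futex hash bucket, must be locked by the caller
 * @hb2:		the to futex hash bucket, must be locked by the caller
 * @key1:		the from futex key
 * @key2:		the to futex key
 * @ps:			address to store the pi_state pointer
 * @exiting:		Pointer to store the task pointer of the exiting task
 * @set_waiters:	force setting the FUTEX_WAITERS bit (1) or not (0)
 *
 * Try and get the lock on behalf of the top waiter if we can do it
 * atomically.  Wake the top waiter if we succeed.  If the caller specified
 * set_waiters, then direct futex_lock_pi_atomic() to force setting the
 * FUTEX_WAITERS bit.  hb1 and hb2 must be held by the caller.
 *
 * Return:
 *  -  0 - failed to acquire the lock atomically;
 *  - >0 - acquired the lock, return value is vpid of the top_waiter
 *  - <0 - error
 */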
static int
futex_proxy_trylock_atomic(u32 __user *pifutex, struct futex_hash_bucket *hb1,
			   struct futex_hash_bucket *hb2, union futex_key *key1,
			   union futex_key *key2, struct futex_pi_state **ps,
			   struct task_struct **exiting, int set_waiters)
{
	struct futex_q *top_waiter = NULL;
	u32 curval;
	int ret, vpid;

	if (get_futex_value_locked(&curval, pifutex))
		return -EFAULT;

	if (unlikely(should_fail_futex(true)))
		return -EFAULT;

	/*
	 * Find the top_waiter and determine if there are additional waiters.
	 * If the caller intends to requeue more than 1 waiter to pifutex,
	 * force futex_lock_pi_atomic() to set the FUTEX_WAITERS bit now,
	 * as we have means to handle the possible fault.  If not, don't set
	 * the bit unnecessarily as it will force the subsequent unlock to
	 * enter the kernel.
	 */
	top_waiter = futex_top_waiter(hb1, key1);

	/* There are no waiters, nothing for us to do. */
	if (!top_waiter)
		return 0;

	/* Ensure we requeue to the expected futex. */
	if (!match_futex(top_waiter->requeue_pi_key, key2))
		return -EINVAL;

	/*
	 * Try to take the lock for top_waiter.  Set the FUTEX_WAITERS bit in
	 * the contended case or if set_waiters is 1.  The pi_state is returned
	 * in ps in contended cases.
	 */
	vpid = task_pid_vnr(top_waiter->task);
	ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task,
				   exiting, set_waiters);
	if (ret == 1) {
		requeue_pi_wake_futex(top_waiter, key2, hb2);
		return vpid;
	}
	return ret;
}
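
/**
 * futex_requeue() - Requeue waiters from uaddr1 to uaddr2
 * @uaddr1:	source futex user address
 * @flags:	futex flags (FLAGS_SHARED, etc.)
 * @uaddr2:	target futex user address
 * @nr_wake:	number of waiters to wake (must be 1 for requeue_pi)
 * @nr_requeue:	number of waiters to requeue (0-INT_MAX)
 * @cmpval:	@uaddr1 expected value (or %NULL)
 * @requeue_pi:	if we are attempting to requeue from a non-pi futex to a
 *		pi futex (pi to pi requeue is not supported)
 *
 * Requeue waiters on uaddr1 to uaddr2. In the requeue_pi case, try to
 * acquire uaddr2 atomically on behalf of the top waiter.
 *
 * Return:
 *  - >=0 - on success, the number of tasks requeued or woken;
 *  -  <0 - on error
 */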
static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
			 u32 __user *uaddr2, int nr_wake, int nr_requeue,
			 u32 *cmpval, int requeue_pi)
{
	union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
	int drop_count = 0, task_count = 0, ret;
	struct futex_pi_state *pi_state = NULL;
	struct futex_hash_bucket *hb1, *hb2;
	struct futex_q *this, *next;
	DEFINE_WAKE_Q(wake_q);

	if (nr_wake < 0 || nr_requeue < 0)
		return -EINVAL;

	/*
	 * When PI not supported: return -ENOSYS if requeue_pi is true,
	 * consequently the compiler knows requeue_pi is always false past
	 * this point which will optimize away all the conditional code
	 * further down.
	 */
	if (!IS_ENABLED(CONFIG_FUTEX_PI) && requeue_pi)
		return -ENOSYS;

	if (requeue_pi) {
		/*
		 * Requeue PI only works on two distinct uaddrs. This
		 * check is only valid for private futexes. See below.
		 */
		if (uaddr1 == uaddr2)
			return -EINVAL;

		/*
		 * requeue_pi requires a pi_state, try to allocate it now
		 * without any locks in case it fails.
		 */
		if (refill_pi_state_cache())
			return -ENOMEM;
		/*
		 * requeue_pi must wake as many tasks as it can, up to nr_wake
		 * + nr_requeue, since it acquires the rt_mutex prior to
		 * returning to userspace, so as to not leave the rt_mutex with
		 * waiters and no owner.  However, second and third wake-ups
		 * cannot be predicted as they involve race conditions with the
		 * first wake and a fault while looking up the pi_state.  Both
		 * pthread_cond_signal() and pthread_cond_broadcast() should
		 * use nr_wake=1.
		 */
		if (nr_wake != 1)
			return -EINVAL;
	}

retry:
	ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, FUTEX_READ);
	if (unlikely(ret != 0))
		goto out;
	ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2,
			    requeue_pi ? FUTEX_WRITE : FUTEX_READ);
	if (unlikely(ret != 0))
		goto out_put_key1;

	/*
	 * The check above which compares uaddrs is not sufficient for
	 * shared futexes. We need to compare the keys:
	 */
	if (requeue_pi && match_futex(&key1, &key2)) {
		ret = -EINVAL;
		goto out_put_keys;
	}

	hb1 = hash_futex(&key1);
	hb2 = hash_futex(&key2);

retry_private:
	hb_waiters_inc(hb2);
	double_lock_hb(hb1, hb2);

	if (likely(cmpval != NULL)) {
		u32 curval;

		ret = get_futex_value_locked(&curval, uaddr1);

		if (unlikely(ret)) {
			double_unlock_hb(hb1, hb2);
			hb_waiters_dec(hb2);

			ret = get_user(curval, uaddr1);
			if (ret)
				goto out_put_keys;

			if (!(flags & FLAGS_SHARED))
				goto retry_private;

			put_futex_key(&key2);
			put_futex_key(&key1);
			goto retry;
		}
		if (curval != *cmpval) {
			ret = -EAGAIN;
			goto out_unlock;
		}
	}

	if (requeue_pi && (task_count - nr_wake < nr_requeue)) {
		struct task_struct *exiting = NULL;

		/*
		 * Attempt to acquire uaddr2 and wake the top waiter. If we
		 * intend to requeue waiters, force setting the FUTEX_WAITERS
		 * bit.  We force this here where we are able to easily handle
		 * faults rather in the requeue loop below.
		 */
		ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1,
						 &key2, &pi_state,
						 &exiting, nr_requeue);

		/*
		 * At this point the top_waiter has either taken uaddr2 or is
		 * waiting on it.  If the former, then the pi_state will not
		 * exist yet, look it up one more time to ensure we have a
		 * reference to it. If the lock was taken, ret contains the
		 * vpid of the top waiter task.
		 * If the lock was not taken, we have pi_state and an initial
		 * refcount on it. In case of an error we have nothing.
		 */
		if (ret > 0) {
			WARN_ON(pi_state);
			drop_count++;
			task_count++;
			/*
			 * If we acquired the lock, then the user space value
			 * of uaddr2 should be vpid. It cannot be changed by
			 * the top waiter as it is blocked on hb2 lock if it
			 * tries to do so. If something fiddled with it behind
			 * our back the pi state lookup might unearth it. So
			 * we rather use the known value than rereading and
			 * handing potential crap to lookup_pi_state.
			 *
			 * If that call succeeds then we have pi_state and an
			 * initial refcount on it.
			 */
			ret = lookup_pi_state(uaddr2, ret, hb2, &key2,
					      &pi_state, &exiting);
		}

		switch (ret) {
		case 0:
			/* We hold a reference on the pi state. */
			break;

			/* If the above failed, then pi_state is NULL */
		case -EFAULT:
			double_unlock_hb(hb1, hb2);
			hb_waiters_dec(hb2);
			put_futex_key(&key2);
			put_futex_key(&key1);
			ret = fault_in_user_writeable(uaddr2);
			if (!ret)
				goto retry;
			goto out;
		case -EBUSY:
		case -EAGAIN:
			/*
			 * Two reasons for this:
			 * - EBUSY: Owner is exiting and we just wait for the
			 *   exit to complete.
			 * - EAGAIN: The user space value changed.
			 */
			double_unlock_hb(hb1, hb2);
			hb_waiters_dec(hb2);
			put_futex_key(&key2);
			put_futex_key(&key1);
			/*
			 * Handle the case where the owner is in the middle of
			 * exiting. Wait for the exit to complete otherwise
			 * this task might loop forever, aka. live lock.
			 */
			wait_for_owner_exiting(ret, exiting);
			cond_resched();
			goto retry;
		default:
			goto out_unlock;
		}
	}

	plist_for_each_entry_safe(this, next, &hb1->chain, list) {
		if (task_count - nr_wake >= nr_requeue)
			break;

		if (!match_futex(&this->key, &key1))
			continue;

		/*
		 * FUTEX_WAIT_REQUEUE_PI and FUTEX_CMP_REQUEUE_PI should always
		 * be paired with each other and no other futex ops.
		 *
		 * We should never be requeueing a futex_q with a pi_state,
		 * which is awaiting a futex_unlock_pi().
		 */
		if ((requeue_pi && !this->rt_waiter) ||
		    (!requeue_pi && this->rt_waiter) ||
		    this->pi_state) {
			ret = -EINVAL;
			break;
		}

		/*
		 * Wake nr_wake waiters.  For requeue_pi, if we acquired the
		 * lock, we already woke the top_waiter.  If not, it will be
		 * woken by futex_unlock_pi().
		 */
		if (++task_count <= nr_wake && !requeue_pi) {
			mark_wake_futex(&wake_q, this);
			continue;
		}

		/* Ensure we requeue to the expected futex for requeue_pi. */
		if (requeue_pi && !match_futex(this->requeue_pi_key, &key2)) {
			ret = -EINVAL;
			break;
		}

		/*
		 * Requeue nr_requeue waiters and possibly one more in the case
		 * of requeue_pi if we couldn't acquire the lock atomically.
		 */
		if (requeue_pi) {
			/*
			 * Prepare the waiter to take the rt_mutex. Take a
			 * refcount on the pi_state and store the pointer in
			 * the futex_q object of the waiter.
			 */
			get_pi_state(pi_state);
			this->pi_state = pi_state;
			ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex,
							this->rt_waiter,
							this->task);
			if (ret == 1) {
				/*
				 * We got the lock. We do neither drop the
				 * refcount on pi_state nor clear
				 * this->pi_state because the waiter needs the
				 * pi_state for cleaning up the user space
				 * value. It will drop the refcount after
				 * doing so.
				 */
				requeue_pi_wake_futex(this, &key2, hb2);
				drop_count++;
				continue;
			} else if (ret) {
				/*
				 * rt_mutex_start_proxy_lock() detected a
				 * potential deadlock when we tried to queue
				 * that waiter. Drop the pi_state reference
				 * which we took above and remove the pointer
				 * to the state from the waiters futex_q
				 * object.
				 */
				this->pi_state = NULL;
				put_pi_state(pi_state);
				/*
				 * We stop queueing more waiters and let user
				 * space deal with the mess.
				 */
				break;
			}
		}
		requeue_futex(this, hb1, hb2, &key2);
		drop_count++;
	}

	/*
	 * We took an extra initial reference to the pi_state either
	 * in futex_proxy_trylock_atomic() or in lookup_pi_state(). We
	 * need to drop it here again.
	 */
	put_pi_state(pi_state);

out_unlock:
	double_unlock_hb(hb1, hb2);
	wake_up_q(&wake_q);
	hb_waiters_dec(hb2);

	/*
	 * drop_futex_key_refs() must be called outside the spinlocks. During
	 * the requeue we moved futex_q's from the hash bucket at key1 to the
	 * one at key2 and updated their key pointer.  We no longer need to
	 * hold the references to key1.
	 */
	while (--drop_count >= 0)
		drop_futex_key_refs(&key1);

out_put_keys:
	put_futex_key(&key2);
out_put_key1:
	put_futex_key(&key1);
out:
	return ret ? ret : task_count;
}
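
/* The key must be already stored in q->key. */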
static inline struct futex_hash_bucket *queue_lock(struct futex_q *q)
	__acquires(&hb->lock)
{
	struct futex_hash_bucket *hb;

	hb = hash_futex(&q->key);

	/*
	 * Increment the counter before taking the lock so that
	 * a potential waker won't miss a to-be-slept task that is
	 * waiting for the spinlock. This is safe as all queue_lock()
	 * users end up calling queue_me(). Similarly, for housekeeping,
	 * decrement the counter at queue_unlock() when some error has
	 * occurred and we don't end up adding the task to the list.
	 */
	hb_waiters_inc(hb); /* implies smp_mb(); (A) */

	q->lock_ptr = &hb->lock;

	spin_lock(&hb->lock);
	return hb;
}

static inline void
queue_unlock(struct futex_hash_bucket *hb)
	__releases(&hb->lock)
{
	spin_unlock(&hb->lock);
	hb_waiters_dec(hb);
}

static inline void __queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
{
	int prio;

	/*
	 * The priority used to register this element is
	 * - either the real thread-priority for the real-time threads
	 * (i.e. threads with a priority lower than MAX_RT_PRIO)
	 * - or MAX_RT_PRIO for non-RT threads.
	 * Thus, all RT-threads are woken first in priority order, and
	 * the others are woken last, in FIFO order.
	 */
	prio = min(current->normal_prio, MAX_RT_PRIO);

	plist_node_init(&q->list, prio);
	plist_add(&q->list, &hb->chain);
	q->task = current;
}
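
/**
 * queue_me() - Enqueue the futex_q on the futex_hash_bucket
 * @q:	The futex_q to enqueue
 * @hb:	The destination hash bucket
 *
 * The hb->lock must be held by the caller, and is released here. A call to
 * queue_me() is typically paired with exactly one call to unqueue_me().  The
 * exceptions involve the PI related operations, which may use unqueue_me_pi()
 * or nothing if the unqueue is done as part of the wake process and the
 * unqueue state is implicit in the state of the woken task (see
 * futex_wait_requeue_pi() for an example).
 */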
static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
	__releases(&hb->lock)
{
	__queue_me(q, hb);
	spin_unlock(&hb->lock);
}
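
/**
 * unqueue_me() - Remove the futex_q from its futex_hash_bucket
 * @q:	The futex_q to unqueue
 *
 * The q->lock_ptr must not be held by the caller. A call to unqueue_me() must
 * be paired with exactly one earlier call to queue_me().
 *
 * Return:
 *  - 1 - if the futex_q was still queued (and we removed unqueued it);
 *  - 0 - if the futex_q was already removed by the waking thread
 */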
static int unqueue_me(struct futex_q *q)
{
	spinlock_t *lock_ptr;
	int ret = 0;

	/* In the common case we don't take the spinlock, which is nice. */
retry:
	/*
	 * q->lock_ptr can change between this read and the following
	 * spin_lock. Use READ_ONCE to forbid the compiler from reloading
	 * q->lock_ptr and optimizing lock_ptr out of the logic below.
	 */
	lock_ptr = READ_ONCE(q->lock_ptr);
	if (lock_ptr != NULL) {
		spin_lock(lock_ptr);
		/*
		 * q->lock_ptr can change between reading it and
		 * spin_lock(), causing us to take the wrong lock.  This
		 * corrects the race condition.
		 *
		 * Reasoning goes like this: if we have the wrong lock,
		 * q->lock_ptr must have changed (maybe several times)
		 * between reading it and the spin_lock().  It can
		 * change again after the spin_lock() but only if it was
		 * already changed before the spin_lock().  It cannot,
		 * however, change back to the original value.  Therefore
		 * we can detect whether we acquired the correct lock.
		 */
		if (unlikely(lock_ptr != q->lock_ptr)) {
			spin_unlock(lock_ptr);
			goto retry;
		}
		__unqueue_futex(q);

		BUG_ON(q->pi_state);

		spin_unlock(lock_ptr);
		ret = 1;
	}

	drop_futex_key_refs(&q->key);
	return ret;
}
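
/*
 * PI futexes can not be requeued and must remove themself from the
 * hash bucket. The hash bucket lock (i.e. lock_ptr) is held on entry
 * and dropped here.
 */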
static void unqueue_me_pi(struct futex_q *q)
	__releases(q->lock_ptr)
{
	__unqueue_futex(q);

	BUG_ON(!q->pi_state);
	put_pi_state(q->pi_state);
	q->pi_state = NULL;

	spin_unlock(q->lock_ptr);
}

static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
				struct task_struct *argowner)
{
	struct futex_pi_state *pi_state = q->pi_state;
	u32 uval, uninitialized_var(curval), newval;
	struct task_struct *oldowner, *newowner;
	u32 newtid;
	int ret, err = 0;

	lockdep_assert_held(q->lock_ptr);

	raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);

	oldowner = pi_state->owner;

	/*
	 * We are here because either:
	 *
	 *  - we stole the lock and pi_state->owner needs updating to reflect
	 *    that (@argowner == current),
	 *
	 * or:
	 *
	 *  - someone stole our lock and we need to fix things to point to the
	 *    new owner (@argowner == NULL).
	 *
	 * Either way, we have to replace the TID in the user space variable.
	 * This must be atomic as we have to preserve the owner died bit here.
	 *
	 * Note: We write the user space value _before_ changing the pi_state
	 * because we can fault here. Imagine swapped out pages or a fork
	 * that marked all the anonymous memory readonly for cow.
	 *
	 * Modifying pi_state _before_ the user space value would leave the
	 * pi_state in an inconsistent state when we fault here, because we
	 * need to drop the locks to handle the fault. This might be observed
	 * in the PID check in lookup_pi_state.
	 */
retry:
	if (!argowner) {
		if (oldowner != current) {
			/*
			 * We raced against a concurrent self; things are
			 * already fixed up. Nothing to do.
			 */
			ret = 0;
			goto out_unlock;
		}

		if (__rt_mutex_futex_trylock(&pi_state->pi_mutex)) {
			/* We got the lock after all, nothing to fix. */
			ret = 0;
			goto out_unlock;
		}

		/*
		 * Since we just failed the trylock; there must be an owner.
		 */
		newowner = rt_mutex_owner(&pi_state->pi_mutex);
		BUG_ON(!newowner);
	} else {
		WARN_ON_ONCE(argowner != current);
		if (oldowner == current) {
			/*
			 * We raced against a concurrent self; things are
			 * already fixed up. Nothing to do.
			 */
			ret = 0;
			goto out_unlock;
		}
		newowner = argowner;
	}

	newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
	/* Owner died? */
	if (!pi_state->owner)
		newtid |= FUTEX_OWNER_DIED;

	err = get_futex_value_locked(&uval, uaddr);
	if (err)
		goto handle_err;

	for (;;) {
		newval = (uval & FUTEX_OWNER_DIED) | newtid;

		err = cmpxchg_futex_value_locked(&curval, uaddr, uval, newval);
		if (err)
			goto handle_err;

		if (curval == uval)
			break;
		uval = curval;
	}

	/*
	 * We fixed up user space. Now we need to fix the pi_state
	 * itself.
	 */
	if (pi_state->owner != NULL) {
		raw_spin_lock(&pi_state->owner->pi_lock);
		WARN_ON(list_empty(&pi_state->list));
		list_del_init(&pi_state->list);
		raw_spin_unlock(&pi_state->owner->pi_lock);
	}

	pi_state->owner = newowner;

	raw_spin_lock(&newowner->pi_lock);
	WARN_ON(!list_empty(&pi_state->list));
	list_add(&pi_state->list, &newowner->pi_state_list);
	raw_spin_unlock(&newowner->pi_lock);
	raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);

	return 0;

	/*
	 * In order to reschedule or handle a page fault, we need to drop the
	 * locks here. In the case of a fault, this gives the other task
	 * (either the highest priority waiter itself or the task which stole
	 * the rtmutex) the chance to try the fixup of the pi_state. So once we
	 * are back from handling the fault we need to check the pi_state after
	 * reacquiring the locks and before trying to do another fixup. When
	 * the fixup has been done already we simply return.
	 *
	 * Note: we hold both hb->lock and pi_mutex->wait_lock. We can safely
	 * drop hb->lock since the caller owns the hb -> futex_q relation.
	 * Dropping the pi_mutex->wait_lock requires the state revalidate.
	 */
handle_err:
	raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
	spin_unlock(q->lock_ptr);

	switch (err) {
	case -EFAULT:
		ret = fault_in_user_writeable(uaddr);
		break;

	case -EAGAIN:
		cond_resched();
		ret = 0;
		break;

	default:
		WARN_ON_ONCE(1);
		ret = err;
		break;
	}

	spin_lock(q->lock_ptr);
	raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);

	/*
	 * Check if someone else fixed it for us:
	 */
	if (pi_state->owner != oldowner) {
		ret = 0;
		goto out_unlock;
	}

	if (ret)
		goto out_unlock;

	goto retry;

out_unlock:
	raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
	return ret;
}

static long futex_wait_restart(struct restart_block *restart);

/**
 * fixup_owner() - Post lock pi_state and corner case management
 * @uaddr:	user address of the futex
 * @q:		futex_q (contains pi_state and access to the rt_mutex)
 * @locked:	if the attempt to take the rt_mutex succeeded (1) or not (0)
 *
 * After attempting to lock an rt_mutex, this function is called to cleanup
 * the pi_state owner as well as handle race conditions that may allow us to
 * acquire the lock. Must be called with the hb lock held.
 *
 * Return:
 *  -  1 - success, lock taken;
 *  -  0 - success, lock not taken;
 *  - <0 - on error (-EFAULT)
 */
static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
{
	int ret = 0;

	if (locked) {
		/*
		 * Got the lock. We might not be the anticipated owner if we
		 * did a lock-steal - fix up the PI-state in that case:
		 *
		 * Speculative pi_state->owner read (we don't hold wait_lock);
		 * since we own the lock pi_state->owner == current is the
		 * stable state, anything else needs more attention.
		 */
		if (q->pi_state->owner != current)
			ret = fixup_pi_state_owner(uaddr, q, current);
		goto out;
	}

	/*
	 * If we didn't get the lock; check if anybody stole it from us. In
	 * that case, we need to fix up the uval to point to them instead of
	 * us, otherwise bad things happen.
	 *
	 * Another speculative read; pi_state->owner == current is unstable
	 * but needs our attention.
	 */
	if (q->pi_state->owner == current) {
		ret = fixup_pi_state_owner(uaddr, q, NULL);
		goto out;
	}

	/*
	 * Paranoia check. If we did not take the lock, then we should not be
	 * the owner of the rt_mutex.
	 */
	if (rt_mutex_owner(&q->pi_state->pi_mutex) == current) {
		printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p "
				"pi-state %p\n", ret,
				q->pi_state->pi_mutex.owner,
				q->pi_state->owner);
	}

out:
	return ret ? ret : locked;
}
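
/**
 * futex_wait_queue_me() - queue_me() and wait for wakeup, timeout, or signal
 * @hb:		the futex hash bucket, must be locked by the caller
 * @q:		the futex_q to queue up on
 * @timeout:	the prepared hrtimer_sleeper, or null for no timeout
 */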
static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
				struct hrtimer_sleeper *timeout)
{
	/*
	 * The task state is guaranteed to be set before another task can
	 * wake it. set_current_state() is implemented using smp_store_mb() and
	 * queue_me() calls spin_unlock() upon completion, both serializing
	 * access to the hash list and forcing another memory barrier.
	 */
	set_current_state(TASK_INTERRUPTIBLE);
	queue_me(q, hb);

	/* Arm the timer */
	if (timeout)
		hrtimer_sleeper_start_expires(timeout, HRTIMER_MODE_ABS);

	/*
	 * If we have been removed from the hash list, then another task
	 * has tried to wake us, and we can skip the call to schedule().
	 */
	if (likely(!plist_node_empty(&q->list))) {
		/*
		 * If the timer has already expired, current will already be
		 * flagged for rescheduling. Only call schedule if there
		 * is no timeout, or if it has yet to expire.
		 */
		if (!timeout || timeout->task)
			freezable_schedule();
	}
	__set_current_state(TASK_RUNNING);
}
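
/**
 * futex_wait_setup() - Prepare to wait on a futex
 * @uaddr:	the futex userspace address
 * @val:	the expected value
 * @flags:	futex flags (FLAGS_SHARED, etc.)
 * @q:		the associated futex_q
 * @hb:		storage for hash_bucket pointer to be returned to caller
 *
 * Setup the futex_q and locate the hash_bucket.  Get the futex value and
 * compare it with the expected value.  Handle atomic faults internally.
 * Return with the hb lock held and a q.key reference on success, and unlocked
 * with no q.key reference on failure.
 *
 * Return:
 *  -  0 - uaddr contains val and hb has been locked;
 *  - <1 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlocked
 */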
static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
			   struct futex_q *q, struct futex_hash_bucket **hb)
{
	u32 uval;
	int ret;

	/*
	 * Access the page AFTER the hash-bucket is locked.
	 * Order is important:
	 *
	 *   Userspace waiter: val = var; if (cond(val)) futex_wait(&var, val);
	 *   Userspace waker:  if (cond(var)) { var = new; futex_wake(&var); }
	 *
	 * The basic logical guarantee of a futex is that it blocks ONLY
	 * if cond(var) is known to be true at the time of blocking, for
	 * any cond.  If we locked the hash-bucket after testing *uaddr, that
	 * would open a race condition where we could block indefinitely with
	 * cond(var) false, which would violate the guarantee.
	 *
	 * On the other hand, we insert q and release the hash-bucket only
	 * after testing *uaddr.  This guarantees that futex_wait() will NOT
	 * absorb a wakeup if *uaddr does not match the desired values
	 * while the syscall executes.
	 */
retry:
	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key, FUTEX_READ);
	if (unlikely(ret != 0))
		return ret;

retry_private:
	*hb = queue_lock(q);

	ret = get_futex_value_locked(&uval, uaddr);

	if (ret) {
		queue_unlock(*hb);

		ret = get_user(uval, uaddr);
		if (ret)
			goto out;

		if (!(flags & FLAGS_SHARED))
			goto retry_private;

		put_futex_key(&q->key);
		goto retry;
	}

	if (uval != val) {
		queue_unlock(*hb);
		ret = -EWOULDBLOCK;
	}

out:
	if (ret)
		put_futex_key(&q->key);
	return ret;
}

static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
		      ktime_t *abs_time, u32 bitset)
{
	struct hrtimer_sleeper timeout, *to;
	struct restart_block *restart;
	struct futex_hash_bucket *hb;
	struct futex_q q = futex_q_init;
	int ret;

	if (!bitset)
		return -EINVAL;
	q.bitset = bitset;

	to = futex_setup_timer(abs_time, &timeout, flags,
			       current->timer_slack_ns);
retry:
	/*
	 * Prepare to wait on uaddr. On success, holds hb lock and increments
	 * q.key refs.
	 */
	ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
	if (ret)
		goto out;

	/* queue_me and wait for wakeup, timeout, or a signal. */
	futex_wait_queue_me(hb, &q, to);

	/* If we were woken (and unqueued), we succeeded, whatever. */
	ret = 0;
	/* unqueue_me() drops q.key ref */
	if (!unqueue_me(&q))
		goto out;
	ret = -ETIMEDOUT;
	if (to && !to->task)
		goto out;

	/*
	 * We expect signal_pending(current), but we might be the
	 * victim of a spurious wakeup as well.
	 */
	if (!signal_pending(current))
		goto retry;

	ret = -ERESTARTSYS;
	if (!abs_time)
		goto out;

	restart = &current->restart_block;
	restart->fn = futex_wait_restart;
	restart->futex.uaddr = uaddr;
	restart->futex.val = val;
	restart->futex.time = *abs_time;
	restart->futex.bitset = bitset;
	restart->futex.flags = flags | FLAGS_HAS_TIMEOUT;

	ret = -ERESTART_RESTARTBLOCK;

out:
	if (to) {
		hrtimer_cancel(&to->timer);
		destroy_hrtimer_on_stack(&to->timer);
	}
	return ret;
}


static long futex_wait_restart(struct restart_block *restart)
{
	u32 __user *uaddr = restart->futex.uaddr;
	ktime_t t, *tp = NULL;

	if (restart->futex.flags & FLAGS_HAS_TIMEOUT) {
		t = restart->futex.time;
		tp = &t;
	}
	restart->fn = do_no_restart_syscall;

	return (long)futex_wait(uaddr, restart->futex.flags,
				restart->futex.val, tp, restart->futex.bitset);
}
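
/*
 * Userspace tried a 0 -> TID atomic transition of the futex value
 * and failed. The kernel side here does the whole locking operation:
 * if there are waiters then it will block as a consequence of relying
 * on rt-mutexes, it does PI, etc. (Due to races the kernel might see
 * a 0 value of the futex too.)
 *
 * Also serves as futex trylock_pi()'ing, and due semantics.
 */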
static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
			 ktime_t *time, int trylock)
{
	struct hrtimer_sleeper timeout, *to;
	struct futex_pi_state *pi_state = NULL;
	struct task_struct *exiting = NULL;
	struct rt_mutex_waiter rt_waiter;
	struct futex_hash_bucket *hb;
	struct futex_q q = futex_q_init;
	int res, ret;

	if (!IS_ENABLED(CONFIG_FUTEX_PI))
		return -ENOSYS;

	if (refill_pi_state_cache())
		return -ENOMEM;

	to = futex_setup_timer(time, &timeout, FLAGS_CLOCKRT, 0);

retry:
	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key, FUTEX_WRITE);
	if (unlikely(ret != 0))
		goto out;

retry_private:
	hb = queue_lock(&q);

	ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current,
				   &exiting, 0);
	if (unlikely(ret)) {
		/*
		 * Atomic work succeeded and we got the lock,
		 * or failed. Either way, we do _not_ block.
		 */
		switch (ret) {
		case 1:
			/* We got the lock. */
			ret = 0;
			goto out_unlock_put_key;
		case -EFAULT:
			goto uaddr_faulted;
		case -EBUSY:
		case -EAGAIN:
			/*
			 * Two reasons for this:
			 * - EBUSY: Task is exiting and we just wait for the
			 *   exit to complete.
			 * - EAGAIN: The user space value changed.
			 */
			queue_unlock(hb);
			put_futex_key(&q.key);
			/*
			 * Handle the case where the owner is in the middle of
			 * exiting. Wait for the exit to complete otherwise
			 * this task might loop forever, aka. live lock.
			 */
			wait_for_owner_exiting(ret, exiting);
			cond_resched();
			goto retry;
		default:
			goto out_unlock_put_key;
		}
	}

	WARN_ON(!q.pi_state);

	/*
	 * Only actually queue now that the atomic ops are done:
	 */
	__queue_me(&q, hb);

	if (trylock) {
		ret = rt_mutex_futex_trylock(&q.pi_state->pi_mutex);
		/* Fixup the trylock return value: */
		ret = ret ? 0 : -EWOULDBLOCK;
		goto no_block;
	}

	rt_mutex_init_waiter(&rt_waiter);

	/*
	 * We must not hold hb->lock while doing __rt_mutex_start_proxy_lock(),
	 * because that would include hb->lock in the blocking chain on
	 * configurations where hb->lock becomes an rt_mutex, leading to bogus
	 * -EDEADLK reports when futex_unlock_pi() interleaves with this.
	 *
	 * Therefore acquire wait_lock while holding hb->lock, but drop the
	 * latter before calling __rt_mutex_start_proxy_lock(). This
	 * interleaves with futex_unlock_pi() -- which does a similar lock
	 * handoff -- such that the latter can observe the futex_q::pi_state
	 * before __rt_mutex_start_proxy_lock() is done.
	 */
	raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock);
	spin_unlock(q.lock_ptr);
	/*
	 * __rt_mutex_start_proxy_lock() unconditionally enqueues the @rt_waiter
	 * such that futex_unlock_pi() is guaranteed to observe the waiter when
	 * it sees the futex_q::pi_state.
	 */
	ret = __rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current);
	raw_spin_unlock_irq(&q.pi_state->pi_mutex.wait_lock);

	if (ret) {
		if (ret == 1)
			ret = 0;
		goto cleanup;
	}

	if (unlikely(to))
		hrtimer_sleeper_start_expires(to, HRTIMER_MODE_ABS);

	ret = rt_mutex_wait_proxy_lock(&q.pi_state->pi_mutex, to, &rt_waiter);

cleanup:
	spin_lock(q.lock_ptr);
	/*
	 * If we failed to acquire the lock (deadlock/signal/timeout), we must
	 * first acquire the hb->lock before removing the lock from the
	 * rt_mutex waitqueue, such that we can keep the hb and rt_mutex wait
	 * lists consistent.
	 *
	 * In particular; it is important that futex_unlock_pi() can not
	 * observe this inconsistency.
	 */
	if (ret && !rt_mutex_cleanup_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter))
		ret = 0;

no_block:
	/*
	 * Fixup the pi_state owner and possibly acquire the lock if we
	 * haven't already.
	 */
	res = fixup_owner(uaddr, &q, !ret);
	/*
	 * If fixup_owner() returned an error, propagate that.  If it acquired
	 * the lock, clear our -ETIMEDOUT or -EINTR.
	 */
	if (res)
		ret = (res < 0) ? res : 0;

	/*
	 * If fixup_owner() faulted and was unable to handle the fault, unlock
	 * it and return the fault to userspace.
	 */
	if (ret && (rt_mutex_owner(&q.pi_state->pi_mutex) == current)) {
		pi_state = q.pi_state;
		get_pi_state(pi_state);
	}

	/* Unqueue and drop the lock */
	unqueue_me_pi(&q);

	if (pi_state) {
		rt_mutex_futex_unlock(&pi_state->pi_mutex);
		put_pi_state(pi_state);
	}

	goto out_put_key;

out_unlock_put_key:
	queue_unlock(hb);

out_put_key:
	put_futex_key(&q.key);
out:
	if (to) {
		hrtimer_cancel(&to->timer);
		destroy_hrtimer_on_stack(&to->timer);
	}
	return ret != -EINTR ? ret : -ERESTARTNOINTR;

uaddr_faulted:
	queue_unlock(hb);

	ret = fault_in_user_writeable(uaddr);
	if (ret)
		goto out_put_key;

	if (!(flags & FLAGS_SHARED))
		goto retry_private;

	put_futex_key(&q.key);
	goto retry;
}
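
/*
 * Userspace attempted a TID -> 0 atomic transition, and failed.
 * This is the in-kernel slowpath: we look up the PI state (if any),
 * and do the rt-mutex unlock.
 */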
static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
{
	u32 uninitialized_var(curval), uval, vpid = task_pid_vnr(current);
	union futex_key key = FUTEX_KEY_INIT;
	struct futex_hash_bucket *hb;
	struct futex_q *top_waiter;
	int ret;

	if (!IS_ENABLED(CONFIG_FUTEX_PI))
		return -ENOSYS;

retry:
	if (get_user(uval, uaddr))
		return -EFAULT;
	/*
	 * We release only a lock we actually own:
	 */
	if ((uval & FUTEX_TID_MASK) != vpid)
		return -EPERM;

	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, FUTEX_WRITE);
	if (ret)
		return ret;

	hb = hash_futex(&key);
	spin_lock(&hb->lock);

	/*
	 * Check waiters first. We do not trust user space values at
	 * all and we at least want to know if user space fiddled
	 * with the futex value instead of blindly unlocking.
	 */
	top_waiter = futex_top_waiter(hb, &key);
	if (top_waiter) {
		struct futex_pi_state *pi_state = top_waiter->pi_state;

		ret = -EINVAL;
		if (!pi_state)
			goto out_unlock;

		/*
		 * If current does not own the pi_state then the futex is
		 * inconsistent and user space fiddled with the futex value.
		 */
		if (pi_state->owner != current)
			goto out_unlock;

		get_pi_state(pi_state);
		/*
		 * By taking wait_lock while still holding hb->lock, we ensure
		 * there is no point where we hold neither; and therefore
		 * wake_futex_pi() must observe a state consistent with what we
		 * observed.
		 *
		 * In particular; this forces __rt_mutex_start_proxy() to
		 * complete such that we're guaranteed to observe the
		 * rt_waiter. Also see the WARN in wake_futex_pi().
		 */
		raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
		spin_unlock(&hb->lock);

		/* drops pi_state->pi_mutex.wait_lock */
		ret = wake_futex_pi(uaddr, uval, pi_state);

		put_pi_state(pi_state);

		/*
		 * Success, we're done! No tricky corner cases.
		 */
		if (!ret)
			goto out_putkey;
		/*
		 * The atomic access to the futex value generated a
		 * pagefault, so retry the user-access and the wakeup:
		 */
		if (ret == -EFAULT)
			goto pi_faulted;
		/*
		 * A unconditional UNLOCK_PI op raced against a waiter
		 * setting the FUTEX_WAITERS bit. Try again.
		 */
		if (ret == -EAGAIN)
			goto pi_retry;
		/*
		 * wake_futex_pi has detected invalid state. Tell user
		 * space.
		 */
		goto out_putkey;
	}

	/*
	 * We have no kernel internal state, i.e. no waiters in the
	 * kernel. Waiters which are about to queue themselves are stuck
	 * on hb->lock. So we can safely ignore them. We do neither
	 * preserve the WAITERS bit not the OWNER_DIED one. We are the
	 * owner.
	 */
	if ((ret = cmpxchg_futex_value_locked(&curval, uaddr, uval, 0))) {
		spin_unlock(&hb->lock);
		switch (ret) {
		case -EFAULT:
			goto pi_faulted;

		case -EAGAIN:
			goto pi_retry;

		default:
			WARN_ON_ONCE(1);
			goto out_putkey;
		}
	}

	/*
	 * If uval has changed, let user space handle it.
	 */
	ret = (curval == uval) ? 0 : -EAGAIN;

out_unlock:
	spin_unlock(&hb->lock);
out_putkey:
	put_futex_key(&key);
	return ret;

pi_retry:
	put_futex_key(&key);
	cond_resched();
	goto retry;

pi_faulted:
	put_futex_key(&key);

	ret = fault_in_user_writeable(uaddr);
	if (!ret)
		goto retry;

	return ret;
}
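
/**
 * handle_early_requeue_pi_wakeup() - Handle early wakeup on the initial futex
 * @hb:		the hash_bucket futex_q was original enqueued on
 * @q:		the futex_q woken while waiting to be requeued
 * @key2:	the futex_key of the requeue target futex
 * @timeout:	the timeout associated with the wait (NULL if none)
 *
 * Detect if the task was woken on the initial futex as opposed to the requeue
 * target futex.  If so, determine if it was a timeout or a signal that caused
 * the wakeup and return the appropriate error code to the caller.  Must be
 * called with the hb lock held.
 *
 * Return:
 *  -  0 = no early wakeup detected;
 *  - <0 = -ETIMEDOUT or -ERESTARTNOINTR
 */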
static inline
int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
				   struct futex_q *q, union futex_key *key2,
				   struct hrtimer_sleeper *timeout)
{
	int ret = 0;

	/*
	 * With the hb lock held, we avoid races while we process the wakeup.
	 * We only need to hold hb (and not hb2) to ensure atomicity as the
	 * wakeup code can't change q.key from uaddr to uaddr2 if we hold hb.
	 * It can't be requeued from uaddr2 to uaddr as the requeue code cannot
	 * hold both hb and hb2 simultaneously (in that order).
	 */
	if (!match_futex(&q->key, key2)) {
		WARN_ON(q->lock_ptr && (&hb->lock != q->lock_ptr));
		/*
		 * We were woken prior to requeue by a timeout or a signal.
		 * Unqueue the futex_q and determine which it was.
		 */
		plist_del(&q->list, &hb->chain);
		hb_waiters_dec(hb);

		/* Handle spurious wakeups gracefully */
		ret = -EWOULDBLOCK;
		if (timeout && !timeout->task)
			ret = -ETIMEDOUT;
		else if (signal_pending(current))
			ret = -ERESTARTNOINTR;
	}
	return ret;
}
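
/**
 * futex_wait_requeue_pi() - Wait on uaddr and take the pi futex uaddr2
 * @uaddr:	the futex we initially wait on (non-pi)
 * @flags:	futex flags (FLAGS_SHARED, FLAGS_CLOCKRT, etc.); both futexes
 *		must be the same type, no requeueing from private to shared
 * @val:	the expected value of uaddr
 * @abs_time:	absolute timeout
 * @bitset:	32 bit wakeup bitset set by userspace, defaults to all
 * @uaddr2:	the pi futex we will take prior to returning to user-space
 *
 * The caller will wait on uaddr and will be requeued by futex_requeue() to
 * uaddr2 which must be PI aware and unique from uaddr.  Normal wakeup will
 * wake on uaddr2 and complete the acquisition of the rt_mutex prior to
 * returning to userspace.  This ensures the rt_mutex maintains an owner when
 * it has waiters; without one, the pi logic would not know which task to
 * boost/deboost if there was a need to.
 *
 * Return:
 *  -  0 - On success;
 *  - <0 - On error
 */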
static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
				 u32 val, ktime_t *abs_time, u32 bitset,
				 u32 __user *uaddr2)
{
	struct hrtimer_sleeper timeout, *to;
	struct futex_pi_state *pi_state = NULL;
	struct rt_mutex_waiter rt_waiter;
	struct futex_hash_bucket *hb;
	union futex_key key2 = FUTEX_KEY_INIT;
	struct futex_q q = futex_q_init;
	int res, ret;

	if (!IS_ENABLED(CONFIG_FUTEX_PI))
		return -ENOSYS;

	if (uaddr == uaddr2)
		return -EINVAL;

	if (!bitset)
		return -EINVAL;

	to = futex_setup_timer(abs_time, &timeout, flags,
			       current->timer_slack_ns);

	/*
	 * The waiter is allocated on our stack, manipulated by the requeue
	 * code while we sleep on uaddr.
	 */
	rt_mutex_init_waiter(&rt_waiter);

	ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, FUTEX_WRITE);
	if (unlikely(ret != 0))
		goto out;

	q.bitset = bitset;
	q.rt_waiter = &rt_waiter;
	q.requeue_pi_key = &key2;

	/*
	 * Prepare to wait on uaddr. On success, increments q.key (key1) ref
	 * count.
	 */
	ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
	if (ret)
		goto out_key2;

	/*
	 * The check above which compares uaddrs is not sufficient for
	 * shared futexes. We need to compare the keys:
	 */
	if (match_futex(&q.key, &key2)) {
		queue_unlock(hb);
		ret = -EINVAL;
		goto out_put_keys;
	}

	/* Queue the futex_q, drop the hb lock, wait for wakeup. */
	futex_wait_queue_me(hb, &q, to);

	spin_lock(&hb->lock);
	ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
	spin_unlock(&hb->lock);
	if (ret)
		goto out_put_keys;

	/*
	 * In order for us to be here, we know our q.key == key2, and since
	 * we took the hb->lock above, we also know that futex_requeue() has
	 * completed and we no longer have to concern ourselves with a wakeup
	 * race with the atomic proxy lock acquisition by the requeue code.
	 */

	/* Check if the requeue code acquired the second futex for us. */
	if (!q.rt_waiter) {
		/*
		 * Got the lock. We might not be the anticipated owner if we
		 * did a lock-steal - fix up the PI-state in that case.
		 */
		if (q.pi_state && (q.pi_state->owner != current)) {
			spin_lock(q.lock_ptr);
			ret = fixup_pi_state_owner(uaddr2, &q, current);
			if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) {
				pi_state = q.pi_state;
				get_pi_state(pi_state);
			}
			/*
			 * Drop the reference to the pi state which
			 * the requeue_pi() code acquired for us.
			 */
			put_pi_state(q.pi_state);
			spin_unlock(q.lock_ptr);
		}
	} else {
		struct rt_mutex *pi_mutex;

		/*
		 * We have been woken up by futex_unlock_pi(), a timeout, or a
		 * signal.  Futex_unlock_pi() will not destroy the lock_ptr nor
		 * the pi_state.
		 */
		WARN_ON(!q.pi_state);
		pi_mutex = &q.pi_state->pi_mutex;
		ret = rt_mutex_wait_proxy_lock(pi_mutex, to, &rt_waiter);

		spin_lock(q.lock_ptr);
		if (ret && !rt_mutex_cleanup_proxy_lock(pi_mutex, &rt_waiter))
			ret = 0;

		debug_rt_mutex_free_waiter(&rt_waiter);
		/*
		 * Fixup the pi_state owner and possibly acquire the lock if we
		 * haven't already.
		 */
		res = fixup_owner(uaddr2, &q, !ret);
		/*
		 * If fixup_owner() returned an error, propagate that.  If it
		 * acquired the lock, clear -ETIMEDOUT or -EINTR.
		 */
		if (res)
			ret = (res < 0) ? res : 0;

		/*
		 * If fixup_pi_state_owner() faulted and was unable to handle
		 * the fault, unlock the rt_mutex and return the fault to
		 * userspace.
		 */
		if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) {
			pi_state = q.pi_state;
			get_pi_state(pi_state);
		}

		/* Unqueue and drop the lock. */
		unqueue_me_pi(&q);
	}

	if (pi_state) {
		rt_mutex_futex_unlock(&pi_state->pi_mutex);
		put_pi_state(pi_state);
	}

	if (ret == -EINTR) {
		/*
		 * We've already been requeued, but cannot restart by calling
		 * futex_lock_pi() directly. We could restart this syscall, but
		 * it would detect that the user space "val" changed and return
		 * -EWOULDBLOCK.  Save the overhead of the restart and return
		 * -EWOULDBLOCK directly.
		 */
		ret = -EWOULDBLOCK;
	}

out_put_keys:
	put_futex_key(&q.key);
out_key2:
	put_futex_key(&key2);

out:
	if (to) {
		hrtimer_cancel(&to->timer);
		destroy_hrtimer_on_stack(&to->timer);
	}
	return ret;
}
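
/*
 * Support for robust futexes: the kernel cleans up held futexes at
 * thread exit time.
 *
 * Implementation: user-space maintains a per-thread list of locks it
 * is holding. Upon do_exit(), the kernel carefully walks this list,
 * and marks all locks that are owned by this thread with the
 * FUTEX_OWNER_DIED bit, and wakes up a waiter (if any). The list is
 * always manipulated with the lock held, so the list is private and
 * per-thread. Userspace also maintains a per-thread 'list_op_pending'
 * field, to allow the kernel to clean up if the thread dies after
 * acquiring the lock, but just before it could have added itself to
 * the list. There can only be one such pending lock.
 */

/**
 * sys_set_robust_list() - Set the robust-futex list head of a task
 * @head:	pointer to the list-head
 * @len:	length of the list-head, as userspace expects
 */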
SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head,
		size_t, len)
{
	if (!futex_cmpxchg_enabled)
		return -ENOSYS;
	/*
	 * The kernel knows only one size for now:
	 */
	if (unlikely(len != sizeof(*head)))
		return -EINVAL;

	current->robust_list = head;

	return 0;
}
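
/**
 * sys_get_robust_list() - Get the robust-futex list head of a task
 * @pid:	pid of the process [zero for current task]
 * @head_ptr:	pointer to a list-head pointer, the kernel fills it in
 * @len_ptr:	pointer to a length field, the kernel fills in the header size
 */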
3512
3513
3514
3515
3516
3517
3518
3519 SYSCALL_DEFINE3(get_robust_list, int, pid,
3520 struct robust_list_head __user * __user *, head_ptr,
3521 size_t __user *, len_ptr)
3522 {
3523 struct robust_list_head __user *head;
3524 unsigned long ret;
3525 struct task_struct *p;
3526
3527 if (!futex_cmpxchg_enabled)
3528 return -ENOSYS;
3529
3530 rcu_read_lock();
3531
3532 ret = -ESRCH;
3533 if (!pid)
3534 p = current;
3535 else {
3536 p = find_task_by_vpid(pid);
3537 if (!p)
3538 goto err_unlock;
3539 }
3540
3541 ret = -EPERM;
3542 if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS))
3543 goto err_unlock;
3544
3545 head = p->robust_list;
3546 rcu_read_unlock();
3547
3548 if (put_user(sizeof(*head), len_ptr))
3549 return -EFAULT;
3550 return put_user(head, head_ptr);
3551
3552 err_unlock:
3553 rcu_read_unlock();
3554
3555 return ret;
3556 }
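
/*
 * Editorial illustration: the read side, as a debugger or checkpoint
 * tool holding ptrace read permission on pid might use it. Assumes
 * the headers from the previous sketch plus <stdio.h>.
 *
 *	struct robust_list_head *head;
 *	size_t len;
 *
 *	if (syscall(SYS_get_robust_list, pid, &head, &len) == 0)
 *		printf("robust list head=%p len=%zu\n", (void *)head, len);
 */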
3557
3558 /* Constants for the pending_op argument of handle_futex_death */
3559 #define HANDLE_DEATH_PENDING true
3560 #define HANDLE_DEATH_LIST false
3561
3562 /*
3563 * Process a futex-list entry, check whether it's owned by the
3564 * dying task, and do notification if so:
3565 */
3566 static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr,
3567 bool pi, bool pending_op)
3568 {
3569 u32 uval, uninitialized_var(nval), mval;
3570 int err;
3571
3572 /* Futex address must be 32bit aligned */
3573 if ((((unsigned long)uaddr) % sizeof(*uaddr)) != 0)
3574 return -1;
3575
3576 retry:
3577 if (get_user(uval, uaddr))
3578 return -1;
3579
3580 
3581 /*
3582 * Special case for regular (non PI) futexes. The unlock path in
3583 * user space has two race scenarios:
3584 *
3585 * 1. The unlock path releases the user space futex value and
3586 *    before it can execute the futex() syscall to wake up
3587 *    waiters it is killed.
3588 *
3589 * 2. A woken up waiter is killed before it can acquire the
3590 *    futex in user space.
3591 *
3592 * In both cases the TID validation below prevents a wakeup of
3593 * potential waiters which can cause these waiters to block
3594 * forever.
3595 *
3596 * In both cases the following conditions are met:
3597 *
3598 *	1) task->robust_list->list_op_pending != NULL
3599 *	   @pending_op == true
3600 *	2) User space futex value == 0
3601 *	3) Regular futex: @pi == false
3602 *
3603 * If these conditions are met, it is safe to attempt waking up a
3604 * potential waiter without touching the user space futex value and
3605 * trying to set the OWNER_DIED bit. The futex value is zero, so the
3606 * exiting task does not own it: either the unlock in user space
3607 * already completed (case 1) or the woken waiter never acquired it
3608 * (case 2). Setting OWNER_DIED here would therefore corrupt an
3609 * otherwise consistent user space mutex state.
3610 */
3611 if (pending_op && !pi && !uval) {
3612 futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
3613 return 0;
3614 }
3615
3616 if ((uval & FUTEX_TID_MASK) != task_pid_vnr(curr))
3617 return 0;
3618 
3619 /*
3620 * Ok, this dying thread is truly holding a futex
3621 * of interest. Set the OWNER_DIED bit atomically
3622 * via cmpxchg, and if the value had FUTEX_WAITERS
3623 * set, wake up a waiter (if any). (We have to do a
3624 * careful fetch first, since the value may be
3625 * manipulated elsewhere, to make sure we read it
3626 * correctly.)
3627 */
3628 
3629 mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
3630 
3631 /*
3632 * We are not holding a lock here, but we want to have
3633 * the pagefault_disable/enable() protection because
3634 * we want to handle the fault gracefully. If the
3635 * access fails we try to fault in the futex with R/W
3636 * verification via get_user_pages(). get_user() above
3637 * does not guarantee R/W access. If that fails we
3638 * give up and leave the futex locked.
3639 */
3640 if ((err = cmpxchg_futex_value_locked(&nval, uaddr, uval, mval))) {
3641 switch (err) {
3642 case -EFAULT:
3643 if (fault_in_user_writeable(uaddr))
3644 return -1;
3645 goto retry;
3646
3647 case -EAGAIN:
3648 cond_resched();
3649 goto retry;
3650
3651 default:
3652 WARN_ON_ONCE(1);
3653 return err;
3654 }
3655 }
3656
3657 if (nval != uval)
3658 goto retry;
3659
3660 /*
3661 * Wake robust non-PI futexes here. The wakeup of
3662 * PI futexes happens in exit_pi_state():
3663 */
3664 if (!pi && (uval & FUTEX_WAITERS))
3665 futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
3666
3667 return 0;
3668 }
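
/*
 * Worked example of the cleanup above (editorial addition), using the
 * uapi bit values FUTEX_WAITERS = 0x80000000, FUTEX_OWNER_DIED =
 * 0x40000000 and FUTEX_TID_MASK = 0x3fffffff:
 *
 *	uval = 0x80001234	// held by TID 0x1234, waiters queued
 *	mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED
 *	     = 0x80000000 | 0x40000000 = 0xc0000000
 *
 * The owner TID is cleared, OWNER_DIED is set, and because
 * FUTEX_WAITERS was set one waiter is woken. That waiter's lock
 * operation then observes FUTEX_OWNER_DIED and can take the lock over
 * (glibc surfaces this as EOWNERDEAD for robust pthread mutexes).
 */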
3669
3670 /*
3671 * Fetch a robust-list pointer. Bit 0 signals PI futexes:
3672 */
3673 static inline int fetch_robust_entry(struct robust_list __user **entry,
3674 struct robust_list __user * __user *head,
3675 unsigned int *pi)
3676 {
3677 unsigned long uentry;
3678
3679 if (get_user(uentry, (unsigned long __user *)head))
3680 return -EFAULT;
3681
3682 *entry = (void __user *)(uentry & ~1UL);
3683 *pi = uentry & 1;
3684
3685 return 0;
3686 }
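
/*
 * Encoding example (editorial addition): userspace tags bit 0 of each
 * list pointer to mark PI futexes, so a PI entry at 0x7f0000001000 is
 * linked as 0x7f0000001001 and decoded above as:
 *
 *	*entry = (void __user *)(0x7f0000001001 & ~1UL); // 0x7f0000001000
 *	*pi    = 0x7f0000001001 & 1;			 // 1: PI futex
 */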
3687
3688 /*
3689 * Walk curr->robust_list (very carefully, it's a userspace list!)
3690 * and mark any locks found there dead, and notify any waiters.
3691 *
3692 * We silently return on any sign of list-walking problem.
3693 */
3694 static void exit_robust_list(struct task_struct *curr)
3695 {
3696 struct robust_list_head __user *head = curr->robust_list;
3697 struct robust_list __user *entry, *next_entry, *pending;
3698 unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
3699 unsigned int uninitialized_var(next_pi);
3700 unsigned long futex_offset;
3701 int rc;
3702
3703 if (!futex_cmpxchg_enabled)
3704 return;
3705
3706 /*
3707 * Fetch the list head (which was registered earlier, via
3708 * sys_set_robust_list()):
3709 */
3710 if (fetch_robust_entry(&entry, &head->list.next, &pi))
3711 return;
3712
3713 
3714 /* Fetch the relative futex offset: */
3715 if (get_user(futex_offset, &head->futex_offset))
3716 return;
3717
3718 /*
3719 * Fetch any possibly pending lock-add first, and handle it if it exists:
3720 */
3721 if (fetch_robust_entry(&pending, &head->list_op_pending, &pip))
3722 return;
3723
3724 next_entry = NULL;
3725 while (entry != &head->list) {
3726 /*
3727 * Fetch the next entry in the list before calling
3728 * handle_futex_death():
3729 */
3730 rc = fetch_robust_entry(&next_entry, &entry->next, &next_pi);
3731
3732 /*
3733 * A pending lock might already be on the list, so don't process it twice:
3734 */
3735 if (entry != pending) {
3736 if (handle_futex_death((void __user *)entry + futex_offset,
3737 curr, pi, HANDLE_DEATH_LIST))
3738 return;
3739 }
3740 if (rc)
3741 return;
3742 entry = next_entry;
3743 pi = next_pi;
3744 /*
3745 * Avoid excessively long or circular lists:
3746 */
3747 if (!--limit)
3748 break;
3749
3750 cond_resched();
3751 }
3752
3753 if (pending) {
3754 handle_futex_death((void __user *)pending + futex_offset,
3755 curr, pip, HANDLE_DEATH_PENDING);
3756 }
3757 }
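
/*
 * For orientation (editorial addition), the userspace layout the walk
 * above traverses. A hypothetical robust lock embeds the list link and
 * registers the link-to-futex-word distance once via futex_offset:
 *
 *	struct robust_mutex {			// hypothetical
 *		struct robust_list list;	// linked into head->list
 *		uint32_t futex;			// the futex word
 *	};
 *
 *	head.futex_offset = offsetof(struct robust_mutex, futex) -
 *			    offsetof(struct robust_mutex, list);
 *
 * With that, "(void __user *)entry + futex_offset" in the loop above
 * lands on the futex word of each lock the dying task still held.
 */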
3758
3759 static void futex_cleanup(struct task_struct *tsk)
3760 {
3761 if (unlikely(tsk->robust_list)) {
3762 exit_robust_list(tsk);
3763 tsk->robust_list = NULL;
3764 }
3765
3766 #ifdef CONFIG_COMPAT
3767 if (unlikely(tsk->compat_robust_list)) {
3768 compat_exit_robust_list(tsk);
3769 tsk->compat_robust_list = NULL;
3770 }
3771 #endif
3772
3773 if (unlikely(!list_empty(&tsk->pi_state_list)))
3774 exit_pi_state_list(tsk);
3775 }
3776
3777 /**
3778 * futex_exit_recursive - Set the tasks futex state to FUTEX_STATE_DEAD
3779 * @tsk: task to set the state on
3780 *
3781 * Set the futex exit state of the task lockless. The futex waiter code
3782 * observes that state when a task is exiting and loops until the task has
3783 * actually finished the futex cleanup. The worst case for this is that the
3784 * waiter runs through the wait loop until the state becomes visible.
3785 *
3786 * This is called from the recursive fault handling path in do_exit().
3787 *
3788 * This is best effort. Either the futex exit code has run already or
3789 * not. If the OWNER_DIED bit has been set on the futex then the waiter can
3790 * take it over. If not, the problem is pushed back to user space. If the
3791 * futex exit code did not run yet, then an already queued waiter might
3792 * block forever, but there is nothing which can be done about that.
3793 */
3794 void futex_exit_recursive(struct task_struct *tsk)
3795 {
3796 /* If the state is FUTEX_STATE_EXITING then futex_exit_mutex is held */
3797 if (tsk->futex_state == FUTEX_STATE_EXITING)
3798 mutex_unlock(&tsk->futex_exit_mutex);
3799 tsk->futex_state = FUTEX_STATE_DEAD;
3800 }
3801
3802 static void futex_cleanup_begin(struct task_struct *tsk)
3803 {
3804 /*
3805 * Prevent various race issues against a concurrent incoming waiter
3806 * including live locks by forcing the waiter to block on
3807 * tsk->futex_exit_mutex when it observes FUTEX_STATE_EXITING in
3808 * attach_to_pi_owner().
3809 */
3810 mutex_lock(&tsk->futex_exit_mutex);
3811
3812 /*
3813 * Switch the state to FUTEX_STATE_EXITING under tsk->pi_lock.
3814 *
3815 * This ensures that all subsequent checks of tsk->futex_state in
3816 * attach_to_pi_owner() must observe FUTEX_STATE_EXITING with
3817 * tsk->pi_lock held.
3818 *
3819 * It guarantees also that a pi_state which was queued right before
3820 * the state change under tsk->pi_lock by a concurrent waiter must
3821 * be observed in exit_pi_state_list().
3822 */
3823 raw_spin_lock_irq(&tsk->pi_lock);
3824 tsk->futex_state = FUTEX_STATE_EXITING;
3825 raw_spin_unlock_irq(&tsk->pi_lock);
3826 }
3827
3828 static void futex_cleanup_end(struct task_struct *tsk, int state)
3829 {
3830 /*
3831 * Store the new state. A plain store suffices: waiters which saw
3832 * FUTEX_STATE_EXITING serialize via tsk->futex_exit_mutex below.
3833 */
3834 tsk->futex_state = state;
3835
3836 /*
3837 * Drop the exit protection: unblocks waiters which observed FUTEX_STATE_EXITING.
3838 */
3839 mutex_unlock(&tsk->futex_exit_mutex);
3840 }
3841
3842 void futex_exec_release(struct task_struct *tsk)
3843 {
3844 /*
3845 * The state handling is done for consistency, but in the case of
3846 * exec() there is no way to prevent further damage as the PID stays
3847 * the same. But for the unlikely and arguably buggy case that a
3848 * futex is held on exec(), this provides at least as much state
3849 * consistency protection which is possible.
3850 */
3851 futex_cleanup_begin(tsk);
3852 futex_cleanup(tsk);
3853
3854 /*
3855 * Reset the state to FUTEX_STATE_OK. The task is alive and about to exec.
3856 */
3857 futex_cleanup_end(tsk, FUTEX_STATE_OK);
3858 }
3859
3860 void futex_exit_release(struct task_struct *tsk)
3861 {
3862 futex_cleanup_begin(tsk);
3863 futex_cleanup(tsk);
3864 futex_cleanup_end(tsk, FUTEX_STATE_DEAD);
3865 }
3866
3867 long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
3868 u32 __user *uaddr2, u32 val2, u32 val3)
3869 {
3870 int cmd = op & FUTEX_CMD_MASK;
3871 unsigned int flags = 0;
3872
3873 if (!(op & FUTEX_PRIVATE_FLAG))
3874 flags |= FLAGS_SHARED;
3875
3876 if (op & FUTEX_CLOCK_REALTIME) {
3877 flags |= FLAGS_CLOCKRT;
3878 if (cmd != FUTEX_WAIT && cmd != FUTEX_WAIT_BITSET &&
3879 cmd != FUTEX_WAIT_REQUEUE_PI)
3880 return -ENOSYS;
3881 }
3882
3883 switch (cmd) {
3884 case FUTEX_LOCK_PI:
3885 case FUTEX_UNLOCK_PI:
3886 case FUTEX_TRYLOCK_PI:
3887 case FUTEX_WAIT_REQUEUE_PI:
3888 case FUTEX_CMP_REQUEUE_PI:
3889 if (!futex_cmpxchg_enabled)
3890 return -ENOSYS;
3891 }
3892
3893 switch (cmd) {
3894 case FUTEX_WAIT:
3895 val3 = FUTEX_BITSET_MATCH_ANY;
3896 /* fall through */
3897 case FUTEX_WAIT_BITSET:
3898 return futex_wait(uaddr, flags, val, timeout, val3);
3899 case FUTEX_WAKE:
3900 val3 = FUTEX_BITSET_MATCH_ANY;
3901 /* fall through */
3902 case FUTEX_WAKE_BITSET:
3903 return futex_wake(uaddr, flags, val, val3);
3904 case FUTEX_REQUEUE:
3905 return futex_requeue(uaddr, flags, uaddr2, val, val2, NULL, 0);
3906 case FUTEX_CMP_REQUEUE:
3907 return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 0);
3908 case FUTEX_WAKE_OP:
3909 return futex_wake_op(uaddr, flags, uaddr2, val, val2, val3);
3910 case FUTEX_LOCK_PI:
3911 return futex_lock_pi(uaddr, flags, timeout, 0);
3912 case FUTEX_UNLOCK_PI:
3913 return futex_unlock_pi(uaddr, flags);
3914 case FUTEX_TRYLOCK_PI:
3915 return futex_lock_pi(uaddr, flags, NULL, 1);
3916 case FUTEX_WAIT_REQUEUE_PI:
3917 val3 = FUTEX_BITSET_MATCH_ANY;
3918 return futex_wait_requeue_pi(uaddr, flags, val, timeout, val3,
3919 uaddr2);
3920 case FUTEX_CMP_REQUEUE_PI:
3921 return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 1);
3922 }
3923 return -ENOSYS;
3924 }
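
/*
 * Editorial illustration: the classic wait/wake pair as dispatched by
 * do_futex() above. For FUTEX_WAIT the utime argument is a relative
 * timeout; the _PRIVATE variants skip the shared-key lookup.
 *
 *	#include <linux/futex.h>
 *	#include <stdint.h>
 *	#include <sys/syscall.h>
 *	#include <time.h>
 *	#include <unistd.h>
 *
 *	static uint32_t word;
 *
 *	// Waiter: sleep while word is still 0, for at most one second.
 *	struct timespec ts = { .tv_sec = 1, .tv_nsec = 0 };
 *	syscall(SYS_futex, &word, FUTEX_WAIT_PRIVATE, 0, &ts, NULL, 0);
 *
 *	// Waker (another thread): publish the change, wake one waiter.
 *	__atomic_store_n(&word, 1, __ATOMIC_RELEASE);
 *	syscall(SYS_futex, &word, FUTEX_WAKE_PRIVATE, 1, NULL, NULL, 0);
 */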
3925
3926
3927 SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
3928 struct __kernel_timespec __user *, utime, u32 __user *, uaddr2,
3929 u32, val3)
3930 {
3931 struct timespec64 ts;
3932 ktime_t t, *tp = NULL;
3933 u32 val2 = 0;
3934 int cmd = op & FUTEX_CMD_MASK;
3935
3936 if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
3937 cmd == FUTEX_WAIT_BITSET ||
3938 cmd == FUTEX_WAIT_REQUEUE_PI)) {
3939 if (unlikely(should_fail_futex(!(op & FUTEX_PRIVATE_FLAG))))
3940 return -EFAULT;
3941 if (get_timespec64(&ts, utime))
3942 return -EFAULT;
3943 if (!timespec64_valid(&ts))
3944 return -EINVAL;
3945
3946 t = timespec64_to_ktime(ts);
3947 if (cmd == FUTEX_WAIT)
3948 t = ktime_add_safe(ktime_get(), t);
3949 tp = &t;
3950 }
3951
3952 /*
3953 * requeue parameter in 'utime' if cmd == FUTEX_*_REQUEUE_*;
3954 * number of waiters to wake in 'utime' if cmd == FUTEX_WAKE_OP. */
3955 if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE ||
3956 cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP)
3957 val2 = (u32) (unsigned long) utime;
3958
3959 return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
3960 }
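
/*
 * Example of the utime overloading above (editorial addition): for the
 * requeue and wake-op commands the fourth argument is not a timespec
 * pointer but an integer smuggled through the pointer slot:
 *
 *	// Wake one waiter on &a, requeue up to INT_MAX more onto &b,
 *	// but only if the word at &a still contains 'expected'.
 *	syscall(SYS_futex, &a, FUTEX_CMP_REQUEUE, 1,
 *		(void *)(unsigned long)INT_MAX, &b, expected);
 */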
3961
3962 #ifdef CONFIG_COMPAT
3963 /*
3964 * Fetch a robust-list pointer. Bit 0 signals PI futexes:
3965 */
3966 static inline int
3967 compat_fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **entry,
3968 compat_uptr_t __user *head, unsigned int *pi)
3969 {
3970 if (get_user(*uentry, head))
3971 return -EFAULT;
3972
3973 *entry = compat_ptr((*uentry) & ~1);
3974 *pi = (unsigned int)(*uentry) & 1;
3975
3976 return 0;
3977 }
3978
3979 static void __user *futex_uaddr(struct robust_list __user *entry,
3980 compat_long_t futex_offset)
3981 {
3982 compat_uptr_t base = ptr_to_compat(entry);
3983 void __user *uaddr = compat_ptr(base + futex_offset);
3984
3985 return uaddr;
3986 }
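
/*
 * Example (editorial addition): a compat entry at 0x40001000 with
 * futex_offset 4 yields compat_ptr(0x40001000 + 4) = 0x40001004. The
 * addition is done in 32-bit compat_uptr_t arithmetic before widening
 * back to a 64-bit user pointer via compat_ptr().
 */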
3987
3988 /*
3989 * Walk curr->compat_robust_list (very carefully, it's a userspace list!)
3990 * and mark any locks found there dead, and notify any waiters.
3991 *
3992 * We silently return on any sign of list-walking problem.
3993 */
3994 static void compat_exit_robust_list(struct task_struct *curr)
3995 {
3996 struct compat_robust_list_head __user *head = curr->compat_robust_list;
3997 struct robust_list __user *entry, *next_entry, *pending;
3998 unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
3999 unsigned int uninitialized_var(next_pi);
4000 compat_uptr_t uentry, next_uentry, upending;
4001 compat_long_t futex_offset;
4002 int rc;
4003
4004 if (!futex_cmpxchg_enabled)
4005 return;
4006
4007 /*
4008 * Fetch the list head (which was registered earlier, via
4009 * sys_set_robust_list()):
4010 */
4011 if (compat_fetch_robust_entry(&uentry, &entry, &head->list.next, &pi))
4012 return;
4013
4014 
4015 /* Fetch the relative futex offset: */
4016 if (get_user(futex_offset, &head->futex_offset))
4017 return;
4018
4019 /*
4020 * Fetch any possibly pending lock-add first, and handle it if it exists:
4021 */
4022 if (compat_fetch_robust_entry(&upending, &pending,
4023 &head->list_op_pending, &pip))
4024 return;
4025
4026 next_entry = NULL;
4027 while (entry != (struct robust_list __user *) &head->list) {
4028 /*
4029 * Fetch the next entry in the list before calling
4030 * handle_futex_death():
4031 */
4032 rc = compat_fetch_robust_entry(&next_uentry, &next_entry,
4033 (compat_uptr_t __user *)&entry->next, &next_pi);
4034 /*
4035 * A pending lock might already be on the list, so
4036 * don't process it twice:
4037 */
4038 if (entry != pending) {
4039 void __user *uaddr = futex_uaddr(entry, futex_offset);
4040
4041 if (handle_futex_death(uaddr, curr, pi,
4042 HANDLE_DEATH_LIST))
4043 return;
4044 }
4045 if (rc)
4046 return;
4047 uentry = next_uentry;
4048 entry = next_entry;
4049 pi = next_pi;
4050 /*
4051 * Avoid excessively long or circular lists:
4052 */
4053 if (!--limit)
4054 break;
4055
4056 cond_resched();
4057 }
4058 if (pending) {
4059 void __user *uaddr = futex_uaddr(pending, futex_offset);
4060
4061 handle_futex_death(uaddr, curr, pip, HANDLE_DEATH_PENDING);
4062 }
4063 }
4064
4065 COMPAT_SYSCALL_DEFINE2(set_robust_list,
4066 struct compat_robust_list_head __user *, head,
4067 compat_size_t, len)
4068 {
4069 if (!futex_cmpxchg_enabled)
4070 return -ENOSYS;
4071
4072 if (unlikely(len != sizeof(*head)))
4073 return -EINVAL;
4074
4075 current->compat_robust_list = head;
4076
4077 return 0;
4078 }
4079
4080 COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid,
4081 compat_uptr_t __user *, head_ptr,
4082 compat_size_t __user *, len_ptr)
4083 {
4084 struct compat_robust_list_head __user *head;
4085 unsigned long ret;
4086 struct task_struct *p;
4087
4088 if (!futex_cmpxchg_enabled)
4089 return -ENOSYS;
4090
4091 rcu_read_lock();
4092
4093 ret = -ESRCH;
4094 if (!pid)
4095 p = current;
4096 else {
4097 p = find_task_by_vpid(pid);
4098 if (!p)
4099 goto err_unlock;
4100 }
4101
4102 ret = -EPERM;
4103 if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS))
4104 goto err_unlock;
4105
4106 head = p->compat_robust_list;
4107 rcu_read_unlock();
4108
4109 if (put_user(sizeof(*head), len_ptr))
4110 return -EFAULT;
4111 return put_user(ptr_to_compat(head), head_ptr);
4112
4113 err_unlock:
4114 rcu_read_unlock();
4115
4116 return ret;
4117 }
4118 #endif
4119
4120 #ifdef CONFIG_COMPAT_32BIT_TIME
4121 SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val,
4122 struct old_timespec32 __user *, utime, u32 __user *, uaddr2,
4123 u32, val3)
4124 {
4125 struct timespec64 ts;
4126 ktime_t t, *tp = NULL;
4127 int val2 = 0;
4128 int cmd = op & FUTEX_CMD_MASK;
4129
4130 if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
4131 cmd == FUTEX_WAIT_BITSET ||
4132 cmd == FUTEX_WAIT_REQUEUE_PI)) {
4133 if (get_old_timespec32(&ts, utime))
4134 return -EFAULT;
4135 if (!timespec64_valid(&ts))
4136 return -EINVAL;
4137
4138 t = timespec64_to_ktime(ts);
4139 if (cmd == FUTEX_WAIT)
4140 t = ktime_add_safe(ktime_get(), t);
4141 tp = &t;
4142 }
4143 if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE ||
4144 cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP)
4145 val2 = (int) (unsigned long) utime;
4146
4147 return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
4148 }
4149 #endif
4150
4151 static void __init futex_detect_cmpxchg(void)
4152 {
4153 #ifndef CONFIG_HAVE_FUTEX_CMPXCHG
4154 u32 curval;
4155
4156 /*
4157 * This will fail and we want it. Some arch implementations do
4158 * runtime detection of the futex_atomic_cmpxchg_inatomic()
4159 * operation. A functional implementation must fault on the NULL
4160 * address passed here and make cmpxchg_futex_value_locked()
4161 * return -EFAULT, while a non-functional one returns -ENOSYS.
4162 * This boot-time probe therefore tells us whether the cmpxchg
4163 * based operations (PI futexes, robust futex cleanup) can be
4164 * enabled on this machine.
4165 */
4166 if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT)
4167 futex_cmpxchg_enabled = 1;
4168 #endif
4169 }
4170
4171 static int __init futex_init(void)
4172 {
4173 unsigned int futex_shift;
4174 unsigned long i;
4175
4176 #if CONFIG_BASE_SMALL
4177 futex_hashsize = 16;
4178 #else
4179 futex_hashsize = roundup_pow_of_two(256 * num_possible_cpus());
4180 #endif
4181
4182 futex_queues = alloc_large_system_hash("futex", sizeof(*futex_queues),
4183 futex_hashsize, 0,
4184 futex_hashsize < 256 ? HASH_SMALL : 0,
4185 &futex_shift, NULL,
4186 futex_hashsize, futex_hashsize);
4187 futex_hashsize = 1UL << futex_shift;
4188
4189 futex_detect_cmpxchg();
4190
4191 for (i = 0; i < futex_hashsize; i++) {
4192 atomic_set(&futex_queues[i].waiters, 0);
4193 plist_head_init(&futex_queues[i].chain);
4194 spin_lock_init(&futex_queues[i].lock);
4195 }
4196
4197 return 0;
4198 }
4199 core_initcall(futex_init);
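
/*
 * Sizing example (editorial addition): on a machine without
 * CONFIG_BASE_SMALL reporting 13 possible CPUs, futex_init() requests
 * roundup_pow_of_two(256 * 13 = 3328) = 4096 buckets.
 * alloc_large_system_hash() hands back the shift it actually used,
 * and futex_hashsize is recomputed as 1UL << futex_shift so that the
 * bucket index computed by hash_futex() masks correctly.
 */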