This source file includes the following definitions.
- unsigned_offsets
- vfs_setpos
- generic_file_llseek_size
- generic_file_llseek
- fixed_size_llseek
- no_seek_end_llseek
- no_seek_end_llseek_size
- noop_llseek
- no_llseek
- default_llseek
- vfs_llseek
- ksys_lseek
- SYSCALL_DEFINE3
- COMPAT_SYSCALL_DEFINE3
- SYSCALL_DEFINE5
- rw_verify_area
- new_sync_read
- __vfs_read
- kernel_read
- vfs_read
- new_sync_write
- __vfs_write
- __kernel_write
- kernel_write
- vfs_write
- file_ppos
- ksys_read
- SYSCALL_DEFINE3
- ksys_write
- SYSCALL_DEFINE3
- ksys_pread64
- SYSCALL_DEFINE4
- ksys_pwrite64
- SYSCALL_DEFINE4
- do_iter_readv_writev
- do_loop_readv_writev
- rw_copy_check_uvector
- compat_rw_copy_check_uvector
- do_iter_read
- vfs_iter_read
- do_iter_write
- vfs_iter_write
- vfs_readv
- vfs_writev
- do_readv
- do_writev
- pos_from_hilo
- do_preadv
- do_pwritev
- SYSCALL_DEFINE3
- SYSCALL_DEFINE3
- SYSCALL_DEFINE5
- SYSCALL_DEFINE6
- SYSCALL_DEFINE5
- SYSCALL_DEFINE6
- compat_readv
- do_compat_readv
- COMPAT_SYSCALL_DEFINE3
- do_compat_preadv64
- COMPAT_SYSCALL_DEFINE4
- COMPAT_SYSCALL_DEFINE5
- COMPAT_SYSCALL_DEFINE5
- COMPAT_SYSCALL_DEFINE6
- compat_writev
- do_compat_writev
- COMPAT_SYSCALL_DEFINE3
- do_compat_pwritev64
- COMPAT_SYSCALL_DEFINE4
- COMPAT_SYSCALL_DEFINE5
- COMPAT_SYSCALL_DEFINE5
- COMPAT_SYSCALL_DEFINE6
- do_sendfile
- SYSCALL_DEFINE4
- SYSCALL_DEFINE4
- COMPAT_SYSCALL_DEFINE4
- COMPAT_SYSCALL_DEFINE4
- generic_copy_file_range
- do_copy_file_range
- vfs_copy_file_range
- SYSCALL_DEFINE6
- remap_verify_area
- generic_remap_check_len
- vfs_dedupe_get_page
- vfs_lock_two_pages
- vfs_unlock_two_pages
- vfs_dedupe_file_range_compare
- generic_remap_file_range_prep
- do_clone_file_range
- vfs_clone_file_range
- allow_file_dedupe
- vfs_dedupe_file_range_one
- vfs_dedupe_file_range
1
2
3
4
5
6
7
8 #include <linux/slab.h>
9 #include <linux/stat.h>
10 #include <linux/sched/xacct.h>
11 #include <linux/fcntl.h>
12 #include <linux/file.h>
13 #include <linux/uio.h>
14 #include <linux/fsnotify.h>
15 #include <linux/security.h>
16 #include <linux/export.h>
17 #include <linux/syscalls.h>
18 #include <linux/pagemap.h>
19 #include <linux/splice.h>
20 #include <linux/compat.h>
21 #include <linux/mount.h>
22 #include <linux/fs.h>
23 #include "internal.h"
24
25 #include <linux/uaccess.h>
26 #include <asm/unistd.h>
27
28 const struct file_operations generic_ro_fops = {
29 .llseek = generic_file_llseek,
30 .read_iter = generic_file_read_iter,
31 .mmap = generic_file_readonly_mmap,
32 .splice_read = generic_file_splice_read,
33 };
34
35 EXPORT_SYMBOL(generic_ro_fops);
36
37 static inline bool unsigned_offsets(struct file *file)
38 {
39 return file->f_mode & FMODE_UNSIGNED_OFFSET;
40 }
41
42
43
44
45
46
47
48
49
50
51
52
53
54 loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize)
55 {
56 if (offset < 0 && !unsigned_offsets(file))
57 return -EINVAL;
58 if (offset > maxsize)
59 return -EINVAL;
60
61 if (offset != file->f_pos) {
62 file->f_pos = offset;
63 file->f_version = 0;
64 }
65 return offset;
66 }
67 EXPORT_SYMBOL(vfs_setpos);
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85 loff_t
86 generic_file_llseek_size(struct file *file, loff_t offset, int whence,
87 loff_t maxsize, loff_t eof)
88 {
89 switch (whence) {
90 case SEEK_END:
91 offset += eof;
92 break;
93 case SEEK_CUR:
94
95
96
97
98
99
100 if (offset == 0)
101 return file->f_pos;
102
103
104
105
106
107 spin_lock(&file->f_lock);
108 offset = vfs_setpos(file, file->f_pos + offset, maxsize);
109 spin_unlock(&file->f_lock);
110 return offset;
111 case SEEK_DATA:
112
113
114
115
116 if ((unsigned long long)offset >= eof)
117 return -ENXIO;
118 break;
119 case SEEK_HOLE:
120
121
122
123
124 if ((unsigned long long)offset >= eof)
125 return -ENXIO;
126 offset = eof;
127 break;
128 }
129
130 return vfs_setpos(file, offset, maxsize);
131 }
132 EXPORT_SYMBOL(generic_file_llseek_size);
133
134
135
136
137
138
139
140
141
142
143
144 loff_t generic_file_llseek(struct file *file, loff_t offset, int whence)
145 {
146 struct inode *inode = file->f_mapping->host;
147
148 return generic_file_llseek_size(file, offset, whence,
149 inode->i_sb->s_maxbytes,
150 i_size_read(inode));
151 }
152 EXPORT_SYMBOL(generic_file_llseek);
153
154
155
156
157
158
159
160
161
162 loff_t fixed_size_llseek(struct file *file, loff_t offset, int whence, loff_t size)
163 {
164 switch (whence) {
165 case SEEK_SET: case SEEK_CUR: case SEEK_END:
166 return generic_file_llseek_size(file, offset, whence,
167 size, size);
168 default:
169 return -EINVAL;
170 }
171 }
172 EXPORT_SYMBOL(fixed_size_llseek);
173
174
175
176
177
178
179
180
181 loff_t no_seek_end_llseek(struct file *file, loff_t offset, int whence)
182 {
183 switch (whence) {
184 case SEEK_SET: case SEEK_CUR:
185 return generic_file_llseek_size(file, offset, whence,
186 OFFSET_MAX, 0);
187 default:
188 return -EINVAL;
189 }
190 }
191 EXPORT_SYMBOL(no_seek_end_llseek);
192
193
194
195
196
197
198
199
200
201 loff_t no_seek_end_llseek_size(struct file *file, loff_t offset, int whence, loff_t size)
202 {
203 switch (whence) {
204 case SEEK_SET: case SEEK_CUR:
205 return generic_file_llseek_size(file, offset, whence,
206 size, 0);
207 default:
208 return -EINVAL;
209 }
210 }
211 EXPORT_SYMBOL(no_seek_end_llseek_size);
212
213
214
215
216
217
218
219
220
221
222
223
224 loff_t noop_llseek(struct file *file, loff_t offset, int whence)
225 {
226 return file->f_pos;
227 }
228 EXPORT_SYMBOL(noop_llseek);
229
230 loff_t no_llseek(struct file *file, loff_t offset, int whence)
231 {
232 return -ESPIPE;
233 }
234 EXPORT_SYMBOL(no_llseek);
235
236 loff_t default_llseek(struct file *file, loff_t offset, int whence)
237 {
238 struct inode *inode = file_inode(file);
239 loff_t retval;
240
241 inode_lock(inode);
242 switch (whence) {
243 case SEEK_END:
244 offset += i_size_read(inode);
245 break;
246 case SEEK_CUR:
247 if (offset == 0) {
248 retval = file->f_pos;
249 goto out;
250 }
251 offset += file->f_pos;
252 break;
253 case SEEK_DATA:
254
255
256
257
258
259 if (offset >= inode->i_size) {
260 retval = -ENXIO;
261 goto out;
262 }
263 break;
264 case SEEK_HOLE:
265
266
267
268
269
270 if (offset >= inode->i_size) {
271 retval = -ENXIO;
272 goto out;
273 }
274 offset = inode->i_size;
275 break;
276 }
277 retval = -EINVAL;
278 if (offset >= 0 || unsigned_offsets(file)) {
279 if (offset != file->f_pos) {
280 file->f_pos = offset;
281 file->f_version = 0;
282 }
283 retval = offset;
284 }
285 out:
286 inode_unlock(inode);
287 return retval;
288 }
289 EXPORT_SYMBOL(default_llseek);
290
291 loff_t vfs_llseek(struct file *file, loff_t offset, int whence)
292 {
293 loff_t (*fn)(struct file *, loff_t, int);
294
295 fn = no_llseek;
296 if (file->f_mode & FMODE_LSEEK) {
297 if (file->f_op->llseek)
298 fn = file->f_op->llseek;
299 }
300 return fn(file, offset, whence);
301 }
302 EXPORT_SYMBOL(vfs_llseek);
303
304 off_t ksys_lseek(unsigned int fd, off_t offset, unsigned int whence)
305 {
306 off_t retval;
307 struct fd f = fdget_pos(fd);
308 if (!f.file)
309 return -EBADF;
310
311 retval = -EINVAL;
312 if (whence <= SEEK_MAX) {
313 loff_t res = vfs_llseek(f.file, offset, whence);
314 retval = res;
315 if (res != (loff_t)retval)
316 retval = -EOVERFLOW;
317 }
318 fdput_pos(f);
319 return retval;
320 }
321
322 SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, whence)
323 {
324 return ksys_lseek(fd, offset, whence);
325 }
326
327 #ifdef CONFIG_COMPAT
328 COMPAT_SYSCALL_DEFINE3(lseek, unsigned int, fd, compat_off_t, offset, unsigned int, whence)
329 {
330 return ksys_lseek(fd, offset, whence);
331 }
332 #endif
333
334 #if !defined(CONFIG_64BIT) || defined(CONFIG_COMPAT)
335 SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high,
336 unsigned long, offset_low, loff_t __user *, result,
337 unsigned int, whence)
338 {
339 int retval;
340 struct fd f = fdget_pos(fd);
341 loff_t offset;
342
343 if (!f.file)
344 return -EBADF;
345
346 retval = -EINVAL;
347 if (whence > SEEK_MAX)
348 goto out_putf;
349
350 offset = vfs_llseek(f.file, ((loff_t) offset_high << 32) | offset_low,
351 whence);
352
353 retval = (int)offset;
354 if (offset >= 0) {
355 retval = -EFAULT;
356 if (!copy_to_user(result, &offset, sizeof(offset)))
357 retval = 0;
358 }
359 out_putf:
360 fdput_pos(f);
361 return retval;
362 }
363 #endif
364
365 int rw_verify_area(int read_write, struct file *file, const loff_t *ppos, size_t count)
366 {
367 struct inode *inode;
368 int retval = -EINVAL;
369
370 inode = file_inode(file);
371 if (unlikely((ssize_t) count < 0))
372 return retval;
373
374
375
376
377
378 if (ppos) {
379 loff_t pos = *ppos;
380
381 if (unlikely(pos < 0)) {
382 if (!unsigned_offsets(file))
383 return retval;
384 if (count >= -pos)
385 return -EOVERFLOW;
386 } else if (unlikely((loff_t) (pos + count) < 0)) {
387 if (!unsigned_offsets(file))
388 return retval;
389 }
390
391 if (unlikely(inode->i_flctx && mandatory_lock(inode))) {
392 retval = locks_mandatory_area(inode, file, pos, pos + count - 1,
393 read_write == READ ? F_RDLCK : F_WRLCK);
394 if (retval < 0)
395 return retval;
396 }
397 }
398
399 return security_file_permission(file,
400 read_write == READ ? MAY_READ : MAY_WRITE);
401 }
402
403 static ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
404 {
405 struct iovec iov = { .iov_base = buf, .iov_len = len };
406 struct kiocb kiocb;
407 struct iov_iter iter;
408 ssize_t ret;
409
410 init_sync_kiocb(&kiocb, filp);
411 kiocb.ki_pos = (ppos ? *ppos : 0);
412 iov_iter_init(&iter, READ, &iov, 1, len);
413
414 ret = call_read_iter(filp, &kiocb, &iter);
415 BUG_ON(ret == -EIOCBQUEUED);
416 if (ppos)
417 *ppos = kiocb.ki_pos;
418 return ret;
419 }
420
421 ssize_t __vfs_read(struct file *file, char __user *buf, size_t count,
422 loff_t *pos)
423 {
424 if (file->f_op->read)
425 return file->f_op->read(file, buf, count, pos);
426 else if (file->f_op->read_iter)
427 return new_sync_read(file, buf, count, pos);
428 else
429 return -EINVAL;
430 }
431
432 ssize_t kernel_read(struct file *file, void *buf, size_t count, loff_t *pos)
433 {
434 mm_segment_t old_fs;
435 ssize_t result;
436
437 old_fs = get_fs();
438 set_fs(KERNEL_DS);
439
440 result = vfs_read(file, (void __user *)buf, count, pos);
441 set_fs(old_fs);
442 return result;
443 }
444 EXPORT_SYMBOL(kernel_read);
445
446 ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
447 {
448 ssize_t ret;
449
450 if (!(file->f_mode & FMODE_READ))
451 return -EBADF;
452 if (!(file->f_mode & FMODE_CAN_READ))
453 return -EINVAL;
454 if (unlikely(!access_ok(buf, count)))
455 return -EFAULT;
456
457 ret = rw_verify_area(READ, file, pos, count);
458 if (!ret) {
459 if (count > MAX_RW_COUNT)
460 count = MAX_RW_COUNT;
461 ret = __vfs_read(file, buf, count, pos);
462 if (ret > 0) {
463 fsnotify_access(file);
464 add_rchar(current, ret);
465 }
466 inc_syscr(current);
467 }
468
469 return ret;
470 }
471
472 static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
473 {
474 struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len };
475 struct kiocb kiocb;
476 struct iov_iter iter;
477 ssize_t ret;
478
479 init_sync_kiocb(&kiocb, filp);
480 kiocb.ki_pos = (ppos ? *ppos : 0);
481 iov_iter_init(&iter, WRITE, &iov, 1, len);
482
483 ret = call_write_iter(filp, &kiocb, &iter);
484 BUG_ON(ret == -EIOCBQUEUED);
485 if (ret > 0 && ppos)
486 *ppos = kiocb.ki_pos;
487 return ret;
488 }
489
490 static ssize_t __vfs_write(struct file *file, const char __user *p,
491 size_t count, loff_t *pos)
492 {
493 if (file->f_op->write)
494 return file->f_op->write(file, p, count, pos);
495 else if (file->f_op->write_iter)
496 return new_sync_write(file, p, count, pos);
497 else
498 return -EINVAL;
499 }
500
501 ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t *pos)
502 {
503 mm_segment_t old_fs;
504 const char __user *p;
505 ssize_t ret;
506
507 if (!(file->f_mode & FMODE_CAN_WRITE))
508 return -EINVAL;
509
510 old_fs = get_fs();
511 set_fs(KERNEL_DS);
512 p = (__force const char __user *)buf;
513 if (count > MAX_RW_COUNT)
514 count = MAX_RW_COUNT;
515 ret = __vfs_write(file, p, count, pos);
516 set_fs(old_fs);
517 if (ret > 0) {
518 fsnotify_modify(file);
519 add_wchar(current, ret);
520 }
521 inc_syscw(current);
522 return ret;
523 }
524 EXPORT_SYMBOL(__kernel_write);
525
526 ssize_t kernel_write(struct file *file, const void *buf, size_t count,
527 loff_t *pos)
528 {
529 mm_segment_t old_fs;
530 ssize_t res;
531
532 old_fs = get_fs();
533 set_fs(KERNEL_DS);
534
535 res = vfs_write(file, (__force const char __user *)buf, count, pos);
536 set_fs(old_fs);
537
538 return res;
539 }
540 EXPORT_SYMBOL(kernel_write);
541
542 ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos)
543 {
544 ssize_t ret;
545
546 if (!(file->f_mode & FMODE_WRITE))
547 return -EBADF;
548 if (!(file->f_mode & FMODE_CAN_WRITE))
549 return -EINVAL;
550 if (unlikely(!access_ok(buf, count)))
551 return -EFAULT;
552
553 ret = rw_verify_area(WRITE, file, pos, count);
554 if (!ret) {
555 if (count > MAX_RW_COUNT)
556 count = MAX_RW_COUNT;
557 file_start_write(file);
558 ret = __vfs_write(file, buf, count, pos);
559 if (ret > 0) {
560 fsnotify_modify(file);
561 add_wchar(current, ret);
562 }
563 inc_syscw(current);
564 file_end_write(file);
565 }
566
567 return ret;
568 }
569
570
571 static inline loff_t *file_ppos(struct file *file)
572 {
573 return file->f_mode & FMODE_STREAM ? NULL : &file->f_pos;
574 }
575
576 ssize_t ksys_read(unsigned int fd, char __user *buf, size_t count)
577 {
578 struct fd f = fdget_pos(fd);
579 ssize_t ret = -EBADF;
580
581 if (f.file) {
582 loff_t pos, *ppos = file_ppos(f.file);
583 if (ppos) {
584 pos = *ppos;
585 ppos = &pos;
586 }
587 ret = vfs_read(f.file, buf, count, ppos);
588 if (ret >= 0 && ppos)
589 f.file->f_pos = pos;
590 fdput_pos(f);
591 }
592 return ret;
593 }
594
595 SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
596 {
597 return ksys_read(fd, buf, count);
598 }
599
600 ssize_t ksys_write(unsigned int fd, const char __user *buf, size_t count)
601 {
602 struct fd f = fdget_pos(fd);
603 ssize_t ret = -EBADF;
604
605 if (f.file) {
606 loff_t pos, *ppos = file_ppos(f.file);
607 if (ppos) {
608 pos = *ppos;
609 ppos = &pos;
610 }
611 ret = vfs_write(f.file, buf, count, ppos);
612 if (ret >= 0 && ppos)
613 f.file->f_pos = pos;
614 fdput_pos(f);
615 }
616
617 return ret;
618 }
619
620 SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
621 size_t, count)
622 {
623 return ksys_write(fd, buf, count);
624 }
625
626 ssize_t ksys_pread64(unsigned int fd, char __user *buf, size_t count,
627 loff_t pos)
628 {
629 struct fd f;
630 ssize_t ret = -EBADF;
631
632 if (pos < 0)
633 return -EINVAL;
634
635 f = fdget(fd);
636 if (f.file) {
637 ret = -ESPIPE;
638 if (f.file->f_mode & FMODE_PREAD)
639 ret = vfs_read(f.file, buf, count, &pos);
640 fdput(f);
641 }
642
643 return ret;
644 }
645
646 SYSCALL_DEFINE4(pread64, unsigned int, fd, char __user *, buf,
647 size_t, count, loff_t, pos)
648 {
649 return ksys_pread64(fd, buf, count, pos);
650 }
651
652 ssize_t ksys_pwrite64(unsigned int fd, const char __user *buf,
653 size_t count, loff_t pos)
654 {
655 struct fd f;
656 ssize_t ret = -EBADF;
657
658 if (pos < 0)
659 return -EINVAL;
660
661 f = fdget(fd);
662 if (f.file) {
663 ret = -ESPIPE;
664 if (f.file->f_mode & FMODE_PWRITE)
665 ret = vfs_write(f.file, buf, count, &pos);
666 fdput(f);
667 }
668
669 return ret;
670 }
671
672 SYSCALL_DEFINE4(pwrite64, unsigned int, fd, const char __user *, buf,
673 size_t, count, loff_t, pos)
674 {
675 return ksys_pwrite64(fd, buf, count, pos);
676 }
677
678 static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter,
679 loff_t *ppos, int type, rwf_t flags)
680 {
681 struct kiocb kiocb;
682 ssize_t ret;
683
684 init_sync_kiocb(&kiocb, filp);
685 ret = kiocb_set_rw_flags(&kiocb, flags);
686 if (ret)
687 return ret;
688 kiocb.ki_pos = (ppos ? *ppos : 0);
689
690 if (type == READ)
691 ret = call_read_iter(filp, &kiocb, iter);
692 else
693 ret = call_write_iter(filp, &kiocb, iter);
694 BUG_ON(ret == -EIOCBQUEUED);
695 if (ppos)
696 *ppos = kiocb.ki_pos;
697 return ret;
698 }
699
700
701 static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter,
702 loff_t *ppos, int type, rwf_t flags)
703 {
704 ssize_t ret = 0;
705
706 if (flags & ~RWF_HIPRI)
707 return -EOPNOTSUPP;
708
709 while (iov_iter_count(iter)) {
710 struct iovec iovec = iov_iter_iovec(iter);
711 ssize_t nr;
712
713 if (type == READ) {
714 nr = filp->f_op->read(filp, iovec.iov_base,
715 iovec.iov_len, ppos);
716 } else {
717 nr = filp->f_op->write(filp, iovec.iov_base,
718 iovec.iov_len, ppos);
719 }
720
721 if (nr < 0) {
722 if (!ret)
723 ret = nr;
724 break;
725 }
726 ret += nr;
727 if (nr != iovec.iov_len)
728 break;
729 iov_iter_advance(iter, nr);
730 }
731
732 return ret;
733 }
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764 ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
765 unsigned long nr_segs, unsigned long fast_segs,
766 struct iovec *fast_pointer,
767 struct iovec **ret_pointer)
768 {
769 unsigned long seg;
770 ssize_t ret;
771 struct iovec *iov = fast_pointer;
772
773
774
775
776
777
778 if (nr_segs == 0) {
779 ret = 0;
780 goto out;
781 }
782
783
784
785
786
787 if (nr_segs > UIO_MAXIOV) {
788 ret = -EINVAL;
789 goto out;
790 }
791 if (nr_segs > fast_segs) {
792 iov = kmalloc_array(nr_segs, sizeof(struct iovec), GFP_KERNEL);
793 if (iov == NULL) {
794 ret = -ENOMEM;
795 goto out;
796 }
797 }
798 if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) {
799 ret = -EFAULT;
800 goto out;
801 }
802
803
804
805
806
807
808
809
810
811
812 ret = 0;
813 for (seg = 0; seg < nr_segs; seg++) {
814 void __user *buf = iov[seg].iov_base;
815 ssize_t len = (ssize_t)iov[seg].iov_len;
816
817
818
819 if (len < 0) {
820 ret = -EINVAL;
821 goto out;
822 }
823 if (type >= 0
824 && unlikely(!access_ok(buf, len))) {
825 ret = -EFAULT;
826 goto out;
827 }
828 if (len > MAX_RW_COUNT - ret) {
829 len = MAX_RW_COUNT - ret;
830 iov[seg].iov_len = len;
831 }
832 ret += len;
833 }
834 out:
835 *ret_pointer = iov;
836 return ret;
837 }
838
839 #ifdef CONFIG_COMPAT
840 ssize_t compat_rw_copy_check_uvector(int type,
841 const struct compat_iovec __user *uvector, unsigned long nr_segs,
842 unsigned long fast_segs, struct iovec *fast_pointer,
843 struct iovec **ret_pointer)
844 {
845 compat_ssize_t tot_len;
846 struct iovec *iov = *ret_pointer = fast_pointer;
847 ssize_t ret = 0;
848 int seg;
849
850
851
852
853
854
855 if (nr_segs == 0)
856 goto out;
857
858 ret = -EINVAL;
859 if (nr_segs > UIO_MAXIOV)
860 goto out;
861 if (nr_segs > fast_segs) {
862 ret = -ENOMEM;
863 iov = kmalloc_array(nr_segs, sizeof(struct iovec), GFP_KERNEL);
864 if (iov == NULL)
865 goto out;
866 }
867 *ret_pointer = iov;
868
869 ret = -EFAULT;
870 if (!access_ok(uvector, nr_segs*sizeof(*uvector)))
871 goto out;
872
873
874
875
876
877
878
879
880
881 tot_len = 0;
882 ret = -EINVAL;
883 for (seg = 0; seg < nr_segs; seg++) {
884 compat_uptr_t buf;
885 compat_ssize_t len;
886
887 if (__get_user(len, &uvector->iov_len) ||
888 __get_user(buf, &uvector->iov_base)) {
889 ret = -EFAULT;
890 goto out;
891 }
892 if (len < 0)
893 goto out;
894 if (type >= 0 &&
895 !access_ok(compat_ptr(buf), len)) {
896 ret = -EFAULT;
897 goto out;
898 }
899 if (len > MAX_RW_COUNT - tot_len)
900 len = MAX_RW_COUNT - tot_len;
901 tot_len += len;
902 iov->iov_base = compat_ptr(buf);
903 iov->iov_len = (compat_size_t) len;
904 uvector++;
905 iov++;
906 }
907 ret = tot_len;
908
909 out:
910 return ret;
911 }
912 #endif
913
914 static ssize_t do_iter_read(struct file *file, struct iov_iter *iter,
915 loff_t *pos, rwf_t flags)
916 {
917 size_t tot_len;
918 ssize_t ret = 0;
919
920 if (!(file->f_mode & FMODE_READ))
921 return -EBADF;
922 if (!(file->f_mode & FMODE_CAN_READ))
923 return -EINVAL;
924
925 tot_len = iov_iter_count(iter);
926 if (!tot_len)
927 goto out;
928 ret = rw_verify_area(READ, file, pos, tot_len);
929 if (ret < 0)
930 return ret;
931
932 if (file->f_op->read_iter)
933 ret = do_iter_readv_writev(file, iter, pos, READ, flags);
934 else
935 ret = do_loop_readv_writev(file, iter, pos, READ, flags);
936 out:
937 if (ret >= 0)
938 fsnotify_access(file);
939 return ret;
940 }
941
942 ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos,
943 rwf_t flags)
944 {
945 if (!file->f_op->read_iter)
946 return -EINVAL;
947 return do_iter_read(file, iter, ppos, flags);
948 }
949 EXPORT_SYMBOL(vfs_iter_read);
950
951 static ssize_t do_iter_write(struct file *file, struct iov_iter *iter,
952 loff_t *pos, rwf_t flags)
953 {
954 size_t tot_len;
955 ssize_t ret = 0;
956
957 if (!(file->f_mode & FMODE_WRITE))
958 return -EBADF;
959 if (!(file->f_mode & FMODE_CAN_WRITE))
960 return -EINVAL;
961
962 tot_len = iov_iter_count(iter);
963 if (!tot_len)
964 return 0;
965 ret = rw_verify_area(WRITE, file, pos, tot_len);
966 if (ret < 0)
967 return ret;
968
969 if (file->f_op->write_iter)
970 ret = do_iter_readv_writev(file, iter, pos, WRITE, flags);
971 else
972 ret = do_loop_readv_writev(file, iter, pos, WRITE, flags);
973 if (ret > 0)
974 fsnotify_modify(file);
975 return ret;
976 }
977
978 ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos,
979 rwf_t flags)
980 {
981 if (!file->f_op->write_iter)
982 return -EINVAL;
983 return do_iter_write(file, iter, ppos, flags);
984 }
985 EXPORT_SYMBOL(vfs_iter_write);
986
987 ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
988 unsigned long vlen, loff_t *pos, rwf_t flags)
989 {
990 struct iovec iovstack[UIO_FASTIOV];
991 struct iovec *iov = iovstack;
992 struct iov_iter iter;
993 ssize_t ret;
994
995 ret = import_iovec(READ, vec, vlen, ARRAY_SIZE(iovstack), &iov, &iter);
996 if (ret >= 0) {
997 ret = do_iter_read(file, &iter, pos, flags);
998 kfree(iov);
999 }
1000
1001 return ret;
1002 }
1003
1004 static ssize_t vfs_writev(struct file *file, const struct iovec __user *vec,
1005 unsigned long vlen, loff_t *pos, rwf_t flags)
1006 {
1007 struct iovec iovstack[UIO_FASTIOV];
1008 struct iovec *iov = iovstack;
1009 struct iov_iter iter;
1010 ssize_t ret;
1011
1012 ret = import_iovec(WRITE, vec, vlen, ARRAY_SIZE(iovstack), &iov, &iter);
1013 if (ret >= 0) {
1014 file_start_write(file);
1015 ret = do_iter_write(file, &iter, pos, flags);
1016 file_end_write(file);
1017 kfree(iov);
1018 }
1019 return ret;
1020 }
1021
1022 static ssize_t do_readv(unsigned long fd, const struct iovec __user *vec,
1023 unsigned long vlen, rwf_t flags)
1024 {
1025 struct fd f = fdget_pos(fd);
1026 ssize_t ret = -EBADF;
1027
1028 if (f.file) {
1029 loff_t pos, *ppos = file_ppos(f.file);
1030 if (ppos) {
1031 pos = *ppos;
1032 ppos = &pos;
1033 }
1034 ret = vfs_readv(f.file, vec, vlen, ppos, flags);
1035 if (ret >= 0 && ppos)
1036 f.file->f_pos = pos;
1037 fdput_pos(f);
1038 }
1039
1040 if (ret > 0)
1041 add_rchar(current, ret);
1042 inc_syscr(current);
1043 return ret;
1044 }
1045
1046 static ssize_t do_writev(unsigned long fd, const struct iovec __user *vec,
1047 unsigned long vlen, rwf_t flags)
1048 {
1049 struct fd f = fdget_pos(fd);
1050 ssize_t ret = -EBADF;
1051
1052 if (f.file) {
1053 loff_t pos, *ppos = file_ppos(f.file);
1054 if (ppos) {
1055 pos = *ppos;
1056 ppos = &pos;
1057 }
1058 ret = vfs_writev(f.file, vec, vlen, ppos, flags);
1059 if (ret >= 0 && ppos)
1060 f.file->f_pos = pos;
1061 fdput_pos(f);
1062 }
1063
1064 if (ret > 0)
1065 add_wchar(current, ret);
1066 inc_syscw(current);
1067 return ret;
1068 }
1069
1070 static inline loff_t pos_from_hilo(unsigned long high, unsigned long low)
1071 {
1072 #define HALF_LONG_BITS (BITS_PER_LONG / 2)
1073 return (((loff_t)high << HALF_LONG_BITS) << HALF_LONG_BITS) | low;
1074 }
1075
1076 static ssize_t do_preadv(unsigned long fd, const struct iovec __user *vec,
1077 unsigned long vlen, loff_t pos, rwf_t flags)
1078 {
1079 struct fd f;
1080 ssize_t ret = -EBADF;
1081
1082 if (pos < 0)
1083 return -EINVAL;
1084
1085 f = fdget(fd);
1086 if (f.file) {
1087 ret = -ESPIPE;
1088 if (f.file->f_mode & FMODE_PREAD)
1089 ret = vfs_readv(f.file, vec, vlen, &pos, flags);
1090 fdput(f);
1091 }
1092
1093 if (ret > 0)
1094 add_rchar(current, ret);
1095 inc_syscr(current);
1096 return ret;
1097 }
1098
1099 static ssize_t do_pwritev(unsigned long fd, const struct iovec __user *vec,
1100 unsigned long vlen, loff_t pos, rwf_t flags)
1101 {
1102 struct fd f;
1103 ssize_t ret = -EBADF;
1104
1105 if (pos < 0)
1106 return -EINVAL;
1107
1108 f = fdget(fd);
1109 if (f.file) {
1110 ret = -ESPIPE;
1111 if (f.file->f_mode & FMODE_PWRITE)
1112 ret = vfs_writev(f.file, vec, vlen, &pos, flags);
1113 fdput(f);
1114 }
1115
1116 if (ret > 0)
1117 add_wchar(current, ret);
1118 inc_syscw(current);
1119 return ret;
1120 }
1121
1122 SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
1123 unsigned long, vlen)
1124 {
1125 return do_readv(fd, vec, vlen, 0);
1126 }
1127
1128 SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec,
1129 unsigned long, vlen)
1130 {
1131 return do_writev(fd, vec, vlen, 0);
1132 }
1133
1134 SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec,
1135 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
1136 {
1137 loff_t pos = pos_from_hilo(pos_h, pos_l);
1138
1139 return do_preadv(fd, vec, vlen, pos, 0);
1140 }
1141
1142 SYSCALL_DEFINE6(preadv2, unsigned long, fd, const struct iovec __user *, vec,
1143 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h,
1144 rwf_t, flags)
1145 {
1146 loff_t pos = pos_from_hilo(pos_h, pos_l);
1147
1148 if (pos == -1)
1149 return do_readv(fd, vec, vlen, flags);
1150
1151 return do_preadv(fd, vec, vlen, pos, flags);
1152 }
1153
1154 SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
1155 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
1156 {
1157 loff_t pos = pos_from_hilo(pos_h, pos_l);
1158
1159 return do_pwritev(fd, vec, vlen, pos, 0);
1160 }
1161
1162 SYSCALL_DEFINE6(pwritev2, unsigned long, fd, const struct iovec __user *, vec,
1163 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h,
1164 rwf_t, flags)
1165 {
1166 loff_t pos = pos_from_hilo(pos_h, pos_l);
1167
1168 if (pos == -1)
1169 return do_writev(fd, vec, vlen, flags);
1170
1171 return do_pwritev(fd, vec, vlen, pos, flags);
1172 }
1173
1174 #ifdef CONFIG_COMPAT
1175 static size_t compat_readv(struct file *file,
1176 const struct compat_iovec __user *vec,
1177 unsigned long vlen, loff_t *pos, rwf_t flags)
1178 {
1179 struct iovec iovstack[UIO_FASTIOV];
1180 struct iovec *iov = iovstack;
1181 struct iov_iter iter;
1182 ssize_t ret;
1183
1184 ret = compat_import_iovec(READ, vec, vlen, UIO_FASTIOV, &iov, &iter);
1185 if (ret >= 0) {
1186 ret = do_iter_read(file, &iter, pos, flags);
1187 kfree(iov);
1188 }
1189 if (ret > 0)
1190 add_rchar(current, ret);
1191 inc_syscr(current);
1192 return ret;
1193 }
1194
1195 static size_t do_compat_readv(compat_ulong_t fd,
1196 const struct compat_iovec __user *vec,
1197 compat_ulong_t vlen, rwf_t flags)
1198 {
1199 struct fd f = fdget_pos(fd);
1200 ssize_t ret;
1201 loff_t pos;
1202
1203 if (!f.file)
1204 return -EBADF;
1205 pos = f.file->f_pos;
1206 ret = compat_readv(f.file, vec, vlen, &pos, flags);
1207 if (ret >= 0)
1208 f.file->f_pos = pos;
1209 fdput_pos(f);
1210 return ret;
1211
1212 }
1213
1214 COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd,
1215 const struct compat_iovec __user *,vec,
1216 compat_ulong_t, vlen)
1217 {
1218 return do_compat_readv(fd, vec, vlen, 0);
1219 }
1220
1221 static long do_compat_preadv64(unsigned long fd,
1222 const struct compat_iovec __user *vec,
1223 unsigned long vlen, loff_t pos, rwf_t flags)
1224 {
1225 struct fd f;
1226 ssize_t ret;
1227
1228 if (pos < 0)
1229 return -EINVAL;
1230 f = fdget(fd);
1231 if (!f.file)
1232 return -EBADF;
1233 ret = -ESPIPE;
1234 if (f.file->f_mode & FMODE_PREAD)
1235 ret = compat_readv(f.file, vec, vlen, &pos, flags);
1236 fdput(f);
1237 return ret;
1238 }
1239
1240 #ifdef __ARCH_WANT_COMPAT_SYS_PREADV64
1241 COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd,
1242 const struct compat_iovec __user *,vec,
1243 unsigned long, vlen, loff_t, pos)
1244 {
1245 return do_compat_preadv64(fd, vec, vlen, pos, 0);
1246 }
1247 #endif
1248
1249 COMPAT_SYSCALL_DEFINE5(preadv, compat_ulong_t, fd,
1250 const struct compat_iovec __user *,vec,
1251 compat_ulong_t, vlen, u32, pos_low, u32, pos_high)
1252 {
1253 loff_t pos = ((loff_t)pos_high << 32) | pos_low;
1254
1255 return do_compat_preadv64(fd, vec, vlen, pos, 0);
1256 }
1257
1258 #ifdef __ARCH_WANT_COMPAT_SYS_PREADV64V2
1259 COMPAT_SYSCALL_DEFINE5(preadv64v2, unsigned long, fd,
1260 const struct compat_iovec __user *,vec,
1261 unsigned long, vlen, loff_t, pos, rwf_t, flags)
1262 {
1263 if (pos == -1)
1264 return do_compat_readv(fd, vec, vlen, flags);
1265
1266 return do_compat_preadv64(fd, vec, vlen, pos, flags);
1267 }
1268 #endif
1269
1270 COMPAT_SYSCALL_DEFINE6(preadv2, compat_ulong_t, fd,
1271 const struct compat_iovec __user *,vec,
1272 compat_ulong_t, vlen, u32, pos_low, u32, pos_high,
1273 rwf_t, flags)
1274 {
1275 loff_t pos = ((loff_t)pos_high << 32) | pos_low;
1276
1277 if (pos == -1)
1278 return do_compat_readv(fd, vec, vlen, flags);
1279
1280 return do_compat_preadv64(fd, vec, vlen, pos, flags);
1281 }
1282
1283 static size_t compat_writev(struct file *file,
1284 const struct compat_iovec __user *vec,
1285 unsigned long vlen, loff_t *pos, rwf_t flags)
1286 {
1287 struct iovec iovstack[UIO_FASTIOV];
1288 struct iovec *iov = iovstack;
1289 struct iov_iter iter;
1290 ssize_t ret;
1291
1292 ret = compat_import_iovec(WRITE, vec, vlen, UIO_FASTIOV, &iov, &iter);
1293 if (ret >= 0) {
1294 file_start_write(file);
1295 ret = do_iter_write(file, &iter, pos, flags);
1296 file_end_write(file);
1297 kfree(iov);
1298 }
1299 if (ret > 0)
1300 add_wchar(current, ret);
1301 inc_syscw(current);
1302 return ret;
1303 }
1304
1305 static size_t do_compat_writev(compat_ulong_t fd,
1306 const struct compat_iovec __user* vec,
1307 compat_ulong_t vlen, rwf_t flags)
1308 {
1309 struct fd f = fdget_pos(fd);
1310 ssize_t ret;
1311 loff_t pos;
1312
1313 if (!f.file)
1314 return -EBADF;
1315 pos = f.file->f_pos;
1316 ret = compat_writev(f.file, vec, vlen, &pos, flags);
1317 if (ret >= 0)
1318 f.file->f_pos = pos;
1319 fdput_pos(f);
1320 return ret;
1321 }
1322
1323 COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd,
1324 const struct compat_iovec __user *, vec,
1325 compat_ulong_t, vlen)
1326 {
1327 return do_compat_writev(fd, vec, vlen, 0);
1328 }
1329
1330 static long do_compat_pwritev64(unsigned long fd,
1331 const struct compat_iovec __user *vec,
1332 unsigned long vlen, loff_t pos, rwf_t flags)
1333 {
1334 struct fd f;
1335 ssize_t ret;
1336
1337 if (pos < 0)
1338 return -EINVAL;
1339 f = fdget(fd);
1340 if (!f.file)
1341 return -EBADF;
1342 ret = -ESPIPE;
1343 if (f.file->f_mode & FMODE_PWRITE)
1344 ret = compat_writev(f.file, vec, vlen, &pos, flags);
1345 fdput(f);
1346 return ret;
1347 }
1348
1349 #ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64
1350 COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd,
1351 const struct compat_iovec __user *,vec,
1352 unsigned long, vlen, loff_t, pos)
1353 {
1354 return do_compat_pwritev64(fd, vec, vlen, pos, 0);
1355 }
1356 #endif
1357
1358 COMPAT_SYSCALL_DEFINE5(pwritev, compat_ulong_t, fd,
1359 const struct compat_iovec __user *,vec,
1360 compat_ulong_t, vlen, u32, pos_low, u32, pos_high)
1361 {
1362 loff_t pos = ((loff_t)pos_high << 32) | pos_low;
1363
1364 return do_compat_pwritev64(fd, vec, vlen, pos, 0);
1365 }
1366
#ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64V2
/*
 * compat pwritev64v2: a position of -1 selects the positionless write path
 * (do_compat_writev); any other value is passed on as an explicit offset.
 */
COMPAT_SYSCALL_DEFINE5(pwritev64v2, unsigned long, fd,
		const struct compat_iovec __user *, vec,
		unsigned long, vlen, loff_t, pos, rwf_t, flags)
{
	if (pos != -1)
		return do_compat_pwritev64(fd, vec, vlen, pos, flags);

	return do_compat_writev(fd, vec, vlen, flags);
}
#endif
1378
1379 COMPAT_SYSCALL_DEFINE6(pwritev2, compat_ulong_t, fd,
1380 const struct compat_iovec __user *,vec,
1381 compat_ulong_t, vlen, u32, pos_low, u32, pos_high, rwf_t, flags)
1382 {
1383 loff_t pos = ((loff_t)pos_high << 32) | pos_low;
1384
1385 if (pos == -1)
1386 return do_compat_writev(fd, vec, vlen, flags);
1387
1388 return do_compat_pwritev64(fd, vec, vlen, pos, flags);
1389 }
1390
1391 #endif
1392
1393 static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
1394 size_t count, loff_t max)
1395 {
1396 struct fd in, out;
1397 struct inode *in_inode, *out_inode;
1398 loff_t pos;
1399 loff_t out_pos;
1400 ssize_t retval;
1401 int fl;
1402
1403
1404
1405
1406 retval = -EBADF;
1407 in = fdget(in_fd);
1408 if (!in.file)
1409 goto out;
1410 if (!(in.file->f_mode & FMODE_READ))
1411 goto fput_in;
1412 retval = -ESPIPE;
1413 if (!ppos) {
1414 pos = in.file->f_pos;
1415 } else {
1416 pos = *ppos;
1417 if (!(in.file->f_mode & FMODE_PREAD))
1418 goto fput_in;
1419 }
1420 retval = rw_verify_area(READ, in.file, &pos, count);
1421 if (retval < 0)
1422 goto fput_in;
1423 if (count > MAX_RW_COUNT)
1424 count = MAX_RW_COUNT;
1425
1426
1427
1428
1429 retval = -EBADF;
1430 out = fdget(out_fd);
1431 if (!out.file)
1432 goto fput_in;
1433 if (!(out.file->f_mode & FMODE_WRITE))
1434 goto fput_out;
1435 in_inode = file_inode(in.file);
1436 out_inode = file_inode(out.file);
1437 out_pos = out.file->f_pos;
1438 retval = rw_verify_area(WRITE, out.file, &out_pos, count);
1439 if (retval < 0)
1440 goto fput_out;
1441
1442 if (!max)
1443 max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes);
1444
1445 if (unlikely(pos + count > max)) {
1446 retval = -EOVERFLOW;
1447 if (pos >= max)
1448 goto fput_out;
1449 count = max - pos;
1450 }
1451
1452 fl = 0;
1453 #if 0
1454
1455
1456
1457
1458
1459
1460 if (in.file->f_flags & O_NONBLOCK)
1461 fl = SPLICE_F_NONBLOCK;
1462 #endif
1463 file_start_write(out.file);
1464 retval = do_splice_direct(in.file, &pos, out.file, &out_pos, count, fl);
1465 file_end_write(out.file);
1466
1467 if (retval > 0) {
1468 add_rchar(current, retval);
1469 add_wchar(current, retval);
1470 fsnotify_access(in.file);
1471 fsnotify_modify(out.file);
1472 out.file->f_pos = out_pos;
1473 if (ppos)
1474 *ppos = pos;
1475 else
1476 in.file->f_pos = pos;
1477 }
1478
1479 inc_syscr(current);
1480 inc_syscw(current);
1481 if (pos > max)
1482 retval = -EOVERFLOW;
1483
1484 fput_out:
1485 fdput(out);
1486 fput_in:
1487 fdput(in);
1488 out:
1489 return retval;
1490 }
1491
1492 SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd, off_t __user *, offset, size_t, count)
1493 {
1494 loff_t pos;
1495 off_t off;
1496 ssize_t ret;
1497
1498 if (offset) {
1499 if (unlikely(get_user(off, offset)))
1500 return -EFAULT;
1501 pos = off;
1502 ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS);
1503 if (unlikely(put_user(pos, offset)))
1504 return -EFAULT;
1505 return ret;
1506 }
1507
1508 return do_sendfile(out_fd, in_fd, NULL, count, 0);
1509 }
1510
1511 SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, loff_t __user *, offset, size_t, count)
1512 {
1513 loff_t pos;
1514 ssize_t ret;
1515
1516 if (offset) {
1517 if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t))))
1518 return -EFAULT;
1519 ret = do_sendfile(out_fd, in_fd, &pos, count, 0);
1520 if (unlikely(put_user(pos, offset)))
1521 return -EFAULT;
1522 return ret;
1523 }
1524
1525 return do_sendfile(out_fd, in_fd, NULL, count, 0);
1526 }
1527
1528 #ifdef CONFIG_COMPAT
1529 COMPAT_SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd,
1530 compat_off_t __user *, offset, compat_size_t, count)
1531 {
1532 loff_t pos;
1533 off_t off;
1534 ssize_t ret;
1535
1536 if (offset) {
1537 if (unlikely(get_user(off, offset)))
1538 return -EFAULT;
1539 pos = off;
1540 ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS);
1541 if (unlikely(put_user(pos, offset)))
1542 return -EFAULT;
1543 return ret;
1544 }
1545
1546 return do_sendfile(out_fd, in_fd, NULL, count, 0);
1547 }
1548
1549 COMPAT_SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd,
1550 compat_loff_t __user *, offset, compat_size_t, count)
1551 {
1552 loff_t pos;
1553 ssize_t ret;
1554
1555 if (offset) {
1556 if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t))))
1557 return -EFAULT;
1558 ret = do_sendfile(out_fd, in_fd, &pos, count, 0);
1559 if (unlikely(put_user(pos, offset)))
1560 return -EFAULT;
1561 return ret;
1562 }
1563
1564 return do_sendfile(out_fd, in_fd, NULL, count, 0);
1565 }
1566 #endif
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589 ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in,
1590 struct file *file_out, loff_t pos_out,
1591 size_t len, unsigned int flags)
1592 {
1593 return do_splice_direct(file_in, &pos_in, file_out, &pos_out,
1594 len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0);
1595 }
1596 EXPORT_SYMBOL(generic_copy_file_range);
1597
1598 static ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in,
1599 struct file *file_out, loff_t pos_out,
1600 size_t len, unsigned int flags)
1601 {
1602
1603
1604
1605
1606
1607
1608
1609
1610 if (file_out->f_op->copy_file_range &&
1611 file_out->f_op->copy_file_range == file_in->f_op->copy_file_range)
1612 return file_out->f_op->copy_file_range(file_in, pos_in,
1613 file_out, pos_out,
1614 len, flags);
1615
1616 return generic_copy_file_range(file_in, pos_in, file_out, pos_out, len,
1617 flags);
1618 }
1619
1620
1621
1622
1623
1624
1625 ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
1626 struct file *file_out, loff_t pos_out,
1627 size_t len, unsigned int flags)
1628 {
1629 ssize_t ret;
1630
1631 if (flags != 0)
1632 return -EINVAL;
1633
1634 ret = generic_copy_file_checks(file_in, pos_in, file_out, pos_out, &len,
1635 flags);
1636 if (unlikely(ret))
1637 return ret;
1638
1639 ret = rw_verify_area(READ, file_in, &pos_in, len);
1640 if (unlikely(ret))
1641 return ret;
1642
1643 ret = rw_verify_area(WRITE, file_out, &pos_out, len);
1644 if (unlikely(ret))
1645 return ret;
1646
1647 if (len == 0)
1648 return 0;
1649
1650 file_start_write(file_out);
1651
1652
1653
1654
1655
1656 if (file_in->f_op->remap_file_range &&
1657 file_inode(file_in)->i_sb == file_inode(file_out)->i_sb) {
1658 loff_t cloned;
1659
1660 cloned = file_in->f_op->remap_file_range(file_in, pos_in,
1661 file_out, pos_out,
1662 min_t(loff_t, MAX_RW_COUNT, len),
1663 REMAP_FILE_CAN_SHORTEN);
1664 if (cloned > 0) {
1665 ret = cloned;
1666 goto done;
1667 }
1668 }
1669
1670 ret = do_copy_file_range(file_in, pos_in, file_out, pos_out, len,
1671 flags);
1672 WARN_ON_ONCE(ret == -EOPNOTSUPP);
1673 done:
1674 if (ret > 0) {
1675 fsnotify_access(file_in);
1676 add_rchar(current, ret);
1677 fsnotify_modify(file_out);
1678 add_wchar(current, ret);
1679 }
1680
1681 inc_syscr(current);
1682 inc_syscw(current);
1683
1684 file_end_write(file_out);
1685
1686 return ret;
1687 }
1688 EXPORT_SYMBOL(vfs_copy_file_range);
1689
1690 SYSCALL_DEFINE6(copy_file_range, int, fd_in, loff_t __user *, off_in,
1691 int, fd_out, loff_t __user *, off_out,
1692 size_t, len, unsigned int, flags)
1693 {
1694 loff_t pos_in;
1695 loff_t pos_out;
1696 struct fd f_in;
1697 struct fd f_out;
1698 ssize_t ret = -EBADF;
1699
1700 f_in = fdget(fd_in);
1701 if (!f_in.file)
1702 goto out2;
1703
1704 f_out = fdget(fd_out);
1705 if (!f_out.file)
1706 goto out1;
1707
1708 ret = -EFAULT;
1709 if (off_in) {
1710 if (copy_from_user(&pos_in, off_in, sizeof(loff_t)))
1711 goto out;
1712 } else {
1713 pos_in = f_in.file->f_pos;
1714 }
1715
1716 if (off_out) {
1717 if (copy_from_user(&pos_out, off_out, sizeof(loff_t)))
1718 goto out;
1719 } else {
1720 pos_out = f_out.file->f_pos;
1721 }
1722
1723 ret = vfs_copy_file_range(f_in.file, pos_in, f_out.file, pos_out, len,
1724 flags);
1725 if (ret > 0) {
1726 pos_in += ret;
1727 pos_out += ret;
1728
1729 if (off_in) {
1730 if (copy_to_user(off_in, &pos_in, sizeof(loff_t)))
1731 ret = -EFAULT;
1732 } else {
1733 f_in.file->f_pos = pos_in;
1734 }
1735
1736 if (off_out) {
1737 if (copy_to_user(off_out, &pos_out, sizeof(loff_t)))
1738 ret = -EFAULT;
1739 } else {
1740 f_out.file->f_pos = pos_out;
1741 }
1742 }
1743
1744 out:
1745 fdput(f_out);
1746 out1:
1747 fdput(f_in);
1748 out2:
1749 return ret;
1750 }
1751
1752 static int remap_verify_area(struct file *file, loff_t pos, loff_t len,
1753 bool write)
1754 {
1755 struct inode *inode = file_inode(file);
1756
1757 if (unlikely(pos < 0 || len < 0))
1758 return -EINVAL;
1759
1760 if (unlikely((loff_t) (pos + len) < 0))
1761 return -EINVAL;
1762
1763 if (unlikely(inode->i_flctx && mandatory_lock(inode))) {
1764 loff_t end = len ? pos + len - 1 : OFFSET_MAX;
1765 int retval;
1766
1767 retval = locks_mandatory_area(inode, file, pos, end,
1768 write ? F_WRLCK : F_RDLCK);
1769 if (retval < 0)
1770 return retval;
1771 }
1772
1773 return security_file_permission(file, write ? MAY_WRITE : MAY_READ);
1774 }
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786 static int generic_remap_check_len(struct inode *inode_in,
1787 struct inode *inode_out,
1788 loff_t pos_out,
1789 loff_t *len,
1790 unsigned int remap_flags)
1791 {
1792 u64 blkmask = i_blocksize(inode_in) - 1;
1793 loff_t new_len = *len;
1794
1795 if ((*len & blkmask) == 0)
1796 return 0;
1797
1798 if (pos_out + *len < i_size_read(inode_out))
1799 new_len &= ~blkmask;
1800
1801 if (new_len == *len)
1802 return 0;
1803
1804 if (remap_flags & REMAP_FILE_CAN_SHORTEN) {
1805 *len = new_len;
1806 return 0;
1807 }
1808
1809 return (remap_flags & REMAP_FILE_DEDUP) ? -EBADE : -EINVAL;
1810 }
1811
1812
1813 static struct page *vfs_dedupe_get_page(struct inode *inode, loff_t offset)
1814 {
1815 struct page *page;
1816
1817 page = read_mapping_page(inode->i_mapping, offset >> PAGE_SHIFT, NULL);
1818 if (IS_ERR(page))
1819 return page;
1820 if (!PageUptodate(page)) {
1821 put_page(page);
1822 return ERR_PTR(-EIO);
1823 }
1824 return page;
1825 }
1826
1827
1828
1829
1830
1831 static void vfs_lock_two_pages(struct page *page1, struct page *page2)
1832 {
1833
1834 if (page1->index > page2->index)
1835 swap(page1, page2);
1836
1837 lock_page(page1);
1838 if (page1 != page2)
1839 lock_page(page2);
1840 }
1841
1842
/* Unlock two pages; identical pages are unlocked only once. */
static void vfs_unlock_two_pages(struct page *page1, struct page *page2)
{
	unlock_page(page1);
	if (page1 == page2)
		return;

	unlock_page(page2);
}
1849
1850
1851
1852
1853
1854 static int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
1855 struct inode *dest, loff_t destoff,
1856 loff_t len, bool *is_same)
1857 {
1858 loff_t src_poff;
1859 loff_t dest_poff;
1860 void *src_addr;
1861 void *dest_addr;
1862 struct page *src_page;
1863 struct page *dest_page;
1864 loff_t cmp_len;
1865 bool same;
1866 int error;
1867
1868 error = -EINVAL;
1869 same = true;
1870 while (len) {
1871 src_poff = srcoff & (PAGE_SIZE - 1);
1872 dest_poff = destoff & (PAGE_SIZE - 1);
1873 cmp_len = min(PAGE_SIZE - src_poff,
1874 PAGE_SIZE - dest_poff);
1875 cmp_len = min(cmp_len, len);
1876 if (cmp_len <= 0)
1877 goto out_error;
1878
1879 src_page = vfs_dedupe_get_page(src, srcoff);
1880 if (IS_ERR(src_page)) {
1881 error = PTR_ERR(src_page);
1882 goto out_error;
1883 }
1884 dest_page = vfs_dedupe_get_page(dest, destoff);
1885 if (IS_ERR(dest_page)) {
1886 error = PTR_ERR(dest_page);
1887 put_page(src_page);
1888 goto out_error;
1889 }
1890
1891 vfs_lock_two_pages(src_page, dest_page);
1892
1893
1894
1895
1896
1897
1898 if (!PageUptodate(src_page) || !PageUptodate(dest_page) ||
1899 src_page->mapping != src->i_mapping ||
1900 dest_page->mapping != dest->i_mapping) {
1901 same = false;
1902 goto unlock;
1903 }
1904
1905 src_addr = kmap_atomic(src_page);
1906 dest_addr = kmap_atomic(dest_page);
1907
1908 flush_dcache_page(src_page);
1909 flush_dcache_page(dest_page);
1910
1911 if (memcmp(src_addr + src_poff, dest_addr + dest_poff, cmp_len))
1912 same = false;
1913
1914 kunmap_atomic(dest_addr);
1915 kunmap_atomic(src_addr);
1916 unlock:
1917 vfs_unlock_two_pages(src_page, dest_page);
1918 put_page(dest_page);
1919 put_page(src_page);
1920
1921 if (!same)
1922 break;
1923
1924 srcoff += cmp_len;
1925 destoff += cmp_len;
1926 len -= cmp_len;
1927 }
1928
1929 *is_same = same;
1930 return 0;
1931
1932 out_error:
1933 return error;
1934 }
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944 int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
1945 struct file *file_out, loff_t pos_out,
1946 loff_t *len, unsigned int remap_flags)
1947 {
1948 struct inode *inode_in = file_inode(file_in);
1949 struct inode *inode_out = file_inode(file_out);
1950 bool same_inode = (inode_in == inode_out);
1951 int ret;
1952
1953
1954 if (IS_IMMUTABLE(inode_out))
1955 return -EPERM;
1956
1957 if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out))
1958 return -ETXTBSY;
1959
1960
1961 if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
1962 return -EISDIR;
1963 if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
1964 return -EINVAL;
1965
1966
1967 if (*len == 0) {
1968 loff_t isize = i_size_read(inode_in);
1969
1970 if ((remap_flags & REMAP_FILE_DEDUP) || pos_in == isize)
1971 return 0;
1972 if (pos_in > isize)
1973 return -EINVAL;
1974 *len = isize - pos_in;
1975 if (*len == 0)
1976 return 0;
1977 }
1978
1979
1980 ret = generic_remap_checks(file_in, pos_in, file_out, pos_out, len,
1981 remap_flags);
1982 if (ret)
1983 return ret;
1984
1985
1986 inode_dio_wait(inode_in);
1987 if (!same_inode)
1988 inode_dio_wait(inode_out);
1989
1990 ret = filemap_write_and_wait_range(inode_in->i_mapping,
1991 pos_in, pos_in + *len - 1);
1992 if (ret)
1993 return ret;
1994
1995 ret = filemap_write_and_wait_range(inode_out->i_mapping,
1996 pos_out, pos_out + *len - 1);
1997 if (ret)
1998 return ret;
1999
2000
2001
2002
2003 if (remap_flags & REMAP_FILE_DEDUP) {
2004 bool is_same = false;
2005
2006 ret = vfs_dedupe_file_range_compare(inode_in, pos_in,
2007 inode_out, pos_out, *len, &is_same);
2008 if (ret)
2009 return ret;
2010 if (!is_same)
2011 return -EBADE;
2012 }
2013
2014 ret = generic_remap_check_len(inode_in, inode_out, pos_out, len,
2015 remap_flags);
2016 if (ret)
2017 return ret;
2018
2019
2020 if (!(remap_flags & REMAP_FILE_DEDUP))
2021 ret = file_modified(file_out);
2022
2023 return ret;
2024 }
2025 EXPORT_SYMBOL(generic_remap_file_range_prep);
2026
2027 loff_t do_clone_file_range(struct file *file_in, loff_t pos_in,
2028 struct file *file_out, loff_t pos_out,
2029 loff_t len, unsigned int remap_flags)
2030 {
2031 loff_t ret;
2032
2033 WARN_ON_ONCE(remap_flags & REMAP_FILE_DEDUP);
2034
2035
2036
2037
2038
2039
2040 if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb)
2041 return -EXDEV;
2042
2043 ret = generic_file_rw_checks(file_in, file_out);
2044 if (ret < 0)
2045 return ret;
2046
2047 if (!file_in->f_op->remap_file_range)
2048 return -EOPNOTSUPP;
2049
2050 ret = remap_verify_area(file_in, pos_in, len, false);
2051 if (ret)
2052 return ret;
2053
2054 ret = remap_verify_area(file_out, pos_out, len, true);
2055 if (ret)
2056 return ret;
2057
2058 ret = file_in->f_op->remap_file_range(file_in, pos_in,
2059 file_out, pos_out, len, remap_flags);
2060 if (ret < 0)
2061 return ret;
2062
2063 fsnotify_access(file_in);
2064 fsnotify_modify(file_out);
2065 return ret;
2066 }
2067 EXPORT_SYMBOL(do_clone_file_range);
2068
2069 loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in,
2070 struct file *file_out, loff_t pos_out,
2071 loff_t len, unsigned int remap_flags)
2072 {
2073 loff_t ret;
2074
2075 file_start_write(file_out);
2076 ret = do_clone_file_range(file_in, pos_in, file_out, pos_out, len,
2077 remap_flags);
2078 file_end_write(file_out);
2079
2080 return ret;
2081 }
2082 EXPORT_SYMBOL(vfs_clone_file_range);
2083
2084
2085 static bool allow_file_dedupe(struct file *file)
2086 {
2087 if (capable(CAP_SYS_ADMIN))
2088 return true;
2089 if (file->f_mode & FMODE_WRITE)
2090 return true;
2091 if (uid_eq(current_fsuid(), file_inode(file)->i_uid))
2092 return true;
2093 if (!inode_permission(file_inode(file), MAY_WRITE))
2094 return true;
2095 return false;
2096 }
2097
2098 loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
2099 struct file *dst_file, loff_t dst_pos,
2100 loff_t len, unsigned int remap_flags)
2101 {
2102 loff_t ret;
2103
2104 WARN_ON_ONCE(remap_flags & ~(REMAP_FILE_DEDUP |
2105 REMAP_FILE_CAN_SHORTEN));
2106
2107 ret = mnt_want_write_file(dst_file);
2108 if (ret)
2109 return ret;
2110
2111 ret = remap_verify_area(dst_file, dst_pos, len, true);
2112 if (ret < 0)
2113 goto out_drop_write;
2114
2115 ret = -EPERM;
2116 if (!allow_file_dedupe(dst_file))
2117 goto out_drop_write;
2118
2119 ret = -EXDEV;
2120 if (src_file->f_path.mnt != dst_file->f_path.mnt)
2121 goto out_drop_write;
2122
2123 ret = -EISDIR;
2124 if (S_ISDIR(file_inode(dst_file)->i_mode))
2125 goto out_drop_write;
2126
2127 ret = -EINVAL;
2128 if (!dst_file->f_op->remap_file_range)
2129 goto out_drop_write;
2130
2131 if (len == 0) {
2132 ret = 0;
2133 goto out_drop_write;
2134 }
2135
2136 ret = dst_file->f_op->remap_file_range(src_file, src_pos, dst_file,
2137 dst_pos, len, remap_flags | REMAP_FILE_DEDUP);
2138 out_drop_write:
2139 mnt_drop_write_file(dst_file);
2140
2141 return ret;
2142 }
2143 EXPORT_SYMBOL(vfs_dedupe_file_range_one);
2144
2145 int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
2146 {
2147 struct file_dedupe_range_info *info;
2148 struct inode *src = file_inode(file);
2149 u64 off;
2150 u64 len;
2151 int i;
2152 int ret;
2153 u16 count = same->dest_count;
2154 loff_t deduped;
2155
2156 if (!(file->f_mode & FMODE_READ))
2157 return -EINVAL;
2158
2159 if (same->reserved1 || same->reserved2)
2160 return -EINVAL;
2161
2162 off = same->src_offset;
2163 len = same->src_length;
2164
2165 if (S_ISDIR(src->i_mode))
2166 return -EISDIR;
2167
2168 if (!S_ISREG(src->i_mode))
2169 return -EINVAL;
2170
2171 if (!file->f_op->remap_file_range)
2172 return -EOPNOTSUPP;
2173
2174 ret = remap_verify_area(file, off, len, false);
2175 if (ret < 0)
2176 return ret;
2177 ret = 0;
2178
2179 if (off + len > i_size_read(src))
2180 return -EINVAL;
2181
2182
2183 len = min_t(u64, len, 1 << 30);
2184
2185
2186 for (i = 0; i < count; i++) {
2187 same->info[i].bytes_deduped = 0ULL;
2188 same->info[i].status = FILE_DEDUPE_RANGE_SAME;
2189 }
2190
2191 for (i = 0, info = same->info; i < count; i++, info++) {
2192 struct fd dst_fd = fdget(info->dest_fd);
2193 struct file *dst_file = dst_fd.file;
2194
2195 if (!dst_file) {
2196 info->status = -EBADF;
2197 goto next_loop;
2198 }
2199
2200 if (info->reserved) {
2201 info->status = -EINVAL;
2202 goto next_fdput;
2203 }
2204
2205 deduped = vfs_dedupe_file_range_one(file, off, dst_file,
2206 info->dest_offset, len,
2207 REMAP_FILE_CAN_SHORTEN);
2208 if (deduped == -EBADE)
2209 info->status = FILE_DEDUPE_RANGE_DIFFERS;
2210 else if (deduped < 0)
2211 info->status = deduped;
2212 else
2213 info->bytes_deduped = len;
2214
2215 next_fdput:
2216 fdput(dst_fd);
2217 next_loop:
2218 if (fatal_signal_pending(current))
2219 break;
2220 }
2221 return ret;
2222 }
2223 EXPORT_SYMBOL(vfs_dedupe_file_range);