1/*
2 * NET4:	Implementation of BSD Unix domain sockets.
3 *
4 * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
5 *
6 *		This program is free software; you can redistribute it and/or
7 *		modify it under the terms of the GNU General Public License
8 *		as published by the Free Software Foundation; either version
9 *		2 of the License, or (at your option) any later version.
10 *
11 * Fixes:
12 *		Linus Torvalds	:	Assorted bug cures.
13 *		Niibe Yutaka	:	async I/O support.
14 *		Carsten Paeth	:	PF_UNIX check, address fixes.
15 *		Alan Cox	:	Limit size of allocated blocks.
16 *		Alan Cox	:	Fixed the stupid socketpair bug.
17 *		Alan Cox	:	BSD compatibility fine tuning.
18 *		Alan Cox	:	Fixed a bug in connect when interrupted.
19 *		Alan Cox	:	Sorted out a proper draft version of
20 *					file descriptor passing hacked up from
21 *					Mike Shaver's work.
22 *		Marty Leisner	:	Fixes to fd passing
23 *		Nick Nevin	:	recvmsg bugfix.
24 *		Alan Cox	:	Started proper garbage collector
25 *		Heiko EiBfeldt	:	Missing verify_area check
26 *		Alan Cox	:	Started POSIXisms
27 *		Andreas Schwab	:	Replace inode by dentry for proper
28 *					reference counting
29 *		Kirk Petersen	:	Made this a module
30 *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
31 *					Lots of bug fixes.
32 *	     Alexey Kuznetosv	:	Repaired (I hope) bugs introduces
33 *					by above two patches.
34 *	     Andrea Arcangeli	:	If possible we block in connect(2)
35 *					if the max backlog of the listen socket
36 *					is been reached. This won't break
37 *					old apps and it will avoid huge amount
38 *					of socks hashed (this for unix_gc()
39 *					performances reasons).
40 *					Security fix that limits the max
41 *					number of socks to 2*max_files and
42 *					the number of skb queueable in the
43 *					dgram receiver.
44 *		Artur Skawina   :	Hash function optimizations
45 *	     Alexey Kuznetsov   :	Full scale SMP. Lot of bugs are introduced 8)
46 *	      Malcolm Beattie   :	Set peercred for socketpair
47 *	     Michal Ostrowski   :       Module initialization cleanup.
48 *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
49 *	     				the core infrastructure is doing that
50 *	     				for all net proto families now (2.5.69+)
51 *
52 *
53 * Known differences from reference BSD that was tested:
54 *
55 *	[TO FIX]
56 *	ECONNREFUSED is not returned from one end of a connected() socket to the
57 *		other the moment one end closes.
58 *	fstat() doesn't return st_dev=0, and give the blksize as high water mark
59 *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
60 *	[NOT TO FIX]
61 *	accept() returns a path name even if the connecting socket has closed
62 *		in the meantime (BSD loses the path and gives up).
63 *	accept() returns 0 length path for an unbound connector. BSD returns 16
64 *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
65 *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
66 *	BSD af_unix apparently has connect forgetting to block properly.
67 *		(need to check this with the POSIX spec in detail)
68 *
69 * Differences from 2.0.0-11-... (ANK)
70 *	Bug fixes and improvements.
71 *		- client shutdown killed server socket.
72 *		- removed all useless cli/sti pairs.
73 *
74 *	Semantic changes/extensions.
75 *		- generic control message passing.
76 *		- SCM_CREDENTIALS control message.
77 *		- "Abstract" (not FS based) socket bindings.
78 *		  Abstract names are sequences of bytes (not zero terminated)
79 *		  started by 0, so that this name space does not intersect
80 *		  with BSD names.
81 */
82
83#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
84
85#include <linux/module.h>
86#include <linux/kernel.h>
87#include <linux/signal.h>
88#include <linux/sched.h>
89#include <linux/errno.h>
90#include <linux/string.h>
91#include <linux/stat.h>
92#include <linux/dcache.h>
93#include <linux/namei.h>
94#include <linux/socket.h>
95#include <linux/un.h>
96#include <linux/fcntl.h>
97#include <linux/termios.h>
98#include <linux/sockios.h>
99#include <linux/net.h>
100#include <linux/in.h>
101#include <linux/fs.h>
102#include <linux/slab.h>
103#include <asm/uaccess.h>
104#include <linux/skbuff.h>
105#include <linux/netdevice.h>
106#include <net/net_namespace.h>
107#include <net/sock.h>
108#include <net/tcp_states.h>
109#include <net/af_unix.h>
110#include <linux/proc_fs.h>
111#include <linux/seq_file.h>
112#include <net/scm.h>
113#include <linux/init.h>
114#include <linux/poll.h>
115#include <linux/rtnetlink.h>
116#include <linux/mount.h>
117#include <net/checksum.h>
118#include <linux/security.h>
119#include <linux/freezer.h>
120
121struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
122EXPORT_SYMBOL_GPL(unix_socket_table);
123DEFINE_SPINLOCK(unix_table_lock);
124EXPORT_SYMBOL_GPL(unix_table_lock);
125static atomic_long_t unix_nr_socks;
126
127
128static struct hlist_head *unix_sockets_unbound(void *addr)
129{
130	unsigned long hash = (unsigned long)addr;
131
132	hash ^= hash >> 16;
133	hash ^= hash >> 8;
134	hash %= UNIX_HASH_SIZE;
135	return &unix_socket_table[UNIX_HASH_SIZE + hash];
136}
137
138#define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
139
140#ifdef CONFIG_SECURITY_NETWORK
141static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
142{
143	UNIXCB(skb).secid = scm->secid;
144}
145
146static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
147{
148	scm->secid = UNIXCB(skb).secid;
149}
150
151static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
152{
153	return (scm->secid == UNIXCB(skb).secid);
154}
155#else
156static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
157{ }
158
159static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
160{ }
161
162static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
163{
164	return true;
165}
166#endif /* CONFIG_SECURITY_NETWORK */
167
168/*
169 *  SMP locking strategy:
170 *    hash table is protected with spinlock unix_table_lock
171 *    each socket state is protected by separate spin lock.
172 */
173
174static inline unsigned int unix_hash_fold(__wsum n)
175{
176	unsigned int hash = (__force unsigned int)csum_fold(n);
177
178	hash ^= hash>>8;
179	return hash&(UNIX_HASH_SIZE-1);
180}
181
182#define unix_peer(sk) (unix_sk(sk)->peer)
183
184static inline int unix_our_peer(struct sock *sk, struct sock *osk)
185{
186	return unix_peer(osk) == sk;
187}
188
189static inline int unix_may_send(struct sock *sk, struct sock *osk)
190{
191	return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
192}
193
194static inline int unix_recvq_full(struct sock const *sk)
195{
196	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
197}
198
199struct sock *unix_peer_get(struct sock *s)
200{
201	struct sock *peer;
202
203	unix_state_lock(s);
204	peer = unix_peer(s);
205	if (peer)
206		sock_hold(peer);
207	unix_state_unlock(s);
208	return peer;
209}
210EXPORT_SYMBOL_GPL(unix_peer_get);
211
212static inline void unix_release_addr(struct unix_address *addr)
213{
214	if (atomic_dec_and_test(&addr->refcnt))
215		kfree(addr);
216}
217
218/*
219 *	Check unix socket name:
220 *		- should be not zero length.
221 *	        - if started by not zero, should be NULL terminated (FS object)
222 *		- if started by zero, it is abstract name.
223 */
224
225static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
226{
227	if (len <= sizeof(short) || len > sizeof(*sunaddr))
228		return -EINVAL;
229	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
230		return -EINVAL;
231	if (sunaddr->sun_path[0]) {
232		/*
233		 * This may look like an off by one error but it is a bit more
234		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
235		 * sun_path[108] doesn't as such exist.  However in kernel space
236		 * we are guaranteed that it is a valid memory location in our
237		 * kernel address buffer.
238		 */
239		((char *)sunaddr)[len] = 0;
240		len = strlen(sunaddr->sun_path)+1+sizeof(short);
241		return len;
242	}
243
244	*hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
245	return len;
246}
247
248static void __unix_remove_socket(struct sock *sk)
249{
250	sk_del_node_init(sk);
251}
252
253static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
254{
255	WARN_ON(!sk_unhashed(sk));
256	sk_add_node(sk, list);
257}
258
259static inline void unix_remove_socket(struct sock *sk)
260{
261	spin_lock(&unix_table_lock);
262	__unix_remove_socket(sk);
263	spin_unlock(&unix_table_lock);
264}
265
266static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
267{
268	spin_lock(&unix_table_lock);
269	__unix_insert_socket(list, sk);
270	spin_unlock(&unix_table_lock);
271}
272
273static struct sock *__unix_find_socket_byname(struct net *net,
274					      struct sockaddr_un *sunname,
275					      int len, int type, unsigned int hash)
276{
277	struct sock *s;
278
279	sk_for_each(s, &unix_socket_table[hash ^ type]) {
280		struct unix_sock *u = unix_sk(s);
281
282		if (!net_eq(sock_net(s), net))
283			continue;
284
285		if (u->addr->len == len &&
286		    !memcmp(u->addr->name, sunname, len))
287			goto found;
288	}
289	s = NULL;
290found:
291	return s;
292}
293
294static inline struct sock *unix_find_socket_byname(struct net *net,
295						   struct sockaddr_un *sunname,
296						   int len, int type,
297						   unsigned int hash)
298{
299	struct sock *s;
300
301	spin_lock(&unix_table_lock);
302	s = __unix_find_socket_byname(net, sunname, len, type, hash);
303	if (s)
304		sock_hold(s);
305	spin_unlock(&unix_table_lock);
306	return s;
307}
308
309static struct sock *unix_find_socket_byinode(struct inode *i)
310{
311	struct sock *s;
312
313	spin_lock(&unix_table_lock);
314	sk_for_each(s,
315		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
316		struct dentry *dentry = unix_sk(s)->path.dentry;
317
318		if (dentry && d_backing_inode(dentry) == i) {
319			sock_hold(s);
320			goto found;
321		}
322	}
323	s = NULL;
324found:
325	spin_unlock(&unix_table_lock);
326	return s;
327}
328
329/* Support code for asymmetrically connected dgram sockets
330 *
331 * If a datagram socket is connected to a socket not itself connected
332 * to the first socket (eg, /dev/log), clients may only enqueue more
333 * messages if the present receive queue of the server socket is not
334 * "too large". This means there's a second writeability condition
335 * poll and sendmsg need to test. The dgram recv code will do a wake
336 * up on the peer_wait wait queue of a socket upon reception of a
337 * datagram which needs to be propagated to sleeping would-be writers
338 * since these might not have sent anything so far. This can't be
339 * accomplished via poll_wait because the lifetime of the server
340 * socket might be less than that of its clients if these break their
341 * association with it or if the server socket is closed while clients
342 * are still connected to it and there's no way to inform "a polling
343 * implementation" that it should let go of a certain wait queue
344 *
345 * In order to propagate a wake up, a wait_queue_t of the client
346 * socket is enqueued on the peer_wait queue of the server socket
347 * whose wake function does a wake_up on the ordinary client socket
348 * wait queue. This connection is established whenever a write (or
349 * poll for write) hit the flow control condition and broken when the
350 * association to the server socket is dissolved or after a wake up
351 * was relayed.
352 */
353
354static int unix_dgram_peer_wake_relay(wait_queue_t *q, unsigned mode, int flags,
355				      void *key)
356{
357	struct unix_sock *u;
358	wait_queue_head_t *u_sleep;
359
360	u = container_of(q, struct unix_sock, peer_wake);
361
362	__remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
363			    q);
364	u->peer_wake.private = NULL;
365
366	/* relaying can only happen while the wq still exists */
367	u_sleep = sk_sleep(&u->sk);
368	if (u_sleep)
369		wake_up_interruptible_poll(u_sleep, key);
370
371	return 0;
372}
373
374static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
375{
376	struct unix_sock *u, *u_other;
377	int rc;
378
379	u = unix_sk(sk);
380	u_other = unix_sk(other);
381	rc = 0;
382	spin_lock(&u_other->peer_wait.lock);
383
384	if (!u->peer_wake.private) {
385		u->peer_wake.private = other;
386		__add_wait_queue(&u_other->peer_wait, &u->peer_wake);
387
388		rc = 1;
389	}
390
391	spin_unlock(&u_other->peer_wait.lock);
392	return rc;
393}
394
395static void unix_dgram_peer_wake_disconnect(struct sock *sk,
396					    struct sock *other)
397{
398	struct unix_sock *u, *u_other;
399
400	u = unix_sk(sk);
401	u_other = unix_sk(other);
402	spin_lock(&u_other->peer_wait.lock);
403
404	if (u->peer_wake.private == other) {
405		__remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
406		u->peer_wake.private = NULL;
407	}
408
409	spin_unlock(&u_other->peer_wait.lock);
410}
411
412static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
413						   struct sock *other)
414{
415	unix_dgram_peer_wake_disconnect(sk, other);
416	wake_up_interruptible_poll(sk_sleep(sk),
417				   POLLOUT |
418				   POLLWRNORM |
419				   POLLWRBAND);
420}
421
422/* preconditions:
423 *	- unix_peer(sk) == other
424 *	- association is stable
425 */
426static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
427{
428	int connected;
429
430	connected = unix_dgram_peer_wake_connect(sk, other);
431
432	if (unix_recvq_full(other))
433		return 1;
434
435	if (connected)
436		unix_dgram_peer_wake_disconnect(sk, other);
437
438	return 0;
439}
440
441static int unix_writable(const struct sock *sk)
442{
443	return sk->sk_state != TCP_LISTEN &&
444	       (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
445}
446
447static void unix_write_space(struct sock *sk)
448{
449	struct socket_wq *wq;
450
451	rcu_read_lock();
452	if (unix_writable(sk)) {
453		wq = rcu_dereference(sk->sk_wq);
454		if (wq_has_sleeper(wq))
455			wake_up_interruptible_sync_poll(&wq->wait,
456				POLLOUT | POLLWRNORM | POLLWRBAND);
457		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
458	}
459	rcu_read_unlock();
460}
461
462/* When dgram socket disconnects (or changes its peer), we clear its receive
463 * queue of packets arrived from previous peer. First, it allows to do
464 * flow control based only on wmem_alloc; second, sk connected to peer
465 * may receive messages only from that peer. */
466static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
467{
468	if (!skb_queue_empty(&sk->sk_receive_queue)) {
469		skb_queue_purge(&sk->sk_receive_queue);
470		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
471
472		/* If one link of bidirectional dgram pipe is disconnected,
473		 * we signal error. Messages are lost. Do not make this,
474		 * when peer was not connected to us.
475		 */
476		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
477			other->sk_err = ECONNRESET;
478			other->sk_error_report(other);
479		}
480	}
481}
482
483static void unix_sock_destructor(struct sock *sk)
484{
485	struct unix_sock *u = unix_sk(sk);
486
487	skb_queue_purge(&sk->sk_receive_queue);
488
489	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
490	WARN_ON(!sk_unhashed(sk));
491	WARN_ON(sk->sk_socket);
492	if (!sock_flag(sk, SOCK_DEAD)) {
493		pr_info("Attempt to release alive unix socket: %p\n", sk);
494		return;
495	}
496
497	if (u->addr)
498		unix_release_addr(u->addr);
499
500	atomic_long_dec(&unix_nr_socks);
501	local_bh_disable();
502	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
503	local_bh_enable();
504#ifdef UNIX_REFCNT_DEBUG
505	pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
506		atomic_long_read(&unix_nr_socks));
507#endif
508}
509
510static void unix_release_sock(struct sock *sk, int embrion)
511{
512	struct unix_sock *u = unix_sk(sk);
513	struct path path;
514	struct sock *skpair;
515	struct sk_buff *skb;
516	int state;
517
518	unix_remove_socket(sk);
519
520	/* Clear state */
521	unix_state_lock(sk);
522	sock_orphan(sk);
523	sk->sk_shutdown = SHUTDOWN_MASK;
524	path	     = u->path;
525	u->path.dentry = NULL;
526	u->path.mnt = NULL;
527	state = sk->sk_state;
528	sk->sk_state = TCP_CLOSE;
529	unix_state_unlock(sk);
530
531	wake_up_interruptible_all(&u->peer_wait);
532
533	skpair = unix_peer(sk);
534
535	if (skpair != NULL) {
536		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
537			unix_state_lock(skpair);
538			/* No more writes */
539			skpair->sk_shutdown = SHUTDOWN_MASK;
540			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
541				skpair->sk_err = ECONNRESET;
542			unix_state_unlock(skpair);
543			skpair->sk_state_change(skpair);
544			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
545		}
546
547		unix_dgram_peer_wake_disconnect(sk, skpair);
548		sock_put(skpair); /* It may now die */
549		unix_peer(sk) = NULL;
550	}
551
552	/* Try to flush out this socket. Throw out buffers at least */
553
554	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
555		if (state == TCP_LISTEN)
556			unix_release_sock(skb->sk, 1);
557		/* passed fds are erased in the kfree_skb hook	      */
558		UNIXCB(skb).consumed = skb->len;
559		kfree_skb(skb);
560	}
561
562	if (path.dentry)
563		path_put(&path);
564
565	sock_put(sk);
566
567	/* ---- Socket is dead now and most probably destroyed ---- */
568
569	/*
570	 * Fixme: BSD difference: In BSD all sockets connected to us get
571	 *	  ECONNRESET and we die on the spot. In Linux we behave
572	 *	  like files and pipes do and wait for the last
573	 *	  dereference.
574	 *
575	 * Can't we simply set sock->err?
576	 *
577	 *	  What the above comment does talk about? --ANK(980817)
578	 */
579
580	if (unix_tot_inflight)
581		unix_gc();		/* Garbage collect fds */
582}
583
584static void init_peercred(struct sock *sk)
585{
586	put_pid(sk->sk_peer_pid);
587	if (sk->sk_peer_cred)
588		put_cred(sk->sk_peer_cred);
589	sk->sk_peer_pid  = get_pid(task_tgid(current));
590	sk->sk_peer_cred = get_current_cred();
591}
592
593static void copy_peercred(struct sock *sk, struct sock *peersk)
594{
595	put_pid(sk->sk_peer_pid);
596	if (sk->sk_peer_cred)
597		put_cred(sk->sk_peer_cred);
598	sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
599	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
600}
601
602static int unix_listen(struct socket *sock, int backlog)
603{
604	int err;
605	struct sock *sk = sock->sk;
606	struct unix_sock *u = unix_sk(sk);
607	struct pid *old_pid = NULL;
608
609	err = -EOPNOTSUPP;
610	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
611		goto out;	/* Only stream/seqpacket sockets accept */
612	err = -EINVAL;
613	if (!u->addr)
614		goto out;	/* No listens on an unbound socket */
615	unix_state_lock(sk);
616	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
617		goto out_unlock;
618	if (backlog > sk->sk_max_ack_backlog)
619		wake_up_interruptible_all(&u->peer_wait);
620	sk->sk_max_ack_backlog	= backlog;
621	sk->sk_state		= TCP_LISTEN;
622	/* set credentials so connect can copy them */
623	init_peercred(sk);
624	err = 0;
625
626out_unlock:
627	unix_state_unlock(sk);
628	put_pid(old_pid);
629out:
630	return err;
631}
632
633static int unix_release(struct socket *);
634static int unix_bind(struct socket *, struct sockaddr *, int);
635static int unix_stream_connect(struct socket *, struct sockaddr *,
636			       int addr_len, int flags);
637static int unix_socketpair(struct socket *, struct socket *);
638static int unix_accept(struct socket *, struct socket *, int);
639static int unix_getname(struct socket *, struct sockaddr *, int *, int);
640static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
641static unsigned int unix_dgram_poll(struct file *, struct socket *,
642				    poll_table *);
643static int unix_ioctl(struct socket *, unsigned int, unsigned long);
644static int unix_shutdown(struct socket *, int);
645static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
646static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
647static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
648				    size_t size, int flags);
649static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
650				       struct pipe_inode_info *, size_t size,
651				       unsigned int flags);
652static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
653static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
654static int unix_dgram_connect(struct socket *, struct sockaddr *,
655			      int, int);
656static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
657static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
658				  int);
659
660static int unix_set_peek_off(struct sock *sk, int val)
661{
662	struct unix_sock *u = unix_sk(sk);
663
664	if (mutex_lock_interruptible(&u->readlock))
665		return -EINTR;
666
667	sk->sk_peek_off = val;
668	mutex_unlock(&u->readlock);
669
670	return 0;
671}
672
673
674static const struct proto_ops unix_stream_ops = {
675	.family =	PF_UNIX,
676	.owner =	THIS_MODULE,
677	.release =	unix_release,
678	.bind =		unix_bind,
679	.connect =	unix_stream_connect,
680	.socketpair =	unix_socketpair,
681	.accept =	unix_accept,
682	.getname =	unix_getname,
683	.poll =		unix_poll,
684	.ioctl =	unix_ioctl,
685	.listen =	unix_listen,
686	.shutdown =	unix_shutdown,
687	.setsockopt =	sock_no_setsockopt,
688	.getsockopt =	sock_no_getsockopt,
689	.sendmsg =	unix_stream_sendmsg,
690	.recvmsg =	unix_stream_recvmsg,
691	.mmap =		sock_no_mmap,
692	.sendpage =	unix_stream_sendpage,
693	.splice_read =	unix_stream_splice_read,
694	.set_peek_off =	unix_set_peek_off,
695};
696
697static const struct proto_ops unix_dgram_ops = {
698	.family =	PF_UNIX,
699	.owner =	THIS_MODULE,
700	.release =	unix_release,
701	.bind =		unix_bind,
702	.connect =	unix_dgram_connect,
703	.socketpair =	unix_socketpair,
704	.accept =	sock_no_accept,
705	.getname =	unix_getname,
706	.poll =		unix_dgram_poll,
707	.ioctl =	unix_ioctl,
708	.listen =	sock_no_listen,
709	.shutdown =	unix_shutdown,
710	.setsockopt =	sock_no_setsockopt,
711	.getsockopt =	sock_no_getsockopt,
712	.sendmsg =	unix_dgram_sendmsg,
713	.recvmsg =	unix_dgram_recvmsg,
714	.mmap =		sock_no_mmap,
715	.sendpage =	sock_no_sendpage,
716	.set_peek_off =	unix_set_peek_off,
717};
718
719static const struct proto_ops unix_seqpacket_ops = {
720	.family =	PF_UNIX,
721	.owner =	THIS_MODULE,
722	.release =	unix_release,
723	.bind =		unix_bind,
724	.connect =	unix_stream_connect,
725	.socketpair =	unix_socketpair,
726	.accept =	unix_accept,
727	.getname =	unix_getname,
728	.poll =		unix_dgram_poll,
729	.ioctl =	unix_ioctl,
730	.listen =	unix_listen,
731	.shutdown =	unix_shutdown,
732	.setsockopt =	sock_no_setsockopt,
733	.getsockopt =	sock_no_getsockopt,
734	.sendmsg =	unix_seqpacket_sendmsg,
735	.recvmsg =	unix_seqpacket_recvmsg,
736	.mmap =		sock_no_mmap,
737	.sendpage =	sock_no_sendpage,
738	.set_peek_off =	unix_set_peek_off,
739};
740
741static struct proto unix_proto = {
742	.name			= "UNIX",
743	.owner			= THIS_MODULE,
744	.obj_size		= sizeof(struct unix_sock),
745};
746
747/*
748 * AF_UNIX sockets do not interact with hardware, hence they
749 * dont trigger interrupts - so it's safe for them to have
750 * bh-unsafe locking for their sk_receive_queue.lock. Split off
751 * this special lock-class by reinitializing the spinlock key:
752 */
753static struct lock_class_key af_unix_sk_receive_queue_lock_key;
754
755static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
756{
757	struct sock *sk = NULL;
758	struct unix_sock *u;
759
760	atomic_long_inc(&unix_nr_socks);
761	if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
762		goto out;
763
764	sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
765	if (!sk)
766		goto out;
767
768	sock_init_data(sock, sk);
769	lockdep_set_class(&sk->sk_receive_queue.lock,
770				&af_unix_sk_receive_queue_lock_key);
771
772	sk->sk_write_space	= unix_write_space;
773	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
774	sk->sk_destruct		= unix_sock_destructor;
775	u	  = unix_sk(sk);
776	u->path.dentry = NULL;
777	u->path.mnt = NULL;
778	spin_lock_init(&u->lock);
779	atomic_long_set(&u->inflight, 0);
780	INIT_LIST_HEAD(&u->link);
781	mutex_init(&u->readlock); /* single task reading lock */
782	init_waitqueue_head(&u->peer_wait);
783	init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
784	unix_insert_socket(unix_sockets_unbound(sk), sk);
785out:
786	if (sk == NULL)
787		atomic_long_dec(&unix_nr_socks);
788	else {
789		local_bh_disable();
790		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
791		local_bh_enable();
792	}
793	return sk;
794}
795
796static int unix_create(struct net *net, struct socket *sock, int protocol,
797		       int kern)
798{
799	if (protocol && protocol != PF_UNIX)
800		return -EPROTONOSUPPORT;
801
802	sock->state = SS_UNCONNECTED;
803
804	switch (sock->type) {
805	case SOCK_STREAM:
806		sock->ops = &unix_stream_ops;
807		break;
808		/*
809		 *	Believe it or not BSD has AF_UNIX, SOCK_RAW though
810		 *	nothing uses it.
811		 */
812	case SOCK_RAW:
813		sock->type = SOCK_DGRAM;
814	case SOCK_DGRAM:
815		sock->ops = &unix_dgram_ops;
816		break;
817	case SOCK_SEQPACKET:
818		sock->ops = &unix_seqpacket_ops;
819		break;
820	default:
821		return -ESOCKTNOSUPPORT;
822	}
823
824	return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
825}
826
827static int unix_release(struct socket *sock)
828{
829	struct sock *sk = sock->sk;
830
831	if (!sk)
832		return 0;
833
834	unix_release_sock(sk, 0);
835	sock->sk = NULL;
836
837	return 0;
838}
839
840static int unix_autobind(struct socket *sock)
841{
842	struct sock *sk = sock->sk;
843	struct net *net = sock_net(sk);
844	struct unix_sock *u = unix_sk(sk);
845	static u32 ordernum = 1;
846	struct unix_address *addr;
847	int err;
848	unsigned int retries = 0;
849
850	err = mutex_lock_interruptible(&u->readlock);
851	if (err)
852		return err;
853
854	err = 0;
855	if (u->addr)
856		goto out;
857
858	err = -ENOMEM;
859	addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
860	if (!addr)
861		goto out;
862
863	addr->name->sun_family = AF_UNIX;
864	atomic_set(&addr->refcnt, 1);
865
866retry:
867	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
868	addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
869
870	spin_lock(&unix_table_lock);
871	ordernum = (ordernum+1)&0xFFFFF;
872
873	if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
874				      addr->hash)) {
875		spin_unlock(&unix_table_lock);
876		/*
877		 * __unix_find_socket_byname() may take long time if many names
878		 * are already in use.
879		 */
880		cond_resched();
881		/* Give up if all names seems to be in use. */
882		if (retries++ == 0xFFFFF) {
883			err = -ENOSPC;
884			kfree(addr);
885			goto out;
886		}
887		goto retry;
888	}
889	addr->hash ^= sk->sk_type;
890
891	__unix_remove_socket(sk);
892	u->addr = addr;
893	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
894	spin_unlock(&unix_table_lock);
895	err = 0;
896
897out:	mutex_unlock(&u->readlock);
898	return err;
899}
900
901static struct sock *unix_find_other(struct net *net,
902				    struct sockaddr_un *sunname, int len,
903				    int type, unsigned int hash, int *error)
904{
905	struct sock *u;
906	struct path path;
907	int err = 0;
908
909	if (sunname->sun_path[0]) {
910		struct inode *inode;
911		err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
912		if (err)
913			goto fail;
914		inode = d_backing_inode(path.dentry);
915		err = inode_permission(inode, MAY_WRITE);
916		if (err)
917			goto put_fail;
918
919		err = -ECONNREFUSED;
920		if (!S_ISSOCK(inode->i_mode))
921			goto put_fail;
922		u = unix_find_socket_byinode(inode);
923		if (!u)
924			goto put_fail;
925
926		if (u->sk_type == type)
927			touch_atime(&path);
928
929		path_put(&path);
930
931		err = -EPROTOTYPE;
932		if (u->sk_type != type) {
933			sock_put(u);
934			goto fail;
935		}
936	} else {
937		err = -ECONNREFUSED;
938		u = unix_find_socket_byname(net, sunname, len, type, hash);
939		if (u) {
940			struct dentry *dentry;
941			dentry = unix_sk(u)->path.dentry;
942			if (dentry)
943				touch_atime(&unix_sk(u)->path);
944		} else
945			goto fail;
946	}
947	return u;
948
949put_fail:
950	path_put(&path);
951fail:
952	*error = err;
953	return NULL;
954}
955
956static int unix_mknod(struct dentry *dentry, struct path *path, umode_t mode,
957		      struct path *res)
958{
959	int err;
960
961	err = security_path_mknod(path, dentry, mode, 0);
962	if (!err) {
963		err = vfs_mknod(d_inode(path->dentry), dentry, mode, 0);
964		if (!err) {
965			res->mnt = mntget(path->mnt);
966			res->dentry = dget(dentry);
967		}
968	}
969
970	return err;
971}
972
973static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
974{
975	struct sock *sk = sock->sk;
976	struct net *net = sock_net(sk);
977	struct unix_sock *u = unix_sk(sk);
978	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
979	char *sun_path = sunaddr->sun_path;
980	int err, name_err;
981	unsigned int hash;
982	struct unix_address *addr;
983	struct hlist_head *list;
984	struct path path;
985	struct dentry *dentry;
986
987	err = -EINVAL;
988	if (sunaddr->sun_family != AF_UNIX)
989		goto out;
990
991	if (addr_len == sizeof(short)) {
992		err = unix_autobind(sock);
993		goto out;
994	}
995
996	err = unix_mkname(sunaddr, addr_len, &hash);
997	if (err < 0)
998		goto out;
999	addr_len = err;
1000
1001	name_err = 0;
1002	dentry = NULL;
1003	if (sun_path[0]) {
1004		/* Get the parent directory, calculate the hash for last
1005		 * component.
1006		 */
1007		dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
1008
1009		if (IS_ERR(dentry)) {
1010			/* delay report until after 'already bound' check */
1011			name_err = PTR_ERR(dentry);
1012			dentry = NULL;
1013		}
1014	}
1015
1016	err = mutex_lock_interruptible(&u->readlock);
1017	if (err)
1018		goto out_path;
1019
1020	err = -EINVAL;
1021	if (u->addr)
1022		goto out_up;
1023
1024	if (name_err) {
1025		err = name_err == -EEXIST ? -EADDRINUSE : name_err;
1026		goto out_up;
1027	}
1028
1029	err = -ENOMEM;
1030	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
1031	if (!addr)
1032		goto out_up;
1033
1034	memcpy(addr->name, sunaddr, addr_len);
1035	addr->len = addr_len;
1036	addr->hash = hash ^ sk->sk_type;
1037	atomic_set(&addr->refcnt, 1);
1038
1039	if (dentry) {
1040		struct path u_path;
1041		umode_t mode = S_IFSOCK |
1042		       (SOCK_INODE(sock)->i_mode & ~current_umask());
1043		err = unix_mknod(dentry, &path, mode, &u_path);
1044		if (err) {
1045			if (err == -EEXIST)
1046				err = -EADDRINUSE;
1047			unix_release_addr(addr);
1048			goto out_up;
1049		}
1050		addr->hash = UNIX_HASH_SIZE;
1051		hash = d_backing_inode(dentry)->i_ino & (UNIX_HASH_SIZE - 1);
1052		spin_lock(&unix_table_lock);
1053		u->path = u_path;
1054		list = &unix_socket_table[hash];
1055	} else {
1056		spin_lock(&unix_table_lock);
1057		err = -EADDRINUSE;
1058		if (__unix_find_socket_byname(net, sunaddr, addr_len,
1059					      sk->sk_type, hash)) {
1060			unix_release_addr(addr);
1061			goto out_unlock;
1062		}
1063
1064		list = &unix_socket_table[addr->hash];
1065	}
1066
1067	err = 0;
1068	__unix_remove_socket(sk);
1069	u->addr = addr;
1070	__unix_insert_socket(list, sk);
1071
1072out_unlock:
1073	spin_unlock(&unix_table_lock);
1074out_up:
1075	mutex_unlock(&u->readlock);
1076out_path:
1077	if (dentry)
1078		done_path_create(&path, dentry);
1079
1080out:
1081	return err;
1082}
1083
1084static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
1085{
1086	if (unlikely(sk1 == sk2) || !sk2) {
1087		unix_state_lock(sk1);
1088		return;
1089	}
1090	if (sk1 < sk2) {
1091		unix_state_lock(sk1);
1092		unix_state_lock_nested(sk2);
1093	} else {
1094		unix_state_lock(sk2);
1095		unix_state_lock_nested(sk1);
1096	}
1097}
1098
1099static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
1100{
1101	if (unlikely(sk1 == sk2) || !sk2) {
1102		unix_state_unlock(sk1);
1103		return;
1104	}
1105	unix_state_unlock(sk1);
1106	unix_state_unlock(sk2);
1107}
1108
1109static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
1110			      int alen, int flags)
1111{
1112	struct sock *sk = sock->sk;
1113	struct net *net = sock_net(sk);
1114	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
1115	struct sock *other;
1116	unsigned int hash;
1117	int err;
1118
1119	if (addr->sa_family != AF_UNSPEC) {
1120		err = unix_mkname(sunaddr, alen, &hash);
1121		if (err < 0)
1122			goto out;
1123		alen = err;
1124
1125		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
1126		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
1127			goto out;
1128
1129restart:
1130		other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
1131		if (!other)
1132			goto out;
1133
1134		unix_state_double_lock(sk, other);
1135
1136		/* Apparently VFS overslept socket death. Retry. */
1137		if (sock_flag(other, SOCK_DEAD)) {
1138			unix_state_double_unlock(sk, other);
1139			sock_put(other);
1140			goto restart;
1141		}
1142
1143		err = -EPERM;
1144		if (!unix_may_send(sk, other))
1145			goto out_unlock;
1146
1147		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1148		if (err)
1149			goto out_unlock;
1150
1151	} else {
1152		/*
1153		 *	1003.1g breaking connected state with AF_UNSPEC
1154		 */
1155		other = NULL;
1156		unix_state_double_lock(sk, other);
1157	}
1158
1159	/*
1160	 * If it was connected, reconnect.
1161	 */
1162	if (unix_peer(sk)) {
1163		struct sock *old_peer = unix_peer(sk);
1164		unix_peer(sk) = other;
1165		unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
1166
1167		unix_state_double_unlock(sk, other);
1168
1169		if (other != old_peer)
1170			unix_dgram_disconnected(sk, old_peer);
1171		sock_put(old_peer);
1172	} else {
1173		unix_peer(sk) = other;
1174		unix_state_double_unlock(sk, other);
1175	}
1176	return 0;
1177
1178out_unlock:
1179	unix_state_double_unlock(sk, other);
1180	sock_put(other);
1181out:
1182	return err;
1183}
1184
1185static long unix_wait_for_peer(struct sock *other, long timeo)
1186{
1187	struct unix_sock *u = unix_sk(other);
1188	int sched;
1189	DEFINE_WAIT(wait);
1190
1191	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1192
1193	sched = !sock_flag(other, SOCK_DEAD) &&
1194		!(other->sk_shutdown & RCV_SHUTDOWN) &&
1195		unix_recvq_full(other);
1196
1197	unix_state_unlock(other);
1198
1199	if (sched)
1200		timeo = schedule_timeout(timeo);
1201
1202	finish_wait(&u->peer_wait, &wait);
1203	return timeo;
1204}
1205
1206static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1207			       int addr_len, int flags)
1208{
1209	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1210	struct sock *sk = sock->sk;
1211	struct net *net = sock_net(sk);
1212	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1213	struct sock *newsk = NULL;
1214	struct sock *other = NULL;
1215	struct sk_buff *skb = NULL;
1216	unsigned int hash;
1217	int st;
1218	int err;
1219	long timeo;
1220
1221	err = unix_mkname(sunaddr, addr_len, &hash);
1222	if (err < 0)
1223		goto out;
1224	addr_len = err;
1225
1226	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
1227	    (err = unix_autobind(sock)) != 0)
1228		goto out;
1229
1230	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1231
1232	/* First of all allocate resources.
1233	   If we will make it after state is locked,
1234	   we will have to recheck all again in any case.
1235	 */
1236
1237	err = -ENOMEM;
1238
1239	/* create new sock for complete connection */
1240	newsk = unix_create1(sock_net(sk), NULL, 0);
1241	if (newsk == NULL)
1242		goto out;
1243
1244	/* Allocate skb for sending to listening sock */
1245	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1246	if (skb == NULL)
1247		goto out;
1248
1249restart:
1250	/*  Find listening sock. */
1251	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
1252	if (!other)
1253		goto out;
1254
1255	/* Latch state of peer */
1256	unix_state_lock(other);
1257
1258	/* Apparently VFS overslept socket death. Retry. */
1259	if (sock_flag(other, SOCK_DEAD)) {
1260		unix_state_unlock(other);
1261		sock_put(other);
1262		goto restart;
1263	}
1264
1265	err = -ECONNREFUSED;
1266	if (other->sk_state != TCP_LISTEN)
1267		goto out_unlock;
1268	if (other->sk_shutdown & RCV_SHUTDOWN)
1269		goto out_unlock;
1270
1271	if (unix_recvq_full(other)) {
1272		err = -EAGAIN;
1273		if (!timeo)
1274			goto out_unlock;
1275
1276		timeo = unix_wait_for_peer(other, timeo);
1277
1278		err = sock_intr_errno(timeo);
1279		if (signal_pending(current))
1280			goto out;
1281		sock_put(other);
1282		goto restart;
1283	}
1284
1285	/* Latch our state.
1286
1287	   It is tricky place. We need to grab our state lock and cannot
1288	   drop lock on peer. It is dangerous because deadlock is
1289	   possible. Connect to self case and simultaneous
1290	   attempt to connect are eliminated by checking socket
1291	   state. other is TCP_LISTEN, if sk is TCP_LISTEN we
1292	   check this before attempt to grab lock.
1293
1294	   Well, and we have to recheck the state after socket locked.
1295	 */
1296	st = sk->sk_state;
1297
1298	switch (st) {
1299	case TCP_CLOSE:
1300		/* This is ok... continue with connect */
1301		break;
1302	case TCP_ESTABLISHED:
1303		/* Socket is already connected */
1304		err = -EISCONN;
1305		goto out_unlock;
1306	default:
1307		err = -EINVAL;
1308		goto out_unlock;
1309	}
1310
1311	unix_state_lock_nested(sk);
1312
1313	if (sk->sk_state != st) {
1314		unix_state_unlock(sk);
1315		unix_state_unlock(other);
1316		sock_put(other);
1317		goto restart;
1318	}
1319
1320	err = security_unix_stream_connect(sk, other, newsk);
1321	if (err) {
1322		unix_state_unlock(sk);
1323		goto out_unlock;
1324	}
1325
1326	/* The way is open! Fastly set all the necessary fields... */
1327
1328	sock_hold(sk);
1329	unix_peer(newsk)	= sk;
1330	newsk->sk_state		= TCP_ESTABLISHED;
1331	newsk->sk_type		= sk->sk_type;
1332	init_peercred(newsk);
1333	newu = unix_sk(newsk);
1334	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
1335	otheru = unix_sk(other);
1336
1337	/* copy address information from listening to new sock*/
1338	if (otheru->addr) {
1339		atomic_inc(&otheru->addr->refcnt);
1340		newu->addr = otheru->addr;
1341	}
1342	if (otheru->path.dentry) {
1343		path_get(&otheru->path);
1344		newu->path = otheru->path;
1345	}
1346
1347	/* Set credentials */
1348	copy_peercred(sk, other);
1349
1350	sock->state	= SS_CONNECTED;
1351	sk->sk_state	= TCP_ESTABLISHED;
1352	sock_hold(newsk);
1353
1354	smp_mb__after_atomic();	/* sock_hold() does an atomic_inc() */
1355	unix_peer(sk)	= newsk;
1356
1357	unix_state_unlock(sk);
1358
1359	/* take ten and and send info to listening sock */
1360	spin_lock(&other->sk_receive_queue.lock);
1361	__skb_queue_tail(&other->sk_receive_queue, skb);
1362	spin_unlock(&other->sk_receive_queue.lock);
1363	unix_state_unlock(other);
1364	other->sk_data_ready(other);
1365	sock_put(other);
1366	return 0;
1367
1368out_unlock:
1369	if (other)
1370		unix_state_unlock(other);
1371
1372out:
1373	kfree_skb(skb);
1374	if (newsk)
1375		unix_release_sock(newsk, 0);
1376	if (other)
1377		sock_put(other);
1378	return err;
1379}
1380
1381static int unix_socketpair(struct socket *socka, struct socket *sockb)
1382{
1383	struct sock *ska = socka->sk, *skb = sockb->sk;
1384
1385	/* Join our sockets back to back */
1386	sock_hold(ska);
1387	sock_hold(skb);
1388	unix_peer(ska) = skb;
1389	unix_peer(skb) = ska;
1390	init_peercred(ska);
1391	init_peercred(skb);
1392
1393	if (ska->sk_type != SOCK_DGRAM) {
1394		ska->sk_state = TCP_ESTABLISHED;
1395		skb->sk_state = TCP_ESTABLISHED;
1396		socka->state  = SS_CONNECTED;
1397		sockb->state  = SS_CONNECTED;
1398	}
1399	return 0;
1400}
1401
1402static void unix_sock_inherit_flags(const struct socket *old,
1403				    struct socket *new)
1404{
1405	if (test_bit(SOCK_PASSCRED, &old->flags))
1406		set_bit(SOCK_PASSCRED, &new->flags);
1407	if (test_bit(SOCK_PASSSEC, &old->flags))
1408		set_bit(SOCK_PASSSEC, &new->flags);
1409}
1410
1411static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1412{
1413	struct sock *sk = sock->sk;
1414	struct sock *tsk;
1415	struct sk_buff *skb;
1416	int err;
1417
1418	err = -EOPNOTSUPP;
1419	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1420		goto out;
1421
1422	err = -EINVAL;
1423	if (sk->sk_state != TCP_LISTEN)
1424		goto out;
1425
1426	/* If socket state is TCP_LISTEN it cannot change (for now...),
1427	 * so that no locks are necessary.
1428	 */
1429
1430	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1431	if (!skb) {
1432		/* This means receive shutdown. */
1433		if (err == 0)
1434			err = -EINVAL;
1435		goto out;
1436	}
1437
1438	tsk = skb->sk;
1439	skb_free_datagram(sk, skb);
1440	wake_up_interruptible(&unix_sk(sk)->peer_wait);
1441
1442	/* attach accepted sock to socket */
1443	unix_state_lock(tsk);
1444	newsock->state = SS_CONNECTED;
1445	unix_sock_inherit_flags(sock, newsock);
1446	sock_graft(tsk, newsock);
1447	unix_state_unlock(tsk);
1448	return 0;
1449
1450out:
1451	return err;
1452}
1453
1454
1455static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1456{
1457	struct sock *sk = sock->sk;
1458	struct unix_sock *u;
1459	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1460	int err = 0;
1461
1462	if (peer) {
1463		sk = unix_peer_get(sk);
1464
1465		err = -ENOTCONN;
1466		if (!sk)
1467			goto out;
1468		err = 0;
1469	} else {
1470		sock_hold(sk);
1471	}
1472
1473	u = unix_sk(sk);
1474	unix_state_lock(sk);
1475	if (!u->addr) {
1476		sunaddr->sun_family = AF_UNIX;
1477		sunaddr->sun_path[0] = 0;
1478		*uaddr_len = sizeof(short);
1479	} else {
1480		struct unix_address *addr = u->addr;
1481
1482		*uaddr_len = addr->len;
1483		memcpy(sunaddr, addr->name, *uaddr_len);
1484	}
1485	unix_state_unlock(sk);
1486	sock_put(sk);
1487out:
1488	return err;
1489}
1490
1491static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1492{
1493	int i;
1494
1495	scm->fp = UNIXCB(skb).fp;
1496	UNIXCB(skb).fp = NULL;
1497
1498	for (i = scm->fp->count-1; i >= 0; i--)
1499		unix_notinflight(scm->fp->user, scm->fp->fp[i]);
1500}
1501
1502static void unix_destruct_scm(struct sk_buff *skb)
1503{
1504	struct scm_cookie scm;
1505	memset(&scm, 0, sizeof(scm));
1506	scm.pid  = UNIXCB(skb).pid;
1507	if (UNIXCB(skb).fp)
1508		unix_detach_fds(&scm, skb);
1509
1510	/* Alas, it calls VFS */
1511	/* So fscking what? fput() had been SMP-safe since the last Summer */
1512	scm_destroy(&scm);
1513	sock_wfree(skb);
1514}
1515
1516/*
1517 * The "user->unix_inflight" variable is protected by the garbage
1518 * collection lock, and we just read it locklessly here. If you go
1519 * over the limit, there might be a tiny race in actually noticing
1520 * it across threads. Tough.
1521 */
1522static inline bool too_many_unix_fds(struct task_struct *p)
1523{
1524	struct user_struct *user = current_user();
1525
1526	if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE)))
1527		return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
1528	return false;
1529}
1530
1531#define MAX_RECURSION_LEVEL 4
1532
1533static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1534{
1535	int i;
1536	unsigned char max_level = 0;
1537	int unix_sock_count = 0;
1538
1539	if (too_many_unix_fds(current))
1540		return -ETOOMANYREFS;
1541
1542	for (i = scm->fp->count - 1; i >= 0; i--) {
1543		struct sock *sk = unix_get_socket(scm->fp->fp[i]);
1544
1545		if (sk) {
1546			unix_sock_count++;
1547			max_level = max(max_level,
1548					unix_sk(sk)->recursion_level);
1549		}
1550	}
1551	if (unlikely(max_level > MAX_RECURSION_LEVEL))
1552		return -ETOOMANYREFS;
1553
1554	/*
1555	 * Need to duplicate file references for the sake of garbage
1556	 * collection.  Otherwise a socket in the fps might become a
1557	 * candidate for GC while the skb is not yet queued.
1558	 */
1559	UNIXCB(skb).fp = scm_fp_dup(scm->fp);
1560	if (!UNIXCB(skb).fp)
1561		return -ENOMEM;
1562
1563	for (i = scm->fp->count - 1; i >= 0; i--)
1564		unix_inflight(scm->fp->user, scm->fp->fp[i]);
1565	return max_level;
1566}
1567
1568static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1569{
1570	int err = 0;
1571
1572	UNIXCB(skb).pid  = get_pid(scm->pid);
1573	UNIXCB(skb).uid = scm->creds.uid;
1574	UNIXCB(skb).gid = scm->creds.gid;
1575	UNIXCB(skb).fp = NULL;
1576	unix_get_secdata(scm, skb);
1577	if (scm->fp && send_fds)
1578		err = unix_attach_fds(scm, skb);
1579
1580	skb->destructor = unix_destruct_scm;
1581	return err;
1582}
1583
1584static bool unix_passcred_enabled(const struct socket *sock,
1585				  const struct sock *other)
1586{
1587	return test_bit(SOCK_PASSCRED, &sock->flags) ||
1588	       !other->sk_socket ||
1589	       test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
1590}
1591
1592/*
1593 * Some apps rely on write() giving SCM_CREDENTIALS
1594 * We include credentials if source or destination socket
1595 * asserted SOCK_PASSCRED.
1596 */
1597static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1598			    const struct sock *other)
1599{
1600	if (UNIXCB(skb).pid)
1601		return;
1602	if (unix_passcred_enabled(sock, other)) {
1603		UNIXCB(skb).pid  = get_pid(task_tgid(current));
1604		current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
1605	}
1606}
1607
1608static int maybe_init_creds(struct scm_cookie *scm,
1609			    struct socket *socket,
1610			    const struct sock *other)
1611{
1612	int err;
1613	struct msghdr msg = { .msg_controllen = 0 };
1614
1615	err = scm_send(socket, &msg, scm, false);
1616	if (err)
1617		return err;
1618
1619	if (unix_passcred_enabled(socket, other)) {
1620		scm->pid = get_pid(task_tgid(current));
1621		current_uid_gid(&scm->creds.uid, &scm->creds.gid);
1622	}
1623	return err;
1624}
1625
1626static bool unix_skb_scm_eq(struct sk_buff *skb,
1627			    struct scm_cookie *scm)
1628{
1629	const struct unix_skb_parms *u = &UNIXCB(skb);
1630
1631	return u->pid == scm->pid &&
1632	       uid_eq(u->uid, scm->creds.uid) &&
1633	       gid_eq(u->gid, scm->creds.gid) &&
1634	       unix_secdata_eq(scm, skb);
1635}
1636
1637/*
1638 *	Send AF_UNIX data.
1639 */
1640
1641static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1642			      size_t len)
1643{
1644	struct sock *sk = sock->sk;
1645	struct net *net = sock_net(sk);
1646	struct unix_sock *u = unix_sk(sk);
1647	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
1648	struct sock *other = NULL;
1649	int namelen = 0; /* fake GCC */
1650	int err;
1651	unsigned int hash;
1652	struct sk_buff *skb;
1653	long timeo;
1654	struct scm_cookie scm;
1655	int max_level;
1656	int data_len = 0;
1657	int sk_locked;
1658
1659	wait_for_unix_gc();
1660	err = scm_send(sock, msg, &scm, false);
1661	if (err < 0)
1662		return err;
1663
1664	err = -EOPNOTSUPP;
1665	if (msg->msg_flags&MSG_OOB)
1666		goto out;
1667
1668	if (msg->msg_namelen) {
1669		err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1670		if (err < 0)
1671			goto out;
1672		namelen = err;
1673	} else {
1674		sunaddr = NULL;
1675		err = -ENOTCONN;
1676		other = unix_peer_get(sk);
1677		if (!other)
1678			goto out;
1679	}
1680
1681	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1682	    && (err = unix_autobind(sock)) != 0)
1683		goto out;
1684
1685	err = -EMSGSIZE;
1686	if (len > sk->sk_sndbuf - 32)
1687		goto out;
1688
1689	if (len > SKB_MAX_ALLOC) {
1690		data_len = min_t(size_t,
1691				 len - SKB_MAX_ALLOC,
1692				 MAX_SKB_FRAGS * PAGE_SIZE);
1693		data_len = PAGE_ALIGN(data_len);
1694
1695		BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
1696	}
1697
1698	skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1699				   msg->msg_flags & MSG_DONTWAIT, &err,
1700				   PAGE_ALLOC_COSTLY_ORDER);
1701	if (skb == NULL)
1702		goto out;
1703
1704	err = unix_scm_to_skb(&scm, skb, true);
1705	if (err < 0)
1706		goto out_free;
1707	max_level = err + 1;
1708
1709	skb_put(skb, len - data_len);
1710	skb->data_len = data_len;
1711	skb->len = len;
1712	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
1713	if (err)
1714		goto out_free;
1715
1716	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1717
1718restart:
1719	if (!other) {
1720		err = -ECONNRESET;
1721		if (sunaddr == NULL)
1722			goto out_free;
1723
1724		other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1725					hash, &err);
1726		if (other == NULL)
1727			goto out_free;
1728	}
1729
1730	if (sk_filter(other, skb) < 0) {
1731		/* Toss the packet but do not return any error to the sender */
1732		err = len;
1733		goto out_free;
1734	}
1735
1736	sk_locked = 0;
1737	unix_state_lock(other);
1738restart_locked:
1739	err = -EPERM;
1740	if (!unix_may_send(sk, other))
1741		goto out_unlock;
1742
1743	if (unlikely(sock_flag(other, SOCK_DEAD))) {
1744		/*
1745		 *	Check with 1003.1g - what should
1746		 *	datagram error
1747		 */
1748		unix_state_unlock(other);
1749		sock_put(other);
1750
1751		if (!sk_locked)
1752			unix_state_lock(sk);
1753
1754		err = 0;
1755		if (unix_peer(sk) == other) {
1756			unix_peer(sk) = NULL;
1757			unix_dgram_peer_wake_disconnect_wakeup(sk, other);
1758
1759			unix_state_unlock(sk);
1760
1761			unix_dgram_disconnected(sk, other);
1762			sock_put(other);
1763			err = -ECONNREFUSED;
1764		} else {
1765			unix_state_unlock(sk);
1766		}
1767
1768		other = NULL;
1769		if (err)
1770			goto out_free;
1771		goto restart;
1772	}
1773
1774	err = -EPIPE;
1775	if (other->sk_shutdown & RCV_SHUTDOWN)
1776		goto out_unlock;
1777
1778	if (sk->sk_type != SOCK_SEQPACKET) {
1779		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1780		if (err)
1781			goto out_unlock;
1782	}
1783
1784	/* other == sk && unix_peer(other) != sk if
1785	 * - unix_peer(sk) == NULL, destination address bound to sk
1786	 * - unix_peer(sk) == sk by time of get but disconnected before lock
1787	 */
1788	if (other != sk &&
1789	    unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
1790		if (timeo) {
1791			timeo = unix_wait_for_peer(other, timeo);
1792
1793			err = sock_intr_errno(timeo);
1794			if (signal_pending(current))
1795				goto out_free;
1796
1797			goto restart;
1798		}
1799
1800		if (!sk_locked) {
1801			unix_state_unlock(other);
1802			unix_state_double_lock(sk, other);
1803		}
1804
1805		if (unix_peer(sk) != other ||
1806		    unix_dgram_peer_wake_me(sk, other)) {
1807			err = -EAGAIN;
1808			sk_locked = 1;
1809			goto out_unlock;
1810		}
1811
1812		if (!sk_locked) {
1813			sk_locked = 1;
1814			goto restart_locked;
1815		}
1816	}
1817
1818	if (unlikely(sk_locked))
1819		unix_state_unlock(sk);
1820
1821	if (sock_flag(other, SOCK_RCVTSTAMP))
1822		__net_timestamp(skb);
1823	maybe_add_creds(skb, sock, other);
1824	skb_queue_tail(&other->sk_receive_queue, skb);
1825	if (max_level > unix_sk(other)->recursion_level)
1826		unix_sk(other)->recursion_level = max_level;
1827	unix_state_unlock(other);
1828	other->sk_data_ready(other);
1829	sock_put(other);
1830	scm_destroy(&scm);
1831	return len;
1832
1833out_unlock:
1834	if (sk_locked)
1835		unix_state_unlock(sk);
1836	unix_state_unlock(other);
1837out_free:
1838	kfree_skb(skb);
1839out:
1840	if (other)
1841		sock_put(other);
1842	scm_destroy(&scm);
1843	return err;
1844}
1845
1846/* We use paged skbs for stream sockets, and limit occupancy to 32768
1847 * bytes, and a minimun of a full page.
1848 */
1849#define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
1850
1851static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
1852			       size_t len)
1853{
1854	struct sock *sk = sock->sk;
1855	struct sock *other = NULL;
1856	int err, size;
1857	struct sk_buff *skb;
1858	int sent = 0;
1859	struct scm_cookie scm;
1860	bool fds_sent = false;
1861	int max_level;
1862	int data_len;
1863
1864	wait_for_unix_gc();
1865	err = scm_send(sock, msg, &scm, false);
1866	if (err < 0)
1867		return err;
1868
1869	err = -EOPNOTSUPP;
1870	if (msg->msg_flags&MSG_OOB)
1871		goto out_err;
1872
1873	if (msg->msg_namelen) {
1874		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1875		goto out_err;
1876	} else {
1877		err = -ENOTCONN;
1878		other = unix_peer(sk);
1879		if (!other)
1880			goto out_err;
1881	}
1882
1883	if (sk->sk_shutdown & SEND_SHUTDOWN)
1884		goto pipe_err;
1885
1886	while (sent < len) {
1887		size = len - sent;
1888
1889		/* Keep two messages in the pipe so it schedules better */
1890		size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
1891
1892		/* allow fallback to order-0 allocations */
1893		size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
1894
1895		data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
1896
1897		data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
1898
1899		skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
1900					   msg->msg_flags & MSG_DONTWAIT, &err,
1901					   get_order(UNIX_SKB_FRAGS_SZ));
1902		if (!skb)
1903			goto out_err;
1904
1905		/* Only send the fds in the first buffer */
1906		err = unix_scm_to_skb(&scm, skb, !fds_sent);
1907		if (err < 0) {
1908			kfree_skb(skb);
1909			goto out_err;
1910		}
1911		max_level = err + 1;
1912		fds_sent = true;
1913
1914		skb_put(skb, size - data_len);
1915		skb->data_len = data_len;
1916		skb->len = size;
1917		err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
1918		if (err) {
1919			kfree_skb(skb);
1920			goto out_err;
1921		}
1922
1923		unix_state_lock(other);
1924
1925		if (sock_flag(other, SOCK_DEAD) ||
1926		    (other->sk_shutdown & RCV_SHUTDOWN))
1927			goto pipe_err_free;
1928
1929		maybe_add_creds(skb, sock, other);
1930		skb_queue_tail(&other->sk_receive_queue, skb);
1931		if (max_level > unix_sk(other)->recursion_level)
1932			unix_sk(other)->recursion_level = max_level;
1933		unix_state_unlock(other);
1934		other->sk_data_ready(other);
1935		sent += size;
1936	}
1937
1938	scm_destroy(&scm);
1939
1940	return sent;
1941
1942pipe_err_free:
1943	unix_state_unlock(other);
1944	kfree_skb(skb);
1945pipe_err:
1946	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1947		send_sig(SIGPIPE, current, 0);
1948	err = -EPIPE;
1949out_err:
1950	scm_destroy(&scm);
1951	return sent ? : err;
1952}
1953
1954static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
1955				    int offset, size_t size, int flags)
1956{
1957	int err;
1958	bool send_sigpipe = false;
1959	bool init_scm = true;
1960	struct scm_cookie scm;
1961	struct sock *other, *sk = socket->sk;
1962	struct sk_buff *skb, *newskb = NULL, *tail = NULL;
1963
1964	if (flags & MSG_OOB)
1965		return -EOPNOTSUPP;
1966
1967	other = unix_peer(sk);
1968	if (!other || sk->sk_state != TCP_ESTABLISHED)
1969		return -ENOTCONN;
1970
1971	if (false) {
1972alloc_skb:
1973		unix_state_unlock(other);
1974		mutex_unlock(&unix_sk(other)->readlock);
1975		newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
1976					      &err, 0);
1977		if (!newskb)
1978			goto err;
1979	}
1980
1981	/* we must acquire readlock as we modify already present
1982	 * skbs in the sk_receive_queue and mess with skb->len
1983	 */
1984	err = mutex_lock_interruptible(&unix_sk(other)->readlock);
1985	if (err) {
1986		err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
1987		goto err;
1988	}
1989
1990	if (sk->sk_shutdown & SEND_SHUTDOWN) {
1991		err = -EPIPE;
1992		send_sigpipe = true;
1993		goto err_unlock;
1994	}
1995
1996	unix_state_lock(other);
1997
1998	if (sock_flag(other, SOCK_DEAD) ||
1999	    other->sk_shutdown & RCV_SHUTDOWN) {
2000		err = -EPIPE;
2001		send_sigpipe = true;
2002		goto err_state_unlock;
2003	}
2004
2005	if (init_scm) {
2006		err = maybe_init_creds(&scm, socket, other);
2007		if (err)
2008			goto err_state_unlock;
2009		init_scm = false;
2010	}
2011
2012	skb = skb_peek_tail(&other->sk_receive_queue);
2013	if (tail && tail == skb) {
2014		skb = newskb;
2015	} else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
2016		if (newskb) {
2017			skb = newskb;
2018		} else {
2019			tail = skb;
2020			goto alloc_skb;
2021		}
2022	} else if (newskb) {
2023		/* this is fast path, we don't necessarily need to
2024		 * call to kfree_skb even though with newskb == NULL
2025		 * this - does no harm
2026		 */
2027		consume_skb(newskb);
2028		newskb = NULL;
2029	}
2030
2031	if (skb_append_pagefrags(skb, page, offset, size)) {
2032		tail = skb;
2033		goto alloc_skb;
2034	}
2035
2036	skb->len += size;
2037	skb->data_len += size;
2038	skb->truesize += size;
2039	atomic_add(size, &sk->sk_wmem_alloc);
2040
2041	if (newskb) {
2042		err = unix_scm_to_skb(&scm, skb, false);
2043		if (err)
2044			goto err_state_unlock;
2045		spin_lock(&other->sk_receive_queue.lock);
2046		__skb_queue_tail(&other->sk_receive_queue, newskb);
2047		spin_unlock(&other->sk_receive_queue.lock);
2048	}
2049
2050	unix_state_unlock(other);
2051	mutex_unlock(&unix_sk(other)->readlock);
2052
2053	other->sk_data_ready(other);
2054	scm_destroy(&scm);
2055	return size;
2056
2057err_state_unlock:
2058	unix_state_unlock(other);
2059err_unlock:
2060	mutex_unlock(&unix_sk(other)->readlock);
2061err:
2062	kfree_skb(newskb);
2063	if (send_sigpipe && !(flags & MSG_NOSIGNAL))
2064		send_sig(SIGPIPE, current, 0);
2065	if (!init_scm)
2066		scm_destroy(&scm);
2067	return err;
2068}
2069
2070static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2071				  size_t len)
2072{
2073	int err;
2074	struct sock *sk = sock->sk;
2075
2076	err = sock_error(sk);
2077	if (err)
2078		return err;
2079
2080	if (sk->sk_state != TCP_ESTABLISHED)
2081		return -ENOTCONN;
2082
2083	if (msg->msg_namelen)
2084		msg->msg_namelen = 0;
2085
2086	return unix_dgram_sendmsg(sock, msg, len);
2087}
2088
2089static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2090				  size_t size, int flags)
2091{
2092	struct sock *sk = sock->sk;
2093
2094	if (sk->sk_state != TCP_ESTABLISHED)
2095		return -ENOTCONN;
2096
2097	return unix_dgram_recvmsg(sock, msg, size, flags);
2098}
2099
2100static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2101{
2102	struct unix_sock *u = unix_sk(sk);
2103
2104	if (u->addr) {
2105		msg->msg_namelen = u->addr->len;
2106		memcpy(msg->msg_name, u->addr->name, u->addr->len);
2107	}
2108}
2109
2110static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
2111			      size_t size, int flags)
2112{
2113	struct scm_cookie scm;
2114	struct sock *sk = sock->sk;
2115	struct unix_sock *u = unix_sk(sk);
2116	int noblock = flags & MSG_DONTWAIT;
2117	struct sk_buff *skb;
2118	int err;
2119	int peeked, skip;
2120
2121	err = -EOPNOTSUPP;
2122	if (flags&MSG_OOB)
2123		goto out;
2124
2125	err = mutex_lock_interruptible(&u->readlock);
2126	if (unlikely(err)) {
2127		/* recvmsg() in non blocking mode is supposed to return -EAGAIN
2128		 * sk_rcvtimeo is not honored by mutex_lock_interruptible()
2129		 */
2130		err = noblock ? -EAGAIN : -ERESTARTSYS;
2131		goto out;
2132	}
2133
2134	skip = sk_peek_offset(sk, flags);
2135
2136	skb = __skb_recv_datagram(sk, flags, &peeked, &skip, &err);
2137	if (!skb) {
2138		unix_state_lock(sk);
2139		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2140		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2141		    (sk->sk_shutdown & RCV_SHUTDOWN))
2142			err = 0;
2143		unix_state_unlock(sk);
2144		goto out_unlock;
2145	}
2146
2147	wake_up_interruptible_sync_poll(&u->peer_wait,
2148					POLLOUT | POLLWRNORM | POLLWRBAND);
2149
2150	if (msg->msg_name)
2151		unix_copy_addr(msg, skb->sk);
2152
2153	if (size > skb->len - skip)
2154		size = skb->len - skip;
2155	else if (size < skb->len - skip)
2156		msg->msg_flags |= MSG_TRUNC;
2157
2158	err = skb_copy_datagram_msg(skb, skip, msg, size);
2159	if (err)
2160		goto out_free;
2161
2162	if (sock_flag(sk, SOCK_RCVTSTAMP))
2163		__sock_recv_timestamp(msg, sk, skb);
2164
2165	memset(&scm, 0, sizeof(scm));
2166
2167	scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2168	unix_set_secdata(&scm, skb);
2169
2170	if (!(flags & MSG_PEEK)) {
2171		if (UNIXCB(skb).fp)
2172			unix_detach_fds(&scm, skb);
2173
2174		sk_peek_offset_bwd(sk, skb->len);
2175	} else {
2176		/* It is questionable: on PEEK we could:
2177		   - do not return fds - good, but too simple 8)
2178		   - return fds, and do not return them on read (old strategy,
2179		     apparently wrong)
2180		   - clone fds (I chose it for now, it is the most universal
2181		     solution)
2182
2183		   POSIX 1003.1g does not actually define this clearly
2184		   at all. POSIX 1003.1g doesn't define a lot of things
2185		   clearly however!
2186
2187		*/
2188
2189		sk_peek_offset_fwd(sk, size);
2190
2191		if (UNIXCB(skb).fp)
2192			scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2193	}
2194	err = (flags & MSG_TRUNC) ? skb->len - skip : size;
2195
2196	scm_recv(sock, msg, &scm, flags);
2197
2198out_free:
2199	skb_free_datagram(sk, skb);
2200out_unlock:
2201	mutex_unlock(&u->readlock);
2202out:
2203	return err;
2204}
2205
2206/*
2207 *	Sleep until more data has arrived. But check for races..
2208 */
2209static long unix_stream_data_wait(struct sock *sk, long timeo,
2210				  struct sk_buff *last, unsigned int last_len)
2211{
2212	struct sk_buff *tail;
2213	DEFINE_WAIT(wait);
2214
2215	unix_state_lock(sk);
2216
2217	for (;;) {
2218		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2219
2220		tail = skb_peek_tail(&sk->sk_receive_queue);
2221		if (tail != last ||
2222		    (tail && tail->len != last_len) ||
2223		    sk->sk_err ||
2224		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
2225		    signal_pending(current) ||
2226		    !timeo)
2227			break;
2228
2229		sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2230		unix_state_unlock(sk);
2231		timeo = freezable_schedule_timeout(timeo);
2232		unix_state_lock(sk);
2233
2234		if (sock_flag(sk, SOCK_DEAD))
2235			break;
2236
2237		sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2238	}
2239
2240	finish_wait(sk_sleep(sk), &wait);
2241	unix_state_unlock(sk);
2242	return timeo;
2243}
2244
2245static unsigned int unix_skb_len(const struct sk_buff *skb)
2246{
2247	return skb->len - UNIXCB(skb).consumed;
2248}
2249
2250struct unix_stream_read_state {
2251	int (*recv_actor)(struct sk_buff *, int, int,
2252			  struct unix_stream_read_state *);
2253	struct socket *socket;
2254	struct msghdr *msg;
2255	struct pipe_inode_info *pipe;
2256	size_t size;
2257	int flags;
2258	unsigned int splice_flags;
2259};
2260
2261static int unix_stream_read_generic(struct unix_stream_read_state *state)
2262{
2263	struct scm_cookie scm;
2264	struct socket *sock = state->socket;
2265	struct sock *sk = sock->sk;
2266	struct unix_sock *u = unix_sk(sk);
2267	int copied = 0;
2268	int flags = state->flags;
2269	int noblock = flags & MSG_DONTWAIT;
2270	bool check_creds = false;
2271	int target;
2272	int err = 0;
2273	long timeo;
2274	int skip;
2275	size_t size = state->size;
2276	unsigned int last_len;
2277
2278	if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
2279		err = -EINVAL;
2280		goto out;
2281	}
2282
2283	if (unlikely(flags & MSG_OOB)) {
2284		err = -EOPNOTSUPP;
2285		goto out;
2286	}
2287
2288	target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
2289	timeo = sock_rcvtimeo(sk, noblock);
2290
2291	memset(&scm, 0, sizeof(scm));
2292
2293	/* Lock the socket to prevent queue disordering
2294	 * while sleeps in memcpy_tomsg
2295	 */
2296	mutex_lock(&u->readlock);
2297
2298	if (flags & MSG_PEEK)
2299		skip = sk_peek_offset(sk, flags);
2300	else
2301		skip = 0;
2302
2303	do {
2304		int chunk;
2305		bool drop_skb;
2306		struct sk_buff *skb, *last;
2307
2308		unix_state_lock(sk);
2309		if (sock_flag(sk, SOCK_DEAD)) {
2310			err = -ECONNRESET;
2311			goto unlock;
2312		}
2313		last = skb = skb_peek(&sk->sk_receive_queue);
2314		last_len = last ? last->len : 0;
2315again:
2316		if (skb == NULL) {
2317			unix_sk(sk)->recursion_level = 0;
2318			if (copied >= target)
2319				goto unlock;
2320
2321			/*
2322			 *	POSIX 1003.1g mandates this order.
2323			 */
2324
2325			err = sock_error(sk);
2326			if (err)
2327				goto unlock;
2328			if (sk->sk_shutdown & RCV_SHUTDOWN)
2329				goto unlock;
2330
2331			unix_state_unlock(sk);
2332			if (!timeo) {
2333				err = -EAGAIN;
2334				break;
2335			}
2336
2337			mutex_unlock(&u->readlock);
2338
2339			timeo = unix_stream_data_wait(sk, timeo, last,
2340						      last_len);
2341
2342			if (signal_pending(current)) {
2343				err = sock_intr_errno(timeo);
2344				scm_destroy(&scm);
2345				goto out;
2346			}
2347
2348			mutex_lock(&u->readlock);
2349			continue;
2350unlock:
2351			unix_state_unlock(sk);
2352			break;
2353		}
2354
2355		while (skip >= unix_skb_len(skb)) {
2356			skip -= unix_skb_len(skb);
2357			last = skb;
2358			last_len = skb->len;
2359			skb = skb_peek_next(skb, &sk->sk_receive_queue);
2360			if (!skb)
2361				goto again;
2362		}
2363
2364		unix_state_unlock(sk);
2365
2366		if (check_creds) {
2367			/* Never glue messages from different writers */
2368			if (!unix_skb_scm_eq(skb, &scm))
2369				break;
2370		} else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
2371			/* Copy credentials */
2372			scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2373			unix_set_secdata(&scm, skb);
2374			check_creds = true;
2375		}
2376
2377		/* Copy address just once */
2378		if (state->msg && state->msg->msg_name) {
2379			DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
2380					 state->msg->msg_name);
2381			unix_copy_addr(state->msg, skb->sk);
2382			sunaddr = NULL;
2383		}
2384
2385		chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
2386		skb_get(skb);
2387		chunk = state->recv_actor(skb, skip, chunk, state);
2388		drop_skb = !unix_skb_len(skb);
2389		/* skb is only safe to use if !drop_skb */
2390		consume_skb(skb);
2391		if (chunk < 0) {
2392			if (copied == 0)
2393				copied = -EFAULT;
2394			break;
2395		}
2396		copied += chunk;
2397		size -= chunk;
2398
2399		if (drop_skb) {
2400			/* the skb was touched by a concurrent reader;
2401			 * we should not expect anything from this skb
2402			 * anymore and assume it invalid - we can be
2403			 * sure it was dropped from the socket queue
2404			 *
2405			 * let's report a short read
2406			 */
2407			err = 0;
2408			break;
2409		}
2410
2411		/* Mark read part of skb as used */
2412		if (!(flags & MSG_PEEK)) {
2413			UNIXCB(skb).consumed += chunk;
2414
2415			sk_peek_offset_bwd(sk, chunk);
2416
2417			if (UNIXCB(skb).fp)
2418				unix_detach_fds(&scm, skb);
2419
2420			if (unix_skb_len(skb))
2421				break;
2422
2423			skb_unlink(skb, &sk->sk_receive_queue);
2424			consume_skb(skb);
2425
2426			if (scm.fp)
2427				break;
2428		} else {
2429			/* It is questionable, see note in unix_dgram_recvmsg.
2430			 */
2431			if (UNIXCB(skb).fp)
2432				scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2433
2434			sk_peek_offset_fwd(sk, chunk);
2435
2436			if (UNIXCB(skb).fp)
2437				break;
2438
2439			skip = 0;
2440			last = skb;
2441			last_len = skb->len;
2442			unix_state_lock(sk);
2443			skb = skb_peek_next(skb, &sk->sk_receive_queue);
2444			if (skb)
2445				goto again;
2446			unix_state_unlock(sk);
2447			break;
2448		}
2449	} while (size);
2450
2451	mutex_unlock(&u->readlock);
2452	if (state->msg)
2453		scm_recv(sock, state->msg, &scm, flags);
2454	else
2455		scm_destroy(&scm);
2456out:
2457	return copied ? : err;
2458}
2459
2460static int unix_stream_read_actor(struct sk_buff *skb,
2461				  int skip, int chunk,
2462				  struct unix_stream_read_state *state)
2463{
2464	int ret;
2465
2466	ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2467				    state->msg, chunk);
2468	return ret ?: chunk;
2469}
2470
2471static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2472			       size_t size, int flags)
2473{
2474	struct unix_stream_read_state state = {
2475		.recv_actor = unix_stream_read_actor,
2476		.socket = sock,
2477		.msg = msg,
2478		.size = size,
2479		.flags = flags
2480	};
2481
2482	return unix_stream_read_generic(&state);
2483}
2484
2485static ssize_t skb_unix_socket_splice(struct sock *sk,
2486				      struct pipe_inode_info *pipe,
2487				      struct splice_pipe_desc *spd)
2488{
2489	int ret;
2490	struct unix_sock *u = unix_sk(sk);
2491
2492	mutex_unlock(&u->readlock);
2493	ret = splice_to_pipe(pipe, spd);
2494	mutex_lock(&u->readlock);
2495
2496	return ret;
2497}
2498
2499static int unix_stream_splice_actor(struct sk_buff *skb,
2500				    int skip, int chunk,
2501				    struct unix_stream_read_state *state)
2502{
2503	return skb_splice_bits(skb, state->socket->sk,
2504			       UNIXCB(skb).consumed + skip,
2505			       state->pipe, chunk, state->splice_flags,
2506			       skb_unix_socket_splice);
2507}
2508
2509static ssize_t unix_stream_splice_read(struct socket *sock,  loff_t *ppos,
2510				       struct pipe_inode_info *pipe,
2511				       size_t size, unsigned int flags)
2512{
2513	struct unix_stream_read_state state = {
2514		.recv_actor = unix_stream_splice_actor,
2515		.socket = sock,
2516		.pipe = pipe,
2517		.size = size,
2518		.splice_flags = flags,
2519	};
2520
2521	if (unlikely(*ppos))
2522		return -ESPIPE;
2523
2524	if (sock->file->f_flags & O_NONBLOCK ||
2525	    flags & SPLICE_F_NONBLOCK)
2526		state.flags = MSG_DONTWAIT;
2527
2528	return unix_stream_read_generic(&state);
2529}
2530
2531static int unix_shutdown(struct socket *sock, int mode)
2532{
2533	struct sock *sk = sock->sk;
2534	struct sock *other;
2535
2536	if (mode < SHUT_RD || mode > SHUT_RDWR)
2537		return -EINVAL;
2538	/* This maps:
2539	 * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
2540	 * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
2541	 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2542	 */
2543	++mode;
2544
2545	unix_state_lock(sk);
2546	sk->sk_shutdown |= mode;
2547	other = unix_peer(sk);
2548	if (other)
2549		sock_hold(other);
2550	unix_state_unlock(sk);
2551	sk->sk_state_change(sk);
2552
2553	if (other &&
2554		(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2555
2556		int peer_mode = 0;
2557
2558		if (mode&RCV_SHUTDOWN)
2559			peer_mode |= SEND_SHUTDOWN;
2560		if (mode&SEND_SHUTDOWN)
2561			peer_mode |= RCV_SHUTDOWN;
2562		unix_state_lock(other);
2563		other->sk_shutdown |= peer_mode;
2564		unix_state_unlock(other);
2565		other->sk_state_change(other);
2566		if (peer_mode == SHUTDOWN_MASK)
2567			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2568		else if (peer_mode & RCV_SHUTDOWN)
2569			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2570	}
2571	if (other)
2572		sock_put(other);
2573
2574	return 0;
2575}
2576
2577long unix_inq_len(struct sock *sk)
2578{
2579	struct sk_buff *skb;
2580	long amount = 0;
2581
2582	if (sk->sk_state == TCP_LISTEN)
2583		return -EINVAL;
2584
2585	spin_lock(&sk->sk_receive_queue.lock);
2586	if (sk->sk_type == SOCK_STREAM ||
2587	    sk->sk_type == SOCK_SEQPACKET) {
2588		skb_queue_walk(&sk->sk_receive_queue, skb)
2589			amount += unix_skb_len(skb);
2590	} else {
2591		skb = skb_peek(&sk->sk_receive_queue);
2592		if (skb)
2593			amount = skb->len;
2594	}
2595	spin_unlock(&sk->sk_receive_queue.lock);
2596
2597	return amount;
2598}
2599EXPORT_SYMBOL_GPL(unix_inq_len);
2600
2601long unix_outq_len(struct sock *sk)
2602{
2603	return sk_wmem_alloc_get(sk);
2604}
2605EXPORT_SYMBOL_GPL(unix_outq_len);
2606
2607static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2608{
2609	struct sock *sk = sock->sk;
2610	long amount = 0;
2611	int err;
2612
2613	switch (cmd) {
2614	case SIOCOUTQ:
2615		amount = unix_outq_len(sk);
2616		err = put_user(amount, (int __user *)arg);
2617		break;
2618	case SIOCINQ:
2619		amount = unix_inq_len(sk);
2620		if (amount < 0)
2621			err = amount;
2622		else
2623			err = put_user(amount, (int __user *)arg);
2624		break;
2625	default:
2626		err = -ENOIOCTLCMD;
2627		break;
2628	}
2629	return err;
2630}
2631
2632static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
2633{
2634	struct sock *sk = sock->sk;
2635	unsigned int mask;
2636
2637	sock_poll_wait(file, sk_sleep(sk), wait);
2638	mask = 0;
2639
2640	/* exceptional events? */
2641	if (sk->sk_err)
2642		mask |= POLLERR;
2643	if (sk->sk_shutdown == SHUTDOWN_MASK)
2644		mask |= POLLHUP;
2645	if (sk->sk_shutdown & RCV_SHUTDOWN)
2646		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2647
2648	/* readable? */
2649	if (!skb_queue_empty(&sk->sk_receive_queue))
2650		mask |= POLLIN | POLLRDNORM;
2651
2652	/* Connection-based need to check for termination and startup */
2653	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2654	    sk->sk_state == TCP_CLOSE)
2655		mask |= POLLHUP;
2656
2657	/*
2658	 * we set writable also when the other side has shut down the
2659	 * connection. This prevents stuck sockets.
2660	 */
2661	if (unix_writable(sk))
2662		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2663
2664	return mask;
2665}
2666
2667static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
2668				    poll_table *wait)
2669{
2670	struct sock *sk = sock->sk, *other;
2671	unsigned int mask, writable;
2672
2673	sock_poll_wait(file, sk_sleep(sk), wait);
2674	mask = 0;
2675
2676	/* exceptional events? */
2677	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
2678		mask |= POLLERR |
2679			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
2680
2681	if (sk->sk_shutdown & RCV_SHUTDOWN)
2682		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2683	if (sk->sk_shutdown == SHUTDOWN_MASK)
2684		mask |= POLLHUP;
2685
2686	/* readable? */
2687	if (!skb_queue_empty(&sk->sk_receive_queue))
2688		mask |= POLLIN | POLLRDNORM;
2689
2690	/* Connection-based need to check for termination and startup */
2691	if (sk->sk_type == SOCK_SEQPACKET) {
2692		if (sk->sk_state == TCP_CLOSE)
2693			mask |= POLLHUP;
2694		/* connection hasn't started yet? */
2695		if (sk->sk_state == TCP_SYN_SENT)
2696			return mask;
2697	}
2698
2699	/* No write status requested, avoid expensive OUT tests. */
2700	if (!(poll_requested_events(wait) & (POLLWRBAND|POLLWRNORM|POLLOUT)))
2701		return mask;
2702
2703	writable = unix_writable(sk);
2704	if (writable) {
2705		unix_state_lock(sk);
2706
2707		other = unix_peer(sk);
2708		if (other && unix_peer(other) != sk &&
2709		    unix_recvq_full(other) &&
2710		    unix_dgram_peer_wake_me(sk, other))
2711			writable = 0;
2712
2713		unix_state_unlock(sk);
2714	}
2715
2716	if (writable)
2717		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2718	else
2719		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
2720
2721	return mask;
2722}
2723
2724#ifdef CONFIG_PROC_FS
2725
2726#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)
2727
2728#define get_bucket(x) ((x) >> BUCKET_SPACE)
2729#define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
2730#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
2731
2732static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
2733{
2734	unsigned long offset = get_offset(*pos);
2735	unsigned long bucket = get_bucket(*pos);
2736	struct sock *sk;
2737	unsigned long count = 0;
2738
2739	for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
2740		if (sock_net(sk) != seq_file_net(seq))
2741			continue;
2742		if (++count == offset)
2743			break;
2744	}
2745
2746	return sk;
2747}
2748
2749static struct sock *unix_next_socket(struct seq_file *seq,
2750				     struct sock *sk,
2751				     loff_t *pos)
2752{
2753	unsigned long bucket;
2754
2755	while (sk > (struct sock *)SEQ_START_TOKEN) {
2756		sk = sk_next(sk);
2757		if (!sk)
2758			goto next_bucket;
2759		if (sock_net(sk) == seq_file_net(seq))
2760			return sk;
2761	}
2762
2763	do {
2764		sk = unix_from_bucket(seq, pos);
2765		if (sk)
2766			return sk;
2767
2768next_bucket:
2769		bucket = get_bucket(*pos) + 1;
2770		*pos = set_bucket_offset(bucket, 1);
2771	} while (bucket < ARRAY_SIZE(unix_socket_table));
2772
2773	return NULL;
2774}
2775
2776static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2777	__acquires(unix_table_lock)
2778{
2779	spin_lock(&unix_table_lock);
2780
2781	if (!*pos)
2782		return SEQ_START_TOKEN;
2783
2784	if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
2785		return NULL;
2786
2787	return unix_next_socket(seq, NULL, pos);
2788}
2789
2790static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2791{
2792	++*pos;
2793	return unix_next_socket(seq, v, pos);
2794}
2795
2796static void unix_seq_stop(struct seq_file *seq, void *v)
2797	__releases(unix_table_lock)
2798{
2799	spin_unlock(&unix_table_lock);
2800}
2801
2802static int unix_seq_show(struct seq_file *seq, void *v)
2803{
2804
2805	if (v == SEQ_START_TOKEN)
2806		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
2807			 "Inode Path\n");
2808	else {
2809		struct sock *s = v;
2810		struct unix_sock *u = unix_sk(s);
2811		unix_state_lock(s);
2812
2813		seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
2814			s,
2815			atomic_read(&s->sk_refcnt),
2816			0,
2817			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2818			s->sk_type,
2819			s->sk_socket ?
2820			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2821			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2822			sock_i_ino(s));
2823
2824		if (u->addr) {
2825			int i, len;
2826			seq_putc(seq, ' ');
2827
2828			i = 0;
2829			len = u->addr->len - sizeof(short);
2830			if (!UNIX_ABSTRACT(s))
2831				len--;
2832			else {
2833				seq_putc(seq, '@');
2834				i++;
2835			}
2836			for ( ; i < len; i++)
2837				seq_putc(seq, u->addr->name->sun_path[i]);
2838		}
2839		unix_state_unlock(s);
2840		seq_putc(seq, '\n');
2841	}
2842
2843	return 0;
2844}
2845
2846static const struct seq_operations unix_seq_ops = {
2847	.start  = unix_seq_start,
2848	.next   = unix_seq_next,
2849	.stop   = unix_seq_stop,
2850	.show   = unix_seq_show,
2851};
2852
2853static int unix_seq_open(struct inode *inode, struct file *file)
2854{
2855	return seq_open_net(inode, file, &unix_seq_ops,
2856			    sizeof(struct seq_net_private));
2857}
2858
2859static const struct file_operations unix_seq_fops = {
2860	.owner		= THIS_MODULE,
2861	.open		= unix_seq_open,
2862	.read		= seq_read,
2863	.llseek		= seq_lseek,
2864	.release	= seq_release_net,
2865};
2866
2867#endif
2868
2869static const struct net_proto_family unix_family_ops = {
2870	.family = PF_UNIX,
2871	.create = unix_create,
2872	.owner	= THIS_MODULE,
2873};
2874
2875
2876static int __net_init unix_net_init(struct net *net)
2877{
2878	int error = -ENOMEM;
2879
2880	net->unx.sysctl_max_dgram_qlen = 10;
2881	if (unix_sysctl_register(net))
2882		goto out;
2883
2884#ifdef CONFIG_PROC_FS
2885	if (!proc_create("unix", 0, net->proc_net, &unix_seq_fops)) {
2886		unix_sysctl_unregister(net);
2887		goto out;
2888	}
2889#endif
2890	error = 0;
2891out:
2892	return error;
2893}
2894
2895static void __net_exit unix_net_exit(struct net *net)
2896{
2897	unix_sysctl_unregister(net);
2898	remove_proc_entry("unix", net->proc_net);
2899}
2900
2901static struct pernet_operations unix_net_ops = {
2902	.init = unix_net_init,
2903	.exit = unix_net_exit,
2904};
2905
2906static int __init af_unix_init(void)
2907{
2908	int rc = -1;
2909
2910	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));
2911
2912	rc = proto_register(&unix_proto, 1);
2913	if (rc != 0) {
2914		pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
2915		goto out;
2916	}
2917
2918	sock_register(&unix_family_ops);
2919	register_pernet_subsys(&unix_net_ops);
2920out:
2921	return rc;
2922}
2923
2924static void __exit af_unix_exit(void)
2925{
2926	sock_unregister(PF_UNIX);
2927	proto_unregister(&unix_proto);
2928	unregister_pernet_subsys(&unix_net_ops);
2929}
2930
2931/* Earlier than device_initcall() so that other drivers invoking
2932   request_module() don't end up in a loop when modprobe tries
2933   to use a UNIX socket. But later than subsys_initcall() because
2934   we depend on stuff initialised there */
2935fs_initcall(af_unix_init);
2936module_exit(af_unix_exit);
2937
2938MODULE_LICENSE("GPL");
2939MODULE_ALIAS_NETPROTO(PF_UNIX);
2940