1 /*
2  * NET4:	Implementation of BSD Unix domain sockets.
3  *
4  * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
5  *
6  *		This program is free software; you can redistribute it and/or
7  *		modify it under the terms of the GNU General Public License
8  *		as published by the Free Software Foundation; either version
9  *		2 of the License, or (at your option) any later version.
10  *
11  * Fixes:
12  *		Linus Torvalds	:	Assorted bug cures.
13  *		Niibe Yutaka	:	async I/O support.
14  *		Carsten Paeth	:	PF_UNIX check, address fixes.
15  *		Alan Cox	:	Limit size of allocated blocks.
16  *		Alan Cox	:	Fixed the stupid socketpair bug.
17  *		Alan Cox	:	BSD compatibility fine tuning.
18  *		Alan Cox	:	Fixed a bug in connect when interrupted.
19  *		Alan Cox	:	Sorted out a proper draft version of
20  *					file descriptor passing hacked up from
21  *					Mike Shaver's work.
22  *		Marty Leisner	:	Fixes to fd passing
23  *		Nick Nevin	:	recvmsg bugfix.
24  *		Alan Cox	:	Started proper garbage collector
25  *		Heiko EiBfeldt	:	Missing verify_area check
26  *		Alan Cox	:	Started POSIXisms
27  *		Andreas Schwab	:	Replace inode by dentry for proper
28  *					reference counting
29  *		Kirk Petersen	:	Made this a module
30  *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
31  *					Lots of bug fixes.
32  *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
33  *					by the above two patches.
34  *	     Andrea Arcangeli	:	If possible we block in connect(2)
35  *					if the max backlog of the listen socket
36  *					has been reached. This won't break
37  *					old apps and it avoids a huge number
38  *					of hashed socks (for unix_gc()
39  *					performance reasons).
40  *					Security fix that limits the max
41  *					number of socks to 2*max_files and
42  *					the number of skb queueable in the
43  *					dgram receiver.
44  *		Artur Skawina   :	Hash function optimizations
45  *	     Alexey Kuznetsov   :	Full scale SMP. Lots of bugs are introduced 8)
46  *	      Malcolm Beattie   :	Set peercred for socketpair
47  *	     Michal Ostrowski   :       Module initialization cleanup.
48  *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
49  *	     				the core infrastructure is doing that
50  *	     				for all net proto families now (2.5.69+)
51  *
52  *
53  * Known differences from reference BSD that was tested:
54  *
55  *	[TO FIX]
56  *	ECONNREFUSED is not returned from one end of a connected() socket to the
57  *		other the moment one end closes.
58  *	fstat() doesn't return st_dev=0, nor give the blksize as high water mark
59  *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
60  *	[NOT TO FIX]
61  *	accept() returns a path name even if the connecting socket has closed
62  *		in the meantime (BSD loses the path and gives up).
63  *	accept() returns 0 length path for an unbound connector. BSD returns 16
64  *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
65  *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
66  *	BSD af_unix apparently has connect forgetting to block properly.
67  *		(need to check this with the POSIX spec in detail)
68  *
69  * Differences from 2.0.0-11-... (ANK)
70  *	Bug fixes and improvements.
71  *		- client shutdown killed server socket.
72  *		- removed all useless cli/sti pairs.
73  *
74  *	Semantic changes/extensions.
75  *		- generic control message passing.
76  *		- SCM_CREDENTIALS control message.
77  *		- "Abstract" (not FS based) socket bindings.
78  *		  Abstract names are sequences of bytes (not zero terminated)
79  *		  started by 0, so that this name space does not intersect
80  *		  with BSD names.
81  */
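/*
 * Illustrative userspace sketch (not part of this file): binding to an
 * abstract address as described above.  The name begins with a 0 byte,
 * is not NUL terminated, and its length is implied by addrlen.
 *
 *	struct sockaddr_un a = { .sun_family = AF_UNIX };
 *	int fd = socket(AF_UNIX, SOCK_STREAM, 0);
 *
 *	memcpy(a.sun_path + 1, "example", 7);
 *	bind(fd, (struct sockaddr *)&a,
 *	     offsetof(struct sockaddr_un, sun_path) + 1 + 7);
 */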
82 
83 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
84 
85 #include <linux/module.h>
86 #include <linux/kernel.h>
87 #include <linux/signal.h>
88 #include <linux/sched.h>
89 #include <linux/errno.h>
90 #include <linux/string.h>
91 #include <linux/stat.h>
92 #include <linux/dcache.h>
93 #include <linux/namei.h>
94 #include <linux/socket.h>
95 #include <linux/un.h>
96 #include <linux/fcntl.h>
97 #include <linux/termios.h>
98 #include <linux/sockios.h>
99 #include <linux/net.h>
100 #include <linux/in.h>
101 #include <linux/fs.h>
102 #include <linux/slab.h>
103 #include <asm/uaccess.h>
104 #include <linux/skbuff.h>
105 #include <linux/netdevice.h>
106 #include <net/net_namespace.h>
107 #include <net/sock.h>
108 #include <net/tcp_states.h>
109 #include <net/af_unix.h>
110 #include <linux/proc_fs.h>
111 #include <linux/seq_file.h>
112 #include <net/scm.h>
113 #include <linux/init.h>
114 #include <linux/poll.h>
115 #include <linux/rtnetlink.h>
116 #include <linux/mount.h>
117 #include <net/checksum.h>
118 #include <linux/security.h>
119 #include <linux/freezer.h>
120 
121 struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
122 EXPORT_SYMBOL_GPL(unix_socket_table);
123 DEFINE_SPINLOCK(unix_table_lock);
124 EXPORT_SYMBOL_GPL(unix_table_lock);
125 static atomic_long_t unix_nr_socks;
126 
127 
128 static struct hlist_head *unix_sockets_unbound(void *addr)
129 {
130 	unsigned long hash = (unsigned long)addr;
131 
132 	hash ^= hash >> 16;
133 	hash ^= hash >> 8;
134 	hash %= UNIX_HASH_SIZE;
135 	return &unix_socket_table[UNIX_HASH_SIZE + hash];
136 }
137 
138 #define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
139 
140 #ifdef CONFIG_SECURITY_NETWORK
141 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
142 {
143 	memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
144 }
145 
146 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
147 {
148 	scm->secid = *UNIXSID(skb);
149 }
150 #else
151 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
152 { }
153 
154 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
155 { }
156 #endif /* CONFIG_SECURITY_NETWORK */
157 
158 /*
159  *  SMP locking strategy:
160  *    the hash table is protected by the spinlock unix_table_lock
161  *    each socket's state is protected by a separate spin lock.
162  */
163 
164 static inline unsigned int unix_hash_fold(__wsum n)
165 {
166 	unsigned int hash = (__force unsigned int)csum_fold(n);
167 
168 	hash ^= hash>>8;
169 	return hash&(UNIX_HASH_SIZE-1);
170 }
171 
172 #define unix_peer(sk) (unix_sk(sk)->peer)
173 
174 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
175 {
176 	return unix_peer(osk) == sk;
177 }
178 
179 static inline int unix_may_send(struct sock *sk, struct sock *osk)
180 {
181 	return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
182 }
183 
184 static inline int unix_recvq_full(struct sock const *sk)
185 {
186 	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
187 }
188 
189 struct sock *unix_peer_get(struct sock *s)
190 {
191 	struct sock *peer;
192 
193 	unix_state_lock(s);
194 	peer = unix_peer(s);
195 	if (peer)
196 		sock_hold(peer);
197 	unix_state_unlock(s);
198 	return peer;
199 }
200 EXPORT_SYMBOL_GPL(unix_peer_get);
201 
202 static inline void unix_release_addr(struct unix_address *addr)
203 {
204 	if (atomic_dec_and_test(&addr->refcnt))
205 		kfree(addr);
206 }
207 
208 /*
209  *	Check unix socket name:
210  *		- it should not be zero length.
211  *	        - if it does not start with a zero byte, it should be NUL terminated (an FS object)
212  *		- if it starts with a zero byte, it is an abstract name.
213  */
214 
215 static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
216 {
217 	if (len <= sizeof(short) || len > sizeof(*sunaddr))
218 		return -EINVAL;
219 	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
220 		return -EINVAL;
221 	if (sunaddr->sun_path[0]) {
222 		/*
223 		 * This may look like an off by one error but it is a bit more
224 		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
225 		 * sun_path[108] doesn't as such exist.  However in kernel space
226 		 * we are guaranteed that it is a valid memory location in our
227 		 * kernel address buffer.
228 		 */
229 		((char *)sunaddr)[len] = 0;
230 		len = strlen(sunaddr->sun_path)+1+sizeof(short);
231 		return len;
232 	}
233 
234 	*hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
235 	return len;
236 }
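/*
 * Worked example for the filesystem case above: for sun_path "/tmp/x"
 * the returned length is strlen("/tmp/x") + 1 + sizeof(short), i.e.
 * 6 + 1 + 2 == 9, and *hashp is left untouched.
 */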
237 
238 static void __unix_remove_socket(struct sock *sk)
239 {
240 	sk_del_node_init(sk);
241 }
242 
243 static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
244 {
245 	WARN_ON(!sk_unhashed(sk));
246 	sk_add_node(sk, list);
247 }
248 
249 static inline void unix_remove_socket(struct sock *sk)
250 {
251 	spin_lock(&unix_table_lock);
252 	__unix_remove_socket(sk);
253 	spin_unlock(&unix_table_lock);
254 }
255 
256 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
257 {
258 	spin_lock(&unix_table_lock);
259 	__unix_insert_socket(list, sk);
260 	spin_unlock(&unix_table_lock);
261 }
262 
263 static struct sock *__unix_find_socket_byname(struct net *net,
264 					      struct sockaddr_un *sunname,
265 					      int len, int type, unsigned int hash)
266 {
267 	struct sock *s;
268 
269 	sk_for_each(s, &unix_socket_table[hash ^ type]) {
270 		struct unix_sock *u = unix_sk(s);
271 
272 		if (!net_eq(sock_net(s), net))
273 			continue;
274 
275 		if (u->addr->len == len &&
276 		    !memcmp(u->addr->name, sunname, len))
277 			goto found;
278 	}
279 	s = NULL;
280 found:
281 	return s;
282 }
283 
284 static inline struct sock *unix_find_socket_byname(struct net *net,
285 						   struct sockaddr_un *sunname,
286 						   int len, int type,
287 						   unsigned int hash)
288 {
289 	struct sock *s;
290 
291 	spin_lock(&unix_table_lock);
292 	s = __unix_find_socket_byname(net, sunname, len, type, hash);
293 	if (s)
294 		sock_hold(s);
295 	spin_unlock(&unix_table_lock);
296 	return s;
297 }
298 
299 static struct sock *unix_find_socket_byinode(struct inode *i)
300 {
301 	struct sock *s;
302 
303 	spin_lock(&unix_table_lock);
304 	sk_for_each(s,
305 		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
306 		struct dentry *dentry = unix_sk(s)->path.dentry;
307 
308 		if (dentry && d_backing_inode(dentry) == i) {
309 			sock_hold(s);
310 			goto found;
311 		}
312 	}
313 	s = NULL;
314 found:
315 	spin_unlock(&unix_table_lock);
316 	return s;
317 }
318 
319 /* Support code for asymmetrically connected dgram sockets
320  *
321  * If a datagram socket is connected to a socket not itself connected
322  * to the first socket (e.g., /dev/log), clients may only enqueue more
323  * messages if the present receive queue of the server socket is not
324  * "too large". This means there's a second writeability condition
325  * poll and sendmsg need to test. The dgram recv code will do a wake
326  * up on the peer_wait wait queue of a socket upon reception of a
327  * datagram which needs to be propagated to sleeping would-be writers
328  * since these might not have sent anything so far. This can't be
329  * accomplished via poll_wait because the lifetime of the server
330  * socket might be less than that of its clients if these break their
331  * association with it or if the server socket is closed while clients
332  * are still connected to it and there's no way to inform "a polling
333  * implementation" that it should let go of a certain wait queue
334  *
335  * In order to propagate a wake up, a wait_queue_t of the client
336  * socket is enqueued on the peer_wait queue of the server socket
337  * whose wake function does a wake_up on the ordinary client socket
338  * wait queue. This connection is established whenever a write (or
339  * poll for write) hits the flow control condition, and is broken when the
340  * association to the server socket is dissolved or after a wake up
341  * was relayed.
342  */
343 
344 static int unix_dgram_peer_wake_relay(wait_queue_t *q, unsigned mode, int flags,
345 				      void *key)
346 {
347 	struct unix_sock *u;
348 	wait_queue_head_t *u_sleep;
349 
350 	u = container_of(q, struct unix_sock, peer_wake);
351 
352 	__remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
353 			    q);
354 	u->peer_wake.private = NULL;
355 
356 	/* relaying can only happen while the wq still exists */
357 	u_sleep = sk_sleep(&u->sk);
358 	if (u_sleep)
359 		wake_up_interruptible_poll(u_sleep, key);
360 
361 	return 0;
362 }
363 
364 static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
365 {
366 	struct unix_sock *u, *u_other;
367 	int rc;
368 
369 	u = unix_sk(sk);
370 	u_other = unix_sk(other);
371 	rc = 0;
372 	spin_lock(&u_other->peer_wait.lock);
373 
374 	if (!u->peer_wake.private) {
375 		u->peer_wake.private = other;
376 		__add_wait_queue(&u_other->peer_wait, &u->peer_wake);
377 
378 		rc = 1;
379 	}
380 
381 	spin_unlock(&u_other->peer_wait.lock);
382 	return rc;
383 }
384 
385 static void unix_dgram_peer_wake_disconnect(struct sock *sk,
386 					    struct sock *other)
387 {
388 	struct unix_sock *u, *u_other;
389 
390 	u = unix_sk(sk);
391 	u_other = unix_sk(other);
392 	spin_lock(&u_other->peer_wait.lock);
393 
394 	if (u->peer_wake.private == other) {
395 		__remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
396 		u->peer_wake.private = NULL;
397 	}
398 
399 	spin_unlock(&u_other->peer_wait.lock);
400 }
401 
402 static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
403 						   struct sock *other)
404 {
405 	unix_dgram_peer_wake_disconnect(sk, other);
406 	wake_up_interruptible_poll(sk_sleep(sk),
407 				   POLLOUT |
408 				   POLLWRNORM |
409 				   POLLWRBAND);
410 }
411 
412 /* preconditions:
413  *	- unix_peer(sk) == other
414  *	- association is stable
415  */
416 static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
417 {
418 	int connected;
419 
420 	connected = unix_dgram_peer_wake_connect(sk, other);
421 
422 	if (unix_recvq_full(other))
423 		return 1;
424 
425 	if (connected)
426 		unix_dgram_peer_wake_disconnect(sk, other);
427 
428 	return 0;
429 }
430 
431 static inline int unix_writable(struct sock *sk)
432 {
433 	return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
434 }
435 
436 static void unix_write_space(struct sock *sk)
437 {
438 	struct socket_wq *wq;
439 
440 	rcu_read_lock();
441 	if (unix_writable(sk)) {
442 		wq = rcu_dereference(sk->sk_wq);
443 		if (wq_has_sleeper(wq))
444 			wake_up_interruptible_sync_poll(&wq->wait,
445 				POLLOUT | POLLWRNORM | POLLWRBAND);
446 		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
447 	}
448 	rcu_read_unlock();
449 }
450 
451 /* When a dgram socket disconnects (or changes its peer), we clear its receive
452  * queue of packets that arrived from the previous peer. First, this allows
453  * flow control based only on wmem_alloc; second, a sk connected to a peer
454  * may receive messages only from that peer. */
455 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
456 {
457 	if (!skb_queue_empty(&sk->sk_receive_queue)) {
458 		skb_queue_purge(&sk->sk_receive_queue);
459 		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
460 
461 		/* If one link of a bidirectional dgram pipe is disconnected,
462 		 * we signal an error. Messages are lost. Do not do this
463 		 * when the peer was not connected to us.
464 		 */
465 		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
466 			other->sk_err = ECONNRESET;
467 			other->sk_error_report(other);
468 		}
469 	}
470 }
471 
472 static void unix_sock_destructor(struct sock *sk)
473 {
474 	struct unix_sock *u = unix_sk(sk);
475 
476 	skb_queue_purge(&sk->sk_receive_queue);
477 
478 	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
479 	WARN_ON(!sk_unhashed(sk));
480 	WARN_ON(sk->sk_socket);
481 	if (!sock_flag(sk, SOCK_DEAD)) {
482 		pr_info("Attempt to release alive unix socket: %p\n", sk);
483 		return;
484 	}
485 
486 	if (u->addr)
487 		unix_release_addr(u->addr);
488 
489 	atomic_long_dec(&unix_nr_socks);
490 	local_bh_disable();
491 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
492 	local_bh_enable();
493 #ifdef UNIX_REFCNT_DEBUG
494 	pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
495 		atomic_long_read(&unix_nr_socks));
496 #endif
497 }
498 
499 static void unix_release_sock(struct sock *sk, int embrion)
500 {
501 	struct unix_sock *u = unix_sk(sk);
502 	struct path path;
503 	struct sock *skpair;
504 	struct sk_buff *skb;
505 	int state;
506 
507 	unix_remove_socket(sk);
508 
509 	/* Clear state */
510 	unix_state_lock(sk);
511 	sock_orphan(sk);
512 	sk->sk_shutdown = SHUTDOWN_MASK;
513 	path	     = u->path;
514 	u->path.dentry = NULL;
515 	u->path.mnt = NULL;
516 	state = sk->sk_state;
517 	sk->sk_state = TCP_CLOSE;
518 	unix_state_unlock(sk);
519 
520 	wake_up_interruptible_all(&u->peer_wait);
521 
522 	skpair = unix_peer(sk);
523 
524 	if (skpair != NULL) {
525 		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
526 			unix_state_lock(skpair);
527 			/* No more writes */
528 			skpair->sk_shutdown = SHUTDOWN_MASK;
529 			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
530 				skpair->sk_err = ECONNRESET;
531 			unix_state_unlock(skpair);
532 			skpair->sk_state_change(skpair);
533 			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
534 		}
535 
536 		unix_dgram_peer_wake_disconnect(sk, skpair);
537 		sock_put(skpair); /* It may now die */
538 		unix_peer(sk) = NULL;
539 	}
540 
541 	/* Try to flush out this socket. Throw out buffers at least */
542 
543 	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
544 		if (state == TCP_LISTEN)
545 			unix_release_sock(skb->sk, 1);
546 		/* passed fds are erased in the kfree_skb hook	      */
547 		kfree_skb(skb);
548 	}
549 
550 	if (path.dentry)
551 		path_put(&path);
552 
553 	sock_put(sk);
554 
555 	/* ---- Socket is dead now and most probably destroyed ---- */
556 
557 	/*
558 	 * Fixme: BSD difference: In BSD all sockets connected to us get
559 	 *	  ECONNRESET and we die on the spot. In Linux we behave
560 	 *	  like files and pipes do and wait for the last
561 	 *	  dereference.
562 	 *
563 	 * Can't we simply set sock->err?
564 	 *
565 	 *	  What does the above comment talk about? --ANK(980817)
566 	 */
567 
568 	if (unix_tot_inflight)
569 		unix_gc();		/* Garbage collect fds */
570 }
571 
572 static void init_peercred(struct sock *sk)
573 {
574 	put_pid(sk->sk_peer_pid);
575 	if (sk->sk_peer_cred)
576 		put_cred(sk->sk_peer_cred);
577 	sk->sk_peer_pid  = get_pid(task_tgid(current));
578 	sk->sk_peer_cred = get_current_cred();
579 }
580 
581 static void copy_peercred(struct sock *sk, struct sock *peersk)
582 {
583 	put_pid(sk->sk_peer_pid);
584 	if (sk->sk_peer_cred)
585 		put_cred(sk->sk_peer_cred);
586 	sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
587 	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
588 }
589 
590 static int unix_listen(struct socket *sock, int backlog)
591 {
592 	int err;
593 	struct sock *sk = sock->sk;
594 	struct unix_sock *u = unix_sk(sk);
595 	struct pid *old_pid = NULL;
596 
597 	err = -EOPNOTSUPP;
598 	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
599 		goto out;	/* Only stream/seqpacket sockets accept */
600 	err = -EINVAL;
601 	if (!u->addr)
602 		goto out;	/* No listens on an unbound socket */
603 	unix_state_lock(sk);
604 	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
605 		goto out_unlock;
606 	if (backlog > sk->sk_max_ack_backlog)
607 		wake_up_interruptible_all(&u->peer_wait);
608 	sk->sk_max_ack_backlog	= backlog;
609 	sk->sk_state		= TCP_LISTEN;
610 	/* set credentials so connect can copy them */
611 	init_peercred(sk);
612 	err = 0;
613 
614 out_unlock:
615 	unix_state_unlock(sk);
616 	put_pid(old_pid);
617 out:
618 	return err;
619 }
620 
621 static int unix_release(struct socket *);
622 static int unix_bind(struct socket *, struct sockaddr *, int);
623 static int unix_stream_connect(struct socket *, struct sockaddr *,
624 			       int addr_len, int flags);
625 static int unix_socketpair(struct socket *, struct socket *);
626 static int unix_accept(struct socket *, struct socket *, int);
627 static int unix_getname(struct socket *, struct sockaddr *, int *, int);
628 static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
629 static unsigned int unix_dgram_poll(struct file *, struct socket *,
630 				    poll_table *);
631 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
632 static int unix_shutdown(struct socket *, int);
633 static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
634 static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
635 static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
636 static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
637 static int unix_dgram_connect(struct socket *, struct sockaddr *,
638 			      int, int);
639 static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
640 static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
641 				  int);
642 
643 static int unix_set_peek_off(struct sock *sk, int val)
644 {
645 	struct unix_sock *u = unix_sk(sk);
646 
647 	if (mutex_lock_interruptible(&u->readlock))
648 		return -EINTR;
649 
650 	sk->sk_peek_off = val;
651 	mutex_unlock(&u->readlock);
652 
653 	return 0;
654 }
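/*
 * Illustrative userspace sketch (not part of this file): with
 * SO_PEEK_OFF enabled, successive MSG_PEEK reads on a stream socket
 * walk forward through the queued data instead of re-reading it.
 *
 *	setsockopt(fd, SOL_SOCKET, SO_PEEK_OFF, &(int){ 0 }, sizeof(int));
 *	recv(fd, buf, 16, MSG_PEEK);	(peeks bytes 0..15)
 *	recv(fd, buf, 16, MSG_PEEK);	(peeks bytes 16..31)
 */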
655 
656 
657 static const struct proto_ops unix_stream_ops = {
658 	.family =	PF_UNIX,
659 	.owner =	THIS_MODULE,
660 	.release =	unix_release,
661 	.bind =		unix_bind,
662 	.connect =	unix_stream_connect,
663 	.socketpair =	unix_socketpair,
664 	.accept =	unix_accept,
665 	.getname =	unix_getname,
666 	.poll =		unix_poll,
667 	.ioctl =	unix_ioctl,
668 	.listen =	unix_listen,
669 	.shutdown =	unix_shutdown,
670 	.setsockopt =	sock_no_setsockopt,
671 	.getsockopt =	sock_no_getsockopt,
672 	.sendmsg =	unix_stream_sendmsg,
673 	.recvmsg =	unix_stream_recvmsg,
674 	.mmap =		sock_no_mmap,
675 	.sendpage =	sock_no_sendpage,
676 	.set_peek_off =	unix_set_peek_off,
677 };
678 
679 static const struct proto_ops unix_dgram_ops = {
680 	.family =	PF_UNIX,
681 	.owner =	THIS_MODULE,
682 	.release =	unix_release,
683 	.bind =		unix_bind,
684 	.connect =	unix_dgram_connect,
685 	.socketpair =	unix_socketpair,
686 	.accept =	sock_no_accept,
687 	.getname =	unix_getname,
688 	.poll =		unix_dgram_poll,
689 	.ioctl =	unix_ioctl,
690 	.listen =	sock_no_listen,
691 	.shutdown =	unix_shutdown,
692 	.setsockopt =	sock_no_setsockopt,
693 	.getsockopt =	sock_no_getsockopt,
694 	.sendmsg =	unix_dgram_sendmsg,
695 	.recvmsg =	unix_dgram_recvmsg,
696 	.mmap =		sock_no_mmap,
697 	.sendpage =	sock_no_sendpage,
698 	.set_peek_off =	unix_set_peek_off,
699 };
700 
701 static const struct proto_ops unix_seqpacket_ops = {
702 	.family =	PF_UNIX,
703 	.owner =	THIS_MODULE,
704 	.release =	unix_release,
705 	.bind =		unix_bind,
706 	.connect =	unix_stream_connect,
707 	.socketpair =	unix_socketpair,
708 	.accept =	unix_accept,
709 	.getname =	unix_getname,
710 	.poll =		unix_dgram_poll,
711 	.ioctl =	unix_ioctl,
712 	.listen =	unix_listen,
713 	.shutdown =	unix_shutdown,
714 	.setsockopt =	sock_no_setsockopt,
715 	.getsockopt =	sock_no_getsockopt,
716 	.sendmsg =	unix_seqpacket_sendmsg,
717 	.recvmsg =	unix_seqpacket_recvmsg,
718 	.mmap =		sock_no_mmap,
719 	.sendpage =	sock_no_sendpage,
720 	.set_peek_off =	unix_set_peek_off,
721 };
722 
723 static struct proto unix_proto = {
724 	.name			= "UNIX",
725 	.owner			= THIS_MODULE,
726 	.obj_size		= sizeof(struct unix_sock),
727 };
728 
729 /*
730  * AF_UNIX sockets do not interact with hardware, hence they
731  * don't trigger interrupts - so it's safe for them to have
732  * bh-unsafe locking for their sk_receive_queue.lock. Split off
733  * this special lock-class by reinitializing the spinlock key:
734  */
735 static struct lock_class_key af_unix_sk_receive_queue_lock_key;
736 
737 static struct sock *unix_create1(struct net *net, struct socket *sock)
738 {
739 	struct sock *sk = NULL;
740 	struct unix_sock *u;
741 
742 	atomic_long_inc(&unix_nr_socks);
743 	if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
744 		goto out;
745 
746 	sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto);
747 	if (!sk)
748 		goto out;
749 
750 	sock_init_data(sock, sk);
751 	lockdep_set_class(&sk->sk_receive_queue.lock,
752 				&af_unix_sk_receive_queue_lock_key);
753 
754 	sk->sk_write_space	= unix_write_space;
755 	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
756 	sk->sk_destruct		= unix_sock_destructor;
757 	u	  = unix_sk(sk);
758 	u->path.dentry = NULL;
759 	u->path.mnt = NULL;
760 	spin_lock_init(&u->lock);
761 	atomic_long_set(&u->inflight, 0);
762 	INIT_LIST_HEAD(&u->link);
763 	mutex_init(&u->readlock); /* single task reading lock */
764 	init_waitqueue_head(&u->peer_wait);
765 	init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
766 	unix_insert_socket(unix_sockets_unbound(sk), sk);
767 out:
768 	if (sk == NULL)
769 		atomic_long_dec(&unix_nr_socks);
770 	else {
771 		local_bh_disable();
772 		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
773 		local_bh_enable();
774 	}
775 	return sk;
776 }
777 
778 static int unix_create(struct net *net, struct socket *sock, int protocol,
779 		       int kern)
780 {
781 	if (protocol && protocol != PF_UNIX)
782 		return -EPROTONOSUPPORT;
783 
784 	sock->state = SS_UNCONNECTED;
785 
786 	switch (sock->type) {
787 	case SOCK_STREAM:
788 		sock->ops = &unix_stream_ops;
789 		break;
790 		/*
791 		 *	Believe it or not BSD has AF_UNIX, SOCK_RAW though
792 		 *	nothing uses it.
793 		 */
794 	case SOCK_RAW:
795 		sock->type = SOCK_DGRAM;	/* fall through */
796 	case SOCK_DGRAM:
797 		sock->ops = &unix_dgram_ops;
798 		break;
799 	case SOCK_SEQPACKET:
800 		sock->ops = &unix_seqpacket_ops;
801 		break;
802 	default:
803 		return -ESOCKTNOSUPPORT;
804 	}
805 
806 	return unix_create1(net, sock) ? 0 : -ENOMEM;
807 }
808 
809 static int unix_release(struct socket *sock)
810 {
811 	struct sock *sk = sock->sk;
812 
813 	if (!sk)
814 		return 0;
815 
816 	unix_release_sock(sk, 0);
817 	sock->sk = NULL;
818 
819 	return 0;
820 }
821 
822 static int unix_autobind(struct socket *sock)
823 {
824 	struct sock *sk = sock->sk;
825 	struct net *net = sock_net(sk);
826 	struct unix_sock *u = unix_sk(sk);
827 	static u32 ordernum = 1;
828 	struct unix_address *addr;
829 	int err;
830 	unsigned int retries = 0;
831 
832 	err = mutex_lock_interruptible(&u->readlock);
833 	if (err)
834 		return err;
835 
836 	err = 0;
837 	if (u->addr)
838 		goto out;
839 
840 	err = -ENOMEM;
841 	addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
842 	if (!addr)
843 		goto out;
844 
845 	addr->name->sun_family = AF_UNIX;
846 	atomic_set(&addr->refcnt, 1);
847 
848 retry:
849 	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
850 	addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
851 
852 	spin_lock(&unix_table_lock);
853 	ordernum = (ordernum+1)&0xFFFFF;
854 
855 	if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
856 				      addr->hash)) {
857 		spin_unlock(&unix_table_lock);
858 		/*
859 		 * __unix_find_socket_byname() may take a long time if many names
860 		 * are already in use.
861 		 */
862 		cond_resched();
863 		/* Give up if all names seem to be in use. */
864 		if (retries++ == 0xFFFFF) {
865 			err = -ENOSPC;
866 			kfree(addr);
867 			goto out;
868 		}
869 		goto retry;
870 	}
871 	addr->hash ^= sk->sk_type;
872 
873 	__unix_remove_socket(sk);
874 	u->addr = addr;
875 	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
876 	spin_unlock(&unix_table_lock);
877 	err = 0;
878 
879 out:	mutex_unlock(&u->readlock);
880 	return err;
881 }
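/*
 * Illustrative userspace sketch (not part of this file): autobind is
 * requested by passing only sun_family to bind() (the addr_len ==
 * sizeof(short) check in unix_bind() below); the kernel then picks a
 * unique abstract name of the form "\0XXXXX" with five hex digits.
 *
 *	struct sockaddr_un a = { .sun_family = AF_UNIX };
 *
 *	bind(fd, (struct sockaddr *)&a, sizeof(sa_family_t));
 */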
882 
883 static struct sock *unix_find_other(struct net *net,
884 				    struct sockaddr_un *sunname, int len,
885 				    int type, unsigned int hash, int *error)
886 {
887 	struct sock *u;
888 	struct path path;
889 	int err = 0;
890 
891 	if (sunname->sun_path[0]) {
892 		struct inode *inode;
893 		err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
894 		if (err)
895 			goto fail;
896 		inode = d_backing_inode(path.dentry);
897 		err = inode_permission(inode, MAY_WRITE);
898 		if (err)
899 			goto put_fail;
900 
901 		err = -ECONNREFUSED;
902 		if (!S_ISSOCK(inode->i_mode))
903 			goto put_fail;
904 		u = unix_find_socket_byinode(inode);
905 		if (!u)
906 			goto put_fail;
907 
908 		if (u->sk_type == type)
909 			touch_atime(&path);
910 
911 		path_put(&path);
912 
913 		err = -EPROTOTYPE;
914 		if (u->sk_type != type) {
915 			sock_put(u);
916 			goto fail;
917 		}
918 	} else {
919 		err = -ECONNREFUSED;
920 		u = unix_find_socket_byname(net, sunname, len, type, hash);
921 		if (u) {
922 			struct dentry *dentry;
923 			dentry = unix_sk(u)->path.dentry;
924 			if (dentry)
925 				touch_atime(&unix_sk(u)->path);
926 		} else
927 			goto fail;
928 	}
929 	return u;
930 
931 put_fail:
932 	path_put(&path);
933 fail:
934 	*error = err;
935 	return NULL;
936 }
937 
938 static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
939 {
940 	struct dentry *dentry;
941 	struct path path;
942 	int err = 0;
943 	/*
944 	 * Get the parent directory, calculate the hash for last
945 	 * component.
946 	 */
947 	dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
948 	err = PTR_ERR(dentry);
949 	if (IS_ERR(dentry))
950 		return err;
951 
952 	/*
953 	 * All right, let's create it.
954 	 */
955 	err = security_path_mknod(&path, dentry, mode, 0);
956 	if (!err) {
957 		err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
958 		if (!err) {
959 			res->mnt = mntget(path.mnt);
960 			res->dentry = dget(dentry);
961 		}
962 	}
963 	done_path_create(&path, dentry);
964 	return err;
965 }
966 
967 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
968 {
969 	struct sock *sk = sock->sk;
970 	struct net *net = sock_net(sk);
971 	struct unix_sock *u = unix_sk(sk);
972 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
973 	char *sun_path = sunaddr->sun_path;
974 	int err;
975 	unsigned int hash;
976 	struct unix_address *addr;
977 	struct hlist_head *list;
978 
979 	err = -EINVAL;
980 	if (sunaddr->sun_family != AF_UNIX)
981 		goto out;
982 
983 	if (addr_len == sizeof(short)) {
984 		err = unix_autobind(sock);
985 		goto out;
986 	}
987 
988 	err = unix_mkname(sunaddr, addr_len, &hash);
989 	if (err < 0)
990 		goto out;
991 	addr_len = err;
992 
993 	err = mutex_lock_interruptible(&u->readlock);
994 	if (err)
995 		goto out;
996 
997 	err = -EINVAL;
998 	if (u->addr)
999 		goto out_up;
1000 
1001 	err = -ENOMEM;
1002 	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
1003 	if (!addr)
1004 		goto out_up;
1005 
1006 	memcpy(addr->name, sunaddr, addr_len);
1007 	addr->len = addr_len;
1008 	addr->hash = hash ^ sk->sk_type;
1009 	atomic_set(&addr->refcnt, 1);
1010 
1011 	if (sun_path[0]) {
1012 		struct path path;
1013 		umode_t mode = S_IFSOCK |
1014 		       (SOCK_INODE(sock)->i_mode & ~current_umask());
1015 		err = unix_mknod(sun_path, mode, &path);
1016 		if (err) {
1017 			if (err == -EEXIST)
1018 				err = -EADDRINUSE;
1019 			unix_release_addr(addr);
1020 			goto out_up;
1021 		}
1022 		addr->hash = UNIX_HASH_SIZE;
1023 		hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE-1);
1024 		spin_lock(&unix_table_lock);
1025 		u->path = path;
1026 		list = &unix_socket_table[hash];
1027 	} else {
1028 		spin_lock(&unix_table_lock);
1029 		err = -EADDRINUSE;
1030 		if (__unix_find_socket_byname(net, sunaddr, addr_len,
1031 					      sk->sk_type, hash)) {
1032 			unix_release_addr(addr);
1033 			goto out_unlock;
1034 		}
1035 
1036 		list = &unix_socket_table[addr->hash];
1037 	}
1038 
1039 	err = 0;
1040 	__unix_remove_socket(sk);
1041 	u->addr = addr;
1042 	__unix_insert_socket(list, sk);
1043 
1044 out_unlock:
1045 	spin_unlock(&unix_table_lock);
1046 out_up:
1047 	mutex_unlock(&u->readlock);
1048 out:
1049 	return err;
1050 }
1051 
1052 static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
1053 {
1054 	if (unlikely(sk1 == sk2) || !sk2) {
1055 		unix_state_lock(sk1);
1056 		return;
1057 	}
1058 	if (sk1 < sk2) {
1059 		unix_state_lock(sk1);
1060 		unix_state_lock_nested(sk2);
1061 	} else {
1062 		unix_state_lock(sk2);
1063 		unix_state_lock_nested(sk1);
1064 	}
1065 }
1066 
1067 static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
1068 {
1069 	if (unlikely(sk1 == sk2) || !sk2) {
1070 		unix_state_unlock(sk1);
1071 		return;
1072 	}
1073 	unix_state_unlock(sk1);
1074 	unix_state_unlock(sk2);
1075 }
1076 
1077 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
1078 			      int alen, int flags)
1079 {
1080 	struct sock *sk = sock->sk;
1081 	struct net *net = sock_net(sk);
1082 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
1083 	struct sock *other;
1084 	unsigned int hash;
1085 	int err;
1086 
1087 	if (addr->sa_family != AF_UNSPEC) {
1088 		err = unix_mkname(sunaddr, alen, &hash);
1089 		if (err < 0)
1090 			goto out;
1091 		alen = err;
1092 
1093 		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
1094 		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
1095 			goto out;
1096 
1097 restart:
1098 		other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
1099 		if (!other)
1100 			goto out;
1101 
1102 		unix_state_double_lock(sk, other);
1103 
1104 		/* Apparently VFS overslept socket death. Retry. */
1105 		if (sock_flag(other, SOCK_DEAD)) {
1106 			unix_state_double_unlock(sk, other);
1107 			sock_put(other);
1108 			goto restart;
1109 		}
1110 
1111 		err = -EPERM;
1112 		if (!unix_may_send(sk, other))
1113 			goto out_unlock;
1114 
1115 		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1116 		if (err)
1117 			goto out_unlock;
1118 
1119 	} else {
1120 		/*
1121 		 *	1003.1g breaking connected state with AF_UNSPEC
1122 		 */
1123 		other = NULL;
1124 		unix_state_double_lock(sk, other);
1125 	}
1126 
1127 	/*
1128 	 * If it was connected, reconnect.
1129 	 */
1130 	if (unix_peer(sk)) {
1131 		struct sock *old_peer = unix_peer(sk);
1132 		unix_peer(sk) = other;
1133 		unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
1134 
1135 		unix_state_double_unlock(sk, other);
1136 
1137 		if (other != old_peer)
1138 			unix_dgram_disconnected(sk, old_peer);
1139 		sock_put(old_peer);
1140 	} else {
1141 		unix_peer(sk) = other;
1142 		unix_state_double_unlock(sk, other);
1143 	}
1144 	return 0;
1145 
1146 out_unlock:
1147 	unix_state_double_unlock(sk, other);
1148 	sock_put(other);
1149 out:
1150 	return err;
1151 }
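/*
 * Illustrative userspace sketch (not part of this file): per the
 * 1003.1g branch above, a connected datagram socket is disassociated
 * from its peer by connecting to an AF_UNSPEC address.
 *
 *	struct sockaddr sa = { .sa_family = AF_UNSPEC };
 *
 *	connect(fd, &sa, sizeof(sa));	(dissolves the association)
 */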
1152 
1153 static long unix_wait_for_peer(struct sock *other, long timeo)
1154 {
1155 	struct unix_sock *u = unix_sk(other);
1156 	int sched;
1157 	DEFINE_WAIT(wait);
1158 
1159 	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1160 
1161 	sched = !sock_flag(other, SOCK_DEAD) &&
1162 		!(other->sk_shutdown & RCV_SHUTDOWN) &&
1163 		unix_recvq_full(other);
1164 
1165 	unix_state_unlock(other);
1166 
1167 	if (sched)
1168 		timeo = schedule_timeout(timeo);
1169 
1170 	finish_wait(&u->peer_wait, &wait);
1171 	return timeo;
1172 }
1173 
1174 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1175 			       int addr_len, int flags)
1176 {
1177 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1178 	struct sock *sk = sock->sk;
1179 	struct net *net = sock_net(sk);
1180 	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1181 	struct sock *newsk = NULL;
1182 	struct sock *other = NULL;
1183 	struct sk_buff *skb = NULL;
1184 	unsigned int hash;
1185 	int st;
1186 	int err;
1187 	long timeo;
1188 
1189 	err = unix_mkname(sunaddr, addr_len, &hash);
1190 	if (err < 0)
1191 		goto out;
1192 	addr_len = err;
1193 
1194 	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
1195 	    (err = unix_autobind(sock)) != 0)
1196 		goto out;
1197 
1198 	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1199 
1200 	/* First of all allocate resources.
1201 	   If we allocate them after the state is locked,
1202 	   we will have to recheck everything again in any case.
1203 	 */
1204 
1205 	err = -ENOMEM;
1206 
1207 	/* create new sock for complete connection */
1208 	newsk = unix_create1(sock_net(sk), NULL);
1209 	if (newsk == NULL)
1210 		goto out;
1211 
1212 	/* Allocate skb for sending to listening sock */
1213 	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1214 	if (skb == NULL)
1215 		goto out;
1216 
1217 restart:
1218 	/*  Find listening sock. */
1219 	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
1220 	if (!other)
1221 		goto out;
1222 
1223 	/* Latch state of peer */
1224 	unix_state_lock(other);
1225 
1226 	/* Apparently VFS overslept socket death. Retry. */
1227 	if (sock_flag(other, SOCK_DEAD)) {
1228 		unix_state_unlock(other);
1229 		sock_put(other);
1230 		goto restart;
1231 	}
1232 
1233 	err = -ECONNREFUSED;
1234 	if (other->sk_state != TCP_LISTEN)
1235 		goto out_unlock;
1236 	if (other->sk_shutdown & RCV_SHUTDOWN)
1237 		goto out_unlock;
1238 
1239 	if (unix_recvq_full(other)) {
1240 		err = -EAGAIN;
1241 		if (!timeo)
1242 			goto out_unlock;
1243 
1244 		timeo = unix_wait_for_peer(other, timeo);
1245 
1246 		err = sock_intr_errno(timeo);
1247 		if (signal_pending(current))
1248 			goto out;
1249 		sock_put(other);
1250 		goto restart;
1251 	}
1252 
1253 	/* Latch our state.
1254 
1255 	   This is a tricky place. We need to grab our state lock and cannot
1256 	   drop the lock on the peer. It is dangerous because deadlock is
1257 	   possible. The connect-to-self case and simultaneous
1258 	   attempts to connect are eliminated by checking socket
1259 	   state. other is TCP_LISTEN; if sk is TCP_LISTEN we
1260 	   check this before attempting to grab the lock.
1261 
1262 	   Well, and we have to recheck the state after the socket is locked.
1263 	 */
1264 	st = sk->sk_state;
1265 
1266 	switch (st) {
1267 	case TCP_CLOSE:
1268 		/* This is ok... continue with connect */
1269 		break;
1270 	case TCP_ESTABLISHED:
1271 		/* Socket is already connected */
1272 		err = -EISCONN;
1273 		goto out_unlock;
1274 	default:
1275 		err = -EINVAL;
1276 		goto out_unlock;
1277 	}
1278 
1279 	unix_state_lock_nested(sk);
1280 
1281 	if (sk->sk_state != st) {
1282 		unix_state_unlock(sk);
1283 		unix_state_unlock(other);
1284 		sock_put(other);
1285 		goto restart;
1286 	}
1287 
1288 	err = security_unix_stream_connect(sk, other, newsk);
1289 	if (err) {
1290 		unix_state_unlock(sk);
1291 		goto out_unlock;
1292 	}
1293 
1294 	/* The way is open! Quickly set all the necessary fields... */
1295 
1296 	sock_hold(sk);
1297 	unix_peer(newsk)	= sk;
1298 	newsk->sk_state		= TCP_ESTABLISHED;
1299 	newsk->sk_type		= sk->sk_type;
1300 	init_peercred(newsk);
1301 	newu = unix_sk(newsk);
1302 	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
1303 	otheru = unix_sk(other);
1304 
1305 	/* copy address information from the listening to the new sock */
1306 	if (otheru->addr) {
1307 		atomic_inc(&otheru->addr->refcnt);
1308 		newu->addr = otheru->addr;
1309 	}
1310 	if (otheru->path.dentry) {
1311 		path_get(&otheru->path);
1312 		newu->path = otheru->path;
1313 	}
1314 
1315 	/* Set credentials */
1316 	copy_peercred(sk, other);
1317 
1318 	sock->state	= SS_CONNECTED;
1319 	sk->sk_state	= TCP_ESTABLISHED;
1320 	sock_hold(newsk);
1321 
1322 	smp_mb__after_atomic();	/* sock_hold() does an atomic_inc() */
1323 	unix_peer(sk)	= newsk;
1324 
1325 	unix_state_unlock(sk);
1326 
1327 	/* take ten and send info to the listening sock */
1328 	spin_lock(&other->sk_receive_queue.lock);
1329 	__skb_queue_tail(&other->sk_receive_queue, skb);
1330 	spin_unlock(&other->sk_receive_queue.lock);
1331 	unix_state_unlock(other);
1332 	other->sk_data_ready(other);
1333 	sock_put(other);
1334 	return 0;
1335 
1336 out_unlock:
1337 	if (other)
1338 		unix_state_unlock(other);
1339 
1340 out:
1341 	kfree_skb(skb);
1342 	if (newsk)
1343 		unix_release_sock(newsk, 0);
1344 	if (other)
1345 		sock_put(other);
1346 	return err;
1347 }
1348 
1349 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1350 {
1351 	struct sock *ska = socka->sk, *skb = sockb->sk;
1352 
1353 	/* Join our sockets back to back */
1354 	sock_hold(ska);
1355 	sock_hold(skb);
1356 	unix_peer(ska) = skb;
1357 	unix_peer(skb) = ska;
1358 	init_peercred(ska);
1359 	init_peercred(skb);
1360 
1361 	if (ska->sk_type != SOCK_DGRAM) {
1362 		ska->sk_state = TCP_ESTABLISHED;
1363 		skb->sk_state = TCP_ESTABLISHED;
1364 		socka->state  = SS_CONNECTED;
1365 		sockb->state  = SS_CONNECTED;
1366 	}
1367 	return 0;
1368 }
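/*
 * Illustrative userspace sketch (not part of this file): socketpair()
 * is the usual consumer of the function above, yielding two
 * already-connected AF_UNIX sockets, e.g. for parent/child IPC.
 *
 *	int sv[2];
 *
 *	socketpair(AF_UNIX, SOCK_STREAM, 0, sv);
 *	(data written to sv[0] is read from sv[1] and vice versa)
 */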
1369 
1370 static void unix_sock_inherit_flags(const struct socket *old,
1371 				    struct socket *new)
1372 {
1373 	if (test_bit(SOCK_PASSCRED, &old->flags))
1374 		set_bit(SOCK_PASSCRED, &new->flags);
1375 	if (test_bit(SOCK_PASSSEC, &old->flags))
1376 		set_bit(SOCK_PASSSEC, &new->flags);
1377 }
1378 
1379 static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1380 {
1381 	struct sock *sk = sock->sk;
1382 	struct sock *tsk;
1383 	struct sk_buff *skb;
1384 	int err;
1385 
1386 	err = -EOPNOTSUPP;
1387 	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1388 		goto out;
1389 
1390 	err = -EINVAL;
1391 	if (sk->sk_state != TCP_LISTEN)
1392 		goto out;
1393 
1394 	/* If socket state is TCP_LISTEN it cannot change (for now...),
1395 	 * so that no locks are necessary.
1396 	 */
1397 
1398 	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1399 	if (!skb) {
1400 		/* This means receive shutdown. */
1401 		if (err == 0)
1402 			err = -EINVAL;
1403 		goto out;
1404 	}
1405 
1406 	tsk = skb->sk;
1407 	skb_free_datagram(sk, skb);
1408 	wake_up_interruptible(&unix_sk(sk)->peer_wait);
1409 
1410 	/* attach accepted sock to socket */
1411 	unix_state_lock(tsk);
1412 	newsock->state = SS_CONNECTED;
1413 	unix_sock_inherit_flags(sock, newsock);
1414 	sock_graft(tsk, newsock);
1415 	unix_state_unlock(tsk);
1416 	return 0;
1417 
1418 out:
1419 	return err;
1420 }
1421 
1422 
1423 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1424 {
1425 	struct sock *sk = sock->sk;
1426 	struct unix_sock *u;
1427 	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1428 	int err = 0;
1429 
1430 	if (peer) {
1431 		sk = unix_peer_get(sk);
1432 
1433 		err = -ENOTCONN;
1434 		if (!sk)
1435 			goto out;
1436 		err = 0;
1437 	} else {
1438 		sock_hold(sk);
1439 	}
1440 
1441 	u = unix_sk(sk);
1442 	unix_state_lock(sk);
1443 	if (!u->addr) {
1444 		sunaddr->sun_family = AF_UNIX;
1445 		sunaddr->sun_path[0] = 0;
1446 		*uaddr_len = sizeof(short);
1447 	} else {
1448 		struct unix_address *addr = u->addr;
1449 
1450 		*uaddr_len = addr->len;
1451 		memcpy(sunaddr, addr->name, *uaddr_len);
1452 	}
1453 	unix_state_unlock(sk);
1454 	sock_put(sk);
1455 out:
1456 	return err;
1457 }
1458 
1459 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1460 {
1461 	int i;
1462 
1463 	scm->fp = UNIXCB(skb).fp;
1464 	UNIXCB(skb).fp = NULL;
1465 
1466 	for (i = scm->fp->count-1; i >= 0; i--)
1467 		unix_notinflight(scm->fp->user, scm->fp->fp[i]);
1468 }
1469 
1470 static void unix_destruct_scm(struct sk_buff *skb)
1471 {
1472 	struct scm_cookie scm;
1473 	memset(&scm, 0, sizeof(scm));
1474 	scm.pid  = UNIXCB(skb).pid;
1475 	if (UNIXCB(skb).fp)
1476 		unix_detach_fds(&scm, skb);
1477 
1478 	/* Alas, it calls VFS */
1479 	/* So fscking what? fput() had been SMP-safe since the last Summer */
1480 	scm_destroy(&scm);
1481 	sock_wfree(skb);
1482 }
1483 
1484 /*
1485  * The "user->unix_inflight" variable is protected by the garbage
1486  * collection lock, and we just read it locklessly here. If you go
1487  * over the limit, there might be a tiny race in actually noticing
1488  * it across threads. Tough.
1489  */
1490 static inline bool too_many_unix_fds(struct task_struct *p)
1491 {
1492 	struct user_struct *user = current_user();
1493 
1494 	if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE)))
1495 		return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
1496 	return false;
1497 }
1498 
1499 #define MAX_RECURSION_LEVEL 4
1500 
1501 static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1502 {
1503 	int i;
1504 	unsigned char max_level = 0;
1505 	int unix_sock_count = 0;
1506 
1507 	if (too_many_unix_fds(current))
1508 		return -ETOOMANYREFS;
1509 
1510 	for (i = scm->fp->count - 1; i >= 0; i--) {
1511 		struct sock *sk = unix_get_socket(scm->fp->fp[i]);
1512 
1513 		if (sk) {
1514 			unix_sock_count++;
1515 			max_level = max(max_level,
1516 					unix_sk(sk)->recursion_level);
1517 		}
1518 	}
1519 	if (unlikely(max_level > MAX_RECURSION_LEVEL))
1520 		return -ETOOMANYREFS;
1521 
1522 	/*
1523 	 * Need to duplicate file references for the sake of garbage
1524 	 * collection.  Otherwise a socket in the fps might become a
1525 	 * candidate for GC while the skb is not yet queued.
1526 	 */
1527 	UNIXCB(skb).fp = scm_fp_dup(scm->fp);
1528 	if (!UNIXCB(skb).fp)
1529 		return -ENOMEM;
1530 
1531 	for (i = scm->fp->count - 1; i >= 0; i--)
1532 		unix_inflight(scm->fp->user, scm->fp->fp[i]);
1533 	return max_level;
1534 }
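/*
 * Illustrative userspace sketch (not part of this file): the in-flight
 * accounting above is what a plain SCM_RIGHTS send ends up exercising;
 * sock and fd_to_pass are placeholder names.
 *
 *	char cbuf[CMSG_SPACE(sizeof(int))];
 *	struct iovec iov = { .iov_base = "x", .iov_len = 1 };
 *	struct msghdr mh = {
 *		.msg_iov = &iov, .msg_iovlen = 1,
 *		.msg_control = cbuf, .msg_controllen = sizeof(cbuf),
 *	};
 *	struct cmsghdr *c = CMSG_FIRSTHDR(&mh);
 *
 *	c->cmsg_level = SOL_SOCKET;
 *	c->cmsg_type  = SCM_RIGHTS;
 *	c->cmsg_len   = CMSG_LEN(sizeof(int));
 *	memcpy(CMSG_DATA(c), &fd_to_pass, sizeof(int));
 *	sendmsg(sock, &mh, 0);
 */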
1535 
1536 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1537 {
1538 	int err = 0;
1539 
1540 	UNIXCB(skb).pid  = get_pid(scm->pid);
1541 	UNIXCB(skb).uid = scm->creds.uid;
1542 	UNIXCB(skb).gid = scm->creds.gid;
1543 	UNIXCB(skb).fp = NULL;
1544 	if (scm->fp && send_fds)
1545 		err = unix_attach_fds(scm, skb);
1546 
1547 	skb->destructor = unix_destruct_scm;
1548 	return err;
1549 }
1550 
1551 /*
1552  * Some apps rely on write() giving SCM_CREDENTIALS.
1553  * We include credentials if the source or destination socket
1554  * asserted SOCK_PASSCRED.
1555  */
1556 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1557 			    const struct sock *other)
1558 {
1559 	if (UNIXCB(skb).pid)
1560 		return;
1561 	if (test_bit(SOCK_PASSCRED, &sock->flags) ||
1562 	    !other->sk_socket ||
1563 	    test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) {
1564 		UNIXCB(skb).pid  = get_pid(task_tgid(current));
1565 		current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
1566 	}
1567 }
1568 
1569 /*
1570  *	Send AF_UNIX data.
1571  */
1572 
1573 static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1574 			      size_t len)
1575 {
1576 	struct sock *sk = sock->sk;
1577 	struct net *net = sock_net(sk);
1578 	struct unix_sock *u = unix_sk(sk);
1579 	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
1580 	struct sock *other = NULL;
1581 	int namelen = 0; /* fake initializer to silence a GCC warning */
1582 	int err;
1583 	unsigned int hash;
1584 	struct sk_buff *skb;
1585 	long timeo;
1586 	struct scm_cookie scm;
1587 	int max_level;
1588 	int data_len = 0;
1589 	int sk_locked;
1590 
1591 	wait_for_unix_gc();
1592 	err = scm_send(sock, msg, &scm, false);
1593 	if (err < 0)
1594 		return err;
1595 
1596 	err = -EOPNOTSUPP;
1597 	if (msg->msg_flags&MSG_OOB)
1598 		goto out;
1599 
1600 	if (msg->msg_namelen) {
1601 		err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1602 		if (err < 0)
1603 			goto out;
1604 		namelen = err;
1605 	} else {
1606 		sunaddr = NULL;
1607 		err = -ENOTCONN;
1608 		other = unix_peer_get(sk);
1609 		if (!other)
1610 			goto out;
1611 	}
1612 
1613 	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1614 	    && (err = unix_autobind(sock)) != 0)
1615 		goto out;
1616 
1617 	err = -EMSGSIZE;
1618 	if (len > sk->sk_sndbuf - 32)
1619 		goto out;
1620 
1621 	if (len > SKB_MAX_ALLOC) {
1622 		data_len = min_t(size_t,
1623 				 len - SKB_MAX_ALLOC,
1624 				 MAX_SKB_FRAGS * PAGE_SIZE);
1625 		data_len = PAGE_ALIGN(data_len);
1626 
1627 		BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
1628 	}
1629 
1630 	skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1631 				   msg->msg_flags & MSG_DONTWAIT, &err,
1632 				   PAGE_ALLOC_COSTLY_ORDER);
1633 	if (skb == NULL)
1634 		goto out;
1635 
1636 	err = unix_scm_to_skb(&scm, skb, true);
1637 	if (err < 0)
1638 		goto out_free;
1639 	max_level = err + 1;
1640 	unix_get_secdata(&scm, skb);
1641 
1642 	skb_put(skb, len - data_len);
1643 	skb->data_len = data_len;
1644 	skb->len = len;
1645 	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
1646 	if (err)
1647 		goto out_free;
1648 
1649 	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1650 
1651 restart:
1652 	if (!other) {
1653 		err = -ECONNRESET;
1654 		if (sunaddr == NULL)
1655 			goto out_free;
1656 
1657 		other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1658 					hash, &err);
1659 		if (other == NULL)
1660 			goto out_free;
1661 	}
1662 
1663 	if (sk_filter(other, skb) < 0) {
1664 		/* Toss the packet but do not return any error to the sender */
1665 		err = len;
1666 		goto out_free;
1667 	}
1668 
1669 	sk_locked = 0;
1670 	unix_state_lock(other);
1671 restart_locked:
1672 	err = -EPERM;
1673 	if (!unix_may_send(sk, other))
1674 		goto out_unlock;
1675 
1676 	if (unlikely(sock_flag(other, SOCK_DEAD))) {
1677 		/*
1678 		 *	Check with 1003.1g - what should a
1679 		 *	datagram send to a dead peer return?
1680 		 */
1681 		unix_state_unlock(other);
1682 		sock_put(other);
1683 
1684 		if (!sk_locked)
1685 			unix_state_lock(sk);
1686 
1687 		err = 0;
1688 		if (unix_peer(sk) == other) {
1689 			unix_peer(sk) = NULL;
1690 			unix_dgram_peer_wake_disconnect_wakeup(sk, other);
1691 
1692 			unix_state_unlock(sk);
1693 
1694 			unix_dgram_disconnected(sk, other);
1695 			sock_put(other);
1696 			err = -ECONNREFUSED;
1697 		} else {
1698 			unix_state_unlock(sk);
1699 		}
1700 
1701 		other = NULL;
1702 		if (err)
1703 			goto out_free;
1704 		goto restart;
1705 	}
1706 
1707 	err = -EPIPE;
1708 	if (other->sk_shutdown & RCV_SHUTDOWN)
1709 		goto out_unlock;
1710 
1711 	if (sk->sk_type != SOCK_SEQPACKET) {
1712 		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1713 		if (err)
1714 			goto out_unlock;
1715 	}
1716 
1717 	/* other == sk && unix_peer(other) != sk if
1718 	 * - unix_peer(sk) == NULL, destination address bound to sk
1719 	 * - unix_peer(sk) == sk by time of get but disconnected before lock
1720 	 */
1721 	if (other != sk &&
1722 	    unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
1723 		if (timeo) {
1724 			timeo = unix_wait_for_peer(other, timeo);
1725 
1726 			err = sock_intr_errno(timeo);
1727 			if (signal_pending(current))
1728 				goto out_free;
1729 
1730 			goto restart;
1731 		}
1732 
1733 		if (!sk_locked) {
1734 			unix_state_unlock(other);
1735 			unix_state_double_lock(sk, other);
1736 		}
1737 
1738 		if (unix_peer(sk) != other ||
1739 		    unix_dgram_peer_wake_me(sk, other)) {
1740 			err = -EAGAIN;
1741 			sk_locked = 1;
1742 			goto out_unlock;
1743 		}
1744 
1745 		if (!sk_locked) {
1746 			sk_locked = 1;
1747 			goto restart_locked;
1748 		}
1749 	}
1750 
1751 	if (unlikely(sk_locked))
1752 		unix_state_unlock(sk);
1753 
1754 	if (sock_flag(other, SOCK_RCVTSTAMP))
1755 		__net_timestamp(skb);
1756 	maybe_add_creds(skb, sock, other);
1757 	skb_queue_tail(&other->sk_receive_queue, skb);
1758 	if (max_level > unix_sk(other)->recursion_level)
1759 		unix_sk(other)->recursion_level = max_level;
1760 	unix_state_unlock(other);
1761 	other->sk_data_ready(other);
1762 	sock_put(other);
1763 	scm_destroy(&scm);
1764 	return len;
1765 
1766 out_unlock:
1767 	if (sk_locked)
1768 		unix_state_unlock(sk);
1769 	unix_state_unlock(other);
1770 out_free:
1771 	kfree_skb(skb);
1772 out:
1773 	if (other)
1774 		sock_put(other);
1775 	scm_destroy(&scm);
1776 	return err;
1777 }
1778 
1779 /* We use paged skbs for stream sockets, and limit occupancy to 32768
1780  * bytes, and a minimum of a full page.
1781  */
1782 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
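/*
 * Worked example: with 4 KiB pages, get_order(32768) == 3, so
 * UNIX_SKB_FRAGS_SZ == 4096 << 3 == 32768; with 64 KiB pages the
 * order is 0 and the limit becomes a single 64 KiB page.
 */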
1783 
1784 static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
1785 			       size_t len)
1786 {
1787 	struct sock *sk = sock->sk;
1788 	struct sock *other = NULL;
1789 	int err, size;
1790 	struct sk_buff *skb;
1791 	int sent = 0;
1792 	struct scm_cookie scm;
1793 	bool fds_sent = false;
1794 	int max_level;
1795 	int data_len;
1796 
1797 	wait_for_unix_gc();
1798 	err = scm_send(sock, msg, &scm, false);
1799 	if (err < 0)
1800 		return err;
1801 
1802 	err = -EOPNOTSUPP;
1803 	if (msg->msg_flags&MSG_OOB)
1804 		goto out_err;
1805 
1806 	if (msg->msg_namelen) {
1807 		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1808 		goto out_err;
1809 	} else {
1810 		err = -ENOTCONN;
1811 		other = unix_peer(sk);
1812 		if (!other)
1813 			goto out_err;
1814 	}
1815 
1816 	if (sk->sk_shutdown & SEND_SHUTDOWN)
1817 		goto pipe_err;
1818 
1819 	while (sent < len) {
1820 		size = len - sent;
1821 
1822 		/* Keep two messages in the pipe so it schedules better */
1823 		size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
1824 
1825 		/* allow fallback to order-0 allocations */
1826 		size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
1827 
1828 		data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
1829 
1830 		data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
1831 
1832 		skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
1833 					   msg->msg_flags & MSG_DONTWAIT, &err,
1834 					   get_order(UNIX_SKB_FRAGS_SZ));
1835 		if (!skb)
1836 			goto out_err;
1837 
1838 		/* Only send the fds in the first buffer */
1839 		err = unix_scm_to_skb(&scm, skb, !fds_sent);
1840 		if (err < 0) {
1841 			kfree_skb(skb);
1842 			goto out_err;
1843 		}
1844 		max_level = err + 1;
1845 		fds_sent = true;
1846 
1847 		skb_put(skb, size - data_len);
1848 		skb->data_len = data_len;
1849 		skb->len = size;
1850 		err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
1851 		if (err) {
1852 			kfree_skb(skb);
1853 			goto out_err;
1854 		}
1855 
1856 		unix_state_lock(other);
1857 
1858 		if (sock_flag(other, SOCK_DEAD) ||
1859 		    (other->sk_shutdown & RCV_SHUTDOWN))
1860 			goto pipe_err_free;
1861 
1862 		maybe_add_creds(skb, sock, other);
1863 		skb_queue_tail(&other->sk_receive_queue, skb);
1864 		if (max_level > unix_sk(other)->recursion_level)
1865 			unix_sk(other)->recursion_level = max_level;
1866 		unix_state_unlock(other);
1867 		other->sk_data_ready(other);
1868 		sent += size;
1869 	}
1870 
1871 	scm_destroy(&scm);
1872 
1873 	return sent;
1874 
1875 pipe_err_free:
1876 	unix_state_unlock(other);
1877 	kfree_skb(skb);
1878 pipe_err:
1879 	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1880 		send_sig(SIGPIPE, current, 0);
1881 	err = -EPIPE;
1882 out_err:
1883 	scm_destroy(&scm);
1884 	return sent ? : err;
1885 }
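
/*
 * Editor's sketch (illustrative, not part of this file): passing a file
 * descriptor over a connected SOCK_STREAM AF_UNIX socket with
 * SCM_RIGHTS.  Even when the payload is split across several skbs by
 * the loop above, the descriptors ride only in the first one (fds_sent).
 * The helper name send_fd() is made up for the example.
 */
#if 0	/* standalone example; compile separately against a libc */
#include <string.h>
#include <sys/socket.h>

static int send_fd(int sock, int fd, const char *data, size_t len)
{
	struct iovec iov = { .iov_base = (void *)data, .iov_len = len };
	union {
		struct cmsghdr align;
		char buf[CMSG_SPACE(sizeof(int))];
	} u;
	struct msghdr msg = {
		.msg_iov = &iov, .msg_iovlen = 1,
		.msg_control = u.buf, .msg_controllen = sizeof(u.buf),
	};
	struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);

	cmsg->cmsg_level = SOL_SOCKET;
	cmsg->cmsg_type = SCM_RIGHTS;
	cmsg->cmsg_len = CMSG_LEN(sizeof(int));
	memcpy(CMSG_DATA(cmsg), &fd, sizeof(int));

	return sendmsg(sock, &msg, 0) < 0 ? -1 : 0;
}
#endif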
1886 
1887 static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
1888 				  size_t len)
1889 {
1890 	int err;
1891 	struct sock *sk = sock->sk;
1892 
1893 	err = sock_error(sk);
1894 	if (err)
1895 		return err;
1896 
1897 	if (sk->sk_state != TCP_ESTABLISHED)
1898 		return -ENOTCONN;
1899 
1900 	if (msg->msg_namelen)
1901 		msg->msg_namelen = 0;
1902 
1903 	return unix_dgram_sendmsg(sock, msg, len);
1904 }
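
/*
 * Editor's sketch (illustrative, not part of this file): SOCK_SEQPACKET
 * on AF_UNIX is connection-oriented (hence the TCP_ESTABLISHED test
 * above) but preserves record boundaries like a datagram socket.
 */
#if 0	/* standalone example; compile separately against a libc */
#include <assert.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	int sv[2];
	char buf[16];

	if (socketpair(AF_UNIX, SOCK_SEQPACKET, 0, sv))
		return 1;
	write(sv[0], "ab", 2);
	write(sv[0], "cd", 2);
	/* Two records stay two records; one read never crosses them. */
	assert(read(sv[1], buf, sizeof(buf)) == 2);
	assert(read(sv[1], buf, sizeof(buf)) == 2);
	close(sv[0]);
	close(sv[1]);
	return 0;
}
#endif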
1905 
1906 static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
1907 				  size_t size, int flags)
1908 {
1909 	struct sock *sk = sock->sk;
1910 
1911 	if (sk->sk_state != TCP_ESTABLISHED)
1912 		return -ENOTCONN;
1913 
1914 	return unix_dgram_recvmsg(sock, msg, size, flags);
1915 }
1916 
1917 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1918 {
1919 	struct unix_sock *u = unix_sk(sk);
1920 
1921 	if (u->addr) {
1922 		msg->msg_namelen = u->addr->len;
1923 		memcpy(msg->msg_name, u->addr->name, u->addr->len);
1924 	}
1925 }
1926 
1927 static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
1928 			      size_t size, int flags)
1929 {
1930 	struct scm_cookie scm;
1931 	struct sock *sk = sock->sk;
1932 	struct unix_sock *u = unix_sk(sk);
1933 	int noblock = flags & MSG_DONTWAIT;
1934 	struct sk_buff *skb;
1935 	int err;
1936 	int peeked, skip;
1937 
1938 	err = -EOPNOTSUPP;
1939 	if (flags&MSG_OOB)
1940 		goto out;
1941 
1942 	err = mutex_lock_interruptible(&u->readlock);
1943 	if (unlikely(err)) {
1944 		/* recvmsg() in non-blocking mode is supposed to return -EAGAIN;
1945 		 * sk_rcvtimeo is not honored by mutex_lock_interruptible().
1946 		 */
1947 		err = noblock ? -EAGAIN : -ERESTARTSYS;
1948 		goto out;
1949 	}
1950 
1951 	skip = sk_peek_offset(sk, flags);
1952 
1953 	skb = __skb_recv_datagram(sk, flags, &peeked, &skip, &err);
1954 	if (!skb) {
1955 		unix_state_lock(sk);
1956 		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
1957 		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
1958 		    (sk->sk_shutdown & RCV_SHUTDOWN))
1959 			err = 0;
1960 		unix_state_unlock(sk);
1961 		goto out_unlock;
1962 	}
1963 
1964 	wake_up_interruptible_sync_poll(&u->peer_wait,
1965 					POLLOUT | POLLWRNORM | POLLWRBAND);
1966 
1967 	if (msg->msg_name)
1968 		unix_copy_addr(msg, skb->sk);
1969 
1970 	if (size > skb->len - skip)
1971 		size = skb->len - skip;
1972 	else if (size < skb->len - skip)
1973 		msg->msg_flags |= MSG_TRUNC;
1974 
1975 	err = skb_copy_datagram_msg(skb, skip, msg, size);
1976 	if (err)
1977 		goto out_free;
1978 
1979 	if (sock_flag(sk, SOCK_RCVTSTAMP))
1980 		__sock_recv_timestamp(msg, sk, skb);
1981 
1982 	memset(&scm, 0, sizeof(scm));
1983 
1984 	scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
1985 	unix_set_secdata(&scm, skb);
1986 
1987 	if (!(flags & MSG_PEEK)) {
1988 		if (UNIXCB(skb).fp)
1989 			unix_detach_fds(&scm, skb);
1990 
1991 		sk_peek_offset_bwd(sk, skb->len);
1992 	} else {
1993 		/* It is questionable: on PEEK we could:
1994 		   - not return fds - good, but too simple 8)
1995 		   - return fds, and not return them on a later read (old
1996 		     strategy, apparently wrong)
1997 		   - clone fds (I chose this for now; it is the most universal
1998 		     solution)
1999 
2000 		   POSIX 1003.1g does not actually define this clearly
2001 		   at all. POSIX 1003.1g doesn't define a lot of things
2002 		   clearly, however!
2003 
2004 		*/
2005 
2006 		sk_peek_offset_fwd(sk, size);
2007 
2008 		if (UNIXCB(skb).fp)
2009 			scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2010 	}
2011 	err = (flags & MSG_TRUNC) ? skb->len - skip : size;
2012 
2013 	scm_recv(sock, msg, &scm, flags);
2014 
2015 out_free:
2016 	skb_free_datagram(sk, skb);
2017 out_unlock:
2018 	mutex_unlock(&u->readlock);
2019 out:
2020 	return err;
2021 }
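
/*
 * Editor's sketch (illustrative, not part of this file): the final
 * assignment of err above means MSG_TRUNC reports the real datagram
 * length even when the caller's buffer is smaller.
 */
#if 0	/* standalone example; compile separately against a libc */
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	int sv[2];
	char small[4];

	if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sv))
		return 1;
	write(sv[0], "0123456789", 10);

	/* MSG_PEEK leaves the datagram queued; MSG_TRUNC returns its
	 * full length (10) rather than the four bytes copied. */
	printf("real length: %zd\n",
	       recv(sv[1], small, sizeof(small), MSG_TRUNC | MSG_PEEK));
	close(sv[0]);
	close(sv[1]);
	return 0;
}
#endif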
2022 
2023 /*
2024  *	Sleep until more data has arrived. But check for races.
2025  */
2026 static long unix_stream_data_wait(struct sock *sk, long timeo,
2027 				  struct sk_buff *last)
2028 {
2029 	DEFINE_WAIT(wait);
2030 
2031 	unix_state_lock(sk);
2032 
2033 	for (;;) {
2034 		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2035 
2036 		if (skb_peek_tail(&sk->sk_receive_queue) != last ||
2037 		    sk->sk_err ||
2038 		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
2039 		    signal_pending(current) ||
2040 		    !timeo)
2041 			break;
2042 
2043 		set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
2044 		unix_state_unlock(sk);
2045 		timeo = freezable_schedule_timeout(timeo);
2046 		unix_state_lock(sk);
2047 
2048 		if (sock_flag(sk, SOCK_DEAD))
2049 			break;
2050 
2051 		clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
2052 	}
2053 
2054 	finish_wait(sk_sleep(sk), &wait);
2055 	unix_state_unlock(sk);
2056 	return timeo;
2057 }
2058 
2059 static unsigned int unix_skb_len(const struct sk_buff *skb)
2060 {
2061 	return skb->len - UNIXCB(skb).consumed;
2062 }
2063 
2064 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2065 			       size_t size, int flags)
2066 {
2067 	struct scm_cookie scm;
2068 	struct sock *sk = sock->sk;
2069 	struct unix_sock *u = unix_sk(sk);
2070 	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
2071 	int copied = 0;
2072 	int noblock = flags & MSG_DONTWAIT;
2073 	int check_creds = 0;
2074 	int target;
2075 	int err = 0;
2076 	long timeo;
2077 	int skip;
2078 
2079 	err = -EINVAL;
2080 	if (sk->sk_state != TCP_ESTABLISHED)
2081 		goto out;
2082 
2083 	err = -EOPNOTSUPP;
2084 	if (flags&MSG_OOB)
2085 		goto out;
2086 
2087 	target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
2088 	timeo = sock_rcvtimeo(sk, noblock);
2089 
2090 	/* Lock the socket to prevent the queue from being reordered
2091 	 * while we sleep copying data out to the message.
2092 	 */
2093 
2094 	memset(&scm, 0, sizeof(scm));
2095 
2096 	mutex_lock(&u->readlock);
2097 
2098 	if (flags & MSG_PEEK)
2099 		skip = sk_peek_offset(sk, flags);
2100 	else
2101 		skip = 0;
2102 
2103 	do {
2104 		int chunk;
2105 		struct sk_buff *skb, *last;
2106 
2107 		unix_state_lock(sk);
2108 		if (sock_flag(sk, SOCK_DEAD)) {
2109 			err = -ECONNRESET;
2110 			goto unlock;
2111 		}
2112 		last = skb = skb_peek(&sk->sk_receive_queue);
2113 again:
2114 		if (skb == NULL) {
2115 			unix_sk(sk)->recursion_level = 0;
2116 			if (copied >= target)
2117 				goto unlock;
2118 
2119 			/*
2120 			 *	POSIX 1003.1g mandates this order.
2121 			 */
2122 
2123 			err = sock_error(sk);
2124 			if (err)
2125 				goto unlock;
2126 			if (sk->sk_shutdown & RCV_SHUTDOWN)
2127 				goto unlock;
2128 
2129 			unix_state_unlock(sk);
2130 			err = -EAGAIN;
2131 			if (!timeo)
2132 				break;
2133 			mutex_unlock(&u->readlock);
2134 
2135 			timeo = unix_stream_data_wait(sk, timeo, last);
2136 
2137 			if (signal_pending(current)) {
2138 				err = sock_intr_errno(timeo);
2139 				scm_destroy(&scm);
2140 				goto out;
2141 			}
2142 
2143 			mutex_lock(&u->readlock);
2144 			continue;
2145  unlock:
2146 			unix_state_unlock(sk);
2147 			break;
2148 		}
2149 
2150 		while (skip >= unix_skb_len(skb)) {
2151 			skip -= unix_skb_len(skb);
2152 			last = skb;
2153 			skb = skb_peek_next(skb, &sk->sk_receive_queue);
2154 			if (!skb)
2155 				goto again;
2156 		}
2157 
2158 		unix_state_unlock(sk);
2159 
2160 		if (check_creds) {
2161 			/* Never glue messages from different writers */
2162 			if ((UNIXCB(skb).pid  != scm.pid) ||
2163 			    !uid_eq(UNIXCB(skb).uid, scm.creds.uid) ||
2164 			    !gid_eq(UNIXCB(skb).gid, scm.creds.gid))
2165 				break;
2166 		} else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
2167 			/* Copy credentials */
2168 			scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2169 			check_creds = 1;
2170 		}
2171 
2172 		/* Copy address just once */
2173 		if (sunaddr) {
2174 			unix_copy_addr(msg, skb->sk);
2175 			sunaddr = NULL;
2176 		}
2177 
2178 		chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
2179 		if (skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2180 					  msg, chunk)) {
2181 			if (copied == 0)
2182 				copied = -EFAULT;
2183 			break;
2184 		}
2185 		copied += chunk;
2186 		size -= chunk;
2187 
2188 		/* Mark read part of skb as used */
2189 		if (!(flags & MSG_PEEK)) {
2190 			UNIXCB(skb).consumed += chunk;
2191 
2192 			sk_peek_offset_bwd(sk, chunk);
2193 
2194 			if (UNIXCB(skb).fp)
2195 				unix_detach_fds(&scm, skb);
2196 
2197 			if (unix_skb_len(skb))
2198 				break;
2199 
2200 			skb_unlink(skb, &sk->sk_receive_queue);
2201 			consume_skb(skb);
2202 
2203 			if (scm.fp)
2204 				break;
2205 		} else {
2206 			/* It is questionable, see note in unix_dgram_recvmsg.
2207 			 */
2208 			if (UNIXCB(skb).fp)
2209 				scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2210 
2211 			sk_peek_offset_fwd(sk, chunk);
2212 
2213 			if (UNIXCB(skb).fp)
2214 				break;
2215 
2216 			skip = 0;
2217 			last = skb;
2218 			unix_state_lock(sk);
2219 			skb = skb_peek_next(skb, &sk->sk_receive_queue);
2220 			if (skb)
2221 				goto again;
2222 			unix_state_unlock(sk);
2223 			break;
2224 		}
2225 	} while (size);
2226 
2227 	mutex_unlock(&u->readlock);
2228 	scm_recv(sock, msg, &scm, flags);
2229 out:
2230 	return copied ? : err;
2231 }
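
/*
 * Editor's sketch (illustrative, not part of this file): with
 * SO_PASSCRED enabled on the receiver, the check_creds path above
 * attaches SCM_CREDENTIALS, which userspace reads back as a struct
 * ucred.  The option must be set before the sender writes, since
 * credentials are recorded at send time.
 */
#if 0	/* standalone example; compile separately against a libc */
#define _GNU_SOURCE	/* for struct ucred */
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>

static void print_peer_creds(int sock)
{
	char data[64];
	union {
		struct cmsghdr align;
		char buf[CMSG_SPACE(sizeof(struct ucred))];
	} u;
	struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
	struct msghdr msg = {
		.msg_iov = &iov, .msg_iovlen = 1,
		.msg_control = u.buf, .msg_controllen = sizeof(u.buf),
	};
	struct cmsghdr *cmsg;

	if (recvmsg(sock, &msg, 0) < 0)
		return;
	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg))
		if (cmsg->cmsg_level == SOL_SOCKET &&
		    cmsg->cmsg_type == SCM_CREDENTIALS) {
			struct ucred uc;

			memcpy(&uc, CMSG_DATA(cmsg), sizeof(uc));
			printf("pid=%d uid=%d gid=%d\n", uc.pid, uc.uid, uc.gid);
		}
}
#endif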
2232 
2233 static int unix_shutdown(struct socket *sock, int mode)
2234 {
2235 	struct sock *sk = sock->sk;
2236 	struct sock *other;
2237 
2238 	if (mode < SHUT_RD || mode > SHUT_RDWR)
2239 		return -EINVAL;
2240 	/* This maps:
2241 	 * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
2242 	 * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
2243 	 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2244 	 */
2245 	++mode;
2246 
2247 	unix_state_lock(sk);
2248 	sk->sk_shutdown |= mode;
2249 	other = unix_peer(sk);
2250 	if (other)
2251 		sock_hold(other);
2252 	unix_state_unlock(sk);
2253 	sk->sk_state_change(sk);
2254 
2255 	if (other &&
2256 		(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2257 
2258 		int peer_mode = 0;
2259 
2260 		if (mode&RCV_SHUTDOWN)
2261 			peer_mode |= SEND_SHUTDOWN;
2262 		if (mode&SEND_SHUTDOWN)
2263 			peer_mode |= RCV_SHUTDOWN;
2264 		unix_state_lock(other);
2265 		other->sk_shutdown |= peer_mode;
2266 		unix_state_unlock(other);
2267 		other->sk_state_change(other);
2268 		if (peer_mode == SHUTDOWN_MASK)
2269 			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2270 		else if (peer_mode & RCV_SHUTDOWN)
2271 			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2272 	}
2273 	if (other)
2274 		sock_put(other);
2275 
2276 	return 0;
2277 }
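
/*
 * Editor's sketch (illustrative, not part of this file): the peer_mode
 * mapping above turns a local SHUT_WR into RCV_SHUTDOWN on the peer,
 * so the peer reads EOF while the opposite direction stays usable.
 */
#if 0	/* standalone example; compile separately against a libc */
#include <assert.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	int sv[2];
	char c;

	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv))
		return 1;

	shutdown(sv[0], SHUT_WR);
	assert(read(sv[1], &c, 1) == 0);	/* EOF on the peer */
	assert(write(sv[1], "x", 1) == 1);	/* reverse path still open */
	assert(read(sv[0], &c, 1) == 1);
	close(sv[0]);
	close(sv[1]);
	return 0;
}
#endif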
2278 
2279 long unix_inq_len(struct sock *sk)
2280 {
2281 	struct sk_buff *skb;
2282 	long amount = 0;
2283 
2284 	if (sk->sk_state == TCP_LISTEN)
2285 		return -EINVAL;
2286 
2287 	spin_lock(&sk->sk_receive_queue.lock);
2288 	if (sk->sk_type == SOCK_STREAM ||
2289 	    sk->sk_type == SOCK_SEQPACKET) {
2290 		skb_queue_walk(&sk->sk_receive_queue, skb)
2291 			amount += unix_skb_len(skb);
2292 	} else {
2293 		skb = skb_peek(&sk->sk_receive_queue);
2294 		if (skb)
2295 			amount = skb->len;
2296 	}
2297 	spin_unlock(&sk->sk_receive_queue.lock);
2298 
2299 	return amount;
2300 }
2301 EXPORT_SYMBOL_GPL(unix_inq_len);
2302 
2303 long unix_outq_len(struct sock *sk)
2304 {
2305 	return sk_wmem_alloc_get(sk);
2306 }
2307 EXPORT_SYMBOL_GPL(unix_outq_len);
2308 
2309 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2310 {
2311 	struct sock *sk = sock->sk;
2312 	long amount = 0;
2313 	int err;
2314 
2315 	switch (cmd) {
2316 	case SIOCOUTQ:
2317 		amount = unix_outq_len(sk);
2318 		err = put_user(amount, (int __user *)arg);
2319 		break;
2320 	case SIOCINQ:
2321 		amount = unix_inq_len(sk);
2322 		if (amount < 0)
2323 			err = amount;
2324 		else
2325 			err = put_user(amount, (int __user *)arg);
2326 		break;
2327 	default:
2328 		err = -ENOIOCTLCMD;
2329 		break;
2330 	}
2331 	return err;
2332 }
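
/*
 * Editor's sketch (illustrative, not part of this file): SIOCINQ and
 * SIOCOUTQ map onto unix_inq_len()/unix_outq_len() above; for a stream
 * socket SIOCINQ counts only the unconsumed part of each queued skb.
 */
#if 0	/* standalone example; compile separately against a libc */
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <unistd.h>
#include <linux/sockios.h>

int main(void)
{
	int sv[2], inq = 0, outq = 0;

	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv))
		return 1;
	write(sv[0], "hello", 5);

	ioctl(sv[1], SIOCINQ, &inq);	/* 5: bytes waiting to be read */
	ioctl(sv[0], SIOCOUTQ, &outq);	/* bytes not yet consumed by peer */
	printf("inq=%d outq=%d\n", inq, outq);
	close(sv[0]);
	close(sv[1]);
	return 0;
}
#endif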
2333 
2334 static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
2335 {
2336 	struct sock *sk = sock->sk;
2337 	unsigned int mask;
2338 
2339 	sock_poll_wait(file, sk_sleep(sk), wait);
2340 	mask = 0;
2341 
2342 	/* exceptional events? */
2343 	if (sk->sk_err)
2344 		mask |= POLLERR;
2345 	if (sk->sk_shutdown == SHUTDOWN_MASK)
2346 		mask |= POLLHUP;
2347 	if (sk->sk_shutdown & RCV_SHUTDOWN)
2348 		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2349 
2350 	/* readable? */
2351 	if (!skb_queue_empty(&sk->sk_receive_queue))
2352 		mask |= POLLIN | POLLRDNORM;
2353 
2354 	/* Connection-based need to check for termination and startup */
2355 	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2356 	    sk->sk_state == TCP_CLOSE)
2357 		mask |= POLLHUP;
2358 
2359 	/*
2360 	 * We also report the socket as writable when the other side has
2361 	 * shut down the connection; this prevents stuck sockets.
2362 	 */
2363 	if (unix_writable(sk))
2364 		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2365 
2366 	return mask;
2367 }
2368 
2369 static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
2370 				    poll_table *wait)
2371 {
2372 	struct sock *sk = sock->sk, *other;
2373 	unsigned int mask, writable;
2374 
2375 	sock_poll_wait(file, sk_sleep(sk), wait);
2376 	mask = 0;
2377 
2378 	/* exceptional events? */
2379 	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
2380 		mask |= POLLERR |
2381 			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
2382 
2383 	if (sk->sk_shutdown & RCV_SHUTDOWN)
2384 		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2385 	if (sk->sk_shutdown == SHUTDOWN_MASK)
2386 		mask |= POLLHUP;
2387 
2388 	/* readable? */
2389 	if (!skb_queue_empty(&sk->sk_receive_queue))
2390 		mask |= POLLIN | POLLRDNORM;
2391 
2392 	/* Connection-based need to check for termination and startup */
2393 	if (sk->sk_type == SOCK_SEQPACKET) {
2394 		if (sk->sk_state == TCP_CLOSE)
2395 			mask |= POLLHUP;
2396 		/* connection hasn't started yet? */
2397 		if (sk->sk_state == TCP_SYN_SENT)
2398 			return mask;
2399 	}
2400 
2401 	/* No write status requested, avoid expensive OUT tests. */
2402 	if (!(poll_requested_events(wait) & (POLLWRBAND|POLLWRNORM|POLLOUT)))
2403 		return mask;
2404 
2405 	writable = unix_writable(sk);
2406 	if (writable) {
2407 		unix_state_lock(sk);
2408 
2409 		other = unix_peer(sk);
2410 		if (other && unix_peer(other) != sk &&
2411 		    unix_recvq_full(other) &&
2412 		    unix_dgram_peer_wake_me(sk, other))
2413 			writable = 0;
2414 
2415 		unix_state_unlock(sk);
2416 	}
2417 
2418 	if (writable)
2419 		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2420 	else
2421 		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
2422 
2423 	return mask;
2424 }
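
/*
 * Editor's sketch (illustrative, not part of this file): because the
 * writable test above consults the peer's queue, poll() on a connected
 * datagram sender blocks (or times out) while the receiver is full,
 * instead of reporting a POLLOUT that could only end in EAGAIN.
 */
#if 0	/* standalone example; compile separately against a libc */
#include <poll.h>

static int wait_writable(int sock, int timeout_ms)
{
	struct pollfd pfd = { .fd = sock, .events = POLLOUT };

	return poll(&pfd, 1, timeout_ms) > 0 && (pfd.revents & POLLOUT);
}
#endif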
2425 
2426 #ifdef CONFIG_PROC_FS
2427 
2428 #define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)
2429 
2430 #define get_bucket(x) ((x) >> BUCKET_SPACE)
2431 #define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
2432 #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
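
/*
 * Worked example (editor's note, assuming a 64-bit build and
 * UNIX_HASH_BITS == 8): BUCKET_SPACE is 64 - 9 - 1 = 54, so the
 * seq-file position packs the bucket into the top bits and a 1-based
 * offset within the bucket into the low 54 bits, e.g.
 * set_bucket_offset(3, 2) == (3UL << 54) | 2.
 */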
2433 
2434 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
2435 {
2436 	unsigned long offset = get_offset(*pos);
2437 	unsigned long bucket = get_bucket(*pos);
2438 	struct sock *sk;
2439 	unsigned long count = 0;
2440 
2441 	for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
2442 		if (sock_net(sk) != seq_file_net(seq))
2443 			continue;
2444 		if (++count == offset)
2445 			break;
2446 	}
2447 
2448 	return sk;
2449 }
2450 
2451 static struct sock *unix_next_socket(struct seq_file *seq,
2452 				     struct sock *sk,
2453 				     loff_t *pos)
2454 {
2455 	unsigned long bucket;
2456 
2457 	while (sk > (struct sock *)SEQ_START_TOKEN) {
2458 		sk = sk_next(sk);
2459 		if (!sk)
2460 			goto next_bucket;
2461 		if (sock_net(sk) == seq_file_net(seq))
2462 			return sk;
2463 	}
2464 
2465 	do {
2466 		sk = unix_from_bucket(seq, pos);
2467 		if (sk)
2468 			return sk;
2469 
2470 next_bucket:
2471 		bucket = get_bucket(*pos) + 1;
2472 		*pos = set_bucket_offset(bucket, 1);
2473 	} while (bucket < ARRAY_SIZE(unix_socket_table));
2474 
2475 	return NULL;
2476 }
2477 
2478 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2479 	__acquires(unix_table_lock)
2480 {
2481 	spin_lock(&unix_table_lock);
2482 
2483 	if (!*pos)
2484 		return SEQ_START_TOKEN;
2485 
2486 	if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
2487 		return NULL;
2488 
2489 	return unix_next_socket(seq, NULL, pos);
2490 }
2491 
2492 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2493 {
2494 	++*pos;
2495 	return unix_next_socket(seq, v, pos);
2496 }
2497 
2498 static void unix_seq_stop(struct seq_file *seq, void *v)
2499 	__releases(unix_table_lock)
2500 {
2501 	spin_unlock(&unix_table_lock);
2502 }
2503 
2504 static int unix_seq_show(struct seq_file *seq, void *v)
2505 {
2506 
2507 	if (v == SEQ_START_TOKEN)
2508 		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
2509 			 "Inode Path\n");
2510 	else {
2511 		struct sock *s = v;
2512 		struct unix_sock *u = unix_sk(s);
2513 		unix_state_lock(s);
2514 
2515 		seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
2516 			s,
2517 			atomic_read(&s->sk_refcnt),
2518 			0,
2519 			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2520 			s->sk_type,
2521 			s->sk_socket ?
2522 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2523 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2524 			sock_i_ino(s));
2525 
2526 		if (u->addr) {
2527 			int i, len;
2528 			seq_putc(seq, ' ');
2529 
2530 			i = 0;
2531 			len = u->addr->len - sizeof(short);
2532 			if (!UNIX_ABSTRACT(s))
2533 				len--;
2534 			else {
2535 				seq_putc(seq, '@');
2536 				i++;
2537 			}
2538 			for ( ; i < len; i++)
2539 				seq_putc(seq, u->addr->name->sun_path[i]);
2540 		}
2541 		unix_state_unlock(s);
2542 		seq_putc(seq, '\n');
2543 	}
2544 
2545 	return 0;
2546 }
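
/*
 * Example /proc/net/unix line produced above (editor's note; the
 * values are illustrative, the layout is not):
 *
 *   ffff8800b8d3b000: 00000002 00000000 00010000 0001 01 17956 /run/foo.sock
 *
 * i.e. socket address, refcount, protocol (always 0), flags
 * (__SO_ACCEPTCON for listeners), type, socket state, inode, and the
 * bound path, '@'-prefixed for abstract names.
 */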
2547 
2548 static const struct seq_operations unix_seq_ops = {
2549 	.start  = unix_seq_start,
2550 	.next   = unix_seq_next,
2551 	.stop   = unix_seq_stop,
2552 	.show   = unix_seq_show,
2553 };
2554 
2555 static int unix_seq_open(struct inode *inode, struct file *file)
2556 {
2557 	return seq_open_net(inode, file, &unix_seq_ops,
2558 			    sizeof(struct seq_net_private));
2559 }
2560 
2561 static const struct file_operations unix_seq_fops = {
2562 	.owner		= THIS_MODULE,
2563 	.open		= unix_seq_open,
2564 	.read		= seq_read,
2565 	.llseek		= seq_lseek,
2566 	.release	= seq_release_net,
2567 };
2568 
2569 #endif
2570 
2571 static const struct net_proto_family unix_family_ops = {
2572 	.family = PF_UNIX,
2573 	.create = unix_create,
2574 	.owner	= THIS_MODULE,
2575 };
2576 
2577 
2578 static int __net_init unix_net_init(struct net *net)
2579 {
2580 	int error = -ENOMEM;
2581 
2582 	net->unx.sysctl_max_dgram_qlen = 10;
2583 	if (unix_sysctl_register(net))
2584 		goto out;
2585 
2586 #ifdef CONFIG_PROC_FS
2587 	if (!proc_create("unix", 0, net->proc_net, &unix_seq_fops)) {
2588 		unix_sysctl_unregister(net);
2589 		goto out;
2590 	}
2591 #endif
2592 	error = 0;
2593 out:
2594 	return error;
2595 }
2596 
2597 static void __net_exit unix_net_exit(struct net *net)
2598 {
2599 	unix_sysctl_unregister(net);
2600 	remove_proc_entry("unix", net->proc_net);
2601 }
2602 
2603 static struct pernet_operations unix_net_ops = {
2604 	.init = unix_net_init,
2605 	.exit = unix_net_exit,
2606 };
2607 
2608 static int __init af_unix_init(void)
2609 {
2610 	int rc = -1;
2611 
2612 	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));
2613 
2614 	rc = proto_register(&unix_proto, 1);
2615 	if (rc != 0) {
2616 		pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
2617 		goto out;
2618 	}
2619 
2620 	sock_register(&unix_family_ops);
2621 	register_pernet_subsys(&unix_net_ops);
2622 out:
2623 	return rc;
2624 }
2625 
2626 static void __exit af_unix_exit(void)
2627 {
2628 	sock_unregister(PF_UNIX);
2629 	proto_unregister(&unix_proto);
2630 	unregister_pernet_subsys(&unix_net_ops);
2631 }
2632 
2633 /* Earlier than device_initcall() so that other drivers invoking
2634    request_module() don't end up in a loop when modprobe tries
2635    to use a UNIX socket. But later than subsys_initcall() because
2636    we depend on infrastructure initialised there. */
2637 fs_initcall(af_unix_init);
2638 module_exit(af_unix_exit);
2639 
2640 MODULE_LICENSE("GPL");
2641 MODULE_ALIAS_NETPROTO(PF_UNIX);
2642