/* RxRPC packet transmission
 *
 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/net.h>
#include <linux/gfp.h>
#include <linux/skbuff.h>
#include <linux/circ_buf.h>
#include <linux/export.h>
#include <net/sock.h>
#include <net/af_rxrpc.h>
#include "ar-internal.h"

/*
 * Time till packet resend (in jiffies).
 */
unsigned rxrpc_resend_timeout = 4 * HZ;

static int rxrpc_send_data(struct rxrpc_sock *rx,
			   struct rxrpc_call *call,
			   struct msghdr *msg, size_t len);

/*
 * extract control messages from the sendmsg() control buffer
 */
static int rxrpc_sendmsg_cmsg(struct rxrpc_sock *rx, struct msghdr *msg,
			      unsigned long *user_call_ID,
			      enum rxrpc_command *command,
			      u32 *abort_code,
			      bool server)
{
	struct cmsghdr *cmsg;
	int len;

	*command = RXRPC_CMD_SEND_DATA;

	if (msg->msg_controllen == 0)
		return -EINVAL;

	for_each_cmsghdr(cmsg, msg) {
		if (!CMSG_OK(msg, cmsg))
			return -EINVAL;

		len = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr));
		_debug("CMSG %d, %d, %d",
		       cmsg->cmsg_level, cmsg->cmsg_type, len);

		if (cmsg->cmsg_level != SOL_RXRPC)
			continue;

		switch (cmsg->cmsg_type) {
		case RXRPC_USER_CALL_ID:
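			/* the user call ID is sized according to the sender:
			 * a 32-bit compat task supplies a u32, a native task
			 * supplies an unsigned long */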
			if (msg->msg_flags & MSG_CMSG_COMPAT) {
				if (len != sizeof(u32))
					return -EINVAL;
				*user_call_ID = *(u32 *) CMSG_DATA(cmsg);
			} else {
				if (len != sizeof(unsigned long))
					return -EINVAL;
				*user_call_ID = *(unsigned long *)
					CMSG_DATA(cmsg);
			}
			_debug("User Call ID %lx", *user_call_ID);
			break;

		case RXRPC_ABORT:
			if (*command != RXRPC_CMD_SEND_DATA)
				return -EINVAL;
			*command = RXRPC_CMD_SEND_ABORT;
			if (len != sizeof(*abort_code))
				return -EINVAL;
			*abort_code = *(unsigned int *) CMSG_DATA(cmsg);
			_debug("Abort %x", *abort_code);
			if (*abort_code == 0)
				return -EINVAL;
			break;

		case RXRPC_ACCEPT:
			if (*command != RXRPC_CMD_SEND_DATA)
				return -EINVAL;
			*command = RXRPC_CMD_ACCEPT;
			if (len != 0)
				return -EINVAL;
			if (!server)
				return -EISCONN;
			break;

		default:
			return -EINVAL;
		}
	}

	_leave(" = 0");
	return 0;
}

/*
 * abort a call, sending an ABORT packet to the peer
 */
static void rxrpc_send_abort(struct rxrpc_call *call, u32 abort_code)
{
	write_lock_bh(&call->state_lock);

	if (call->state <= RXRPC_CALL_COMPLETE) {
		call->state = RXRPC_CALL_LOCALLY_ABORTED;
		call->abort_code = abort_code;
		set_bit(RXRPC_CALL_ABORT, &call->events);
		del_timer_sync(&call->resend_timer);
		del_timer_sync(&call->ack_timer);
		clear_bit(RXRPC_CALL_RESEND_TIMER, &call->events);
		clear_bit(RXRPC_CALL_ACK, &call->events);
		clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
		rxrpc_queue_call(call);
	}

	write_unlock_bh(&call->state_lock);
}

/*
 * send a message forming part of a client call through an RxRPC socket
 * - caller holds the socket locked
 * - the socket may be either a client socket or a server socket
 */
int rxrpc_client_sendmsg(struct rxrpc_sock *rx, struct rxrpc_transport *trans,
			 struct msghdr *msg, size_t len)
{
	struct rxrpc_conn_bundle *bundle;
	enum rxrpc_command cmd;
	struct rxrpc_call *call;
	unsigned long user_call_ID = 0;
	struct key *key;
	__be16 service_id;
	u32 abort_code = 0;
	int ret;

	_enter("");

	ASSERT(trans != NULL);

	ret = rxrpc_sendmsg_cmsg(rx, msg, &user_call_ID, &cmd, &abort_code,
				 false);
	if (ret < 0)
		return ret;

	bundle = NULL;
	if (trans) {
		service_id = rx->service_id;
		if (msg->msg_name) {
			DECLARE_SOCKADDR(struct sockaddr_rxrpc *, srx,
					 msg->msg_name);
			service_id = htons(srx->srx_service);
		}
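		/* a key with an empty payload carries no tokens, so don't use
		 * it to secure the connection */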
		key = rx->key;
		if (key && !rx->key->payload.data)
			key = NULL;
		bundle = rxrpc_get_bundle(rx, trans, key, service_id,
					  GFP_KERNEL);
		if (IS_ERR(bundle))
			return PTR_ERR(bundle);
	}

	call = rxrpc_get_client_call(rx, trans, bundle, user_call_ID,
				     abort_code == 0, GFP_KERNEL);
	if (trans)
		rxrpc_put_bundle(trans, bundle);
	if (IS_ERR(call)) {
		_leave(" = %ld", PTR_ERR(call));
		return PTR_ERR(call);
	}

	_debug("CALL %d USR %lx ST %d on CONN %p",
	       call->debug_id, call->user_call_ID, call->state, call->conn);

	if (call->state >= RXRPC_CALL_COMPLETE) {
		/* it's too late for this call */
		ret = -ESHUTDOWN;
	} else if (cmd == RXRPC_CMD_SEND_ABORT) {
		rxrpc_send_abort(call, abort_code);
	} else if (cmd != RXRPC_CMD_SEND_DATA) {
		ret = -EINVAL;
	} else if (call->state != RXRPC_CALL_CLIENT_SEND_REQUEST) {
		/* request phase complete for this client call */
		ret = -EPROTO;
	} else {
		ret = rxrpc_send_data(rx, call, msg, len);
	}

	rxrpc_put_call(call);
	_leave(" = %d", ret);
	return ret;
}
/**
 * rxrpc_kernel_send_data - Allow a kernel service to send data on a call
 * @call: The call to send data through
 * @msg: The data to send
 * @len: The amount of data to send
 *
 * Allow a kernel service to send data on a call.  The call must be in a state
 * appropriate to sending data.  No control data should be supplied in @msg,
 * nor should an address be supplied.  MSG_MORE should be flagged if there's
 * more data to come, otherwise this data will end the transmission phase.
 */
int rxrpc_kernel_send_data(struct rxrpc_call *call, struct msghdr *msg,
			   size_t len)
{
	int ret;

	_enter("{%d,%s},", call->debug_id, rxrpc_call_states[call->state]);

	ASSERTCMP(msg->msg_name, ==, NULL);
	ASSERTCMP(msg->msg_control, ==, NULL);

	lock_sock(&call->socket->sk);

	_debug("CALL %d USR %lx ST %d on CONN %p",
	       call->debug_id, call->user_call_ID, call->state, call->conn);

	if (call->state >= RXRPC_CALL_COMPLETE) {
		ret = -ESHUTDOWN; /* it's too late for this call */
	} else if (call->state != RXRPC_CALL_CLIENT_SEND_REQUEST &&
		   call->state != RXRPC_CALL_SERVER_ACK_REQUEST &&
		   call->state != RXRPC_CALL_SERVER_SEND_REPLY) {
		ret = -EPROTO; /* request phase complete for this client call */
	} else {
		ret = rxrpc_send_data(call->socket, call, msg, len);
	}

	release_sock(&call->socket->sk);
	_leave(" = %d", ret);
	return ret;
}

EXPORT_SYMBOL(rxrpc_kernel_send_data);

/**
 * rxrpc_kernel_abort_call - Allow a kernel service to abort a call
 * @call: The call to be aborted
 * @abort_code: The abort code to stick into the ABORT packet
 *
 * Allow a kernel service to abort a call, if it's still in an abortable state.
 */
void rxrpc_kernel_abort_call(struct rxrpc_call *call, u32 abort_code)
{
	_enter("{%d},%d", call->debug_id, abort_code);

	lock_sock(&call->socket->sk);

	_debug("CALL %d USR %lx ST %d on CONN %p",
	       call->debug_id, call->user_call_ID, call->state, call->conn);

	if (call->state < RXRPC_CALL_COMPLETE)
		rxrpc_send_abort(call, abort_code);

	release_sock(&call->socket->sk);
	_leave("");
}

EXPORT_SYMBOL(rxrpc_kernel_abort_call);

/*
 * send a message through a server socket
 * - caller holds the socket locked
 */
int rxrpc_server_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
{
	enum rxrpc_command cmd;
	struct rxrpc_call *call;
	unsigned long user_call_ID = 0;
	u32 abort_code = 0;
	int ret;

	_enter("");

	ret = rxrpc_sendmsg_cmsg(rx, msg, &user_call_ID, &cmd, &abort_code,
				 true);
	if (ret < 0)
		return ret;

	if (cmd == RXRPC_CMD_ACCEPT) {
		call = rxrpc_accept_call(rx, user_call_ID);
		if (IS_ERR(call))
			return PTR_ERR(call);
		rxrpc_put_call(call);
		return 0;
	}

	call = rxrpc_find_server_call(rx, user_call_ID);
	if (!call)
		return -EBADSLT;
	if (call->state >= RXRPC_CALL_COMPLETE) {
		ret = -ESHUTDOWN;
		goto out;
	}

	switch (cmd) {
	case RXRPC_CMD_SEND_DATA:
		if (call->state != RXRPC_CALL_CLIENT_SEND_REQUEST &&
		    call->state != RXRPC_CALL_SERVER_ACK_REQUEST &&
		    call->state != RXRPC_CALL_SERVER_SEND_REPLY) {
			/* Tx phase not yet begun for this call */
			ret = -EPROTO;
			break;
		}

		ret = rxrpc_send_data(rx, call, msg, len);
		break;

	case RXRPC_CMD_SEND_ABORT:
		rxrpc_send_abort(call, abort_code);
		break;
	default:
		BUG();
	}

out:
	rxrpc_put_call(call);
	_leave(" = %d", ret);
	return ret;
}

/*
 * send a packet through the transport endpoint
 */
int rxrpc_send_packet(struct rxrpc_transport *trans, struct sk_buff *skb)
{
	struct kvec iov[1];
	struct msghdr msg;
	int ret, opt;

	_enter(",{%d}", skb->len);

	iov[0].iov_base = skb->head;
	iov[0].iov_len = skb->len;

	msg.msg_name = &trans->peer->srx.transport.sin;
	msg.msg_namelen = sizeof(trans->peer->srx.transport.sin);
	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	msg.msg_flags = 0;

	/* send the packet with the don't fragment bit set if we currently
	 * think it's small enough */
	if (skb->len - sizeof(struct rxrpc_header) < trans->peer->maxdata) {
		down_read(&trans->local->defrag_sem);
		/* send the packet by UDP
		 * - returns -EMSGSIZE if UDP would have to fragment the packet
		 *   to go out of the interface
		 *   - in which case, we'll have processed the ICMP error
		 *     message and updated the peer record
		 */
		ret = kernel_sendmsg(trans->local->socket, &msg, iov, 1,
				     iov[0].iov_len);

		up_read(&trans->local->defrag_sem);
		if (ret == -EMSGSIZE)
			goto send_fragmentable;

		_leave(" = %d [%u]", ret, trans->peer->maxdata);
		return ret;
	}

send_fragmentable:
	/* attempt to send this message with fragmentation enabled */
	_debug("send fragment");

	down_write(&trans->local->defrag_sem);
	opt = IP_PMTUDISC_DONT;
	ret = kernel_setsockopt(trans->local->socket, SOL_IP, IP_MTU_DISCOVER,
				(char *) &opt, sizeof(opt));
	if (ret == 0) {
		ret = kernel_sendmsg(trans->local->socket, &msg, iov, 1,
				     iov[0].iov_len);

		opt = IP_PMTUDISC_DO;
		kernel_setsockopt(trans->local->socket, SOL_IP,
				  IP_MTU_DISCOVER, (char *) &opt, sizeof(opt));
	}

	up_write(&trans->local->defrag_sem);
	_leave(" = %d [frag %u]", ret, trans->peer->maxdata);
	return ret;
}

/*
 * wait for space to appear in the transmit/ACK window
 * - caller holds the socket locked
 */
static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx,
				    struct rxrpc_call *call,
				    long *timeo)
{
	DECLARE_WAITQUEUE(myself, current);
	int ret;

	_enter(",{%d},%ld",
	       CIRC_SPACE(call->acks_head, call->acks_tail, call->acks_winsz),
	       *timeo);

	add_wait_queue(&call->tx_waitq, &myself);

	for (;;) {
		set_current_state(TASK_INTERRUPTIBLE);
		ret = 0;
		if (CIRC_SPACE(call->acks_head, call->acks_tail,
			       call->acks_winsz) > 0)
			break;
		if (signal_pending(current)) {
			ret = sock_intr_errno(*timeo);
			break;
		}

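		/* drop the socket lock while we sleep so that other
		 * operations on this socket can make progress */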
		release_sock(&rx->sk);
		*timeo = schedule_timeout(*timeo);
		lock_sock(&rx->sk);
	}

	remove_wait_queue(&call->tx_waitq, &myself);
	set_current_state(TASK_RUNNING);
	_leave(" = %d", ret);
	return ret;
}

/*
 * attempt to schedule an instant Tx resend
 */
static inline void rxrpc_instant_resend(struct rxrpc_call *call)
{
	read_lock_bh(&call->state_lock);
	if (try_to_del_timer_sync(&call->resend_timer) >= 0) {
		clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
		if (call->state < RXRPC_CALL_COMPLETE &&
		    !test_and_set_bit(RXRPC_CALL_RESEND_TIMER, &call->events))
			rxrpc_queue_call(call);
	}
	read_unlock_bh(&call->state_lock);
}

/*
 * queue a packet for transmission, set the resend timer and attempt
 * to send the packet immediately
 */
static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb,
			       bool last)
{
	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
	int ret;

	_net("queue skb %p [%d]", skb, call->acks_head);

	ASSERT(call->acks_window != NULL);
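	/* stash the packet in the ACK-pending ring; the barrier makes sure the
	 * skb pointer is visible before the head index is advanced past it */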
	call->acks_window[call->acks_head] = (unsigned long) skb;
	smp_wmb();
	call->acks_head = (call->acks_head + 1) & (call->acks_winsz - 1);

	if (last || call->state == RXRPC_CALL_SERVER_ACK_REQUEST) {
		_debug("________awaiting reply/ACK__________");
		write_lock_bh(&call->state_lock);
		switch (call->state) {
		case RXRPC_CALL_CLIENT_SEND_REQUEST:
			call->state = RXRPC_CALL_CLIENT_AWAIT_REPLY;
			break;
		case RXRPC_CALL_SERVER_ACK_REQUEST:
			call->state = RXRPC_CALL_SERVER_SEND_REPLY;
			if (!last)
				break;
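			/* if this is the last packet, fall through so the call
			 * goes straight to awaiting the final ACK */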
		case RXRPC_CALL_SERVER_SEND_REPLY:
			call->state = RXRPC_CALL_SERVER_AWAIT_ACK;
			break;
		default:
			break;
		}
		write_unlock_bh(&call->state_lock);
	}

	_proto("Tx DATA %%%u { #%u }",
	       ntohl(sp->hdr.serial), ntohl(sp->hdr.seq));

	sp->need_resend = false;
	sp->resend_at = jiffies + rxrpc_resend_timeout;
	if (!test_and_set_bit(RXRPC_CALL_RUN_RTIMER, &call->flags)) {
		_debug("run timer");
		call->resend_timer.expires = sp->resend_at;
		add_timer(&call->resend_timer);
	}

	/* attempt to cancel the rx-ACK timer, deferring reply transmission if
	 * we're ACK'ing the request phase of an incoming call */
	ret = -EAGAIN;
	if (try_to_del_timer_sync(&call->ack_timer) >= 0) {
		/* the packet may be freed by rxrpc_process_call() before this
		 * returns */
		ret = rxrpc_send_packet(call->conn->trans, skb);
		_net("sent skb %p", skb);
	} else {
		_debug("failed to delete ACK timer");
	}

	if (ret < 0) {
		_debug("need instant resend %d", ret);
		sp->need_resend = true;
		rxrpc_instant_resend(call);
	}

	_leave("");
}

/*
 * send data through a socket
 * - must be called in process context
 * - caller holds the socket locked
 */
static int rxrpc_send_data(struct rxrpc_sock *rx,
			   struct rxrpc_call *call,
			   struct msghdr *msg, size_t len)
{
	struct rxrpc_skb_priv *sp;
	struct sk_buff *skb;
	struct sock *sk = &rx->sk;
	long timeo;
	bool more;
	int ret, copied;

	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);

	/* this should be in poll */
	clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);

	if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
		return -EPIPE;

	more = msg->msg_flags & MSG_MORE;

	skb = call->tx_pending;
	call->tx_pending = NULL;

	copied = 0;
	do {
		if (!skb) {
			size_t size, chunk, max, space;

			_debug("alloc");

			if (CIRC_SPACE(call->acks_head, call->acks_tail,
				       call->acks_winsz) <= 0) {
				ret = -EAGAIN;
				if (msg->msg_flags & MSG_DONTWAIT)
					goto maybe_error;
				ret = rxrpc_wait_for_tx_window(rx, call,
							       &timeo);
				if (ret < 0)
					goto maybe_error;
			}

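			/* size the data chunk so that, with the security
			 * overhead added, the packet still fits within the
			 * peer's maximum packet size, keeping it aligned to
			 * the connection's security block size */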
			max = call->conn->trans->peer->maxdata;
			max -= call->conn->security_size;
			max &= ~(call->conn->size_align - 1UL);

			chunk = max;
			if (chunk > msg_data_left(msg) && !more)
				chunk = msg_data_left(msg);

			space = chunk + call->conn->size_align;
			space &= ~(call->conn->size_align - 1UL);

			size = space + call->conn->header_size;

			_debug("SIZE: %zu/%zu/%zu", chunk, space, size);

			/* create a buffer that we can retain until it's ACK'd */
			skb = sock_alloc_send_skb(
				sk, size, msg->msg_flags & MSG_DONTWAIT, &ret);
			if (!skb)
				goto maybe_error;

			rxrpc_new_skb(skb);

			_debug("ALLOC SEND %p", skb);

			ASSERTCMP(skb->mark, ==, 0);

			_debug("HS: %u", call->conn->header_size);
			skb_reserve(skb, call->conn->header_size);
			skb->len += call->conn->header_size;

			sp = rxrpc_skb(skb);
			sp->remain = chunk;
			if (sp->remain > skb_tailroom(skb))
				sp->remain = skb_tailroom(skb);

			_net("skb: hr %d, tr %d, hl %d, rm %d",
			       skb_headroom(skb),
			       skb_tailroom(skb),
			       skb_headlen(skb),
			       sp->remain);

			skb->ip_summed = CHECKSUM_UNNECESSARY;
		}

		_debug("append");
		sp = rxrpc_skb(skb);

		/* append next segment of data to the current buffer */
		if (msg_data_left(msg) > 0) {
			int copy = skb_tailroom(skb);
			ASSERTCMP(copy, >, 0);
			if (copy > msg_data_left(msg))
				copy = msg_data_left(msg);
			if (copy > sp->remain)
				copy = sp->remain;

			_debug("add");
			ret = skb_add_data(skb, &msg->msg_iter, copy);
			_debug("added");
			if (ret < 0)
				goto efault;
			sp->remain -= copy;
			skb->mark += copy;
			copied += copy;
		}

		/* check for the far side aborting the call or a network error
		 * occurring */
		if (call->state > RXRPC_CALL_COMPLETE)
			goto call_aborted;

		/* add the packet to the send queue if it's now full */
		if (sp->remain <= 0 ||
		    (msg_data_left(msg) == 0 && !more)) {
			struct rxrpc_connection *conn = call->conn;
			uint32_t seq;
			size_t pad;

			/* pad out if we're using security */
			if (conn->security) {
				pad = conn->security_size + skb->mark;
				pad = conn->size_align - pad;
				pad &= conn->size_align - 1;
				_debug("pad %zu", pad);
				if (pad)
					memset(skb_put(skb, pad), 0, pad);
			}

			seq = atomic_inc_return(&call->sequence);

			sp->hdr.epoch = conn->epoch;
			sp->hdr.cid = call->cid;
			sp->hdr.callNumber = call->call_id;
			sp->hdr.seq = htonl(seq);
			sp->hdr.serial =
				htonl(atomic_inc_return(&conn->serial));
			sp->hdr.type = RXRPC_PACKET_TYPE_DATA;
			sp->hdr.userStatus = 0;
			sp->hdr.securityIndex = conn->security_ix;
			sp->hdr._rsvd = 0;
			sp->hdr.serviceId = conn->service_id;

			sp->hdr.flags = conn->out_clientflag;
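			/* flag the final packet of the phase; otherwise note
			 * that more packets may follow and request an ACK on
			 * alternate packets */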
			if (msg_data_left(msg) == 0 && !more)
				sp->hdr.flags |= RXRPC_LAST_PACKET;
			else if (CIRC_SPACE(call->acks_head, call->acks_tail,
					    call->acks_winsz) > 1)
				sp->hdr.flags |= RXRPC_MORE_PACKETS;
			if (more && seq & 1)
				sp->hdr.flags |= RXRPC_REQUEST_ACK;

			ret = rxrpc_secure_packet(
				call, skb, skb->mark,
				skb->head + sizeof(struct rxrpc_header));
			if (ret < 0)
				goto out;

			memcpy(skb->head, &sp->hdr,
			       sizeof(struct rxrpc_header));
			rxrpc_queue_packet(call, skb, !msg_data_left(msg) && !more);
			skb = NULL;
		}
	} while (msg_data_left(msg) > 0);

success:
	ret = copied;
out:
	call->tx_pending = skb;
	_leave(" = %d", ret);
	return ret;

call_aborted:
	rxrpc_free_skb(skb);
	if (call->state == RXRPC_CALL_NETWORK_ERROR)
		ret = call->conn->trans->peer->net_error;
	else
		ret = -ECONNABORTED;
	_leave(" = %d", ret);
	return ret;

maybe_error:
	if (copied)
		goto success;
	goto out;

efault:
	ret = -EFAULT;
	goto out;
}