1/*
2 *  linux/net/ipv4/inet_lro.c
3 *
4 *  Large Receive Offload (ipv4 / tcp)
5 *
6 *  (C) Copyright IBM Corp. 2007
7 *
8 *  Authors:
9 *       Jan-Bernd Themann <themann@de.ibm.com>
10 *       Christoph Raisch <raisch@de.ibm.com>
11 *
12 *
13 * This program is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation; either version 2, or (at your option)
16 * any later version.
17 *
18 * This program is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with this program; if not, write to the Free Software
25 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
26 */
27
28
29#include <linux/module.h>
30#include <linux/if_vlan.h>
31#include <linux/inet_lro.h>
32#include <net/checksum.h>
33
34MODULE_LICENSE("GPL");
35MODULE_AUTHOR("Jan-Bernd Themann <themann@de.ibm.com>");
36MODULE_DESCRIPTION("Large Receive Offload (ipv4 / tcp)");
37
/*
 * Header length helpers.  The doff and ihl fields count 32-bit words, so
 * shifting left by two converts them to bytes.  Arguments are parenthesized
 * so that any pointer expression (e.g. "hdr + 1") can be passed safely.
 */
#define TCP_HDR_LEN(tcph) ((tcph)->doff << 2)
#define IP_HDR_LEN(iph) ((iph)->ihl << 2)

/* IP total length minus the IP and TCP header lengths (may be negative). */
#define TCP_PAYLOAD_LENGTH(iph, tcph) \
	(ntohs((iph)->tot_len) - IP_HDR_LEN(iph) - TCP_HDR_LEN(tcph))

/* Header sizes, in 32-bit words, that are accepted for aggregation. */
#define IPH_LEN_WO_OPTIONS 5
#define TCPH_LEN_WO_OPTIONS 5
#define TCPH_LEN_W_TIMESTAMP 8

#define LRO_MAX_PG_HLEN 64

/*
 * Bump one counter in lro_mgr->stats.  Wrapped in do { } while (0) so the
 * macro behaves as a single statement, including inside unbraced if/else.
 */
#define LRO_INC_STATS(lro_mgr, attr) \
	do { (lro_mgr)->stats.attr++; } while (0)
50
51/*
52 * Basic tcp checks whether packet is suitable for LRO
53 */
54
55static int lro_tcp_ip_check(const struct iphdr *iph, const struct tcphdr *tcph,
56			    int len, const struct net_lro_desc *lro_desc)
57{
58        /* check ip header: don't aggregate padded frames */
59	if (ntohs(iph->tot_len) != len)
60		return -1;
61
62	if (TCP_PAYLOAD_LENGTH(iph, tcph) == 0)
63		return -1;
64
65	if (iph->ihl != IPH_LEN_WO_OPTIONS)
66		return -1;
67
68	if (tcph->cwr || tcph->ece || tcph->urg || !tcph->ack ||
69	    tcph->rst || tcph->syn || tcph->fin)
70		return -1;
71
72	if (INET_ECN_is_ce(ipv4_get_dsfield(iph)))
73		return -1;
74
75	if (tcph->doff != TCPH_LEN_WO_OPTIONS &&
76	    tcph->doff != TCPH_LEN_W_TIMESTAMP)
77		return -1;
78
79	/* check tcp options (only timestamp allowed) */
80	if (tcph->doff == TCPH_LEN_W_TIMESTAMP) {
81		__be32 *topt = (__be32 *)(tcph + 1);
82
83		if (*topt != htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
84				   | (TCPOPT_TIMESTAMP << 8)
85				   | TCPOLEN_TIMESTAMP))
86			return -1;
87
88		/* timestamp should be in right order */
89		topt++;
90		if (lro_desc && after(ntohl(lro_desc->tcp_rcv_tsval),
91				      ntohl(*topt)))
92			return -1;
93
94		/* timestamp reply should not be zero */
95		topt++;
96		if (*topt == 0)
97			return -1;
98	}
99
100	return 0;
101}
102
103static void lro_update_tcp_ip_header(struct net_lro_desc *lro_desc)
104{
105	struct iphdr *iph = lro_desc->iph;
106	struct tcphdr *tcph = lro_desc->tcph;
107	__be32 *p;
108	__wsum tcp_hdr_csum;
109
110	tcph->ack_seq = lro_desc->tcp_ack;
111	tcph->window = lro_desc->tcp_window;
112
113	if (lro_desc->tcp_saw_tstamp) {
114		p = (__be32 *)(tcph + 1);
115		*(p+2) = lro_desc->tcp_rcv_tsecr;
116	}
117
118	csum_replace2(&iph->check, iph->tot_len, htons(lro_desc->ip_tot_len));
119	iph->tot_len = htons(lro_desc->ip_tot_len);
120
121	tcph->check = 0;
122	tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), 0);
123	lro_desc->data_csum = csum_add(lro_desc->data_csum, tcp_hdr_csum);
124	tcph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
125					lro_desc->ip_tot_len -
126					IP_HDR_LEN(iph), IPPROTO_TCP,
127					lro_desc->data_csum);
128}
129
130static __wsum lro_tcp_data_csum(struct iphdr *iph, struct tcphdr *tcph, int len)
131{
132	__wsum tcp_csum;
133	__wsum tcp_hdr_csum;
134	__wsum tcp_ps_hdr_csum;
135
136	tcp_csum = ~csum_unfold(tcph->check);
137	tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), tcp_csum);
138
139	tcp_ps_hdr_csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
140					     len + TCP_HDR_LEN(tcph),
141					     IPPROTO_TCP, 0);
142
143	return csum_sub(csum_sub(tcp_csum, tcp_hdr_csum),
144			tcp_ps_hdr_csum);
145}
146
147static void lro_init_desc(struct net_lro_desc *lro_desc, struct sk_buff *skb,
148			  struct iphdr *iph, struct tcphdr *tcph)
149{
150	int nr_frags;
151	__be32 *ptr;
152	u32 tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph);
153
154	nr_frags = skb_shinfo(skb)->nr_frags;
155	lro_desc->parent = skb;
156	lro_desc->next_frag = &(skb_shinfo(skb)->frags[nr_frags]);
157	lro_desc->iph = iph;
158	lro_desc->tcph = tcph;
159	lro_desc->tcp_next_seq = ntohl(tcph->seq) + tcp_data_len;
160	lro_desc->tcp_ack = tcph->ack_seq;
161	lro_desc->tcp_window = tcph->window;
162
163	lro_desc->pkt_aggr_cnt = 1;
164	lro_desc->ip_tot_len = ntohs(iph->tot_len);
165
166	if (tcph->doff == 8) {
167		ptr = (__be32 *)(tcph+1);
168		lro_desc->tcp_saw_tstamp = 1;
169		lro_desc->tcp_rcv_tsval = *(ptr+1);
170		lro_desc->tcp_rcv_tsecr = *(ptr+2);
171	}
172
173	lro_desc->mss = tcp_data_len;
174	lro_desc->active = 1;
175
176	lro_desc->data_csum = lro_tcp_data_csum(iph, tcph,
177						tcp_data_len);
178}
179
180static inline void lro_clear_desc(struct net_lro_desc *lro_desc)
181{
182	memset(lro_desc, 0, sizeof(struct net_lro_desc));
183}
184
185static void lro_add_common(struct net_lro_desc *lro_desc, struct iphdr *iph,
186			   struct tcphdr *tcph, int tcp_data_len)
187{
188	struct sk_buff *parent = lro_desc->parent;
189	__be32 *topt;
190
191	lro_desc->pkt_aggr_cnt++;
192	lro_desc->ip_tot_len += tcp_data_len;
193	lro_desc->tcp_next_seq += tcp_data_len;
194	lro_desc->tcp_window = tcph->window;
195	lro_desc->tcp_ack = tcph->ack_seq;
196
197	/* don't update tcp_rcv_tsval, would not work with PAWS */
198	if (lro_desc->tcp_saw_tstamp) {
199		topt = (__be32 *) (tcph + 1);
200		lro_desc->tcp_rcv_tsecr = *(topt + 2);
201	}
202
203	lro_desc->data_csum = csum_block_add(lro_desc->data_csum,
204					     lro_tcp_data_csum(iph, tcph,
205							       tcp_data_len),
206					     parent->len);
207
208	parent->len += tcp_data_len;
209	parent->data_len += tcp_data_len;
210	if (tcp_data_len > lro_desc->mss)
211		lro_desc->mss = tcp_data_len;
212}
213
214static void lro_add_packet(struct net_lro_desc *lro_desc, struct sk_buff *skb,
215			   struct iphdr *iph, struct tcphdr *tcph)
216{
217	struct sk_buff *parent = lro_desc->parent;
218	int tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph);
219
220	lro_add_common(lro_desc, iph, tcph, tcp_data_len);
221
222	skb_pull(skb, (skb->len - tcp_data_len));
223	parent->truesize += skb->truesize;
224
225	if (lro_desc->last_skb)
226		lro_desc->last_skb->next = skb;
227	else
228		skb_shinfo(parent)->frag_list = skb;
229
230	lro_desc->last_skb = skb;
231}
232
233
234static int lro_check_tcp_conn(struct net_lro_desc *lro_desc,
235			      struct iphdr *iph,
236			      struct tcphdr *tcph)
237{
238	if ((lro_desc->iph->saddr != iph->saddr) ||
239	    (lro_desc->iph->daddr != iph->daddr) ||
240	    (lro_desc->tcph->source != tcph->source) ||
241	    (lro_desc->tcph->dest != tcph->dest))
242		return -1;
243	return 0;
244}
245
246static struct net_lro_desc *lro_get_desc(struct net_lro_mgr *lro_mgr,
247					 struct net_lro_desc *lro_arr,
248					 struct iphdr *iph,
249					 struct tcphdr *tcph)
250{
251	struct net_lro_desc *lro_desc = NULL;
252	struct net_lro_desc *tmp;
253	int max_desc = lro_mgr->max_desc;
254	int i;
255
256	for (i = 0; i < max_desc; i++) {
257		tmp = &lro_arr[i];
258		if (tmp->active)
259			if (!lro_check_tcp_conn(tmp, iph, tcph)) {
260				lro_desc = tmp;
261				goto out;
262			}
263	}
264
265	for (i = 0; i < max_desc; i++) {
266		if (!lro_arr[i].active) {
267			lro_desc = &lro_arr[i];
268			goto out;
269		}
270	}
271
272	LRO_INC_STATS(lro_mgr, no_desc);
273out:
274	return lro_desc;
275}
276
277static void lro_flush(struct net_lro_mgr *lro_mgr,
278		      struct net_lro_desc *lro_desc)
279{
280	if (lro_desc->pkt_aggr_cnt > 1)
281		lro_update_tcp_ip_header(lro_desc);
282
283	skb_shinfo(lro_desc->parent)->gso_size = lro_desc->mss;
284
285	if (lro_mgr->features & LRO_F_NAPI)
286		netif_receive_skb(lro_desc->parent);
287	else
288		netif_rx(lro_desc->parent);
289
290	LRO_INC_STATS(lro_mgr, flushed);
291	lro_clear_desc(lro_desc);
292}
293
294static int __lro_proc_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb,
295			  void *priv)
296{
297	struct net_lro_desc *lro_desc;
298	struct iphdr *iph;
299	struct tcphdr *tcph;
300	u64 flags;
301	int vlan_hdr_len = 0;
302
303	if (!lro_mgr->get_skb_header ||
304	    lro_mgr->get_skb_header(skb, (void *)&iph, (void *)&tcph,
305				    &flags, priv))
306		goto out;
307
308	if (!(flags & LRO_IPV4) || !(flags & LRO_TCP))
309		goto out;
310
311	lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph);
312	if (!lro_desc)
313		goto out;
314
315	if ((skb->protocol == htons(ETH_P_8021Q)) &&
316	    !(lro_mgr->features & LRO_F_EXTRACT_VLAN_ID))
317		vlan_hdr_len = VLAN_HLEN;
318
319	if (!lro_desc->active) { /* start new lro session */
320		if (lro_tcp_ip_check(iph, tcph, skb->len - vlan_hdr_len, NULL))
321			goto out;
322
323		skb->ip_summed = lro_mgr->ip_summed_aggr;
324		lro_init_desc(lro_desc, skb, iph, tcph);
325		LRO_INC_STATS(lro_mgr, aggregated);
326		return 0;
327	}
328
329	if (lro_desc->tcp_next_seq != ntohl(tcph->seq))
330		goto out2;
331
332	if (lro_tcp_ip_check(iph, tcph, skb->len, lro_desc))
333		goto out2;
334
335	lro_add_packet(lro_desc, skb, iph, tcph);
336	LRO_INC_STATS(lro_mgr, aggregated);
337
338	if ((lro_desc->pkt_aggr_cnt >= lro_mgr->max_aggr) ||
339	    lro_desc->parent->len > (0xFFFF - lro_mgr->dev->mtu))
340		lro_flush(lro_mgr, lro_desc);
341
342	return 0;
343
344out2: /* send aggregated SKBs to stack */
345	lro_flush(lro_mgr, lro_desc);
346
347out:
348	return 1;
349}
350
351void lro_receive_skb(struct net_lro_mgr *lro_mgr,
352		     struct sk_buff *skb,
353		     void *priv)
354{
355	if (__lro_proc_skb(lro_mgr, skb, priv)) {
356		if (lro_mgr->features & LRO_F_NAPI)
357			netif_receive_skb(skb);
358		else
359			netif_rx(skb);
360	}
361}
362EXPORT_SYMBOL(lro_receive_skb);
363
364void lro_flush_all(struct net_lro_mgr *lro_mgr)
365{
366	int i;
367	struct net_lro_desc *lro_desc = lro_mgr->lro_arr;
368
369	for (i = 0; i < lro_mgr->max_desc; i++) {
370		if (lro_desc[i].active)
371			lro_flush(lro_mgr, &lro_desc[i]);
372	}
373}
374EXPORT_SYMBOL(lro_flush_all);
375