/*
 *  linux/net/ipv4/inet_lro.c
 *
 *  Large Receive Offload (ipv4 / tcp)
 *
 *  (C) Copyright IBM Corp. 2007
 *
 *  Authors:
 *       Jan-Bernd Themann <themann@de.ibm.com>
 *       Christoph Raisch <raisch@de.ibm.com>
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2, or (at your option)
 *  any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include <linux/module.h>
#include <linux/if_vlan.h>
#include <linux/inet_lro.h>
#include <net/checksum.h>

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jan-Bernd Themann <themann@de.ibm.com>");
MODULE_DESCRIPTION("Large Receive Offload (ipv4 / tcp)");

#define TCP_HDR_LEN(tcph) (tcph->doff << 2)
#define IP_HDR_LEN(iph) (iph->ihl << 2)
#define TCP_PAYLOAD_LENGTH(iph, tcph) \
	(ntohs(iph->tot_len) - IP_HDR_LEN(iph) - TCP_HDR_LEN(tcph))

#define IPH_LEN_WO_OPTIONS 5
#define TCPH_LEN_WO_OPTIONS 5
#define TCPH_LEN_W_TIMESTAMP 8

#define LRO_MAX_PG_HLEN 64

#define LRO_INC_STATS(lro_mgr, attr) { lro_mgr->stats.attr++; }

/*
 * Basic TCP/IP checks to decide whether a packet is suitable for LRO
 */
static int lro_tcp_ip_check(const struct iphdr *iph, const struct tcphdr *tcph,
			    int len, const struct net_lro_desc *lro_desc)
{
	/* check ip header: don't aggregate padded frames */
	if (ntohs(iph->tot_len) != len)
		return -1;

	if (TCP_PAYLOAD_LENGTH(iph, tcph) == 0)
		return -1;

	if (iph->ihl != IPH_LEN_WO_OPTIONS)
		return -1;

	if (tcph->cwr || tcph->ece || tcph->urg || !tcph->ack ||
	    tcph->rst || tcph->syn || tcph->fin)
		return -1;

	if (INET_ECN_is_ce(ipv4_get_dsfield(iph)))
		return -1;

	if (tcph->doff != TCPH_LEN_WO_OPTIONS &&
	    tcph->doff != TCPH_LEN_W_TIMESTAMP)
		return -1;

	/* check tcp options (only timestamp allowed) */
	if (tcph->doff == TCPH_LEN_W_TIMESTAMP) {
		__be32 *topt = (__be32 *)(tcph + 1);

		if (*topt != htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
				   | (TCPOPT_TIMESTAMP << 8)
				   | TCPOLEN_TIMESTAMP))
			return -1;

		/* timestamp should be in right order */
		topt++;
		if (lro_desc && after(ntohl(lro_desc->tcp_rcv_tsval),
				      ntohl(*topt)))
			return -1;

		/* timestamp reply should not be zero */
		topt++;
		if (*topt == 0)
			return -1;
	}

	return 0;
}

/*
 * Fold the aggregated state (ACK, window, timestamp echo, total length)
 * back into the parent skb's IP/TCP headers and fix up both checksums.
 */
static void lro_update_tcp_ip_header(struct net_lro_desc *lro_desc)
{
	struct iphdr *iph = lro_desc->iph;
	struct tcphdr *tcph = lro_desc->tcph;
	__be32 *p;
	__wsum tcp_hdr_csum;

	tcph->ack_seq = lro_desc->tcp_ack;
	tcph->window = lro_desc->tcp_window;

	if (lro_desc->tcp_saw_tstamp) {
		p = (__be32 *)(tcph + 1);
		*(p + 2) = lro_desc->tcp_rcv_tsecr;
	}

	csum_replace2(&iph->check, iph->tot_len, htons(lro_desc->ip_tot_len));
	iph->tot_len = htons(lro_desc->ip_tot_len);

	tcph->check = 0;
	tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), 0);
	lro_desc->data_csum = csum_add(lro_desc->data_csum, tcp_hdr_csum);
	tcph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
					lro_desc->ip_tot_len -
					IP_HDR_LEN(iph), IPPROTO_TCP,
					lro_desc->data_csum);
}

/*
 * Checksum of the TCP payload alone, obtained by backing the TCP header
 * and the pseudo-header out of the packet's TCP checksum.
 */
static __wsum lro_tcp_data_csum(struct iphdr *iph, struct tcphdr *tcph, int len)
{
	__wsum tcp_csum;
	__wsum tcp_hdr_csum;
	__wsum tcp_ps_hdr_csum;

	tcp_csum = ~csum_unfold(tcph->check);
	tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), tcp_csum);

	tcp_ps_hdr_csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
					     len + TCP_HDR_LEN(tcph),
					     IPPROTO_TCP, 0);

	return csum_sub(csum_sub(tcp_csum, tcp_hdr_csum),
			tcp_ps_hdr_csum);
}

/* Start a new aggregation session with this packet as the parent skb */
static void lro_init_desc(struct net_lro_desc *lro_desc, struct sk_buff *skb,
			  struct iphdr *iph, struct tcphdr *tcph)
{
	int nr_frags;
	__be32 *ptr;
	u32 tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph);

	nr_frags = skb_shinfo(skb)->nr_frags;
	lro_desc->parent = skb;
	lro_desc->next_frag = &(skb_shinfo(skb)->frags[nr_frags]);
	lro_desc->iph = iph;
	lro_desc->tcph = tcph;
	lro_desc->tcp_next_seq = ntohl(tcph->seq) + tcp_data_len;
	lro_desc->tcp_ack = tcph->ack_seq;
	lro_desc->tcp_window = tcph->window;

	lro_desc->pkt_aggr_cnt = 1;
	lro_desc->ip_tot_len = ntohs(iph->tot_len);

	if (tcph->doff == TCPH_LEN_W_TIMESTAMP) {
		ptr = (__be32 *)(tcph + 1);
		lro_desc->tcp_saw_tstamp = 1;
		lro_desc->tcp_rcv_tsval = *(ptr + 1);
		lro_desc->tcp_rcv_tsecr = *(ptr + 2);
	}

	lro_desc->mss = tcp_data_len;
	lro_desc->active = 1;

	lro_desc->data_csum = lro_tcp_data_csum(iph, tcph,
						tcp_data_len);
}

static inline void lro_clear_desc(struct net_lro_desc *lro_desc)
{
	memset(lro_desc, 0, sizeof(struct net_lro_desc));
}

/*
 * Account an in-order segment: update aggregation counters, TCP state
 * and the running payload checksum of the parent skb.
 */
static void lro_add_common(struct net_lro_desc *lro_desc, struct iphdr *iph,
			   struct tcphdr *tcph, int tcp_data_len)
{
	struct sk_buff *parent = lro_desc->parent;
	__be32 *topt;

	lro_desc->pkt_aggr_cnt++;
	lro_desc->ip_tot_len += tcp_data_len;
	lro_desc->tcp_next_seq += tcp_data_len;
	lro_desc->tcp_window = tcph->window;
	lro_desc->tcp_ack = tcph->ack_seq;

	/* don't update tcp_rcv_tsval, would not work with PAWS */
	if (lro_desc->tcp_saw_tstamp) {
		topt = (__be32 *)(tcph + 1);
		lro_desc->tcp_rcv_tsecr = *(topt + 2);
	}

	lro_desc->data_csum = csum_block_add(lro_desc->data_csum,
					     lro_tcp_data_csum(iph, tcph,
							       tcp_data_len),
					     parent->len);

	parent->len += tcp_data_len;
	parent->data_len += tcp_data_len;
	if (tcp_data_len > lro_desc->mss)
		lro_desc->mss = tcp_data_len;
}

/* Chain a segment's payload onto the parent skb via its frag_list */
static void lro_add_packet(struct net_lro_desc *lro_desc, struct sk_buff *skb,
			   struct iphdr *iph, struct tcphdr *tcph)
{
	struct sk_buff *parent = lro_desc->parent;
	int tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph);

	lro_add_common(lro_desc, iph, tcph, tcp_data_len);

	skb_pull(skb, (skb->len - tcp_data_len));
	parent->truesize += skb->truesize;

	if (lro_desc->last_skb)
		lro_desc->last_skb->next = skb;
	else
		skb_shinfo(parent)->frag_list = skb;

	lro_desc->last_skb = skb;
}

/* Returns 0 if the packet belongs to the same TCP connection as the descriptor */
static int lro_check_tcp_conn(struct net_lro_desc *lro_desc,
			      struct iphdr *iph,
			      struct tcphdr *tcph)
{
	if ((lro_desc->iph->saddr != iph->saddr) ||
	    (lro_desc->iph->daddr != iph->daddr) ||
	    (lro_desc->tcph->source != tcph->source) ||
	    (lro_desc->tcph->dest != tcph->dest))
		return -1;
	return 0;
}

/*
 * Find the descriptor already tracking this connection or, failing that,
 * an inactive one; returns NULL if all descriptors are busy.
 */
static struct net_lro_desc *lro_get_desc(struct net_lro_mgr *lro_mgr,
					 struct net_lro_desc *lro_arr,
					 struct iphdr *iph,
					 struct tcphdr *tcph)
{
	struct net_lro_desc *lro_desc = NULL;
	struct net_lro_desc *tmp;
	int max_desc = lro_mgr->max_desc;
	int i;

	for (i = 0; i < max_desc; i++) {
		tmp = &lro_arr[i];
		if (tmp->active)
			if (!lro_check_tcp_conn(tmp, iph, tcph)) {
				lro_desc = tmp;
				goto out;
			}
	}

	for (i = 0; i < max_desc; i++) {
		if (!lro_arr[i].active) {
			lro_desc = &lro_arr[i];
			goto out;
		}
	}

	LRO_INC_STATS(lro_mgr, no_desc);
out:
	return lro_desc;
}

/* Hand the aggregated packet to the network stack and recycle the descriptor */
static void lro_flush(struct net_lro_mgr *lro_mgr,
		      struct net_lro_desc *lro_desc)
{
	if (lro_desc->pkt_aggr_cnt > 1)
		lro_update_tcp_ip_header(lro_desc);

	skb_shinfo(lro_desc->parent)->gso_size = lro_desc->mss;

	if (lro_mgr->features & LRO_F_NAPI)
		netif_receive_skb(lro_desc->parent);
	else
		netif_rx(lro_desc->parent);

	LRO_INC_STATS(lro_mgr, flushed);
	lro_clear_desc(lro_desc);
}

/*
 * Try to aggregate an incoming skb. Returns 0 if the skb was consumed,
 * 1 if the caller must hand it to the stack itself.
 */
static int __lro_proc_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb,
			  void *priv)
{
	struct net_lro_desc *lro_desc;
	struct iphdr *iph;
	struct tcphdr *tcph;
	u64 flags;
	int vlan_hdr_len = 0;

	if (!lro_mgr->get_skb_header ||
	    lro_mgr->get_skb_header(skb, (void *)&iph, (void *)&tcph,
				    &flags, priv))
		goto out;

	if (!(flags & LRO_IPV4) || !(flags & LRO_TCP))
		goto out;

	lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph);
	if (!lro_desc)
		goto out;

	if ((skb->protocol == htons(ETH_P_8021Q)) &&
	    !(lro_mgr->features & LRO_F_EXTRACT_VLAN_ID))
		vlan_hdr_len = VLAN_HLEN;

	if (!lro_desc->active) { /* start new lro session */
		if (lro_tcp_ip_check(iph, tcph, skb->len - vlan_hdr_len, NULL))
			goto out;

		skb->ip_summed = lro_mgr->ip_summed_aggr;
		lro_init_desc(lro_desc, skb, iph, tcph);
		LRO_INC_STATS(lro_mgr, aggregated);
		return 0;
	}

	if (lro_desc->tcp_next_seq != ntohl(tcph->seq))
		goto out2;

	if (lro_tcp_ip_check(iph, tcph, skb->len, lro_desc))
		goto out2;

	lro_add_packet(lro_desc, skb, iph, tcph);
	LRO_INC_STATS(lro_mgr, aggregated);

	if ((lro_desc->pkt_aggr_cnt >= lro_mgr->max_aggr) ||
	    lro_desc->parent->len > (0xFFFF - lro_mgr->dev->mtu))
		lro_flush(lro_mgr, lro_desc);

	return 0;

out2: /* send aggregated SKBs to stack */
	lro_flush(lro_mgr, lro_desc);

out:
	return 1;
}

void lro_receive_skb(struct net_lro_mgr *lro_mgr,
		     struct sk_buff *skb,
		     void *priv)
{
	if (__lro_proc_skb(lro_mgr, skb, priv)) {
		if (lro_mgr->features & LRO_F_NAPI)
			netif_receive_skb(skb);
		else
			netif_rx(skb);
	}
}
EXPORT_SYMBOL(lro_receive_skb);

void lro_flush_all(struct net_lro_mgr *lro_mgr)
{
	int i;
	struct net_lro_desc *lro_desc = lro_mgr->lro_arr;

	for (i = 0; i < lro_mgr->max_desc; i++) {
		if (lro_desc[i].active)
			lro_flush(lro_mgr, &lro_desc[i]);
	}
}
EXPORT_SYMBOL(lro_flush_all);
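
/*
 * Minimal usage sketch for the exported API above, assuming a hypothetical
 * NAPI driver (all "my_*" names and the max_aggr value are illustrative,
 * not part of this file): the driver owns the net_lro_mgr and the descriptor
 * array, supplies a get_skb_header() callback, feeds received skbs through
 * lro_receive_skb() and calls lro_flush_all() before completing its poll.
 *
 *	static int my_get_skb_header(struct sk_buff *skb, void **iphdr,
 *				     void **tcph, u64 *hdr_flags, void *priv)
 *	{
 *		struct iphdr *iph;
 *
 *		skb_reset_network_header(skb);
 *		iph = ip_hdr(skb);
 *		if (iph->protocol != IPPROTO_TCP)
 *			return -1;
 *
 *		skb_set_transport_header(skb, ip_hdrlen(skb));
 *		*iphdr = iph;
 *		*tcph = tcp_hdr(skb);
 *		*hdr_flags = LRO_IPV4 | LRO_TCP;
 *		return 0;
 *	}
 *
 *	static void my_setup_lro(struct my_adapter *adapter)
 *	{
 *		struct net_lro_mgr *mgr = &adapter->lro_mgr;
 *
 *		mgr->dev = adapter->netdev;
 *		mgr->features = LRO_F_NAPI;
 *		mgr->ip_summed_aggr = CHECKSUM_UNNECESSARY;
 *		mgr->max_desc = ARRAY_SIZE(adapter->lro_desc);
 *		mgr->max_aggr = 32;
 *		mgr->lro_arr = adapter->lro_desc;
 *		mgr->get_skb_header = my_get_skb_header;
 *	}
 *
 * In the NAPI poll routine the driver then calls
 *	lro_receive_skb(&adapter->lro_mgr, skb, NULL);
 * for every received skb, and
 *	lro_flush_all(&adapter->lro_mgr);
 * before napi_complete().
 */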