root/net/ipv6/reassembly.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. ip6_frag_ecn
  2. ip6_frag_expire
  3. fq_find
  4. ip6_frag_queue
  5. ip6_frag_reasm
  6. ipv6_frag_rcv
  7. ip6_frags_ns_sysctl_register
  8. ip6_frags_ns_sysctl_unregister
  9. ip6_frags_sysctl_register
  10. ip6_frags_sysctl_unregister
  11. ip6_frags_ns_sysctl_register
  12. ip6_frags_ns_sysctl_unregister
  13. ip6_frags_sysctl_register
  14. ip6_frags_sysctl_unregister
  15. ipv6_frags_init_net
  16. ipv6_frags_pre_exit_net
  17. ipv6_frags_exit_net
  18. ipv6_frag_init
  19. ipv6_frag_exit

   1 // SPDX-License-Identifier: GPL-2.0-or-later
   2 /*
   3  *      IPv6 fragment reassembly
   4  *      Linux INET6 implementation
   5  *
   6  *      Authors:
   7  *      Pedro Roque             <roque@di.fc.ul.pt>
   8  *
   9  *      Based on: net/ipv4/ip_fragment.c
  10  */
  11 
  12 /*
  13  *      Fixes:
  14  *      Andi Kleen      Make it work with multiple hosts.
  15  *                      More RFC compliance.
  16  *
  17  *      Horst von Brand Add missing #include <linux/string.h>
  18  *      Alexey Kuznetsov        SMP races, threading, cleanup.
  19  *      Patrick McHardy         LRU queue of frag heads for evictor.
  20  *      Mitsuru KANDA @USAGI    Register inet6_protocol{}.
  21  *      David Stevens and
  22  *      YOSHIFUJI,H. @USAGI     Always remove fragment header to
  23  *                              calculate ICV correctly.
  24  */
  25 
  26 #define pr_fmt(fmt) "IPv6: " fmt
  27 
  28 #include <linux/errno.h>
  29 #include <linux/types.h>
  30 #include <linux/string.h>
  31 #include <linux/socket.h>
  32 #include <linux/sockios.h>
  33 #include <linux/jiffies.h>
  34 #include <linux/net.h>
  35 #include <linux/list.h>
  36 #include <linux/netdevice.h>
  37 #include <linux/in6.h>
  38 #include <linux/ipv6.h>
  39 #include <linux/icmpv6.h>
  40 #include <linux/random.h>
  41 #include <linux/jhash.h>
  42 #include <linux/skbuff.h>
  43 #include <linux/slab.h>
  44 #include <linux/export.h>
  45 
  46 #include <net/sock.h>
  47 #include <net/snmp.h>
  48 
  49 #include <net/ipv6.h>
  50 #include <net/ip6_route.h>
  51 #include <net/protocol.h>
  52 #include <net/transp_v6.h>
  53 #include <net/rawv6.h>
  54 #include <net/ndisc.h>
  55 #include <net/addrconf.h>
  56 #include <net/ipv6_frag.h>
  57 #include <net/inet_ecn.h>
  58 
   59 static const char ip6_frag_cache_name[] = "ip6-frags"; /* assigned to ip6_frags.frags_cache_name in ipv6_frag_init() */
  60 
   61 static u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)
   62 {
              /* Return a one-hot mask: bit N is set, where N is the header's
               * DS field masked with INET_ECN_MASK.  ip6_frag_queue() ORs the
               * mask of every accepted fragment into fq->ecn.
               */
   63         return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK);
   64 }
  65 
   66 static struct inet_frags ip6_frags; /* populated and passed to inet_frags_init() by ipv6_frag_init() */
  67 
   68 static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb,
   69                           struct sk_buff *prev_tail, struct net_device *dev);
      /* Forward declaration: ip6_frag_queue() calls this before its definition. */
  70 
   71 static void ip6_frag_expire(struct timer_list *t)
   72 {
              /* Timer callback installed as ip6_frags.frag_expire by
               * ipv6_frag_init().  Maps timer @t back to the inet_frag_queue
               * whose 'timer' member it is, then to the enclosing frag_queue,
               * and passes that queue and its owning net to
               * ip6frag_expire_frag_queue().
               */
   73         struct inet_frag_queue *frag = from_timer(frag, t, timer);
   74         struct frag_queue *fq;
   75 
   76         fq = container_of(frag, struct frag_queue, q);
   77 
   78         ip6frag_expire_frag_queue(fq->q.fqdir->net, fq);
   79 }
  80 
   81 static struct frag_queue *
   82 fq_find(struct net *net, __be32 id, const struct ipv6hdr *hdr, int iif)
   83 {
              /* Look up a reassembly queue in net->ipv6.fqdir through
               * inet_frag_find().
               *
               * @net: namespace whose fqdir is searched
               * @id:  identification value taken from the fragment header
               * @hdr: IPv6 header supplying the source/destination addresses
               * @iif: incoming interface index
               *
               * Returns the frag_queue embedding the queue that
               * inet_frag_find() returned, or NULL when it returned NULL.
               */
   84         struct frag_v6_compare_key key = {
   85                 .id = id,
   86                 .saddr = hdr->saddr,
   87                 .daddr = hdr->daddr,
   88                 .user = IP6_DEFRAG_LOCAL_DELIVER,
   89                 .iif = iif,
   90         };
   91         struct inet_frag_queue *q;
   92 
              /* The interface index only stays in the key for multicast or
               * link-local destinations; for every other address type it
               * is zeroed.
               */
   93         if (!(ipv6_addr_type(&hdr->daddr) & (IPV6_ADDR_MULTICAST |
   94                                             IPV6_ADDR_LINKLOCAL)))
   95                 key.iif = 0;
   96 
   97         q = inet_frag_find(net->ipv6.fqdir, &key);
   98         if (!q)
   99                 return NULL;
  100 
  101         return container_of(q, struct frag_queue, q);
  102 }
 103 
  104 static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
  105                           struct frag_hdr *fhdr, int nhoff,
  106                           u32 *prob_offset)
  107 {
              /* Validate one fragment and insert it into reassembly queue @fq;
               * when the queue becomes complete, run ip6_frag_reasm().
               * ipv6_frag_rcv() calls this with fq->q.lock held.
               *
               * @fq:          queue the fragment belongs to
               * @skb:         the fragment
               * @fhdr:        fragment header inside @skb
               * @nhoff:       stored as fq->nhoffset when this is the fragment
               *               at offset 0
               * @prob_offset: set to a non-zero header offset when the
               *               fragment is malformed; the caller pre-sets it
               *               to 0 and tests it afterwards
               *
               * Returns:
               *   -EINPROGRESS  fragment queued, datagram not yet complete
               *   the result of ip6_frag_reasm() when this fragment completed
               *                 the datagram
               *   -1            with *prob_offset set; @skb is NOT freed here
               *   otherwise a negative error (-ENOENT, -EINVAL, -ENOMEM or the
               *                 pskb_trim_rcsum() error) after kfree_skb(@skb)
               */
  108         struct net *net = dev_net(skb_dst(skb)->dev);
  109         int offset, end, fragsize;
  110         struct sk_buff *prev_tail;
  111         struct net_device *dev;
  112         int err = -ENOENT;
  113         u8 ecn;
  114 
              /* Queue already marked complete: drop the fragment (-ENOENT). */
  115         if (fq->q.flags & INET_FRAG_COMPLETE)
  116                 goto err;
  117 
  118         err = -EINVAL;
              /* offset: frag_off with its low three bits cleared.
               * end: offset plus the payload length minus the bytes lying
               * between the end of the IPv6 header and the end of the
               * fragment header.
               */
  119         offset = ntohs(fhdr->frag_off) & ~0x7;
  120         end = offset + (ntohs(ipv6_hdr(skb)->payload_len) -
  121                         ((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1)));
  122 
  123         if ((unsigned int)end > IPV6_MAXPLEN) {
  124                 *prob_offset = (u8 *)&fhdr->frag_off - skb_network_header(skb);
  125                 /* note that if prob_offset is set, the skb is freed elsewhere,
  126                  * we do not free it here.
  127                  */
  128                 return -1;
  129         }
  130 
  131         ecn = ip6_frag_ecn(ipv6_hdr(skb));
  132 
              /* With CHECKSUM_COMPLETE, subtract the checksum of the bytes from
               * the network header up to the end of the fragment header; those
               * bytes are pulled off below.
               */
  133         if (skb->ip_summed == CHECKSUM_COMPLETE) {
  134                 const unsigned char *nh = skb_network_header(skb);
  135                 skb->csum = csum_sub(skb->csum,
  136                                      csum_partial(nh, (u8 *)(fhdr + 1) - nh,
  137                                                   0));
  138         }
  139 
  140         /* Is this the final fragment? */
  141         if (!(fhdr->frag_off & htons(IP6_MF))) {
  142                 /* If we already have some bits beyond end
  143                  * or have different end, the segment is corrupted.
  144                  */
  145                 if (end < fq->q.len ||
  146                     ((fq->q.flags & INET_FRAG_LAST_IN) && end != fq->q.len))
  147                         goto discard_fq;
  148                 fq->q.flags |= INET_FRAG_LAST_IN;
  149                 fq->q.len = end;
  150         } else {
  151                 /* Check if the fragment is rounded to 8 bytes.
  152                  * Required by the RFC.
  153                  */
  154                 if (end & 0x7) {
  155                         /* RFC2460 says always send parameter problem in
  156                          * this case. -DaveM
  157                          */
  158                         *prob_offset = offsetof(struct ipv6hdr, payload_len);
  159                         return -1;
  160                 }
  161                 if (end > fq->q.len) {
  162                         /* Some bits beyond end -> corruption. */
  163                         if (fq->q.flags & INET_FRAG_LAST_IN)
  164                                 goto discard_fq;
  165                         fq->q.len = end;
  166                 }
  167         }
  168 
              /* A fragment carrying no data kills the whole queue (-EINVAL). */
  169         if (end == offset)
  170                 goto discard_fq;
  171 
  172         err = -ENOMEM;
  173         /* Point into the IP datagram 'data' part. */
  174         if (!pskb_pull(skb, (u8 *) (fhdr + 1) - skb->data))
  175                 goto discard_fq;
  176 
              /* Trim the skb to exactly the fragment's data length. */
  177         err = pskb_trim_rcsum(skb, end - offset);
  178         if (err)
  179                 goto discard_fq;
  180 
  181         /* Note : skb->rbnode and skb->dev share the same location. */
  182         dev = skb->dev;
  183         /* Makes sure compiler won't do silly aliasing games */
  184         barrier();
  185 
              /* Remember the tail as it was before insertion; it is passed on
               * to ip6_frag_reasm().
               */
  186         prev_tail = fq->q.fragments_tail;
  187         err = inet_frag_queue_insert(&fq->q, skb, offset, end);
  188         if (err)
  189                 goto insert_error;
  190 
  191         if (dev)
  192                 fq->iif = dev->ifindex;
  193 
  194         fq->q.stamp = skb->tstamp;
  195         fq->q.meat += skb->len;
  196         fq->ecn |= ecn;
  197         add_frag_mem_limit(fq->q.fqdir, skb->truesize);
  198 
              /* Track the largest fragment seen; ip6_frag_reasm() copies it
               * into IP6CB(skb)->frag_max_size.
               */
  199         fragsize = -skb_network_offset(skb) + skb->len;
  200         if (fragsize > fq->q.max_size)
  201                 fq->q.max_size = fragsize;
  202 
  203         /* The first fragment.
  204          * nhoffset is obtained from the first fragment, of course.
  205          */
  206         if (offset == 0) {
  207                 fq->nhoffset = nhoff;
  208                 fq->q.flags |= INET_FRAG_FIRST_IN;
  209         }
  210 
              /* First and last fragment present and the bytes received equal
               * the expected length: reassemble.  skb->_skb_refdst is zeroed
               * for the duration of the call and put back afterwards.
               */
  211         if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
  212             fq->q.meat == fq->q.len) {
  213                 unsigned long orefdst = skb->_skb_refdst;
  214 
  215                 skb->_skb_refdst = 0UL;
  216                 err = ip6_frag_reasm(fq, skb, prev_tail, dev);
  217                 skb->_skb_refdst = orefdst;
  218                 return err;
  219         }
  220 
  221         skb_dst_drop(skb);
  222         return -EINPROGRESS;
  223 
  224 insert_error:
              /* IPFRAG_DUP: free only this skb and leave the queue alone.  Any
               * other insert error is counted as an overlap and falls through
               * to kill the queue.
               */
  225         if (err == IPFRAG_DUP) {
  226                 kfree_skb(skb);
  227                 return -EINVAL;
  228         }
  229         err = -EINVAL;
  230         __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
  231                         IPSTATS_MIB_REASM_OVERLAPS);
  232 discard_fq:
  233         inet_frag_kill(&fq->q);
  234         __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
  235                         IPSTATS_MIB_REASMFAILS);
  236 err:
  237         kfree_skb(skb);
  238         return err;
  239 }
 240 
  241 /*
  242  *      Check if this packet is complete.
  243  *
  244  *      It is called with locked fq, and caller must check that
  245  *      queue is eligible for reassembly i.e. it is not COMPLETE,
  246  *      the last and the first frames arrived and all the bits are here.
       *
       *      @fq:        queue to reassemble
       *      @skb:       the fragment that completed the queue
       *      @prev_tail: fq->q.fragments_tail as sampled before @skb was
       *                  inserted; handed to inet_frag_reasm_prepare()
       *      @dev:       device saved by the caller, written to skb->dev
       *
       *      Returns 1 on success and -1 on any failure; the failure path
       *      bumps IPSTATS_MIB_REASMFAILS.
  247  */
  248 static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb,
  249                           struct sk_buff *prev_tail, struct net_device *dev)
  250 {
  251         struct net *net = fq->q.fqdir->net;
  252         unsigned int nhoff;
  253         void *reasm_data;
  254         int payload_len;
  255         u8 ecn;
  256 
  257         inet_frag_kill(&fq->q);
  258 
              /* fq->ecn (the OR of all per-fragment masks) indexes
               * ip_frag_ecn_table; an entry of 0xff is treated as failure.
               */
  259         ecn = ip_frag_ecn_table[fq->ecn];
  260         if (unlikely(ecn == 0xff))
  261                 goto out_fail;
  262 
  263         reasm_data = inet_frag_reasm_prepare(&fq->q, skb, prev_tail);
  264         if (!reasm_data)
  265                 goto out_oom;
  266 
              /* New payload length: bytes between the network header and
               * skb->data, minus the IPv6 header, plus the queue's total
               * length, minus the fragment header that is removed below.
               */
  267         payload_len = ((skb->data - skb_network_header(skb)) -
  268                        sizeof(struct ipv6hdr) + fq->q.len -
  269                        sizeof(struct frag_hdr));
  270         if (payload_len > IPV6_MAXPLEN)
  271                 goto out_oversize;
  272 
  273         /* We have to remove fragment header from datagram and to relocate
  274          * header in order to calculate ICV correctly. */
  275         nhoff = fq->nhoffset;
              /* Overwrite the byte at nhoff with the first byte of the
               * fragment header, then slide everything in front of skb->data
               * forward by sizeof(struct frag_hdr) and move the header
               * offsets by the same amount.
               */
  276         skb_network_header(skb)[nhoff] = skb_transport_header(skb)[0];
  277         memmove(skb->head + sizeof(struct frag_hdr), skb->head,
  278                 (skb->data - skb->head) - sizeof(struct frag_hdr));
  279         if (skb_mac_header_was_set(skb))
  280                 skb->mac_header += sizeof(struct frag_hdr);
  281         skb->network_header += sizeof(struct frag_hdr);
  282 
  283         skb_reset_transport_header(skb);
  284 
  285         inet_frag_reasm_finish(&fq->q, skb, reasm_data, true);
  286 
              /* Restore the device pointer and publish the reassembly result
               * in the IPv6 header and the skb control block.
               */
  287         skb->dev = dev;
  288         ipv6_hdr(skb)->payload_len = htons(payload_len);
  289         ipv6_change_dsfield(ipv6_hdr(skb), 0xff, ecn);
  290         IP6CB(skb)->nhoff = nhoff;
  291         IP6CB(skb)->flags |= IP6SKB_FRAGMENTED;
  292         IP6CB(skb)->frag_max_size = fq->q.max_size;
  293 
  294         /* Yes, and fold redundant checksum back. 8) */
  295         skb_postpush_rcsum(skb, skb_network_header(skb),
  296                            skb_network_header_len(skb));
  297 
  298         rcu_read_lock();
  299         __IP6_INC_STATS(net, __in6_dev_stats_get(dev, skb), IPSTATS_MIB_REASMOKS);
  300         rcu_read_unlock();
              /* Reset the queue's fragment bookkeeping. */
  301         fq->q.rb_fragments = RB_ROOT;
  302         fq->q.fragments_tail = NULL;
  303         fq->q.last_run_head = NULL;
  304         return 1;
  305 
  306 out_oversize:
  307         net_dbg_ratelimited("ip6_frag_reasm: payload len = %d\n", payload_len);
  308         goto out_fail;
  309 out_oom:
  310         net_dbg_ratelimited("ip6_frag_reasm: no memory for reassembly\n");
  311 out_fail:
  312         rcu_read_lock();
  313         __IP6_INC_STATS(net, __in6_dev_stats_get(dev, skb), IPSTATS_MIB_REASMFAILS);
  314         rcu_read_unlock();
              /* inet_frag_kill() was already called at function entry; this
               * repeats the call on the failure path.
               */
  315         inet_frag_kill(&fq->q);
  316         return -1;
  317 }
 318 
  319 static int ipv6_frag_rcv(struct sk_buff *skb)
  320 {
              /* Receive handler for the fragment extension header; it is the
               * .handler of frag_protocol, which ipv6_frag_init() registers
               * for IPPROTO_FRAGMENT.
               *
               * Returns 1 when the header turns out not to describe a real
               * fragment, the result of ip6_frag_queue() when a queue was
               * found, and -1 on header errors or when no queue is available.
               */
  321         struct frag_hdr *fhdr;
  322         struct frag_queue *fq;
  323         const struct ipv6hdr *hdr = ipv6_hdr(skb);
  324         struct net *net = dev_net(skb_dst(skb)->dev);
  325         int iif;
  326 
              /* An skb already flagged IP6SKB_FRAGMENTED is rejected with a
               * parameter problem.
               */
  327         if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED)
  328                 goto fail_hdr;
  329 
  330         __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMREQDS);
  331 
  332         /* Jumbo payload inhibits frag. header */
  333         if (hdr->payload_len == 0)
  334                 goto fail_hdr;
  335 
  336         if (!pskb_may_pull(skb, (skb_transport_offset(skb) +
  337                                  sizeof(struct frag_hdr))))
  338                 goto fail_hdr;
  339 
              /* Re-read the header pointer after pskb_may_pull().
               * NOTE(review): presumably because the pull can move the skb
               * data - confirm.
               */
  340         hdr = ipv6_hdr(skb);
  341         fhdr = (struct frag_hdr *)skb_transport_header(skb);
  342 
              /* Every frag_off bit selected by 0xFFF9 is zero (presumably the
               * fragment offset and the more-fragments flag): skip the header
               * and mark the skb as already processed, without queueing.
               */
  343         if (!(fhdr->frag_off & htons(0xFFF9))) {
  344                 /* It is not a fragmented frame */
  345                 skb->transport_header += sizeof(struct frag_hdr);
  346                 __IP6_INC_STATS(net,
  347                                 ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMOKS);
  348 
  349                 IP6CB(skb)->nhoff = (u8 *)fhdr - skb_network_header(skb);
  350                 IP6CB(skb)->flags |= IP6SKB_FRAGMENTED;
  351                 return 1;
  352         }
  353 
  354         iif = skb->dev ? skb->dev->ifindex : 0;
  355         fq = fq_find(net, fhdr->identification, hdr, iif);
  356         if (fq) {
  357                 u32 prob_offset = 0;
  358                 int ret;
  359 
                      /* ip6_frag_queue() runs under the per-queue lock. */
  360                 spin_lock(&fq->q.lock);
  361 
  362                 fq->iif = iif;
  363                 ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff,
  364                                      &prob_offset);
  365 
  366                 spin_unlock(&fq->q.lock);
  367                 inet_frag_put(&fq->q);
                      /* A non-zero prob_offset means ip6_frag_queue() left the
                       * skb unfreed so that a parameter problem can be sent.
                       */
  368                 if (prob_offset) {
  369                         __IP6_INC_STATS(net, __in6_dev_get_safely(skb->dev),
  370                                         IPSTATS_MIB_INHDRERRORS);
  371                         /* icmpv6_param_prob() calls kfree_skb(skb) */
  372                         icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, prob_offset);
  373                 }
  374                 return ret;
  375         }
  376 
              /* No queue could be obtained: count a failure and drop. */
  377         __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMFAILS);
  378         kfree_skb(skb);
  379         return -1;
  380 
  381 fail_hdr:
              /* Header error: the parameter-problem pointer is the length of
               * the network header.
               */
  382         __IP6_INC_STATS(net, __in6_dev_get_safely(skb->dev),
  383                         IPSTATS_MIB_INHDRERRORS);
  384         icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb_network_header_len(skb));
  385         return -1;
  386 }
 387 
  388 static const struct inet6_protocol frag_protocol = {
              /* Added for IPPROTO_FRAGMENT by ipv6_frag_init() and removed
               * again by ipv6_frag_exit().
               */
  389         .handler        =       ipv6_frag_rcv,
  390         .flags          =       INET6_PROTO_NOPOLICY,
  391 };
 392 
 393 #ifdef CONFIG_SYSCTL
 394 
  395 static struct ctl_table ip6_frags_ns_ctl_table[] = {
              /* Per-namespace sysctl template.  The .data, .extra1 and .extra2
               * pointers are filled in by ip6_frags_ns_sysctl_register(), which
               * addresses the entries by index (0, 1, 2); keep the order here
               * in sync with that function.
               */
  396         {
  397                 .procname       = "ip6frag_high_thresh",
  398                 .maxlen         = sizeof(unsigned long),
  399                 .mode           = 0644,
  400                 .proc_handler   = proc_doulongvec_minmax,
  401         },
  402         {
  403                 .procname       = "ip6frag_low_thresh",
  404                 .maxlen         = sizeof(unsigned long),
  405                 .mode           = 0644,
  406                 .proc_handler   = proc_doulongvec_minmax,
  407         },
  408         {
  409                 .procname       = "ip6frag_time",
  410                 .maxlen         = sizeof(int),
  411                 .mode           = 0644,
  412                 .proc_handler   = proc_dointvec_jiffies,
  413         },
  414         { }
  415 };
 416 
  417 /* secret interval has been deprecated */
  418 static int ip6_frags_secret_interval_unused;
  419 static struct ctl_table ip6_frags_ctl_table[] = {
              /* Registered once, for init_net, by ip6_frags_sysctl_register().
               * The single entry is backed by the variable above, which
               * nothing else in this file reads.
               */
  420         {
  421                 .procname       = "ip6frag_secret_interval",
  422                 .data           = &ip6_frags_secret_interval_unused,
  423                 .maxlen         = sizeof(int),
  424                 .mode           = 0644,
  425                 .proc_handler   = proc_dointvec_jiffies,
  426         },
  427         { }
  428 };
 429 
  430 static int __net_init ip6_frags_ns_sysctl_register(struct net *net)
  431 {
              /* Register the per-namespace fragment sysctls under "net/ipv6"
               * and store the resulting header in net->ipv6.sysctl.frags_hdr.
               *
               * Returns 0 on success, -ENOMEM when either the table copy or
               * the registration fails.
               */
  432         struct ctl_table *table;
  433         struct ctl_table_header *hdr;
  434 
              /* init_net uses the static table directly; every other
               * namespace works on its own kmemdup()ed copy.
               */
  435         table = ip6_frags_ns_ctl_table;
  436         if (!net_eq(net, &init_net)) {
  437                 table = kmemdup(table, sizeof(ip6_frags_ns_ctl_table), GFP_KERNEL);
  438                 if (!table)
  439                         goto err_alloc;
  440 
  441         }
              /* Point the entries at this namespace's fqdir fields.  Each
               * threshold entry also gets a pointer to the other threshold in
               * extra1/extra2 (presumably used as a min/max bound by
               * proc_doulongvec_minmax - confirm).
               */
  442         table[0].data   = &net->ipv6.fqdir->high_thresh;
  443         table[0].extra1 = &net->ipv6.fqdir->low_thresh;
  444         table[1].data   = &net->ipv6.fqdir->low_thresh;
  445         table[1].extra2 = &net->ipv6.fqdir->high_thresh;
  446         table[2].data   = &net->ipv6.fqdir->timeout;
  447 
  448         hdr = register_net_sysctl(net, "net/ipv6", table);
  449         if (!hdr)
  450                 goto err_reg;
  451 
  452         net->ipv6.sysctl.frags_hdr = hdr;
  453         return 0;
  454 
  455 err_reg:
              /* Only the duplicated table is heap memory; never free the
               * static one used by init_net.
               */
  456         if (!net_eq(net, &init_net))
  457                 kfree(table);
  458 err_alloc:
  459         return -ENOMEM;
  460 }
 461 
  462 static void __net_exit ip6_frags_ns_sysctl_unregister(struct net *net)
  463 {
              /* Undo ip6_frags_ns_sysctl_register().  The table pointer is
               * read from the header before the header is unregistered, and
               * the table is freed only for namespaces other than init_net,
               * the only ones for which it was kmemdup()ed.
               */
  464         struct ctl_table *table;
  465 
  466         table = net->ipv6.sysctl.frags_hdr->ctl_table_arg;
  467         unregister_net_sysctl_table(net->ipv6.sysctl.frags_hdr);
  468         if (!net_eq(net, &init_net))
  469                 kfree(table);
  470 }
 471 
  472 static struct ctl_table_header *ip6_ctl_header; /* set by ip6_frags_sysctl_register(), released by ip6_frags_sysctl_unregister() */
 473 
  474 static int ip6_frags_sysctl_register(void)
  475 {
              /* Register ip6_frags_ctl_table for init_net under "net/ipv6" and
               * keep the header in ip6_ctl_header.
               * Returns 0 on success, -ENOMEM if registration returned NULL.
               */
  476         ip6_ctl_header = register_net_sysctl(&init_net, "net/ipv6",
  477                         ip6_frags_ctl_table);
  478         return ip6_ctl_header == NULL ? -ENOMEM : 0;
  479 }
 480 
  481 static void ip6_frags_sysctl_unregister(void)
  482 {
              /* Drop the table registered by ip6_frags_sysctl_register(). */
  483         unregister_net_sysctl_table(ip6_ctl_header);
  484 }
 485 #else
  486 static int ip6_frags_ns_sysctl_register(struct net *net)
  487 {
              /* Stub for builds without CONFIG_SYSCTL: nothing to register,
               * always succeeds.
               */
  488         return 0;
  489 }
 490 
  491 static void ip6_frags_ns_sysctl_unregister(struct net *net)
  492 {
              /* Stub for builds without CONFIG_SYSCTL: nothing to undo. */
  493 }
 494 
  495 static int ip6_frags_sysctl_register(void)
  496 {
              /* Stub for builds without CONFIG_SYSCTL: nothing to register,
               * always succeeds.
               */
  497         return 0;
  498 }
 499 
  500 static void ip6_frags_sysctl_unregister(void)
  501 {
              /* Stub for builds without CONFIG_SYSCTL: nothing to undo. */
  502 }
 503 #endif
 504 
  505 static int __net_init ipv6_frags_init_net(struct net *net)
  506 {
              /* Per-namespace init (the .init hook of ip6_frags_ops): create
               * the namespace's fqdir, load the default thresholds and
               * timeout, then register the per-namespace sysctls.
               *
               * Returns 0 on success or the negative error from fqdir_init()
               * or ip6_frags_ns_sysctl_register().
               */
  507         int res;
  508 
  509         res = fqdir_init(&net->ipv6.fqdir, &ip6_frags, net);
  510         if (res < 0)
  511                 return res;
  512 
  513         net->ipv6.fqdir->high_thresh = IPV6_FRAG_HIGH_THRESH;
  514         net->ipv6.fqdir->low_thresh = IPV6_FRAG_LOW_THRESH;
  515         net->ipv6.fqdir->timeout = IPV6_FRAG_TIMEOUT;
  516 
              /* If sysctl registration fails, release the fqdir that was just
               * created before reporting the error.
               */
  517         res = ip6_frags_ns_sysctl_register(net);
  518         if (res < 0)
  519                 fqdir_exit(net->ipv6.fqdir);
  520         return res;
  521 }
 522 
  523 static void __net_exit ipv6_frags_pre_exit_net(struct net *net)
  524 {
              /* The .pre_exit hook of ip6_frags_ops: forwards to
               * fqdir_pre_exit() for this namespace's fqdir.
               */
  525         fqdir_pre_exit(net->ipv6.fqdir);
  526 }
 527 
  528 static void __net_exit ipv6_frags_exit_net(struct net *net)
  529 {
              /* The .exit hook of ip6_frags_ops: unregister the per-namespace
               * sysctls, then release the namespace's fqdir.
               */
  530         ip6_frags_ns_sysctl_unregister(net);
  531         fqdir_exit(net->ipv6.fqdir);
  532 }
 533 
  534 static struct pernet_operations ip6_frags_ops = {
              /* Per-namespace hooks; registered by ipv6_frag_init() and
               * unregistered by ipv6_frag_exit().
               */
  535         .init           = ipv6_frags_init_net,
  536         .pre_exit       = ipv6_frags_pre_exit_net,
  537         .exit           = ipv6_frags_exit_net,
  538 };
 539 
  540 static const struct rhashtable_params ip6_rhash_params = {
              /* Hash table parameters copied into ip6_frags.rhash_params by
               * ipv6_frag_init(); entries are linked through the 'node'
               * member of struct inet_frag_queue.
               */
  541         .head_offset            = offsetof(struct inet_frag_queue, node),
  542         .hashfn                 = ip6frag_key_hashfn,
  543         .obj_hashfn             = ip6frag_obj_hashfn,
  544         .obj_cmpfn              = ip6frag_obj_cmpfn,
  545         .automatic_shrinking    = true,
  546 };
 547 
  548 int __init ipv6_frag_init(void)
  549 {
              /* Set up IPv6 reassembly: fill in ip6_frags and initialise it,
               * add the fragment protocol handler, register the global sysctl
               * table and finally the per-namespace operations.
               *
               * Returns 0 on success.  On failure, returns the error of the
               * step that failed after undoing the steps that had already
               * succeeded, in reverse order.
               */
  550         int ret;
  551 
  552         ip6_frags.constructor = ip6frag_init;
  553         ip6_frags.destructor = NULL;
  554         ip6_frags.qsize = sizeof(struct frag_queue);
  555         ip6_frags.frag_expire = ip6_frag_expire;
  556         ip6_frags.frags_cache_name = ip6_frag_cache_name;
  557         ip6_frags.rhash_params = ip6_rhash_params;
  558         ret = inet_frags_init(&ip6_frags);
  559         if (ret)
  560                 goto out;
  561 
  562         ret = inet6_add_protocol(&frag_protocol, IPPROTO_FRAGMENT);
  563         if (ret)
  564                 goto err_protocol;
  565 
  566         ret = ip6_frags_sysctl_register();
  567         if (ret)
  568                 goto err_sysctl;
  569 
  570         ret = register_pernet_subsys(&ip6_frags_ops);
  571         if (ret)
  572                 goto err_pernet;
  573 
  574 out:
  575         return ret;
  576 
              /* Unwind labels: each one undoes the step before the one that
               * failed and falls through to the earlier ones.
               */
  577 err_pernet:
  578         ip6_frags_sysctl_unregister();
  579 err_sysctl:
  580         inet6_del_protocol(&frag_protocol, IPPROTO_FRAGMENT);
  581 err_protocol:
  582         inet_frags_fini(&ip6_frags);
  583         goto out;
  584 }
 585 
  586 void ipv6_frag_exit(void)
  587 {
              /* Tear down everything ipv6_frag_init() set up: the global
               * sysctl table, the per-namespace operations, the fragment
               * protocol handler and finally ip6_frags itself.
               */
  588         ip6_frags_sysctl_unregister();
  589         unregister_pernet_subsys(&ip6_frags_ops);
  590         inet6_del_protocol(&frag_protocol, IPPROTO_FRAGMENT);
  591         inet_frags_fini(&ip6_frags);
  592 }

/* [<][>][^][v][top][bottom][index][help] */