root/net/ipv4/netfilter/ipt_CLUSTERIP.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. clusterip_pernet
  2. clusterip_config_get
  3. clusterip_config_rcu_free
  4. clusterip_config_put
  5. clusterip_config_entry_put
  6. __clusterip_config_find
  7. clusterip_config_find_get
  8. clusterip_config_init_nodelist
  9. clusterip_netdev_event
  10. clusterip_config_init
  11. clusterip_add_node
  12. clusterip_del_node
  13. clusterip_hashfn
  14. clusterip_responsible
  15. clusterip_tg
  16. clusterip_tg_check
  17. clusterip_tg_destroy
  18. arp_print
  19. arp_mangle
  20. clusterip_seq_start
  21. clusterip_seq_next
  22. clusterip_seq_stop
  23. clusterip_seq_show
  24. clusterip_proc_open
  25. clusterip_proc_release
  26. clusterip_proc_write
  27. clusterip_net_init
  28. clusterip_net_exit
  29. clusterip_tg_init
  30. clusterip_tg_exit

   1 // SPDX-License-Identifier: GPL-2.0-only
   2 /* Cluster IP hashmark target
   3  * (C) 2003-2004 by Harald Welte <laforge@netfilter.org>
   4  * based on ideas of Fabio Olive Leite <olive@unixforge.org>
   5  *
   6  * Development of this code funded by SuSE Linux AG, http://www.suse.com/
   7  */
   8 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
   9 #include <linux/module.h>
  10 #include <linux/proc_fs.h>
  11 #include <linux/jhash.h>
  12 #include <linux/bitops.h>
  13 #include <linux/skbuff.h>
  14 #include <linux/slab.h>
  15 #include <linux/ip.h>
  16 #include <linux/tcp.h>
  17 #include <linux/udp.h>
  18 #include <linux/icmp.h>
  19 #include <linux/if_arp.h>
  20 #include <linux/seq_file.h>
  21 #include <linux/refcount.h>
  22 #include <linux/netfilter_arp.h>
  23 #include <linux/netfilter/x_tables.h>
  24 #include <linux/netfilter_ipv4/ip_tables.h>
  25 #include <linux/netfilter_ipv4/ipt_CLUSTERIP.h>
  26 #include <net/netfilter/nf_conntrack.h>
  27 #include <net/net_namespace.h>
  28 #include <net/netns/generic.h>
  29 #include <net/checksum.h>
  30 #include <net/ip.h>
  31 
  32 #define CLUSTERIP_VERSION "0.8"
  33 
  34 MODULE_LICENSE("GPL");
  35 MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
  36 MODULE_DESCRIPTION("Xtables: CLUSTERIP target");
  37 
  38 struct clusterip_config {
  39         struct list_head list;                  /* list of all configs */
  40         refcount_t refcount;                    /* reference count */
  41         refcount_t entries;                     /* number of entries/rules
  42                                                  * referencing us */
  43 
  44         __be32 clusterip;                       /* the IP address */
  45         u_int8_t clustermac[ETH_ALEN];          /* the MAC address */
  46         int ifindex;                            /* device ifindex */
  47         u_int16_t num_total_nodes;              /* total number of nodes */
  48         unsigned long local_nodes;              /* node number array */
  49 
  50 #ifdef CONFIG_PROC_FS
  51         struct proc_dir_entry *pde;             /* proc dir entry */
  52 #endif
  53         enum clusterip_hashmode hash_mode;      /* which hashing mode */
  54         u_int32_t hash_initval;                 /* hash initialization */
  55         struct rcu_head rcu;                    /* for call_rcu */
  56         struct net *net;                        /* netns for pernet list */
  57         char ifname[IFNAMSIZ];                  /* device ifname */
  58 };
  59 
  60 #ifdef CONFIG_PROC_FS
  61 static const struct file_operations clusterip_proc_fops;
  62 #endif
  63 
  64 struct clusterip_net {
  65         struct list_head configs;
  66         /* lock protects the configs list */
  67         spinlock_t lock;
  68 
  69 #ifdef CONFIG_PROC_FS
  70         struct proc_dir_entry *procdir;
  71         /* mutex protects the config->pde*/
  72         struct mutex mutex;
  73 #endif
  74 };
  75 
  76 static unsigned int clusterip_net_id __read_mostly;
  77 static inline struct clusterip_net *clusterip_pernet(struct net *net)
  78 {
  79         return net_generic(net, clusterip_net_id);
  80 }
  81 
  82 static inline void
  83 clusterip_config_get(struct clusterip_config *c)
  84 {
  85         refcount_inc(&c->refcount);
  86 }
  87 
  88 static void clusterip_config_rcu_free(struct rcu_head *head)
  89 {
  90         struct clusterip_config *config;
  91         struct net_device *dev;
  92 
  93         config = container_of(head, struct clusterip_config, rcu);
  94         dev = dev_get_by_name(config->net, config->ifname);
  95         if (dev) {
  96                 dev_mc_del(dev, config->clustermac);
  97                 dev_put(dev);
  98         }
  99         kfree(config);
 100 }
 101 
 102 static inline void
 103 clusterip_config_put(struct clusterip_config *c)
 104 {
 105         if (refcount_dec_and_test(&c->refcount))
 106                 call_rcu(&c->rcu, clusterip_config_rcu_free);
 107 }
 108 
 109 /* decrease the count of entries using/referencing this config.  If last
 110  * entry(rule) is removed, remove the config from lists, but don't free it
 111  * yet, since proc-files could still be holding references */
 112 static inline void
 113 clusterip_config_entry_put(struct clusterip_config *c)
 114 {
 115         struct clusterip_net *cn = clusterip_pernet(c->net);
 116 
 117         local_bh_disable();
 118         if (refcount_dec_and_lock(&c->entries, &cn->lock)) {
 119                 list_del_rcu(&c->list);
 120                 spin_unlock(&cn->lock);
 121                 local_bh_enable();
 122                 /* In case anyone still accesses the file, the open/close
 123                  * functions are also incrementing the refcount on their own,
 124                  * so it's safe to remove the entry even if it's in use. */
 125 #ifdef CONFIG_PROC_FS
 126                 mutex_lock(&cn->mutex);
 127                 if (cn->procdir)
 128                         proc_remove(c->pde);
 129                 mutex_unlock(&cn->mutex);
 130 #endif
 131                 return;
 132         }
 133         local_bh_enable();
 134 }
 135 
 136 static struct clusterip_config *
 137 __clusterip_config_find(struct net *net, __be32 clusterip)
 138 {
 139         struct clusterip_config *c;
 140         struct clusterip_net *cn = clusterip_pernet(net);
 141 
 142         list_for_each_entry_rcu(c, &cn->configs, list) {
 143                 if (c->clusterip == clusterip)
 144                         return c;
 145         }
 146 
 147         return NULL;
 148 }
 149 
 150 static inline struct clusterip_config *
 151 clusterip_config_find_get(struct net *net, __be32 clusterip, int entry)
 152 {
 153         struct clusterip_config *c;
 154 
 155         rcu_read_lock_bh();
 156         c = __clusterip_config_find(net, clusterip);
 157         if (c) {
 158 #ifdef CONFIG_PROC_FS
 159                 if (!c->pde)
 160                         c = NULL;
 161                 else
 162 #endif
 163                 if (unlikely(!refcount_inc_not_zero(&c->refcount)))
 164                         c = NULL;
 165                 else if (entry) {
 166                         if (unlikely(!refcount_inc_not_zero(&c->entries))) {
 167                                 clusterip_config_put(c);
 168                                 c = NULL;
 169                         }
 170                 }
 171         }
 172         rcu_read_unlock_bh();
 173 
 174         return c;
 175 }
 176 
 177 static void
 178 clusterip_config_init_nodelist(struct clusterip_config *c,
 179                                const struct ipt_clusterip_tgt_info *i)
 180 {
 181         int n;
 182 
 183         for (n = 0; n < i->num_local_nodes; n++)
 184                 set_bit(i->local_nodes[n] - 1, &c->local_nodes);
 185 }
 186 
 187 static int
 188 clusterip_netdev_event(struct notifier_block *this, unsigned long event,
 189                        void *ptr)
 190 {
 191         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 192         struct net *net = dev_net(dev);
 193         struct clusterip_net *cn = clusterip_pernet(net);
 194         struct clusterip_config *c;
 195 
 196         spin_lock_bh(&cn->lock);
 197         list_for_each_entry_rcu(c, &cn->configs, list) {
 198                 switch (event) {
 199                 case NETDEV_REGISTER:
 200                         if (!strcmp(dev->name, c->ifname)) {
 201                                 c->ifindex = dev->ifindex;
 202                                 dev_mc_add(dev, c->clustermac);
 203                         }
 204                         break;
 205                 case NETDEV_UNREGISTER:
 206                         if (dev->ifindex == c->ifindex) {
 207                                 dev_mc_del(dev, c->clustermac);
 208                                 c->ifindex = -1;
 209                         }
 210                         break;
 211                 case NETDEV_CHANGENAME:
 212                         if (!strcmp(dev->name, c->ifname)) {
 213                                 c->ifindex = dev->ifindex;
 214                                 dev_mc_add(dev, c->clustermac);
 215                         } else if (dev->ifindex == c->ifindex) {
 216                                 dev_mc_del(dev, c->clustermac);
 217                                 c->ifindex = -1;
 218                         }
 219                         break;
 220                 }
 221         }
 222         spin_unlock_bh(&cn->lock);
 223 
 224         return NOTIFY_DONE;
 225 }
 226 
 227 static struct clusterip_config *
 228 clusterip_config_init(struct net *net, const struct ipt_clusterip_tgt_info *i,
 229                       __be32 ip, const char *iniface)
 230 {
 231         struct clusterip_net *cn = clusterip_pernet(net);
 232         struct clusterip_config *c;
 233         struct net_device *dev;
 234         int err;
 235 
 236         if (iniface[0] == '\0') {
 237                 pr_info("Please specify an interface name\n");
 238                 return ERR_PTR(-EINVAL);
 239         }
 240 
 241         c = kzalloc(sizeof(*c), GFP_ATOMIC);
 242         if (!c)
 243                 return ERR_PTR(-ENOMEM);
 244 
 245         dev = dev_get_by_name(net, iniface);
 246         if (!dev) {
 247                 pr_info("no such interface %s\n", iniface);
 248                 kfree(c);
 249                 return ERR_PTR(-ENOENT);
 250         }
 251         c->ifindex = dev->ifindex;
 252         strcpy(c->ifname, dev->name);
 253         memcpy(&c->clustermac, &i->clustermac, ETH_ALEN);
 254         dev_mc_add(dev, c->clustermac);
 255         dev_put(dev);
 256 
 257         c->clusterip = ip;
 258         c->num_total_nodes = i->num_total_nodes;
 259         clusterip_config_init_nodelist(c, i);
 260         c->hash_mode = i->hash_mode;
 261         c->hash_initval = i->hash_initval;
 262         c->net = net;
 263         refcount_set(&c->refcount, 1);
 264 
 265         spin_lock_bh(&cn->lock);
 266         if (__clusterip_config_find(net, ip)) {
 267                 err = -EBUSY;
 268                 goto out_config_put;
 269         }
 270 
 271         list_add_rcu(&c->list, &cn->configs);
 272         spin_unlock_bh(&cn->lock);
 273 
 274 #ifdef CONFIG_PROC_FS
 275         {
 276                 char buffer[16];
 277 
 278                 /* create proc dir entry */
 279                 sprintf(buffer, "%pI4", &ip);
 280                 mutex_lock(&cn->mutex);
 281                 c->pde = proc_create_data(buffer, 0600,
 282                                           cn->procdir,
 283                                           &clusterip_proc_fops, c);
 284                 mutex_unlock(&cn->mutex);
 285                 if (!c->pde) {
 286                         err = -ENOMEM;
 287                         goto err;
 288                 }
 289         }
 290 #endif
 291 
 292         refcount_set(&c->entries, 1);
 293         return c;
 294 
 295 #ifdef CONFIG_PROC_FS
 296 err:
 297 #endif
 298         spin_lock_bh(&cn->lock);
 299         list_del_rcu(&c->list);
 300 out_config_put:
 301         spin_unlock_bh(&cn->lock);
 302         clusterip_config_put(c);
 303         return ERR_PTR(err);
 304 }
 305 
 306 #ifdef CONFIG_PROC_FS
 307 static int
 308 clusterip_add_node(struct clusterip_config *c, u_int16_t nodenum)
 309 {
 310 
 311         if (nodenum == 0 ||
 312             nodenum > c->num_total_nodes)
 313                 return 1;
 314 
 315         /* check if we already have this number in our bitfield */
 316         if (test_and_set_bit(nodenum - 1, &c->local_nodes))
 317                 return 1;
 318 
 319         return 0;
 320 }
 321 
 322 static bool
 323 clusterip_del_node(struct clusterip_config *c, u_int16_t nodenum)
 324 {
 325         if (nodenum == 0 ||
 326             nodenum > c->num_total_nodes)
 327                 return true;
 328 
 329         if (test_and_clear_bit(nodenum - 1, &c->local_nodes))
 330                 return false;
 331 
 332         return true;
 333 }
 334 #endif
 335 
 336 static inline u_int32_t
 337 clusterip_hashfn(const struct sk_buff *skb,
 338                  const struct clusterip_config *config)
 339 {
 340         const struct iphdr *iph = ip_hdr(skb);
 341         unsigned long hashval;
 342         u_int16_t sport = 0, dport = 0;
 343         int poff;
 344 
 345         poff = proto_ports_offset(iph->protocol);
 346         if (poff >= 0) {
 347                 const u_int16_t *ports;
 348                 u16 _ports[2];
 349 
 350                 ports = skb_header_pointer(skb, iph->ihl * 4 + poff, 4, _ports);
 351                 if (ports) {
 352                         sport = ports[0];
 353                         dport = ports[1];
 354                 }
 355         } else {
 356                 net_info_ratelimited("unknown protocol %u\n", iph->protocol);
 357         }
 358 
 359         switch (config->hash_mode) {
 360         case CLUSTERIP_HASHMODE_SIP:
 361                 hashval = jhash_1word(ntohl(iph->saddr),
 362                                       config->hash_initval);
 363                 break;
 364         case CLUSTERIP_HASHMODE_SIP_SPT:
 365                 hashval = jhash_2words(ntohl(iph->saddr), sport,
 366                                        config->hash_initval);
 367                 break;
 368         case CLUSTERIP_HASHMODE_SIP_SPT_DPT:
 369                 hashval = jhash_3words(ntohl(iph->saddr), sport, dport,
 370                                        config->hash_initval);
 371                 break;
 372         default:
 373                 /* to make gcc happy */
 374                 hashval = 0;
 375                 /* This cannot happen, unless the check function wasn't called
 376                  * at rule load time */
 377                 pr_info("unknown mode %u\n", config->hash_mode);
 378                 BUG();
 379                 break;
 380         }
 381 
 382         /* node numbers are 1..n, not 0..n */
 383         return reciprocal_scale(hashval, config->num_total_nodes) + 1;
 384 }
 385 
 386 static inline int
 387 clusterip_responsible(const struct clusterip_config *config, u_int32_t hash)
 388 {
 389         return test_bit(hash - 1, &config->local_nodes);
 390 }
 391 
 392 /***********************************************************************
 393  * IPTABLES TARGET
 394  ***********************************************************************/
 395 
 396 static unsigned int
 397 clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par)
 398 {
 399         const struct ipt_clusterip_tgt_info *cipinfo = par->targinfo;
 400         struct nf_conn *ct;
 401         enum ip_conntrack_info ctinfo;
 402         u_int32_t hash;
 403 
 404         /* don't need to clusterip_config_get() here, since refcount
 405          * is only decremented by destroy() - and ip_tables guarantees
 406          * that the ->target() function isn't called after ->destroy() */
 407 
 408         ct = nf_ct_get(skb, &ctinfo);
 409         if (ct == NULL)
 410                 return NF_DROP;
 411 
 412         /* special case: ICMP error handling. conntrack distinguishes between
 413          * error messages (RELATED) and information requests (see below) */
 414         if (ip_hdr(skb)->protocol == IPPROTO_ICMP &&
 415             (ctinfo == IP_CT_RELATED ||
 416              ctinfo == IP_CT_RELATED_REPLY))
 417                 return XT_CONTINUE;
 418 
 419         /* nf_conntrack_proto_icmp guarantees us that we only have ICMP_ECHO,
 420          * TIMESTAMP, INFO_REQUEST or ICMP_ADDRESS type icmp packets from here
 421          * on, which all have an ID field [relevant for hashing]. */
 422 
 423         hash = clusterip_hashfn(skb, cipinfo->config);
 424 
 425         switch (ctinfo) {
 426         case IP_CT_NEW:
 427                 ct->mark = hash;
 428                 break;
 429         case IP_CT_RELATED:
 430         case IP_CT_RELATED_REPLY:
 431                 /* FIXME: we don't handle expectations at the moment.
 432                  * They can arrive on a different node than
 433                  * the master connection (e.g. FTP passive mode) */
 434         case IP_CT_ESTABLISHED:
 435         case IP_CT_ESTABLISHED_REPLY:
 436                 break;
 437         default:                        /* Prevent gcc warnings */
 438                 break;
 439         }
 440 
 441 #ifdef DEBUG
 442         nf_ct_dump_tuple_ip(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
 443 #endif
 444         pr_debug("hash=%u ct_hash=%u ", hash, ct->mark);
 445         if (!clusterip_responsible(cipinfo->config, hash)) {
 446                 pr_debug("not responsible\n");
 447                 return NF_DROP;
 448         }
 449         pr_debug("responsible\n");
 450 
 451         /* despite being received via linklayer multicast, this is
 452          * actually a unicast IP packet. TCP doesn't like PACKET_MULTICAST */
 453         skb->pkt_type = PACKET_HOST;
 454 
 455         return XT_CONTINUE;
 456 }
 457 
 458 static int clusterip_tg_check(const struct xt_tgchk_param *par)
 459 {
 460         struct ipt_clusterip_tgt_info *cipinfo = par->targinfo;
 461         const struct ipt_entry *e = par->entryinfo;
 462         struct clusterip_config *config;
 463         int ret, i;
 464 
 465         if (par->nft_compat) {
 466                 pr_err("cannot use CLUSTERIP target from nftables compat\n");
 467                 return -EOPNOTSUPP;
 468         }
 469 
 470         if (cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP &&
 471             cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT &&
 472             cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT_DPT) {
 473                 pr_info("unknown mode %u\n", cipinfo->hash_mode);
 474                 return -EINVAL;
 475 
 476         }
 477         if (e->ip.dmsk.s_addr != htonl(0xffffffff) ||
 478             e->ip.dst.s_addr == 0) {
 479                 pr_info("Please specify destination IP\n");
 480                 return -EINVAL;
 481         }
 482         if (cipinfo->num_local_nodes > ARRAY_SIZE(cipinfo->local_nodes)) {
 483                 pr_info("bad num_local_nodes %u\n", cipinfo->num_local_nodes);
 484                 return -EINVAL;
 485         }
 486         for (i = 0; i < cipinfo->num_local_nodes; i++) {
 487                 if (cipinfo->local_nodes[i] - 1 >=
 488                     sizeof(config->local_nodes) * 8) {
 489                         pr_info("bad local_nodes[%d] %u\n",
 490                                 i, cipinfo->local_nodes[i]);
 491                         return -EINVAL;
 492                 }
 493         }
 494 
 495         config = clusterip_config_find_get(par->net, e->ip.dst.s_addr, 1);
 496         if (!config) {
 497                 if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) {
 498                         pr_info("no config found for %pI4, need 'new'\n",
 499                                 &e->ip.dst.s_addr);
 500                         return -EINVAL;
 501                 } else {
 502                         config = clusterip_config_init(par->net, cipinfo,
 503                                                        e->ip.dst.s_addr,
 504                                                        e->ip.iniface);
 505                         if (IS_ERR(config))
 506                                 return PTR_ERR(config);
 507                 }
 508         } else if (memcmp(&config->clustermac, &cipinfo->clustermac, ETH_ALEN))
 509                 return -EINVAL;
 510 
 511         ret = nf_ct_netns_get(par->net, par->family);
 512         if (ret < 0) {
 513                 pr_info("cannot load conntrack support for proto=%u\n",
 514                         par->family);
 515                 clusterip_config_entry_put(config);
 516                 clusterip_config_put(config);
 517                 return ret;
 518         }
 519 
 520         if (!par->net->xt.clusterip_deprecated_warning) {
 521                 pr_info("ipt_CLUSTERIP is deprecated and it will removed soon, "
 522                         "use xt_cluster instead\n");
 523                 par->net->xt.clusterip_deprecated_warning = true;
 524         }
 525 
 526         cipinfo->config = config;
 527         return ret;
 528 }
 529 
 530 /* drop reference count of cluster config when rule is deleted */
 531 static void clusterip_tg_destroy(const struct xt_tgdtor_param *par)
 532 {
 533         const struct ipt_clusterip_tgt_info *cipinfo = par->targinfo;
 534 
 535         /* if no more entries are referencing the config, remove it
 536          * from the list and destroy the proc entry */
 537         clusterip_config_entry_put(cipinfo->config);
 538 
 539         clusterip_config_put(cipinfo->config);
 540 
 541         nf_ct_netns_put(par->net, par->family);
 542 }
 543 
 544 #ifdef CONFIG_COMPAT
 545 struct compat_ipt_clusterip_tgt_info
 546 {
 547         u_int32_t       flags;
 548         u_int8_t        clustermac[6];
 549         u_int16_t       num_total_nodes;
 550         u_int16_t       num_local_nodes;
 551         u_int16_t       local_nodes[CLUSTERIP_MAX_NODES];
 552         u_int32_t       hash_mode;
 553         u_int32_t       hash_initval;
 554         compat_uptr_t   config;
 555 };
 556 #endif /* CONFIG_COMPAT */
 557 
 558 static struct xt_target clusterip_tg_reg __read_mostly = {
 559         .name           = "CLUSTERIP",
 560         .family         = NFPROTO_IPV4,
 561         .target         = clusterip_tg,
 562         .checkentry     = clusterip_tg_check,
 563         .destroy        = clusterip_tg_destroy,
 564         .targetsize     = sizeof(struct ipt_clusterip_tgt_info),
 565         .usersize       = offsetof(struct ipt_clusterip_tgt_info, config),
 566 #ifdef CONFIG_COMPAT
 567         .compatsize     = sizeof(struct compat_ipt_clusterip_tgt_info),
 568 #endif /* CONFIG_COMPAT */
 569         .me             = THIS_MODULE
 570 };
 571 
 572 
 573 /***********************************************************************
 574  * ARP MANGLING CODE
 575  ***********************************************************************/
 576 
 577 /* hardcoded for 48bit ethernet and 32bit ipv4 addresses */
 578 struct arp_payload {
 579         u_int8_t src_hw[ETH_ALEN];
 580         __be32 src_ip;
 581         u_int8_t dst_hw[ETH_ALEN];
 582         __be32 dst_ip;
 583 } __packed;
 584 
 585 #ifdef DEBUG
 586 static void arp_print(struct arp_payload *payload)
 587 {
 588 #define HBUFFERLEN 30
 589         char hbuffer[HBUFFERLEN];
 590         int j, k;
 591 
 592         for (k = 0, j = 0; k < HBUFFERLEN - 3 && j < ETH_ALEN; j++) {
 593                 hbuffer[k++] = hex_asc_hi(payload->src_hw[j]);
 594                 hbuffer[k++] = hex_asc_lo(payload->src_hw[j]);
 595                 hbuffer[k++] = ':';
 596         }
 597         hbuffer[--k] = '\0';
 598 
 599         pr_debug("src %pI4@%s, dst %pI4\n",
 600                  &payload->src_ip, hbuffer, &payload->dst_ip);
 601 }
 602 #endif
 603 
 604 static unsigned int
 605 arp_mangle(void *priv,
 606            struct sk_buff *skb,
 607            const struct nf_hook_state *state)
 608 {
 609         struct arphdr *arp = arp_hdr(skb);
 610         struct arp_payload *payload;
 611         struct clusterip_config *c;
 612         struct net *net = state->net;
 613 
 614         /* we don't care about non-ethernet and non-ipv4 ARP */
 615         if (arp->ar_hrd != htons(ARPHRD_ETHER) ||
 616             arp->ar_pro != htons(ETH_P_IP) ||
 617             arp->ar_pln != 4 || arp->ar_hln != ETH_ALEN)
 618                 return NF_ACCEPT;
 619 
 620         /* we only want to mangle arp requests and replies */
 621         if (arp->ar_op != htons(ARPOP_REPLY) &&
 622             arp->ar_op != htons(ARPOP_REQUEST))
 623                 return NF_ACCEPT;
 624 
 625         payload = (void *)(arp+1);
 626 
 627         /* if there is no clusterip configuration for the arp reply's
 628          * source ip, we don't want to mangle it */
 629         c = clusterip_config_find_get(net, payload->src_ip, 0);
 630         if (!c)
 631                 return NF_ACCEPT;
 632 
 633         /* normally the linux kernel always replies to arp queries of
 634          * addresses on different interfacs.  However, in the CLUSTERIP case
 635          * this wouldn't work, since we didn't subscribe the mcast group on
 636          * other interfaces */
 637         if (c->ifindex != state->out->ifindex) {
 638                 pr_debug("not mangling arp reply on different interface: cip'%d'-skb'%d'\n",
 639                          c->ifindex, state->out->ifindex);
 640                 clusterip_config_put(c);
 641                 return NF_ACCEPT;
 642         }
 643 
 644         /* mangle reply hardware address */
 645         memcpy(payload->src_hw, c->clustermac, arp->ar_hln);
 646 
 647 #ifdef DEBUG
 648         pr_debug("mangled arp reply: ");
 649         arp_print(payload);
 650 #endif
 651 
 652         clusterip_config_put(c);
 653 
 654         return NF_ACCEPT;
 655 }
 656 
 657 static const struct nf_hook_ops cip_arp_ops = {
 658         .hook = arp_mangle,
 659         .pf = NFPROTO_ARP,
 660         .hooknum = NF_ARP_OUT,
 661         .priority = -1
 662 };
 663 
 664 /***********************************************************************
 665  * PROC DIR HANDLING
 666  ***********************************************************************/
 667 
 668 #ifdef CONFIG_PROC_FS
 669 
 670 struct clusterip_seq_position {
 671         unsigned int pos;       /* position */
 672         unsigned int weight;    /* number of bits set == size */
 673         unsigned int bit;       /* current bit */
 674         unsigned long val;      /* current value */
 675 };
 676 
 677 static void *clusterip_seq_start(struct seq_file *s, loff_t *pos)
 678 {
 679         struct clusterip_config *c = s->private;
 680         unsigned int weight;
 681         u_int32_t local_nodes;
 682         struct clusterip_seq_position *idx;
 683 
 684         /* FIXME: possible race */
 685         local_nodes = c->local_nodes;
 686         weight = hweight32(local_nodes);
 687         if (*pos >= weight)
 688                 return NULL;
 689 
 690         idx = kmalloc(sizeof(struct clusterip_seq_position), GFP_KERNEL);
 691         if (!idx)
 692                 return ERR_PTR(-ENOMEM);
 693 
 694         idx->pos = *pos;
 695         idx->weight = weight;
 696         idx->bit = ffs(local_nodes);
 697         idx->val = local_nodes;
 698         clear_bit(idx->bit - 1, &idx->val);
 699 
 700         return idx;
 701 }
 702 
 703 static void *clusterip_seq_next(struct seq_file *s, void *v, loff_t *pos)
 704 {
 705         struct clusterip_seq_position *idx = v;
 706 
 707         *pos = ++idx->pos;
 708         if (*pos >= idx->weight) {
 709                 kfree(v);
 710                 return NULL;
 711         }
 712         idx->bit = ffs(idx->val);
 713         clear_bit(idx->bit - 1, &idx->val);
 714         return idx;
 715 }
 716 
 717 static void clusterip_seq_stop(struct seq_file *s, void *v)
 718 {
 719         if (!IS_ERR(v))
 720                 kfree(v);
 721 }
 722 
 723 static int clusterip_seq_show(struct seq_file *s, void *v)
 724 {
 725         struct clusterip_seq_position *idx = v;
 726 
 727         if (idx->pos != 0)
 728                 seq_putc(s, ',');
 729 
 730         seq_printf(s, "%u", idx->bit);
 731 
 732         if (idx->pos == idx->weight - 1)
 733                 seq_putc(s, '\n');
 734 
 735         return 0;
 736 }
 737 
 738 static const struct seq_operations clusterip_seq_ops = {
 739         .start  = clusterip_seq_start,
 740         .next   = clusterip_seq_next,
 741         .stop   = clusterip_seq_stop,
 742         .show   = clusterip_seq_show,
 743 };
 744 
 745 static int clusterip_proc_open(struct inode *inode, struct file *file)
 746 {
 747         int ret = seq_open(file, &clusterip_seq_ops);
 748 
 749         if (!ret) {
 750                 struct seq_file *sf = file->private_data;
 751                 struct clusterip_config *c = PDE_DATA(inode);
 752 
 753                 sf->private = c;
 754 
 755                 clusterip_config_get(c);
 756         }
 757 
 758         return ret;
 759 }
 760 
 761 static int clusterip_proc_release(struct inode *inode, struct file *file)
 762 {
 763         struct clusterip_config *c = PDE_DATA(inode);
 764         int ret;
 765 
 766         ret = seq_release(inode, file);
 767 
 768         if (!ret)
 769                 clusterip_config_put(c);
 770 
 771         return ret;
 772 }
 773 
 774 static ssize_t clusterip_proc_write(struct file *file, const char __user *input,
 775                                 size_t size, loff_t *ofs)
 776 {
 777         struct clusterip_config *c = PDE_DATA(file_inode(file));
 778 #define PROC_WRITELEN   10
 779         char buffer[PROC_WRITELEN+1];
 780         unsigned long nodenum;
 781         int rc;
 782 
 783         if (size > PROC_WRITELEN)
 784                 return -EIO;
 785         if (copy_from_user(buffer, input, size))
 786                 return -EFAULT;
 787         buffer[size] = 0;
 788 
 789         if (*buffer == '+') {
 790                 rc = kstrtoul(buffer+1, 10, &nodenum);
 791                 if (rc)
 792                         return rc;
 793                 if (clusterip_add_node(c, nodenum))
 794                         return -ENOMEM;
 795         } else if (*buffer == '-') {
 796                 rc = kstrtoul(buffer+1, 10, &nodenum);
 797                 if (rc)
 798                         return rc;
 799                 if (clusterip_del_node(c, nodenum))
 800                         return -ENOENT;
 801         } else
 802                 return -EIO;
 803 
 804         return size;
 805 }
 806 
 807 static const struct file_operations clusterip_proc_fops = {
 808         .open    = clusterip_proc_open,
 809         .read    = seq_read,
 810         .write   = clusterip_proc_write,
 811         .llseek  = seq_lseek,
 812         .release = clusterip_proc_release,
 813 };
 814 
 815 #endif /* CONFIG_PROC_FS */
 816 
 817 static int clusterip_net_init(struct net *net)
 818 {
 819         struct clusterip_net *cn = clusterip_pernet(net);
 820         int ret;
 821 
 822         INIT_LIST_HEAD(&cn->configs);
 823 
 824         spin_lock_init(&cn->lock);
 825 
 826         ret = nf_register_net_hook(net, &cip_arp_ops);
 827         if (ret < 0)
 828                 return ret;
 829 
 830 #ifdef CONFIG_PROC_FS
 831         cn->procdir = proc_mkdir("ipt_CLUSTERIP", net->proc_net);
 832         if (!cn->procdir) {
 833                 nf_unregister_net_hook(net, &cip_arp_ops);
 834                 pr_err("Unable to proc dir entry\n");
 835                 return -ENOMEM;
 836         }
 837         mutex_init(&cn->mutex);
 838 #endif /* CONFIG_PROC_FS */
 839 
 840         return 0;
 841 }
 842 
 843 static void clusterip_net_exit(struct net *net)
 844 {
 845 #ifdef CONFIG_PROC_FS
 846         struct clusterip_net *cn = clusterip_pernet(net);
 847 
 848         mutex_lock(&cn->mutex);
 849         proc_remove(cn->procdir);
 850         cn->procdir = NULL;
 851         mutex_unlock(&cn->mutex);
 852 #endif
 853         nf_unregister_net_hook(net, &cip_arp_ops);
 854 }
 855 
 856 static struct pernet_operations clusterip_net_ops = {
 857         .init = clusterip_net_init,
 858         .exit = clusterip_net_exit,
 859         .id   = &clusterip_net_id,
 860         .size = sizeof(struct clusterip_net),
 861 };
 862 
 863 static struct notifier_block cip_netdev_notifier = {
 864         .notifier_call = clusterip_netdev_event
 865 };
 866 
 867 static int __init clusterip_tg_init(void)
 868 {
 869         int ret;
 870 
 871         ret = register_pernet_subsys(&clusterip_net_ops);
 872         if (ret < 0)
 873                 return ret;
 874 
 875         ret = xt_register_target(&clusterip_tg_reg);
 876         if (ret < 0)
 877                 goto cleanup_subsys;
 878 
 879         ret = register_netdevice_notifier(&cip_netdev_notifier);
 880         if (ret < 0)
 881                 goto unregister_target;
 882 
 883         pr_info("ClusterIP Version %s loaded successfully\n",
 884                 CLUSTERIP_VERSION);
 885 
 886         return 0;
 887 
 888 unregister_target:
 889         xt_unregister_target(&clusterip_tg_reg);
 890 cleanup_subsys:
 891         unregister_pernet_subsys(&clusterip_net_ops);
 892         return ret;
 893 }
 894 
 895 static void __exit clusterip_tg_exit(void)
 896 {
 897         pr_info("ClusterIP Version %s unloading\n", CLUSTERIP_VERSION);
 898 
 899         unregister_netdevice_notifier(&cip_netdev_notifier);
 900         xt_unregister_target(&clusterip_tg_reg);
 901         unregister_pernet_subsys(&clusterip_net_ops);
 902 
 903         /* Wait for completion of call_rcu()'s (clusterip_config_rcu_free) */
 904         rcu_barrier();
 905 }
 906 
 907 module_init(clusterip_tg_init);
 908 module_exit(clusterip_tg_exit);

/* [<][>][^][v][top][bottom][index][help] */