root/net/netfilter/ipset/ip_set_core.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. ip_set_pernet
  2. ip_set_type_lock
  3. ip_set_type_unlock
  4. find_set_type
  5. load_settype
  6. __find_set_type_get
  7. __find_set_type_minmax
  8. ip_set_type_register
  9. ip_set_type_unregister
  10. ip_set_alloc
  11. ip_set_free
  12. flag_nested
  13. ip_set_get_ipaddr4
  14. ip_set_get_ipaddr6
  15. add_extension
  16. ip_set_elem_len
  17. ip_set_get_extensions
  18. ip_set_put_extensions
  19. ip_set_match_extensions
  20. __ip_set_get
  21. __ip_set_put
  22. __ip_set_put_netlink
  23. ip_set_rcu_get
  24. ip_set_lock
  25. ip_set_unlock
  26. ip_set_test
  27. ip_set_add
  28. ip_set_del
  29. ip_set_get_byname
  30. __ip_set_put_byindex
  31. ip_set_put_byindex
  32. ip_set_name_byindex
  33. ip_set_nfnl_get_byindex
  34. ip_set_nfnl_put
  35. protocol
  36. protocol_failed
  37. protocol_min_failed
  38. flag_exist
  39. start_msg
  40. find_set_and_id
  41. find_set
  42. find_free_id
  43. ip_set_none
  44. ip_set_create
  45. ip_set_destroy_set
  46. ip_set_destroy
  47. ip_set_flush_set
  48. ip_set_flush
  49. ip_set_rename
  50. ip_set_swap
  51. ip_set_dump_done
  52. dump_attrs
  53. ip_set_dump_start
  54. ip_set_dump_do
  55. ip_set_dump
  56. call_ad
  57. ip_set_ad
  58. ip_set_uadd
  59. ip_set_udel
  60. ip_set_utest
  61. ip_set_header
  62. ip_set_type
  63. ip_set_protocol
  64. ip_set_byname
  65. ip_set_byindex
  66. ip_set_sockfn_get
  67. ip_set_net_init
  68. ip_set_net_exit
  69. ip_set_init
  70. ip_set_fini

   1 // SPDX-License-Identifier: GPL-2.0-only
   2 /* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu>
   3  *                         Patrick Schaaf <bof@bof.de>
   4  * Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@netfilter.org>
   5  */
   6 
   7 /* Kernel module for IP set management */
   8 
   9 #include <linux/init.h>
  10 #include <linux/module.h>
  11 #include <linux/moduleparam.h>
  12 #include <linux/ip.h>
  13 #include <linux/skbuff.h>
  14 #include <linux/spinlock.h>
  15 #include <linux/rculist.h>
  16 #include <net/netlink.h>
  17 #include <net/net_namespace.h>
  18 #include <net/netns/generic.h>
  19 
  20 #include <linux/netfilter.h>
  21 #include <linux/netfilter/x_tables.h>
  22 #include <linux/netfilter/nfnetlink.h>
  23 #include <linux/netfilter/ipset/ip_set.h>
  24 
  25 static LIST_HEAD(ip_set_type_list);             /* all registered set types */
  26 static DEFINE_MUTEX(ip_set_type_mutex);         /* protects ip_set_type_list */
  27 static DEFINE_RWLOCK(ip_set_ref_lock);          /* protects the set refs */
  28 
  29 struct ip_set_net {
  30         struct ip_set * __rcu *ip_set_list;     /* all individual sets */
  31         ip_set_id_t     ip_set_max;     /* max number of sets */
  32         bool            is_deleted;     /* deleted by ip_set_net_exit */
  33         bool            is_destroyed;   /* all sets are destroyed */
  34 };
  35 
  36 static unsigned int ip_set_net_id __read_mostly;
  37 
  38 static inline struct ip_set_net *ip_set_pernet(struct net *net)
  39 {
  40         return net_generic(net, ip_set_net_id);
  41 }
  42 
  43 #define IP_SET_INC      64
  44 #define STRNCMP(a, b)   (strncmp(a, b, IPSET_MAXNAMELEN) == 0)
  45 
  46 static unsigned int max_sets;
  47 
  48 module_param(max_sets, int, 0600);
  49 MODULE_PARM_DESC(max_sets, "maximal number of sets");
  50 MODULE_LICENSE("GPL");
  51 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
  52 MODULE_DESCRIPTION("core IP set support");
  53 MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET);
  54 
  55 /* When the nfnl mutex or ip_set_ref_lock is held: */
  56 #define ip_set_dereference(p)           \
  57         rcu_dereference_protected(p,    \
  58                 lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET) || \
  59                 lockdep_is_held(&ip_set_ref_lock))
  60 #define ip_set(inst, id)                \
  61         ip_set_dereference((inst)->ip_set_list)[id]
  62 #define ip_set_ref_netlink(inst,id)     \
  63         rcu_dereference_raw((inst)->ip_set_list)[id]
  64 
  65 /* The set types are implemented in modules and registered set types
  66  * can be found in ip_set_type_list. Adding/deleting types is
  67  * serialized by ip_set_type_mutex.
  68  */
  69 
  70 static inline void
  71 ip_set_type_lock(void)
  72 {
  73         mutex_lock(&ip_set_type_mutex);
  74 }
  75 
  76 static inline void
  77 ip_set_type_unlock(void)
  78 {
  79         mutex_unlock(&ip_set_type_mutex);
  80 }
  81 
  82 /* Register and deregister settype */
  83 
  84 static struct ip_set_type *
  85 find_set_type(const char *name, u8 family, u8 revision)
  86 {
  87         struct ip_set_type *type;
  88 
  89         list_for_each_entry_rcu(type, &ip_set_type_list, list)
  90                 if (STRNCMP(type->name, name) &&
  91                     (type->family == family ||
  92                      type->family == NFPROTO_UNSPEC) &&
  93                     revision >= type->revision_min &&
  94                     revision <= type->revision_max)
  95                         return type;
  96         return NULL;
  97 }
  98 
  99 /* Unlock, try to load a set type module and lock again */
 100 static bool
 101 load_settype(const char *name)
 102 {
 103         nfnl_unlock(NFNL_SUBSYS_IPSET);
 104         pr_debug("try to load ip_set_%s\n", name);
 105         if (request_module("ip_set_%s", name) < 0) {
 106                 pr_warn("Can't find ip_set type %s\n", name);
 107                 nfnl_lock(NFNL_SUBSYS_IPSET);
 108                 return false;
 109         }
 110         nfnl_lock(NFNL_SUBSYS_IPSET);
 111         return true;
 112 }
 113 
 114 /* Find a set type and reference it */
 115 #define find_set_type_get(name, family, revision, found)        \
 116         __find_set_type_get(name, family, revision, found, false)
 117 
 118 static int
 119 __find_set_type_get(const char *name, u8 family, u8 revision,
 120                     struct ip_set_type **found, bool retry)
 121 {
 122         struct ip_set_type *type;
 123         int err;
 124 
 125         if (retry && !load_settype(name))
 126                 return -IPSET_ERR_FIND_TYPE;
 127 
 128         rcu_read_lock();
 129         *found = find_set_type(name, family, revision);
 130         if (*found) {
 131                 err = !try_module_get((*found)->me) ? -EFAULT : 0;
 132                 goto unlock;
 133         }
 134         /* Make sure the type is already loaded
 135          * but we don't support the revision
 136          */
 137         list_for_each_entry_rcu(type, &ip_set_type_list, list)
 138                 if (STRNCMP(type->name, name)) {
 139                         err = -IPSET_ERR_FIND_TYPE;
 140                         goto unlock;
 141                 }
 142         rcu_read_unlock();
 143 
 144         return retry ? -IPSET_ERR_FIND_TYPE :
 145                 __find_set_type_get(name, family, revision, found, true);
 146 
 147 unlock:
 148         rcu_read_unlock();
 149         return err;
 150 }
 151 
 152 /* Find a given set type by name and family.
 153  * If we succeeded, the supported minimal and maximum revisions are
 154  * filled out.
 155  */
 156 #define find_set_type_minmax(name, family, min, max) \
 157         __find_set_type_minmax(name, family, min, max, false)
 158 
 159 static int
 160 __find_set_type_minmax(const char *name, u8 family, u8 *min, u8 *max,
 161                        bool retry)
 162 {
 163         struct ip_set_type *type;
 164         bool found = false;
 165 
 166         if (retry && !load_settype(name))
 167                 return -IPSET_ERR_FIND_TYPE;
 168 
 169         *min = 255; *max = 0;
 170         rcu_read_lock();
 171         list_for_each_entry_rcu(type, &ip_set_type_list, list)
 172                 if (STRNCMP(type->name, name) &&
 173                     (type->family == family ||
 174                      type->family == NFPROTO_UNSPEC)) {
 175                         found = true;
 176                         if (type->revision_min < *min)
 177                                 *min = type->revision_min;
 178                         if (type->revision_max > *max)
 179                                 *max = type->revision_max;
 180                 }
 181         rcu_read_unlock();
 182         if (found)
 183                 return 0;
 184 
 185         return retry ? -IPSET_ERR_FIND_TYPE :
 186                 __find_set_type_minmax(name, family, min, max, true);
 187 }
 188 
 189 #define family_name(f)  ((f) == NFPROTO_IPV4 ? "inet" : \
 190                          (f) == NFPROTO_IPV6 ? "inet6" : "any")
 191 
 192 /* Register a set type structure. The type is identified by
 193  * the unique triple of name, family and revision.
 194  */
 195 int
 196 ip_set_type_register(struct ip_set_type *type)
 197 {
 198         int ret = 0;
 199 
 200         if (type->protocol != IPSET_PROTOCOL) {
 201                 pr_warn("ip_set type %s, family %s, revision %u:%u uses wrong protocol version %u (want %u)\n",
 202                         type->name, family_name(type->family),
 203                         type->revision_min, type->revision_max,
 204                         type->protocol, IPSET_PROTOCOL);
 205                 return -EINVAL;
 206         }
 207 
 208         ip_set_type_lock();
 209         if (find_set_type(type->name, type->family, type->revision_min)) {
 210                 /* Duplicate! */
 211                 pr_warn("ip_set type %s, family %s with revision min %u already registered!\n",
 212                         type->name, family_name(type->family),
 213                         type->revision_min);
 214                 ip_set_type_unlock();
 215                 return -EINVAL;
 216         }
 217         list_add_rcu(&type->list, &ip_set_type_list);
 218         pr_debug("type %s, family %s, revision %u:%u registered.\n",
 219                  type->name, family_name(type->family),
 220                  type->revision_min, type->revision_max);
 221         ip_set_type_unlock();
 222 
 223         return ret;
 224 }
 225 EXPORT_SYMBOL_GPL(ip_set_type_register);
 226 
 227 /* Unregister a set type. There's a small race with ip_set_create */
 228 void
 229 ip_set_type_unregister(struct ip_set_type *type)
 230 {
 231         ip_set_type_lock();
 232         if (!find_set_type(type->name, type->family, type->revision_min)) {
 233                 pr_warn("ip_set type %s, family %s with revision min %u not registered\n",
 234                         type->name, family_name(type->family),
 235                         type->revision_min);
 236                 ip_set_type_unlock();
 237                 return;
 238         }
 239         list_del_rcu(&type->list);
 240         pr_debug("type %s, family %s with revision min %u unregistered.\n",
 241                  type->name, family_name(type->family), type->revision_min);
 242         ip_set_type_unlock();
 243 
 244         synchronize_rcu();
 245 }
 246 EXPORT_SYMBOL_GPL(ip_set_type_unregister);
 247 
 248 /* Utility functions */
 249 void *
 250 ip_set_alloc(size_t size)
 251 {
 252         void *members = NULL;
 253 
 254         if (size < KMALLOC_MAX_SIZE)
 255                 members = kzalloc(size, GFP_KERNEL | __GFP_NOWARN);
 256 
 257         if (members) {
 258                 pr_debug("%p: allocated with kmalloc\n", members);
 259                 return members;
 260         }
 261 
 262         members = vzalloc(size);
 263         if (!members)
 264                 return NULL;
 265         pr_debug("%p: allocated with vmalloc\n", members);
 266 
 267         return members;
 268 }
 269 EXPORT_SYMBOL_GPL(ip_set_alloc);
 270 
 271 void
 272 ip_set_free(void *members)
 273 {
 274         pr_debug("%p: free with %s\n", members,
 275                  is_vmalloc_addr(members) ? "vfree" : "kfree");
 276         kvfree(members);
 277 }
 278 EXPORT_SYMBOL_GPL(ip_set_free);
 279 
 280 static inline bool
 281 flag_nested(const struct nlattr *nla)
 282 {
 283         return nla->nla_type & NLA_F_NESTED;
 284 }
 285 
 286 static const struct nla_policy ipaddr_policy[IPSET_ATTR_IPADDR_MAX + 1] = {
 287         [IPSET_ATTR_IPADDR_IPV4]        = { .type = NLA_U32 },
 288         [IPSET_ATTR_IPADDR_IPV6]        = { .type = NLA_BINARY,
 289                                             .len = sizeof(struct in6_addr) },
 290 };
 291 
 292 int
 293 ip_set_get_ipaddr4(struct nlattr *nla,  __be32 *ipaddr)
 294 {
 295         struct nlattr *tb[IPSET_ATTR_IPADDR_MAX + 1];
 296 
 297         if (unlikely(!flag_nested(nla)))
 298                 return -IPSET_ERR_PROTOCOL;
 299         if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla,
 300                              ipaddr_policy, NULL))
 301                 return -IPSET_ERR_PROTOCOL;
 302         if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV4)))
 303                 return -IPSET_ERR_PROTOCOL;
 304 
 305         *ipaddr = nla_get_be32(tb[IPSET_ATTR_IPADDR_IPV4]);
 306         return 0;
 307 }
 308 EXPORT_SYMBOL_GPL(ip_set_get_ipaddr4);
 309 
 310 int
 311 ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr)
 312 {
 313         struct nlattr *tb[IPSET_ATTR_IPADDR_MAX + 1];
 314 
 315         if (unlikely(!flag_nested(nla)))
 316                 return -IPSET_ERR_PROTOCOL;
 317 
 318         if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla,
 319                              ipaddr_policy, NULL))
 320                 return -IPSET_ERR_PROTOCOL;
 321         if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV6)))
 322                 return -IPSET_ERR_PROTOCOL;
 323 
 324         memcpy(ipaddr, nla_data(tb[IPSET_ATTR_IPADDR_IPV6]),
 325                sizeof(struct in6_addr));
 326         return 0;
 327 }
 328 EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6);
 329 
 330 typedef void (*destroyer)(struct ip_set *, void *);
 331 /* ipset data extension types, in size order */
 332 
 333 const struct ip_set_ext_type ip_set_extensions[] = {
 334         [IPSET_EXT_ID_COUNTER] = {
 335                 .type   = IPSET_EXT_COUNTER,
 336                 .flag   = IPSET_FLAG_WITH_COUNTERS,
 337                 .len    = sizeof(struct ip_set_counter),
 338                 .align  = __alignof__(struct ip_set_counter),
 339         },
 340         [IPSET_EXT_ID_TIMEOUT] = {
 341                 .type   = IPSET_EXT_TIMEOUT,
 342                 .len    = sizeof(unsigned long),
 343                 .align  = __alignof__(unsigned long),
 344         },
 345         [IPSET_EXT_ID_SKBINFO] = {
 346                 .type   = IPSET_EXT_SKBINFO,
 347                 .flag   = IPSET_FLAG_WITH_SKBINFO,
 348                 .len    = sizeof(struct ip_set_skbinfo),
 349                 .align  = __alignof__(struct ip_set_skbinfo),
 350         },
 351         [IPSET_EXT_ID_COMMENT] = {
 352                 .type    = IPSET_EXT_COMMENT | IPSET_EXT_DESTROY,
 353                 .flag    = IPSET_FLAG_WITH_COMMENT,
 354                 .len     = sizeof(struct ip_set_comment),
 355                 .align   = __alignof__(struct ip_set_comment),
 356                 .destroy = (destroyer) ip_set_comment_free,
 357         },
 358 };
 359 EXPORT_SYMBOL_GPL(ip_set_extensions);
 360 
 361 static inline bool
 362 add_extension(enum ip_set_ext_id id, u32 flags, struct nlattr *tb[])
 363 {
 364         return ip_set_extensions[id].flag ?
 365                 (flags & ip_set_extensions[id].flag) :
 366                 !!tb[IPSET_ATTR_TIMEOUT];
 367 }
 368 
 369 size_t
 370 ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], size_t len,
 371                 size_t align)
 372 {
 373         enum ip_set_ext_id id;
 374         u32 cadt_flags = 0;
 375 
 376         if (tb[IPSET_ATTR_CADT_FLAGS])
 377                 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
 378         if (cadt_flags & IPSET_FLAG_WITH_FORCEADD)
 379                 set->flags |= IPSET_CREATE_FLAG_FORCEADD;
 380         if (!align)
 381                 align = 1;
 382         for (id = 0; id < IPSET_EXT_ID_MAX; id++) {
 383                 if (!add_extension(id, cadt_flags, tb))
 384                         continue;
 385                 len = ALIGN(len, ip_set_extensions[id].align);
 386                 set->offset[id] = len;
 387                 set->extensions |= ip_set_extensions[id].type;
 388                 len += ip_set_extensions[id].len;
 389         }
 390         return ALIGN(len, align);
 391 }
 392 EXPORT_SYMBOL_GPL(ip_set_elem_len);
 393 
 394 int
 395 ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[],
 396                       struct ip_set_ext *ext)
 397 {
 398         u64 fullmark;
 399 
 400         if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
 401                      !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
 402                      !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
 403                      !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
 404                      !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
 405                      !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
 406                 return -IPSET_ERR_PROTOCOL;
 407 
 408         if (tb[IPSET_ATTR_TIMEOUT]) {
 409                 if (!SET_WITH_TIMEOUT(set))
 410                         return -IPSET_ERR_TIMEOUT;
 411                 ext->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
 412         }
 413         if (tb[IPSET_ATTR_BYTES] || tb[IPSET_ATTR_PACKETS]) {
 414                 if (!SET_WITH_COUNTER(set))
 415                         return -IPSET_ERR_COUNTER;
 416                 if (tb[IPSET_ATTR_BYTES])
 417                         ext->bytes = be64_to_cpu(nla_get_be64(
 418                                                  tb[IPSET_ATTR_BYTES]));
 419                 if (tb[IPSET_ATTR_PACKETS])
 420                         ext->packets = be64_to_cpu(nla_get_be64(
 421                                                    tb[IPSET_ATTR_PACKETS]));
 422         }
 423         if (tb[IPSET_ATTR_COMMENT]) {
 424                 if (!SET_WITH_COMMENT(set))
 425                         return -IPSET_ERR_COMMENT;
 426                 ext->comment = ip_set_comment_uget(tb[IPSET_ATTR_COMMENT]);
 427         }
 428         if (tb[IPSET_ATTR_SKBMARK]) {
 429                 if (!SET_WITH_SKBINFO(set))
 430                         return -IPSET_ERR_SKBINFO;
 431                 fullmark = be64_to_cpu(nla_get_be64(tb[IPSET_ATTR_SKBMARK]));
 432                 ext->skbinfo.skbmark = fullmark >> 32;
 433                 ext->skbinfo.skbmarkmask = fullmark & 0xffffffff;
 434         }
 435         if (tb[IPSET_ATTR_SKBPRIO]) {
 436                 if (!SET_WITH_SKBINFO(set))
 437                         return -IPSET_ERR_SKBINFO;
 438                 ext->skbinfo.skbprio =
 439                         be32_to_cpu(nla_get_be32(tb[IPSET_ATTR_SKBPRIO]));
 440         }
 441         if (tb[IPSET_ATTR_SKBQUEUE]) {
 442                 if (!SET_WITH_SKBINFO(set))
 443                         return -IPSET_ERR_SKBINFO;
 444                 ext->skbinfo.skbqueue =
 445                         be16_to_cpu(nla_get_be16(tb[IPSET_ATTR_SKBQUEUE]));
 446         }
 447         return 0;
 448 }
 449 EXPORT_SYMBOL_GPL(ip_set_get_extensions);
 450 
 451 int
 452 ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set,
 453                       const void *e, bool active)
 454 {
 455         if (SET_WITH_TIMEOUT(set)) {
 456                 unsigned long *timeout = ext_timeout(e, set);
 457 
 458                 if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
 459                         htonl(active ? ip_set_timeout_get(timeout)
 460                                 : *timeout)))
 461                         return -EMSGSIZE;
 462         }
 463         if (SET_WITH_COUNTER(set) &&
 464             ip_set_put_counter(skb, ext_counter(e, set)))
 465                 return -EMSGSIZE;
 466         if (SET_WITH_COMMENT(set) &&
 467             ip_set_put_comment(skb, ext_comment(e, set)))
 468                 return -EMSGSIZE;
 469         if (SET_WITH_SKBINFO(set) &&
 470             ip_set_put_skbinfo(skb, ext_skbinfo(e, set)))
 471                 return -EMSGSIZE;
 472         return 0;
 473 }
 474 EXPORT_SYMBOL_GPL(ip_set_put_extensions);
 475 
 476 bool
 477 ip_set_match_extensions(struct ip_set *set, const struct ip_set_ext *ext,
 478                         struct ip_set_ext *mext, u32 flags, void *data)
 479 {
 480         if (SET_WITH_TIMEOUT(set) &&
 481             ip_set_timeout_expired(ext_timeout(data, set)))
 482                 return false;
 483         if (SET_WITH_COUNTER(set)) {
 484                 struct ip_set_counter *counter = ext_counter(data, set);
 485 
 486                 if (flags & IPSET_FLAG_MATCH_COUNTERS &&
 487                     !(ip_set_match_counter(ip_set_get_packets(counter),
 488                                 mext->packets, mext->packets_op) &&
 489                       ip_set_match_counter(ip_set_get_bytes(counter),
 490                                 mext->bytes, mext->bytes_op)))
 491                         return false;
 492                 ip_set_update_counter(counter, ext, flags);
 493         }
 494         if (SET_WITH_SKBINFO(set))
 495                 ip_set_get_skbinfo(ext_skbinfo(data, set),
 496                                    ext, mext, flags);
 497         return true;
 498 }
 499 EXPORT_SYMBOL_GPL(ip_set_match_extensions);
 500 
 501 /* Creating/destroying/renaming/swapping affect the existence and
 502  * the properties of a set. All of these can be executed from userspace
 503  * only and serialized by the nfnl mutex indirectly from nfnetlink.
 504  *
 505  * Sets are identified by their index in ip_set_list and the index
 506  * is used by the external references (set/SET netfilter modules).
 507  *
 508  * The set behind an index may change by swapping only, from userspace.
 509  */
 510 
 511 static inline void
 512 __ip_set_get(struct ip_set *set)
 513 {
 514         write_lock_bh(&ip_set_ref_lock);
 515         set->ref++;
 516         write_unlock_bh(&ip_set_ref_lock);
 517 }
 518 
 519 static inline void
 520 __ip_set_put(struct ip_set *set)
 521 {
 522         write_lock_bh(&ip_set_ref_lock);
 523         BUG_ON(set->ref == 0);
 524         set->ref--;
 525         write_unlock_bh(&ip_set_ref_lock);
 526 }
 527 
 528 /* set->ref can be swapped out by ip_set_swap, netlink events (like dump) need
 529  * a separate reference counter
 530  */
 531 static inline void
 532 __ip_set_put_netlink(struct ip_set *set)
 533 {
 534         write_lock_bh(&ip_set_ref_lock);
 535         BUG_ON(set->ref_netlink == 0);
 536         set->ref_netlink--;
 537         write_unlock_bh(&ip_set_ref_lock);
 538 }
 539 
 540 /* Add, del and test set entries from kernel.
 541  *
 542  * The set behind the index must exist and must be referenced
 543  * so it can't be destroyed (or changed) under our feet.
 544  */
 545 
 546 static inline struct ip_set *
 547 ip_set_rcu_get(struct net *net, ip_set_id_t index)
 548 {
 549         struct ip_set *set;
 550         struct ip_set_net *inst = ip_set_pernet(net);
 551 
 552         rcu_read_lock();
 553         /* ip_set_list itself needs to be protected */
 554         set = rcu_dereference(inst->ip_set_list)[index];
 555         rcu_read_unlock();
 556 
 557         return set;
 558 }
 559 
 560 static inline void
 561 ip_set_lock(struct ip_set *set)
 562 {
 563         if (!set->variant->region_lock)
 564                 spin_lock_bh(&set->lock);
 565 }
 566 
 567 static inline void
 568 ip_set_unlock(struct ip_set *set)
 569 {
 570         if (!set->variant->region_lock)
 571                 spin_unlock_bh(&set->lock);
 572 }
 573 
 574 int
 575 ip_set_test(ip_set_id_t index, const struct sk_buff *skb,
 576             const struct xt_action_param *par, struct ip_set_adt_opt *opt)
 577 {
 578         struct ip_set *set = ip_set_rcu_get(xt_net(par), index);
 579         int ret = 0;
 580 
 581         BUG_ON(!set);
 582         pr_debug("set %s, index %u\n", set->name, index);
 583 
 584         if (opt->dim < set->type->dimension ||
 585             !(opt->family == set->family || set->family == NFPROTO_UNSPEC))
 586                 return 0;
 587 
 588         rcu_read_lock_bh();
 589         ret = set->variant->kadt(set, skb, par, IPSET_TEST, opt);
 590         rcu_read_unlock_bh();
 591 
 592         if (ret == -EAGAIN) {
 593                 /* Type requests element to be completed */
 594                 pr_debug("element must be completed, ADD is triggered\n");
 595                 ip_set_lock(set);
 596                 set->variant->kadt(set, skb, par, IPSET_ADD, opt);
 597                 ip_set_unlock(set);
 598                 ret = 1;
 599         } else {
 600                 /* --return-nomatch: invert matched element */
 601                 if ((opt->cmdflags & IPSET_FLAG_RETURN_NOMATCH) &&
 602                     (set->type->features & IPSET_TYPE_NOMATCH) &&
 603                     (ret > 0 || ret == -ENOTEMPTY))
 604                         ret = -ret;
 605         }
 606 
 607         /* Convert error codes to nomatch */
 608         return (ret < 0 ? 0 : ret);
 609 }
 610 EXPORT_SYMBOL_GPL(ip_set_test);
 611 
 612 int
 613 ip_set_add(ip_set_id_t index, const struct sk_buff *skb,
 614            const struct xt_action_param *par, struct ip_set_adt_opt *opt)
 615 {
 616         struct ip_set *set = ip_set_rcu_get(xt_net(par), index);
 617         int ret;
 618 
 619         BUG_ON(!set);
 620         pr_debug("set %s, index %u\n", set->name, index);
 621 
 622         if (opt->dim < set->type->dimension ||
 623             !(opt->family == set->family || set->family == NFPROTO_UNSPEC))
 624                 return -IPSET_ERR_TYPE_MISMATCH;
 625 
 626         ip_set_lock(set);
 627         ret = set->variant->kadt(set, skb, par, IPSET_ADD, opt);
 628         ip_set_unlock(set);
 629 
 630         return ret;
 631 }
 632 EXPORT_SYMBOL_GPL(ip_set_add);
 633 
 634 int
 635 ip_set_del(ip_set_id_t index, const struct sk_buff *skb,
 636            const struct xt_action_param *par, struct ip_set_adt_opt *opt)
 637 {
 638         struct ip_set *set = ip_set_rcu_get(xt_net(par), index);
 639         int ret = 0;
 640 
 641         BUG_ON(!set);
 642         pr_debug("set %s, index %u\n", set->name, index);
 643 
 644         if (opt->dim < set->type->dimension ||
 645             !(opt->family == set->family || set->family == NFPROTO_UNSPEC))
 646                 return -IPSET_ERR_TYPE_MISMATCH;
 647 
 648         ip_set_lock(set);
 649         ret = set->variant->kadt(set, skb, par, IPSET_DEL, opt);
 650         ip_set_unlock(set);
 651 
 652         return ret;
 653 }
 654 EXPORT_SYMBOL_GPL(ip_set_del);
 655 
 656 /* Find set by name, reference it once. The reference makes sure the
 657  * thing pointed to, does not go away under our feet.
 658  *
 659  */
 660 ip_set_id_t
 661 ip_set_get_byname(struct net *net, const char *name, struct ip_set **set)
 662 {
 663         ip_set_id_t i, index = IPSET_INVALID_ID;
 664         struct ip_set *s;
 665         struct ip_set_net *inst = ip_set_pernet(net);
 666 
 667         rcu_read_lock();
 668         for (i = 0; i < inst->ip_set_max; i++) {
 669                 s = rcu_dereference(inst->ip_set_list)[i];
 670                 if (s && STRNCMP(s->name, name)) {
 671                         __ip_set_get(s);
 672                         index = i;
 673                         *set = s;
 674                         break;
 675                 }
 676         }
 677         rcu_read_unlock();
 678 
 679         return index;
 680 }
 681 EXPORT_SYMBOL_GPL(ip_set_get_byname);
 682 
 683 /* If the given set pointer points to a valid set, decrement
 684  * reference count by 1. The caller shall not assume the index
 685  * to be valid, after calling this function.
 686  *
 687  */
 688 
 689 static inline void
 690 __ip_set_put_byindex(struct ip_set_net *inst, ip_set_id_t index)
 691 {
 692         struct ip_set *set;
 693 
 694         rcu_read_lock();
 695         set = rcu_dereference(inst->ip_set_list)[index];
 696         if (set)
 697                 __ip_set_put(set);
 698         rcu_read_unlock();
 699 }
 700 
 701 void
 702 ip_set_put_byindex(struct net *net, ip_set_id_t index)
 703 {
 704         struct ip_set_net *inst = ip_set_pernet(net);
 705 
 706         __ip_set_put_byindex(inst, index);
 707 }
 708 EXPORT_SYMBOL_GPL(ip_set_put_byindex);
 709 
 710 /* Get the name of a set behind a set index.
 711  * Set itself is protected by RCU, but its name isn't: to protect against
 712  * renaming, grab ip_set_ref_lock as reader (see ip_set_rename()) and copy the
 713  * name.
 714  */
 715 void
 716 ip_set_name_byindex(struct net *net, ip_set_id_t index, char *name)
 717 {
 718         struct ip_set *set = ip_set_rcu_get(net, index);
 719 
 720         BUG_ON(!set);
 721 
 722         read_lock_bh(&ip_set_ref_lock);
 723         strncpy(name, set->name, IPSET_MAXNAMELEN);
 724         read_unlock_bh(&ip_set_ref_lock);
 725 }
 726 EXPORT_SYMBOL_GPL(ip_set_name_byindex);
 727 
 728 /* Routines to call by external subsystems, which do not
 729  * call nfnl_lock for us.
 730  */
 731 
 732 /* Find set by index, reference it once. The reference makes sure the
 733  * thing pointed to, does not go away under our feet.
 734  *
 735  * The nfnl mutex is used in the function.
 736  */
 737 ip_set_id_t
 738 ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index)
 739 {
 740         struct ip_set *set;
 741         struct ip_set_net *inst = ip_set_pernet(net);
 742 
 743         if (index >= inst->ip_set_max)
 744                 return IPSET_INVALID_ID;
 745 
 746         nfnl_lock(NFNL_SUBSYS_IPSET);
 747         set = ip_set(inst, index);
 748         if (set)
 749                 __ip_set_get(set);
 750         else
 751                 index = IPSET_INVALID_ID;
 752         nfnl_unlock(NFNL_SUBSYS_IPSET);
 753 
 754         return index;
 755 }
 756 EXPORT_SYMBOL_GPL(ip_set_nfnl_get_byindex);
 757 
 758 /* If the given set pointer points to a valid set, decrement
 759  * reference count by 1. The caller shall not assume the index
 760  * to be valid, after calling this function.
 761  *
 762  * The nfnl mutex is used in the function.
 763  */
 764 void
 765 ip_set_nfnl_put(struct net *net, ip_set_id_t index)
 766 {
 767         struct ip_set *set;
 768         struct ip_set_net *inst = ip_set_pernet(net);
 769 
 770         nfnl_lock(NFNL_SUBSYS_IPSET);
 771         if (!inst->is_deleted) { /* already deleted from ip_set_net_exit() */
 772                 set = ip_set(inst, index);
 773                 if (set)
 774                         __ip_set_put(set);
 775         }
 776         nfnl_unlock(NFNL_SUBSYS_IPSET);
 777 }
 778 EXPORT_SYMBOL_GPL(ip_set_nfnl_put);
 779 
 780 /* Communication protocol with userspace over netlink.
 781  *
 782  * The commands are serialized by the nfnl mutex.
 783  */
 784 
 785 static inline u8 protocol(const struct nlattr * const tb[])
 786 {
 787         return nla_get_u8(tb[IPSET_ATTR_PROTOCOL]);
 788 }
 789 
 790 static inline bool
 791 protocol_failed(const struct nlattr * const tb[])
 792 {
 793         return !tb[IPSET_ATTR_PROTOCOL] || protocol(tb) != IPSET_PROTOCOL;
 794 }
 795 
 796 static inline bool
 797 protocol_min_failed(const struct nlattr * const tb[])
 798 {
 799         return !tb[IPSET_ATTR_PROTOCOL] || protocol(tb) < IPSET_PROTOCOL_MIN;
 800 }
 801 
 802 static inline u32
 803 flag_exist(const struct nlmsghdr *nlh)
 804 {
 805         return nlh->nlmsg_flags & NLM_F_EXCL ? 0 : IPSET_FLAG_EXIST;
 806 }
 807 
 808 static struct nlmsghdr *
 809 start_msg(struct sk_buff *skb, u32 portid, u32 seq, unsigned int flags,
 810           enum ipset_cmd cmd)
 811 {
 812         struct nlmsghdr *nlh;
 813         struct nfgenmsg *nfmsg;
 814 
 815         nlh = nlmsg_put(skb, portid, seq, nfnl_msg_type(NFNL_SUBSYS_IPSET, cmd),
 816                         sizeof(*nfmsg), flags);
 817         if (!nlh)
 818                 return NULL;
 819 
 820         nfmsg = nlmsg_data(nlh);
 821         nfmsg->nfgen_family = NFPROTO_IPV4;
 822         nfmsg->version = NFNETLINK_V0;
 823         nfmsg->res_id = 0;
 824 
 825         return nlh;
 826 }
 827 
 828 /* Create a set */
 829 
     /* Attribute policy for set creation requests: protocol, revision and
      * family are single bytes, the set and type names are NUL terminated
      * strings of at most IPSET_MAXNAMELEN - 1 characters and the
      * type specific data is a nested attribute.
      */
 830 static const struct nla_policy ip_set_create_policy[IPSET_ATTR_CMD_MAX + 1] = {
 831         [IPSET_ATTR_PROTOCOL]   = { .type = NLA_U8 },
 832         [IPSET_ATTR_SETNAME]    = { .type = NLA_NUL_STRING,
 833                                     .len = IPSET_MAXNAMELEN - 1 },
 834         [IPSET_ATTR_TYPENAME]   = { .type = NLA_NUL_STRING,
 835                                     .len = IPSET_MAXNAMELEN - 1},
 836         [IPSET_ATTR_REVISION]   = { .type = NLA_U8 },
 837         [IPSET_ATTR_FAMILY]     = { .type = NLA_U8 },
 838         [IPSET_ATTR_DATA]       = { .type = NLA_NESTED },
 839 };
 840 
 841 static struct ip_set *
 842 find_set_and_id(struct ip_set_net *inst, const char *name, ip_set_id_t *id)
 843 {
 844         struct ip_set *set = NULL;
 845         ip_set_id_t i;
 846 
 847         *id = IPSET_INVALID_ID;
 848         for (i = 0; i < inst->ip_set_max; i++) {
 849                 set = ip_set(inst, i);
 850                 if (set && STRNCMP(set->name, name)) {
 851                         *id = i;
 852                         break;
 853                 }
 854         }
 855         return (*id == IPSET_INVALID_ID ? NULL : set);
 856 }
 857 
 858 static inline struct ip_set *
 859 find_set(struct ip_set_net *inst, const char *name)
 860 {
 861         ip_set_id_t id;
 862 
 863         return find_set_and_id(inst, name, &id);
 864 }
 865 
 866 static int
 867 find_free_id(struct ip_set_net *inst, const char *name, ip_set_id_t *index,
 868              struct ip_set **set)
 869 {
 870         struct ip_set *s;
 871         ip_set_id_t i;
 872 
 873         *index = IPSET_INVALID_ID;
 874         for (i = 0;  i < inst->ip_set_max; i++) {
 875                 s = ip_set(inst, i);
 876                 if (!s) {
 877                         if (*index == IPSET_INVALID_ID)
 878                                 *index = i;
 879                 } else if (STRNCMP(name, s->name)) {
 880                         /* Name clash */
 881                         *set = s;
 882                         return -EEXIST;
 883                 }
 884         }
 885         if (*index == IPSET_INVALID_ID)
 886                 /* No free slot remained */
 887                 return -IPSET_ERR_MAX_SETS;
 888         return 0;
 889 }
 890 
 891 static int ip_set_none(struct net *net, struct sock *ctnl, struct sk_buff *skb,
 892                        const struct nlmsghdr *nlh,
 893                        const struct nlattr * const attr[],
 894                        struct netlink_ext_ack *extack)
 895 {
 896         return -EOPNOTSUPP;
 897 }
 898 
     /* Handle a set creation request.
      *
      * A base struct ip_set is built from the SETNAME, TYPENAME, FAMILY and
      * REVISION attributes, the set type constructs its private part and the
      * set is then stored in the first free slot of the set array of the
      * netns. When no slot is free, the array is grown by IP_SET_INC entries.
      *
      * Returns 0 on success, and also when a set with the same name already
      * exists, the request did not carry NLM_F_EXCL and the existing set is
      * the same as the requested one (the new copy is thrown away then).
      * Returns -IPSET_ERR_PROTOCOL for a malformed request, -ENOMEM when the
      * base structure cannot be allocated, otherwise the error reported by
      * find_set_type_get(), by the create() method of the type or by
      * find_free_id().
      */
 899 static int ip_set_create(struct net *net, struct sock *ctnl,
 900                          struct sk_buff *skb, const struct nlmsghdr *nlh,
 901                          const struct nlattr * const attr[],
 902                          struct netlink_ext_ack *extack)
 903 {
 904         struct ip_set_net *inst = ip_set_pernet(net);
 905         struct ip_set *set, *clash = NULL;
 906         ip_set_id_t index = IPSET_INVALID_ID;
 907         struct nlattr *tb[IPSET_ATTR_CREATE_MAX + 1] = {};
 908         const char *name, *typename;
 909         u8 family, revision;
 910         u32 flags = flag_exist(nlh);
 911         int ret = 0;
 912 
             /* DATA is optional, but must be a nested attribute if present */
 913         if (unlikely(protocol_min_failed(attr) ||
 914                      !attr[IPSET_ATTR_SETNAME] ||
 915                      !attr[IPSET_ATTR_TYPENAME] ||
 916                      !attr[IPSET_ATTR_REVISION] ||
 917                      !attr[IPSET_ATTR_FAMILY] ||
 918                      (attr[IPSET_ATTR_DATA] &&
 919                       !flag_nested(attr[IPSET_ATTR_DATA]))))
 920                 return -IPSET_ERR_PROTOCOL;
 921 
 922         name = nla_data(attr[IPSET_ATTR_SETNAME]);
 923         typename = nla_data(attr[IPSET_ATTR_TYPENAME]);
 924         family = nla_get_u8(attr[IPSET_ATTR_FAMILY]);
 925         revision = nla_get_u8(attr[IPSET_ATTR_REVISION]);
 926         pr_debug("setname: %s, typename: %s, family: %s, revision: %u\n",
 927                  name, typename, family_name(family), revision);
 928 
 929         /* First, and without any locks, allocate and initialize
 930          * a normal base set structure.
 931          */
 932         set = kzalloc(sizeof(*set), GFP_KERNEL);
 933         if (!set)
 934                 return -ENOMEM;
 935         spin_lock_init(&set->lock);
 936         strlcpy(set->name, name, IPSET_MAXNAMELEN);
 937         set->family = family;
 938         set->revision = revision;
 939 
 940         /* Next, check that we know the type, and take
 941          * a reference on the type, to make sure it stays available
 942          * while constructing our new set.
 943          *
 944          * After referencing the type, we try to create the type
 945          * specific part of the set without holding any locks.
 946          */
 947         ret = find_set_type_get(typename, family, revision, &set->type);
 948         if (ret)
 949                 goto out;
 950 
 951         /* Without holding any locks, create private part. */
             /* Without a DATA attribute, tb[] stays all NULL for create() */
 952         if (attr[IPSET_ATTR_DATA] &&
 953             nla_parse_nested(tb, IPSET_ATTR_CREATE_MAX, attr[IPSET_ATTR_DATA],
 954                              set->type->create_policy, NULL)) {
 955                 ret = -IPSET_ERR_PROTOCOL;
 956                 goto put_out;
 957         }
 958 
 959         ret = set->type->create(net, set, tb, flags);
 960         if (ret != 0)
 961                 goto put_out;
 962 
 963         /* BTW, ret==0 here. */
 964 
 965         /* Here, we have a valid, constructed set and we are protected
 966          * by the nfnl mutex. Find the first free index in ip_set_list
 967          * and check clashing.
 968          */
 969         ret = find_free_id(inst, set->name, &index, &clash);
 970         if (ret == -EEXIST) {
 971                 /* If this is the same set and requested, ignore error */
 972                 if ((flags & IPSET_FLAG_EXIST) &&
 973                     STRNCMP(set->type->name, clash->type->name) &&
 974                     set->type->family == clash->type->family &&
 975                     set->type->revision_min == clash->type->revision_min &&
 976                     set->type->revision_max == clash->type->revision_max &&
 977                     set->variant->same_set(set, clash))
 978                         ret = 0;
                     /* The new set is destroyed whether or not the clash
                      * was tolerated: the existing one stays in place.
                      */
 979                 goto cleanup;
 980         } else if (ret == -IPSET_ERR_MAX_SETS) {
 981                 struct ip_set **list, **tmp;
 982                 ip_set_id_t i = inst->ip_set_max + IP_SET_INC;
 983 
                     /* ret is still -IPSET_ERR_MAX_SETS on both failure
                      * exits below.
                      */
 984                 if (i < inst->ip_set_max || i == IPSET_INVALID_ID)
 985                         /* Wraparound */
 986                         goto cleanup;
 987 
 988                 list = kvcalloc(i, sizeof(struct ip_set *), GFP_KERNEL);
 989                 if (!list)
 990                         goto cleanup;
 991                 /* nfnl mutex is held, both lists are valid */
 992                 tmp = ip_set_dereference(inst->ip_set_list);
 993                 memcpy(list, tmp, sizeof(struct ip_set *) * inst->ip_set_max);
 994                 rcu_assign_pointer(inst->ip_set_list, list);
 995                 /* Make sure all current packets have passed through */
 996                 synchronize_net();
 997                 /* Use new list */
                     /* The first slot beyond the old array is the free one */
 998                 index = inst->ip_set_max;
 999                 inst->ip_set_max = i;
1000                 kvfree(tmp);
1001                 ret = 0;
1002         } else if (ret) {
1003                 goto cleanup;
1004         }
1005 
1006         /* Finally! Add our shiny new set to the list, and be done. */
1007         pr_debug("create: '%s' created with index %u!\n", set->name, index);
1008         ip_set(inst, index) = set;
1009 
1010         return ret;
1011 
     /* Unwind in reverse order of construction: private part of the set,
      * reference on the type module, base structure.
      */
1012 cleanup:
1013         set->variant->destroy(set);
1014 put_out:
1015         module_put(set->type->me);
1016 out:
1017         kfree(set);
1018         return ret;
1019 }
1020 
1021 /* Destroy sets */
1022 
     /* Attribute policy for commands which take at most a set name
      * besides the protocol version: the name is a NUL terminated string
      * of at most IPSET_MAXNAMELEN - 1 characters.
      */
1023 static const struct nla_policy
1024 ip_set_setname_policy[IPSET_ATTR_CMD_MAX + 1] = {
1025         [IPSET_ATTR_PROTOCOL]   = { .type = NLA_U8 },
1026         [IPSET_ATTR_SETNAME]    = { .type = NLA_NUL_STRING,
1027                                     .len = IPSET_MAXNAMELEN - 1 },
1028 };
1029 
1030 static void
1031 ip_set_destroy_set(struct ip_set *set)
1032 {
1033         pr_debug("set: %s\n",  set->name);
1034 
1035         /* Must call it without holding any lock */
1036         set->variant->destroy(set);
1037         module_put(set->type->me);
1038         kfree(set);
1039 }
1040 
     /* Handle a destroy request.
      *
      * With a SETNAME attribute only the named set is destroyed, without it
      * every set of the netns is destroyed. A set is destroyed only when both
      * its ref and ref_netlink counters are zero; when destroying all sets,
      * a single referenced set makes the whole request fail.
      *
      * Returns 0 on success, -IPSET_ERR_PROTOCOL when the protocol check
      * fails, -ENOENT when the named set does not exist and -IPSET_ERR_BUSY
      * when a set to be destroyed is still referenced.
      */
1041 static int ip_set_destroy(struct net *net, struct sock *ctnl,
1042                           struct sk_buff *skb, const struct nlmsghdr *nlh,
1043                           const struct nlattr * const attr[],
1044                           struct netlink_ext_ack *extack)
1045 {
1046         struct ip_set_net *inst = ip_set_pernet(net);
1047         struct ip_set *s;
1048         ip_set_id_t i;
1049         int ret = 0;
1050 
1051         if (unlikely(protocol_min_failed(attr)))
1052                 return -IPSET_ERR_PROTOCOL;
1053 
1054         /* Must wait for flush to be really finished in list:set */
1055         rcu_barrier();
1056 
1057         /* Commands are serialized and references are
1058          * protected by the ip_set_ref_lock.
1059          * External systems (i.e. xt_set) must call
1060          * ip_set_put|get_nfnl_* functions, that way we
1061          * can safely check references here.
1062          *
1063          * list:set timer can only decrement the reference
1064          * counter, so if it's already zero, we can proceed
1065          * without holding the lock.
1066          */
1067         read_lock_bh(&ip_set_ref_lock);
1068         if (!attr[IPSET_ATTR_SETNAME]) {
                     /* First pass: refuse if any set is still referenced */
1069                 for (i = 0; i < inst->ip_set_max; i++) {
1070                         s = ip_set(inst, i);
1071                         if (s && (s->ref || s->ref_netlink)) {
1072                                 ret = -IPSET_ERR_BUSY;
1073                                 goto out;
1074                         }
1075                 }
                     /* ip_set_dump_do() reads this flag under the same lock
                      * and stops dumping while it is set.
                      */
1076                 inst->is_destroyed = true;
1077                 read_unlock_bh(&ip_set_ref_lock);
                     /* Second pass: empty each slot, then free its set */
1078                 for (i = 0; i < inst->ip_set_max; i++) {
1079                         s = ip_set(inst, i);
1080                         if (s) {
1081                                 ip_set(inst, i) = NULL;
1082                                 ip_set_destroy_set(s);
1083                         }
1084                 }
1085                 /* Modified by ip_set_destroy() only, which is serialized */
1086                 inst->is_destroyed = false;
1087         } else {
1088                 s = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]),
1089                                     &i);
1090                 if (!s) {
1091                         ret = -ENOENT;
1092                         goto out;
1093                 } else if (s->ref || s->ref_netlink) {
1094                         ret = -IPSET_ERR_BUSY;
1095                         goto out;
1096                 }
                     /* Clear the slot before the lock is dropped; the set
                      * itself is destroyed without holding the lock.
                      */
1097                 ip_set(inst, i) = NULL;
1098                 read_unlock_bh(&ip_set_ref_lock);
1099 
1100                 ip_set_destroy_set(s);
1101         }
1102         return 0;
     /* Error exit: the read lock is still held on every path to here */
1103 out:
1104         read_unlock_bh(&ip_set_ref_lock);
1105         return ret;
1106 }
1107 
1108 /* Flush sets */
1109 
1110 static void
1111 ip_set_flush_set(struct ip_set *set)
1112 {
1113         pr_debug("set: %s\n",  set->name);
1114 
1115         ip_set_lock(set);
1116         set->variant->flush(set);
1117         ip_set_unlock(set);
1118 }
1119 
1120 static int ip_set_flush(struct net *net, struct sock *ctnl, struct sk_buff *skb,
1121                         const struct nlmsghdr *nlh,
1122                         const struct nlattr * const attr[],
1123                         struct netlink_ext_ack *extack)
1124 {
1125         struct ip_set_net *inst = ip_set_pernet(net);
1126         struct ip_set *s;
1127         ip_set_id_t i;
1128 
1129         if (unlikely(protocol_min_failed(attr)))
1130                 return -IPSET_ERR_PROTOCOL;
1131 
1132         if (!attr[IPSET_ATTR_SETNAME]) {
1133                 for (i = 0; i < inst->ip_set_max; i++) {
1134                         s = ip_set(inst, i);
1135                         if (s)
1136                                 ip_set_flush_set(s);
1137                 }
1138         } else {
1139                 s = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
1140                 if (!s)
1141                         return -ENOENT;
1142 
1143                 ip_set_flush_set(s);
1144         }
1145 
1146         return 0;
1147 }
1148 
1149 /* Rename a set */
1150 
     /* Attribute policy for commands which take two set names: both are
      * NUL terminated strings of at most IPSET_MAXNAMELEN - 1 characters.
      */
1151 static const struct nla_policy
1152 ip_set_setname2_policy[IPSET_ATTR_CMD_MAX + 1] = {
1153         [IPSET_ATTR_PROTOCOL]   = { .type = NLA_U8 },
1154         [IPSET_ATTR_SETNAME]    = { .type = NLA_NUL_STRING,
1155                                     .len = IPSET_MAXNAMELEN - 1 },
1156         [IPSET_ATTR_SETNAME2]   = { .type = NLA_NUL_STRING,
1157                                     .len = IPSET_MAXNAMELEN - 1 },
1158 };
1159 
1160 static int ip_set_rename(struct net *net, struct sock *ctnl,
1161                          struct sk_buff *skb, const struct nlmsghdr *nlh,
1162                          const struct nlattr * const attr[],
1163                          struct netlink_ext_ack *extack)
1164 {
1165         struct ip_set_net *inst = ip_set_pernet(net);
1166         struct ip_set *set, *s;
1167         const char *name2;
1168         ip_set_id_t i;
1169         int ret = 0;
1170 
1171         if (unlikely(protocol_min_failed(attr) ||
1172                      !attr[IPSET_ATTR_SETNAME] ||
1173                      !attr[IPSET_ATTR_SETNAME2]))
1174                 return -IPSET_ERR_PROTOCOL;
1175 
1176         set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
1177         if (!set)
1178                 return -ENOENT;
1179 
1180         write_lock_bh(&ip_set_ref_lock);
1181         if (set->ref != 0 || set->ref_netlink != 0) {
1182                 ret = -IPSET_ERR_REFERENCED;
1183                 goto out;
1184         }
1185 
1186         name2 = nla_data(attr[IPSET_ATTR_SETNAME2]);
1187         for (i = 0; i < inst->ip_set_max; i++) {
1188                 s = ip_set(inst, i);
1189                 if (s && STRNCMP(s->name, name2)) {
1190                         ret = -IPSET_ERR_EXIST_SETNAME2;
1191                         goto out;
1192                 }
1193         }
1194         strncpy(set->name, name2, IPSET_MAXNAMELEN);
1195 
1196 out:
1197         write_unlock_bh(&ip_set_ref_lock);
1198         return ret;
1199 }
1200 
1201 /* Swap two sets so that name/index points to the other.
1202  * References and set names are also swapped.
1203  *
1204  * The commands are serialized by the nfnl mutex and references are
1205  * protected by the ip_set_ref_lock. The kernel interfaces
1206  * do not hold the mutex but the pointer settings are atomic
1207  * so the ip_set_list always contains valid pointers to the sets.
1208  */
1209 
1210 static int ip_set_swap(struct net *net, struct sock *ctnl, struct sk_buff *skb,
1211                        const struct nlmsghdr *nlh,
1212                        const struct nlattr * const attr[],
1213                        struct netlink_ext_ack *extack)
1214 {
1215         struct ip_set_net *inst = ip_set_pernet(net);
1216         struct ip_set *from, *to;
1217         ip_set_id_t from_id, to_id;
1218         char from_name[IPSET_MAXNAMELEN];
1219 
1220         if (unlikely(protocol_min_failed(attr) ||
1221                      !attr[IPSET_ATTR_SETNAME] ||
1222                      !attr[IPSET_ATTR_SETNAME2]))
1223                 return -IPSET_ERR_PROTOCOL;
1224 
1225         from = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]),
1226                                &from_id);
1227         if (!from)
1228                 return -ENOENT;
1229 
1230         to = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME2]),
1231                              &to_id);
1232         if (!to)
1233                 return -IPSET_ERR_EXIST_SETNAME2;
1234 
1235         /* Features must not change.
1236          * Not an artifical restriction anymore, as we must prevent
1237          * possible loops created by swapping in setlist type of sets.
1238          */
1239         if (!(from->type->features == to->type->features &&
1240               from->family == to->family))
1241                 return -IPSET_ERR_TYPE_MISMATCH;
1242 
1243         write_lock_bh(&ip_set_ref_lock);
1244 
1245         if (from->ref_netlink || to->ref_netlink) {
1246                 write_unlock_bh(&ip_set_ref_lock);
1247                 return -EBUSY;
1248         }
1249 
1250         strncpy(from_name, from->name, IPSET_MAXNAMELEN);
1251         strncpy(from->name, to->name, IPSET_MAXNAMELEN);
1252         strncpy(to->name, from_name, IPSET_MAXNAMELEN);
1253 
1254         swap(from->ref, to->ref);
1255         ip_set(inst, from_id) = to;
1256         ip_set(inst, to_id) = from;
1257         write_unlock_bh(&ip_set_ref_lock);
1258 
1259         return 0;
1260 }
1261 
1262 /* List/save set data */
1263 
     /* Dump state, kept in cb->args[IPSET_CB_DUMP] between the calls of
      * the dump callback: the lower 16 bits hold one of the dump types
      * below, the upper 16 bits hold the flags sent by userspace.
      * The value is DUMP_INIT (zero) until ip_set_dump_start() fills it in.
      */
1264 #define DUMP_INIT       0
1265 #define DUMP_ALL        1
1266 #define DUMP_ONE        2
1267 #define DUMP_LAST       3
1268 
     /* Split a stored dump state into its type and flags parts */
1269 #define DUMP_TYPE(arg)          (((u32)(arg)) & 0x0000FFFF)
1270 #define DUMP_FLAGS(arg)         (((u32)(arg)) >> 16)
1271 
1272 static int
1273 ip_set_dump_done(struct netlink_callback *cb)
1274 {
1275         if (cb->args[IPSET_CB_ARG0]) {
1276                 struct ip_set_net *inst =
1277                         (struct ip_set_net *)cb->args[IPSET_CB_NET];
1278                 ip_set_id_t index = (ip_set_id_t)cb->args[IPSET_CB_INDEX];
1279                 struct ip_set *set = ip_set_ref_netlink(inst, index);
1280 
1281                 if (set->variant->uref)
1282                         set->variant->uref(set, cb, false);
1283                 pr_debug("release set %s\n", set->name);
1284                 __ip_set_put_netlink(set);
1285         }
1286         return 0;
1287 }
1288 
1289 static inline void
1290 dump_attrs(struct nlmsghdr *nlh)
1291 {
1292         const struct nlattr *attr;
1293         int rem;
1294 
1295         pr_debug("dump nlmsg\n");
1296         nlmsg_for_each_attr(attr, nlh, sizeof(struct nfgenmsg), rem) {
1297                 pr_debug("type: %u, len %u\n", nla_type(attr), attr->nla_len);
1298         }
1299 }
1300 
     /* Attribute policy used by ip_set_dump_start() to parse a dump
      * request: protocol version, an optional set name and optional
      * 32 bit flags.
      */
1301 static const struct nla_policy
1302 ip_set_dump_policy[IPSET_ATTR_CMD_MAX + 1] = {
1303         [IPSET_ATTR_PROTOCOL]   = { .type = NLA_U8 },
1304         [IPSET_ATTR_SETNAME]    = { .type = NLA_NUL_STRING,
1305                                     .len = IPSET_MAXNAMELEN - 1 },
1306         [IPSET_ATTR_FLAGS]      = { .type = NLA_U32 },
1307 };
1308 
1309 static int
1310 ip_set_dump_start(struct netlink_callback *cb)
1311 {
1312         struct nlmsghdr *nlh = nlmsg_hdr(cb->skb);
1313         int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
1314         struct nlattr *cda[IPSET_ATTR_CMD_MAX + 1];
1315         struct nlattr *attr = (void *)nlh + min_len;
1316         struct sk_buff *skb = cb->skb;
1317         struct ip_set_net *inst = ip_set_pernet(sock_net(skb->sk));
1318         u32 dump_type;
1319         int ret;
1320 
1321         ret = nla_parse(cda, IPSET_ATTR_CMD_MAX, attr,
1322                         nlh->nlmsg_len - min_len,
1323                         ip_set_dump_policy, NULL);
1324         if (ret)
1325                 goto error;
1326 
1327         cb->args[IPSET_CB_PROTO] = nla_get_u8(cda[IPSET_ATTR_PROTOCOL]);
1328         if (cda[IPSET_ATTR_SETNAME]) {
1329                 ip_set_id_t index;
1330                 struct ip_set *set;
1331 
1332                 set = find_set_and_id(inst, nla_data(cda[IPSET_ATTR_SETNAME]),
1333                                       &index);
1334                 if (!set) {
1335                         ret = -ENOENT;
1336                         goto error;
1337                 }
1338                 dump_type = DUMP_ONE;
1339                 cb->args[IPSET_CB_INDEX] = index;
1340         } else {
1341                 dump_type = DUMP_ALL;
1342         }
1343 
1344         if (cda[IPSET_ATTR_FLAGS]) {
1345                 u32 f = ip_set_get_h32(cda[IPSET_ATTR_FLAGS]);
1346 
1347                 dump_type |= (f << 16);
1348         }
1349         cb->args[IPSET_CB_NET] = (unsigned long)inst;
1350         cb->args[IPSET_CB_DUMP] = dump_type;
1351 
1352         return 0;
1353 
1354 error:
1355         /* We have to create and send the error message manually :-( */
1356         if (nlh->nlmsg_flags & NLM_F_ACK) {
1357                 netlink_ack(cb->skb, nlh, ret, NULL);
1358         }
1359         return ret;
1360 }
1361 
     /* Dump callback: put the next piece of the listing into @skb.
      *
      * The position is kept in cb->args: IPSET_CB_INDEX is the slot of the
      * set to look at next, IPSET_CB_DUMP the dump type and flags and
      * IPSET_CB_ARG0 is non-zero while the listing of a set is unfinished.
      * Every path which starts a message leaves the loop, so at most one
      * IPSET_CMD_LIST message is started per call.
      *
      * When all sets are dumped, two passes are made over the slots: sets
      * whose type has the IPSET_DUMP_LAST feature are skipped in the first
      * (DUMP_ALL) pass and are the only ones dumped in the second
      * (DUMP_LAST) pass.
      *
      * Returns skb->len, or a negative error code.
      */
1362 static int
1363 ip_set_dump_do(struct sk_buff *skb, struct netlink_callback *cb)
1364 {
1365         ip_set_id_t index = IPSET_INVALID_ID, max;
1366         struct ip_set *set = NULL;
1367         struct nlmsghdr *nlh = NULL;
1368         unsigned int flags = NETLINK_CB(cb->skb).portid ? NLM_F_MULTI : 0;
1369         struct ip_set_net *inst = ip_set_pernet(sock_net(skb->sk));
1370         u32 dump_type, dump_flags;
1371         bool is_destroyed;
1372         int ret = 0;
1373 
             /* Zero is DUMP_INIT: ip_set_dump_start() stores DUMP_ONE or
              * DUMP_ALL there on success.
              */
1374         if (!cb->args[IPSET_CB_DUMP])
1375                 return -EINVAL;
1376 
1377         if (cb->args[IPSET_CB_INDEX] >= inst->ip_set_max)
1378                 goto out;
1379 
1380         dump_type = DUMP_TYPE(cb->args[IPSET_CB_DUMP]);
1381         dump_flags = DUMP_FLAGS(cb->args[IPSET_CB_DUMP]);
             /* A single set dump covers exactly one slot */
1382         max = dump_type == DUMP_ONE ? cb->args[IPSET_CB_INDEX] + 1
1383                                     : inst->ip_set_max;
1384 dump_last:
1385         pr_debug("dump type, flag: %u %u index: %ld\n",
1386                  dump_type, dump_flags, cb->args[IPSET_CB_INDEX]);
1387         for (; cb->args[IPSET_CB_INDEX] < max; cb->args[IPSET_CB_INDEX]++) {
1388                 index = (ip_set_id_t)cb->args[IPSET_CB_INDEX];
1389                 write_lock_bh(&ip_set_ref_lock);
1390                 set = ip_set(inst, index);
1391                 is_destroyed = inst->is_destroyed;
1392                 if (!set || is_destroyed) {
1393                         write_unlock_bh(&ip_set_ref_lock);
1394                         if (dump_type == DUMP_ONE) {
1395                                 ret = -ENOENT;
1396                                 goto out;
1397                         }
1398                         if (is_destroyed) {
1399                                 /* All sets are just being destroyed */
1400                                 ret = 0;
1401                                 goto out;
1402                         }
1403                         continue;
1404                 }
1405                 /* When dumping all sets, we must dump "sorted"
1406                  * so that lists (unions of sets) are dumped last.
1407                  */
1408                 if (dump_type != DUMP_ONE &&
1409                     ((dump_type == DUMP_ALL) ==
1410                      !!(set->type->features & IPSET_DUMP_LAST))) {
1411                         write_unlock_bh(&ip_set_ref_lock);
1412                         continue;
1413                 }
1414                 pr_debug("List set: %s\n", set->name);
1415                 if (!cb->args[IPSET_CB_ARG0]) {
1416                         /* Start listing: make sure set won't be destroyed */
1417                         pr_debug("reference set\n");
1418                         set->ref_netlink++;
1419                 }
1420                 write_unlock_bh(&ip_set_ref_lock);
1421                 nlh = start_msg(skb, NETLINK_CB(cb->skb).portid,
1422                                 cb->nlh->nlmsg_seq, flags,
1423                                 IPSET_CMD_LIST);
1424                 if (!nlh) {
1425                         ret = -EMSGSIZE;
1426                         goto release_refcount;
1427                 }
1428                 if (nla_put_u8(skb, IPSET_ATTR_PROTOCOL,
1429                                cb->args[IPSET_CB_PROTO]) ||
1430                     nla_put_string(skb, IPSET_ATTR_SETNAME, set->name))
1431                         goto nla_put_failure;
                     /* Only the set names were asked for */
1432                 if (dump_flags & IPSET_FLAG_LIST_SETNAME)
1433                         goto next_set;
                     /* Zero: start of a set, the header is sent before the
                      * elements. Non-zero: the listing of this set continues.
                      */
1434                 switch (cb->args[IPSET_CB_ARG0]) {
1435                 case 0:
1436                         /* Core header data */
1437                         if (nla_put_string(skb, IPSET_ATTR_TYPENAME,
1438                                            set->type->name) ||
1439                             nla_put_u8(skb, IPSET_ATTR_FAMILY,
1440                                        set->family) ||
1441                             nla_put_u8(skb, IPSET_ATTR_REVISION,
1442                                        set->revision))
1443                                 goto nla_put_failure;
1444                         if (cb->args[IPSET_CB_PROTO] > IPSET_PROTOCOL_MIN &&
1445                             nla_put_net16(skb, IPSET_ATTR_INDEX, htons(index)))
1446                                 goto nla_put_failure;
1447                         ret = set->variant->head(set, skb);
1448                         if (ret < 0)
1449                                 goto release_refcount;
1450                         if (dump_flags & IPSET_FLAG_LIST_HEADER)
1451                                 goto next_set;
1452                         if (set->variant->uref)
1453                                 set->variant->uref(set, cb, true);
1454                         /* fall through */
1455                 default:
1456                         ret = set->variant->list(set, skb, cb);
1457                         if (!cb->args[IPSET_CB_ARG0])
1458                                 /* Set is done, proceed with next one */
1459                                 goto next_set;
1460                         goto release_refcount;
1461                 }
1462         }
1463         /* If we dump all sets, continue with dumping last ones */
1464         if (dump_type == DUMP_ALL) {
1465                 dump_type = DUMP_LAST;
1466                 cb->args[IPSET_CB_DUMP] = dump_type | (dump_flags << 16);
1467                 cb->args[IPSET_CB_INDEX] = 0;
1468                 if (set && set->variant->uref)
1469                         set->variant->uref(set, cb, false);
1470                 goto dump_last;
1471         }
1472         goto out;
1473 
1474 nla_put_failure:
1475         ret = -EFAULT;
     /* Move the stored position past the set just handled */
1476 next_set:
1477         if (dump_type == DUMP_ONE)
1478                 cb->args[IPSET_CB_INDEX] = IPSET_INVALID_ID;
1479         else
1480                 cb->args[IPSET_CB_INDEX]++;
1481 release_refcount:
1482         /* If there was an error or set is done, release set */
1483         if (ret || !cb->args[IPSET_CB_ARG0]) {
1484                 set = ip_set_ref_netlink(inst, index);
1485                 if (set->variant->uref)
1486                         set->variant->uref(set, cb, false);
1487                 pr_debug("release set %s\n", set->name);
1488                 __ip_set_put_netlink(set);
1489                 cb->args[IPSET_CB_ARG0] = 0;
1490         }
1491 out:
             /* Finish the message if one was started in this call */
1492         if (nlh) {
1493                 nlmsg_end(skb, nlh);
1494                 pr_debug("nlmsg_len: %u\n", nlh->nlmsg_len);
1495                 dump_attrs(nlh);
1496         }
1497 
1498         return ret < 0 ? ret : skb->len;
1499 }
1500 
1501 static int ip_set_dump(struct net *net, struct sock *ctnl, struct sk_buff *skb,
1502                        const struct nlmsghdr *nlh,
1503                        const struct nlattr * const attr[],
1504                        struct netlink_ext_ack *extack)
1505 {
1506         if (unlikely(protocol_min_failed(attr)))
1507                 return -IPSET_ERR_PROTOCOL;
1508 
1509         {
1510                 struct netlink_dump_control c = {
1511                         .start = ip_set_dump_start,
1512                         .dump = ip_set_dump_do,
1513                         .done = ip_set_dump_done,
1514                 };
1515                 return netlink_dump_start(ctnl, skb, nlh, &c);
1516         }
1517 }
1518 
1519 /* Add, del and test */
1520 
     /* Attribute policy for the add, del and test commands: besides the
      * protocol version and the set name, a 32 bit line number and the
      * nested DATA and ADT attributes are accepted. call_ad() uses the
      * same policy to parse its copy of a failed request.
      */
1521 static const struct nla_policy ip_set_adt_policy[IPSET_ATTR_CMD_MAX + 1] = {
1522         [IPSET_ATTR_PROTOCOL]   = { .type = NLA_U8 },
1523         [IPSET_ATTR_SETNAME]    = { .type = NLA_NUL_STRING,
1524                                     .len = IPSET_MAXNAMELEN - 1 },
1525         [IPSET_ATTR_LINENO]     = { .type = NLA_U32 },
1526         [IPSET_ATTR_DATA]       = { .type = NLA_NESTED },
1527         [IPSET_ATTR_ADT]        = { .type = NLA_NESTED },
1528 };
1529 
     /* Run one add, del or test operation, selected by @adt, on @set.
      *
      * The uadt() method of the variant is called with the set locked. When
      * it returns -EAGAIN and the variant has a resize() method, the set is
      * resized and the operation is tried again; the loop ends as soon as
      * uadt() returns anything else or resize() fails.
      *
      * Returns 0 on success, and also when uadt() returned -IPSET_ERR_EXIST
      * and @flags contains IPSET_FLAG_EXIST. If uadt() reported a line number
      * and @use_lineno is set, an NLMSG_ERROR reply is built by hand: it
      * carries the error code and a copy of the request in which the value
      * of the LINENO attribute is overwritten with the reported line. The
      * reply is sent to the requester and -EINTR is returned. In every other
      * case the error code is returned as it is.
      */
1530 static int
1531 call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set,
1532         struct nlattr *tb[], enum ipset_adt adt,
1533         u32 flags, bool use_lineno)
1534 {
1535         int ret;
1536         u32 lineno = 0;
1537         bool eexist = flags & IPSET_FLAG_EXIST, retried = false;
1538 
1539         do {
1540                 ip_set_lock(set);
1541                 ret = set->variant->uadt(set, tb, adt, &lineno, flags, retried);
1542                 ip_set_unlock(set);
1543                 retried = true;
1544         } while (ret == -EAGAIN &&
1545                  set->variant->resize &&
1546                  (ret = set->variant->resize(set, retried)) == 0);
1547 
1548         if (!ret || (ret == -IPSET_ERR_EXIST && eexist))
1549                 return 0;
1550         if (lineno && use_lineno) {
1551                 /* Error in restore/batch mode: send back lineno */
1552                 struct nlmsghdr *rep, *nlh = nlmsg_hdr(skb);
1553                 struct sk_buff *skb2;
1554                 struct nlmsgerr *errmsg;
                     /* Room for the error header plus the request payload */
1555                 size_t payload = min(SIZE_MAX,
1556                                      sizeof(*errmsg) + nlmsg_len(nlh));
1557                 int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
1558                 struct nlattr *cda[IPSET_ATTR_CMD_MAX + 1];
1559                 struct nlattr *cmdattr;
1560                 u32 *errline;
1561 
1562                 skb2 = nlmsg_new(payload, GFP_KERNEL);
1563                 if (!skb2)
1564                         return -ENOMEM;
1565                 rep = __nlmsg_put(skb2, NETLINK_CB(skb).portid,
1566                                   nlh->nlmsg_seq, NLMSG_ERROR, payload, 0);
1567                 errmsg = nlmsg_data(rep);
1568                 errmsg->error = ret;
                     /* The whole request is copied into the reply */
1569                 memcpy(&errmsg->msg, nlh, nlh->nlmsg_len);
1570                 cmdattr = (void *)&errmsg->msg + min_len;
1571 
                     /* Parse the copy, so that cda[] points into the reply */
1572                 ret = nla_parse(cda, IPSET_ATTR_CMD_MAX, cmdattr,
1573                                 nlh->nlmsg_len - min_len, ip_set_adt_policy,
1574                                 NULL);
1575 
1576                 if (ret) {
1577                         nlmsg_free(skb2);
1578                         return ret;
1579                 }
1580                 errline = nla_data(cda[IPSET_ATTR_LINENO]);
1581 
1582                 *errline = lineno;
1583 
1584                 netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid,
1585                                 MSG_DONTWAIT);
1586                 /* Signal netlink not to send its ACK/errmsg.  */
1587                 return -EINTR;
1588         }
1589 
1590         return ret;
1591 }
1592 
1593 static int ip_set_ad(struct net *net, struct sock *ctnl,
1594                      struct sk_buff *skb,
1595                      enum ipset_adt adt,
1596                      const struct nlmsghdr *nlh,
1597                      const struct nlattr * const attr[],
1598                      struct netlink_ext_ack *extack)
1599 {
1600         struct ip_set_net *inst = ip_set_pernet(net);
1601         struct ip_set *set;
1602         struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {};
1603         const struct nlattr *nla;
1604         u32 flags = flag_exist(nlh);
1605         bool use_lineno;
1606         int ret = 0;
1607 
1608         if (unlikely(protocol_min_failed(attr) ||
1609                      !attr[IPSET_ATTR_SETNAME] ||
1610                      !((attr[IPSET_ATTR_DATA] != NULL) ^
1611                        (attr[IPSET_ATTR_ADT] != NULL)) ||
1612                      (attr[IPSET_ATTR_DATA] &&
1613                       !flag_nested(attr[IPSET_ATTR_DATA])) ||
1614                      (attr[IPSET_ATTR_ADT] &&
1615                       (!flag_nested(attr[IPSET_ATTR_ADT]) ||
1616                        !attr[IPSET_ATTR_LINENO]))))
1617                 return -IPSET_ERR_PROTOCOL;
1618 
1619         set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
1620         if (!set)
1621                 return -ENOENT;
1622 
1623         use_lineno = !!attr[IPSET_ATTR_LINENO];
1624         if (attr[IPSET_ATTR_DATA]) {
1625                 if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX,
1626                                      attr[IPSET_ATTR_DATA],
1627                                      set->type->adt_policy, NULL))
1628                         return -IPSET_ERR_PROTOCOL;
1629                 ret = call_ad(ctnl, skb, set, tb, adt, flags,
1630                               use_lineno);
1631         } else {
1632                 int nla_rem;
1633 
1634                 nla_for_each_nested(nla, attr[IPSET_ATTR_ADT], nla_rem) {
1635                         if (nla_type(nla) != IPSET_ATTR_DATA ||
1636                             !flag_nested(nla) ||
1637                             nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla,
1638                                              set->type->adt_policy, NULL))
1639                                 return -IPSET_ERR_PROTOCOL;
1640                         ret = call_ad(ctnl, skb, set, tb, adt,
1641                                       flags, use_lineno);
1642                         if (ret < 0)
1643                                 return ret;
1644                 }
1645         }
1646         return ret;
1647 }
1648 
1649 static int ip_set_uadd(struct net *net, struct sock *ctnl,
1650                        struct sk_buff *skb, const struct nlmsghdr *nlh,
1651                        const struct nlattr * const attr[],
1652                        struct netlink_ext_ack *extack)
1653 {
1654         return ip_set_ad(net, ctnl, skb,
1655                          IPSET_ADD, nlh, attr, extack);
1656 }
1657 
1658 static int ip_set_udel(struct net *net, struct sock *ctnl,
1659                        struct sk_buff *skb, const struct nlmsghdr *nlh,
1660                        const struct nlattr * const attr[],
1661                        struct netlink_ext_ack *extack)
1662 {
1663         return ip_set_ad(net, ctnl, skb,
1664                          IPSET_DEL, nlh, attr, extack);
1665 }
1666 
1667 static int ip_set_utest(struct net *net, struct sock *ctnl, struct sk_buff *skb,
1668                         const struct nlmsghdr *nlh,
1669                         const struct nlattr * const attr[],
1670                         struct netlink_ext_ack *extack)
1671 {
1672         struct ip_set_net *inst = ip_set_pernet(net);
1673         struct ip_set *set;
1674         struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {};
1675         int ret = 0;
1676         u32 lineno;
1677 
1678         if (unlikely(protocol_min_failed(attr) ||
1679                      !attr[IPSET_ATTR_SETNAME] ||
1680                      !attr[IPSET_ATTR_DATA] ||
1681                      !flag_nested(attr[IPSET_ATTR_DATA])))
1682                 return -IPSET_ERR_PROTOCOL;
1683 
1684         set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
1685         if (!set)
1686                 return -ENOENT;
1687 
1688         if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, attr[IPSET_ATTR_DATA],
1689                              set->type->adt_policy, NULL))
1690                 return -IPSET_ERR_PROTOCOL;
1691 
1692         rcu_read_lock_bh();
1693         ret = set->variant->uadt(set, tb, IPSET_TEST, &lineno, 0, 0);
1694         rcu_read_unlock_bh();
1695         /* Userspace can't trigger element to be re-added */
1696         if (ret == -EAGAIN)
1697                 ret = 1;
1698 
1699         return ret > 0 ? 0 : -IPSET_ERR_EXIST;
1700 }
1701 
/* Get header data of a set */
1703 
1704 static int ip_set_header(struct net *net, struct sock *ctnl,
1705                          struct sk_buff *skb, const struct nlmsghdr *nlh,
1706                          const struct nlattr * const attr[],
1707                          struct netlink_ext_ack *extack)
1708 {
1709         struct ip_set_net *inst = ip_set_pernet(net);
1710         const struct ip_set *set;
1711         struct sk_buff *skb2;
1712         struct nlmsghdr *nlh2;
1713         int ret = 0;
1714 
1715         if (unlikely(protocol_min_failed(attr) ||
1716                      !attr[IPSET_ATTR_SETNAME]))
1717                 return -IPSET_ERR_PROTOCOL;
1718 
1719         set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
1720         if (!set)
1721                 return -ENOENT;
1722 
1723         skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1724         if (!skb2)
1725                 return -ENOMEM;
1726 
1727         nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
1728                          IPSET_CMD_HEADER);
1729         if (!nlh2)
1730                 goto nlmsg_failure;
1731         if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, protocol(attr)) ||
1732             nla_put_string(skb2, IPSET_ATTR_SETNAME, set->name) ||
1733             nla_put_string(skb2, IPSET_ATTR_TYPENAME, set->type->name) ||
1734             nla_put_u8(skb2, IPSET_ATTR_FAMILY, set->family) ||
1735             nla_put_u8(skb2, IPSET_ATTR_REVISION, set->revision))
1736                 goto nla_put_failure;
1737         nlmsg_end(skb2, nlh2);
1738 
1739         ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
1740         if (ret < 0)
1741                 return ret;
1742 
1743         return 0;
1744 
1745 nla_put_failure:
1746         nlmsg_cancel(skb2, nlh2);
1747 nlmsg_failure:
1748         kfree_skb(skb2);
1749         return -EMSGSIZE;
1750 }
1751 
1752 /* Get type data */
1753 
1754 static const struct nla_policy ip_set_type_policy[IPSET_ATTR_CMD_MAX + 1] = {
1755         [IPSET_ATTR_PROTOCOL]   = { .type = NLA_U8 },
1756         [IPSET_ATTR_TYPENAME]   = { .type = NLA_NUL_STRING,
1757                                     .len = IPSET_MAXNAMELEN - 1 },
1758         [IPSET_ATTR_FAMILY]     = { .type = NLA_U8 },
1759 };
1760 
1761 static int ip_set_type(struct net *net, struct sock *ctnl, struct sk_buff *skb,
1762                        const struct nlmsghdr *nlh,
1763                        const struct nlattr * const attr[],
1764                        struct netlink_ext_ack *extack)
1765 {
1766         struct sk_buff *skb2;
1767         struct nlmsghdr *nlh2;
1768         u8 family, min, max;
1769         const char *typename;
1770         int ret = 0;
1771 
1772         if (unlikely(protocol_min_failed(attr) ||
1773                      !attr[IPSET_ATTR_TYPENAME] ||
1774                      !attr[IPSET_ATTR_FAMILY]))
1775                 return -IPSET_ERR_PROTOCOL;
1776 
1777         family = nla_get_u8(attr[IPSET_ATTR_FAMILY]);
1778         typename = nla_data(attr[IPSET_ATTR_TYPENAME]);
1779         ret = find_set_type_minmax(typename, family, &min, &max);
1780         if (ret)
1781                 return ret;
1782 
1783         skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1784         if (!skb2)
1785                 return -ENOMEM;
1786 
1787         nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
1788                          IPSET_CMD_TYPE);
1789         if (!nlh2)
1790                 goto nlmsg_failure;
1791         if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, protocol(attr)) ||
1792             nla_put_string(skb2, IPSET_ATTR_TYPENAME, typename) ||
1793             nla_put_u8(skb2, IPSET_ATTR_FAMILY, family) ||
1794             nla_put_u8(skb2, IPSET_ATTR_REVISION, max) ||
1795             nla_put_u8(skb2, IPSET_ATTR_REVISION_MIN, min))
1796                 goto nla_put_failure;
1797         nlmsg_end(skb2, nlh2);
1798 
1799         pr_debug("Send TYPE, nlmsg_len: %u\n", nlh2->nlmsg_len);
1800         ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
1801         if (ret < 0)
1802                 return ret;
1803 
1804         return 0;
1805 
1806 nla_put_failure:
1807         nlmsg_cancel(skb2, nlh2);
1808 nlmsg_failure:
1809         kfree_skb(skb2);
1810         return -EMSGSIZE;
1811 }
1812 
1813 /* Get protocol version */
1814 
1815 static const struct nla_policy
1816 ip_set_protocol_policy[IPSET_ATTR_CMD_MAX + 1] = {
1817         [IPSET_ATTR_PROTOCOL]   = { .type = NLA_U8 },
1818 };
1819 
1820 static int ip_set_protocol(struct net *net, struct sock *ctnl,
1821                            struct sk_buff *skb, const struct nlmsghdr *nlh,
1822                            const struct nlattr * const attr[],
1823                            struct netlink_ext_ack *extack)
1824 {
1825         struct sk_buff *skb2;
1826         struct nlmsghdr *nlh2;
1827         int ret = 0;
1828 
1829         if (unlikely(!attr[IPSET_ATTR_PROTOCOL]))
1830                 return -IPSET_ERR_PROTOCOL;
1831 
1832         skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1833         if (!skb2)
1834                 return -ENOMEM;
1835 
1836         nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
1837                          IPSET_CMD_PROTOCOL);
1838         if (!nlh2)
1839                 goto nlmsg_failure;
1840         if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL))
1841                 goto nla_put_failure;
1842         if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL_MIN, IPSET_PROTOCOL_MIN))
1843                 goto nla_put_failure;
1844         nlmsg_end(skb2, nlh2);
1845 
1846         ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
1847         if (ret < 0)
1848                 return ret;
1849 
1850         return 0;
1851 
1852 nla_put_failure:
1853         nlmsg_cancel(skb2, nlh2);
1854 nlmsg_failure:
1855         kfree_skb(skb2);
1856         return -EMSGSIZE;
1857 }
1858 
1859 /* Get set by name or index, from userspace */
1860 
1861 static int ip_set_byname(struct net *net, struct sock *ctnl,
1862                          struct sk_buff *skb, const struct nlmsghdr *nlh,
1863                          const struct nlattr * const attr[],
1864                          struct netlink_ext_ack *extack)
1865 {
1866         struct ip_set_net *inst = ip_set_pernet(net);
1867         struct sk_buff *skb2;
1868         struct nlmsghdr *nlh2;
1869         ip_set_id_t id = IPSET_INVALID_ID;
1870         const struct ip_set *set;
1871         int ret = 0;
1872 
1873         if (unlikely(protocol_failed(attr) ||
1874                      !attr[IPSET_ATTR_SETNAME]))
1875                 return -IPSET_ERR_PROTOCOL;
1876 
1877         set = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]), &id);
1878         if (id == IPSET_INVALID_ID)
1879                 return -ENOENT;
1880 
1881         skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1882         if (!skb2)
1883                 return -ENOMEM;
1884 
1885         nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
1886                          IPSET_CMD_GET_BYNAME);
1887         if (!nlh2)
1888                 goto nlmsg_failure;
1889         if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, protocol(attr)) ||
1890             nla_put_u8(skb2, IPSET_ATTR_FAMILY, set->family) ||
1891             nla_put_net16(skb2, IPSET_ATTR_INDEX, htons(id)))
1892                 goto nla_put_failure;
1893         nlmsg_end(skb2, nlh2);
1894 
1895         ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
1896         if (ret < 0)
1897                 return ret;
1898 
1899         return 0;
1900 
1901 nla_put_failure:
1902         nlmsg_cancel(skb2, nlh2);
1903 nlmsg_failure:
1904         kfree_skb(skb2);
1905         return -EMSGSIZE;
1906 }
1907 
1908 static const struct nla_policy ip_set_index_policy[IPSET_ATTR_CMD_MAX + 1] = {
1909         [IPSET_ATTR_PROTOCOL]   = { .type = NLA_U8 },
1910         [IPSET_ATTR_INDEX]      = { .type = NLA_U16 },
1911 };
1912 
1913 static int ip_set_byindex(struct net *net, struct sock *ctnl,
1914                           struct sk_buff *skb, const struct nlmsghdr *nlh,
1915                           const struct nlattr * const attr[],
1916                           struct netlink_ext_ack *extack)
1917 {
1918         struct ip_set_net *inst = ip_set_pernet(net);
1919         struct sk_buff *skb2;
1920         struct nlmsghdr *nlh2;
1921         ip_set_id_t id = IPSET_INVALID_ID;
1922         const struct ip_set *set;
1923         int ret = 0;
1924 
1925         if (unlikely(protocol_failed(attr) ||
1926                      !attr[IPSET_ATTR_INDEX]))
1927                 return -IPSET_ERR_PROTOCOL;
1928 
1929         id = ip_set_get_h16(attr[IPSET_ATTR_INDEX]);
1930         if (id >= inst->ip_set_max)
1931                 return -ENOENT;
1932         set = ip_set(inst, id);
1933         if (set == NULL)
1934                 return -ENOENT;
1935 
1936         skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1937         if (!skb2)
1938                 return -ENOMEM;
1939 
1940         nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
1941                          IPSET_CMD_GET_BYINDEX);
1942         if (!nlh2)
1943                 goto nlmsg_failure;
1944         if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, protocol(attr)) ||
1945             nla_put_string(skb2, IPSET_ATTR_SETNAME, set->name))
1946                 goto nla_put_failure;
1947         nlmsg_end(skb2, nlh2);
1948 
1949         ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
1950         if (ret < 0)
1951                 return ret;
1952 
1953         return 0;
1954 
1955 nla_put_failure:
1956         nlmsg_cancel(skb2, nlh2);
1957 nlmsg_failure:
1958         kfree_skb(skb2);
1959         return -EMSGSIZE;
1960 }
1961 
1962 static const struct nfnl_callback ip_set_netlink_subsys_cb[IPSET_MSG_MAX] = {
1963         [IPSET_CMD_NONE]        = {
1964                 .call           = ip_set_none,
1965                 .attr_count     = IPSET_ATTR_CMD_MAX,
1966         },
1967         [IPSET_CMD_CREATE]      = {
1968                 .call           = ip_set_create,
1969                 .attr_count     = IPSET_ATTR_CMD_MAX,
1970                 .policy         = ip_set_create_policy,
1971         },
1972         [IPSET_CMD_DESTROY]     = {
1973                 .call           = ip_set_destroy,
1974                 .attr_count     = IPSET_ATTR_CMD_MAX,
1975                 .policy         = ip_set_setname_policy,
1976         },
1977         [IPSET_CMD_FLUSH]       = {
1978                 .call           = ip_set_flush,
1979                 .attr_count     = IPSET_ATTR_CMD_MAX,
1980                 .policy         = ip_set_setname_policy,
1981         },
1982         [IPSET_CMD_RENAME]      = {
1983                 .call           = ip_set_rename,
1984                 .attr_count     = IPSET_ATTR_CMD_MAX,
1985                 .policy         = ip_set_setname2_policy,
1986         },
1987         [IPSET_CMD_SWAP]        = {
1988                 .call           = ip_set_swap,
1989                 .attr_count     = IPSET_ATTR_CMD_MAX,
1990                 .policy         = ip_set_setname2_policy,
1991         },
1992         [IPSET_CMD_LIST]        = {
1993                 .call           = ip_set_dump,
1994                 .attr_count     = IPSET_ATTR_CMD_MAX,
1995                 .policy         = ip_set_dump_policy,
1996         },
1997         [IPSET_CMD_SAVE]        = {
1998                 .call           = ip_set_dump,
1999                 .attr_count     = IPSET_ATTR_CMD_MAX,
2000                 .policy         = ip_set_setname_policy,
2001         },
2002         [IPSET_CMD_ADD] = {
2003                 .call           = ip_set_uadd,
2004                 .attr_count     = IPSET_ATTR_CMD_MAX,
2005                 .policy         = ip_set_adt_policy,
2006         },
2007         [IPSET_CMD_DEL] = {
2008                 .call           = ip_set_udel,
2009                 .attr_count     = IPSET_ATTR_CMD_MAX,
2010                 .policy         = ip_set_adt_policy,
2011         },
2012         [IPSET_CMD_TEST]        = {
2013                 .call           = ip_set_utest,
2014                 .attr_count     = IPSET_ATTR_CMD_MAX,
2015                 .policy         = ip_set_adt_policy,
2016         },
2017         [IPSET_CMD_HEADER]      = {
2018                 .call           = ip_set_header,
2019                 .attr_count     = IPSET_ATTR_CMD_MAX,
2020                 .policy         = ip_set_setname_policy,
2021         },
2022         [IPSET_CMD_TYPE]        = {
2023                 .call           = ip_set_type,
2024                 .attr_count     = IPSET_ATTR_CMD_MAX,
2025                 .policy         = ip_set_type_policy,
2026         },
2027         [IPSET_CMD_PROTOCOL]    = {
2028                 .call           = ip_set_protocol,
2029                 .attr_count     = IPSET_ATTR_CMD_MAX,
2030                 .policy         = ip_set_protocol_policy,
2031         },
2032         [IPSET_CMD_GET_BYNAME]  = {
2033                 .call           = ip_set_byname,
2034                 .attr_count     = IPSET_ATTR_CMD_MAX,
2035                 .policy         = ip_set_setname_policy,
2036         },
2037         [IPSET_CMD_GET_BYINDEX] = {
2038                 .call           = ip_set_byindex,
2039                 .attr_count     = IPSET_ATTR_CMD_MAX,
2040                 .policy         = ip_set_index_policy,
2041         },
2042 };
2043 
2044 static struct nfnetlink_subsystem ip_set_netlink_subsys __read_mostly = {
2045         .name           = "ip_set",
2046         .subsys_id      = NFNL_SUBSYS_IPSET,
2047         .cb_count       = IPSET_MSG_MAX,
2048         .cb             = ip_set_netlink_subsys_cb,
2049 };
2050 
2051 /* Interface to iptables/ip6tables */
2052 
2053 static int
2054 ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len)
2055 {
2056         unsigned int *op;
2057         void *data;
2058         int copylen = *len, ret = 0;
2059         struct net *net = sock_net(sk);
2060         struct ip_set_net *inst = ip_set_pernet(net);
2061 
2062         if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2063                 return -EPERM;
2064         if (optval != SO_IP_SET)
2065                 return -EBADF;
2066         if (*len < sizeof(unsigned int))
2067                 return -EINVAL;
2068 
2069         data = vmalloc(*len);
2070         if (!data)
2071                 return -ENOMEM;
2072         if (copy_from_user(data, user, *len) != 0) {
2073                 ret = -EFAULT;
2074                 goto done;
2075         }
2076         op = data;
2077 
2078         if (*op < IP_SET_OP_VERSION) {
2079                 /* Check the version at the beginning of operations */
2080                 struct ip_set_req_version *req_version = data;
2081 
2082                 if (*len < sizeof(struct ip_set_req_version)) {
2083                         ret = -EINVAL;
2084                         goto done;
2085                 }
2086 
2087                 if (req_version->version < IPSET_PROTOCOL_MIN) {
2088                         ret = -EPROTO;
2089                         goto done;
2090                 }
2091         }
2092 
2093         switch (*op) {
2094         case IP_SET_OP_VERSION: {
2095                 struct ip_set_req_version *req_version = data;
2096 
2097                 if (*len != sizeof(struct ip_set_req_version)) {
2098                         ret = -EINVAL;
2099                         goto done;
2100                 }
2101 
2102                 req_version->version = IPSET_PROTOCOL;
2103                 if (copy_to_user(user, req_version,
2104                                  sizeof(struct ip_set_req_version)))
2105                         ret = -EFAULT;
2106                 goto done;
2107         }
2108         case IP_SET_OP_GET_BYNAME: {
2109                 struct ip_set_req_get_set *req_get = data;
2110                 ip_set_id_t id;
2111 
2112                 if (*len != sizeof(struct ip_set_req_get_set)) {
2113                         ret = -EINVAL;
2114                         goto done;
2115                 }
2116                 req_get->set.name[IPSET_MAXNAMELEN - 1] = '\0';
2117                 nfnl_lock(NFNL_SUBSYS_IPSET);
2118                 find_set_and_id(inst, req_get->set.name, &id);
2119                 req_get->set.index = id;
2120                 nfnl_unlock(NFNL_SUBSYS_IPSET);
2121                 goto copy;
2122         }
2123         case IP_SET_OP_GET_FNAME: {
2124                 struct ip_set_req_get_set_family *req_get = data;
2125                 ip_set_id_t id;
2126 
2127                 if (*len != sizeof(struct ip_set_req_get_set_family)) {
2128                         ret = -EINVAL;
2129                         goto done;
2130                 }
2131                 req_get->set.name[IPSET_MAXNAMELEN - 1] = '\0';
2132                 nfnl_lock(NFNL_SUBSYS_IPSET);
2133                 find_set_and_id(inst, req_get->set.name, &id);
2134                 req_get->set.index = id;
2135                 if (id != IPSET_INVALID_ID)
2136                         req_get->family = ip_set(inst, id)->family;
2137                 nfnl_unlock(NFNL_SUBSYS_IPSET);
2138                 goto copy;
2139         }
2140         case IP_SET_OP_GET_BYINDEX: {
2141                 struct ip_set_req_get_set *req_get = data;
2142                 struct ip_set *set;
2143 
2144                 if (*len != sizeof(struct ip_set_req_get_set) ||
2145                     req_get->set.index >= inst->ip_set_max) {
2146                         ret = -EINVAL;
2147                         goto done;
2148                 }
2149                 nfnl_lock(NFNL_SUBSYS_IPSET);
2150                 set = ip_set(inst, req_get->set.index);
2151                 ret = strscpy(req_get->set.name, set ? set->name : "",
2152                               IPSET_MAXNAMELEN);
2153                 nfnl_unlock(NFNL_SUBSYS_IPSET);
2154                 if (ret < 0)
2155                         goto done;
2156                 goto copy;
2157         }
2158         default:
2159                 ret = -EBADMSG;
2160                 goto done;
2161         }       /* end of switch(op) */
2162 
2163 copy:
2164         if (copy_to_user(user, data, copylen))
2165                 ret = -EFAULT;
2166 
2167 done:
2168         vfree(data);
2169         if (ret > 0)
2170                 ret = 0;
2171         return ret;
2172 }
2173 
2174 static struct nf_sockopt_ops so_set __read_mostly = {
2175         .pf             = PF_INET,
2176         .get_optmin     = SO_IP_SET,
2177         .get_optmax     = SO_IP_SET + 1,
2178         .get            = ip_set_sockfn_get,
2179         .owner          = THIS_MODULE,
2180 };
2181 
2182 static int __net_init
2183 ip_set_net_init(struct net *net)
2184 {
2185         struct ip_set_net *inst = ip_set_pernet(net);
2186         struct ip_set **list;
2187 
2188         inst->ip_set_max = max_sets ? max_sets : CONFIG_IP_SET_MAX;
2189         if (inst->ip_set_max >= IPSET_INVALID_ID)
2190                 inst->ip_set_max = IPSET_INVALID_ID - 1;
2191 
2192         list = kvcalloc(inst->ip_set_max, sizeof(struct ip_set *), GFP_KERNEL);
2193         if (!list)
2194                 return -ENOMEM;
2195         inst->is_deleted = false;
2196         inst->is_destroyed = false;
2197         rcu_assign_pointer(inst->ip_set_list, list);
2198         return 0;
2199 }
2200 
2201 static void __net_exit
2202 ip_set_net_exit(struct net *net)
2203 {
2204         struct ip_set_net *inst = ip_set_pernet(net);
2205 
2206         struct ip_set *set = NULL;
2207         ip_set_id_t i;
2208 
2209         inst->is_deleted = true; /* flag for ip_set_nfnl_put */
2210 
2211         nfnl_lock(NFNL_SUBSYS_IPSET);
2212         for (i = 0; i < inst->ip_set_max; i++) {
2213                 set = ip_set(inst, i);
2214                 if (set) {
2215                         ip_set(inst, i) = NULL;
2216                         ip_set_destroy_set(set);
2217                 }
2218         }
2219         nfnl_unlock(NFNL_SUBSYS_IPSET);
2220         kvfree(rcu_dereference_protected(inst->ip_set_list, 1));
2221 }
2222 
2223 static struct pernet_operations ip_set_net_ops = {
2224         .init   = ip_set_net_init,
2225         .exit   = ip_set_net_exit,
2226         .id     = &ip_set_net_id,
2227         .size   = sizeof(struct ip_set_net),
2228 };
2229 
2230 static int __init
2231 ip_set_init(void)
2232 {
2233         int ret = register_pernet_subsys(&ip_set_net_ops);
2234 
2235         if (ret) {
2236                 pr_err("ip_set: cannot register pernet_subsys.\n");
2237                 return ret;
2238         }
2239 
2240         ret = nfnetlink_subsys_register(&ip_set_netlink_subsys);
2241         if (ret != 0) {
2242                 pr_err("ip_set: cannot register with nfnetlink.\n");
2243                 unregister_pernet_subsys(&ip_set_net_ops);
2244                 return ret;
2245         }
2246 
2247         ret = nf_register_sockopt(&so_set);
2248         if (ret != 0) {
2249                 pr_err("SO_SET registry failed: %d\n", ret);
2250                 nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
2251                 unregister_pernet_subsys(&ip_set_net_ops);
2252                 return ret;
2253         }
2254 
2255         return 0;
2256 }
2257 
2258 static void __exit
2259 ip_set_fini(void)
2260 {
2261         nf_unregister_sockopt(&so_set);
2262         nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
2263 
2264         unregister_pernet_subsys(&ip_set_net_ops);
2265         pr_debug("these are the famous last words\n");
2266 }
2267 
2268 module_init(ip_set_init);
2269 module_exit(ip_set_fini);
2270 
2271 MODULE_DESCRIPTION("ip_set: protocol " __stringify(IPSET_PROTOCOL));

/* [<][>][^][v][top][bottom][index][help] */