root/net/netfilter/ipvs/ip_vs_ctl.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. ip_vs_get_debug_level
  2. __ip_vs_addr_is_local_v6
  3. update_defense_level
  4. defense_work_handler
  5. ip_vs_use_count_inc
  6. ip_vs_use_count_dec
  7. ip_vs_svc_hashkey
  8. ip_vs_svc_fwm_hashkey
  9. ip_vs_svc_hash
  10. ip_vs_svc_unhash
  11. __ip_vs_service_find
  12. __ip_vs_svc_fwm_find
  13. ip_vs_service_find
  14. __ip_vs_bind_svc
  15. ip_vs_service_free
  16. ip_vs_service_rcu_free
  17. __ip_vs_svc_put
  18. ip_vs_rs_hashkey
  19. ip_vs_rs_hash
  20. ip_vs_rs_unhash
  21. ip_vs_has_real_service
  22. ip_vs_find_real_service
  23. ip_vs_find_tunnel
  24. ip_vs_lookup_dest
  25. ip_vs_find_dest
  26. ip_vs_dest_dst_rcu_free
  27. __ip_vs_dst_cache_reset
  28. ip_vs_trash_get_dest
  29. ip_vs_dest_free
  30. ip_vs_trash_cleanup
  31. ip_vs_copy_stats
  32. ip_vs_export_stats_user
  33. ip_vs_zero_stats
  34. __ip_vs_update_dest
  35. ip_vs_new_dest
  36. ip_vs_add_dest
  37. ip_vs_edit_dest
  38. __ip_vs_del_dest
  39. __ip_vs_unlink_dest
  40. ip_vs_del_dest
  41. ip_vs_dest_trash_expire
  42. ip_vs_add_service
  43. ip_vs_edit_service
  44. __ip_vs_del_service
  45. ip_vs_unlink_service
  46. ip_vs_del_service
  47. ip_vs_flush
  48. ip_vs_service_net_cleanup
  49. ip_vs_forget_dev
  50. ip_vs_dst_event
  51. ip_vs_zero_service
  52. ip_vs_zero_all
  53. proc_do_defense_mode
  54. proc_do_sync_threshold
  55. proc_do_sync_ports
  56. ip_vs_fwd_name
  57. ip_vs_info_array
  58. ip_vs_info_seq_start
  59. ip_vs_info_seq_next
  60. ip_vs_info_seq_stop
  61. ip_vs_info_seq_show
  62. ip_vs_stats_show
  63. ip_vs_stats_percpu_show
  64. ip_vs_set_timeout
  65. ip_vs_copy_udest_compat
  66. do_ip_vs_set_ctl
  67. ip_vs_copy_service
  68. __ip_vs_get_service_entries
  69. __ip_vs_get_dest_entries
  70. __ip_vs_get_timeouts
  71. do_ip_vs_get_ctl
  72. ip_vs_genl_fill_stats
  73. ip_vs_genl_fill_stats64
  74. ip_vs_genl_fill_service
  75. ip_vs_genl_dump_service
  76. ip_vs_genl_dump_services
  77. ip_vs_is_af_valid
  78. ip_vs_genl_parse_service
  79. ip_vs_genl_find_service
  80. ip_vs_genl_fill_dest
  81. ip_vs_genl_dump_dest
  82. ip_vs_genl_dump_dests
  83. ip_vs_genl_parse_dest
  84. ip_vs_genl_fill_daemon
  85. ip_vs_genl_dump_daemon
  86. ip_vs_genl_dump_daemons
  87. ip_vs_genl_new_daemon
  88. ip_vs_genl_del_daemon
  89. ip_vs_genl_set_config
  90. ip_vs_genl_set_daemon
  91. ip_vs_genl_set_cmd
  92. ip_vs_genl_get_cmd
  93. ip_vs_genl_register
  94. ip_vs_genl_unregister
  95. ip_vs_control_net_init_sysctl
  96. ip_vs_control_net_cleanup_sysctl
  97. ip_vs_control_net_init_sysctl
  98. ip_vs_control_net_cleanup_sysctl
  99. ip_vs_control_net_init
  100. ip_vs_control_net_cleanup
  101. ip_vs_register_nl_ioctl
  102. ip_vs_unregister_nl_ioctl
  103. ip_vs_control_init
  104. ip_vs_control_cleanup

   1 // SPDX-License-Identifier: GPL-2.0-or-later
   2 /*
   3  * IPVS         An implementation of the IP virtual server support for the
   4  *              LINUX operating system.  IPVS is now implemented as a module
   5  *              over the NetFilter framework. IPVS can be used to build a
   6  *              high-performance and highly available server based on a
   7  *              cluster of servers.
   8  *
   9  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
  10  *              Peter Kese <peter.kese@ijs.si>
  11  *              Julian Anastasov <ja@ssi.bg>
  12  *
  13  * Changes:
  14  */
  15 
  16 #define KMSG_COMPONENT "IPVS"
  17 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  18 
  19 #include <linux/module.h>
  20 #include <linux/init.h>
  21 #include <linux/types.h>
  22 #include <linux/capability.h>
  23 #include <linux/fs.h>
  24 #include <linux/sysctl.h>
  25 #include <linux/proc_fs.h>
  26 #include <linux/workqueue.h>
  27 #include <linux/swap.h>
  28 #include <linux/seq_file.h>
  29 #include <linux/slab.h>
  30 
  31 #include <linux/netfilter.h>
  32 #include <linux/netfilter_ipv4.h>
  33 #include <linux/mutex.h>
  34 
  35 #include <net/net_namespace.h>
  36 #include <linux/nsproxy.h>
  37 #include <net/ip.h>
  38 #ifdef CONFIG_IP_VS_IPV6
  39 #include <net/ipv6.h>
  40 #include <net/ip6_route.h>
  41 #include <net/netfilter/ipv6/nf_defrag_ipv6.h>
  42 #endif
  43 #include <net/route.h>
  44 #include <net/sock.h>
  45 #include <net/genetlink.h>
  46 
  47 #include <linux/uaccess.h>
  48 
  49 #include <net/ip_vs.h>
  50 
/* Mutex for IPVS sockopts. And, [gs]etsockopt may sleep. */
static DEFINE_MUTEX(__ip_vs_mutex);
  53 
  54 /* sysctl variables */
  55 
  56 #ifdef CONFIG_IP_VS_DEBUG
  57 static int sysctl_ip_vs_debug_level = 0;
  58 
  59 int ip_vs_get_debug_level(void)
  60 {
  61         return sysctl_ip_vs_debug_level;
  62 }
  63 #endif
  64 
  65 
/* Forward declarations for functions defined later in this file */
static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup);
  68 
  69 
  70 #ifdef CONFIG_IP_VS_IPV6
  71 /* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
  72 static bool __ip_vs_addr_is_local_v6(struct net *net,
  73                                      const struct in6_addr *addr)
  74 {
  75         struct flowi6 fl6 = {
  76                 .daddr = *addr,
  77         };
  78         struct dst_entry *dst = ip6_route_output(net, NULL, &fl6);
  79         bool is_local;
  80 
  81         is_local = !dst->error && dst->dev && (dst->dev->flags & IFF_LOOPBACK);
  82 
  83         dst_release(dst);
  84         return is_local;
  85 }
  86 #endif
  87 
  88 #ifdef CONFIG_SYSCTL
/*
 *      update_defense_level is called from keventd and from sysctl,
 *      so it needs to protect itself from softirqs
 *
 *      Re-evaluates the three defense strategies of @ipvs (drop_entry,
 *      drop_packet, secure_tcp) against the currently available memory.
 *      For each sysctl, 0 keeps the strategy off and 3 keeps it always on;
 *      1 and 2 form an automatic mode: the value is moved to 2 while
 *      memory is below sysctl_amemthresh and back to 1 once it is not.
 */
static void update_defense_level(struct netns_ipvs *ipvs)
{
        struct sysinfo i;
        int availmem;
        int nomem;
        /* -1: leave protocol timeouts alone; >= 0: a change is required */
        int to_change = -1;

        /* we only count free and buffered memory (in pages) */
        si_meminfo(&i);
        availmem = i.freeram + i.bufferram;
        /* however in linux 2.5 the i.bufferram is total page cache size,
           we need adjust it */
        /* si_swapinfo(&i); */
        /* availmem = availmem - (i.totalswap - i.freeswap); */

        /* non-zero when available memory is under the configured threshold */
        nomem = (availmem < ipvs->sysctl_amemthresh);

        /* the plain spin_lock() calls below rely on BHs being disabled */
        local_bh_disable();

        /* drop_entry */
        spin_lock(&ipvs->dropentry_lock);
        switch (ipvs->sysctl_drop_entry) {
        case 0:
                atomic_set(&ipvs->dropentry, 0);
                break;
        case 1:
                if (nomem) {
                        atomic_set(&ipvs->dropentry, 1);
                        ipvs->sysctl_drop_entry = 2;
                } else {
                        atomic_set(&ipvs->dropentry, 0);
                }
                break;
        case 2:
                if (nomem) {
                        atomic_set(&ipvs->dropentry, 1);
                } else {
                        atomic_set(&ipvs->dropentry, 0);
                        ipvs->sysctl_drop_entry = 1;
                }
                break;
        case 3:
                atomic_set(&ipvs->dropentry, 1);
                break;
        }
        spin_unlock(&ipvs->dropentry_lock);

        /* drop_packet */
        spin_lock(&ipvs->droppacket_lock);
        switch (ipvs->sysctl_drop_packet) {
        case 0:
                ipvs->drop_rate = 0;
                break;
        case 1:
                if (nomem) {
                        /* divisor is > 0 here: nomem implies
                         * availmem < sysctl_amemthresh
                         */
                        ipvs->drop_rate = ipvs->drop_counter
                                = ipvs->sysctl_amemthresh /
                                (ipvs->sysctl_amemthresh-availmem);
                        ipvs->sysctl_drop_packet = 2;
                } else {
                        ipvs->drop_rate = 0;
                }
                break;
        case 2:
                if (nomem) {
                        /* same rate computation as in mode 1 */
                        ipvs->drop_rate = ipvs->drop_counter
                                = ipvs->sysctl_amemthresh /
                                (ipvs->sysctl_amemthresh-availmem);
                } else {
                        ipvs->drop_rate = 0;
                        ipvs->sysctl_drop_packet = 1;
                }
                break;
        case 3:
                /* always on: use the administrator-configured rate */
                ipvs->drop_rate = ipvs->sysctl_am_droprate;
                break;
        }
        spin_unlock(&ipvs->droppacket_lock);

        /* secure_tcp */
        spin_lock(&ipvs->securetcp_lock);
        /* to_change is set only when the previous value (old_secure_tcp)
         * and the new state lie on different sides of the value 2
         */
        switch (ipvs->sysctl_secure_tcp) {
        case 0:
                if (ipvs->old_secure_tcp >= 2)
                        to_change = 0;
                break;
        case 1:
                if (nomem) {
                        if (ipvs->old_secure_tcp < 2)
                                to_change = 1;
                        ipvs->sysctl_secure_tcp = 2;
                } else {
                        if (ipvs->old_secure_tcp >= 2)
                                to_change = 0;
                }
                break;
        case 2:
                if (nomem) {
                        if (ipvs->old_secure_tcp < 2)
                                to_change = 1;
                } else {
                        if (ipvs->old_secure_tcp >= 2)
                                to_change = 0;
                        ipvs->sysctl_secure_tcp = 1;
                }
                break;
        case 3:
                if (ipvs->old_secure_tcp < 2)
                        to_change = 1;
                break;
        }
        /* remember the value for the next invocation */
        ipvs->old_secure_tcp = ipvs->sysctl_secure_tcp;
        if (to_change >= 0)
                ip_vs_protocol_timeout_change(ipvs,
                                              ipvs->sysctl_secure_tcp > 1);
        spin_unlock(&ipvs->securetcp_lock);

        local_bh_enable();
}
 212 
 213 
 214 /*
 215  *      Timer for checking the defense
 216  */
 217 #define DEFENSE_TIMER_PERIOD    1*HZ
 218 
 219 static void defense_work_handler(struct work_struct *work)
 220 {
 221         struct netns_ipvs *ipvs =
 222                 container_of(work, struct netns_ipvs, defense_work.work);
 223 
 224         update_defense_level(ipvs);
 225         if (atomic_read(&ipvs->dropentry))
 226                 ip_vs_random_dropentry(ipvs);
 227         schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
 228 }
 229 #endif
 230 
 231 int
 232 ip_vs_use_count_inc(void)
 233 {
 234         return try_module_get(THIS_MODULE);
 235 }
 236 
 237 void
 238 ip_vs_use_count_dec(void)
 239 {
 240         module_put(THIS_MODULE);
 241 }
 242 
 243 
/*
 *      Hash table: for virtual service lookups
 *
 *      Both tables below have IP_VS_SVC_TAB_SIZE (1 << 8 = 256) buckets;
 *      IP_VS_SVC_TAB_MASK reduces a hash value to a bucket index.
 */
#define IP_VS_SVC_TAB_BITS 8
#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)

/* the service table hashed by <protocol, addr, port> */
static struct hlist_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
/* the service table hashed by fwmark */
static struct hlist_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
 255 
 256 
 257 /*
 258  *      Returns hash value for virtual service
 259  */
 260 static inline unsigned int
 261 ip_vs_svc_hashkey(struct netns_ipvs *ipvs, int af, unsigned int proto,
 262                   const union nf_inet_addr *addr, __be16 port)
 263 {
 264         unsigned int porth = ntohs(port);
 265         __be32 addr_fold = addr->ip;
 266         __u32 ahash;
 267 
 268 #ifdef CONFIG_IP_VS_IPV6
 269         if (af == AF_INET6)
 270                 addr_fold = addr->ip6[0]^addr->ip6[1]^
 271                             addr->ip6[2]^addr->ip6[3];
 272 #endif
 273         ahash = ntohl(addr_fold);
 274         ahash ^= ((size_t) ipvs >> 8);
 275 
 276         return (proto ^ ahash ^ (porth >> IP_VS_SVC_TAB_BITS) ^ porth) &
 277                IP_VS_SVC_TAB_MASK;
 278 }
 279 
 280 /*
 281  *      Returns hash value of fwmark for virtual service lookup
 282  */
 283 static inline unsigned int ip_vs_svc_fwm_hashkey(struct netns_ipvs *ipvs, __u32 fwmark)
 284 {
 285         return (((size_t)ipvs>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
 286 }
 287 
 288 /*
 289  *      Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port>
 290  *      or in the ip_vs_svc_fwm_table by fwmark.
 291  *      Should be called with locked tables.
 292  */
 293 static int ip_vs_svc_hash(struct ip_vs_service *svc)
 294 {
 295         unsigned int hash;
 296 
 297         if (svc->flags & IP_VS_SVC_F_HASHED) {
 298                 pr_err("%s(): request for already hashed, called from %pS\n",
 299                        __func__, __builtin_return_address(0));
 300                 return 0;
 301         }
 302 
 303         if (svc->fwmark == 0) {
 304                 /*
 305                  *  Hash it by <netns,protocol,addr,port> in ip_vs_svc_table
 306                  */
 307                 hash = ip_vs_svc_hashkey(svc->ipvs, svc->af, svc->protocol,
 308                                          &svc->addr, svc->port);
 309                 hlist_add_head_rcu(&svc->s_list, &ip_vs_svc_table[hash]);
 310         } else {
 311                 /*
 312                  *  Hash it by fwmark in svc_fwm_table
 313                  */
 314                 hash = ip_vs_svc_fwm_hashkey(svc->ipvs, svc->fwmark);
 315                 hlist_add_head_rcu(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
 316         }
 317 
 318         svc->flags |= IP_VS_SVC_F_HASHED;
 319         /* increase its refcnt because it is referenced by the svc table */
 320         atomic_inc(&svc->refcnt);
 321         return 1;
 322 }
 323 
 324 
 325 /*
 326  *      Unhashes a service from svc_table / svc_fwm_table.
 327  *      Should be called with locked tables.
 328  */
 329 static int ip_vs_svc_unhash(struct ip_vs_service *svc)
 330 {
 331         if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
 332                 pr_err("%s(): request for unhash flagged, called from %pS\n",
 333                        __func__, __builtin_return_address(0));
 334                 return 0;
 335         }
 336 
 337         if (svc->fwmark == 0) {
 338                 /* Remove it from the svc_table table */
 339                 hlist_del_rcu(&svc->s_list);
 340         } else {
 341                 /* Remove it from the svc_fwm_table table */
 342                 hlist_del_rcu(&svc->f_list);
 343         }
 344 
 345         svc->flags &= ~IP_VS_SVC_F_HASHED;
 346         atomic_dec(&svc->refcnt);
 347         return 1;
 348 }
 349 
 350 
 351 /*
 352  *      Get service by {netns, proto,addr,port} in the service table.
 353  */
 354 static inline struct ip_vs_service *
 355 __ip_vs_service_find(struct netns_ipvs *ipvs, int af, __u16 protocol,
 356                      const union nf_inet_addr *vaddr, __be16 vport)
 357 {
 358         unsigned int hash;
 359         struct ip_vs_service *svc;
 360 
 361         /* Check for "full" addressed entries */
 362         hash = ip_vs_svc_hashkey(ipvs, af, protocol, vaddr, vport);
 363 
 364         hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[hash], s_list) {
 365                 if ((svc->af == af)
 366                     && ip_vs_addr_equal(af, &svc->addr, vaddr)
 367                     && (svc->port == vport)
 368                     && (svc->protocol == protocol)
 369                     && (svc->ipvs == ipvs)) {
 370                         /* HIT */
 371                         return svc;
 372                 }
 373         }
 374 
 375         return NULL;
 376 }
 377 
 378 
 379 /*
 380  *      Get service by {fwmark} in the service table.
 381  */
 382 static inline struct ip_vs_service *
 383 __ip_vs_svc_fwm_find(struct netns_ipvs *ipvs, int af, __u32 fwmark)
 384 {
 385         unsigned int hash;
 386         struct ip_vs_service *svc;
 387 
 388         /* Check for fwmark addressed entries */
 389         hash = ip_vs_svc_fwm_hashkey(ipvs, fwmark);
 390 
 391         hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[hash], f_list) {
 392                 if (svc->fwmark == fwmark && svc->af == af
 393                     && (svc->ipvs == ipvs)) {
 394                         /* HIT */
 395                         return svc;
 396                 }
 397         }
 398 
 399         return NULL;
 400 }
 401 
 402 /* Find service, called under RCU lock */
 403 struct ip_vs_service *
 404 ip_vs_service_find(struct netns_ipvs *ipvs, int af, __u32 fwmark, __u16 protocol,
 405                    const union nf_inet_addr *vaddr, __be16 vport)
 406 {
 407         struct ip_vs_service *svc;
 408 
 409         /*
 410          *      Check the table hashed by fwmark first
 411          */
 412         if (fwmark) {
 413                 svc = __ip_vs_svc_fwm_find(ipvs, af, fwmark);
 414                 if (svc)
 415                         goto out;
 416         }
 417 
 418         /*
 419          *      Check the table hashed by <protocol,addr,port>
 420          *      for "full" addressed entries
 421          */
 422         svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, vport);
 423 
 424         if (!svc && protocol == IPPROTO_TCP &&
 425             atomic_read(&ipvs->ftpsvc_counter) &&
 426             (vport == FTPDATA || ntohs(vport) >= inet_prot_sock(ipvs->net))) {
 427                 /*
 428                  * Check if ftp service entry exists, the packet
 429                  * might belong to FTP data connections.
 430                  */
 431                 svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, FTPPORT);
 432         }
 433 
 434         if (svc == NULL
 435             && atomic_read(&ipvs->nullsvc_counter)) {
 436                 /*
 437                  * Check if the catch-all port (port zero) exists
 438                  */
 439                 svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, 0);
 440         }
 441 
 442   out:
 443         IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
 444                       fwmark, ip_vs_proto_name(protocol),
 445                       IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
 446                       svc ? "hit" : "not hit");
 447 
 448         return svc;
 449 }
 450 
 451 
 452 static inline void
 453 __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
 454 {
 455         atomic_inc(&svc->refcnt);
 456         rcu_assign_pointer(dest->svc, svc);
 457 }
 458 
 459 static void ip_vs_service_free(struct ip_vs_service *svc)
 460 {
 461         free_percpu(svc->stats.cpustats);
 462         kfree(svc);
 463 }
 464 
 465 static void ip_vs_service_rcu_free(struct rcu_head *head)
 466 {
 467         struct ip_vs_service *svc;
 468 
 469         svc = container_of(head, struct ip_vs_service, rcu_head);
 470         ip_vs_service_free(svc);
 471 }
 472 
 473 static void __ip_vs_svc_put(struct ip_vs_service *svc, bool do_delay)
 474 {
 475         if (atomic_dec_and_test(&svc->refcnt)) {
 476                 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n",
 477                               svc->fwmark,
 478                               IP_VS_DBG_ADDR(svc->af, &svc->addr),
 479                               ntohs(svc->port));
 480                 if (do_delay)
 481                         call_rcu(&svc->rcu_head, ip_vs_service_rcu_free);
 482                 else
 483                         ip_vs_service_free(svc);
 484         }
 485 }
 486 
 487 
 488 /*
 489  *      Returns hash value for real service
 490  */
 491 static inline unsigned int ip_vs_rs_hashkey(int af,
 492                                             const union nf_inet_addr *addr,
 493                                             __be16 port)
 494 {
 495         unsigned int porth = ntohs(port);
 496         __be32 addr_fold = addr->ip;
 497 
 498 #ifdef CONFIG_IP_VS_IPV6
 499         if (af == AF_INET6)
 500                 addr_fold = addr->ip6[0]^addr->ip6[1]^
 501                             addr->ip6[2]^addr->ip6[3];
 502 #endif
 503 
 504         return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
 505                 & IP_VS_RTAB_MASK;
 506 }
 507 
 508 /* Hash ip_vs_dest in rs_table by <proto,addr,port>. */
 509 static void ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
 510 {
 511         unsigned int hash;
 512         __be16 port;
 513 
 514         if (dest->in_rs_table)
 515                 return;
 516 
 517         switch (IP_VS_DFWD_METHOD(dest)) {
 518         case IP_VS_CONN_F_MASQ:
 519                 port = dest->port;
 520                 break;
 521         case IP_VS_CONN_F_TUNNEL:
 522                 switch (dest->tun_type) {
 523                 case IP_VS_CONN_F_TUNNEL_TYPE_GUE:
 524                         port = dest->tun_port;
 525                         break;
 526                 case IP_VS_CONN_F_TUNNEL_TYPE_IPIP:
 527                 case IP_VS_CONN_F_TUNNEL_TYPE_GRE:
 528                         port = 0;
 529                         break;
 530                 default:
 531                         return;
 532                 }
 533                 break;
 534         default:
 535                 return;
 536         }
 537 
 538         /*
 539          *      Hash by proto,addr,port,
 540          *      which are the parameters of the real service.
 541          */
 542         hash = ip_vs_rs_hashkey(dest->af, &dest->addr, port);
 543 
 544         hlist_add_head_rcu(&dest->d_list, &ipvs->rs_table[hash]);
 545         dest->in_rs_table = 1;
 546 }
 547 
 548 /* Unhash ip_vs_dest from rs_table. */
 549 static void ip_vs_rs_unhash(struct ip_vs_dest *dest)
 550 {
 551         /*
 552          * Remove it from the rs_table table.
 553          */
 554         if (dest->in_rs_table) {
 555                 hlist_del_rcu(&dest->d_list);
 556                 dest->in_rs_table = 0;
 557         }
 558 }
 559 
 560 /* Check if real service by <proto,addr,port> is present */
 561 bool ip_vs_has_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol,
 562                             const union nf_inet_addr *daddr, __be16 dport)
 563 {
 564         unsigned int hash;
 565         struct ip_vs_dest *dest;
 566 
 567         /* Check for "full" addressed entries */
 568         hash = ip_vs_rs_hashkey(af, daddr, dport);
 569 
 570         hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) {
 571                 if (dest->port == dport &&
 572                     dest->af == af &&
 573                     ip_vs_addr_equal(af, &dest->addr, daddr) &&
 574                     (dest->protocol == protocol || dest->vfwmark) &&
 575                     IP_VS_DFWD_METHOD(dest) == IP_VS_CONN_F_MASQ) {
 576                         /* HIT */
 577                         return true;
 578                 }
 579         }
 580 
 581         return false;
 582 }
 583 
 584 /* Find real service record by <proto,addr,port>.
 585  * In case of multiple records with the same <proto,addr,port>, only
 586  * the first found record is returned.
 587  *
 588  * To be called under RCU lock.
 589  */
 590 struct ip_vs_dest *ip_vs_find_real_service(struct netns_ipvs *ipvs, int af,
 591                                            __u16 protocol,
 592                                            const union nf_inet_addr *daddr,
 593                                            __be16 dport)
 594 {
 595         unsigned int hash;
 596         struct ip_vs_dest *dest;
 597 
 598         /* Check for "full" addressed entries */
 599         hash = ip_vs_rs_hashkey(af, daddr, dport);
 600 
 601         hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) {
 602                 if (dest->port == dport &&
 603                     dest->af == af &&
 604                     ip_vs_addr_equal(af, &dest->addr, daddr) &&
 605                     (dest->protocol == protocol || dest->vfwmark) &&
 606                     IP_VS_DFWD_METHOD(dest) == IP_VS_CONN_F_MASQ) {
 607                         /* HIT */
 608                         return dest;
 609                 }
 610         }
 611 
 612         return NULL;
 613 }
 614 
 615 /* Find real service record by <af,addr,tun_port>.
 616  * In case of multiple records with the same <af,addr,tun_port>, only
 617  * the first found record is returned.
 618  *
 619  * To be called under RCU lock.
 620  */
 621 struct ip_vs_dest *ip_vs_find_tunnel(struct netns_ipvs *ipvs, int af,
 622                                      const union nf_inet_addr *daddr,
 623                                      __be16 tun_port)
 624 {
 625         struct ip_vs_dest *dest;
 626         unsigned int hash;
 627 
 628         /* Check for "full" addressed entries */
 629         hash = ip_vs_rs_hashkey(af, daddr, tun_port);
 630 
 631         hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) {
 632                 if (dest->tun_port == tun_port &&
 633                     dest->af == af &&
 634                     ip_vs_addr_equal(af, &dest->addr, daddr) &&
 635                     IP_VS_DFWD_METHOD(dest) == IP_VS_CONN_F_TUNNEL) {
 636                         /* HIT */
 637                         return dest;
 638                 }
 639         }
 640 
 641         return NULL;
 642 }
 643 
 644 /* Lookup destination by {addr,port} in the given service
 645  * Called under RCU lock.
 646  */
 647 static struct ip_vs_dest *
 648 ip_vs_lookup_dest(struct ip_vs_service *svc, int dest_af,
 649                   const union nf_inet_addr *daddr, __be16 dport)
 650 {
 651         struct ip_vs_dest *dest;
 652 
 653         /*
 654          * Find the destination for the given service
 655          */
 656         list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
 657                 if ((dest->af == dest_af) &&
 658                     ip_vs_addr_equal(dest_af, &dest->addr, daddr) &&
 659                     (dest->port == dport)) {
 660                         /* HIT */
 661                         return dest;
 662                 }
 663         }
 664 
 665         return NULL;
 666 }
 667 
 668 /*
 669  * Find destination by {daddr,dport,vaddr,protocol}
 670  * Created to be used in ip_vs_process_message() in
 671  * the backup synchronization daemon. It finds the
 672  * destination to be bound to the received connection
 673  * on the backup.
 674  * Called under RCU lock, no refcnt is returned.
 675  */
 676 struct ip_vs_dest *ip_vs_find_dest(struct netns_ipvs *ipvs, int svc_af, int dest_af,
 677                                    const union nf_inet_addr *daddr,
 678                                    __be16 dport,
 679                                    const union nf_inet_addr *vaddr,
 680                                    __be16 vport, __u16 protocol, __u32 fwmark,
 681                                    __u32 flags)
 682 {
 683         struct ip_vs_dest *dest;
 684         struct ip_vs_service *svc;
 685         __be16 port = dport;
 686 
 687         svc = ip_vs_service_find(ipvs, svc_af, fwmark, protocol, vaddr, vport);
 688         if (!svc)
 689                 return NULL;
 690         if (fwmark && (flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ)
 691                 port = 0;
 692         dest = ip_vs_lookup_dest(svc, dest_af, daddr, port);
 693         if (!dest)
 694                 dest = ip_vs_lookup_dest(svc, dest_af, daddr, port ^ dport);
 695         return dest;
 696 }
 697 
 698 void ip_vs_dest_dst_rcu_free(struct rcu_head *head)
 699 {
 700         struct ip_vs_dest_dst *dest_dst = container_of(head,
 701                                                        struct ip_vs_dest_dst,
 702                                                        rcu_head);
 703 
 704         dst_release(dest_dst->dst_cache);
 705         kfree(dest_dst);
 706 }
 707 
 708 /* Release dest_dst and dst_cache for dest in user context */
 709 static void __ip_vs_dst_cache_reset(struct ip_vs_dest *dest)
 710 {
 711         struct ip_vs_dest_dst *old;
 712 
 713         old = rcu_dereference_protected(dest->dest_dst, 1);
 714         if (old) {
 715                 RCU_INIT_POINTER(dest->dest_dst, NULL);
 716                 call_rcu(&old->rcu_head, ip_vs_dest_dst_rcu_free);
 717         }
 718 }
 719 
 720 /*
 721  *  Lookup dest by {svc,addr,port} in the destination trash.
 722  *  The destination trash is used to hold the destinations that are removed
 723  *  from the service table but are still referenced by some conn entries.
 724  *  The reason to add the destination trash is when the dest is temporary
 725  *  down (either by administrator or by monitor program), the dest can be
 726  *  picked back from the trash, the remaining connections to the dest can
 727  *  continue, and the counting information of the dest is also useful for
 728  *  scheduling.
 729  */
 730 static struct ip_vs_dest *
 731 ip_vs_trash_get_dest(struct ip_vs_service *svc, int dest_af,
 732                      const union nf_inet_addr *daddr, __be16 dport)
 733 {
 734         struct ip_vs_dest *dest;
 735         struct netns_ipvs *ipvs = svc->ipvs;
 736 
 737         /*
 738          * Find the destination in trash
 739          */
 740         spin_lock_bh(&ipvs->dest_trash_lock);
 741         list_for_each_entry(dest, &ipvs->dest_trash, t_list) {
 742                 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
 743                               "dest->refcnt=%d\n",
 744                               dest->vfwmark,
 745                               IP_VS_DBG_ADDR(dest->af, &dest->addr),
 746                               ntohs(dest->port),
 747                               refcount_read(&dest->refcnt));
 748                 if (dest->af == dest_af &&
 749                     ip_vs_addr_equal(dest_af, &dest->addr, daddr) &&
 750                     dest->port == dport &&
 751                     dest->vfwmark == svc->fwmark &&
 752                     dest->protocol == svc->protocol &&
 753                     (svc->fwmark ||
 754                      (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
 755                       dest->vport == svc->port))) {
 756                         /* HIT */
 757                         list_del(&dest->t_list);
 758                         goto out;
 759                 }
 760         }
 761 
 762         dest = NULL;
 763 
 764 out:
 765         spin_unlock_bh(&ipvs->dest_trash_lock);
 766 
 767         return dest;
 768 }
 769 
 770 static void ip_vs_dest_free(struct ip_vs_dest *dest)
 771 {
 772         struct ip_vs_service *svc = rcu_dereference_protected(dest->svc, 1);
 773 
 774         __ip_vs_dst_cache_reset(dest);
 775         __ip_vs_svc_put(svc, false);
 776         free_percpu(dest->stats.cpustats);
 777         ip_vs_dest_put_and_free(dest);
 778 }
 779 
 780 /*
 781  *  Clean up all the destinations in the trash
 782  *  Called by the ip_vs_control_cleanup()
 783  *
 784  *  When the ip_vs_control_clearup is activated by ipvs module exit,
 785  *  the service tables must have been flushed and all the connections
 786  *  are expired, and the refcnt of each destination in the trash must
 787  *  be 1, so we simply release them here.
 788  */
 789 static void ip_vs_trash_cleanup(struct netns_ipvs *ipvs)
 790 {
 791         struct ip_vs_dest *dest, *nxt;
 792 
 793         del_timer_sync(&ipvs->dest_trash_timer);
 794         /* No need to use dest_trash_lock */
 795         list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, t_list) {
 796                 list_del(&dest->t_list);
 797                 ip_vs_dest_free(dest);
 798         }
 799 }
 800 
/*
 *      Take a snapshot of the statistics in @src and store it in @dst.
 *
 *      Every counter is exported relative to its zero point, i.e. as
 *      kstats minus kstats0.  ip_vs_read_estimator() is then called on
 *      the same pair (presumably to fill in the rate fields of @dst).
 *      The whole snapshot is taken under src->lock with BHs disabled.
 */
static void
ip_vs_copy_stats(struct ip_vs_kstats *dst, struct ip_vs_stats *src)
{
/* exported value = running total - zero point */
#define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->kstats.c - src->kstats0.c

        spin_lock_bh(&src->lock);

        IP_VS_SHOW_STATS_COUNTER(conns);
        IP_VS_SHOW_STATS_COUNTER(inpkts);
        IP_VS_SHOW_STATS_COUNTER(outpkts);
        IP_VS_SHOW_STATS_COUNTER(inbytes);
        IP_VS_SHOW_STATS_COUNTER(outbytes);

        ip_vs_read_estimator(dst, src);

        spin_unlock_bh(&src->lock);
}
 818 
 819 static void
 820 ip_vs_export_stats_user(struct ip_vs_stats_user *dst, struct ip_vs_kstats *src)
 821 {
 822         dst->conns = (u32)src->conns;
 823         dst->inpkts = (u32)src->inpkts;
 824         dst->outpkts = (u32)src->outpkts;
 825         dst->inbytes = src->inbytes;
 826         dst->outbytes = src->outbytes;
 827         dst->cps = (u32)src->cps;
 828         dst->inpps = (u32)src->inpps;
 829         dst->outpps = (u32)src->outpps;
 830         dst->inbps = (u32)src->inbps;
 831         dst->outbps = (u32)src->outbps;
 832 }
 833 
/*
 *      "Zero" the statistics in @stats.
 *
 *      The running totals (kstats) are not touched; instead the current
 *      totals are recorded in kstats0 as the new zero point, so that
 *      readers which subtract kstats0 start again from 0.
 *      ip_vs_zero_estimator() is called as well.  All of this happens
 *      under stats->lock with BHs disabled.
 */
static void
ip_vs_zero_stats(struct ip_vs_stats *stats)
{
        spin_lock_bh(&stats->lock);

        /* get current counters as zero point, rates are zeroed */

/* remember the current total as the new zero point */
#define IP_VS_ZERO_STATS_COUNTER(c) stats->kstats0.c = stats->kstats.c

        IP_VS_ZERO_STATS_COUNTER(conns);
        IP_VS_ZERO_STATS_COUNTER(inpkts);
        IP_VS_ZERO_STATS_COUNTER(outpkts);
        IP_VS_ZERO_STATS_COUNTER(inbytes);
        IP_VS_ZERO_STATS_COUNTER(outbytes);

        ip_vs_zero_estimator(stats);

        spin_unlock_bh(&stats->lock);
}
 853 
/*
 *      Update a destination in the given service
 *
 *      @svc:   service the destination belongs to (or is being added to)
 *      @dest:  destination to configure
 *      @udest: requested settings (weight, flags, tunnel info, thresholds)
 *      @add:   non-zero when @dest is being linked into @svc (new dest or
 *              one taken back from the trash), zero when an already
 *              linked dest is edited
 *
 *      With @add set the dest is also put on svc->destinations, its
 *      estimator is started and the scheduler's add_dest() is called;
 *      otherwise only the scheduler's upd_dest() is called.
 */
static void
__ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
                    struct ip_vs_dest_user_kern *udest, int add)
{
        struct netns_ipvs *ipvs = svc->ipvs;
        struct ip_vs_service *old_svc;
        struct ip_vs_scheduler *sched;
        int conn_flags;

        /* We cannot modify an address and change the address family */
        BUG_ON(!add && udest->af != dest->af);

        /* count dests whose family differs from the one of the service */
        if (add && udest->af != svc->af)
                ipvs->mixed_address_family_dests++;

        /* keep the last_weight with latest non-0 weight */
        if (add || udest->weight != 0)
                atomic_set(&dest->last_weight, udest->weight);

        /* set the weight and the flags */
        atomic_set(&dest->weight, udest->weight);
        conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
        conn_flags |= IP_VS_CONN_F_INACTIVE;

        /*
         * Need to rehash?  The dest is unhashed here when the forwarding
         * method or the tunnel type/port changes; ip_vs_rs_hash() below
         * puts it back once the new values are stored.
         */
        if ((udest->conn_flags & IP_VS_CONN_F_FWD_MASK) !=
            IP_VS_DFWD_METHOD(dest) ||
            udest->tun_type != dest->tun_type ||
            udest->tun_port != dest->tun_port)
                ip_vs_rs_unhash(dest);

        /* set the tunnel info */
        dest->tun_type = udest->tun_type;
        dest->tun_port = udest->tun_port;
        dest->tun_flags = udest->tun_flags;

        /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
        if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
                conn_flags |= IP_VS_CONN_F_NOOUTPUT;
        } else {
                /* FTP-NAT requires conntrack for mangling */
                if (svc->port == FTPPORT)
                        ip_vs_register_conntrack(svc);
        }
        atomic_set(&dest->conn_flags, conn_flags);
        /* Put the real service in rs_table if not present. */
        ip_vs_rs_hash(ipvs, dest);

        /* bind the service */
        old_svc = rcu_dereference_protected(dest->svc, 1);
        if (!old_svc) {
                __ip_vs_bind_svc(dest, svc);
        } else {
                /*
                 * dest was bound to another service before: restart its
                 * statistics, rebind it and put the previous service
                 */
                if (old_svc != svc) {
                        ip_vs_zero_stats(&dest->stats);
                        __ip_vs_bind_svc(dest, svc);
                        __ip_vs_svc_put(old_svc, true);
                }
        }

        /* set the dest status flags */
        dest->flags |= IP_VS_DEST_F_AVAILABLE;

        /*
         * Clear the overload flag when the upper threshold is removed (0)
         * or raised; the comparison uses the old dest->u_threshold, so it
         * has to come before the assignment below.
         */
        if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
                dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
        dest->u_threshold = udest->u_threshold;
        dest->l_threshold = udest->l_threshold;

        dest->af = udest->af;

        /* reset the dst cache of the dest under its lock */
        spin_lock_bh(&dest->dst_lock);
        __ip_vs_dst_cache_reset(dest);
        spin_unlock_bh(&dest->dst_lock);

        if (add) {
                ip_vs_start_estimator(svc->ipvs, &dest->stats);
                list_add_rcu(&dest->n_list, &svc->destinations);
                svc->num_dests++;
                sched = rcu_dereference_protected(svc->scheduler, 1);
                /* add_dest is an optional scheduler method */
                if (sched && sched->add_dest)
                        sched->add_dest(svc, dest);
        } else {
                sched = rcu_dereference_protected(svc->scheduler, 1);
                /* upd_dest is an optional scheduler method */
                if (sched && sched->upd_dest)
                        sched->upd_dest(svc, dest);
        }
}
 944 
 945 
/*
 *      Create a destination for the given service
 *
 *      @svc:    service the new destination is added to
 *      @udest:  requested destination settings
 *      @dest_p: receives the new destination on success only
 *
 *      Returns 0 on success, -EINVAL if the address type of @udest is
 *      not acceptable, -ENOMEM if an allocation fails, or the error
 *      returned by nf_defrag_ipv6_enable() for IPv6 destinations.
 */
static int
ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
               struct ip_vs_dest **dest_p)
{
        struct ip_vs_dest *dest;
        unsigned int atype, i;

        EnterFunction(2);

#ifdef CONFIG_IP_VS_IPV6
        if (udest->af == AF_INET6) {
                int ret;

                /*
                 * Non-unicast or link-local addresses are accepted only
                 * if __ip_vs_addr_is_local_v6() reports them as local.
                 */
                atype = ipv6_addr_type(&udest->addr.in6);
                if ((!(atype & IPV6_ADDR_UNICAST) ||
                        atype & IPV6_ADDR_LINKLOCAL) &&
                        !__ip_vs_addr_is_local_v6(svc->ipvs->net, &udest->addr.in6))
                        return -EINVAL;

                ret = nf_defrag_ipv6_enable(svc->ipvs->net);
                if (ret)
                        return ret;
        } else
#endif
        {
                /* IPv4: only local and unicast route types are accepted */
                atype = inet_addr_type(svc->ipvs->net, udest->addr.ip);
                if (atype != RTN_LOCAL && atype != RTN_UNICAST)
                        return -EINVAL;
        }

        dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
        if (dest == NULL)
                return -ENOMEM;

        dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
        if (!dest->stats.cpustats)
                goto err_alloc;

        /* initialize the u64 stats sync point of every per-cpu slot */
        for_each_possible_cpu(i) {
                struct ip_vs_cpu_stats *ip_vs_dest_stats;
                ip_vs_dest_stats = per_cpu_ptr(dest->stats.cpustats, i);
                u64_stats_init(&ip_vs_dest_stats->syncp);
        }

        /* remember the virtual service the dest was created for */
        dest->af = udest->af;
        dest->protocol = svc->protocol;
        dest->vaddr = svc->addr;
        dest->vport = svc->port;
        dest->vfwmark = svc->fwmark;
        ip_vs_addr_copy(udest->af, &dest->addr, &udest->addr);
        dest->port = udest->port;

        atomic_set(&dest->activeconns, 0);
        atomic_set(&dest->inactconns, 0);
        atomic_set(&dest->persistconns, 0);
        refcount_set(&dest->refcnt, 1);

        INIT_HLIST_NODE(&dest->d_list);
        spin_lock_init(&dest->dst_lock);
        spin_lock_init(&dest->stats.lock);
        /* add=1: this also links the dest into svc->destinations */
        __ip_vs_update_dest(svc, dest, udest, 1);

        *dest_p = dest;

        LeaveFunction(2);
        return 0;

err_alloc:
        kfree(dest);
        return -ENOMEM;
}
1020 
1021 
1022 /*
1023  *      Add a destination into an existing service
1024  */
1025 static int
1026 ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1027 {
1028         struct ip_vs_dest *dest;
1029         union nf_inet_addr daddr;
1030         __be16 dport = udest->port;
1031         int ret;
1032 
1033         EnterFunction(2);
1034 
1035         if (udest->weight < 0) {
1036                 pr_err("%s(): server weight less than zero\n", __func__);
1037                 return -ERANGE;
1038         }
1039 
1040         if (udest->l_threshold > udest->u_threshold) {
1041                 pr_err("%s(): lower threshold is higher than upper threshold\n",
1042                         __func__);
1043                 return -ERANGE;
1044         }
1045 
1046         if (udest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
1047                 if (udest->tun_port == 0) {
1048                         pr_err("%s(): tunnel port is zero\n", __func__);
1049                         return -EINVAL;
1050                 }
1051         }
1052 
1053         ip_vs_addr_copy(udest->af, &daddr, &udest->addr);
1054 
1055         /* We use function that requires RCU lock */
1056         rcu_read_lock();
1057         dest = ip_vs_lookup_dest(svc, udest->af, &daddr, dport);
1058         rcu_read_unlock();
1059 
1060         if (dest != NULL) {
1061                 IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
1062                 return -EEXIST;
1063         }
1064 
1065         /*
1066          * Check if the dest already exists in the trash and
1067          * is from the same service
1068          */
1069         dest = ip_vs_trash_get_dest(svc, udest->af, &daddr, dport);
1070 
1071         if (dest != NULL) {
1072                 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
1073                               "dest->refcnt=%d, service %u/%s:%u\n",
1074                               IP_VS_DBG_ADDR(udest->af, &daddr), ntohs(dport),
1075                               refcount_read(&dest->refcnt),
1076                               dest->vfwmark,
1077                               IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
1078                               ntohs(dest->vport));
1079 
1080                 __ip_vs_update_dest(svc, dest, udest, 1);
1081                 ret = 0;
1082         } else {
1083                 /*
1084                  * Allocate and initialize the dest structure
1085                  */
1086                 ret = ip_vs_new_dest(svc, udest, &dest);
1087         }
1088         LeaveFunction(2);
1089 
1090         return ret;
1091 }
1092 
1093 
1094 /*
1095  *      Edit a destination in the given service
1096  */
1097 static int
1098 ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1099 {
1100         struct ip_vs_dest *dest;
1101         union nf_inet_addr daddr;
1102         __be16 dport = udest->port;
1103 
1104         EnterFunction(2);
1105 
1106         if (udest->weight < 0) {
1107                 pr_err("%s(): server weight less than zero\n", __func__);
1108                 return -ERANGE;
1109         }
1110 
1111         if (udest->l_threshold > udest->u_threshold) {
1112                 pr_err("%s(): lower threshold is higher than upper threshold\n",
1113                         __func__);
1114                 return -ERANGE;
1115         }
1116 
1117         if (udest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
1118                 if (udest->tun_port == 0) {
1119                         pr_err("%s(): tunnel port is zero\n", __func__);
1120                         return -EINVAL;
1121                 }
1122         }
1123 
1124         ip_vs_addr_copy(udest->af, &daddr, &udest->addr);
1125 
1126         /* We use function that requires RCU lock */
1127         rcu_read_lock();
1128         dest = ip_vs_lookup_dest(svc, udest->af, &daddr, dport);
1129         rcu_read_unlock();
1130 
1131         if (dest == NULL) {
1132                 IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
1133                 return -ENOENT;
1134         }
1135 
1136         __ip_vs_update_dest(svc, dest, udest, 0);
1137         LeaveFunction(2);
1138 
1139         return 0;
1140 }
1141 
1142 /*
1143  *      Delete a destination (must be already unlinked from the service)
1144  */
1145 static void __ip_vs_del_dest(struct netns_ipvs *ipvs, struct ip_vs_dest *dest,
1146                              bool cleanup)
1147 {
1148         ip_vs_stop_estimator(ipvs, &dest->stats);
1149 
1150         /*
1151          *  Remove it from the d-linked list with the real services.
1152          */
1153         ip_vs_rs_unhash(dest);
1154 
1155         spin_lock_bh(&ipvs->dest_trash_lock);
1156         IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, dest->refcnt=%d\n",
1157                       IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port),
1158                       refcount_read(&dest->refcnt));
1159         if (list_empty(&ipvs->dest_trash) && !cleanup)
1160                 mod_timer(&ipvs->dest_trash_timer,
1161                           jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1));
1162         /* dest lives in trash with reference */
1163         list_add(&dest->t_list, &ipvs->dest_trash);
1164         dest->idle_start = 0;
1165         spin_unlock_bh(&ipvs->dest_trash_lock);
1166 }
1167 
1168 
1169 /*
1170  *      Unlink a destination from the given service
1171  */
1172 static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1173                                 struct ip_vs_dest *dest,
1174                                 int svcupd)
1175 {
1176         dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1177 
1178         /*
1179          *  Remove it from the d-linked destination list.
1180          */
1181         list_del_rcu(&dest->n_list);
1182         svc->num_dests--;
1183 
1184         if (dest->af != svc->af)
1185                 svc->ipvs->mixed_address_family_dests--;
1186 
1187         if (svcupd) {
1188                 struct ip_vs_scheduler *sched;
1189 
1190                 sched = rcu_dereference_protected(svc->scheduler, 1);
1191                 if (sched && sched->del_dest)
1192                         sched->del_dest(svc, dest);
1193         }
1194 }
1195 
1196 
1197 /*
1198  *      Delete a destination server in the given service
1199  */
1200 static int
1201 ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1202 {
1203         struct ip_vs_dest *dest;
1204         __be16 dport = udest->port;
1205 
1206         EnterFunction(2);
1207 
1208         /* We use function that requires RCU lock */
1209         rcu_read_lock();
1210         dest = ip_vs_lookup_dest(svc, udest->af, &udest->addr, dport);
1211         rcu_read_unlock();
1212 
1213         if (dest == NULL) {
1214                 IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
1215                 return -ENOENT;
1216         }
1217 
1218         /*
1219          *      Unlink dest from the service
1220          */
1221         __ip_vs_unlink_dest(svc, dest, 1);
1222 
1223         /*
1224          *      Delete the destination
1225          */
1226         __ip_vs_del_dest(svc->ipvs, dest, false);
1227 
1228         LeaveFunction(2);
1229 
1230         return 0;
1231 }
1232 
1233 static void ip_vs_dest_trash_expire(struct timer_list *t)
1234 {
1235         struct netns_ipvs *ipvs = from_timer(ipvs, t, dest_trash_timer);
1236         struct ip_vs_dest *dest, *next;
1237         unsigned long now = jiffies;
1238 
1239         spin_lock(&ipvs->dest_trash_lock);
1240         list_for_each_entry_safe(dest, next, &ipvs->dest_trash, t_list) {
1241                 if (refcount_read(&dest->refcnt) > 1)
1242                         continue;
1243                 if (dest->idle_start) {
1244                         if (time_before(now, dest->idle_start +
1245                                              IP_VS_DEST_TRASH_PERIOD))
1246                                 continue;
1247                 } else {
1248                         dest->idle_start = max(1UL, now);
1249                         continue;
1250                 }
1251                 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u from trash\n",
1252                               dest->vfwmark,
1253                               IP_VS_DBG_ADDR(dest->af, &dest->addr),
1254                               ntohs(dest->port));
1255                 list_del(&dest->t_list);
1256                 ip_vs_dest_free(dest);
1257         }
1258         if (!list_empty(&ipvs->dest_trash))
1259                 mod_timer(&ipvs->dest_trash_timer,
1260                           jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1));
1261         spin_unlock(&ipvs->dest_trash_lock);
1262 }
1263 
/*
 *      Add a service into the service hash table
 *
 *      @ipvs:  netns the service is created in
 *      @u:     requested service settings
 *      @svc_p: receives the new service on success only
 *
 *      Returns 0 on success.  Errors: -ENOPROTOOPT if the module use
 *      count cannot be taken, -ENOENT if the scheduler or persistence
 *      engine is not found, -EINVAL for an IPv6 netmask outside 1..128,
 *      -ENOMEM on allocation failure, or the error returned by
 *      nf_defrag_ipv6_enable() / ip_vs_bind_scheduler().
 *
 *      On every error path the references taken so far (scheduler,
 *      persistence engine, module use count) are dropped again.
 */
static int
ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u,
                  struct ip_vs_service **svc_p)
{
        int ret = 0, i;
        struct ip_vs_scheduler *sched = NULL;
        struct ip_vs_pe *pe = NULL;
        struct ip_vs_service *svc = NULL;

        /* increase the module use count */
        if (!ip_vs_use_count_inc())
                return -ENOPROTOOPT;

        /* Lookup the scheduler by 'u->sched_name' */
        if (strcmp(u->sched_name, "none")) {
                sched = ip_vs_scheduler_get(u->sched_name);
                if (!sched) {
                        pr_info("Scheduler module ip_vs_%s not found\n",
                                u->sched_name);
                        ret = -ENOENT;
                        goto out_err;
                }
        }

        /* the persistence engine is optional: only looked up when named */
        if (u->pe_name && *u->pe_name) {
                pe = ip_vs_pe_getbyname(u->pe_name);
                if (pe == NULL) {
                        pr_info("persistence engine module ip_vs_pe_%s "
                                "not found\n", u->pe_name);
                        ret = -ENOENT;
                        goto out_err;
                }
        }

#ifdef CONFIG_IP_VS_IPV6
        if (u->af == AF_INET6) {
                /* for IPv6 the netmask field is checked as a prefix length */
                __u32 plen = (__force __u32) u->netmask;

                if (plen < 1 || plen > 128) {
                        ret = -EINVAL;
                        goto out_err;
                }

                ret = nf_defrag_ipv6_enable(ipvs->net);
                if (ret)
                        goto out_err;
        }
#endif

        svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
        if (svc == NULL) {
                IP_VS_DBG(1, "%s(): no memory\n", __func__);
                ret = -ENOMEM;
                goto out_err;
        }
        svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
        if (!svc->stats.cpustats) {
                ret = -ENOMEM;
                goto out_err;
        }

        /* initialize the u64 stats sync point of every per-cpu slot */
        for_each_possible_cpu(i) {
                struct ip_vs_cpu_stats *ip_vs_stats;
                ip_vs_stats = per_cpu_ptr(svc->stats.cpustats, i);
                u64_stats_init(&ip_vs_stats->syncp);
        }


        /* I'm the first user of the service */
        atomic_set(&svc->refcnt, 0);

        svc->af = u->af;
        svc->protocol = u->protocol;
        ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
        svc->port = u->port;
        svc->fwmark = u->fwmark;
        svc->flags = u->flags;
        svc->timeout = u->timeout * HZ;
        svc->netmask = u->netmask;
        svc->ipvs = ipvs;

        INIT_LIST_HEAD(&svc->destinations);
        spin_lock_init(&svc->sched_lock);
        spin_lock_init(&svc->stats.lock);

        /* Bind the scheduler */
        if (sched) {
                ret = ip_vs_bind_scheduler(svc, sched);
                if (ret)
                        goto out_err;
                /* reference now belongs to svc; keep out_err from putting it */
                sched = NULL;
        }

        /* Bind the ct retriever */
        RCU_INIT_POINTER(svc->pe, pe);
        /* reference now belongs to svc; keep out_err from putting it */
        pe = NULL;

        /* Update the virtual service counters */
        if (svc->port == FTPPORT)
                atomic_inc(&ipvs->ftpsvc_counter);
        else if (svc->port == 0)
                atomic_inc(&ipvs->nullsvc_counter);
        if (svc->pe && svc->pe->conn_out)
                atomic_inc(&ipvs->conn_out_counter);

        ip_vs_start_estimator(ipvs, &svc->stats);

        /* Count only IPv4 services for old get/setsockopt interface */
        if (svc->af == AF_INET)
                ipvs->num_services++;

        /* Hash the service into the service table */
        ip_vs_svc_hash(svc);

        *svc_p = svc;
        /* Now there is a service - full throttle */
        ipvs->enable = 1;
        return 0;


 out_err:
        /* svc is non-NULL only if the failure happened after kzalloc() */
        if (svc != NULL) {
                ip_vs_unbind_scheduler(svc, sched);
                ip_vs_service_free(svc);
        }
        /* both pointers are NULL here unless still owned by this function */
        ip_vs_scheduler_put(sched);
        ip_vs_pe_put(pe);

        /* decrease the module use count */
        ip_vs_use_count_dec();

        return ret;
}
1400 
1401 
/*
 *      Edit a service and bind it with a new scheduler
 *
 *      @svc: service to modify
 *      @u:   requested settings (scheduler, persistence engine, flags,
 *            timeout, netmask)
 *
 *      Returns 0 on success, -ENOENT if the scheduler or persistence
 *      engine is not found, -EINVAL for an IPv6 netmask outside 1..128,
 *      or the error returned by ip_vs_bind_scheduler().
 *
 *      Reference handling: 'old_sched' and 'old_pe' always name the
 *      object whose reference is dropped at 'out'.  They first alias the
 *      freshly looked-up objects (so early failures release them) and are
 *      later re-pointed at what was bound to @svc before the edit.
 */
static int
ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
{
        struct ip_vs_scheduler *sched = NULL, *old_sched;
        struct ip_vs_pe *pe = NULL, *old_pe = NULL;
        int ret = 0;
        bool new_pe_conn_out, old_pe_conn_out;

        /*
         * Lookup the scheduler, by 'u->sched_name'
         */
        if (strcmp(u->sched_name, "none")) {
                sched = ip_vs_scheduler_get(u->sched_name);
                if (!sched) {
                        pr_info("Scheduler module ip_vs_%s not found\n",
                                u->sched_name);
                        return -ENOENT;
                }
        }
        /* until rebinding starts, 'out' must release the new scheduler */
        old_sched = sched;

        if (u->pe_name && *u->pe_name) {
                pe = ip_vs_pe_getbyname(u->pe_name);
                if (pe == NULL) {
                        pr_info("persistence engine module ip_vs_pe_%s "
                                "not found\n", u->pe_name);
                        ret = -ENOENT;
                        goto out;
                }
                /* likewise for the new persistence engine */
                old_pe = pe;
        }

#ifdef CONFIG_IP_VS_IPV6
        if (u->af == AF_INET6) {
                /* for IPv6 the netmask field is checked as a prefix length */
                __u32 plen = (__force __u32) u->netmask;

                if (plen < 1 || plen > 128) {
                        ret = -EINVAL;
                        goto out;
                }
        }
#endif

        /* from here on 'old_sched' is the scheduler currently bound */
        old_sched = rcu_dereference_protected(svc->scheduler, 1);
        if (sched != old_sched) {
                if (old_sched) {
                        ip_vs_unbind_scheduler(svc, old_sched);
                        RCU_INIT_POINTER(svc->scheduler, NULL);
                        /* Wait all svc->sched_data users */
                        synchronize_rcu();
                }
                /* Bind the new scheduler */
                if (sched) {
                        ret = ip_vs_bind_scheduler(svc, sched);
                        if (ret) {
                                /*
                                 * The new scheduler is released here; the
                                 * old one, already unbound above, is
                                 * released at 'out'.
                                 */
                                ip_vs_scheduler_put(sched);
                                goto out;
                        }
                }
        }

        /*
         * Set the flags and timeout value
         */
        svc->flags = u->flags | IP_VS_SVC_F_HASHED;
        svc->timeout = u->timeout * HZ;
        svc->netmask = u->netmask;

        /* from here on 'old_pe' is the engine currently bound */
        old_pe = rcu_dereference_protected(svc->pe, 1);
        if (pe != old_pe) {
                rcu_assign_pointer(svc->pe, pe);
                /* check for optional methods in new pe */
                new_pe_conn_out = (pe && pe->conn_out) ? true : false;
                old_pe_conn_out = (old_pe && old_pe->conn_out) ? true : false;
                /* conn_out_counter changes only when conn_out support does */
                if (new_pe_conn_out && !old_pe_conn_out)
                        atomic_inc(&svc->ipvs->conn_out_counter);
                if (old_pe_conn_out && !new_pe_conn_out)
                        atomic_dec(&svc->ipvs->conn_out_counter);
        }

out:
        /*
         * Drops either the replaced objects or, when nothing changed or
         * an early check failed, the extra references taken by the
         * lookups above.
         */
        ip_vs_scheduler_put(old_sched);
        ip_vs_pe_put(old_pe);
        return ret;
}
1490 
/*
 *      Delete a service from the service list
 *      - The service must be unlinked, unlocked and not referenced!
 *      - We are called under _bh lock
 *        NOTE(review): the caller visible here, ip_vs_unlink_service(),
 *        takes no _bh lock itself - confirm whether this still holds.
 *
 *      @svc:     service to tear down
 *      @cleanup: passed on to __ip_vs_del_dest() for every destination
 *
 *      Stops the estimator, releases scheduler and persistence engine,
 *      moves every destination of @svc to the trash, adjusts the
 *      per-netns counters and finally puts @svc and the module use count.
 */
static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup)
{
        struct ip_vs_dest *dest, *nxt;
        struct ip_vs_scheduler *old_sched;
        struct ip_vs_pe *old_pe;
        struct netns_ipvs *ipvs = svc->ipvs;

        /* Count only IPv4 services for old get/setsockopt interface */
        if (svc->af == AF_INET)
                ipvs->num_services--;

        ip_vs_stop_estimator(svc->ipvs, &svc->stats);

        /* Unbind scheduler */
        old_sched = rcu_dereference_protected(svc->scheduler, 1);
        ip_vs_unbind_scheduler(svc, old_sched);
        ip_vs_scheduler_put(old_sched);

        /* Unbind persistence engine, keep svc->pe */
        old_pe = rcu_dereference_protected(svc->pe, 1);
        if (old_pe && old_pe->conn_out)
                atomic_dec(&ipvs->conn_out_counter);
        ip_vs_pe_put(old_pe);

        /*
         *    Unlink the whole destination list
         *    (svcupd=0: the scheduler is not notified per destination)
         */
        list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
                __ip_vs_unlink_dest(svc, dest, 0);
                __ip_vs_del_dest(svc->ipvs, dest, cleanup);
        }

        /*
         *    Update the virtual service counters
         */
        if (svc->port == FTPPORT)
                atomic_dec(&ipvs->ftpsvc_counter);
        else if (svc->port == 0)
                atomic_dec(&ipvs->nullsvc_counter);

        /*
         *    Free the service if nobody refers to it
         */
        __ip_vs_svc_put(svc, true);

        /* decrease the module use count */
        ip_vs_use_count_dec();
}
1544 
/*
 * Unlink a service from list and try to delete it if its refcnt reached 0
 *
 * @svc:     service to remove
 * @cleanup: passed through to __ip_vs_del_service()
 *
 * The order matters: the extra reference is taken before the service is
 * unhashed and torn down, and it is the final __ip_vs_svc_put() inside
 * __ip_vs_del_service() that gives it back.
 */
static void ip_vs_unlink_service(struct ip_vs_service *svc, bool cleanup)
{
        ip_vs_unregister_conntrack(svc);
        /* Hold svc to avoid double release from dest_trash */
        atomic_inc(&svc->refcnt);
        /*
         * Unhash it from the service table
         */
        ip_vs_svc_unhash(svc);

        __ip_vs_del_service(svc, cleanup);
}
1560 
1561 /*
1562  *      Delete a service from the service list
1563  */
1564 static int ip_vs_del_service(struct ip_vs_service *svc)
1565 {
1566         if (svc == NULL)
1567                 return -EEXIST;
1568         ip_vs_unlink_service(svc, false);
1569 
1570         return 0;
1571 }
1572 
1573 
1574 /*
1575  *      Flush all the virtual services
1576  */
1577 static int ip_vs_flush(struct netns_ipvs *ipvs, bool cleanup)
1578 {
1579         int idx;
1580         struct ip_vs_service *svc;
1581         struct hlist_node *n;
1582 
1583         /*
1584          * Flush the service table hashed by <netns,protocol,addr,port>
1585          */
1586         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1587                 hlist_for_each_entry_safe(svc, n, &ip_vs_svc_table[idx],
1588                                           s_list) {
1589                         if (svc->ipvs == ipvs)
1590                                 ip_vs_unlink_service(svc, cleanup);
1591                 }
1592         }
1593 
1594         /*
1595          * Flush the service table hashed by fwmark
1596          */
1597         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1598                 hlist_for_each_entry_safe(svc, n, &ip_vs_svc_fwm_table[idx],
1599                                           f_list) {
1600                         if (svc->ipvs == ipvs)
1601                                 ip_vs_unlink_service(svc, cleanup);
1602                 }
1603         }
1604 
1605         return 0;
1606 }
1607 
/*
 *      Delete service by {netns} in the service table.
 *      Called by __ip_vs_cleanup()
 *
 *      @ipvs: netns whose services are removed
 */
void ip_vs_service_net_cleanup(struct netns_ipvs *ipvs)
{
        EnterFunction(2);
        /* Flush every service of this netns, in cleanup mode, under the mutex */
        mutex_lock(&__ip_vs_mutex);
        ip_vs_flush(ipvs, true);
        mutex_unlock(&__ip_vs_mutex);
        LeaveFunction(2);
}
1621 
1622 /* Put all references for device (dst_cache) */
1623 static inline void
1624 ip_vs_forget_dev(struct ip_vs_dest *dest, struct net_device *dev)
1625 {
1626         struct ip_vs_dest_dst *dest_dst;
1627 
1628         spin_lock_bh(&dest->dst_lock);
1629         dest_dst = rcu_dereference_protected(dest->dest_dst, 1);
1630         if (dest_dst && dest_dst->dst_cache->dev == dev) {
1631                 IP_VS_DBG_BUF(3, "Reset dev:%s dest %s:%u ,dest->refcnt=%d\n",
1632                               dev->name,
1633                               IP_VS_DBG_ADDR(dest->af, &dest->addr),
1634                               ntohs(dest->port),
1635                               refcount_read(&dest->refcnt));
1636                 __ip_vs_dst_cache_reset(dest);
1637         }
1638         spin_unlock_bh(&dest->dst_lock);
1639 
1640 }
1641 /* Netdev event receiver
1642  * Currently only NETDEV_DOWN is handled to release refs to cached dsts
1643  */
1644 static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,
1645                            void *ptr)
1646 {
1647         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1648         struct net *net = dev_net(dev);
1649         struct netns_ipvs *ipvs = net_ipvs(net);
1650         struct ip_vs_service *svc;
1651         struct ip_vs_dest *dest;
1652         unsigned int idx;
1653 
1654         if (event != NETDEV_DOWN || !ipvs)
1655                 return NOTIFY_DONE;
1656         IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name);
1657         EnterFunction(2);
1658         mutex_lock(&__ip_vs_mutex);
1659         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1660                 hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1661                         if (svc->ipvs == ipvs) {
1662                                 list_for_each_entry(dest, &svc->destinations,
1663                                                     n_list) {
1664                                         ip_vs_forget_dev(dest, dev);
1665                                 }
1666                         }
1667                 }
1668 
1669                 hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1670                         if (svc->ipvs == ipvs) {
1671                                 list_for_each_entry(dest, &svc->destinations,
1672                                                     n_list) {
1673                                         ip_vs_forget_dev(dest, dev);
1674                                 }
1675                         }
1676 
1677                 }
1678         }
1679 
1680         spin_lock_bh(&ipvs->dest_trash_lock);
1681         list_for_each_entry(dest, &ipvs->dest_trash, t_list) {
1682                 ip_vs_forget_dev(dest, dev);
1683         }
1684         spin_unlock_bh(&ipvs->dest_trash_lock);
1685         mutex_unlock(&__ip_vs_mutex);
1686         LeaveFunction(2);
1687         return NOTIFY_DONE;
1688 }
1689 
1690 /*
1691  *      Zero counters in a service or all services
1692  */
1693 static int ip_vs_zero_service(struct ip_vs_service *svc)
1694 {
1695         struct ip_vs_dest *dest;
1696 
1697         list_for_each_entry(dest, &svc->destinations, n_list) {
1698                 ip_vs_zero_stats(&dest->stats);
1699         }
1700         ip_vs_zero_stats(&svc->stats);
1701         return 0;
1702 }
1703 
1704 static int ip_vs_zero_all(struct netns_ipvs *ipvs)
1705 {
1706         int idx;
1707         struct ip_vs_service *svc;
1708 
1709         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1710                 hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1711                         if (svc->ipvs == ipvs)
1712                                 ip_vs_zero_service(svc);
1713                 }
1714         }
1715 
1716         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1717                 hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1718                         if (svc->ipvs == ipvs)
1719                                 ip_vs_zero_service(svc);
1720                 }
1721         }
1722 
1723         ip_vs_zero_stats(&ipvs->tot_stats);
1724         return 0;
1725 }
1726 
1727 #ifdef CONFIG_SYSCTL
1728 
/* Constant 3, referenced by address as a sysctl upper bound (.extra2) */
static int three = 3;
1730 
1731 static int
1732 proc_do_defense_mode(struct ctl_table *table, int write,
1733                      void __user *buffer, size_t *lenp, loff_t *ppos)
1734 {
1735         struct netns_ipvs *ipvs = table->extra2;
1736         int *valp = table->data;
1737         int val = *valp;
1738         int rc;
1739 
1740         struct ctl_table tmp = {
1741                 .data = &val,
1742                 .maxlen = sizeof(int),
1743                 .mode = table->mode,
1744         };
1745 
1746         rc = proc_dointvec(&tmp, write, buffer, lenp, ppos);
1747         if (write && (*valp != val)) {
1748                 if (val < 0 || val > 3) {
1749                         rc = -EINVAL;
1750                 } else {
1751                         *valp = val;
1752                         update_defense_level(ipvs);
1753                 }
1754         }
1755         return rc;
1756 }
1757 
1758 static int
1759 proc_do_sync_threshold(struct ctl_table *table, int write,
1760                        void __user *buffer, size_t *lenp, loff_t *ppos)
1761 {
1762         int *valp = table->data;
1763         int val[2];
1764         int rc;
1765         struct ctl_table tmp = {
1766                 .data = &val,
1767                 .maxlen = table->maxlen,
1768                 .mode = table->mode,
1769         };
1770 
1771         memcpy(val, valp, sizeof(val));
1772         rc = proc_dointvec(&tmp, write, buffer, lenp, ppos);
1773         if (write) {
1774                 if (val[0] < 0 || val[1] < 0 ||
1775                     (val[0] >= val[1] && val[1]))
1776                         rc = -EINVAL;
1777                 else
1778                         memcpy(valp, val, sizeof(val));
1779         }
1780         return rc;
1781 }
1782 
1783 static int
1784 proc_do_sync_ports(struct ctl_table *table, int write,
1785                    void __user *buffer, size_t *lenp, loff_t *ppos)
1786 {
1787         int *valp = table->data;
1788         int val = *valp;
1789         int rc;
1790 
1791         struct ctl_table tmp = {
1792                 .data = &val,
1793                 .maxlen = sizeof(int),
1794                 .mode = table->mode,
1795         };
1796 
1797         rc = proc_dointvec(&tmp, write, buffer, lenp, ppos);
1798         if (write && (*valp != val)) {
1799                 if (val < 1 || !is_power_of_2(val))
1800                         rc = -EINVAL;
1801                 else
1802                         *valp = val;
1803         }
1804         return rc;
1805 }
1806 
1807 /*
1808  *      IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1809  *      Do not change order or insert new entries without
1810  *      align with netns init in ip_vs_control_net_init()
1811  */
1812 
1813 static struct ctl_table vs_vars[] = {
1814         {
1815                 .procname       = "amemthresh",
1816                 .maxlen         = sizeof(int),
1817                 .mode           = 0644,
1818                 .proc_handler   = proc_dointvec,
1819         },
1820         {
1821                 .procname       = "am_droprate",
1822                 .maxlen         = sizeof(int),
1823                 .mode           = 0644,
1824                 .proc_handler   = proc_dointvec,
1825         },
1826         {
1827                 .procname       = "drop_entry",
1828                 .maxlen         = sizeof(int),
1829                 .mode           = 0644,
1830                 .proc_handler   = proc_do_defense_mode,
1831         },
1832         {
1833                 .procname       = "drop_packet",
1834                 .maxlen         = sizeof(int),
1835                 .mode           = 0644,
1836                 .proc_handler   = proc_do_defense_mode,
1837         },
1838 #ifdef CONFIG_IP_VS_NFCT
1839         {
1840                 .procname       = "conntrack",
1841                 .maxlen         = sizeof(int),
1842                 .mode           = 0644,
1843                 .proc_handler   = &proc_dointvec,
1844         },
1845 #endif
1846         {
1847                 .procname       = "secure_tcp",
1848                 .maxlen         = sizeof(int),
1849                 .mode           = 0644,
1850                 .proc_handler   = proc_do_defense_mode,
1851         },
1852         {
1853                 .procname       = "snat_reroute",
1854                 .maxlen         = sizeof(int),
1855                 .mode           = 0644,
1856                 .proc_handler   = &proc_dointvec,
1857         },
1858         {
1859                 .procname       = "sync_version",
1860                 .maxlen         = sizeof(int),
1861                 .mode           = 0644,
1862                 .proc_handler   = proc_dointvec_minmax,
1863                 .extra1         = SYSCTL_ZERO,
1864                 .extra2         = SYSCTL_ONE,
1865         },
1866         {
1867                 .procname       = "sync_ports",
1868                 .maxlen         = sizeof(int),
1869                 .mode           = 0644,
1870                 .proc_handler   = proc_do_sync_ports,
1871         },
1872         {
1873                 .procname       = "sync_persist_mode",
1874                 .maxlen         = sizeof(int),
1875                 .mode           = 0644,
1876                 .proc_handler   = proc_dointvec,
1877         },
1878         {
1879                 .procname       = "sync_qlen_max",
1880                 .maxlen         = sizeof(unsigned long),
1881                 .mode           = 0644,
1882                 .proc_handler   = proc_doulongvec_minmax,
1883         },
1884         {
1885                 .procname       = "sync_sock_size",
1886                 .maxlen         = sizeof(int),
1887                 .mode           = 0644,
1888                 .proc_handler   = proc_dointvec,
1889         },
1890         {
1891                 .procname       = "cache_bypass",
1892                 .maxlen         = sizeof(int),
1893                 .mode           = 0644,
1894                 .proc_handler   = proc_dointvec,
1895         },
1896         {
1897                 .procname       = "expire_nodest_conn",
1898                 .maxlen         = sizeof(int),
1899                 .mode           = 0644,
1900                 .proc_handler   = proc_dointvec,
1901         },
1902         {
1903                 .procname       = "sloppy_tcp",
1904                 .maxlen         = sizeof(int),
1905                 .mode           = 0644,
1906                 .proc_handler   = proc_dointvec,
1907         },
1908         {
1909                 .procname       = "sloppy_sctp",
1910                 .maxlen         = sizeof(int),
1911                 .mode           = 0644,
1912                 .proc_handler   = proc_dointvec,
1913         },
1914         {
1915                 .procname       = "expire_quiescent_template",
1916                 .maxlen         = sizeof(int),
1917                 .mode           = 0644,
1918                 .proc_handler   = proc_dointvec,
1919         },
1920         {
1921                 .procname       = "sync_threshold",
1922                 .maxlen         =
1923                         sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold),
1924                 .mode           = 0644,
1925                 .proc_handler   = proc_do_sync_threshold,
1926         },
1927         {
1928                 .procname       = "sync_refresh_period",
1929                 .maxlen         = sizeof(int),
1930                 .mode           = 0644,
1931                 .proc_handler   = proc_dointvec_jiffies,
1932         },
1933         {
1934                 .procname       = "sync_retries",
1935                 .maxlen         = sizeof(int),
1936                 .mode           = 0644,
1937                 .proc_handler   = proc_dointvec_minmax,
1938                 .extra1         = SYSCTL_ZERO,
1939                 .extra2         = &three,
1940         },
1941         {
1942                 .procname       = "nat_icmp_send",
1943                 .maxlen         = sizeof(int),
1944                 .mode           = 0644,
1945                 .proc_handler   = proc_dointvec,
1946         },
1947         {
1948                 .procname       = "pmtu_disc",
1949                 .maxlen         = sizeof(int),
1950                 .mode           = 0644,
1951                 .proc_handler   = proc_dointvec,
1952         },
1953         {
1954                 .procname       = "backup_only",
1955                 .maxlen         = sizeof(int),
1956                 .mode           = 0644,
1957                 .proc_handler   = proc_dointvec,
1958         },
1959         {
1960                 .procname       = "conn_reuse_mode",
1961                 .maxlen         = sizeof(int),
1962                 .mode           = 0644,
1963                 .proc_handler   = proc_dointvec,
1964         },
1965         {
1966                 .procname       = "schedule_icmp",
1967                 .maxlen         = sizeof(int),
1968                 .mode           = 0644,
1969                 .proc_handler   = proc_dointvec,
1970         },
1971         {
1972                 .procname       = "ignore_tunneled",
1973                 .maxlen         = sizeof(int),
1974                 .mode           = 0644,
1975                 .proc_handler   = proc_dointvec,
1976         },
1977 #ifdef CONFIG_IP_VS_DEBUG
1978         {
1979                 .procname       = "debug_level",
1980                 .data           = &sysctl_ip_vs_debug_level,
1981                 .maxlen         = sizeof(int),
1982                 .mode           = 0644,
1983                 .proc_handler   = proc_dointvec,
1984         },
1985 #endif
1986         { }
1987 };
1988 
1989 #endif
1990 
1991 #ifdef CONFIG_PROC_FS
1992 
1993 struct ip_vs_iter {
1994         struct seq_net_private p;  /* Do not move this, netns depends upon it*/
1995         struct hlist_head *table;
1996         int bucket;
1997 };
1998 
1999 /*
2000  *      Write the contents of the VS rule table to a PROCfs file.
2001  *      (It is kept just for backward compatibility)
2002  */
2003 static inline const char *ip_vs_fwd_name(unsigned int flags)
2004 {
2005         switch (flags & IP_VS_CONN_F_FWD_MASK) {
2006         case IP_VS_CONN_F_LOCALNODE:
2007                 return "Local";
2008         case IP_VS_CONN_F_TUNNEL:
2009                 return "Tunnel";
2010         case IP_VS_CONN_F_DROUTE:
2011                 return "Route";
2012         default:
2013                 return "Masq";
2014         }
2015 }
2016 
2017 
2018 /* Get the Nth entry in the two lists */
2019 static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
2020 {
2021         struct net *net = seq_file_net(seq);
2022         struct netns_ipvs *ipvs = net_ipvs(net);
2023         struct ip_vs_iter *iter = seq->private;
2024         int idx;
2025         struct ip_vs_service *svc;
2026 
2027         /* look in hash by protocol */
2028         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2029                 hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[idx], s_list) {
2030                         if ((svc->ipvs == ipvs) && pos-- == 0) {
2031                                 iter->table = ip_vs_svc_table;
2032                                 iter->bucket = idx;
2033                                 return svc;
2034                         }
2035                 }
2036         }
2037 
2038         /* keep looking in fwmark */
2039         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2040                 hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[idx],
2041                                          f_list) {
2042                         if ((svc->ipvs == ipvs) && pos-- == 0) {
2043                                 iter->table = ip_vs_svc_fwm_table;
2044                                 iter->bucket = idx;
2045                                 return svc;
2046                         }
2047                 }
2048         }
2049 
2050         return NULL;
2051 }
2052 
2053 static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
2054         __acquires(RCU)
2055 {
2056         rcu_read_lock();
2057         return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
2058 }
2059 
2060 
2061 static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2062 {
2063         struct hlist_node *e;
2064         struct ip_vs_iter *iter;
2065         struct ip_vs_service *svc;
2066 
2067         ++*pos;
2068         if (v == SEQ_START_TOKEN)
2069                 return ip_vs_info_array(seq,0);
2070 
2071         svc = v;
2072         iter = seq->private;
2073 
2074         if (iter->table == ip_vs_svc_table) {
2075                 /* next service in table hashed by protocol */
2076                 e = rcu_dereference(hlist_next_rcu(&svc->s_list));
2077                 if (e)
2078                         return hlist_entry(e, struct ip_vs_service, s_list);
2079 
2080                 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
2081                         hlist_for_each_entry_rcu(svc,
2082                                                  &ip_vs_svc_table[iter->bucket],
2083                                                  s_list) {
2084                                 return svc;
2085                         }
2086                 }
2087 
2088                 iter->table = ip_vs_svc_fwm_table;
2089                 iter->bucket = -1;
2090                 goto scan_fwmark;
2091         }
2092 
2093         /* next service in hashed by fwmark */
2094         e = rcu_dereference(hlist_next_rcu(&svc->f_list));
2095         if (e)
2096                 return hlist_entry(e, struct ip_vs_service, f_list);
2097 
2098  scan_fwmark:
2099         while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
2100                 hlist_for_each_entry_rcu(svc,
2101                                          &ip_vs_svc_fwm_table[iter->bucket],
2102                                          f_list)
2103                         return svc;
2104         }
2105 
2106         return NULL;
2107 }
2108 
2109 static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
2110         __releases(RCU)
2111 {
2112         rcu_read_unlock();
2113 }
2114 
2115 
2116 static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
2117 {
2118         if (v == SEQ_START_TOKEN) {
2119                 seq_printf(seq,
2120                         "IP Virtual Server version %d.%d.%d (size=%d)\n",
2121                         NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
2122                 seq_puts(seq,
2123                          "Prot LocalAddress:Port Scheduler Flags\n");
2124                 seq_puts(seq,
2125                          "  -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
2126         } else {
2127                 struct net *net = seq_file_net(seq);
2128                 struct netns_ipvs *ipvs = net_ipvs(net);
2129                 const struct ip_vs_service *svc = v;
2130                 const struct ip_vs_iter *iter = seq->private;
2131                 const struct ip_vs_dest *dest;
2132                 struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler);
2133                 char *sched_name = sched ? sched->name : "none";
2134 
2135                 if (svc->ipvs != ipvs)
2136                         return 0;
2137                 if (iter->table == ip_vs_svc_table) {
2138 #ifdef CONFIG_IP_VS_IPV6
2139                         if (svc->af == AF_INET6)
2140                                 seq_printf(seq, "%s  [%pI6]:%04X %s ",
2141                                            ip_vs_proto_name(svc->protocol),
2142                                            &svc->addr.in6,
2143                                            ntohs(svc->port),
2144                                            sched_name);
2145                         else
2146 #endif
2147                                 seq_printf(seq, "%s  %08X:%04X %s %s ",
2148                                            ip_vs_proto_name(svc->protocol),
2149                                            ntohl(svc->addr.ip),
2150                                            ntohs(svc->port),
2151                                            sched_name,
2152                                            (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
2153                 } else {
2154                         seq_printf(seq, "FWM  %08X %s %s",
2155                                    svc->fwmark, sched_name,
2156                                    (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
2157                 }
2158 
2159                 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
2160                         seq_printf(seq, "persistent %d %08X\n",
2161                                 svc->timeout,
2162                                 ntohl(svc->netmask));
2163                 else
2164                         seq_putc(seq, '\n');
2165 
2166                 list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
2167 #ifdef CONFIG_IP_VS_IPV6
2168                         if (dest->af == AF_INET6)
2169                                 seq_printf(seq,
2170                                            "  -> [%pI6]:%04X"
2171                                            "      %-7s %-6d %-10d %-10d\n",
2172                                            &dest->addr.in6,
2173                                            ntohs(dest->port),
2174                                            ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
2175                                            atomic_read(&dest->weight),
2176                                            atomic_read(&dest->activeconns),
2177                                            atomic_read(&dest->inactconns));
2178                         else
2179 #endif
2180                                 seq_printf(seq,
2181                                            "  -> %08X:%04X      "
2182                                            "%-7s %-6d %-10d %-10d\n",
2183                                            ntohl(dest->addr.ip),
2184                                            ntohs(dest->port),
2185                                            ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
2186                                            atomic_read(&dest->weight),
2187                                            atomic_read(&dest->activeconns),
2188                                            atomic_read(&dest->inactconns));
2189 
2190                 }
2191         }
2192         return 0;
2193 }
2194 
2195 static const struct seq_operations ip_vs_info_seq_ops = {
2196         .start = ip_vs_info_seq_start,
2197         .next  = ip_vs_info_seq_next,
2198         .stop  = ip_vs_info_seq_stop,
2199         .show  = ip_vs_info_seq_show,
2200 };
2201 
2202 static int ip_vs_stats_show(struct seq_file *seq, void *v)
2203 {
2204         struct net *net = seq_file_single_net(seq);
2205         struct ip_vs_kstats show;
2206 
2207 /*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
2208         seq_puts(seq,
2209                  "   Total Incoming Outgoing         Incoming         Outgoing\n");
2210         seq_puts(seq,
2211                  "   Conns  Packets  Packets            Bytes            Bytes\n");
2212 
2213         ip_vs_copy_stats(&show, &net_ipvs(net)->tot_stats);
2214         seq_printf(seq, "%8LX %8LX %8LX %16LX %16LX\n\n",
2215                    (unsigned long long)show.conns,
2216                    (unsigned long long)show.inpkts,
2217                    (unsigned long long)show.outpkts,
2218                    (unsigned long long)show.inbytes,
2219                    (unsigned long long)show.outbytes);
2220 
2221 /*                01234567 01234567 01234567 0123456701234567 0123456701234567*/
2222         seq_puts(seq,
2223                  " Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
2224         seq_printf(seq, "%8LX %8LX %8LX %16LX %16LX\n",
2225                    (unsigned long long)show.cps,
2226                    (unsigned long long)show.inpps,
2227                    (unsigned long long)show.outpps,
2228                    (unsigned long long)show.inbps,
2229                    (unsigned long long)show.outbps);
2230 
2231         return 0;
2232 }
2233 
2234 static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
2235 {
2236         struct net *net = seq_file_single_net(seq);
2237         struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats;
2238         struct ip_vs_cpu_stats __percpu *cpustats = tot_stats->cpustats;
2239         struct ip_vs_kstats kstats;
2240         int i;
2241 
2242 /*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
2243         seq_puts(seq,
2244                  "       Total Incoming Outgoing         Incoming         Outgoing\n");
2245         seq_puts(seq,
2246                  "CPU    Conns  Packets  Packets            Bytes            Bytes\n");
2247 
2248         for_each_possible_cpu(i) {
2249                 struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i);
2250                 unsigned int start;
2251                 u64 conns, inpkts, outpkts, inbytes, outbytes;
2252 
2253                 do {
2254                         start = u64_stats_fetch_begin_irq(&u->syncp);
2255                         conns = u->cnt.conns;
2256                         inpkts = u->cnt.inpkts;
2257                         outpkts = u->cnt.outpkts;
2258                         inbytes = u->cnt.inbytes;
2259                         outbytes = u->cnt.outbytes;
2260                 } while (u64_stats_fetch_retry_irq(&u->syncp, start));
2261 
2262                 seq_printf(seq, "%3X %8LX %8LX %8LX %16LX %16LX\n",
2263                            i, (u64)conns, (u64)inpkts,
2264                            (u64)outpkts, (u64)inbytes,
2265                            (u64)outbytes);
2266         }
2267 
2268         ip_vs_copy_stats(&kstats, tot_stats);
2269 
2270         seq_printf(seq, "  ~ %8LX %8LX %8LX %16LX %16LX\n\n",
2271                    (unsigned long long)kstats.conns,
2272                    (unsigned long long)kstats.inpkts,
2273                    (unsigned long long)kstats.outpkts,
2274                    (unsigned long long)kstats.inbytes,
2275                    (unsigned long long)kstats.outbytes);
2276 
2277 /*                ... 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2278         seq_puts(seq,
2279                  "     Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
2280         seq_printf(seq, "    %8LX %8LX %8LX %16LX %16LX\n",
2281                    kstats.cps,
2282                    kstats.inpps,
2283                    kstats.outpps,
2284                    kstats.inbps,
2285                    kstats.outbps);
2286 
2287         return 0;
2288 }
2289 #endif
2290 
2291 /*
2292  *      Set timeout values for tcp tcpfin udp in the timeout_table.
2293  */
2294 static int ip_vs_set_timeout(struct netns_ipvs *ipvs, struct ip_vs_timeout_user *u)
2295 {
2296 #if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
2297         struct ip_vs_proto_data *pd;
2298 #endif
2299 
2300         IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2301                   u->tcp_timeout,
2302                   u->tcp_fin_timeout,
2303                   u->udp_timeout);
2304 
2305 #ifdef CONFIG_IP_VS_PROTO_TCP
2306         if (u->tcp_timeout < 0 || u->tcp_timeout > (INT_MAX / HZ) ||
2307             u->tcp_fin_timeout < 0 || u->tcp_fin_timeout > (INT_MAX / HZ)) {
2308                 return -EINVAL;
2309         }
2310 #endif
2311 
2312 #ifdef CONFIG_IP_VS_PROTO_UDP
2313         if (u->udp_timeout < 0 || u->udp_timeout > (INT_MAX / HZ))
2314                 return -EINVAL;
2315 #endif
2316 
2317 #ifdef CONFIG_IP_VS_PROTO_TCP
2318         if (u->tcp_timeout) {
2319                 pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP);
2320                 pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
2321                         = u->tcp_timeout * HZ;
2322         }
2323 
2324         if (u->tcp_fin_timeout) {
2325                 pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP);
2326                 pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
2327                         = u->tcp_fin_timeout * HZ;
2328         }
2329 #endif
2330 
2331 #ifdef CONFIG_IP_VS_PROTO_UDP
2332         if (u->udp_timeout) {
2333                 pd = ip_vs_proto_data_get(ipvs, IPPROTO_UDP);
2334                 pd->timeout_table[IP_VS_UDP_S_NORMAL]
2335                         = u->udp_timeout * HZ;
2336         }
2337 #endif
2338         return 0;
2339 }
2340 
/* Zero-based index of a set/get command relative to IP_VS_BASE_CTL.
 * The argument is parenthesised so that any expression may be passed.
 */
#define CMDID(cmd)              ((cmd) - IP_VS_BASE_CTL)
2342 
2343 struct ip_vs_svcdest_user {
2344         struct ip_vs_service_user       s;
2345         struct ip_vs_dest_user          d;
2346 };
2347 
2348 static const unsigned char set_arglen[CMDID(IP_VS_SO_SET_MAX) + 1] = {
2349         [CMDID(IP_VS_SO_SET_ADD)]         = sizeof(struct ip_vs_service_user),
2350         [CMDID(IP_VS_SO_SET_EDIT)]        = sizeof(struct ip_vs_service_user),
2351         [CMDID(IP_VS_SO_SET_DEL)]         = sizeof(struct ip_vs_service_user),
2352         [CMDID(IP_VS_SO_SET_ADDDEST)]     = sizeof(struct ip_vs_svcdest_user),
2353         [CMDID(IP_VS_SO_SET_DELDEST)]     = sizeof(struct ip_vs_svcdest_user),
2354         [CMDID(IP_VS_SO_SET_EDITDEST)]    = sizeof(struct ip_vs_svcdest_user),
2355         [CMDID(IP_VS_SO_SET_TIMEOUT)]     = sizeof(struct ip_vs_timeout_user),
2356         [CMDID(IP_VS_SO_SET_STARTDAEMON)] = sizeof(struct ip_vs_daemon_user),
2357         [CMDID(IP_VS_SO_SET_STOPDAEMON)]  = sizeof(struct ip_vs_daemon_user),
2358         [CMDID(IP_VS_SO_SET_ZERO)]        = sizeof(struct ip_vs_service_user),
2359 };
2360 
2361 union ip_vs_set_arglen {
2362         struct ip_vs_service_user       field_IP_VS_SO_SET_ADD;
2363         struct ip_vs_service_user       field_IP_VS_SO_SET_EDIT;
2364         struct ip_vs_service_user       field_IP_VS_SO_SET_DEL;
2365         struct ip_vs_svcdest_user       field_IP_VS_SO_SET_ADDDEST;
2366         struct ip_vs_svcdest_user       field_IP_VS_SO_SET_DELDEST;
2367         struct ip_vs_svcdest_user       field_IP_VS_SO_SET_EDITDEST;
2368         struct ip_vs_timeout_user       field_IP_VS_SO_SET_TIMEOUT;
2369         struct ip_vs_daemon_user        field_IP_VS_SO_SET_STARTDAEMON;
2370         struct ip_vs_daemon_user        field_IP_VS_SO_SET_STOPDAEMON;
2371         struct ip_vs_service_user       field_IP_VS_SO_SET_ZERO;
2372 };
2373 
2374 #define MAX_SET_ARGLEN  sizeof(union ip_vs_set_arglen)
2375 
2376 static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2377                                   struct ip_vs_service_user *usvc_compat)
2378 {
2379         memset(usvc, 0, sizeof(*usvc));
2380 
2381         usvc->af                = AF_INET;
2382         usvc->protocol          = usvc_compat->protocol;
2383         usvc->addr.ip           = usvc_compat->addr;
2384         usvc->port              = usvc_compat->port;
2385         usvc->fwmark            = usvc_compat->fwmark;
2386 
2387         /* Deep copy of sched_name is not needed here */
2388         usvc->sched_name        = usvc_compat->sched_name;
2389 
2390         usvc->flags             = usvc_compat->flags;
2391         usvc->timeout           = usvc_compat->timeout;
2392         usvc->netmask           = usvc_compat->netmask;
2393 }
2394 
2395 static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2396                                    struct ip_vs_dest_user *udest_compat)
2397 {
2398         memset(udest, 0, sizeof(*udest));
2399 
2400         udest->addr.ip          = udest_compat->addr;
2401         udest->port             = udest_compat->port;
2402         udest->conn_flags       = udest_compat->conn_flags;
2403         udest->weight           = udest_compat->weight;
2404         udest->u_threshold      = udest_compat->u_threshold;
2405         udest->l_threshold      = udest_compat->l_threshold;
2406         udest->af               = AF_INET;
2407         udest->tun_type         = IP_VS_CONN_F_TUNNEL_TYPE_IPIP;
2408 }
2409 
2410 static int
2411 do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2412 {
2413         struct net *net = sock_net(sk);
2414         int ret;
2415         unsigned char arg[MAX_SET_ARGLEN];
2416         struct ip_vs_service_user *usvc_compat;
2417         struct ip_vs_service_user_kern usvc;
2418         struct ip_vs_service *svc;
2419         struct ip_vs_dest_user *udest_compat;
2420         struct ip_vs_dest_user_kern udest;
2421         struct netns_ipvs *ipvs = net_ipvs(net);
2422 
2423         BUILD_BUG_ON(sizeof(arg) > 255);
2424         if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2425                 return -EPERM;
2426 
2427         if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
2428                 return -EINVAL;
2429         if (len != set_arglen[CMDID(cmd)]) {
2430                 IP_VS_DBG(1, "set_ctl: len %u != %u\n",
2431                           len, set_arglen[CMDID(cmd)]);
2432                 return -EINVAL;
2433         }
2434 
2435         if (copy_from_user(arg, user, len) != 0)
2436                 return -EFAULT;
2437 
2438         /* Handle daemons since they have another lock */
2439         if (cmd == IP_VS_SO_SET_STARTDAEMON ||
2440             cmd == IP_VS_SO_SET_STOPDAEMON) {
2441                 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2442 
2443                 if (cmd == IP_VS_SO_SET_STARTDAEMON) {
2444                         struct ipvs_sync_daemon_cfg cfg;
2445 
2446                         memset(&cfg, 0, sizeof(cfg));
2447                         ret = -EINVAL;
2448                         if (strscpy(cfg.mcast_ifn, dm->mcast_ifn,
2449                                     sizeof(cfg.mcast_ifn)) <= 0)
2450                                 return ret;
2451                         cfg.syncid = dm->syncid;
2452                         ret = start_sync_thread(ipvs, &cfg, dm->state);
2453                 } else {
2454                         ret = stop_sync_thread(ipvs, dm->state);
2455                 }
2456                 return ret;
2457         }
2458 
2459         mutex_lock(&__ip_vs_mutex);
2460         if (cmd == IP_VS_SO_SET_FLUSH) {
2461                 /* Flush the virtual service */
2462                 ret = ip_vs_flush(ipvs, false);
2463                 goto out_unlock;
2464         } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2465                 /* Set timeout values for (tcp tcpfin udp) */
2466                 ret = ip_vs_set_timeout(ipvs, (struct ip_vs_timeout_user *)arg);
2467                 goto out_unlock;
2468         }
2469 
2470         usvc_compat = (struct ip_vs_service_user *)arg;
2471         udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2472 
2473         /* We only use the new structs internally, so copy userspace compat
2474          * structs to extended internal versions */
2475         ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2476         ip_vs_copy_udest_compat(&udest, udest_compat);
2477 
2478         if (cmd == IP_VS_SO_SET_ZERO) {
2479                 /* if no service address is set, zero counters in all */
2480                 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
2481                         ret = ip_vs_zero_all(ipvs);
2482                         goto out_unlock;
2483                 }
2484         }
2485 
2486         if ((cmd == IP_VS_SO_SET_ADD || cmd == IP_VS_SO_SET_EDIT) &&
2487             strnlen(usvc.sched_name, IP_VS_SCHEDNAME_MAXLEN) ==
2488             IP_VS_SCHEDNAME_MAXLEN) {
2489                 ret = -EINVAL;
2490                 goto out_unlock;
2491         }
2492 
2493         /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
2494         if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
2495             usvc.protocol != IPPROTO_SCTP) {
2496                 pr_err("set_ctl: invalid protocol: %d %pI4:%d\n",
2497                        usvc.protocol, &usvc.addr.ip,
2498                        ntohs(usvc.port));
2499                 ret = -EFAULT;
2500                 goto out_unlock;
2501         }
2502 
2503         /* Lookup the exact service by <protocol, addr, port> or fwmark */
2504         rcu_read_lock();
2505         if (usvc.fwmark == 0)
2506                 svc = __ip_vs_service_find(ipvs, usvc.af, usvc.protocol,
2507                                            &usvc.addr, usvc.port);
2508         else
2509                 svc = __ip_vs_svc_fwm_find(ipvs, usvc.af, usvc.fwmark);
2510         rcu_read_unlock();
2511 
2512         if (cmd != IP_VS_SO_SET_ADD
2513             && (svc == NULL || svc->protocol != usvc.protocol)) {
2514                 ret = -ESRCH;
2515                 goto out_unlock;
2516         }
2517 
2518         switch (cmd) {
2519         case IP_VS_SO_SET_ADD:
2520                 if (svc != NULL)
2521                         ret = -EEXIST;
2522                 else
2523                         ret = ip_vs_add_service(ipvs, &usvc, &svc);
2524                 break;
2525         case IP_VS_SO_SET_EDIT:
2526                 ret = ip_vs_edit_service(svc, &usvc);
2527                 break;
2528         case IP_VS_SO_SET_DEL:
2529                 ret = ip_vs_del_service(svc);
2530                 if (!ret)
2531                         goto out_unlock;
2532                 break;
2533         case IP_VS_SO_SET_ZERO:
2534                 ret = ip_vs_zero_service(svc);
2535                 break;
2536         case IP_VS_SO_SET_ADDDEST:
2537                 ret = ip_vs_add_dest(svc, &udest);
2538                 break;
2539         case IP_VS_SO_SET_EDITDEST:
2540                 ret = ip_vs_edit_dest(svc, &udest);
2541                 break;
2542         case IP_VS_SO_SET_DELDEST:
2543                 ret = ip_vs_del_dest(svc, &udest);
2544                 break;
2545         default:
2546                 ret = -EINVAL;
2547         }
2548 
2549   out_unlock:
2550         mutex_unlock(&__ip_vs_mutex);
2551         return ret;
2552 }
2553 
2554 
2555 static void
2556 ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2557 {
2558         struct ip_vs_scheduler *sched;
2559         struct ip_vs_kstats kstats;
2560         char *sched_name;
2561 
2562         sched = rcu_dereference_protected(src->scheduler, 1);
2563         sched_name = sched ? sched->name : "none";
2564         dst->protocol = src->protocol;
2565         dst->addr = src->addr.ip;
2566         dst->port = src->port;
2567         dst->fwmark = src->fwmark;
2568         strlcpy(dst->sched_name, sched_name, sizeof(dst->sched_name));
2569         dst->flags = src->flags;
2570         dst->timeout = src->timeout / HZ;
2571         dst->netmask = src->netmask;
2572         dst->num_dests = src->num_dests;
2573         ip_vs_copy_stats(&kstats, &src->stats);
2574         ip_vs_export_stats_user(&dst->stats, &kstats);
2575 }
2576 
2577 static inline int
2578 __ip_vs_get_service_entries(struct netns_ipvs *ipvs,
2579                             const struct ip_vs_get_services *get,
2580                             struct ip_vs_get_services __user *uptr)
2581 {
2582         int idx, count=0;
2583         struct ip_vs_service *svc;
2584         struct ip_vs_service_entry entry;
2585         int ret = 0;
2586 
2587         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2588                 hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
2589                         /* Only expose IPv4 entries to old interface */
2590                         if (svc->af != AF_INET || (svc->ipvs != ipvs))
2591                                 continue;
2592 
2593                         if (count >= get->num_services)
2594                                 goto out;
2595                         memset(&entry, 0, sizeof(entry));
2596                         ip_vs_copy_service(&entry, svc);
2597                         if (copy_to_user(&uptr->entrytable[count],
2598                                          &entry, sizeof(entry))) {
2599                                 ret = -EFAULT;
2600                                 goto out;
2601                         }
2602                         count++;
2603                 }
2604         }
2605 
2606         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2607                 hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
2608                         /* Only expose IPv4 entries to old interface */
2609                         if (svc->af != AF_INET || (svc->ipvs != ipvs))
2610                                 continue;
2611 
2612                         if (count >= get->num_services)
2613                                 goto out;
2614                         memset(&entry, 0, sizeof(entry));
2615                         ip_vs_copy_service(&entry, svc);
2616                         if (copy_to_user(&uptr->entrytable[count],
2617                                          &entry, sizeof(entry))) {
2618                                 ret = -EFAULT;
2619                                 goto out;
2620                         }
2621                         count++;
2622                 }
2623         }
2624 out:
2625         return ret;
2626 }
2627 
2628 static inline int
2629 __ip_vs_get_dest_entries(struct netns_ipvs *ipvs, const struct ip_vs_get_dests *get,
2630                          struct ip_vs_get_dests __user *uptr)
2631 {
2632         struct ip_vs_service *svc;
2633         union nf_inet_addr addr = { .ip = get->addr };
2634         int ret = 0;
2635 
2636         rcu_read_lock();
2637         if (get->fwmark)
2638                 svc = __ip_vs_svc_fwm_find(ipvs, AF_INET, get->fwmark);
2639         else
2640                 svc = __ip_vs_service_find(ipvs, AF_INET, get->protocol, &addr,
2641                                            get->port);
2642         rcu_read_unlock();
2643 
2644         if (svc) {
2645                 int count = 0;
2646                 struct ip_vs_dest *dest;
2647                 struct ip_vs_dest_entry entry;
2648                 struct ip_vs_kstats kstats;
2649 
2650                 memset(&entry, 0, sizeof(entry));
2651                 list_for_each_entry(dest, &svc->destinations, n_list) {
2652                         if (count >= get->num_dests)
2653                                 break;
2654 
2655                         /* Cannot expose heterogeneous members via sockopt
2656                          * interface
2657                          */
2658                         if (dest->af != svc->af)
2659                                 continue;
2660 
2661                         entry.addr = dest->addr.ip;
2662                         entry.port = dest->port;
2663                         entry.conn_flags = atomic_read(&dest->conn_flags);
2664                         entry.weight = atomic_read(&dest->weight);
2665                         entry.u_threshold = dest->u_threshold;
2666                         entry.l_threshold = dest->l_threshold;
2667                         entry.activeconns = atomic_read(&dest->activeconns);
2668                         entry.inactconns = atomic_read(&dest->inactconns);
2669                         entry.persistconns = atomic_read(&dest->persistconns);
2670                         ip_vs_copy_stats(&kstats, &dest->stats);
2671                         ip_vs_export_stats_user(&entry.stats, &kstats);
2672                         if (copy_to_user(&uptr->entrytable[count],
2673                                          &entry, sizeof(entry))) {
2674                                 ret = -EFAULT;
2675                                 break;
2676                         }
2677                         count++;
2678                 }
2679         } else
2680                 ret = -ESRCH;
2681         return ret;
2682 }
2683 
2684 static inline void
2685 __ip_vs_get_timeouts(struct netns_ipvs *ipvs, struct ip_vs_timeout_user *u)
2686 {
2687 #if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
2688         struct ip_vs_proto_data *pd;
2689 #endif
2690 
2691         memset(u, 0, sizeof (*u));
2692 
2693 #ifdef CONFIG_IP_VS_PROTO_TCP
2694         pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP);
2695         u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2696         u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2697 #endif
2698 #ifdef CONFIG_IP_VS_PROTO_UDP
2699         pd = ip_vs_proto_data_get(ipvs, IPPROTO_UDP);
2700         u->udp_timeout =
2701                         pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
2702 #endif
2703 }
2704 
2705 static const unsigned char get_arglen[CMDID(IP_VS_SO_GET_MAX) + 1] = {
2706         [CMDID(IP_VS_SO_GET_VERSION)]  = 64,
2707         [CMDID(IP_VS_SO_GET_INFO)]     = sizeof(struct ip_vs_getinfo),
2708         [CMDID(IP_VS_SO_GET_SERVICES)] = sizeof(struct ip_vs_get_services),
2709         [CMDID(IP_VS_SO_GET_SERVICE)]  = sizeof(struct ip_vs_service_entry),
2710         [CMDID(IP_VS_SO_GET_DESTS)]    = sizeof(struct ip_vs_get_dests),
2711         [CMDID(IP_VS_SO_GET_TIMEOUT)]  = sizeof(struct ip_vs_timeout_user),
2712         [CMDID(IP_VS_SO_GET_DAEMON)]   = 2 * sizeof(struct ip_vs_daemon_user),
2713 };
2714 
2715 union ip_vs_get_arglen {
2716         char                            field_IP_VS_SO_GET_VERSION[64];
2717         struct ip_vs_getinfo            field_IP_VS_SO_GET_INFO;
2718         struct ip_vs_get_services       field_IP_VS_SO_GET_SERVICES;
2719         struct ip_vs_service_entry      field_IP_VS_SO_GET_SERVICE;
2720         struct ip_vs_get_dests          field_IP_VS_SO_GET_DESTS;
2721         struct ip_vs_timeout_user       field_IP_VS_SO_GET_TIMEOUT;
2722         struct ip_vs_daemon_user        field_IP_VS_SO_GET_DAEMON[2];
2723 };
2724 
2725 #define MAX_GET_ARGLEN  sizeof(union ip_vs_get_arglen)
2726 
2727 static int
2728 do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2729 {
2730         unsigned char arg[MAX_GET_ARGLEN];
2731         int ret = 0;
2732         unsigned int copylen;
2733         struct net *net = sock_net(sk);
2734         struct netns_ipvs *ipvs = net_ipvs(net);
2735 
2736         BUG_ON(!net);
2737         BUILD_BUG_ON(sizeof(arg) > 255);
2738         if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2739                 return -EPERM;
2740 
2741         if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
2742                 return -EINVAL;
2743 
2744         copylen = get_arglen[CMDID(cmd)];
2745         if (*len < (int) copylen) {
2746                 IP_VS_DBG(1, "get_ctl: len %d < %u\n", *len, copylen);
2747                 return -EINVAL;
2748         }
2749 
2750         if (copy_from_user(arg, user, copylen) != 0)
2751                 return -EFAULT;
2752         /*
2753          * Handle daemons first since it has its own locking
2754          */
2755         if (cmd == IP_VS_SO_GET_DAEMON) {
2756                 struct ip_vs_daemon_user d[2];
2757 
2758                 memset(&d, 0, sizeof(d));
2759                 mutex_lock(&ipvs->sync_mutex);
2760                 if (ipvs->sync_state & IP_VS_STATE_MASTER) {
2761                         d[0].state = IP_VS_STATE_MASTER;
2762                         strlcpy(d[0].mcast_ifn, ipvs->mcfg.mcast_ifn,
2763                                 sizeof(d[0].mcast_ifn));
2764                         d[0].syncid = ipvs->mcfg.syncid;
2765                 }
2766                 if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
2767                         d[1].state = IP_VS_STATE_BACKUP;
2768                         strlcpy(d[1].mcast_ifn, ipvs->bcfg.mcast_ifn,
2769                                 sizeof(d[1].mcast_ifn));
2770                         d[1].syncid = ipvs->bcfg.syncid;
2771                 }
2772                 if (copy_to_user(user, &d, sizeof(d)) != 0)
2773                         ret = -EFAULT;
2774                 mutex_unlock(&ipvs->sync_mutex);
2775                 return ret;
2776         }
2777 
2778         mutex_lock(&__ip_vs_mutex);
2779         switch (cmd) {
2780         case IP_VS_SO_GET_VERSION:
2781         {
2782                 char buf[64];
2783 
2784                 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2785                         NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
2786                 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2787                         ret = -EFAULT;
2788                         goto out;
2789                 }
2790                 *len = strlen(buf)+1;
2791         }
2792         break;
2793 
2794         case IP_VS_SO_GET_INFO:
2795         {
2796                 struct ip_vs_getinfo info;
2797                 info.version = IP_VS_VERSION_CODE;
2798                 info.size = ip_vs_conn_tab_size;
2799                 info.num_services = ipvs->num_services;
2800                 if (copy_to_user(user, &info, sizeof(info)) != 0)
2801                         ret = -EFAULT;
2802         }
2803         break;
2804 
2805         case IP_VS_SO_GET_SERVICES:
2806         {
2807                 struct ip_vs_get_services *get;
2808                 int size;
2809 
2810                 get = (struct ip_vs_get_services *)arg;
2811                 size = struct_size(get, entrytable, get->num_services);
2812                 if (*len != size) {
2813                         pr_err("length: %u != %u\n", *len, size);
2814                         ret = -EINVAL;
2815                         goto out;
2816                 }
2817                 ret = __ip_vs_get_service_entries(ipvs, get, user);
2818         }
2819         break;
2820 
2821         case IP_VS_SO_GET_SERVICE:
2822         {
2823                 struct ip_vs_service_entry *entry;
2824                 struct ip_vs_service *svc;
2825                 union nf_inet_addr addr;
2826 
2827                 entry = (struct ip_vs_service_entry *)arg;
2828                 addr.ip = entry->addr;
2829                 rcu_read_lock();
2830                 if (entry->fwmark)
2831                         svc = __ip_vs_svc_fwm_find(ipvs, AF_INET, entry->fwmark);
2832                 else
2833                         svc = __ip_vs_service_find(ipvs, AF_INET,
2834                                                    entry->protocol, &addr,
2835                                                    entry->port);
2836                 rcu_read_unlock();
2837                 if (svc) {
2838                         ip_vs_copy_service(entry, svc);
2839                         if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2840                                 ret = -EFAULT;
2841                 } else
2842                         ret = -ESRCH;
2843         }
2844         break;
2845 
2846         case IP_VS_SO_GET_DESTS:
2847         {
2848                 struct ip_vs_get_dests *get;
2849                 int size;
2850 
2851                 get = (struct ip_vs_get_dests *)arg;
2852                 size = struct_size(get, entrytable, get->num_dests);
2853                 if (*len != size) {
2854                         pr_err("length: %u != %u\n", *len, size);
2855                         ret = -EINVAL;
2856                         goto out;
2857                 }
2858                 ret = __ip_vs_get_dest_entries(ipvs, get, user);
2859         }
2860         break;
2861 
2862         case IP_VS_SO_GET_TIMEOUT:
2863         {
2864                 struct ip_vs_timeout_user t;
2865 
2866                 __ip_vs_get_timeouts(ipvs, &t);
2867                 if (copy_to_user(user, &t, sizeof(t)) != 0)
2868                         ret = -EFAULT;
2869         }
2870         break;
2871 
2872         default:
2873                 ret = -EINVAL;
2874         }
2875 
2876 out:
2877         mutex_unlock(&__ip_vs_mutex);
2878         return ret;
2879 }
2880 
2881 
2882 static struct nf_sockopt_ops ip_vs_sockopts = {
2883         .pf             = PF_INET,
2884         .set_optmin     = IP_VS_BASE_CTL,
2885         .set_optmax     = IP_VS_SO_SET_MAX+1,
2886         .set            = do_ip_vs_set_ctl,
2887         .get_optmin     = IP_VS_BASE_CTL,
2888         .get_optmax     = IP_VS_SO_GET_MAX+1,
2889         .get            = do_ip_vs_get_ctl,
2890         .owner          = THIS_MODULE,
2891 };
2892 
2893 /*
2894  * Generic Netlink interface
2895  */
2896 
2897 /* IPVS genetlink family */
2898 static struct genl_family ip_vs_genl_family;
2899 
2900 /* Policy used for first-level command attributes */
2901 static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2902         [IPVS_CMD_ATTR_SERVICE]         = { .type = NLA_NESTED },
2903         [IPVS_CMD_ATTR_DEST]            = { .type = NLA_NESTED },
2904         [IPVS_CMD_ATTR_DAEMON]          = { .type = NLA_NESTED },
2905         [IPVS_CMD_ATTR_TIMEOUT_TCP]     = { .type = NLA_U32 },
2906         [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2907         [IPVS_CMD_ATTR_TIMEOUT_UDP]     = { .type = NLA_U32 },
2908 };
2909 
2910 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2911 static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2912         [IPVS_DAEMON_ATTR_STATE]        = { .type = NLA_U32 },
2913         [IPVS_DAEMON_ATTR_MCAST_IFN]    = { .type = NLA_NUL_STRING,
2914                                             .len = IP_VS_IFNAME_MAXLEN - 1 },
2915         [IPVS_DAEMON_ATTR_SYNC_ID]      = { .type = NLA_U32 },
2916         [IPVS_DAEMON_ATTR_SYNC_MAXLEN]  = { .type = NLA_U16 },
2917         [IPVS_DAEMON_ATTR_MCAST_GROUP]  = { .type = NLA_U32 },
2918         [IPVS_DAEMON_ATTR_MCAST_GROUP6] = { .len = sizeof(struct in6_addr) },
2919         [IPVS_DAEMON_ATTR_MCAST_PORT]   = { .type = NLA_U16 },
2920         [IPVS_DAEMON_ATTR_MCAST_TTL]    = { .type = NLA_U8 },
2921 };
2922 
2923 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2924 static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2925         [IPVS_SVC_ATTR_AF]              = { .type = NLA_U16 },
2926         [IPVS_SVC_ATTR_PROTOCOL]        = { .type = NLA_U16 },
2927         [IPVS_SVC_ATTR_ADDR]            = { .type = NLA_BINARY,
2928                                             .len = sizeof(union nf_inet_addr) },
2929         [IPVS_SVC_ATTR_PORT]            = { .type = NLA_U16 },
2930         [IPVS_SVC_ATTR_FWMARK]          = { .type = NLA_U32 },
2931         [IPVS_SVC_ATTR_SCHED_NAME]      = { .type = NLA_NUL_STRING,
2932                                             .len = IP_VS_SCHEDNAME_MAXLEN - 1 },
2933         [IPVS_SVC_ATTR_PE_NAME]         = { .type = NLA_NUL_STRING,
2934                                             .len = IP_VS_PENAME_MAXLEN },
2935         [IPVS_SVC_ATTR_FLAGS]           = { .type = NLA_BINARY,
2936                                             .len = sizeof(struct ip_vs_flags) },
2937         [IPVS_SVC_ATTR_TIMEOUT]         = { .type = NLA_U32 },
2938         [IPVS_SVC_ATTR_NETMASK]         = { .type = NLA_U32 },
2939         [IPVS_SVC_ATTR_STATS]           = { .type = NLA_NESTED },
2940 };
2941 
2942 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2943 static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2944         [IPVS_DEST_ATTR_ADDR]           = { .type = NLA_BINARY,
2945                                             .len = sizeof(union nf_inet_addr) },
2946         [IPVS_DEST_ATTR_PORT]           = { .type = NLA_U16 },
2947         [IPVS_DEST_ATTR_FWD_METHOD]     = { .type = NLA_U32 },
2948         [IPVS_DEST_ATTR_WEIGHT]         = { .type = NLA_U32 },
2949         [IPVS_DEST_ATTR_U_THRESH]       = { .type = NLA_U32 },
2950         [IPVS_DEST_ATTR_L_THRESH]       = { .type = NLA_U32 },
2951         [IPVS_DEST_ATTR_ACTIVE_CONNS]   = { .type = NLA_U32 },
2952         [IPVS_DEST_ATTR_INACT_CONNS]    = { .type = NLA_U32 },
2953         [IPVS_DEST_ATTR_PERSIST_CONNS]  = { .type = NLA_U32 },
2954         [IPVS_DEST_ATTR_STATS]          = { .type = NLA_NESTED },
2955         [IPVS_DEST_ATTR_ADDR_FAMILY]    = { .type = NLA_U16 },
2956         [IPVS_DEST_ATTR_TUN_TYPE]       = { .type = NLA_U8 },
2957         [IPVS_DEST_ATTR_TUN_PORT]       = { .type = NLA_U16 },
2958         [IPVS_DEST_ATTR_TUN_FLAGS]      = { .type = NLA_U16 },
2959 };
2960 
2961 static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2962                                  struct ip_vs_kstats *kstats)
2963 {
2964         struct nlattr *nl_stats = nla_nest_start_noflag(skb, container_type);
2965 
2966         if (!nl_stats)
2967                 return -EMSGSIZE;
2968 
2969         if (nla_put_u32(skb, IPVS_STATS_ATTR_CONNS, (u32)kstats->conns) ||
2970             nla_put_u32(skb, IPVS_STATS_ATTR_INPKTS, (u32)kstats->inpkts) ||
2971             nla_put_u32(skb, IPVS_STATS_ATTR_OUTPKTS, (u32)kstats->outpkts) ||
2972             nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INBYTES, kstats->inbytes,
2973                               IPVS_STATS_ATTR_PAD) ||
2974             nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTBYTES, kstats->outbytes,
2975                               IPVS_STATS_ATTR_PAD) ||
2976             nla_put_u32(skb, IPVS_STATS_ATTR_CPS, (u32)kstats->cps) ||
2977             nla_put_u32(skb, IPVS_STATS_ATTR_INPPS, (u32)kstats->inpps) ||
2978             nla_put_u32(skb, IPVS_STATS_ATTR_OUTPPS, (u32)kstats->outpps) ||
2979             nla_put_u32(skb, IPVS_STATS_ATTR_INBPS, (u32)kstats->inbps) ||
2980             nla_put_u32(skb, IPVS_STATS_ATTR_OUTBPS, (u32)kstats->outbps))
2981                 goto nla_put_failure;
2982         nla_nest_end(skb, nl_stats);
2983 
2984         return 0;
2985 
2986 nla_put_failure:
2987         nla_nest_cancel(skb, nl_stats);
2988         return -EMSGSIZE;
2989 }
2990 
2991 static int ip_vs_genl_fill_stats64(struct sk_buff *skb, int container_type,
2992                                    struct ip_vs_kstats *kstats)
2993 {
2994         struct nlattr *nl_stats = nla_nest_start_noflag(skb, container_type);
2995 
2996         if (!nl_stats)
2997                 return -EMSGSIZE;
2998 
2999         if (nla_put_u64_64bit(skb, IPVS_STATS_ATTR_CONNS, kstats->conns,
3000                               IPVS_STATS_ATTR_PAD) ||
3001             nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INPKTS, kstats->inpkts,
3002                               IPVS_STATS_ATTR_PAD) ||
3003             nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTPKTS, kstats->outpkts,
3004                               IPVS_STATS_ATTR_PAD) ||
3005             nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INBYTES, kstats->inbytes,
3006                               IPVS_STATS_ATTR_PAD) ||
3007             nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTBYTES, kstats->outbytes,
3008                               IPVS_STATS_ATTR_PAD) ||
3009             nla_put_u64_64bit(skb, IPVS_STATS_ATTR_CPS, kstats->cps,
3010                               IPVS_STATS_ATTR_PAD) ||
3011             nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INPPS, kstats->inpps,
3012                               IPVS_STATS_ATTR_PAD) ||
3013             nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTPPS, kstats->outpps,
3014                               IPVS_STATS_ATTR_PAD) ||
3015             nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INBPS, kstats->inbps,
3016                               IPVS_STATS_ATTR_PAD) ||
3017             nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTBPS, kstats->outbps,
3018                               IPVS_STATS_ATTR_PAD))
3019                 goto nla_put_failure;
3020         nla_nest_end(skb, nl_stats);
3021 
3022         return 0;
3023 
3024 nla_put_failure:
3025         nla_nest_cancel(skb, nl_stats);
3026         return -EMSGSIZE;
3027 }
3028 
3029 static int ip_vs_genl_fill_service(struct sk_buff *skb,
3030                                    struct ip_vs_service *svc)
3031 {
3032         struct ip_vs_scheduler *sched;
3033         struct ip_vs_pe *pe;
3034         struct nlattr *nl_service;
3035         struct ip_vs_flags flags = { .flags = svc->flags,
3036                                      .mask = ~0 };
3037         struct ip_vs_kstats kstats;
3038         char *sched_name;
3039 
3040         nl_service = nla_nest_start_noflag(skb, IPVS_CMD_ATTR_SERVICE);
3041         if (!nl_service)
3042                 return -EMSGSIZE;
3043 
3044         if (nla_put_u16(skb, IPVS_SVC_ATTR_AF, svc->af))
3045                 goto nla_put_failure;
3046         if (svc->fwmark) {
3047                 if (nla_put_u32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark))
3048                         goto nla_put_failure;
3049         } else {
3050                 if (nla_put_u16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol) ||
3051                     nla_put(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr) ||
3052                     nla_put_be16(skb, IPVS_SVC_ATTR_PORT, svc->port))
3053                         goto nla_put_failure;
3054         }
3055 
3056         sched = rcu_dereference_protected(svc->scheduler, 1);
3057         sched_name = sched ? sched->name : "none";
3058         pe = rcu_dereference_protected(svc->pe, 1);
3059         if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, sched_name) ||
3060             (pe && nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, pe->name)) ||
3061             nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) ||
3062             nla_put_u32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ) ||
3063             nla_put_be32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask))
3064                 goto nla_put_failure;
3065         ip_vs_copy_stats(&kstats, &svc->stats);
3066         if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &kstats))
3067                 goto nla_put_failure;
3068         if (ip_vs_genl_fill_stats64(skb, IPVS_SVC_ATTR_STATS64, &kstats))
3069                 goto nla_put_failure;
3070 
3071         nla_nest_end(skb, nl_service);
3072 
3073         return 0;
3074 
3075 nla_put_failure:
3076         nla_nest_cancel(skb, nl_service);
3077         return -EMSGSIZE;
3078 }
3079 
3080 static int ip_vs_genl_dump_service(struct sk_buff *skb,
3081                                    struct ip_vs_service *svc,
3082                                    struct netlink_callback *cb)
3083 {
3084         void *hdr;
3085 
3086         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
3087                           &ip_vs_genl_family, NLM_F_MULTI,
3088                           IPVS_CMD_NEW_SERVICE);
3089         if (!hdr)
3090                 return -EMSGSIZE;
3091 
3092         if (ip_vs_genl_fill_service(skb, svc) < 0)
3093                 goto nla_put_failure;
3094 
3095         genlmsg_end(skb, hdr);
3096         return 0;
3097 
3098 nla_put_failure:
3099         genlmsg_cancel(skb, hdr);
3100         return -EMSGSIZE;
3101 }
3102 
3103 static int ip_vs_genl_dump_services(struct sk_buff *skb,
3104                                     struct netlink_callback *cb)
3105 {
3106         int idx = 0, i;
3107         int start = cb->args[0];
3108         struct ip_vs_service *svc;
3109         struct net *net = sock_net(skb->sk);
3110         struct netns_ipvs *ipvs = net_ipvs(net);
3111 
3112         mutex_lock(&__ip_vs_mutex);
3113         for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
3114                 hlist_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
3115                         if (++idx <= start || (svc->ipvs != ipvs))
3116                                 continue;
3117                         if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
3118                                 idx--;
3119                                 goto nla_put_failure;
3120                         }
3121                 }
3122         }
3123 
3124         for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
3125                 hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
3126                         if (++idx <= start || (svc->ipvs != ipvs))
3127                                 continue;
3128                         if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
3129                                 idx--;
3130                                 goto nla_put_failure;
3131                         }
3132                 }
3133         }
3134 
3135 nla_put_failure:
3136         mutex_unlock(&__ip_vs_mutex);
3137         cb->args[0] = idx;
3138 
3139         return skb->len;
3140 }
3141 
/*
 * Tell whether address family @af can be used: AF_INET always, AF_INET6
 * only when CONFIG_IP_VS_IPV6 is set and ipv6_mod_enabled() returns true.
 */
static bool ip_vs_is_af_valid(int af)
{
        switch (af) {
        case AF_INET:
                return true;
#ifdef CONFIG_IP_VS_IPV6
        case AF_INET6:
                return ipv6_mod_enabled();
#endif
        default:
                return false;
        }
}
3152 
3153 static int ip_vs_genl_parse_service(struct netns_ipvs *ipvs,
3154                                     struct ip_vs_service_user_kern *usvc,
3155                                     struct nlattr *nla, bool full_entry,
3156                                     struct ip_vs_service **ret_svc)
3157 {
3158         struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
3159         struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
3160         struct ip_vs_service *svc;
3161 
3162         /* Parse mandatory identifying service fields first */
3163         if (nla == NULL ||
3164             nla_parse_nested_deprecated(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy, NULL))
3165                 return -EINVAL;
3166 
3167         nla_af          = attrs[IPVS_SVC_ATTR_AF];
3168         nla_protocol    = attrs[IPVS_SVC_ATTR_PROTOCOL];
3169         nla_addr        = attrs[IPVS_SVC_ATTR_ADDR];
3170         nla_port        = attrs[IPVS_SVC_ATTR_PORT];
3171         nla_fwmark      = attrs[IPVS_SVC_ATTR_FWMARK];
3172 
3173         if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
3174                 return -EINVAL;
3175 
3176         memset(usvc, 0, sizeof(*usvc));
3177 
3178         usvc->af = nla_get_u16(nla_af);
3179         if (!ip_vs_is_af_valid(usvc->af))
3180                 return -EAFNOSUPPORT;
3181 
3182         if (nla_fwmark) {
3183                 usvc->protocol = IPPROTO_TCP;
3184                 usvc->fwmark = nla_get_u32(nla_fwmark);
3185         } else {
3186                 usvc->protocol = nla_get_u16(nla_protocol);
3187                 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
3188                 usvc->port = nla_get_be16(nla_port);
3189                 usvc->fwmark = 0;
3190         }
3191 
3192         rcu_read_lock();
3193         if (usvc->fwmark)
3194                 svc = __ip_vs_svc_fwm_find(ipvs, usvc->af, usvc->fwmark);
3195         else
3196                 svc = __ip_vs_service_find(ipvs, usvc->af, usvc->protocol,
3197                                            &usvc->addr, usvc->port);
3198         rcu_read_unlock();
3199         *ret_svc = svc;
3200 
3201         /* If a full entry was requested, check for the additional fields */
3202         if (full_entry) {
3203                 struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout,
3204                               *nla_netmask;
3205                 struct ip_vs_flags flags;
3206 
3207                 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
3208                 nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME];
3209                 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
3210                 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
3211                 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
3212 
3213                 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
3214                         return -EINVAL;
3215 
3216                 nla_memcpy(&flags, nla_flags, sizeof(flags));
3217 
3218                 /* prefill flags from service if it already exists */
3219                 if (svc)
3220                         usvc->flags = svc->flags;
3221 
3222                 /* set new flags from userland */
3223                 usvc->flags = (usvc->flags & ~flags.mask) |
3224                               (flags.flags & flags.mask);
3225                 usvc->sched_name = nla_data(nla_sched);
3226                 usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
3227                 usvc->timeout = nla_get_u32(nla_timeout);
3228                 usvc->netmask = nla_get_be32(nla_netmask);
3229         }
3230 
3231         return 0;
3232 }
3233 
3234 static struct ip_vs_service *ip_vs_genl_find_service(struct netns_ipvs *ipvs,
3235                                                      struct nlattr *nla)
3236 {
3237         struct ip_vs_service_user_kern usvc;
3238         struct ip_vs_service *svc;
3239         int ret;
3240 
3241         ret = ip_vs_genl_parse_service(ipvs, &usvc, nla, false, &svc);
3242         return ret ? ERR_PTR(ret) : svc;
3243 }
3244 
3245 static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
3246 {
3247         struct nlattr *nl_dest;
3248         struct ip_vs_kstats kstats;
3249 
3250         nl_dest = nla_nest_start_noflag(skb, IPVS_CMD_ATTR_DEST);
3251         if (!nl_dest)
3252                 return -EMSGSIZE;
3253 
3254         if (nla_put(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr) ||
3255             nla_put_be16(skb, IPVS_DEST_ATTR_PORT, dest->port) ||
3256             nla_put_u32(skb, IPVS_DEST_ATTR_FWD_METHOD,
3257                         (atomic_read(&dest->conn_flags) &
3258                          IP_VS_CONN_F_FWD_MASK)) ||
3259             nla_put_u32(skb, IPVS_DEST_ATTR_WEIGHT,
3260                         atomic_read(&dest->weight)) ||
3261             nla_put_u8(skb, IPVS_DEST_ATTR_TUN_TYPE,
3262                        dest->tun_type) ||
3263             nla_put_be16(skb, IPVS_DEST_ATTR_TUN_PORT,
3264                          dest->tun_port) ||
3265             nla_put_u16(skb, IPVS_DEST_ATTR_TUN_FLAGS,
3266                         dest->tun_flags) ||
3267             nla_put_u32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold) ||
3268             nla_put_u32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold) ||
3269             nla_put_u32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
3270                         atomic_read(&dest->activeconns)) ||
3271             nla_put_u32(skb, IPVS_DEST_ATTR_INACT_CONNS,
3272                         atomic_read(&dest->inactconns)) ||
3273             nla_put_u32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
3274                         atomic_read(&dest->persistconns)) ||
3275             nla_put_u16(skb, IPVS_DEST_ATTR_ADDR_FAMILY, dest->af))
3276                 goto nla_put_failure;
3277         ip_vs_copy_stats(&kstats, &dest->stats);
3278         if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &kstats))
3279                 goto nla_put_failure;
3280         if (ip_vs_genl_fill_stats64(skb, IPVS_DEST_ATTR_STATS64, &kstats))
3281                 goto nla_put_failure;
3282 
3283         nla_nest_end(skb, nl_dest);
3284 
3285         return 0;
3286 
3287 nla_put_failure:
3288         nla_nest_cancel(skb, nl_dest);
3289         return -EMSGSIZE;
3290 }
3291 
3292 static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
3293                                 struct netlink_callback *cb)
3294 {
3295         void *hdr;
3296 
3297         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
3298                           &ip_vs_genl_family, NLM_F_MULTI,
3299                           IPVS_CMD_NEW_DEST);
3300         if (!hdr)
3301                 return -EMSGSIZE;
3302 
3303         if (ip_vs_genl_fill_dest(skb, dest) < 0)
3304                 goto nla_put_failure;
3305 
3306         genlmsg_end(skb, hdr);
3307         return 0;
3308 
3309 nla_put_failure:
3310         genlmsg_cancel(skb, hdr);
3311         return -EMSGSIZE;
3312 }
3313 
3314 static int ip_vs_genl_dump_dests(struct sk_buff *skb,
3315                                  struct netlink_callback *cb)
3316 {
3317         int idx = 0;
3318         int start = cb->args[0];
3319         struct ip_vs_service *svc;
3320         struct ip_vs_dest *dest;
3321         struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
3322         struct net *net = sock_net(skb->sk);
3323         struct netns_ipvs *ipvs = net_ipvs(net);
3324 
3325         mutex_lock(&__ip_vs_mutex);
3326 
3327         /* Try to find the service for which to dump destinations */
3328         if (nlmsg_parse_deprecated(cb->nlh, GENL_HDRLEN, attrs, IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy, cb->extack))
3329                 goto out_err;
3330 
3331 
3332         svc = ip_vs_genl_find_service(ipvs, attrs[IPVS_CMD_ATTR_SERVICE]);
3333         if (IS_ERR_OR_NULL(svc))
3334                 goto out_err;
3335 
3336         /* Dump the destinations */
3337         list_for_each_entry(dest, &svc->destinations, n_list) {
3338                 if (++idx <= start)
3339                         continue;
3340                 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
3341                         idx--;
3342                         goto nla_put_failure;
3343                 }
3344         }
3345 
3346 nla_put_failure:
3347         cb->args[0] = idx;
3348 
3349 out_err:
3350         mutex_unlock(&__ip_vs_mutex);
3351 
3352         return skb->len;
3353 }
3354 
3355 static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
3356                                  struct nlattr *nla, bool full_entry)
3357 {
3358         struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
3359         struct nlattr *nla_addr, *nla_port;
3360         struct nlattr *nla_addr_family;
3361 
3362         /* Parse mandatory identifying destination fields first */
3363         if (nla == NULL ||
3364             nla_parse_nested_deprecated(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy, NULL))
3365                 return -EINVAL;
3366 
3367         nla_addr        = attrs[IPVS_DEST_ATTR_ADDR];
3368         nla_port        = attrs[IPVS_DEST_ATTR_PORT];
3369         nla_addr_family = attrs[IPVS_DEST_ATTR_ADDR_FAMILY];
3370 
3371         if (!(nla_addr && nla_port))
3372                 return -EINVAL;
3373 
3374         memset(udest, 0, sizeof(*udest));
3375 
3376         nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
3377         udest->port = nla_get_be16(nla_port);
3378 
3379         if (nla_addr_family)
3380                 udest->af = nla_get_u16(nla_addr_family);
3381         else
3382                 udest->af = 0;
3383 
3384         /* If a full entry was requested, check for the additional fields */
3385         if (full_entry) {
3386                 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
3387                               *nla_l_thresh, *nla_tun_type, *nla_tun_port,
3388                               *nla_tun_flags;
3389 
3390                 nla_fwd         = attrs[IPVS_DEST_ATTR_FWD_METHOD];
3391                 nla_weight      = attrs[IPVS_DEST_ATTR_WEIGHT];
3392                 nla_u_thresh    = attrs[IPVS_DEST_ATTR_U_THRESH];
3393                 nla_l_thresh    = attrs[IPVS_DEST_ATTR_L_THRESH];
3394                 nla_tun_type    = attrs[IPVS_DEST_ATTR_TUN_TYPE];
3395                 nla_tun_port    = attrs[IPVS_DEST_ATTR_TUN_PORT];
3396                 nla_tun_flags   = attrs[IPVS_DEST_ATTR_TUN_FLAGS];
3397 
3398                 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
3399                         return -EINVAL;
3400 
3401                 udest->conn_flags = nla_get_u32(nla_fwd)
3402                                     & IP_VS_CONN_F_FWD_MASK;
3403                 udest->weight = nla_get_u32(nla_weight);
3404                 udest->u_threshold = nla_get_u32(nla_u_thresh);
3405                 udest->l_threshold = nla_get_u32(nla_l_thresh);
3406 
3407                 if (nla_tun_type)
3408                         udest->tun_type = nla_get_u8(nla_tun_type);
3409 
3410                 if (nla_tun_port)
3411                         udest->tun_port = nla_get_be16(nla_tun_port);
3412 
3413                 if (nla_tun_flags)
3414                         udest->tun_flags = nla_get_u16(nla_tun_flags);
3415         }
3416 
3417         return 0;
3418 }
3419 
3420 static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __u32 state,
3421                                   struct ipvs_sync_daemon_cfg *c)
3422 {
3423         struct nlattr *nl_daemon;
3424 
3425         nl_daemon = nla_nest_start_noflag(skb, IPVS_CMD_ATTR_DAEMON);
3426         if (!nl_daemon)
3427                 return -EMSGSIZE;
3428 
3429         if (nla_put_u32(skb, IPVS_DAEMON_ATTR_STATE, state) ||
3430             nla_put_string(skb, IPVS_DAEMON_ATTR_MCAST_IFN, c->mcast_ifn) ||
3431             nla_put_u32(skb, IPVS_DAEMON_ATTR_SYNC_ID, c->syncid) ||
3432             nla_put_u16(skb, IPVS_DAEMON_ATTR_SYNC_MAXLEN, c->sync_maxlen) ||
3433             nla_put_u16(skb, IPVS_DAEMON_ATTR_MCAST_PORT, c->mcast_port) ||
3434             nla_put_u8(skb, IPVS_DAEMON_ATTR_MCAST_TTL, c->mcast_ttl))
3435                 goto nla_put_failure;
3436 #ifdef CONFIG_IP_VS_IPV6
3437         if (c->mcast_af == AF_INET6) {
3438                 if (nla_put_in6_addr(skb, IPVS_DAEMON_ATTR_MCAST_GROUP6,
3439                                      &c->mcast_group.in6))
3440                         goto nla_put_failure;
3441         } else
3442 #endif
3443                 if (c->mcast_af == AF_INET &&
3444                     nla_put_in_addr(skb, IPVS_DAEMON_ATTR_MCAST_GROUP,
3445                                     c->mcast_group.ip))
3446                         goto nla_put_failure;
3447         nla_nest_end(skb, nl_daemon);
3448 
3449         return 0;
3450 
3451 nla_put_failure:
3452         nla_nest_cancel(skb, nl_daemon);
3453         return -EMSGSIZE;
3454 }
3455 
3456 static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __u32 state,
3457                                   struct ipvs_sync_daemon_cfg *c,
3458                                   struct netlink_callback *cb)
3459 {
3460         void *hdr;
3461         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
3462                           &ip_vs_genl_family, NLM_F_MULTI,
3463                           IPVS_CMD_NEW_DAEMON);
3464         if (!hdr)
3465                 return -EMSGSIZE;
3466 
3467         if (ip_vs_genl_fill_daemon(skb, state, c))
3468                 goto nla_put_failure;
3469 
3470         genlmsg_end(skb, hdr);
3471         return 0;
3472 
3473 nla_put_failure:
3474         genlmsg_cancel(skb, hdr);
3475         return -EMSGSIZE;
3476 }
3477 
3478 static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
3479                                    struct netlink_callback *cb)
3480 {
3481         struct net *net = sock_net(skb->sk);
3482         struct netns_ipvs *ipvs = net_ipvs(net);
3483 
3484         mutex_lock(&ipvs->sync_mutex);
3485         if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
3486                 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
3487                                            &ipvs->mcfg, cb) < 0)
3488                         goto nla_put_failure;
3489 
3490                 cb->args[0] = 1;
3491         }
3492 
3493         if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
3494                 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
3495                                            &ipvs->bcfg, cb) < 0)
3496                         goto nla_put_failure;
3497 
3498                 cb->args[1] = 1;
3499         }
3500 
3501 nla_put_failure:
3502         mutex_unlock(&ipvs->sync_mutex);
3503 
3504         return skb->len;
3505 }
3506 
3507 static int ip_vs_genl_new_daemon(struct netns_ipvs *ipvs, struct nlattr **attrs)
3508 {
3509         struct ipvs_sync_daemon_cfg c;
3510         struct nlattr *a;
3511         int ret;
3512 
3513         memset(&c, 0, sizeof(c));
3514         if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3515               attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3516               attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3517                 return -EINVAL;
3518         strlcpy(c.mcast_ifn, nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3519                 sizeof(c.mcast_ifn));
3520         c.syncid = nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]);
3521 
3522         a = attrs[IPVS_DAEMON_ATTR_SYNC_MAXLEN];
3523         if (a)
3524                 c.sync_maxlen = nla_get_u16(a);
3525 
3526         a = attrs[IPVS_DAEMON_ATTR_MCAST_GROUP];
3527         if (a) {
3528                 c.mcast_af = AF_INET;
3529                 c.mcast_group.ip = nla_get_in_addr(a);
3530                 if (!ipv4_is_multicast(c.mcast_group.ip))
3531                         return -EINVAL;
3532         } else {
3533                 a = attrs[IPVS_DAEMON_ATTR_MCAST_GROUP6];
3534                 if (a) {
3535 #ifdef CONFIG_IP_VS_IPV6
3536                         int addr_type;
3537 
3538                         c.mcast_af = AF_INET6;
3539                         c.mcast_group.in6 = nla_get_in6_addr(a);
3540                         addr_type = ipv6_addr_type(&c.mcast_group.in6);
3541                         if (!(addr_type & IPV6_ADDR_MULTICAST))
3542                                 return -EINVAL;
3543 #else
3544                         return -EAFNOSUPPORT;
3545 #endif
3546                 }
3547         }
3548 
3549         a = attrs[IPVS_DAEMON_ATTR_MCAST_PORT];
3550         if (a)
3551                 c.mcast_port = nla_get_u16(a);
3552 
3553         a = attrs[IPVS_DAEMON_ATTR_MCAST_TTL];
3554         if (a)
3555                 c.mcast_ttl = nla_get_u8(a);
3556 
3557         /* The synchronization protocol is incompatible with mixed family
3558          * services
3559          */
3560         if (ipvs->mixed_address_family_dests > 0)
3561                 return -EINVAL;
3562 
3563         ret = start_sync_thread(ipvs, &c,
3564                                 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
3565         return ret;
3566 }
3567 
3568 static int ip_vs_genl_del_daemon(struct netns_ipvs *ipvs, struct nlattr **attrs)
3569 {
3570         int ret;
3571 
3572         if (!attrs[IPVS_DAEMON_ATTR_STATE])
3573                 return -EINVAL;
3574 
3575         ret = stop_sync_thread(ipvs,
3576                                nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
3577         return ret;
3578 }
3579 
3580 static int ip_vs_genl_set_config(struct netns_ipvs *ipvs, struct nlattr **attrs)
3581 {
3582         struct ip_vs_timeout_user t;
3583 
3584         __ip_vs_get_timeouts(ipvs, &t);
3585 
3586         if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3587                 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3588 
3589         if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3590                 t.tcp_fin_timeout =
3591                         nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3592 
3593         if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3594                 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3595 
3596         return ip_vs_set_timeout(ipvs, &t);
3597 }
3598 
3599 static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info)
3600 {
3601         int ret = -EINVAL, cmd;
3602         struct net *net = sock_net(skb->sk);
3603         struct netns_ipvs *ipvs = net_ipvs(net);
3604 
3605         cmd = info->genlhdr->cmd;
3606 
3607         if (cmd == IPVS_CMD_NEW_DAEMON || cmd == IPVS_CMD_DEL_DAEMON) {
3608                 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3609 
3610                 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3611                     nla_parse_nested_deprecated(daemon_attrs, IPVS_DAEMON_ATTR_MAX, info->attrs[IPVS_CMD_ATTR_DAEMON], ip_vs_daemon_policy, info->extack))
3612                         goto out;
3613 
3614                 if (cmd == IPVS_CMD_NEW_DAEMON)
3615                         ret = ip_vs_genl_new_daemon(ipvs, daemon_attrs);
3616                 else
3617                         ret = ip_vs_genl_del_daemon(ipvs, daemon_attrs);
3618         }
3619 
3620 out:
3621         return ret;
3622 }
3623 
/* Generic netlink .doit handler shared by the service, destination,
 * zero, flush and set-config commands.
 *
 * The whole function runs with __ip_vs_mutex held. Commands that need no
 * service (FLUSH, SET_CONFIG, ZERO without a service attribute) are
 * handled first; everything else requires a service attribute and, for
 * the destination commands, a destination attribute as well.
 *
 * Returns the result of the invoked ip_vs_* operation, or
 *   -ESRCH         the service does not exist (any command except
 *                  IPVS_CMD_NEW_SERVICE)
 *   -EEXIST        IPVS_CMD_NEW_SERVICE for an already existing service
 *   -EAFNOSUPPORT  the destination address family is not valid
 *   -EINVAL        unsupported mixed-family request or unknown command
 * as well as any error from parsing the service/destination attributes.
 */
static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
{
        bool need_full_svc = false, need_full_dest = false;
        struct ip_vs_service *svc = NULL;
        struct ip_vs_service_user_kern usvc;
        struct ip_vs_dest_user_kern udest;
        int ret = 0, cmd;
        struct net *net = sock_net(skb->sk);
        struct netns_ipvs *ipvs = net_ipvs(net);

        cmd = info->genlhdr->cmd;

        mutex_lock(&__ip_vs_mutex);

        /* Commands that operate without a service argument */
        if (cmd == IPVS_CMD_FLUSH) {
                ret = ip_vs_flush(ipvs, false);
                goto out;
        } else if (cmd == IPVS_CMD_SET_CONFIG) {
                ret = ip_vs_genl_set_config(ipvs, info->attrs);
                goto out;
        } else if (cmd == IPVS_CMD_ZERO &&
                   !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
                ret = ip_vs_zero_all(ipvs);
                goto out;
        }

        /* All following commands require a service argument, so check if we
         * received a valid one. We need a full service specification when
         * adding / editing a service. Only identifying members otherwise. */
        if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
                need_full_svc = true;

        ret = ip_vs_genl_parse_service(ipvs, &usvc,
                                       info->attrs[IPVS_CMD_ATTR_SERVICE],
                                       need_full_svc, &svc);
        if (ret)
                goto out;

        /* Unless we're adding a new service, the service must already exist */
        if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
                ret = -ESRCH;
                goto out;
        }

        /* Destination commands require a valid destination argument. For
         * adding / editing a destination, we need a full destination
         * specification. */
        if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
            cmd == IPVS_CMD_DEL_DEST) {
                if (cmd != IPVS_CMD_DEL_DEST)
                        need_full_dest = true;

                ret = ip_vs_genl_parse_dest(&udest,
                                            info->attrs[IPVS_CMD_ATTR_DEST],
                                            need_full_dest);
                if (ret)
                        goto out;

                /* Old protocols did not allow the user to specify address
                 * family, so we set it to zero instead.  We also didn't
                 * allow heterogeneous pools in the old code, so it's safe
                 * to assume that this will have the same address family as
                 * the service.
                 */
                /* svc is non-NULL here: the -ESRCH check above already
                 * rejected a missing service for every destination command.
                 */
                if (udest.af == 0)
                        udest.af = svc->af;

                if (!ip_vs_is_af_valid(udest.af)) {
                        ret = -EAFNOSUPPORT;
                        goto out;
                }

                /* Extra restrictions when adding/editing a destination whose
                 * family differs from the service's.
                 */
                if (udest.af != svc->af && cmd != IPVS_CMD_DEL_DEST) {
                        /* The synchronization protocol is incompatible
                         * with mixed family services
                         */
                        if (ipvs->sync_state) {
                                ret = -EINVAL;
                                goto out;
                        }

                        /* Which connection types do we support? */
                        switch (udest.conn_flags) {
                        case IP_VS_CONN_F_TUNNEL:
                                /* We are able to forward this */
                                break;
                        default:
                                ret = -EINVAL;
                                goto out;
                        }
                }
        }

        /* Dispatch to the operation implementing the command */
        switch (cmd) {
        case IPVS_CMD_NEW_SERVICE:
                if (svc == NULL)
                        ret = ip_vs_add_service(ipvs, &usvc, &svc);
                else
                        ret = -EEXIST;
                break;
        case IPVS_CMD_SET_SERVICE:
                ret = ip_vs_edit_service(svc, &usvc);
                break;
        case IPVS_CMD_DEL_SERVICE:
                ret = ip_vs_del_service(svc);
                /* do not use svc, it can be freed */
                break;
        case IPVS_CMD_NEW_DEST:
                ret = ip_vs_add_dest(svc, &udest);
                break;
        case IPVS_CMD_SET_DEST:
                ret = ip_vs_edit_dest(svc, &udest);
                break;
        case IPVS_CMD_DEL_DEST:
                ret = ip_vs_del_dest(svc, &udest);
                break;
        case IPVS_CMD_ZERO:
                ret = ip_vs_zero_service(svc);
                break;
        default:
                ret = -EINVAL;
        }

out:
        mutex_unlock(&__ip_vs_mutex);

        return ret;
}
3752 
/* Generic netlink .doit handler for IPVS_CMD_GET_SERVICE, IPVS_CMD_GET_INFO
 * and IPVS_CMD_GET_CONFIG.
 *
 * Builds a single reply message whose command is the matching NEW_SERVICE /
 * SET_INFO / SET_CONFIG value and sends it with genlmsg_reply(). The reply
 * is filled while __ip_vs_mutex is held.
 *
 * Returns the result of genlmsg_reply() on success, or
 *   -EINVAL    unknown command
 *   -ENOMEM    the reply message could not be allocated
 *   -ESRCH     IPVS_CMD_GET_SERVICE for a service that does not exist
 *   -EMSGSIZE  the reply did not fit into the message
 * as well as any error from locating the requested service.
 */
static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
{
        struct sk_buff *msg;
        void *reply;
        int ret, cmd, reply_cmd;
        struct net *net = sock_net(skb->sk);
        struct netns_ipvs *ipvs = net_ipvs(net);

        cmd = info->genlhdr->cmd;

        /* Pick the command value used in the reply */
        if (cmd == IPVS_CMD_GET_SERVICE)
                reply_cmd = IPVS_CMD_NEW_SERVICE;
        else if (cmd == IPVS_CMD_GET_INFO)
                reply_cmd = IPVS_CMD_SET_INFO;
        else if (cmd == IPVS_CMD_GET_CONFIG)
                reply_cmd = IPVS_CMD_SET_CONFIG;
        else {
                pr_err("unknown Generic Netlink command\n");
                return -EINVAL;
        }

        /* Allocate before taking the mutex */
        msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
        if (!msg)
                return -ENOMEM;

        mutex_lock(&__ip_vs_mutex);

        reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
        if (reply == NULL)
                goto nla_put_failure;

        switch (cmd) {
        case IPVS_CMD_GET_SERVICE:
        {
                struct ip_vs_service *svc;

                svc = ip_vs_genl_find_service(ipvs,
                                              info->attrs[IPVS_CMD_ATTR_SERVICE]);
                if (IS_ERR(svc)) {
                        /* Parse error: propagate it unchanged */
                        ret = PTR_ERR(svc);
                        goto out_err;
                } else if (svc) {
                        ret = ip_vs_genl_fill_service(msg, svc);
                        if (ret)
                                goto nla_put_failure;
                } else {
                        /* Parsed fine but no such service */
                        ret = -ESRCH;
                        goto out_err;
                }

                break;
        }

        case IPVS_CMD_GET_CONFIG:
        {
                struct ip_vs_timeout_user t;

                __ip_vs_get_timeouts(ipvs, &t);
                /* Only timeouts of compiled-in protocols are reported */
#ifdef CONFIG_IP_VS_PROTO_TCP
                if (nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP,
                                t.tcp_timeout) ||
                    nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
                                t.tcp_fin_timeout))
                        goto nla_put_failure;
#endif
#ifdef CONFIG_IP_VS_PROTO_UDP
                if (nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout))
                        goto nla_put_failure;
#endif

                break;
        }

        case IPVS_CMD_GET_INFO:
                if (nla_put_u32(msg, IPVS_INFO_ATTR_VERSION,
                                IP_VS_VERSION_CODE) ||
                    nla_put_u32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
                                ip_vs_conn_tab_size))
                        goto nla_put_failure;
                break;
        }

        /* Success: finalize and send; msg is not freed on this path */
        genlmsg_end(msg, reply);
        ret = genlmsg_reply(msg, info);
        goto out;

nla_put_failure:
        pr_err("not enough space in Netlink message\n");
        ret = -EMSGSIZE;

        /* Error paths still own msg and must free it */
out_err:
        nlmsg_free(msg);
out:
        mutex_unlock(&__ip_vs_mutex);

        return ret;
}
3850 
3851 
3852 static const struct genl_ops ip_vs_genl_ops[] = {
3853         {
3854                 .cmd    = IPVS_CMD_NEW_SERVICE,
3855                 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
3856                 .flags  = GENL_ADMIN_PERM,
3857                 .doit   = ip_vs_genl_set_cmd,
3858         },
3859         {
3860                 .cmd    = IPVS_CMD_SET_SERVICE,
3861                 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
3862                 .flags  = GENL_ADMIN_PERM,
3863                 .doit   = ip_vs_genl_set_cmd,
3864         },
3865         {
3866                 .cmd    = IPVS_CMD_DEL_SERVICE,
3867                 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
3868                 .flags  = GENL_ADMIN_PERM,
3869                 .doit   = ip_vs_genl_set_cmd,
3870         },
3871         {
3872                 .cmd    = IPVS_CMD_GET_SERVICE,
3873                 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
3874                 .flags  = GENL_ADMIN_PERM,
3875                 .doit   = ip_vs_genl_get_cmd,
3876                 .dumpit = ip_vs_genl_dump_services,
3877         },
3878         {
3879                 .cmd    = IPVS_CMD_NEW_DEST,
3880                 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
3881                 .flags  = GENL_ADMIN_PERM,
3882                 .doit   = ip_vs_genl_set_cmd,
3883         },
3884         {
3885                 .cmd    = IPVS_CMD_SET_DEST,
3886                 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
3887                 .flags  = GENL_ADMIN_PERM,
3888                 .doit   = ip_vs_genl_set_cmd,
3889         },
3890         {
3891                 .cmd    = IPVS_CMD_DEL_DEST,
3892                 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
3893                 .flags  = GENL_ADMIN_PERM,
3894                 .doit   = ip_vs_genl_set_cmd,
3895         },
3896         {
3897                 .cmd    = IPVS_CMD_GET_DEST,
3898                 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
3899                 .flags  = GENL_ADMIN_PERM,
3900                 .dumpit = ip_vs_genl_dump_dests,
3901         },
3902         {
3903                 .cmd    = IPVS_CMD_NEW_DAEMON,
3904                 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
3905                 .flags  = GENL_ADMIN_PERM,
3906                 .doit   = ip_vs_genl_set_daemon,
3907         },
3908         {
3909                 .cmd    = IPVS_CMD_DEL_DAEMON,
3910                 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
3911                 .flags  = GENL_ADMIN_PERM,
3912                 .doit   = ip_vs_genl_set_daemon,
3913         },
3914         {
3915                 .cmd    = IPVS_CMD_GET_DAEMON,
3916                 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
3917                 .flags  = GENL_ADMIN_PERM,
3918                 .dumpit = ip_vs_genl_dump_daemons,
3919         },
3920         {
3921                 .cmd    = IPVS_CMD_SET_CONFIG,
3922                 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
3923                 .flags  = GENL_ADMIN_PERM,
3924                 .doit   = ip_vs_genl_set_cmd,
3925         },
3926         {
3927                 .cmd    = IPVS_CMD_GET_CONFIG,
3928                 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
3929                 .flags  = GENL_ADMIN_PERM,
3930                 .doit   = ip_vs_genl_get_cmd,
3931         },
3932         {
3933                 .cmd    = IPVS_CMD_GET_INFO,
3934                 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
3935                 .flags  = GENL_ADMIN_PERM,
3936                 .doit   = ip_vs_genl_get_cmd,
3937         },
3938         {
3939                 .cmd    = IPVS_CMD_ZERO,
3940                 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
3941                 .flags  = GENL_ADMIN_PERM,
3942                 .doit   = ip_vs_genl_set_cmd,
3943         },
3944         {
3945                 .cmd    = IPVS_CMD_FLUSH,
3946                 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
3947                 .flags  = GENL_ADMIN_PERM,
3948                 .doit   = ip_vs_genl_set_cmd,
3949         },
3950 };
3951 
3952 static struct genl_family ip_vs_genl_family __ro_after_init = {
3953         .hdrsize        = 0,
3954         .name           = IPVS_GENL_NAME,
3955         .version        = IPVS_GENL_VERSION,
3956         .maxattr        = IPVS_CMD_ATTR_MAX,
3957         .policy = ip_vs_cmd_policy,
3958         .netnsok        = true,         /* Make ipvsadm to work on netns */
3959         .module         = THIS_MODULE,
3960         .ops            = ip_vs_genl_ops,
3961         .n_ops          = ARRAY_SIZE(ip_vs_genl_ops),
3962 };
3963 
/* Register the IPVS generic netlink family; returns the result of
 * genl_register_family().
 */
static int __init ip_vs_genl_register(void)
{
        return genl_register_family(&ip_vs_genl_family);
}
3968 
/* Unregister the IPVS generic netlink family. */
static void ip_vs_genl_unregister(void)
{
        genl_unregister_family(&ip_vs_genl_family);
}
3973 
3974 /* End of Generic Netlink interface definitions */
3975 
3976 /*
3977  * per netns init/exit func.
3978  */
3979 #ifdef CONFIG_SYSCTL
3980 static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
3981 {
3982         struct net *net = ipvs->net;
3983         int idx;
3984         struct ctl_table *tbl;
3985 
3986         atomic_set(&ipvs->dropentry, 0);
3987         spin_lock_init(&ipvs->dropentry_lock);
3988         spin_lock_init(&ipvs->droppacket_lock);
3989         spin_lock_init(&ipvs->securetcp_lock);
3990 
3991         if (!net_eq(net, &init_net)) {
3992                 tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
3993                 if (tbl == NULL)
3994                         return -ENOMEM;
3995 
3996                 /* Don't export sysctls to unprivileged users */
3997                 if (net->user_ns != &init_user_ns)
3998                         tbl[0].procname = NULL;
3999         } else
4000                 tbl = vs_vars;
4001         /* Initialize sysctl defaults */
4002         for (idx = 0; idx < ARRAY_SIZE(vs_vars); idx++) {
4003                 if (tbl[idx].proc_handler == proc_do_defense_mode)
4004                         tbl[idx].extra2 = ipvs;
4005         }
4006         idx = 0;
4007         ipvs->sysctl_amemthresh = 1024;
4008         tbl[idx++].data = &ipvs->sysctl_amemthresh;
4009         ipvs->sysctl_am_droprate = 10;
4010         tbl[idx++].data = &ipvs->sysctl_am_droprate;
4011         tbl[idx++].data = &ipvs->sysctl_drop_entry;
4012         tbl[idx++].data = &ipvs->sysctl_drop_packet;
4013 #ifdef CONFIG_IP_VS_NFCT
4014         tbl[idx++].data = &ipvs->sysctl_conntrack;
4015 #endif
4016         tbl[idx++].data = &ipvs->sysctl_secure_tcp;
4017         ipvs->sysctl_snat_reroute = 1;
4018         tbl[idx++].data = &ipvs->sysctl_snat_reroute;
4019         ipvs->sysctl_sync_ver = 1;
4020         tbl[idx++].data = &ipvs->sysctl_sync_ver;
4021         ipvs->sysctl_sync_ports = 1;
4022         tbl[idx++].data = &ipvs->sysctl_sync_ports;
4023         tbl[idx++].data = &ipvs->sysctl_sync_persist_mode;
4024         ipvs->sysctl_sync_qlen_max = nr_free_buffer_pages() / 32;
4025         tbl[idx++].data = &ipvs->sysctl_sync_qlen_max;
4026         ipvs->sysctl_sync_sock_size = 0;
4027         tbl[idx++].data = &ipvs->sysctl_sync_sock_size;
4028         tbl[idx++].data = &ipvs->sysctl_cache_bypass;
4029         tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
4030         tbl[idx++].data = &ipvs->sysctl_sloppy_tcp;
4031         tbl[idx++].data = &ipvs->sysctl_sloppy_sctp;
4032         tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
4033         ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD;
4034         ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD;
4035         tbl[idx].data = &ipvs->sysctl_sync_threshold;
4036         tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
4037         ipvs->sysctl_sync_refresh_period = DEFAULT_SYNC_REFRESH_PERIOD;
4038         tbl[idx++].data = &ipvs->sysctl_sync_refresh_period;
4039         ipvs->sysctl_sync_retries = clamp_t(int, DEFAULT_SYNC_RETRIES, 0, 3);
4040         tbl[idx++].data = &ipvs->sysctl_sync_retries;
4041         tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
4042         ipvs->sysctl_pmtu_disc = 1;
4043         tbl[idx++].data = &ipvs->sysctl_pmtu_disc;
4044         tbl[idx++].data = &ipvs->sysctl_backup_only;
4045         ipvs->sysctl_conn_reuse_mode = 1;
4046         tbl[idx++].data = &ipvs->sysctl_conn_reuse_mode;
4047         tbl[idx++].data = &ipvs->sysctl_schedule_icmp;
4048         tbl[idx++].data = &ipvs->sysctl_ignore_tunneled;
4049 
4050         ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl);
4051         if (ipvs->sysctl_hdr == NULL) {
4052                 if (!net_eq(net, &init_net))
4053                         kfree(tbl);
4054                 return -ENOMEM;
4055         }
4056         ip_vs_start_estimator(ipvs, &ipvs->tot_stats);
4057         ipvs->sysctl_tbl = tbl;
4058         /* Schedule defense work */
4059         INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
4060         schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
4061 
4062         return 0;
4063 }
4064 
4065 static void __net_exit ip_vs_control_net_cleanup_sysctl(struct netns_ipvs *ipvs)
4066 {
4067         struct net *net = ipvs->net;
4068 
4069         cancel_delayed_work_sync(&ipvs->defense_work);
4070         cancel_work_sync(&ipvs->defense_work.work);
4071         unregister_net_sysctl_table(ipvs->sysctl_hdr);
4072         ip_vs_stop_estimator(ipvs, &ipvs->tot_stats);
4073 
4074         if (!net_eq(net, &init_net))
4075                 kfree(ipvs->sysctl_tbl);
4076 }
4077 
4078 #else
4079 
4080 static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs) { return 0; }
4081 static void __net_exit ip_vs_control_net_cleanup_sysctl(struct netns_ipvs *ipvs) { }
4082 
4083 #endif
4084 
4085 static struct notifier_block ip_vs_dst_notifier = {
4086         .notifier_call = ip_vs_dst_event,
4087 #ifdef CONFIG_IP_VS_IPV6
4088         .priority = ADDRCONF_NOTIFY_PRIORITY + 5,
4089 #endif
4090 };
4091 
4092 int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs)
4093 {
4094         int i, idx;
4095 
4096         /* Initialize rs_table */
4097         for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
4098                 INIT_HLIST_HEAD(&ipvs->rs_table[idx]);
4099 
4100         INIT_LIST_HEAD(&ipvs->dest_trash);
4101         spin_lock_init(&ipvs->dest_trash_lock);
4102         timer_setup(&ipvs->dest_trash_timer, ip_vs_dest_trash_expire, 0);
4103         atomic_set(&ipvs->ftpsvc_counter, 0);
4104         atomic_set(&ipvs->nullsvc_counter, 0);
4105         atomic_set(&ipvs->conn_out_counter, 0);
4106 
4107         /* procfs stats */
4108         ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
4109         if (!ipvs->tot_stats.cpustats)
4110                 return -ENOMEM;
4111 
4112         for_each_possible_cpu(i) {
4113                 struct ip_vs_cpu_stats *ipvs_tot_stats;
4114                 ipvs_tot_stats = per_cpu_ptr(ipvs->tot_stats.cpustats, i);
4115                 u64_stats_init(&ipvs_tot_stats->syncp);
4116         }
4117 
4118         spin_lock_init(&ipvs->tot_stats.lock);
4119 
4120         proc_create_net("ip_vs", 0, ipvs->net->proc_net, &ip_vs_info_seq_ops,
4121                         sizeof(struct ip_vs_iter));
4122         proc_create_net_single("ip_vs_stats", 0, ipvs->net->proc_net,
4123                         ip_vs_stats_show, NULL);
4124         proc_create_net_single("ip_vs_stats_percpu", 0, ipvs->net->proc_net,
4125                         ip_vs_stats_percpu_show, NULL);
4126 
4127         if (ip_vs_control_net_init_sysctl(ipvs))
4128                 goto err;
4129 
4130         return 0;
4131 
4132 err:
4133         free_percpu(ipvs->tot_stats.cpustats);
4134         return -ENOMEM;
4135 }
4136 
4137 void __net_exit ip_vs_control_net_cleanup(struct netns_ipvs *ipvs)
4138 {
4139         ip_vs_trash_cleanup(ipvs);
4140         ip_vs_control_net_cleanup_sysctl(ipvs);
4141         remove_proc_entry("ip_vs_stats_percpu", ipvs->net->proc_net);
4142         remove_proc_entry("ip_vs_stats", ipvs->net->proc_net);
4143         remove_proc_entry("ip_vs", ipvs->net->proc_net);
4144         free_percpu(ipvs->tot_stats.cpustats);
4145 }
4146 
4147 int __init ip_vs_register_nl_ioctl(void)
4148 {
4149         int ret;
4150 
4151         ret = nf_register_sockopt(&ip_vs_sockopts);
4152         if (ret) {
4153                 pr_err("cannot register sockopt.\n");
4154                 goto err_sock;
4155         }
4156 
4157         ret = ip_vs_genl_register();
4158         if (ret) {
4159                 pr_err("cannot register Generic Netlink interface.\n");
4160                 goto err_genl;
4161         }
4162         return 0;
4163 
4164 err_genl:
4165         nf_unregister_sockopt(&ip_vs_sockopts);
4166 err_sock:
4167         return ret;
4168 }
4169 
4170 void ip_vs_unregister_nl_ioctl(void)
4171 {
4172         ip_vs_genl_unregister();
4173         nf_unregister_sockopt(&ip_vs_sockopts);
4174 }
4175 
4176 int __init ip_vs_control_init(void)
4177 {
4178         int idx;
4179         int ret;
4180 
4181         EnterFunction(2);
4182 
4183         /* Initialize svc_table, ip_vs_svc_fwm_table */
4184         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
4185                 INIT_HLIST_HEAD(&ip_vs_svc_table[idx]);
4186                 INIT_HLIST_HEAD(&ip_vs_svc_fwm_table[idx]);
4187         }
4188 
4189         smp_wmb();      /* Do we really need it now ? */
4190 
4191         ret = register_netdevice_notifier(&ip_vs_dst_notifier);
4192         if (ret < 0)
4193                 return ret;
4194 
4195         LeaveFunction(2);
4196         return 0;
4197 }
4198 
4199 
4200 void ip_vs_control_cleanup(void)
4201 {
4202         EnterFunction(2);
4203         unregister_netdevice_notifier(&ip_vs_dst_notifier);
4204         LeaveFunction(2);
4205 }

/* [<][>][^][v][top][bottom][index][help] */