root/net/netfilter/ipvs/ip_vs_proto_tcp.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. tcp_conn_schedule
  2. tcp_fast_csum_update
  3. tcp_partial_csum_update
  4. tcp_snat_handler
  5. tcp_dnat_handler
  6. tcp_csum_check
  7. tcp_state_name
  8. tcp_state_active
  9. tcp_timeout_change
  10. tcp_state_idx
  11. set_tcp_state
  12. tcp_state_transition
  13. tcp_app_hashkey
  14. tcp_register_app
  15. tcp_unregister_app
  16. tcp_app_conn_bind
  17. ip_vs_tcp_conn_listen
  18. __ip_vs_tcp_init
  19. __ip_vs_tcp_exit

   1 // SPDX-License-Identifier: GPL-2.0-or-later
   2 /*
   3  * ip_vs_proto_tcp.c:   TCP load balancing support for IPVS
   4  *
   5  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
   6  *              Julian Anastasov <ja@ssi.bg>
   7  *
   8  * Changes:     Hans Schillstrom <hans.schillstrom@ericsson.com>
   9  *
  10  *              Network name space (netns) aware.
  11  *              Global data moved to netns i.e struct netns_ipvs
  12  *              tcp_timeouts table has copy per netns in a hash table per
  13  *              protocol ip_vs_proto_data and is handled by netns
  14  */
  15 
  16 #define KMSG_COMPONENT "IPVS"
  17 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  18 
  19 #include <linux/kernel.h>
  20 #include <linux/ip.h>
  21 #include <linux/tcp.h>                  /* for tcphdr */
  22 #include <net/ip.h>
  23 #include <net/tcp.h>                    /* for csum_tcpudp_magic */
  24 #include <net/ip6_checksum.h>
  25 #include <linux/netfilter.h>
  26 #include <linux/netfilter_ipv4.h>
  27 #include <linux/indirect_call_wrapper.h>
  28 
  29 #include <net/ip_vs.h>
  30 
  31 static int
  32 tcp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp);
  33 
  34 static int
  35 tcp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
  36                   struct ip_vs_proto_data *pd,
  37                   int *verdict, struct ip_vs_conn **cpp,
  38                   struct ip_vs_iphdr *iph)
  39 {
  40         struct ip_vs_service *svc;
  41         struct tcphdr _tcph, *th;
  42         __be16 _ports[2], *ports = NULL;
  43 
  44         /* In the event of icmp, we're only guaranteed to have the first 8
  45          * bytes of the transport header, so we only check the rest of the
  46          * TCP packet for non-ICMP packets
  47          */
  48         if (likely(!ip_vs_iph_icmp(iph))) {
  49                 th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph);
  50                 if (th) {
  51                         if (th->rst || !(sysctl_sloppy_tcp(ipvs) || th->syn))
  52                                 return 1;
  53                         ports = &th->source;
  54                 }
  55         } else {
  56                 ports = skb_header_pointer(
  57                         skb, iph->len, sizeof(_ports), &_ports);
  58         }
  59 
  60         if (!ports) {
  61                 *verdict = NF_DROP;
  62                 return 0;
  63         }
  64 
  65         /* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */
  66 
  67         if (likely(!ip_vs_iph_inverse(iph)))
  68                 svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
  69                                          &iph->daddr, ports[1]);
  70         else
  71                 svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
  72                                          &iph->saddr, ports[0]);
  73 
  74         if (svc) {
  75                 int ignored;
  76 
  77                 if (ip_vs_todrop(ipvs)) {
  78                         /*
  79                          * It seems that we are very loaded.
  80                          * We have to drop this packet :(
  81                          */
  82                         *verdict = NF_DROP;
  83                         return 0;
  84                 }
  85 
  86                 /*
  87                  * Let the virtual server select a real server for the
  88                  * incoming connection, and create a connection entry.
  89                  */
  90                 *cpp = ip_vs_schedule(svc, skb, pd, &ignored, iph);
  91                 if (!*cpp && ignored <= 0) {
  92                         if (!ignored)
  93                                 *verdict = ip_vs_leave(svc, skb, pd, iph);
  94                         else
  95                                 *verdict = NF_DROP;
  96                         return 0;
  97                 }
  98         }
  99         /* NF_ACCEPT */
 100         return 1;
 101 }
 102 
 103 
 104 static inline void
 105 tcp_fast_csum_update(int af, struct tcphdr *tcph,
 106                      const union nf_inet_addr *oldip,
 107                      const union nf_inet_addr *newip,
 108                      __be16 oldport, __be16 newport)
 109 {
 110 #ifdef CONFIG_IP_VS_IPV6
 111         if (af == AF_INET6)
 112                 tcph->check =
 113                         csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
 114                                          ip_vs_check_diff2(oldport, newport,
 115                                                 ~csum_unfold(tcph->check))));
 116         else
 117 #endif
 118         tcph->check =
 119                 csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
 120                                  ip_vs_check_diff2(oldport, newport,
 121                                                 ~csum_unfold(tcph->check))));
 122 }
 123 
 124 
 125 static inline void
 126 tcp_partial_csum_update(int af, struct tcphdr *tcph,
 127                      const union nf_inet_addr *oldip,
 128                      const union nf_inet_addr *newip,
 129                      __be16 oldlen, __be16 newlen)
 130 {
 131 #ifdef CONFIG_IP_VS_IPV6
 132         if (af == AF_INET6)
 133                 tcph->check =
 134                         ~csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
 135                                          ip_vs_check_diff2(oldlen, newlen,
 136                                                 csum_unfold(tcph->check))));
 137         else
 138 #endif
 139         tcph->check =
 140                 ~csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
 141                                 ip_vs_check_diff2(oldlen, newlen,
 142                                                 csum_unfold(tcph->check))));
 143 }
 144 
 145 
 146 INDIRECT_CALLABLE_SCOPE int
 147 tcp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
 148                  struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
 149 {
 150         struct tcphdr *tcph;
 151         unsigned int tcphoff = iph->len;
 152         bool payload_csum = false;
 153         int oldlen;
 154 
 155 #ifdef CONFIG_IP_VS_IPV6
 156         if (cp->af == AF_INET6 && iph->fragoffs)
 157                 return 1;
 158 #endif
 159         oldlen = skb->len - tcphoff;
 160 
 161         /* csum_check requires unshared skb */
 162         if (skb_ensure_writable(skb, tcphoff + sizeof(*tcph)))
 163                 return 0;
 164 
 165         if (unlikely(cp->app != NULL)) {
 166                 int ret;
 167 
 168                 /* Some checks before mangling */
 169                 if (!tcp_csum_check(cp->af, skb, pp))
 170                         return 0;
 171 
 172                 /* Call application helper if needed */
 173                 if (!(ret = ip_vs_app_pkt_out(cp, skb, iph)))
 174                         return 0;
 175                 /* ret=2: csum update is needed after payload mangling */
 176                 if (ret == 1)
 177                         oldlen = skb->len - tcphoff;
 178                 else
 179                         payload_csum = true;
 180         }
 181 
 182         tcph = (void *)skb_network_header(skb) + tcphoff;
 183         tcph->source = cp->vport;
 184 
 185         /* Adjust TCP checksums */
 186         if (skb->ip_summed == CHECKSUM_PARTIAL) {
 187                 tcp_partial_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr,
 188                                         htons(oldlen),
 189                                         htons(skb->len - tcphoff));
 190         } else if (!payload_csum) {
 191                 /* Only port and addr are changed, do fast csum update */
 192                 tcp_fast_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr,
 193                                      cp->dport, cp->vport);
 194                 if (skb->ip_summed == CHECKSUM_COMPLETE)
 195                         skb->ip_summed = cp->app ?
 196                                          CHECKSUM_UNNECESSARY : CHECKSUM_NONE;
 197         } else {
 198                 /* full checksum calculation */
 199                 tcph->check = 0;
 200                 skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
 201 #ifdef CONFIG_IP_VS_IPV6
 202                 if (cp->af == AF_INET6)
 203                         tcph->check = csum_ipv6_magic(&cp->vaddr.in6,
 204                                                       &cp->caddr.in6,
 205                                                       skb->len - tcphoff,
 206                                                       cp->protocol, skb->csum);
 207                 else
 208 #endif
 209                         tcph->check = csum_tcpudp_magic(cp->vaddr.ip,
 210                                                         cp->caddr.ip,
 211                                                         skb->len - tcphoff,
 212                                                         cp->protocol,
 213                                                         skb->csum);
 214                 skb->ip_summed = CHECKSUM_UNNECESSARY;
 215 
 216                 IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
 217                           pp->name, tcph->check,
 218                           (char*)&(tcph->check) - (char*)tcph);
 219         }
 220         return 1;
 221 }
 222 
 223 
 224 static int
 225 tcp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
 226                  struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
 227 {
 228         struct tcphdr *tcph;
 229         unsigned int tcphoff = iph->len;
 230         bool payload_csum = false;
 231         int oldlen;
 232 
 233 #ifdef CONFIG_IP_VS_IPV6
 234         if (cp->af == AF_INET6 && iph->fragoffs)
 235                 return 1;
 236 #endif
 237         oldlen = skb->len - tcphoff;
 238 
 239         /* csum_check requires unshared skb */
 240         if (skb_ensure_writable(skb, tcphoff + sizeof(*tcph)))
 241                 return 0;
 242 
 243         if (unlikely(cp->app != NULL)) {
 244                 int ret;
 245 
 246                 /* Some checks before mangling */
 247                 if (!tcp_csum_check(cp->af, skb, pp))
 248                         return 0;
 249 
 250                 /*
 251                  *      Attempt ip_vs_app call.
 252                  *      It will fix ip_vs_conn and iph ack_seq stuff
 253                  */
 254                 if (!(ret = ip_vs_app_pkt_in(cp, skb, iph)))
 255                         return 0;
 256                 /* ret=2: csum update is needed after payload mangling */
 257                 if (ret == 1)
 258                         oldlen = skb->len - tcphoff;
 259                 else
 260                         payload_csum = true;
 261         }
 262 
 263         tcph = (void *)skb_network_header(skb) + tcphoff;
 264         tcph->dest = cp->dport;
 265 
 266         /*
 267          *      Adjust TCP checksums
 268          */
 269         if (skb->ip_summed == CHECKSUM_PARTIAL) {
 270                 tcp_partial_csum_update(cp->af, tcph, &cp->vaddr, &cp->daddr,
 271                                         htons(oldlen),
 272                                         htons(skb->len - tcphoff));
 273         } else if (!payload_csum) {
 274                 /* Only port and addr are changed, do fast csum update */
 275                 tcp_fast_csum_update(cp->af, tcph, &cp->vaddr, &cp->daddr,
 276                                      cp->vport, cp->dport);
 277                 if (skb->ip_summed == CHECKSUM_COMPLETE)
 278                         skb->ip_summed = cp->app ?
 279                                          CHECKSUM_UNNECESSARY : CHECKSUM_NONE;
 280         } else {
 281                 /* full checksum calculation */
 282                 tcph->check = 0;
 283                 skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
 284 #ifdef CONFIG_IP_VS_IPV6
 285                 if (cp->af == AF_INET6)
 286                         tcph->check = csum_ipv6_magic(&cp->caddr.in6,
 287                                                       &cp->daddr.in6,
 288                                                       skb->len - tcphoff,
 289                                                       cp->protocol, skb->csum);
 290                 else
 291 #endif
 292                         tcph->check = csum_tcpudp_magic(cp->caddr.ip,
 293                                                         cp->daddr.ip,
 294                                                         skb->len - tcphoff,
 295                                                         cp->protocol,
 296                                                         skb->csum);
 297                 skb->ip_summed = CHECKSUM_UNNECESSARY;
 298         }
 299         return 1;
 300 }
 301 
 302 
 303 static int
 304 tcp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
 305 {
 306         unsigned int tcphoff;
 307 
 308 #ifdef CONFIG_IP_VS_IPV6
 309         if (af == AF_INET6)
 310                 tcphoff = sizeof(struct ipv6hdr);
 311         else
 312 #endif
 313                 tcphoff = ip_hdrlen(skb);
 314 
 315         switch (skb->ip_summed) {
 316         case CHECKSUM_NONE:
 317                 skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
 318                 /* fall through */
 319         case CHECKSUM_COMPLETE:
 320 #ifdef CONFIG_IP_VS_IPV6
 321                 if (af == AF_INET6) {
 322                         if (csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
 323                                             &ipv6_hdr(skb)->daddr,
 324                                             skb->len - tcphoff,
 325                                             ipv6_hdr(skb)->nexthdr,
 326                                             skb->csum)) {
 327                                 IP_VS_DBG_RL_PKT(0, af, pp, skb, 0,
 328                                                  "Failed checksum for");
 329                                 return 0;
 330                         }
 331                 } else
 332 #endif
 333                         if (csum_tcpudp_magic(ip_hdr(skb)->saddr,
 334                                               ip_hdr(skb)->daddr,
 335                                               skb->len - tcphoff,
 336                                               ip_hdr(skb)->protocol,
 337                                               skb->csum)) {
 338                                 IP_VS_DBG_RL_PKT(0, af, pp, skb, 0,
 339                                                  "Failed checksum for");
 340                                 return 0;
 341                         }
 342                 break;
 343         default:
 344                 /* No need to checksum. */
 345                 break;
 346         }
 347 
 348         return 1;
 349 }
 350 
 351 
  352 #define TCP_DIR_INPUT           0  /* first row of the INPUT group */
  353 #define TCP_DIR_OUTPUT          4  /* first row of the OUTPUT group */
  354 #define TCP_DIR_INPUT_ONLY      8  /* first row of the INPUT-ONLY group */
  355 
      /*
       * Maps an IP_VS_DIR_* direction to the index of the first row of the
       * matching group in a tcp_states_t transition table.  set_tcp_state()
       * adds the tcp_state_idx() result (0..3) to this offset to select the
       * row for the packet's flag.
       */
  356 static const int tcp_state_off[IP_VS_DIR_LAST] = {
  357         [IP_VS_DIR_INPUT]               =       TCP_DIR_INPUT,
  358         [IP_VS_DIR_OUTPUT]              =       TCP_DIR_OUTPUT,
  359         [IP_VS_DIR_INPUT_ONLY]          =       TCP_DIR_INPUT_ONLY,
  360 };
 361 
  362 /*
  363  *      Timeout table[state]
       *
       *      Default timeout for each IP_VS_TCP_S_* state, written as a
       *      multiple of HZ (presumably jiffies - confirm against the users
       *      of cp->timeout).  __ip_vs_tcp_init() passes this table to
       *      ip_vs_create_timeout_table() to build the per-netns table, and
       *      set_tcp_state() / ip_vs_tcp_conn_listen() fall back to it when
       *      no ip_vs_proto_data is available.
  364  */
  365 static const int tcp_timeouts[IP_VS_TCP_S_LAST+1] = {
  366         [IP_VS_TCP_S_NONE]              =       2*HZ,
  367         [IP_VS_TCP_S_ESTABLISHED]       =       15*60*HZ,
  368         [IP_VS_TCP_S_SYN_SENT]          =       2*60*HZ,
  369         [IP_VS_TCP_S_SYN_RECV]          =       1*60*HZ,
  370         [IP_VS_TCP_S_FIN_WAIT]          =       2*60*HZ,
  371         [IP_VS_TCP_S_TIME_WAIT]         =       2*60*HZ,
  372         [IP_VS_TCP_S_CLOSE]             =       10*HZ,
  373         [IP_VS_TCP_S_CLOSE_WAIT]        =       60*HZ,
  374         [IP_VS_TCP_S_LAST_ACK]          =       30*HZ,
  375         [IP_VS_TCP_S_LISTEN]            =       2*60*HZ,
  376         [IP_VS_TCP_S_SYNACK]            =       120*HZ,
  377         [IP_VS_TCP_S_LAST]              =       2*HZ,
  378 };
 379 
  /*
   * Printable name for each IP_VS_TCP_S_* state, looked up by
   * tcp_state_name().  The IP_VS_TCP_S_LAST slot ("BUG!") is never
   * returned by tcp_state_name(), which answers "ERR!" for any state
   * >= IP_VS_TCP_S_LAST before indexing this table.
   */
  380 static const char *const tcp_state_name_table[IP_VS_TCP_S_LAST+1] = {
  381         [IP_VS_TCP_S_NONE]              =       "NONE",
  382         [IP_VS_TCP_S_ESTABLISHED]       =       "ESTABLISHED",
  383         [IP_VS_TCP_S_SYN_SENT]          =       "SYN_SENT",
  384         [IP_VS_TCP_S_SYN_RECV]          =       "SYN_RECV",
  385         [IP_VS_TCP_S_FIN_WAIT]          =       "FIN_WAIT",
  386         [IP_VS_TCP_S_TIME_WAIT]         =       "TIME_WAIT",
  387         [IP_VS_TCP_S_CLOSE]             =       "CLOSE",
  388         [IP_VS_TCP_S_CLOSE_WAIT]        =       "CLOSE_WAIT",
  389         [IP_VS_TCP_S_LAST_ACK]          =       "LAST_ACK",
  390         [IP_VS_TCP_S_LISTEN]            =       "LISTEN",
  391         [IP_VS_TCP_S_SYNACK]            =       "SYNACK",
  392         [IP_VS_TCP_S_LAST]              =       "BUG!",
  393 };
 394 
  /*
   * Whether a connection in the given IP_VS_TCP_S_* state counts as
   * "active".  Read through tcp_state_active(); set_tcp_state() uses the
   * answer to move a connection between the destination's activeconns
   * and inactconns counters.  Note the table has IP_VS_TCP_S_LAST
   * entries (no extra slot), matching the bound check in
   * tcp_state_active().
   */
  395 static const bool tcp_state_active_table[IP_VS_TCP_S_LAST] = {
  396         [IP_VS_TCP_S_NONE]              =       false,
  397         [IP_VS_TCP_S_ESTABLISHED]       =       true,
  398         [IP_VS_TCP_S_SYN_SENT]          =       true,
  399         [IP_VS_TCP_S_SYN_RECV]          =       true,
  400         [IP_VS_TCP_S_FIN_WAIT]          =       false,
  401         [IP_VS_TCP_S_TIME_WAIT]         =       false,
  402         [IP_VS_TCP_S_CLOSE]             =       false,
  403         [IP_VS_TCP_S_CLOSE_WAIT]        =       false,
  404         [IP_VS_TCP_S_LAST_ACK]          =       false,
  405         [IP_VS_TCP_S_LISTEN]            =       false,
  406         [IP_VS_TCP_S_SYNACK]            =       true,
  407 };
 408 
  /*
   * Short aliases for the IP_VS_TCP_S_* states, used to keep the
   * transition tables below readable.
   */
  409 #define sNO IP_VS_TCP_S_NONE
  410 #define sES IP_VS_TCP_S_ESTABLISHED
  411 #define sSS IP_VS_TCP_S_SYN_SENT
  412 #define sSR IP_VS_TCP_S_SYN_RECV
  413 #define sFW IP_VS_TCP_S_FIN_WAIT
  414 #define sTW IP_VS_TCP_S_TIME_WAIT
  415 #define sCL IP_VS_TCP_S_CLOSE
  416 #define sCW IP_VS_TCP_S_CLOSE_WAIT
  417 #define sLA IP_VS_TCP_S_LAST_ACK
  418 #define sLI IP_VS_TCP_S_LISTEN
  419 #define sSA IP_VS_TCP_S_SYNACK
  420 
      /*
       * One row of a transition table: next_state[] is indexed by the
       * connection's current state and yields the state to move to.
       */
  421 struct tcp_states_t {
  422         int next_state[IP_VS_TCP_S_LAST];
  423 };
 424 
 425 static const char * tcp_state_name(int state)
 426 {
 427         if (state >= IP_VS_TCP_S_LAST)
 428                 return "ERR!";
 429         return tcp_state_name_table[state] ? tcp_state_name_table[state] : "?";
 430 }
 431 
 432 static bool tcp_state_active(int state)
 433 {
 434         if (state >= IP_VS_TCP_S_LAST)
 435                 return false;
 436         return tcp_state_active_table[state];
 437 }
 438 
  /*
   * Default TCP state transition table (installed by __ip_vs_tcp_init()
   * and selected by tcp_timeout_change() when bit 0 of its flags is
   * clear).
   *
   * Layout: three groups of four rows.  The group is picked by the
   * TCP_DIR_* offset (INPUT = 0, OUTPUT = 4, INPUT-ONLY = 8) and the row
   * within the group by tcp_state_idx(): syn = 0, fin = 1, ack = 2,
   * rst = 3.  Each column is the connection's current state, in the
   * order given by the column header comments; the cell is the next
   * state.
   */
  439 static struct tcp_states_t tcp_states[] = {
  440 /*      INPUT */
  441 /*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
  442 /*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }},
  443 /*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sTW }},
  444 /*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
  445 /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sSR }},
  446 
  447 /*      OUTPUT */
  448 /*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
  449 /*syn*/ {{sSS, sES, sSS, sSR, sSS, sSS, sSS, sSS, sSS, sLI, sSR }},
  450 /*fin*/ {{sTW, sFW, sSS, sTW, sFW, sTW, sCL, sTW, sLA, sLI, sTW }},
  451 /*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sLA, sES, sES }},
  452 /*rst*/ {{sCL, sCL, sSS, sCL, sCL, sTW, sCL, sCL, sCL, sCL, sCL }},
  453 
  454 /*      INPUT-ONLY */
  455 /*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
  456 /*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }},
  457 /*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }},
  458 /*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
  459 /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
  460 };
 461 
  /*
   * Alternative TCP state transition table, selected by
   * tcp_timeout_change() when bit 0 of its flags argument is set (that
   * bit is commented there as "secure_tcp").
   *
   * Same layout as tcp_states: rows are direction group x flag
   * (syn, fin, ack, rst), columns are the current state, cells are the
   * next state.  Some cells differ from tcp_states; for example an INPUT
   * ack while in SYN_RECV keeps the connection in SYN_RECV here, whereas
   * tcp_states moves it to ESTABLISHED.
   */
  462 static struct tcp_states_t tcp_states_dos[] = {
  463 /*      INPUT */
  464 /*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
  465 /*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSA }},
  466 /*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sSA }},
  467 /*ack*/ {{sES, sES, sSS, sSR, sFW, sTW, sCL, sCW, sCL, sLI, sSA }},
  468 /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
  469 
  470 /*      OUTPUT */
  471 /*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
  472 /*syn*/ {{sSS, sES, sSS, sSA, sSS, sSS, sSS, sSS, sSS, sLI, sSA }},
  473 /*fin*/ {{sTW, sFW, sSS, sTW, sFW, sTW, sCL, sTW, sLA, sLI, sTW }},
  474 /*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sLA, sES, sES }},
  475 /*rst*/ {{sCL, sCL, sSS, sCL, sCL, sTW, sCL, sCL, sCL, sCL, sCL }},
  476 
  477 /*      INPUT-ONLY */
  478 /*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
  479 /*syn*/ {{sSA, sES, sES, sSR, sSA, sSA, sSA, sSA, sSA, sSA, sSA }},
  480 /*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }},
  481 /*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
  482 /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
  483 };
 484 
 485 static void tcp_timeout_change(struct ip_vs_proto_data *pd, int flags)
 486 {
 487         int on = (flags & 1);           /* secure_tcp */
 488 
 489         /*
 490         ** FIXME: change secure_tcp to independent sysctl var
 491         ** or make it per-service or per-app because it is valid
 492         ** for most if not for all of the applications. Something
 493         ** like "capabilities" (flags) for each object.
 494         */
 495         pd->tcp_state_table = (on ? tcp_states_dos : tcp_states);
 496 }
 497 
 498 static inline int tcp_state_idx(struct tcphdr *th)
 499 {
 500         if (th->rst)
 501                 return 3;
 502         if (th->syn)
 503                 return 0;
 504         if (th->fin)
 505                 return 1;
 506         if (th->ack)
 507                 return 2;
 508         return -1;
 509 }
 510 
  /*
   * set_tcp_state - advance the TCP state of @cp for one packet.
   *
   * @pd:        protocol data supplying the transition and timeout tables
   * @cp:        connection whose state, flags and timeout are updated
   * @direction: index into tcp_state_off[] (an IP_VS_DIR_* value)
   * @th:        TCP header of the packet
   *
   * The next state is read from pd->tcp_state_table using the row chosen
   * by direction + packet flag and the column cp->state.  If the packet
   * carries none of rst/syn/fin/ack, the next state is IP_VS_TCP_S_CLOSE.
   * On a state change the destination's active/inactive counters and the
   * IP_VS_CONN_F_INACTIVE flag are kept in step with
   * tcp_state_active(new_state).  cp->state and cp->timeout are always
   * rewritten at the end.
   *
   * tcp_state_transition() calls this with cp->lock held.
   */
  511 static inline void
  512 set_tcp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
  513               int direction, struct tcphdr *th)
  514 {
  515         int state_idx;
              /* Used as-is when the packet has no recognised flag */
  516         int new_state = IP_VS_TCP_S_CLOSE;
  517         int state_off = tcp_state_off[direction];
  518 
  519         /*
  520          *    Update state offset to INPUT_ONLY if necessary
  521          *    or delete NO_OUTPUT flag if output packet detected
  522          */
  523         if (cp->flags & IP_VS_CONN_F_NOOUTPUT) {
  524                 if (state_off == TCP_DIR_OUTPUT)
  525                         cp->flags &= ~IP_VS_CONN_F_NOOUTPUT;
  526                 else
  527                         state_off = TCP_DIR_INPUT_ONLY;
  528         }
  529 
  530         if ((state_idx = tcp_state_idx(th)) < 0) {
  531                 IP_VS_DBG(8, "tcp_state_idx=%d!!!\n", state_idx);
  532                 goto tcp_state_out;
  533         }
  534 
              /* Row: direction group + flag; column: current state */
  535         new_state =
  536                 pd->tcp_state_table[state_off+state_idx].next_state[cp->state];
  537 
  538   tcp_state_out:
  539         if (new_state != cp->state) {
  540                 struct ip_vs_dest *dest = cp->dest;
  541 
  542                 IP_VS_DBG_BUF(8, "%s %s [%c%c%c%c] %s:%d->"
  543                               "%s:%d state: %s->%s conn->refcnt:%d\n",
  544                               pd->pp->name,
  545                               ((state_off == TCP_DIR_OUTPUT) ?
  546                                "output " : "input "),
  547                               th->syn ? 'S' : '.',
  548                               th->fin ? 'F' : '.',
  549                               th->ack ? 'A' : '.',
  550                               th->rst ? 'R' : '.',
  551                               IP_VS_DBG_ADDR(cp->daf, &cp->daddr),
  552                               ntohs(cp->dport),
  553                               IP_VS_DBG_ADDR(cp->af, &cp->caddr),
  554                               ntohs(cp->cport),
  555                               tcp_state_name(cp->state),
  556                               tcp_state_name(new_state),
  557                               refcount_read(&cp->refcnt));
  558 
                      /*
                       * Keep dest->activeconns / dest->inactconns and the
                       * INACTIVE flag consistent with the new state.
                       */
  559                 if (dest) {
  560                         if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
  561                             !tcp_state_active(new_state)) {
  562                                 atomic_dec(&dest->activeconns);
  563                                 atomic_inc(&dest->inactconns);
  564                                 cp->flags |= IP_VS_CONN_F_INACTIVE;
  565                         } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
  566                                    tcp_state_active(new_state)) {
  567                                 atomic_inc(&dest->activeconns);
  568                                 atomic_dec(&dest->inactconns);
  569                                 cp->flags &= ~IP_VS_CONN_F_INACTIVE;
  570                         }
  571                 }
  572                 if (new_state == IP_VS_TCP_S_ESTABLISHED)
  573                         ip_vs_control_assure_ct(cp);
  574         }
  575 
              /*
               * The index expression also stores new_state into cp->state.
               * NOTE(review): pd is already dereferenced above whenever a
               * flag row is looked up, so this NULL check can only matter on
               * the no-flag path - confirm whether callers may pass NULL.
               */
  576         if (likely(pd))
  577                 cp->timeout = pd->timeout_table[cp->state = new_state];
  578         else    /* What to do ? */
  579                 cp->timeout = tcp_timeouts[cp->state = new_state];
  580 }
 581 
 582 /*
 583  *      Handle state transitions
 584  */
 585 static void
 586 tcp_state_transition(struct ip_vs_conn *cp, int direction,
 587                      const struct sk_buff *skb,
 588                      struct ip_vs_proto_data *pd)
 589 {
 590         struct tcphdr _tcph, *th;
 591 
 592 #ifdef CONFIG_IP_VS_IPV6
 593         int ihl = cp->af == AF_INET ? ip_hdrlen(skb) : sizeof(struct ipv6hdr);
 594 #else
 595         int ihl = ip_hdrlen(skb);
 596 #endif
 597 
 598         th = skb_header_pointer(skb, ihl, sizeof(_tcph), &_tcph);
 599         if (th == NULL)
 600                 return;
 601 
 602         spin_lock_bh(&cp->lock);
 603         set_tcp_state(pd, cp, direction, th);
 604         spin_unlock_bh(&cp->lock);
 605 }
 606 
 607 static inline __u16 tcp_app_hashkey(__be16 port)
 608 {
 609         return (((__force u16)port >> TCP_APP_TAB_BITS) ^ (__force u16)port)
 610                 & TCP_APP_TAB_MASK;
 611 }
 612 
 613 
 614 static int tcp_register_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc)
 615 {
 616         struct ip_vs_app *i;
 617         __u16 hash;
 618         __be16 port = inc->port;
 619         int ret = 0;
 620         struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP);
 621 
 622         hash = tcp_app_hashkey(port);
 623 
 624         list_for_each_entry(i, &ipvs->tcp_apps[hash], p_list) {
 625                 if (i->port == port) {
 626                         ret = -EEXIST;
 627                         goto out;
 628                 }
 629         }
 630         list_add_rcu(&inc->p_list, &ipvs->tcp_apps[hash]);
 631         atomic_inc(&pd->appcnt);
 632 
 633   out:
 634         return ret;
 635 }
 636 
 637 
 638 static void
 639 tcp_unregister_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc)
 640 {
 641         struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP);
 642 
 643         atomic_dec(&pd->appcnt);
 644         list_del_rcu(&inc->p_list);
 645 }
 646 
 647 
 648 static int
 649 tcp_app_conn_bind(struct ip_vs_conn *cp)
 650 {
 651         struct netns_ipvs *ipvs = cp->ipvs;
 652         int hash;
 653         struct ip_vs_app *inc;
 654         int result = 0;
 655 
 656         /* Default binding: bind app only for NAT */
 657         if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
 658                 return 0;
 659 
 660         /* Lookup application incarnations and bind the right one */
 661         hash = tcp_app_hashkey(cp->vport);
 662 
 663         list_for_each_entry_rcu(inc, &ipvs->tcp_apps[hash], p_list) {
 664                 if (inc->port == cp->vport) {
 665                         if (unlikely(!ip_vs_app_inc_get(inc)))
 666                                 break;
 667 
 668                         IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"
 669                                       "%s:%u to app %s on port %u\n",
 670                                       __func__,
 671                                       IP_VS_DBG_ADDR(cp->af, &cp->caddr),
 672                                       ntohs(cp->cport),
 673                                       IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
 674                                       ntohs(cp->vport),
 675                                       inc->name, ntohs(inc->port));
 676 
 677                         cp->app = inc;
 678                         if (inc->init_conn)
 679                                 result = inc->init_conn(inc, cp);
 680                         break;
 681                 }
 682         }
 683 
 684         return result;
 685 }
 686 
 687 
 688 /*
 689  *      Set LISTEN timeout. (ip_vs_conn_put will setup timer)
 690  */
 691 void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp)
 692 {
 693         struct ip_vs_proto_data *pd = ip_vs_proto_data_get(cp->ipvs, IPPROTO_TCP);
 694 
 695         spin_lock_bh(&cp->lock);
 696         cp->state = IP_VS_TCP_S_LISTEN;
 697         cp->timeout = (pd ? pd->timeout_table[IP_VS_TCP_S_LISTEN]
 698                            : tcp_timeouts[IP_VS_TCP_S_LISTEN]);
 699         spin_unlock_bh(&cp->lock);
 700 }
 701 
  702 /* ---------------------------------------------
  703  *   Timeouts are per network namespace (netns) now.
  704  * ---------------------------------------------
  705  */
 706 static int __ip_vs_tcp_init(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd)
 707 {
 708         ip_vs_init_hash_table(ipvs->tcp_apps, TCP_APP_TAB_SIZE);
 709         pd->timeout_table = ip_vs_create_timeout_table((int *)tcp_timeouts,
 710                                                         sizeof(tcp_timeouts));
 711         if (!pd->timeout_table)
 712                 return -ENOMEM;
 713         pd->tcp_state_table = tcp_states;
 714         return 0;
 715 }
 716 
 717 static void __ip_vs_tcp_exit(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd)
 718 {
 719         kfree(pd->timeout_table);
 720 }
 721 
 722 
  /*
   * Protocol descriptor for TCP.  Wires the handlers defined in this
   * file into struct ip_vs_protocol; conn_in_get, conn_out_get and
   * debug_packet point at helpers defined elsewhere.  There are no
   * global init/exit hooks, only the per-netns ones.
   */
  723 struct ip_vs_protocol ip_vs_protocol_tcp = {
  724         .name =                 "TCP",
  725         .protocol =             IPPROTO_TCP,
  726         .num_states =           IP_VS_TCP_S_LAST,
  727         .dont_defrag =          0,
  728         .init =                 NULL,
  729         .exit =                 NULL,
  730         .init_netns =           __ip_vs_tcp_init,
  731         .exit_netns =           __ip_vs_tcp_exit,
  732         .register_app =         tcp_register_app,
  733         .unregister_app =       tcp_unregister_app,
  734         .conn_schedule =        tcp_conn_schedule,
  735         .conn_in_get =          ip_vs_conn_in_get_proto,
  736         .conn_out_get =         ip_vs_conn_out_get_proto,
  737         .snat_handler =         tcp_snat_handler,
  738         .dnat_handler =         tcp_dnat_handler,
  739         .state_name =           tcp_state_name,
  740         .state_transition =     tcp_state_transition,
  741         .app_conn_bind =        tcp_app_conn_bind,
  742         .debug_packet =         ip_vs_tcpudp_debug_packet,
  743         .timeout_change =       tcp_timeout_change,
  744 };

/* [<][>][^][v][top][bottom][index][help] */