/*
 * xt_HMARK - Netfilter module to set mark by means of hashing
 *
 * (C) 2012 by Hans Schillstrom <hans.schillstrom@ericsson.com>
 * (C) 2012 by Pablo Neira Ayuso <pablo@netfilter.org>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published by
 * the Free Software Foundation.
 */

#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/icmp.h>

#include <linux/netfilter/x_tables.h>
#include <linux/netfilter/xt_HMARK.h>

#include <net/ip.h>
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
#include <net/netfilter/nf_conntrack.h>
#endif
#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
#include <net/ipv6.h>
#include <linux/netfilter_ipv6/ip6_tables.h>
#endif

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Hans Schillstrom <hans.schillstrom@ericsson.com>");
MODULE_DESCRIPTION("Xtables: packet marking using hash calculation");
MODULE_ALIAS("ipt_HMARK");
MODULE_ALIAS("ip6t_HMARK");

struct hmark_tuple {
	__be32			src;
	__be32			dst;
	union hmark_ports	uports;
	u8			proto;
};

static inline __be32 hmark_addr6_mask(const __be32 *addr32, const __be32 *mask)
{
	return (addr32[0] & mask[0]) ^
	       (addr32[1] & mask[1]) ^
	       (addr32[2] & mask[2]) ^
	       (addr32[3] & mask[3]);
}

static inline __be32
hmark_addr_mask(int l3num, const __be32 *addr32, const __be32 *mask)
{
	switch (l3num) {
	case AF_INET:
		return *addr32 & *mask;
	case AF_INET6:
		return hmark_addr6_mask(addr32, mask);
	}
	return 0;
}

static inline void hmark_swap_ports(union hmark_ports *uports,
				    const struct xt_hmark_info *info)
{
	union hmark_ports hp;
	u16 src, dst;

	hp.b32 = (uports->b32 & info->port_mask.b32) | info->port_set.b32;
	src = ntohs(hp.b16.src);
	dst = ntohs(hp.b16.dst);

	if (dst > src)
		uports->v32 = (dst << 16) | src;
	else
		uports->v32 = (src << 16) | dst;
}

static int
hmark_ct_set_htuple(const struct sk_buff *skb, struct hmark_tuple *t,
		    const struct xt_hmark_info *info)
{
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
	enum ip_conntrack_info ctinfo;
	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
	struct nf_conntrack_tuple *otuple;
	struct nf_conntrack_tuple *rtuple;

	if (ct == NULL || nf_ct_is_untracked(ct))
		return -1;

	otuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
	rtuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;

	t->src = hmark_addr_mask(otuple->src.l3num, otuple->src.u3.ip6,
				 info->src_mask.ip6);
	t->dst = hmark_addr_mask(otuple->src.l3num, rtuple->src.u3.ip6,
				 info->dst_mask.ip6);

	if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3))
		return 0;

	t->proto = nf_ct_protonum(ct);
	if (t->proto != IPPROTO_ICMP) {
		t->uports.b16.src = otuple->src.u.all;
		t->uports.b16.dst = rtuple->src.u.all;
		hmark_swap_ports(&t->uports, info);
	}

	return 0;
#else
	return -1;
#endif
}
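/* Note: the tuple is normalized before hashing. hmark_swap_ports() above
 * orders the two port values, and hmark_hash() below swaps src/dst so the
 * smaller address hashes first; as a result both directions of a flow
 * compute the same hash and receive the same mark.
 */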
/* This hash function is endian independent, to ensure consistent hashing if
 * the cluster is composed of big and little endian systems.
 */
static inline u32
hmark_hash(struct hmark_tuple *t, const struct xt_hmark_info *info)
{
	u32 hash;
	u32 src = ntohl(t->src);
	u32 dst = ntohl(t->dst);

	if (dst < src)
		swap(src, dst);

	hash = jhash_3words(src, dst, t->uports.v32, info->hashrnd);
	hash = hash ^ (t->proto & info->proto_mask);

	return reciprocal_scale(hash, info->hmodulus) + info->hoffset;
}

static void
hmark_set_tuple_ports(const struct sk_buff *skb, unsigned int nhoff,
		      struct hmark_tuple *t, const struct xt_hmark_info *info)
{
	int protoff;

	protoff = proto_ports_offset(t->proto);
	if (protoff < 0)
		return;

	nhoff += protoff;
	if (skb_copy_bits(skb, nhoff, &t->uports, sizeof(t->uports)) < 0)
		return;

	hmark_swap_ports(&t->uports, info);
}

#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
static int get_inner6_hdr(const struct sk_buff *skb, int *offset)
{
	struct icmp6hdr *icmp6h, _ih6;

	icmp6h = skb_header_pointer(skb, *offset, sizeof(_ih6), &_ih6);
	if (icmp6h == NULL)
		return 0;

	if (icmp6h->icmp6_type && icmp6h->icmp6_type < 128) {
		*offset += sizeof(struct icmp6hdr);
		return 1;
	}
	return 0;
}

static int
hmark_pkt_set_htuple_ipv6(const struct sk_buff *skb, struct hmark_tuple *t,
			  const struct xt_hmark_info *info)
{
	struct ipv6hdr *ip6, _ip6;
	int flag = IP6_FH_F_AUTH;
	unsigned int nhoff = 0;
	u16 fragoff = 0;
	int nexthdr;

	ip6 = (struct ipv6hdr *) (skb->data + skb_network_offset(skb));
	nexthdr = ipv6_find_hdr(skb, &nhoff, -1, &fragoff, &flag);
	if (nexthdr < 0)
		return 0;
	/* No need to check for icmp errors on fragments */
	if ((flag & IP6_FH_F_FRAG) || (nexthdr != IPPROTO_ICMPV6))
		goto noicmp;
	/* Use inner header in case of ICMP errors */
	if (get_inner6_hdr(skb, &nhoff)) {
		ip6 = skb_header_pointer(skb, nhoff, sizeof(_ip6), &_ip6);
		if (ip6 == NULL)
			return -1;
		/* If AH present, use SPI like in ESP. */
		flag = IP6_FH_F_AUTH;
		nexthdr = ipv6_find_hdr(skb, &nhoff, -1, &fragoff, &flag);
		if (nexthdr < 0)
			return -1;
	}
noicmp:
	t->src = hmark_addr6_mask(ip6->saddr.s6_addr32, info->src_mask.ip6);
	t->dst = hmark_addr6_mask(ip6->daddr.s6_addr32, info->dst_mask.ip6);

	if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3))
		return 0;

	t->proto = nexthdr;
	if (t->proto == IPPROTO_ICMPV6)
		return 0;

	if (flag & IP6_FH_F_FRAG)
		return 0;

	hmark_set_tuple_ports(skb, nhoff, t, info);
	return 0;
}

static unsigned int
hmark_tg_v6(struct sk_buff *skb, const struct xt_action_param *par)
{
	const struct xt_hmark_info *info = par->targinfo;
	struct hmark_tuple t;

	memset(&t, 0, sizeof(struct hmark_tuple));

	if (info->flags & XT_HMARK_FLAG(XT_HMARK_CT)) {
		if (hmark_ct_set_htuple(skb, &t, info) < 0)
			return XT_CONTINUE;
	} else {
		if (hmark_pkt_set_htuple_ipv6(skb, &t, info) < 0)
			return XT_CONTINUE;
	}

	skb->mark = hmark_hash(&t, info);
	return XT_CONTINUE;
}
#endif
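/* Return 1 and advance *nhoff past the outer IPv4 header (iphsz bytes) and
 * the ICMP header when the packet is an ICMP error, so the caller hashes the
 * embedded (inner) header rather than the error message itself.
 */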
static int get_inner_hdr(const struct sk_buff *skb, int iphsz, int *nhoff)
{
	const struct icmphdr *icmph;
	struct icmphdr _ih;

	/* Not enough header? */
	icmph = skb_header_pointer(skb, *nhoff + iphsz, sizeof(_ih), &_ih);
	if (icmph == NULL || icmph->type > NR_ICMP_TYPES)
		return 0;

	/* Error message? */
	if (icmph->type != ICMP_DEST_UNREACH &&
	    icmph->type != ICMP_SOURCE_QUENCH &&
	    icmph->type != ICMP_TIME_EXCEEDED &&
	    icmph->type != ICMP_PARAMETERPROB &&
	    icmph->type != ICMP_REDIRECT)
		return 0;

	*nhoff += iphsz + sizeof(_ih);
	return 1;
}

static int
hmark_pkt_set_htuple_ipv4(const struct sk_buff *skb, struct hmark_tuple *t,
			  const struct xt_hmark_info *info)
{
	struct iphdr *ip, _ip;
	int nhoff = skb_network_offset(skb);

	ip = (struct iphdr *) (skb->data + nhoff);
	if (ip->protocol == IPPROTO_ICMP) {
		/* Use inner header in case of ICMP errors */
		if (get_inner_hdr(skb, ip->ihl * 4, &nhoff)) {
			ip = skb_header_pointer(skb, nhoff, sizeof(_ip), &_ip);
			if (ip == NULL)
				return -1;
		}
	}

	t->src = ip->saddr & info->src_mask.ip;
	t->dst = ip->daddr & info->dst_mask.ip;

	if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3))
		return 0;

	t->proto = ip->protocol;

	/* ICMP has no ports, skip */
	if (t->proto == IPPROTO_ICMP)
		return 0;

	/* follow-up fragments don't contain ports, skip all fragments */
	if (ip->frag_off & htons(IP_MF | IP_OFFSET))
		return 0;

	hmark_set_tuple_ports(skb, (ip->ihl * 4) + nhoff, t, info);

	return 0;
}

static unsigned int
hmark_tg_v4(struct sk_buff *skb, const struct xt_action_param *par)
{
	const struct xt_hmark_info *info = par->targinfo;
	struct hmark_tuple t;

	memset(&t, 0, sizeof(struct hmark_tuple));

	if (info->flags & XT_HMARK_FLAG(XT_HMARK_CT)) {
		if (hmark_ct_set_htuple(skb, &t, info) < 0)
			return XT_CONTINUE;
	} else {
		if (hmark_pkt_set_htuple_ipv4(skb, &t, info) < 0)
			return XT_CONTINUE;
	}

	skb->mark = hmark_hash(&t, info);
	return XT_CONTINUE;
}

static int hmark_tg_check(const struct xt_tgchk_param *par)
{
	const struct xt_hmark_info *info = par->targinfo;

	if (!info->hmodulus) {
		pr_info("xt_HMARK: hash modulus can't be zero\n");
		return -EINVAL;
	}
	if (info->proto_mask &&
	    (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3))) {
		pr_info("xt_HMARK: proto mask must be zero with L3 mode\n");
		return -EINVAL;
	}
	if (info->flags & XT_HMARK_FLAG(XT_HMARK_SPI_MASK) &&
	    (info->flags & (XT_HMARK_FLAG(XT_HMARK_SPORT_MASK) |
			    XT_HMARK_FLAG(XT_HMARK_DPORT_MASK)))) {
		pr_info("xt_HMARK: spi-mask and port-mask can't be combined\n");
		return -EINVAL;
	}
	if (info->flags & XT_HMARK_FLAG(XT_HMARK_SPI) &&
	    (info->flags & (XT_HMARK_FLAG(XT_HMARK_SPORT) |
			    XT_HMARK_FLAG(XT_HMARK_DPORT)))) {
		pr_info("xt_HMARK: spi-set and port-set can't be combined\n");
		return -EINVAL;
	}
	return 0;
}
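/* Both families register under the same target name "HMARK" and share the
 * same check routine; the IPv6 entry is only built when ip6tables support
 * is compiled in.
 */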
static struct xt_target hmark_tg_reg[] __read_mostly = {
	{
		.name		= "HMARK",
		.family		= NFPROTO_IPV4,
		.target		= hmark_tg_v4,
		.targetsize	= sizeof(struct xt_hmark_info),
		.checkentry	= hmark_tg_check,
		.me		= THIS_MODULE,
	},
#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
	{
		.name		= "HMARK",
		.family		= NFPROTO_IPV6,
		.target		= hmark_tg_v6,
		.targetsize	= sizeof(struct xt_hmark_info),
		.checkentry	= hmark_tg_check,
		.me		= THIS_MODULE,
	},
#endif
};

static int __init hmark_tg_init(void)
{
	return xt_register_targets(hmark_tg_reg, ARRAY_SIZE(hmark_tg_reg));
}

static void __exit hmark_tg_exit(void)
{
	xt_unregister_targets(hmark_tg_reg, ARRAY_SIZE(hmark_tg_reg));
}

module_init(hmark_tg_init);
module_exit(hmark_tg_exit);
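/* Example usage (illustrative sketch, not part of this module): hash new
 * flows into one of two marks so an fwmark-based routing policy can balance
 * them over two uplinks. Option syntax follows the iptables-extensions
 * HMARK target; the concrete values here are made up:
 *
 *   iptables -t mangle -A PREROUTING -m conntrack --ctstate NEW \
 *            -j HMARK --hmark-tuple src,dst,sport,dport \
 *            --hmark-mod 2 --hmark-offset 100 --hmark-rnd 0xfeedcafe
 *
 * --hmark-mod maps to info->hmodulus and --hmark-offset to info->hoffset in
 * hmark_hash() above, so the resulting marks are 100 and 101.
 */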