1/* 2 * (C) 2011 Pablo Neira Ayuso <pablo@netfilter.org> 3 * (C) 2011 Intra2net AG <http://www.intra2net.com> 4 * 5 * This program is free software; you can redistribute it and/or modify 6 * it under the terms of the GNU General Public License version 2 as 7 * published by the Free Software Foundation (or any later at your option). 8 */ 9#include <linux/init.h> 10#include <linux/module.h> 11#include <linux/kernel.h> 12#include <linux/skbuff.h> 13#include <linux/atomic.h> 14#include <linux/netlink.h> 15#include <linux/rculist.h> 16#include <linux/slab.h> 17#include <linux/types.h> 18#include <linux/errno.h> 19#include <net/netlink.h> 20#include <net/sock.h> 21 22#include <linux/netfilter.h> 23#include <linux/netfilter/nfnetlink.h> 24#include <linux/netfilter/nfnetlink_acct.h> 25 26MODULE_LICENSE("GPL"); 27MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); 28MODULE_DESCRIPTION("nfacct: Extended Netfilter accounting infrastructure"); 29 30struct nf_acct { 31 atomic64_t pkts; 32 atomic64_t bytes; 33 unsigned long flags; 34 struct list_head head; 35 atomic_t refcnt; 36 char name[NFACCT_NAME_MAX]; 37 struct rcu_head rcu_head; 38 char data[0]; 39}; 40 41struct nfacct_filter { 42 u32 value; 43 u32 mask; 44}; 45 46#define NFACCT_F_QUOTA (NFACCT_F_QUOTA_PKTS | NFACCT_F_QUOTA_BYTES) 47#define NFACCT_OVERQUOTA_BIT 2 /* NFACCT_F_OVERQUOTA */ 48 49static int 50nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb, 51 const struct nlmsghdr *nlh, const struct nlattr * const tb[]) 52{ 53 struct nf_acct *nfacct, *matching = NULL; 54 struct net *net = sock_net(nfnl); 55 char *acct_name; 56 unsigned int size = 0; 57 u32 flags = 0; 58 59 if (!tb[NFACCT_NAME]) 60 return -EINVAL; 61 62 acct_name = nla_data(tb[NFACCT_NAME]); 63 if (strlen(acct_name) == 0) 64 return -EINVAL; 65 66 list_for_each_entry(nfacct, &net->nfnl_acct_list, head) { 67 if (strncmp(nfacct->name, acct_name, NFACCT_NAME_MAX) != 0) 68 continue; 69 70 if (nlh->nlmsg_flags & NLM_F_EXCL) 71 return -EEXIST; 72 73 matching = nfacct; 74 break; 75 } 76 77 if (matching) { 78 if (nlh->nlmsg_flags & NLM_F_REPLACE) { 79 /* reset counters if you request a replacement. */ 80 atomic64_set(&matching->pkts, 0); 81 atomic64_set(&matching->bytes, 0); 82 smp_mb__before_atomic(); 83 /* reset overquota flag if quota is enabled. */ 84 if ((matching->flags & NFACCT_F_QUOTA)) 85 clear_bit(NFACCT_OVERQUOTA_BIT, 86 &matching->flags); 87 return 0; 88 } 89 return -EBUSY; 90 } 91 92 if (tb[NFACCT_FLAGS]) { 93 flags = ntohl(nla_get_be32(tb[NFACCT_FLAGS])); 94 if (flags & ~NFACCT_F_QUOTA) 95 return -EOPNOTSUPP; 96 if ((flags & NFACCT_F_QUOTA) == NFACCT_F_QUOTA) 97 return -EINVAL; 98 if (flags & NFACCT_F_OVERQUOTA) 99 return -EINVAL; 100 101 size += sizeof(u64); 102 } 103 104 nfacct = kzalloc(sizeof(struct nf_acct) + size, GFP_KERNEL); 105 if (nfacct == NULL) 106 return -ENOMEM; 107 108 if (flags & NFACCT_F_QUOTA) { 109 u64 *quota = (u64 *)nfacct->data; 110 111 *quota = be64_to_cpu(nla_get_be64(tb[NFACCT_QUOTA])); 112 nfacct->flags = flags; 113 } 114 115 strncpy(nfacct->name, nla_data(tb[NFACCT_NAME]), NFACCT_NAME_MAX); 116 117 if (tb[NFACCT_BYTES]) { 118 atomic64_set(&nfacct->bytes, 119 be64_to_cpu(nla_get_be64(tb[NFACCT_BYTES]))); 120 } 121 if (tb[NFACCT_PKTS]) { 122 atomic64_set(&nfacct->pkts, 123 be64_to_cpu(nla_get_be64(tb[NFACCT_PKTS]))); 124 } 125 atomic_set(&nfacct->refcnt, 1); 126 list_add_tail_rcu(&nfacct->head, &net->nfnl_acct_list); 127 return 0; 128} 129 130static int 131nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, 132 int event, struct nf_acct *acct) 133{ 134 struct nlmsghdr *nlh; 135 struct nfgenmsg *nfmsg; 136 unsigned int flags = portid ? NLM_F_MULTI : 0; 137 u64 pkts, bytes; 138 u32 old_flags; 139 140 event |= NFNL_SUBSYS_ACCT << 8; 141 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); 142 if (nlh == NULL) 143 goto nlmsg_failure; 144 145 nfmsg = nlmsg_data(nlh); 146 nfmsg->nfgen_family = AF_UNSPEC; 147 nfmsg->version = NFNETLINK_V0; 148 nfmsg->res_id = 0; 149 150 if (nla_put_string(skb, NFACCT_NAME, acct->name)) 151 goto nla_put_failure; 152 153 old_flags = acct->flags; 154 if (type == NFNL_MSG_ACCT_GET_CTRZERO) { 155 pkts = atomic64_xchg(&acct->pkts, 0); 156 bytes = atomic64_xchg(&acct->bytes, 0); 157 smp_mb__before_atomic(); 158 if (acct->flags & NFACCT_F_QUOTA) 159 clear_bit(NFACCT_OVERQUOTA_BIT, &acct->flags); 160 } else { 161 pkts = atomic64_read(&acct->pkts); 162 bytes = atomic64_read(&acct->bytes); 163 } 164 if (nla_put_be64(skb, NFACCT_PKTS, cpu_to_be64(pkts)) || 165 nla_put_be64(skb, NFACCT_BYTES, cpu_to_be64(bytes)) || 166 nla_put_be32(skb, NFACCT_USE, htonl(atomic_read(&acct->refcnt)))) 167 goto nla_put_failure; 168 if (acct->flags & NFACCT_F_QUOTA) { 169 u64 *quota = (u64 *)acct->data; 170 171 if (nla_put_be32(skb, NFACCT_FLAGS, htonl(old_flags)) || 172 nla_put_be64(skb, NFACCT_QUOTA, cpu_to_be64(*quota))) 173 goto nla_put_failure; 174 } 175 nlmsg_end(skb, nlh); 176 return skb->len; 177 178nlmsg_failure: 179nla_put_failure: 180 nlmsg_cancel(skb, nlh); 181 return -1; 182} 183 184static int 185nfnl_acct_dump(struct sk_buff *skb, struct netlink_callback *cb) 186{ 187 struct net *net = sock_net(skb->sk); 188 struct nf_acct *cur, *last; 189 const struct nfacct_filter *filter = cb->data; 190 191 if (cb->args[2]) 192 return 0; 193 194 last = (struct nf_acct *)cb->args[1]; 195 if (cb->args[1]) 196 cb->args[1] = 0; 197 198 rcu_read_lock(); 199 list_for_each_entry_rcu(cur, &net->nfnl_acct_list, head) { 200 if (last) { 201 if (cur != last) 202 continue; 203 204 last = NULL; 205 } 206 207 if (filter && (cur->flags & filter->mask) != filter->value) 208 continue; 209 210 if (nfnl_acct_fill_info(skb, NETLINK_CB(cb->skb).portid, 211 cb->nlh->nlmsg_seq, 212 NFNL_MSG_TYPE(cb->nlh->nlmsg_type), 213 NFNL_MSG_ACCT_NEW, cur) < 0) { 214 cb->args[1] = (unsigned long)cur; 215 break; 216 } 217 } 218 if (!cb->args[1]) 219 cb->args[2] = 1; 220 rcu_read_unlock(); 221 return skb->len; 222} 223 224static int nfnl_acct_done(struct netlink_callback *cb) 225{ 226 kfree(cb->data); 227 return 0; 228} 229 230static const struct nla_policy filter_policy[NFACCT_FILTER_MAX + 1] = { 231 [NFACCT_FILTER_MASK] = { .type = NLA_U32 }, 232 [NFACCT_FILTER_VALUE] = { .type = NLA_U32 }, 233}; 234 235static struct nfacct_filter * 236nfacct_filter_alloc(const struct nlattr * const attr) 237{ 238 struct nfacct_filter *filter; 239 struct nlattr *tb[NFACCT_FILTER_MAX + 1]; 240 int err; 241 242 err = nla_parse_nested(tb, NFACCT_FILTER_MAX, attr, filter_policy); 243 if (err < 0) 244 return ERR_PTR(err); 245 246 filter = kzalloc(sizeof(struct nfacct_filter), GFP_KERNEL); 247 if (!filter) 248 return ERR_PTR(-ENOMEM); 249 250 filter->mask = ntohl(nla_get_be32(tb[NFACCT_FILTER_MASK])); 251 filter->value = ntohl(nla_get_be32(tb[NFACCT_FILTER_VALUE])); 252 253 return filter; 254} 255 256static int 257nfnl_acct_get(struct sock *nfnl, struct sk_buff *skb, 258 const struct nlmsghdr *nlh, const struct nlattr * const tb[]) 259{ 260 struct net *net = sock_net(nfnl); 261 int ret = -ENOENT; 262 struct nf_acct *cur; 263 char *acct_name; 264 265 if (nlh->nlmsg_flags & NLM_F_DUMP) { 266 struct netlink_dump_control c = { 267 .dump = nfnl_acct_dump, 268 .done = nfnl_acct_done, 269 }; 270 271 if (tb[NFACCT_FILTER]) { 272 struct nfacct_filter *filter; 273 274 filter = nfacct_filter_alloc(tb[NFACCT_FILTER]); 275 if (IS_ERR(filter)) 276 return PTR_ERR(filter); 277 278 c.data = filter; 279 } 280 return netlink_dump_start(nfnl, skb, nlh, &c); 281 } 282 283 if (!tb[NFACCT_NAME]) 284 return -EINVAL; 285 acct_name = nla_data(tb[NFACCT_NAME]); 286 287 list_for_each_entry(cur, &net->nfnl_acct_list, head) { 288 struct sk_buff *skb2; 289 290 if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX)!= 0) 291 continue; 292 293 skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 294 if (skb2 == NULL) { 295 ret = -ENOMEM; 296 break; 297 } 298 299 ret = nfnl_acct_fill_info(skb2, NETLINK_CB(skb).portid, 300 nlh->nlmsg_seq, 301 NFNL_MSG_TYPE(nlh->nlmsg_type), 302 NFNL_MSG_ACCT_NEW, cur); 303 if (ret <= 0) { 304 kfree_skb(skb2); 305 break; 306 } 307 ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).portid, 308 MSG_DONTWAIT); 309 if (ret > 0) 310 ret = 0; 311 312 /* this avoids a loop in nfnetlink. */ 313 return ret == -EAGAIN ? -ENOBUFS : ret; 314 } 315 return ret; 316} 317 318/* try to delete object, fail if it is still in use. */ 319static int nfnl_acct_try_del(struct nf_acct *cur) 320{ 321 int ret = 0; 322 323 /* we want to avoid races with nfnl_acct_find_get. */ 324 if (atomic_dec_and_test(&cur->refcnt)) { 325 /* We are protected by nfnl mutex. */ 326 list_del_rcu(&cur->head); 327 kfree_rcu(cur, rcu_head); 328 } else { 329 /* still in use, restore reference counter. */ 330 atomic_inc(&cur->refcnt); 331 ret = -EBUSY; 332 } 333 return ret; 334} 335 336static int 337nfnl_acct_del(struct sock *nfnl, struct sk_buff *skb, 338 const struct nlmsghdr *nlh, const struct nlattr * const tb[]) 339{ 340 struct net *net = sock_net(nfnl); 341 char *acct_name; 342 struct nf_acct *cur; 343 int ret = -ENOENT; 344 345 if (!tb[NFACCT_NAME]) { 346 list_for_each_entry(cur, &net->nfnl_acct_list, head) 347 nfnl_acct_try_del(cur); 348 349 return 0; 350 } 351 acct_name = nla_data(tb[NFACCT_NAME]); 352 353 list_for_each_entry(cur, &net->nfnl_acct_list, head) { 354 if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX) != 0) 355 continue; 356 357 ret = nfnl_acct_try_del(cur); 358 if (ret < 0) 359 return ret; 360 361 break; 362 } 363 return ret; 364} 365 366static const struct nla_policy nfnl_acct_policy[NFACCT_MAX+1] = { 367 [NFACCT_NAME] = { .type = NLA_NUL_STRING, .len = NFACCT_NAME_MAX-1 }, 368 [NFACCT_BYTES] = { .type = NLA_U64 }, 369 [NFACCT_PKTS] = { .type = NLA_U64 }, 370 [NFACCT_FLAGS] = { .type = NLA_U32 }, 371 [NFACCT_QUOTA] = { .type = NLA_U64 }, 372 [NFACCT_FILTER] = {.type = NLA_NESTED }, 373}; 374 375static const struct nfnl_callback nfnl_acct_cb[NFNL_MSG_ACCT_MAX] = { 376 [NFNL_MSG_ACCT_NEW] = { .call = nfnl_acct_new, 377 .attr_count = NFACCT_MAX, 378 .policy = nfnl_acct_policy }, 379 [NFNL_MSG_ACCT_GET] = { .call = nfnl_acct_get, 380 .attr_count = NFACCT_MAX, 381 .policy = nfnl_acct_policy }, 382 [NFNL_MSG_ACCT_GET_CTRZERO] = { .call = nfnl_acct_get, 383 .attr_count = NFACCT_MAX, 384 .policy = nfnl_acct_policy }, 385 [NFNL_MSG_ACCT_DEL] = { .call = nfnl_acct_del, 386 .attr_count = NFACCT_MAX, 387 .policy = nfnl_acct_policy }, 388}; 389 390static const struct nfnetlink_subsystem nfnl_acct_subsys = { 391 .name = "acct", 392 .subsys_id = NFNL_SUBSYS_ACCT, 393 .cb_count = NFNL_MSG_ACCT_MAX, 394 .cb = nfnl_acct_cb, 395}; 396 397MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_ACCT); 398 399struct nf_acct *nfnl_acct_find_get(struct net *net, const char *acct_name) 400{ 401 struct nf_acct *cur, *acct = NULL; 402 403 rcu_read_lock(); 404 list_for_each_entry_rcu(cur, &net->nfnl_acct_list, head) { 405 if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX)!= 0) 406 continue; 407 408 if (!try_module_get(THIS_MODULE)) 409 goto err; 410 411 if (!atomic_inc_not_zero(&cur->refcnt)) { 412 module_put(THIS_MODULE); 413 goto err; 414 } 415 416 acct = cur; 417 break; 418 } 419err: 420 rcu_read_unlock(); 421 return acct; 422} 423EXPORT_SYMBOL_GPL(nfnl_acct_find_get); 424 425void nfnl_acct_put(struct nf_acct *acct) 426{ 427 if (atomic_dec_and_test(&acct->refcnt)) 428 kfree_rcu(acct, rcu_head); 429 430 module_put(THIS_MODULE); 431} 432EXPORT_SYMBOL_GPL(nfnl_acct_put); 433 434void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct) 435{ 436 atomic64_inc(&nfacct->pkts); 437 atomic64_add(skb->len, &nfacct->bytes); 438} 439EXPORT_SYMBOL_GPL(nfnl_acct_update); 440 441static void nfnl_overquota_report(struct nf_acct *nfacct) 442{ 443 int ret; 444 struct sk_buff *skb; 445 446 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); 447 if (skb == NULL) 448 return; 449 450 ret = nfnl_acct_fill_info(skb, 0, 0, NFNL_MSG_ACCT_OVERQUOTA, 0, 451 nfacct); 452 if (ret <= 0) { 453 kfree_skb(skb); 454 return; 455 } 456 netlink_broadcast(init_net.nfnl, skb, 0, NFNLGRP_ACCT_QUOTA, 457 GFP_ATOMIC); 458} 459 460int nfnl_acct_overquota(const struct sk_buff *skb, struct nf_acct *nfacct) 461{ 462 u64 now; 463 u64 *quota; 464 int ret = NFACCT_UNDERQUOTA; 465 466 /* no place here if we don't have a quota */ 467 if (!(nfacct->flags & NFACCT_F_QUOTA)) 468 return NFACCT_NO_QUOTA; 469 470 quota = (u64 *)nfacct->data; 471 now = (nfacct->flags & NFACCT_F_QUOTA_PKTS) ? 472 atomic64_read(&nfacct->pkts) : atomic64_read(&nfacct->bytes); 473 474 ret = now > *quota; 475 476 if (now >= *quota && 477 !test_and_set_bit(NFACCT_OVERQUOTA_BIT, &nfacct->flags)) { 478 nfnl_overquota_report(nfacct); 479 } 480 481 return ret; 482} 483EXPORT_SYMBOL_GPL(nfnl_acct_overquota); 484 485static int __net_init nfnl_acct_net_init(struct net *net) 486{ 487 INIT_LIST_HEAD(&net->nfnl_acct_list); 488 489 return 0; 490} 491 492static void __net_exit nfnl_acct_net_exit(struct net *net) 493{ 494 struct nf_acct *cur, *tmp; 495 496 list_for_each_entry_safe(cur, tmp, &net->nfnl_acct_list, head) { 497 list_del_rcu(&cur->head); 498 499 if (atomic_dec_and_test(&cur->refcnt)) 500 kfree_rcu(cur, rcu_head); 501 } 502} 503 504static struct pernet_operations nfnl_acct_ops = { 505 .init = nfnl_acct_net_init, 506 .exit = nfnl_acct_net_exit, 507}; 508 509static int __init nfnl_acct_init(void) 510{ 511 int ret; 512 513 ret = register_pernet_subsys(&nfnl_acct_ops); 514 if (ret < 0) { 515 pr_err("nfnl_acct_init: failed to register pernet ops\n"); 516 goto err_out; 517 } 518 519 pr_info("nfnl_acct: registering with nfnetlink.\n"); 520 ret = nfnetlink_subsys_register(&nfnl_acct_subsys); 521 if (ret < 0) { 522 pr_err("nfnl_acct_init: cannot register with nfnetlink.\n"); 523 goto cleanup_pernet; 524 } 525 return 0; 526 527cleanup_pernet: 528 unregister_pernet_subsys(&nfnl_acct_ops); 529err_out: 530 return ret; 531} 532 533static void __exit nfnl_acct_exit(void) 534{ 535 pr_info("nfnl_acct: unregistering from nfnetlink.\n"); 536 nfnetlink_subsys_unregister(&nfnl_acct_subsys); 537 unregister_pernet_subsys(&nfnl_acct_ops); 538} 539 540module_init(nfnl_acct_init); 541module_exit(nfnl_acct_exit); 542