#include <net/tcp.h>
#include <net/tcp_memcontrol.h>
#include <net/sock.h>
#include <net/ip.h>
#include <linux/nsproxy.h>
#include <linux/memcontrol.h>
#include <linux/module.h>

int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
{
	/*
	 * The root cgroup does not use page_counters, but rather
	 * relies on the data already collected by the network
	 * subsystem.
	 */
	struct mem_cgroup *parent = parent_mem_cgroup(memcg);
	struct page_counter *counter_parent = NULL;
	struct cg_proto *cg_proto, *parent_cg;

	cg_proto = tcp_prot.proto_cgroup(memcg);
	if (!cg_proto)
		return 0;

	cg_proto->sysctl_mem[0] = sysctl_tcp_mem[0];
	cg_proto->sysctl_mem[1] = sysctl_tcp_mem[1];
	cg_proto->sysctl_mem[2] = sysctl_tcp_mem[2];
	cg_proto->memory_pressure = 0;
	cg_proto->memcg = memcg;

	parent_cg = tcp_prot.proto_cgroup(parent);
	if (parent_cg)
		counter_parent = &parent_cg->memory_allocated;

	page_counter_init(&cg_proto->memory_allocated, counter_parent);
	percpu_counter_init(&cg_proto->sockets_allocated, 0, GFP_KERNEL);

	return 0;
}
EXPORT_SYMBOL(tcp_init_cgroup);

void tcp_destroy_cgroup(struct mem_cgroup *memcg)
{
	struct cg_proto *cg_proto;

	cg_proto = tcp_prot.proto_cgroup(memcg);
	if (!cg_proto)
		return;

	percpu_counter_destroy(&cg_proto->sockets_allocated);

	if (test_bit(MEMCG_SOCK_ACTIVATED, &cg_proto->flags))
		static_key_slow_dec(&memcg_socket_limit_enabled);
}
EXPORT_SYMBOL(tcp_destroy_cgroup);

static int tcp_update_limit(struct mem_cgroup *memcg, unsigned long nr_pages)
{
	struct cg_proto *cg_proto;
	int i;
	int ret;

	cg_proto = tcp_prot.proto_cgroup(memcg);
	if (!cg_proto)
		return -EINVAL;

	ret = page_counter_limit(&cg_proto->memory_allocated, nr_pages);
	if (ret)
		return ret;

	for (i = 0; i < 3; i++)
		cg_proto->sysctl_mem[i] = min_t(long, nr_pages,
						sysctl_tcp_mem[i]);

	if (nr_pages == PAGE_COUNTER_MAX)
		clear_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags);
	else {
		/*
		 * The active bit needs to be written after the static_key
		 * update. This is what guarantees that the socket activation
		 * function is the last one to run. See sock_update_memcg() for
		 * details, and note that we don't mark any socket as belonging
		 * to this memcg until that flag is up.
		 *
		 * We need to do this, because static_keys will span multiple
		 * sites, but we can't control their order. If we mark a socket
		 * as accounted, but the accounting functions are not patched
		 * in yet, we'll lose accounting.
		 *
		 * We never race with the readers in sock_update_memcg(),
		 * because when this value changes, the code to process it is
		 * not patched in yet.
		 *
		 * The activated bit is used to guarantee that no two writers
		 * will do the update in the same memcg. Without that, we can't
		 * properly shut down the static key.
		 */
		if (!test_and_set_bit(MEMCG_SOCK_ACTIVATED, &cg_proto->flags))
			static_key_slow_inc(&memcg_socket_limit_enabled);
		set_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags);
	}

	return 0;
}

/* Values used as cftype->private to tell the handlers below apart. */
enum {
	RES_USAGE,
	RES_LIMIT,
	RES_MAX_USAGE,
	RES_FAILCNT,
};

static DEFINE_MUTEX(tcp_limit_mutex);

static ssize_t tcp_cgroup_write(struct kernfs_open_file *of,
				char *buf, size_t nbytes, loff_t off)
{
	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
	unsigned long nr_pages;
	int ret = 0;

	buf = strstrip(buf);

	switch (of_cft(of)->private) {
	case RES_LIMIT:
		/* see memcontrol.c */
		ret = page_counter_memparse(buf, "-1", &nr_pages);
		if (ret)
			break;
		mutex_lock(&tcp_limit_mutex);
		ret = tcp_update_limit(memcg, nr_pages);
		mutex_unlock(&tcp_limit_mutex);
		break;
	default:
		ret = -EINVAL;
		break;
	}
	return ret ?: nbytes;
}

static u64 tcp_cgroup_read(struct cgroup_subsys_state *css, struct cftype *cft)
{
	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
	struct cg_proto *cg_proto = tcp_prot.proto_cgroup(memcg);
	u64 val;

	switch (cft->private) {
	case RES_LIMIT:
		if (!cg_proto)
			return PAGE_COUNTER_MAX;
		val = cg_proto->memory_allocated.limit;
		val *= PAGE_SIZE;
		break;
	case RES_USAGE:
		if (!cg_proto)
			val = atomic_long_read(&tcp_memory_allocated);
		else
			val = page_counter_read(&cg_proto->memory_allocated);
		val *= PAGE_SIZE;
		break;
	case RES_FAILCNT:
		if (!cg_proto)
			return 0;
		val = cg_proto->memory_allocated.failcnt;
		break;
	case RES_MAX_USAGE:
		if (!cg_proto)
			return 0;
		val = cg_proto->memory_allocated.watermark;
		val *= PAGE_SIZE;
		break;
	default:
		BUG();
	}
	return val;
}

static ssize_t tcp_cgroup_reset(struct kernfs_open_file *of,
				char *buf, size_t nbytes, loff_t off)
{
	struct mem_cgroup *memcg;
	struct cg_proto *cg_proto;

	memcg = mem_cgroup_from_css(of_css(of));
	cg_proto = tcp_prot.proto_cgroup(memcg);
	if (!cg_proto)
		return nbytes;

	switch (of_cft(of)->private) {
	case RES_MAX_USAGE:
		page_counter_reset_watermark(&cg_proto->memory_allocated);
		break;
	case RES_FAILCNT:
		cg_proto->memory_allocated.failcnt = 0;
		break;
	}

	return nbytes;
}

/*
 * Legacy (cgroup v1) control files, exposed by the memory controller as
 * memory.kmem.tcp.*.
 */
static struct cftype tcp_files[] = {
	{
		.name = "kmem.tcp.limit_in_bytes",
		.write = tcp_cgroup_write,
		.read_u64 = tcp_cgroup_read,
		.private = RES_LIMIT,
	},
	{
		.name = "kmem.tcp.usage_in_bytes",
		.read_u64 = tcp_cgroup_read,
		.private = RES_USAGE,
	},
	{
		.name = "kmem.tcp.failcnt",
		.private = RES_FAILCNT,
		.write = tcp_cgroup_reset,
		.read_u64 = tcp_cgroup_read,
	},
	{
		.name = "kmem.tcp.max_usage_in_bytes",
		.private = RES_MAX_USAGE,
		.write = tcp_cgroup_reset,
		.read_u64 = tcp_cgroup_read,
	},
	{ }	/* terminate */
};

static int __init tcp_memcontrol_init(void)
{
	WARN_ON(cgroup_add_legacy_cftypes(&memory_cgrp_subsys, tcp_files));
	return 0;
}
__initcall(tcp_memcontrol_init);
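/*
 * For reference: the ordering comment in tcp_update_limit() is about the
 * reader side, which lives in mm/memcontrol.c rather than in this file.
 * The sketch below is a simplified, illustrative reconstruction of that
 * reader (sock_update_memcg()); field names and locking details vary
 * between kernel versions, so treat it as an assumption, not exact code.
 */
#if 0	/* illustrative sketch only, not compiled */
void sock_update_memcg(struct sock *sk)
{
	/* This branch is dead code until static_key_slow_inc() patches it in. */
	if (mem_cgroup_sockets_enabled) {
		struct mem_cgroup *memcg;
		struct cg_proto *cg_proto;

		rcu_read_lock();
		memcg = mem_cgroup_from_task(current);
		cg_proto = sk->sk_prot->proto_cgroup(memcg);
		/*
		 * Sockets are attached to the memcg only once MEMCG_SOCK_ACTIVE
		 * is set, and tcp_update_limit() sets that bit strictly after
		 * enabling the static key, so no socket is ever marked as
		 * accounted before the accounting code is patched in.
		 */
		if (cg_proto && test_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags) &&
		    css_tryget_online(&memcg->css))
			sk->sk_cgrp = cg_proto;
		rcu_read_unlock();
	}
}
#endif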