This source file includes the following definitions:
- rds_cong_tree_walk
- rds_cong_from_addr
- rds_cong_add_conn
- rds_cong_remove_conn
- rds_cong_get_maps
- rds_cong_queue_updates
- rds_cong_map_updated
- rds_cong_updated_since
- rds_cong_set_bit
- rds_cong_clear_bit
- rds_cong_test_bit
- rds_cong_add_socket
- rds_cong_remove_socket
- rds_cong_wait
- rds_cong_exit
- rds_cong_update_alloc
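
A minimal sketch of how these entry points fit together (the calling
contexts shown are illustrative, not part of this file): connections
attach to their congestion maps at setup, the receive side flips bits as
sockets fill and drain, and the send side tests or waits on the remote map.

	/* illustrative caller context -- assumed, not from this file */
	err = rds_cong_get_maps(conn);         /* allocate/find both maps */
	rds_cong_add_conn(conn);               /* receive local map updates */

	rds_cong_set_bit(conn->c_lcong, port); /* local rcvbuf filled */
	rds_cong_queue_updates(conn->c_lcong); /* push bitmap to all peers */

	err = rds_cong_wait(conn->c_fcong, port, nonblock, rs); /* before send */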
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/rbtree.h>
#include <linux/bitops.h>
#include <linux/export.h>

#include "rds.h"
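
/*
 * RDS congestion state is kept in dense per-address bitmaps rather than
 * per-flow state.  Each bound address owns a map with one bit per 16-bit
 * port; a set bit means the socket bound to that port is congested and
 * sends to it should block or fail with -ENOBUFS.  Whenever a map changes,
 * the whole bitmap is sent over every connection terminating at that
 * address, so senders need only a cheap bit test (rds_cong_test_bit)
 * before transmitting.
 */

/* Bumped on every map update; lets pollers detect changes cheaply. */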
static atomic_t rds_cong_generation = ATOMIC_INIT(0);
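
/*
 * Sockets that enabled congestion monitoring (rs_cong_monitor) sit on this
 * list; rds_cong_map_updated() flags them via rs_cong_notify when ports
 * they care about change state.
 */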
static LIST_HEAD(rds_cong_monitor);
static DEFINE_RWLOCK(rds_cong_monitor_lock);
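
/*
 * A single global lock protects both the rb-tree of maps and each map's
 * connection list.  These paths are infrequent (connection setup/teardown
 * and map changes), and some callers run with interrupts off, so the lock
 * is always taken irqsave.
 */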
static DEFINE_SPINLOCK(rds_cong_lock);
static struct rb_root rds_cong_tree = RB_ROOT;

static struct rds_cong_map *rds_cong_tree_walk(const struct in6_addr *addr,
					       struct rds_cong_map *insert)
{
	struct rb_node **p = &rds_cong_tree.rb_node;
	struct rb_node *parent = NULL;
	struct rds_cong_map *map;

	while (*p) {
		int diff;

		parent = *p;
		map = rb_entry(parent, struct rds_cong_map, m_rb_node);

		diff = rds_addr_cmp(addr, &map->m_addr);
		if (diff < 0)
			p = &(*p)->rb_left;
		else if (diff > 0)
			p = &(*p)->rb_right;
		else
			return map;
	}

	if (insert) {
		rb_link_node(&insert->m_rb_node, parent, p);
		rb_insert_color(&insert->m_rb_node, &rds_cong_tree);
	}
	return NULL;
}
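
/*
 * There is only ever one map for a given address.  The map is allocated
 * optimistically, then either inserted into the tree or, when the address
 * is already present, freed again in favour of the existing map.  Maps are
 * only freed at module unload (rds_cong_exit), after all connections are
 * gone.
 */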
static struct rds_cong_map *rds_cong_from_addr(const struct in6_addr *addr)
{
	struct rds_cong_map *map;
	struct rds_cong_map *ret = NULL;
	unsigned long zp;
	unsigned long i;
	unsigned long flags;

	map = kzalloc(sizeof(struct rds_cong_map), GFP_KERNEL);
	if (!map)
		return NULL;

	map->m_addr = *addr;
	init_waitqueue_head(&map->m_waitq);
	INIT_LIST_HEAD(&map->m_conn_list);

	for (i = 0; i < RDS_CONG_MAP_PAGES; i++) {
		zp = get_zeroed_page(GFP_KERNEL);
		if (zp == 0)
			goto out;
		map->m_page_addrs[i] = zp;
	}

	spin_lock_irqsave(&rds_cong_lock, flags);
	ret = rds_cong_tree_walk(addr, map);
	spin_unlock_irqrestore(&rds_cong_lock, flags);

	if (!ret) {
		ret = map;
		map = NULL;
	}

out:
	if (map) {
		for (i = 0; i < RDS_CONG_MAP_PAGES && map->m_page_addrs[i]; i++)
			free_page(map->m_page_addrs[i]);
		kfree(map);
	}

	rdsdebug("map %p for addr %pI6c\n", ret, addr);

	return ret;
}
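
/*
 * Hang the connection off its local map's list so that local map changes
 * can be pushed out over every connection terminating at this address.
 */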
void rds_cong_add_conn(struct rds_connection *conn)
{
	unsigned long flags;

	rdsdebug("conn %p now on map %p\n", conn, conn->c_lcong);
	spin_lock_irqsave(&rds_cong_lock, flags);
	list_add_tail(&conn->c_map_item, &conn->c_lcong->m_conn_list);
	spin_unlock_irqrestore(&rds_cong_lock, flags);
}

void rds_cong_remove_conn(struct rds_connection *conn)
{
	unsigned long flags;

	rdsdebug("removing conn %p from map %p\n", conn, conn->c_lcong);
	spin_lock_irqsave(&rds_cong_lock, flags);
	list_del_init(&conn->c_map_item);
	spin_unlock_irqrestore(&rds_cong_lock, flags);
}

int rds_cong_get_maps(struct rds_connection *conn)
{
	conn->c_lcong = rds_cong_from_addr(&conn->c_laddr);
	conn->c_fcong = rds_cong_from_addr(&conn->c_faddr);

	if (!(conn->c_lcong && conn->c_fcong))
		return -ENOMEM;

	return 0;
}

void rds_cong_queue_updates(struct rds_cong_map *map)
{
	struct rds_connection *conn;
	unsigned long flags;

	spin_lock_irqsave(&rds_cong_lock, flags);

	list_for_each_entry(conn, &map->m_conn_list, c_map_item) {
		struct rds_conn_path *cp = &conn->c_path[0];

		rcu_read_lock();
		if (!test_and_set_bit(0, &conn->c_map_queued) &&
		    !rds_destroy_pending(cp->cp_conn)) {
			rds_stats_inc(s_cong_update_queued);
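			/*
			 * The send can't be done inline: we may be on a
			 * transport's receive path with socket locks held
			 * and interrupts masked, so hand the update off to
			 * the workqueue.
			 */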
			queue_delayed_work(rds_wq, &cp->cp_send_w, 0);
		}
		rcu_read_unlock();
	}

	spin_unlock_irqrestore(&rds_cong_lock, flags);
}

void rds_cong_map_updated(struct rds_cong_map *map, uint64_t portmask)
{
	rdsdebug("waking map %p for %pI6c\n",
		 map, &map->m_addr);
	rds_stats_inc(s_cong_update_received);
	atomic_inc(&rds_cong_generation);
	if (waitqueue_active(&map->m_waitq))
		wake_up(&map->m_waitq);
	if (waitqueue_active(&rds_poll_waitq))
		wake_up_all(&rds_poll_waitq);

	if (portmask && !list_empty(&rds_cong_monitor)) {
		unsigned long flags;
		struct rds_sock *rs;

		read_lock_irqsave(&rds_cong_monitor_lock, flags);
		list_for_each_entry(rs, &rds_cong_monitor, rs_cong_list) {
			spin_lock(&rs->rs_lock);
			rs->rs_cong_notify |= (rs->rs_cong_mask & portmask);
			rs->rs_cong_mask &= ~portmask;
			spin_unlock(&rs->rs_lock);
			if (rs->rs_cong_notify)
				rds_wake_sk_sleep(rs);
		}
		read_unlock_irqrestore(&rds_cong_monitor_lock, flags);
	}
}
EXPORT_SYMBOL_GPL(rds_cong_map_updated);

int rds_cong_updated_since(unsigned long *recent)
{
	unsigned long gen = atomic_read(&rds_cong_generation);

	if (likely(*recent == gen))
		return 0;
	*recent = gen;
	return 1;
}
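
/*
 * Ports map onto the bitmap pages as (port / RDS_CONG_MAP_PAGE_BITS,
 * port % RDS_CONG_MAP_PAGE_BITS).  The little-endian bit ops give the map
 * the same byte layout on every host, so it can go on the wire as-is.
 */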
void rds_cong_set_bit(struct rds_cong_map *map, __be16 port)
{
	unsigned long i;
	unsigned long off;

	rdsdebug("setting congestion for %pI6c:%u in map %p\n",
		 &map->m_addr, ntohs(port), map);

	i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS;
	off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS;

	set_bit_le(off, (void *)map->m_page_addrs[i]);
}

void rds_cong_clear_bit(struct rds_cong_map *map, __be16 port)
{
	unsigned long i;
	unsigned long off;

	rdsdebug("clearing congestion for %pI6c:%u in map %p\n",
		 &map->m_addr, ntohs(port), map);

	i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS;
	off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS;

	clear_bit_le(off, (void *)map->m_page_addrs[i]);
}

static int rds_cong_test_bit(struct rds_cong_map *map, __be16 port)
{
	unsigned long i;
	unsigned long off;

	i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS;
	off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS;

	return test_bit_le(off, (void *)map->m_page_addrs[i]);
}

void rds_cong_add_socket(struct rds_sock *rs)
{
	unsigned long flags;

	write_lock_irqsave(&rds_cong_monitor_lock, flags);
	if (list_empty(&rs->rs_cong_list))
		list_add(&rs->rs_cong_list, &rds_cong_monitor);
	write_unlock_irqrestore(&rds_cong_monitor_lock, flags);
}

void rds_cong_remove_socket(struct rds_sock *rs)
{
	unsigned long flags;
	struct rds_cong_map *map;

	write_lock_irqsave(&rds_cong_monitor_lock, flags);
	list_del_init(&rs->rs_cong_list);
	write_unlock_irqrestore(&rds_cong_monitor_lock, flags);
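
	/* update the congestion map for the now-closed port */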
	spin_lock_irqsave(&rds_cong_lock, flags);
	map = rds_cong_tree_walk(&rs->rs_bound_addr, NULL);
	spin_unlock_irqrestore(&rds_cong_lock, flags);

	if (map && rds_cong_test_bit(map, rs->rs_bound_port)) {
		rds_cong_clear_bit(map, rs->rs_bound_port);
		rds_cong_queue_updates(map);
	}
}

int rds_cong_wait(struct rds_cong_map *map, __be16 port, int nonblock,
		  struct rds_sock *rs)
{
	if (!rds_cong_test_bit(map, port))
		return 0;
	if (nonblock) {
		if (rs && rs->rs_cong_monitor) {
			unsigned long flags;
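
			/* It would be nice to have an atomic set_bit on a
			 * u64; lacking one, take rs_lock. */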
			spin_lock_irqsave(&rs->rs_lock, flags);
			rs->rs_cong_mask |= RDS_CONG_MONITOR_MASK(ntohs(port));
			spin_unlock_irqrestore(&rs->rs_lock, flags);
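
			/* Recheck - a congestion update may have arrived
			 * while we were registering interest. */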
			if (!rds_cong_test_bit(map, port))
				return 0;
		}
		rds_stats_inc(s_cong_send_error);
		return -ENOBUFS;
	}

	rds_stats_inc(s_cong_send_blocked);
	rdsdebug("waiting on map %p for port %u\n", map, be16_to_cpu(port));

	return wait_event_interruptible(map->m_waitq,
					!rds_cong_test_bit(map, port));
}

void rds_cong_exit(void)
{
	struct rb_node *node;
	struct rds_cong_map *map;
	unsigned long i;

	while ((node = rb_first(&rds_cong_tree))) {
		map = rb_entry(node, struct rds_cong_map, m_rb_node);
		rdsdebug("freeing map %p\n", map);
		rb_erase(&map->m_rb_node, &rds_cong_tree);
		for (i = 0; i < RDS_CONG_MAP_PAGES && map->m_page_addrs[i]; i++)
			free_page(map->m_page_addrs[i]);
		kfree(map);
	}
}
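
/*
 * Wrap this connection's local congestion map pages in an RDS message so
 * the current bitmap can be sent to the peer without copying.
 */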
struct rds_message *rds_cong_update_alloc(struct rds_connection *conn)
{
	struct rds_cong_map *map = conn->c_lcong;
	struct rds_message *rm;

	rm = rds_message_map_pages(map->m_page_addrs, RDS_CONG_MAP_BYTES);
	if (!IS_ERR(rm))
		rm->m_inc.i_hdr.h_flags = RDS_FLAG_CONG_BITMAP;

	return rm;
}