This source file includes the following definitions.
- bictcp_reset
- bictcp_clock
- bictcp_hystart_reset
- bictcp_init
- bictcp_cwnd_event
- cubic_root
- bictcp_update
- bictcp_cong_avoid
- bictcp_recalc_ssthresh
- bictcp_state
- hystart_update
- bictcp_acked
- cubictcp_register
- cubictcp_unregister
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27 #include <linux/mm.h>
28 #include <linux/module.h>
29 #include <linux/math64.h>
30 #include <net/tcp.h>
31
/* Fixed-point denominator for the 'beta' tunable (beta / BICTCP_BETA_SCALE). */
#define BICTCP_BETA_SCALE	1024

/* Elapsed time on the cubic curve is shifted left by BICTCP_HZ bits and
 * divided by HZ, i.e. it is expressed in units of 2^-BICTCP_HZ seconds.
 */
#define BICTCP_HZ		10

/* Bit flags used by the hystart_detect tunable and bictcp::found. */
#define HYSTART_ACK_TRAIN	0x1
#define HYSTART_DELAY		0x2

/* Number of delay samples collected per round before the delay test runs. */
#define HYSTART_MIN_SAMPLES	8
/* Bounds for the delay-increase threshold; delays are kept as msec << 3. */
#define HYSTART_DELAY_MIN	(4U << 3)
#define HYSTART_DELAY_MAX	(16U << 3)
#define HYSTART_DELAY_THRESH(x)	clamp(x, HYSTART_DELAY_MIN, HYSTART_DELAY_MAX)
46
47 static int fast_convergence __read_mostly = 1;
48 static int beta __read_mostly = 717;
49 static int initial_ssthresh __read_mostly;
50 static int bic_scale __read_mostly = 41;
51 static int tcp_friendliness __read_mostly = 1;
52
53 static int hystart __read_mostly = 1;
54 static int hystart_detect __read_mostly = HYSTART_ACK_TRAIN | HYSTART_DELAY;
55 static int hystart_low_window __read_mostly = 16;
56 static int hystart_ack_delta __read_mostly = 2;
57
58 static u32 cube_rtt_scale __read_mostly;
59 static u32 beta_scale __read_mostly;
60 static u64 cube_factor __read_mostly;
61
62
63 module_param(fast_convergence, int, 0644);
64 MODULE_PARM_DESC(fast_convergence, "turn on/off fast convergence");
65 module_param(beta, int, 0644);
66 MODULE_PARM_DESC(beta, "beta for multiplicative increase");
67 module_param(initial_ssthresh, int, 0644);
68 MODULE_PARM_DESC(initial_ssthresh, "initial value of slow start threshold");
69 module_param(bic_scale, int, 0444);
70 MODULE_PARM_DESC(bic_scale, "scale (scaled by 1024) value for bic function (bic_scale/1024)");
71 module_param(tcp_friendliness, int, 0644);
72 MODULE_PARM_DESC(tcp_friendliness, "turn on/off tcp friendliness");
73 module_param(hystart, int, 0644);
74 MODULE_PARM_DESC(hystart, "turn on/off hybrid slow start algorithm");
75 module_param(hystart_detect, int, 0644);
76 MODULE_PARM_DESC(hystart_detect, "hybrid slow start detection mechanisms"
77 " 1: packet-train 2: delay 3: both packet-train and delay");
78 module_param(hystart_low_window, int, 0644);
79 MODULE_PARM_DESC(hystart_low_window, "lower bound cwnd for hybrid slow start");
80 module_param(hystart_ack_delta, int, 0644);
81 MODULE_PARM_DESC(hystart_ack_delta, "spacing between ack's indicating train (msecs)");
82
83
84 struct bictcp {
85 u32 cnt;
86 u32 last_max_cwnd;
87 u32 last_cwnd;
88 u32 last_time;
89 u32 bic_origin_point;
90 u32 bic_K;
91
92 u32 delay_min;
93 u32 epoch_start;
94 u32 ack_cnt;
95 u32 tcp_cwnd;
96 u16 unused;
97 u8 sample_cnt;
98 u8 found;
99 u32 round_start;
100 u32 end_seq;
101 u32 last_ack;
102 u32 curr_rtt;
103 };
104
105 static inline void bictcp_reset(struct bictcp *ca)
106 {
107 ca->cnt = 0;
108 ca->last_max_cwnd = 0;
109 ca->last_cwnd = 0;
110 ca->last_time = 0;
111 ca->bic_origin_point = 0;
112 ca->bic_K = 0;
113 ca->delay_min = 0;
114 ca->epoch_start = 0;
115 ca->ack_cnt = 0;
116 ca->tcp_cwnd = 0;
117 ca->found = 0;
118 }
119
120 static inline u32 bictcp_clock(void)
121 {
122 #if HZ < 1000
123 return ktime_to_ms(ktime_get_real());
124 #else
125 return jiffies_to_msecs(jiffies);
126 #endif
127 }
128
129 static inline void bictcp_hystart_reset(struct sock *sk)
130 {
131 struct tcp_sock *tp = tcp_sk(sk);
132 struct bictcp *ca = inet_csk_ca(sk);
133
134 ca->round_start = ca->last_ack = bictcp_clock();
135 ca->end_seq = tp->snd_nxt;
136 ca->curr_rtt = 0;
137 ca->sample_cnt = 0;
138 }
139
140 static void bictcp_init(struct sock *sk)
141 {
142 struct bictcp *ca = inet_csk_ca(sk);
143
144 bictcp_reset(ca);
145
146 if (hystart)
147 bictcp_hystart_reset(sk);
148
149 if (!hystart && initial_ssthresh)
150 tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
151 }
152
153 static void bictcp_cwnd_event(struct sock *sk, enum tcp_ca_event event)
154 {
155 if (event == CA_EVENT_TX_START) {
156 struct bictcp *ca = inet_csk_ca(sk);
157 u32 now = tcp_jiffies32;
158 s32 delta;
159
160 delta = now - tcp_sk(sk)->lsndtime;
161
162
163
164
165 if (ca->epoch_start && delta > 0) {
166 ca->epoch_start += delta;
167 if (after(ca->epoch_start, now))
168 ca->epoch_start = now;
169 }
170 return;
171 }
172 }
173
174
175
176
177
178 static u32 cubic_root(u64 a)
179 {
180 u32 x, b, shift;
181
182
183
184
185
186
187
188
189 static const u8 v[] = {
190 0, 54, 54, 54, 118, 118, 118, 118,
191 123, 129, 134, 138, 143, 147, 151, 156,
192 157, 161, 164, 168, 170, 173, 176, 179,
193 181, 185, 187, 190, 192, 194, 197, 199,
194 200, 202, 204, 206, 209, 211, 213, 215,
195 217, 219, 221, 222, 224, 225, 227, 229,
196 231, 232, 234, 236, 237, 239, 240, 242,
197 244, 245, 246, 248, 250, 251, 252, 254,
198 };
199
200 b = fls64(a);
201 if (b < 7) {
202
203 return ((u32)v[(u32)a] + 35) >> 6;
204 }
205
206 b = ((b * 84) >> 8) - 1;
207 shift = (a >> (b * 3));
208
209 x = ((u32)(((u32)v[shift] + 10) << b)) >> 6;
210
211
212
213
214
215
216
217 x = (2 * x + (u32)div64_u64(a, (u64)x * (u64)(x - 1)));
218 x = ((x * 341) >> 10);
219 return x;
220 }
221
222
223
224
225 static inline void bictcp_update(struct bictcp *ca, u32 cwnd, u32 acked)
226 {
227 u32 delta, bic_target, max_cnt;
228 u64 offs, t;
229
230 ca->ack_cnt += acked;
231
232 if (ca->last_cwnd == cwnd &&
233 (s32)(tcp_jiffies32 - ca->last_time) <= HZ / 32)
234 return;
235
236
237
238
239
240 if (ca->epoch_start && tcp_jiffies32 == ca->last_time)
241 goto tcp_friendliness;
242
243 ca->last_cwnd = cwnd;
244 ca->last_time = tcp_jiffies32;
245
246 if (ca->epoch_start == 0) {
247 ca->epoch_start = tcp_jiffies32;
248 ca->ack_cnt = acked;
249 ca->tcp_cwnd = cwnd;
250
251 if (ca->last_max_cwnd <= cwnd) {
252 ca->bic_K = 0;
253 ca->bic_origin_point = cwnd;
254 } else {
255
256
257
258 ca->bic_K = cubic_root(cube_factor
259 * (ca->last_max_cwnd - cwnd));
260 ca->bic_origin_point = ca->last_max_cwnd;
261 }
262 }
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278 t = (s32)(tcp_jiffies32 - ca->epoch_start);
279 t += msecs_to_jiffies(ca->delay_min >> 3);
280
281 t <<= BICTCP_HZ;
282 do_div(t, HZ);
283
284 if (t < ca->bic_K)
285 offs = ca->bic_K - t;
286 else
287 offs = t - ca->bic_K;
288
289
290 delta = (cube_rtt_scale * offs * offs * offs) >> (10+3*BICTCP_HZ);
291 if (t < ca->bic_K)
292 bic_target = ca->bic_origin_point - delta;
293 else
294 bic_target = ca->bic_origin_point + delta;
295
296
297 if (bic_target > cwnd) {
298 ca->cnt = cwnd / (bic_target - cwnd);
299 } else {
300 ca->cnt = 100 * cwnd;
301 }
302
303
304
305
306
307 if (ca->last_max_cwnd == 0 && ca->cnt > 20)
308 ca->cnt = 20;
309
310 tcp_friendliness:
311
312 if (tcp_friendliness) {
313 u32 scale = beta_scale;
314
315 delta = (cwnd * scale) >> 3;
316 while (ca->ack_cnt > delta) {
317 ca->ack_cnt -= delta;
318 ca->tcp_cwnd++;
319 }
320
321 if (ca->tcp_cwnd > cwnd) {
322 delta = ca->tcp_cwnd - cwnd;
323 max_cnt = cwnd / delta;
324 if (ca->cnt > max_cnt)
325 ca->cnt = max_cnt;
326 }
327 }
328
329
330
331
332 ca->cnt = max(ca->cnt, 2U);
333 }
334
335 static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
336 {
337 struct tcp_sock *tp = tcp_sk(sk);
338 struct bictcp *ca = inet_csk_ca(sk);
339
340 if (!tcp_is_cwnd_limited(sk))
341 return;
342
343 if (tcp_in_slow_start(tp)) {
344 if (hystart && after(ack, ca->end_seq))
345 bictcp_hystart_reset(sk);
346 acked = tcp_slow_start(tp, acked);
347 if (!acked)
348 return;
349 }
350 bictcp_update(ca, tp->snd_cwnd, acked);
351 tcp_cong_avoid_ai(tp, ca->cnt, acked);
352 }
353
354 static u32 bictcp_recalc_ssthresh(struct sock *sk)
355 {
356 const struct tcp_sock *tp = tcp_sk(sk);
357 struct bictcp *ca = inet_csk_ca(sk);
358
359 ca->epoch_start = 0;
360
361
362 if (tp->snd_cwnd < ca->last_max_cwnd && fast_convergence)
363 ca->last_max_cwnd = (tp->snd_cwnd * (BICTCP_BETA_SCALE + beta))
364 / (2 * BICTCP_BETA_SCALE);
365 else
366 ca->last_max_cwnd = tp->snd_cwnd;
367
368 return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U);
369 }
370
371 static void bictcp_state(struct sock *sk, u8 new_state)
372 {
373 if (new_state == TCP_CA_Loss) {
374 bictcp_reset(inet_csk_ca(sk));
375 bictcp_hystart_reset(sk);
376 }
377 }
378
379 static void hystart_update(struct sock *sk, u32 delay)
380 {
381 struct tcp_sock *tp = tcp_sk(sk);
382 struct bictcp *ca = inet_csk_ca(sk);
383
384 if (ca->found & hystart_detect)
385 return;
386
387 if (hystart_detect & HYSTART_ACK_TRAIN) {
388 u32 now = bictcp_clock();
389
390
391 if ((s32)(now - ca->last_ack) <= hystart_ack_delta) {
392 ca->last_ack = now;
393 if ((s32)(now - ca->round_start) > ca->delay_min >> 4) {
394 ca->found |= HYSTART_ACK_TRAIN;
395 NET_INC_STATS(sock_net(sk),
396 LINUX_MIB_TCPHYSTARTTRAINDETECT);
397 NET_ADD_STATS(sock_net(sk),
398 LINUX_MIB_TCPHYSTARTTRAINCWND,
399 tp->snd_cwnd);
400 tp->snd_ssthresh = tp->snd_cwnd;
401 }
402 }
403 }
404
405 if (hystart_detect & HYSTART_DELAY) {
406
407 if (ca->sample_cnt < HYSTART_MIN_SAMPLES) {
408 if (ca->curr_rtt == 0 || ca->curr_rtt > delay)
409 ca->curr_rtt = delay;
410
411 ca->sample_cnt++;
412 } else {
413 if (ca->curr_rtt > ca->delay_min +
414 HYSTART_DELAY_THRESH(ca->delay_min >> 3)) {
415 ca->found |= HYSTART_DELAY;
416 NET_INC_STATS(sock_net(sk),
417 LINUX_MIB_TCPHYSTARTDELAYDETECT);
418 NET_ADD_STATS(sock_net(sk),
419 LINUX_MIB_TCPHYSTARTDELAYCWND,
420 tp->snd_cwnd);
421 tp->snd_ssthresh = tp->snd_cwnd;
422 }
423 }
424 }
425 }
426
427
428
429
430 static void bictcp_acked(struct sock *sk, const struct ack_sample *sample)
431 {
432 const struct tcp_sock *tp = tcp_sk(sk);
433 struct bictcp *ca = inet_csk_ca(sk);
434 u32 delay;
435
436
437 if (sample->rtt_us < 0)
438 return;
439
440
441 if (ca->epoch_start && (s32)(tcp_jiffies32 - ca->epoch_start) < HZ)
442 return;
443
444 delay = (sample->rtt_us << 3) / USEC_PER_MSEC;
445 if (delay == 0)
446 delay = 1;
447
448
449 if (ca->delay_min == 0 || ca->delay_min > delay)
450 ca->delay_min = delay;
451
452
453 if (hystart && tcp_in_slow_start(tp) &&
454 tp->snd_cwnd >= hystart_low_window)
455 hystart_update(sk, delay);
456 }
457
458 static struct tcp_congestion_ops cubictcp __read_mostly = {
459 .init = bictcp_init,
460 .ssthresh = bictcp_recalc_ssthresh,
461 .cong_avoid = bictcp_cong_avoid,
462 .set_state = bictcp_state,
463 .undo_cwnd = tcp_reno_undo_cwnd,
464 .cwnd_event = bictcp_cwnd_event,
465 .pkts_acked = bictcp_acked,
466 .owner = THIS_MODULE,
467 .name = "cubic",
468 };
469
470 static int __init cubictcp_register(void)
471 {
472 BUILD_BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE);
473
474
475
476
477
478 beta_scale = 8*(BICTCP_BETA_SCALE+beta) / 3
479 / (BICTCP_BETA_SCALE - beta);
480
481 cube_rtt_scale = (bic_scale * 10);
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497 cube_factor = 1ull << (10+3*BICTCP_HZ);
498
499
500 do_div(cube_factor, bic_scale * 10);
501
502 return tcp_register_congestion_control(&cubictcp);
503 }
504
505 static void __exit cubictcp_unregister(void)
506 {
507 tcp_unregister_congestion_control(&cubictcp);
508 }
509
510 module_init(cubictcp_register);
511 module_exit(cubictcp_unregister);
512
513 MODULE_AUTHOR("Sangtae Ha, Stephen Hemminger");
514 MODULE_LICENSE("GPL");
515 MODULE_DESCRIPTION("CUBIC TCP");
516 MODULE_VERSION("2.3");