This source file includes the following definitions:
- tcp_mstamp_refresh
- tcp_event_new_data_sent
- tcp_acceptable_seq
- tcp_advertise_mss
- tcp_cwnd_restart
- tcp_event_data_sent
- tcp_event_ack_sent
- tcp_select_initial_window
- tcp_select_window
- tcp_ecn_send_synack
- tcp_ecn_send_syn
- tcp_ecn_clear_syn
- tcp_ecn_make_synack
- tcp_ecn_send
- tcp_init_nondata_skb
- tcp_urg_mode
- smc_options_write
- tcp_options_write
- smc_set_option
- smc_set_option_cond
- tcp_syn_options
- tcp_synack_options
- tcp_established_options
- tcp_tsq_write
- tcp_tsq_handler
- tcp_tasklet_func
- tcp_release_cb
- tcp_tasklet_init
- tcp_wfree
- tcp_pace_kick
- tcp_update_skb_after_send
- __tcp_transmit_skb
- tcp_transmit_skb
- tcp_queue_skb
- tcp_set_skb_tso_segs
- tcp_adjust_pcount
- tcp_has_tx_tstamp
- tcp_fragment_tstamp
- tcp_skb_fragment_eor
- tcp_insert_write_queue_after
- tcp_fragment
- __pskb_trim_head
- tcp_trim_head
- __tcp_mtu_to_mss
- tcp_mtu_to_mss
- tcp_mss_to_mtu
- tcp_mtup_init
- tcp_sync_mss
- tcp_current_mss
- tcp_cwnd_application_limited
- tcp_cwnd_validate
- tcp_minshall_check
- tcp_minshall_update
- tcp_nagle_check
- tcp_tso_autosize
- tcp_tso_segs
- tcp_mss_split_point
- tcp_cwnd_test
- tcp_init_tso_segs
- tcp_nagle_test
- tcp_snd_wnd_test
- tso_fragment
- tcp_tso_should_defer
- tcp_mtu_check_reprobe
- tcp_can_coalesce_send_queue_head
- tcp_mtu_probe
- tcp_pacing_check
- tcp_small_queue_check
- tcp_chrono_set
- tcp_chrono_start
- tcp_chrono_stop
- tcp_write_xmit
- tcp_schedule_loss_probe
- skb_still_in_host_queue
- tcp_send_loss_probe
- __tcp_push_pending_frames
- tcp_push_one
- __tcp_select_window
- tcp_skb_collapse_tstamp
- tcp_collapse_retrans
- tcp_can_collapse
- tcp_retrans_try_collapse
- __tcp_retransmit_skb
- tcp_retransmit_skb
- tcp_xmit_retransmit_queue
- sk_forced_mem_schedule
- tcp_send_fin
- tcp_send_active_reset
- tcp_send_synack
- tcp_make_synack
- tcp_ca_dst_init
- tcp_connect_init
- tcp_connect_queue_skb
- tcp_send_syn_data
- tcp_connect
- tcp_send_delayed_ack
- __tcp_send_ack
- tcp_send_ack
- tcp_xmit_probe_skb
- tcp_send_window_probe
- tcp_write_wakeup
- tcp_send_probe0
- tcp_rtx_synack
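/*
 * TCP output engine: the functions listed above build TCP headers and
 * options, split and account for segments (TSO, MTU probing, TSQ),
 * enforce the congestion and send windows, and hand finished segments
 * to the IP layer for transmission and retransmission.
 */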
38 #define pr_fmt(fmt) "TCP: " fmt
39
40 #include <net/tcp.h>
41
42 #include <linux/compiler.h>
43 #include <linux/gfp.h>
44 #include <linux/module.h>
45 #include <linux/static_key.h>
46
47 #include <trace/events/tcp.h>
48
49
50
51
52 void tcp_mstamp_refresh(struct tcp_sock *tp)
53 {
54 u64 val = tcp_clock_ns();
55
56 tp->tcp_clock_cache = val;
57 tp->tcp_mstamp = div_u64(val, NSEC_PER_USEC);
58 }
59
60 static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
61 int push_one, gfp_t gfp);
62
63
64 static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb)
65 {
66 struct inet_connection_sock *icsk = inet_csk(sk);
67 struct tcp_sock *tp = tcp_sk(sk);
68 unsigned int prior_packets = tp->packets_out;
69
70 WRITE_ONCE(tp->snd_nxt, TCP_SKB_CB(skb)->end_seq);
71
72 __skb_unlink(skb, &sk->sk_write_queue);
73 tcp_rbtree_insert(&sk->tcp_rtx_queue, skb);
74
75 if (tp->highest_sack == NULL)
76 tp->highest_sack = skb;
77
78 tp->packets_out += tcp_skb_pcount(skb);
79 if (!prior_packets || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
80 tcp_rearm_rto(sk);
81
82 NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT,
83 tcp_skb_pcount(skb));
84 }
85
/* Choose the sequence number to place in an outgoing segment that
 * carries no new data (e.g. a window probe or keepalive): normally
 * SND.NXT, but if the peer has shrunk its window past SND.NXT, fall
 * back to the right edge of the send window so the segment is still
 * acceptable to the receiver.
 */
93 static inline __u32 tcp_acceptable_seq(const struct sock *sk)
94 {
95 const struct tcp_sock *tp = tcp_sk(sk);
96
97 if (!before(tcp_wnd_end(tp), tp->snd_nxt) ||
98 (tp->rx_opt.wscale_ok &&
99 ((tp->snd_nxt - tcp_wnd_end(tp)) < (1 << tp->rx_opt.rcv_wscale))))
100 return tp->snd_nxt;
101 else
102 return tcp_wnd_end(tp);
103 }
104
/* Calculate the MSS to advertise in a SYN segment.  This is the value
 * cached in tp->advmss, possibly lowered to the route's advmss metric;
 * it reflects our receive capabilities and is independent of the path
 * MTU used for sending.
 */
119 static __u16 tcp_advertise_mss(struct sock *sk)
120 {
121 struct tcp_sock *tp = tcp_sk(sk);
122 const struct dst_entry *dst = __sk_dst_get(sk);
123 int mss = tp->advmss;
124
125 if (dst) {
126 unsigned int metric = dst_metric_advmss(dst);
127
128 if (metric < mss) {
129 mss = metric;
130 tp->advmss = mss;
131 }
132 }
133
134 return (__u16)mss;
135 }
/* Congestion window restart after an idle period (RFC 2861): halve
 * snd_cwnd once per RTO of idle time, but never below the restart
 * (initial) window, and remember ssthresh before shrinking.
 */
140 void tcp_cwnd_restart(struct sock *sk, s32 delta)
141 {
142 struct tcp_sock *tp = tcp_sk(sk);
143 u32 restart_cwnd = tcp_init_cwnd(tp, __sk_dst_get(sk));
144 u32 cwnd = tp->snd_cwnd;
145
146 tcp_ca_event(sk, CA_EVENT_CWND_RESTART);
147
148 tp->snd_ssthresh = tcp_current_ssthresh(sk);
149 restart_cwnd = min(restart_cwnd, cwnd);
150
151 while ((delta -= inet_csk(sk)->icsk_rto) > 0 && cwnd > restart_cwnd)
152 cwnd >>= 1;
153 tp->snd_cwnd = max(cwnd, restart_cwnd);
154 tp->snd_cwnd_stamp = tcp_jiffies32;
155 tp->snd_cwnd_used = 0;
156 }
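/* Worked example for the restart loop above (illustrative values, not
 * taken from this file): with icsk_rto = 200 ms, restart_cwnd = 10 and
 * snd_cwnd = 40, a 1 s idle period halves cwnd 40 -> 20 -> 10 and then
 * stops, because halving never goes below restart_cwnd; the connection
 * resumes from roughly its initial window rather than its old cwnd.
 */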
157
158
159 static void tcp_event_data_sent(struct tcp_sock *tp,
160 struct sock *sk)
161 {
162 struct inet_connection_sock *icsk = inet_csk(sk);
163 const u32 now = tcp_jiffies32;
164
165 if (tcp_packets_in_flight(tp) == 0)
166 tcp_ca_event(sk, CA_EVENT_TX_START);
167
/* If this data segment closely follows the last received packet
 * (within the delayed-ACK timeout), the session looks interactive:
 * bump the pingpong counter so ACKs can be delayed and piggybacked.
 */
173 if (before(tp->lsndtime, icsk->icsk_ack.lrcvtime) &&
174 (u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato)
175 inet_csk_inc_pingpong_cnt(sk);
176
177 tp->lsndtime = now;
178 }
179
180
181 static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts,
182 u32 rcv_nxt)
183 {
184 struct tcp_sock *tp = tcp_sk(sk);
185
186 if (unlikely(tp->compressed_ack > TCP_FASTRETRANS_THRESH)) {
187 NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPACKCOMPRESSED,
188 tp->compressed_ack - TCP_FASTRETRANS_THRESH);
189 tp->compressed_ack = TCP_FASTRETRANS_THRESH;
190 if (hrtimer_try_to_cancel(&tp->compressed_ack_timer) == 1)
191 __sock_put(sk);
192 }
193
194 if (unlikely(rcv_nxt != tp->rcv_nxt))
195 return;
196 tcp_dec_quickack_mode(sk, pkts);
197 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
198 }
/* Determine the window to advertise at connection establishment and
 * the window scaling factor to use for the rest of the connection,
 * based on the receive buffer space, the MSS and any caller-supplied
 * clamp or initial window.
 */
207 void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss,
208 __u32 *rcv_wnd, __u32 *window_clamp,
209 int wscale_ok, __u8 *rcv_wscale,
210 __u32 init_rcv_wnd)
211 {
212 unsigned int space = (__space < 0 ? 0 : __space);
213
214
215 if (*window_clamp == 0)
216 (*window_clamp) = (U16_MAX << TCP_MAX_WSCALE);
217 space = min(*window_clamp, space);
218
219
220 if (space > mss)
221 space = rounddown(space, mss);
222
223
224
225
226
227
228
229
230
231 if (sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows)
232 (*rcv_wnd) = min(space, MAX_TCP_WINDOW);
233 else
234 (*rcv_wnd) = min_t(u32, space, U16_MAX);
235
236 if (init_rcv_wnd)
237 *rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss);
238
239 *rcv_wscale = 0;
240 if (wscale_ok) {
241
242 space = max_t(u32, space, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
243 space = max_t(u32, space, sysctl_rmem_max);
244 space = min_t(u32, space, *window_clamp);
245 *rcv_wscale = clamp_t(int, ilog2(space) - 15,
246 0, TCP_MAX_WSCALE);
247 }
248
249 (*window_clamp) = min_t(__u32, U16_MAX << (*rcv_wscale), *window_clamp);
250 }
251 EXPORT_SYMBOL(tcp_select_initial_window);
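/* Worked example for the scale selection above (a typical tcp_rmem[2]
 * of 6 MB is an assumption, not a value from this file): space is
 * raised to ~6 MB, ilog2(space) is 22, so rcv_wscale becomes
 * clamp(22 - 15, 0, TCP_MAX_WSCALE) = 7, which lets the socket
 * advertise windows up to 65535 << 7 (~8 MB), comfortably covering the
 * receive buffer.
 */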
/* Choose a new window to advertise on an outgoing segment, update the
 * per-socket state (rcv_wnd/rcv_wup) and return the value to place in
 * the header, with RFC 1323 scaling already applied.
 */
258 static u16 tcp_select_window(struct sock *sk)
259 {
260 struct tcp_sock *tp = tcp_sk(sk);
261 u32 old_win = tp->rcv_wnd;
262 u32 cur_win = tcp_receive_window(tp);
263 u32 new_win = __tcp_select_window(sk);
264
265
266 if (new_win < cur_win) {
267
268
269
270
271
272
273
274 if (new_win == 0)
275 NET_INC_STATS(sock_net(sk),
276 LINUX_MIB_TCPWANTZEROWINDOWADV);
277 new_win = ALIGN(cur_win, 1 << tp->rx_opt.rcv_wscale);
278 }
279 tp->rcv_wnd = new_win;
280 tp->rcv_wup = tp->rcv_nxt;
281
282
283
284
285 if (!tp->rx_opt.rcv_wscale &&
286 sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows)
287 new_win = min(new_win, MAX_TCP_WINDOW);
288 else
289 new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale));
290
291
292 new_win >>= tp->rx_opt.rcv_wscale;
293
294
295 if (new_win == 0) {
296 tp->pred_flags = 0;
297 if (old_win)
298 NET_INC_STATS(sock_net(sk),
299 LINUX_MIB_TCPTOZEROWINDOWADV);
300 } else if (old_win == 0) {
301 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFROMZEROWINDOWADV);
302 }
303
304 return new_win;
305 }
306
307
308 static void tcp_ecn_send_synack(struct sock *sk, struct sk_buff *skb)
309 {
310 const struct tcp_sock *tp = tcp_sk(sk);
311
312 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR;
313 if (!(tp->ecn_flags & TCP_ECN_OK))
314 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE;
315 else if (tcp_ca_needs_ecn(sk) ||
316 tcp_bpf_ca_needs_ecn(sk))
317 INET_ECN_xmit(sk);
318 }
319
320
321 static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)
322 {
323 struct tcp_sock *tp = tcp_sk(sk);
324 bool bpf_needs_ecn = tcp_bpf_ca_needs_ecn(sk);
325 bool use_ecn = sock_net(sk)->ipv4.sysctl_tcp_ecn == 1 ||
326 tcp_ca_needs_ecn(sk) || bpf_needs_ecn;
327
328 if (!use_ecn) {
329 const struct dst_entry *dst = __sk_dst_get(sk);
330
331 if (dst && dst_feature(dst, RTAX_FEATURE_ECN))
332 use_ecn = true;
333 }
334
335 tp->ecn_flags = 0;
336
337 if (use_ecn) {
338 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR;
339 tp->ecn_flags = TCP_ECN_OK;
340 if (tcp_ca_needs_ecn(sk) || bpf_needs_ecn)
341 INET_ECN_xmit(sk);
342 }
343 }
344
345 static void tcp_ecn_clear_syn(struct sock *sk, struct sk_buff *skb)
346 {
347 if (sock_net(sk)->ipv4.sysctl_tcp_ecn_fallback)
348
349
350
351 TCP_SKB_CB(skb)->tcp_flags &= ~(TCPHDR_ECE | TCPHDR_CWR);
352 }
353
354 static void
355 tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th)
356 {
357 if (inet_rsk(req)->ecn_ok)
358 th->ece = 1;
359 }
360
361
362
363
364 static void tcp_ecn_send(struct sock *sk, struct sk_buff *skb,
365 struct tcphdr *th, int tcp_header_len)
366 {
367 struct tcp_sock *tp = tcp_sk(sk);
368
369 if (tp->ecn_flags & TCP_ECN_OK) {
370
371 if (skb->len != tcp_header_len &&
372 !before(TCP_SKB_CB(skb)->seq, tp->snd_nxt)) {
373 INET_ECN_xmit(sk);
374 if (tp->ecn_flags & TCP_ECN_QUEUE_CWR) {
375 tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR;
376 th->cwr = 1;
377 skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
378 }
379 } else if (!tcp_ca_needs_ecn(sk)) {
380
381 INET_ECN_dontxmit(sk);
382 }
383 if (tp->ecn_flags & TCP_ECN_DEMAND_CWR)
384 th->ece = 1;
385 }
386 }
387
388
389
390
391 static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
392 {
393 skb->ip_summed = CHECKSUM_PARTIAL;
394
395 TCP_SKB_CB(skb)->tcp_flags = flags;
396 TCP_SKB_CB(skb)->sacked = 0;
397
398 tcp_skb_pcount_set(skb, 1);
399
400 TCP_SKB_CB(skb)->seq = seq;
401 if (flags & (TCPHDR_SYN | TCPHDR_FIN))
402 seq++;
403 TCP_SKB_CB(skb)->end_seq = seq;
404 }
405
406 static inline bool tcp_urg_mode(const struct tcp_sock *tp)
407 {
408 return tp->snd_una != tp->snd_up;
409 }
410
411 #define OPTION_SACK_ADVERTISE (1 << 0)
412 #define OPTION_TS (1 << 1)
413 #define OPTION_MD5 (1 << 2)
414 #define OPTION_WSCALE (1 << 3)
415 #define OPTION_FAST_OPEN_COOKIE (1 << 8)
416 #define OPTION_SMC (1 << 9)
417
418 static void smc_options_write(__be32 *ptr, u16 *options)
419 {
420 #if IS_ENABLED(CONFIG_SMC)
421 if (static_branch_unlikely(&tcp_have_smc)) {
422 if (unlikely(OPTION_SMC & *options)) {
423 *ptr++ = htonl((TCPOPT_NOP << 24) |
424 (TCPOPT_NOP << 16) |
425 (TCPOPT_EXP << 8) |
426 (TCPOLEN_EXP_SMC_BASE));
427 *ptr++ = htonl(TCPOPT_SMC_MAGIC);
428 }
429 }
430 #endif
431 }
432
433 struct tcp_out_options {
434 u16 options;
435 u16 mss;
436 u8 ws;
437 u8 num_sack_blocks;
438 u8 hash_size;
439 __u8 *hash_location;
440 __u32 tsval, tsecr;
441 struct tcp_fastopen_cookie *fastopen_cookie;
442 };
/* Write previously computed TCP options to the packet, directly after
 * the header.  The order matters: MD5, MSS, timestamps (folded
 * together with SACK-permitted when both are present), SACK-permitted,
 * window scale, SACK blocks, Fast Open cookie and finally the SMC
 * experimental option, each padded with NOPs to keep 32-bit alignment.
 */
457 static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
458 struct tcp_out_options *opts)
459 {
460 u16 options = opts->options;
461
462 if (unlikely(OPTION_MD5 & options)) {
463 *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
464 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
465
466 opts->hash_location = (__u8 *)ptr;
467 ptr += 4;
468 }
469
470 if (unlikely(opts->mss)) {
471 *ptr++ = htonl((TCPOPT_MSS << 24) |
472 (TCPOLEN_MSS << 16) |
473 opts->mss);
474 }
475
476 if (likely(OPTION_TS & options)) {
477 if (unlikely(OPTION_SACK_ADVERTISE & options)) {
478 *ptr++ = htonl((TCPOPT_SACK_PERM << 24) |
479 (TCPOLEN_SACK_PERM << 16) |
480 (TCPOPT_TIMESTAMP << 8) |
481 TCPOLEN_TIMESTAMP);
482 options &= ~OPTION_SACK_ADVERTISE;
483 } else {
484 *ptr++ = htonl((TCPOPT_NOP << 24) |
485 (TCPOPT_NOP << 16) |
486 (TCPOPT_TIMESTAMP << 8) |
487 TCPOLEN_TIMESTAMP);
488 }
489 *ptr++ = htonl(opts->tsval);
490 *ptr++ = htonl(opts->tsecr);
491 }
492
493 if (unlikely(OPTION_SACK_ADVERTISE & options)) {
494 *ptr++ = htonl((TCPOPT_NOP << 24) |
495 (TCPOPT_NOP << 16) |
496 (TCPOPT_SACK_PERM << 8) |
497 TCPOLEN_SACK_PERM);
498 }
499
500 if (unlikely(OPTION_WSCALE & options)) {
501 *ptr++ = htonl((TCPOPT_NOP << 24) |
502 (TCPOPT_WINDOW << 16) |
503 (TCPOLEN_WINDOW << 8) |
504 opts->ws);
505 }
506
507 if (unlikely(opts->num_sack_blocks)) {
508 struct tcp_sack_block *sp = tp->rx_opt.dsack ?
509 tp->duplicate_sack : tp->selective_acks;
510 int this_sack;
511
512 *ptr++ = htonl((TCPOPT_NOP << 24) |
513 (TCPOPT_NOP << 16) |
514 (TCPOPT_SACK << 8) |
515 (TCPOLEN_SACK_BASE + (opts->num_sack_blocks *
516 TCPOLEN_SACK_PERBLOCK)));
517
518 for (this_sack = 0; this_sack < opts->num_sack_blocks;
519 ++this_sack) {
520 *ptr++ = htonl(sp[this_sack].start_seq);
521 *ptr++ = htonl(sp[this_sack].end_seq);
522 }
523
524 tp->rx_opt.dsack = 0;
525 }
526
527 if (unlikely(OPTION_FAST_OPEN_COOKIE & options)) {
528 struct tcp_fastopen_cookie *foc = opts->fastopen_cookie;
529 u8 *p = (u8 *)ptr;
530 u32 len;
531
532 if (foc->exp) {
533 len = TCPOLEN_EXP_FASTOPEN_BASE + foc->len;
534 *ptr = htonl((TCPOPT_EXP << 24) | (len << 16) |
535 TCPOPT_FASTOPEN_MAGIC);
536 p += TCPOLEN_EXP_FASTOPEN_BASE;
537 } else {
538 len = TCPOLEN_FASTOPEN_BASE + foc->len;
539 *p++ = TCPOPT_FASTOPEN;
540 *p++ = len;
541 }
542
543 memcpy(p, foc->val, foc->len);
544 if ((len & 3) == 2) {
545 p[foc->len] = TCPOPT_NOP;
546 p[foc->len + 1] = TCPOPT_NOP;
547 }
548 ptr += (len + 3) >> 2;
549 }
550
551 smc_options_write(ptr, &options);
552 }
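/* Example of what the writer above emits for a plain established-state
 * segment carrying only timestamps: one 32-bit word of NOP, NOP,
 * kind 8 (TIMESTAMP), length 10, followed by the 32-bit TSval and
 * TSecr words - 12 bytes of options in total, keeping the header
 * 32-bit aligned.  SACK blocks, when present, follow in the same
 * NOP/NOP/kind/length pattern.
 */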
553
554 static void smc_set_option(const struct tcp_sock *tp,
555 struct tcp_out_options *opts,
556 unsigned int *remaining)
557 {
558 #if IS_ENABLED(CONFIG_SMC)
559 if (static_branch_unlikely(&tcp_have_smc)) {
560 if (tp->syn_smc) {
561 if (*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
562 opts->options |= OPTION_SMC;
563 *remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED;
564 }
565 }
566 }
567 #endif
568 }
569
570 static void smc_set_option_cond(const struct tcp_sock *tp,
571 const struct inet_request_sock *ireq,
572 struct tcp_out_options *opts,
573 unsigned int *remaining)
574 {
575 #if IS_ENABLED(CONFIG_SMC)
576 if (static_branch_unlikely(&tcp_have_smc)) {
577 if (tp->syn_smc && ireq->smc_ok) {
578 if (*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
579 opts->options |= OPTION_SMC;
580 *remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED;
581 }
582 }
583 }
584 #endif
585 }
/* Compute the TCP options to send on a SYN, filling in *opts and
 * returning the number of option bytes that will be written.
 */
590 static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
591 struct tcp_out_options *opts,
592 struct tcp_md5sig_key **md5)
593 {
594 struct tcp_sock *tp = tcp_sk(sk);
595 unsigned int remaining = MAX_TCP_OPTION_SPACE;
596 struct tcp_fastopen_request *fastopen = tp->fastopen_req;
597
598 *md5 = NULL;
599 #ifdef CONFIG_TCP_MD5SIG
600 if (static_branch_unlikely(&tcp_md5_needed) &&
601 rcu_access_pointer(tp->md5sig_info)) {
602 *md5 = tp->af_specific->md5_lookup(sk, sk);
603 if (*md5) {
604 opts->options |= OPTION_MD5;
605 remaining -= TCPOLEN_MD5SIG_ALIGNED;
606 }
607 }
608 #endif
609
610
611
612
613
614
615
616
617
618
619 opts->mss = tcp_advertise_mss(sk);
620 remaining -= TCPOLEN_MSS_ALIGNED;
621
622 if (likely(sock_net(sk)->ipv4.sysctl_tcp_timestamps && !*md5)) {
623 opts->options |= OPTION_TS;
624 opts->tsval = tcp_skb_timestamp(skb) + tp->tsoffset;
625 opts->tsecr = tp->rx_opt.ts_recent;
626 remaining -= TCPOLEN_TSTAMP_ALIGNED;
627 }
628 if (likely(sock_net(sk)->ipv4.sysctl_tcp_window_scaling)) {
629 opts->ws = tp->rx_opt.rcv_wscale;
630 opts->options |= OPTION_WSCALE;
631 remaining -= TCPOLEN_WSCALE_ALIGNED;
632 }
633 if (likely(sock_net(sk)->ipv4.sysctl_tcp_sack)) {
634 opts->options |= OPTION_SACK_ADVERTISE;
635 if (unlikely(!(OPTION_TS & opts->options)))
636 remaining -= TCPOLEN_SACKPERM_ALIGNED;
637 }
638
639 if (fastopen && fastopen->cookie.len >= 0) {
640 u32 need = fastopen->cookie.len;
641
642 need += fastopen->cookie.exp ? TCPOLEN_EXP_FASTOPEN_BASE :
643 TCPOLEN_FASTOPEN_BASE;
644 need = (need + 3) & ~3U;
645 if (remaining >= need) {
646 opts->options |= OPTION_FAST_OPEN_COOKIE;
647 opts->fastopen_cookie = &fastopen->cookie;
648 remaining -= need;
649 tp->syn_fastopen = 1;
650 tp->syn_fastopen_exp = fastopen->cookie.exp ? 1 : 0;
651 }
652 }
653
654 smc_set_option(tp, opts, &remaining);
655
656 return MAX_TCP_OPTION_SPACE - remaining;
657 }
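/* Option budget sketch for the SYN path above (sizes are the aligned
 * TCPOLEN_* constants): MSS takes 4 bytes, timestamps 12 and window
 * scale 4, so a typical SYN consumes 20 of the 40 bytes of
 * MAX_TCP_OPTION_SPACE and leaves 20 for a Fast Open cookie (2-byte
 * base plus up to a 16-byte cookie, rounded up to a multiple of 4).
 * An MD5 option costs 20 bytes and, as the code above shows, also
 * suppresses timestamps.
 */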
/* Set up TCP options for a SYN-ACK, based on the request sock. */
660 static unsigned int tcp_synack_options(const struct sock *sk,
661 struct request_sock *req,
662 unsigned int mss, struct sk_buff *skb,
663 struct tcp_out_options *opts,
664 const struct tcp_md5sig_key *md5,
665 struct tcp_fastopen_cookie *foc)
666 {
667 struct inet_request_sock *ireq = inet_rsk(req);
668 unsigned int remaining = MAX_TCP_OPTION_SPACE;
669
670 #ifdef CONFIG_TCP_MD5SIG
671 if (md5) {
672 opts->options |= OPTION_MD5;
673 remaining -= TCPOLEN_MD5SIG_ALIGNED;
674
675
676
677
678
679
680 ireq->tstamp_ok &= !ireq->sack_ok;
681 }
682 #endif
683
684
685 opts->mss = mss;
686 remaining -= TCPOLEN_MSS_ALIGNED;
687
688 if (likely(ireq->wscale_ok)) {
689 opts->ws = ireq->rcv_wscale;
690 opts->options |= OPTION_WSCALE;
691 remaining -= TCPOLEN_WSCALE_ALIGNED;
692 }
693 if (likely(ireq->tstamp_ok)) {
694 opts->options |= OPTION_TS;
695 opts->tsval = tcp_skb_timestamp(skb) + tcp_rsk(req)->ts_off;
696 opts->tsecr = req->ts_recent;
697 remaining -= TCPOLEN_TSTAMP_ALIGNED;
698 }
699 if (likely(ireq->sack_ok)) {
700 opts->options |= OPTION_SACK_ADVERTISE;
701 if (unlikely(!ireq->tstamp_ok))
702 remaining -= TCPOLEN_SACKPERM_ALIGNED;
703 }
704 if (foc != NULL && foc->len >= 0) {
705 u32 need = foc->len;
706
707 need += foc->exp ? TCPOLEN_EXP_FASTOPEN_BASE :
708 TCPOLEN_FASTOPEN_BASE;
709 need = (need + 3) & ~3U;
710 if (remaining >= need) {
711 opts->options |= OPTION_FAST_OPEN_COOKIE;
712 opts->fastopen_cookie = foc;
713 remaining -= need;
714 }
715 }
716
717 smc_set_option_cond(tcp_sk(sk), ireq, opts, &remaining);
718
719 return MAX_TCP_OPTION_SPACE - remaining;
720 }
/* Compute TCP options for established sockets.  This is the final
 * option layout for the segment; returns the size of the options.
 */
725 static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb,
726 struct tcp_out_options *opts,
727 struct tcp_md5sig_key **md5)
728 {
729 struct tcp_sock *tp = tcp_sk(sk);
730 unsigned int size = 0;
731 unsigned int eff_sacks;
732
733 opts->options = 0;
734
735 *md5 = NULL;
736 #ifdef CONFIG_TCP_MD5SIG
737 if (static_branch_unlikely(&tcp_md5_needed) &&
738 rcu_access_pointer(tp->md5sig_info)) {
739 *md5 = tp->af_specific->md5_lookup(sk, sk);
740 if (*md5) {
741 opts->options |= OPTION_MD5;
742 size += TCPOLEN_MD5SIG_ALIGNED;
743 }
744 }
745 #endif
746
747 if (likely(tp->rx_opt.tstamp_ok)) {
748 opts->options |= OPTION_TS;
749 opts->tsval = skb ? tcp_skb_timestamp(skb) + tp->tsoffset : 0;
750 opts->tsecr = tp->rx_opt.ts_recent;
751 size += TCPOLEN_TSTAMP_ALIGNED;
752 }
753
754 eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack;
755 if (unlikely(eff_sacks)) {
756 const unsigned int remaining = MAX_TCP_OPTION_SPACE - size;
757 opts->num_sack_blocks =
758 min_t(unsigned int, eff_sacks,
759 (remaining - TCPOLEN_SACK_BASE_ALIGNED) /
760 TCPOLEN_SACK_PERBLOCK);
761 if (likely(opts->num_sack_blocks))
762 size += TCPOLEN_SACK_BASE_ALIGNED +
763 opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK;
764 }
765
766 return size;
767 }
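/* With timestamps in use the accounting above leaves 40 - 12 = 28
 * bytes for SACK, i.e. (28 - 4) / 8 = 3 blocks at most per ACK; this
 * is where the familiar three-SACK-block limit comes from.
 */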
/* TCP Small Queues (TSQ).
 *
 * TSQ limits the number of bytes a socket may have queued below the
 * TCP stack (in qdiscs and device queues).  When tcp_wfree() notices
 * the limit was hit and buffers have since been freed, it queues the
 * socket on a per-cpu list and schedules a tasklet; the tasklet and
 * tcp_release_cb() below then resume transmission from softirq or
 * process context without holding packets in oversized host queues.
 */
784 struct tsq_tasklet {
785 struct tasklet_struct tasklet;
786 struct list_head head;
787 };
788 static DEFINE_PER_CPU(struct tsq_tasklet, tsq_tasklet);
789
790 static void tcp_tsq_write(struct sock *sk)
791 {
792 if ((1 << sk->sk_state) &
793 (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_CLOSING |
794 TCPF_CLOSE_WAIT | TCPF_LAST_ACK)) {
795 struct tcp_sock *tp = tcp_sk(sk);
796
797 if (tp->lost_out > tp->retrans_out &&
798 tp->snd_cwnd > tcp_packets_in_flight(tp)) {
799 tcp_mstamp_refresh(tp);
800 tcp_xmit_retransmit_queue(sk);
801 }
802
803 tcp_write_xmit(sk, tcp_current_mss(sk), tp->nonagle,
804 0, GFP_ATOMIC);
805 }
806 }
807
808 static void tcp_tsq_handler(struct sock *sk)
809 {
810 bh_lock_sock(sk);
811 if (!sock_owned_by_user(sk))
812 tcp_tsq_write(sk);
813 else if (!test_and_set_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags))
814 sock_hold(sk);
815 bh_unlock_sock(sk);
816 }
/* One tasklet per cpu tries to send more skbs for the sockets queued
 * by tcp_wfree().  We run in tasklet (softirq) context but need to
 * disable interrupts while splicing tsq->head, because tcp_wfree()
 * can run from hard-irq context and modify the list concurrently.
 */
823 static void tcp_tasklet_func(unsigned long data)
824 {
825 struct tsq_tasklet *tsq = (struct tsq_tasklet *)data;
826 LIST_HEAD(list);
827 unsigned long flags;
828 struct list_head *q, *n;
829 struct tcp_sock *tp;
830 struct sock *sk;
831
832 local_irq_save(flags);
833 list_splice_init(&tsq->head, &list);
834 local_irq_restore(flags);
835
836 list_for_each_safe(q, n, &list) {
837 tp = list_entry(q, struct tcp_sock, tsq_node);
838 list_del(&tp->tsq_node);
839
840 sk = (struct sock *)tp;
841 smp_mb__before_atomic();
842 clear_bit(TSQ_QUEUED, &sk->sk_tsq_flags);
843
844 tcp_tsq_handler(sk);
845 sk_free(sk);
846 }
847 }
848
849 #define TCP_DEFERRED_ALL (TCPF_TSQ_DEFERRED | \
850 TCPF_WRITE_TIMER_DEFERRED | \
851 TCPF_DELACK_TIMER_DEFERRED | \
852 TCPF_MTU_REDUCED_DEFERRED)
/* tcp_release_cb - release_sock() callback for TCP.
 *
 * Called from release_sock() to perform the actions that were deferred
 * (via the TCPF_*_DEFERRED flags) while the socket was owned by user
 * context: TSQ writes, write/delack timer work and MTU reduction.
 */
860 void tcp_release_cb(struct sock *sk)
861 {
862 unsigned long flags, nflags;
863
864
865 do {
866 flags = sk->sk_tsq_flags;
867 if (!(flags & TCP_DEFERRED_ALL))
868 return;
869 nflags = flags & ~TCP_DEFERRED_ALL;
870 } while (cmpxchg(&sk->sk_tsq_flags, flags, nflags) != flags);
871
872 if (flags & TCPF_TSQ_DEFERRED) {
873 tcp_tsq_write(sk);
874 __sock_put(sk);
875 }
876
877
878
879
880
881
882
883
884
885 sock_release_ownership(sk);
886
887 if (flags & TCPF_WRITE_TIMER_DEFERRED) {
888 tcp_write_timer_handler(sk);
889 __sock_put(sk);
890 }
891 if (flags & TCPF_DELACK_TIMER_DEFERRED) {
892 tcp_delack_timer_handler(sk);
893 __sock_put(sk);
894 }
895 if (flags & TCPF_MTU_REDUCED_DEFERRED) {
896 inet_csk(sk)->icsk_af_ops->mtu_reduced(sk);
897 __sock_put(sk);
898 }
899 }
900 EXPORT_SYMBOL(tcp_release_cb);
901
902 void __init tcp_tasklet_init(void)
903 {
904 int i;
905
906 for_each_possible_cpu(i) {
907 struct tsq_tasklet *tsq = &per_cpu(tsq_tasklet, i);
908
909 INIT_LIST_HEAD(&tsq->head);
910 tasklet_init(&tsq->tasklet,
911 tcp_tasklet_func,
912 (unsigned long)tsq);
913 }
914 }
/* Write buffer destructor, automatically called when an skb leaving
 * this socket is freed by the lower layers.  Releases the truesize
 * charge and, if the socket was throttled by TSQ, queues it on the
 * per-cpu tasklet list so transmission can resume.
 */
921 void tcp_wfree(struct sk_buff *skb)
922 {
923 struct sock *sk = skb->sk;
924 struct tcp_sock *tp = tcp_sk(sk);
925 unsigned long flags, nval, oval;
926
927
928
929
930 WARN_ON(refcount_sub_and_test(skb->truesize - 1, &sk->sk_wmem_alloc));
931
932
933
934
935
936
937
938
939 if (refcount_read(&sk->sk_wmem_alloc) >= SKB_TRUESIZE(1) && this_cpu_ksoftirqd() == current)
940 goto out;
941
942 for (oval = READ_ONCE(sk->sk_tsq_flags);; oval = nval) {
943 struct tsq_tasklet *tsq;
944 bool empty;
945
946 if (!(oval & TSQF_THROTTLED) || (oval & TSQF_QUEUED))
947 goto out;
948
949 nval = (oval & ~TSQF_THROTTLED) | TSQF_QUEUED;
950 nval = cmpxchg(&sk->sk_tsq_flags, oval, nval);
951 if (nval != oval)
952 continue;
953
954
955 local_irq_save(flags);
956 tsq = this_cpu_ptr(&tsq_tasklet);
957 empty = list_empty(&tsq->head);
958 list_add(&tp->tsq_node, &tsq->head);
959 if (empty)
960 tasklet_schedule(&tsq->tasklet);
961 local_irq_restore(flags);
962 return;
963 }
964 out:
965 sk_free(sk);
966 }
967
968
969
970
971 enum hrtimer_restart tcp_pace_kick(struct hrtimer *timer)
972 {
973 struct tcp_sock *tp = container_of(timer, struct tcp_sock, pacing_timer);
974 struct sock *sk = (struct sock *)tp;
975
976 tcp_tsq_handler(sk);
977 sock_put(sk);
978
979 return HRTIMER_NORESTART;
980 }
981
982 static void tcp_update_skb_after_send(struct sock *sk, struct sk_buff *skb,
983 u64 prior_wstamp)
984 {
985 struct tcp_sock *tp = tcp_sk(sk);
986
987 if (sk->sk_pacing_status != SK_PACING_NONE) {
988 unsigned long rate = sk->sk_pacing_rate;
989
990
991
992
993
994 if (rate != ~0UL && rate && tp->data_segs_out >= 10) {
995 u64 len_ns = div64_ul((u64)skb->len * NSEC_PER_SEC, rate);
996 u64 credit = tp->tcp_wstamp_ns - prior_wstamp;
997
998
999 len_ns -= min_t(u64, len_ns / 2, credit);
1000 tp->tcp_wstamp_ns += len_ns;
1001 }
1002 }
1003 list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue);
1004 }
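/* Worked example for the pacing update above (illustrative figures):
 * a 64 KB skb at a pacing rate of 12.5 MB/s "occupies" about 5.2 ms of
 * wire time.  If 2 ms of credit built up since the previous send
 * (prior_wstamp), only min(len_ns / 2, credit) = 2 ms is forgiven, so
 * tcp_wstamp_ns - the earliest departure time of the next packet -
 * still advances by roughly 3.2 ms.
 */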
/* This routine actually transmits a TCP segment.  It is used for both
 * the initial transmission and later retransmissions, so when clone_it
 * is set the skb is cloned (or copied) first and the original stays on
 * the queue.  It builds the TCP header and options, fills in the
 * advertised window, updates timestamps and MIB counters, and hands
 * the finished segment to the IP layer via queue_xmit().
 */
1017 static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
1018 int clone_it, gfp_t gfp_mask, u32 rcv_nxt)
1019 {
1020 const struct inet_connection_sock *icsk = inet_csk(sk);
1021 struct inet_sock *inet;
1022 struct tcp_sock *tp;
1023 struct tcp_skb_cb *tcb;
1024 struct tcp_out_options opts;
1025 unsigned int tcp_options_size, tcp_header_size;
1026 struct sk_buff *oskb = NULL;
1027 struct tcp_md5sig_key *md5;
1028 struct tcphdr *th;
1029 u64 prior_wstamp;
1030 int err;
1031
1032 BUG_ON(!skb || !tcp_skb_pcount(skb));
1033 tp = tcp_sk(sk);
1034 prior_wstamp = tp->tcp_wstamp_ns;
1035 tp->tcp_wstamp_ns = max(tp->tcp_wstamp_ns, tp->tcp_clock_cache);
1036 skb->skb_mstamp_ns = tp->tcp_wstamp_ns;
1037 if (clone_it) {
1038 TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq
1039 - tp->snd_una;
1040 oskb = skb;
1041
1042 tcp_skb_tsorted_save(oskb) {
1043 if (unlikely(skb_cloned(oskb)))
1044 skb = pskb_copy(oskb, gfp_mask);
1045 else
1046 skb = skb_clone(oskb, gfp_mask);
1047 } tcp_skb_tsorted_restore(oskb);
1048
1049 if (unlikely(!skb))
1050 return -ENOBUFS;
1051
1052
1053
1054 skb->dev = NULL;
1055 }
1056
1057 inet = inet_sk(sk);
1058 tcb = TCP_SKB_CB(skb);
1059 memset(&opts, 0, sizeof(opts));
1060
1061 if (unlikely(tcb->tcp_flags & TCPHDR_SYN)) {
1062 tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5);
1063 } else {
1064 tcp_options_size = tcp_established_options(sk, skb, &opts,
1065 &md5);
1066
1067
1068
1069
1070
1071
1072
1073
1074 if (tcp_skb_pcount(skb) > 1)
1075 tcb->tcp_flags |= TCPHDR_PSH;
1076 }
1077 tcp_header_size = tcp_options_size + sizeof(struct tcphdr);
1078
1079
1080
1081
1082
1083
1084
1085
1086 skb->ooo_okay = sk_wmem_alloc_get(sk) < SKB_TRUESIZE(1);
1087
1088
1089
1090
1091
1092
1093 skb->pfmemalloc = 0;
1094
1095 skb_push(skb, tcp_header_size);
1096 skb_reset_transport_header(skb);
1097
1098 skb_orphan(skb);
1099 skb->sk = sk;
1100 skb->destructor = skb_is_tcp_pure_ack(skb) ? __sock_wfree : tcp_wfree;
1101 skb_set_hash_from_sk(skb, sk);
1102 refcount_add(skb->truesize, &sk->sk_wmem_alloc);
1103
1104 skb_set_dst_pending_confirm(skb, sk->sk_dst_pending_confirm);
1105
1106
1107 th = (struct tcphdr *)skb->data;
1108 th->source = inet->inet_sport;
1109 th->dest = inet->inet_dport;
1110 th->seq = htonl(tcb->seq);
1111 th->ack_seq = htonl(rcv_nxt);
1112 *(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) |
1113 tcb->tcp_flags);
1114
1115 th->check = 0;
1116 th->urg_ptr = 0;
1117
1118
1119 if (unlikely(tcp_urg_mode(tp) && before(tcb->seq, tp->snd_up))) {
1120 if (before(tp->snd_up, tcb->seq + 0x10000)) {
1121 th->urg_ptr = htons(tp->snd_up - tcb->seq);
1122 th->urg = 1;
1123 } else if (after(tcb->seq + 0xFFFF, tp->snd_nxt)) {
1124 th->urg_ptr = htons(0xFFFF);
1125 th->urg = 1;
1126 }
1127 }
1128
1129 tcp_options_write((__be32 *)(th + 1), tp, &opts);
1130 skb_shinfo(skb)->gso_type = sk->sk_gso_type;
1131 if (likely(!(tcb->tcp_flags & TCPHDR_SYN))) {
1132 th->window = htons(tcp_select_window(sk));
1133 tcp_ecn_send(sk, skb, th, tcp_header_size);
1134 } else {
1135
1136
1137
1138 th->window = htons(min(tp->rcv_wnd, 65535U));
1139 }
1140 #ifdef CONFIG_TCP_MD5SIG
1141
1142 if (md5) {
1143 sk_nocaps_add(sk, NETIF_F_GSO_MASK);
1144 tp->af_specific->calc_md5_hash(opts.hash_location,
1145 md5, sk, skb);
1146 }
1147 #endif
1148
1149 icsk->icsk_af_ops->send_check(sk, skb);
1150
1151 if (likely(tcb->tcp_flags & TCPHDR_ACK))
1152 tcp_event_ack_sent(sk, tcp_skb_pcount(skb), rcv_nxt);
1153
1154 if (skb->len != tcp_header_size) {
1155 tcp_event_data_sent(tp, sk);
1156 tp->data_segs_out += tcp_skb_pcount(skb);
1157 tp->bytes_sent += skb->len - tcp_header_size;
1158 }
1159
1160 if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
1161 TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS,
1162 tcp_skb_pcount(skb));
1163
1164 tp->segs_out += tcp_skb_pcount(skb);
1165
1166 skb_shinfo(skb)->gso_segs = tcp_skb_pcount(skb);
1167 skb_shinfo(skb)->gso_size = tcp_skb_mss(skb);
1168
1169
1170
1171
1172 memset(skb->cb, 0, max(sizeof(struct inet_skb_parm),
1173 sizeof(struct inet6_skb_parm)));
1174
1175 tcp_add_tx_delay(skb, tp);
1176
1177 err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl);
1178
1179 if (unlikely(err > 0)) {
1180 tcp_enter_cwr(sk);
1181 err = net_xmit_eval(err);
1182 }
1183 if (!err && oskb) {
1184 tcp_update_skb_after_send(sk, oskb, prior_wstamp);
1185 tcp_rate_skb_sent(sk, oskb);
1186 }
1187 return err;
1188 }
1189
1190 static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
1191 gfp_t gfp_mask)
1192 {
1193 return __tcp_transmit_skb(sk, skb, clone_it, gfp_mask,
1194 tcp_sk(sk)->rcv_nxt);
1195 }
/* Queue the skb at the tail of the write queue and account its memory
 * against the socket; the actual transmission happens later from the
 * regular write-xmit path.
 */
1202 static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
1203 {
1204 struct tcp_sock *tp = tcp_sk(sk);
1205
1206
1207 WRITE_ONCE(tp->write_seq, TCP_SKB_CB(skb)->end_seq);
1208 __skb_header_release(skb);
1209 tcp_add_write_queue_tail(sk, skb);
1210 sk_wmem_queued_add(sk, skb->truesize);
1211 sk_mem_charge(sk, skb->truesize);
1212 }
1213
1214
1215 static void tcp_set_skb_tso_segs(struct sk_buff *skb, unsigned int mss_now)
1216 {
1217 if (skb->len <= mss_now) {
1218
1219
1220
1221 tcp_skb_pcount_set(skb, 1);
1222 TCP_SKB_CB(skb)->tcp_gso_size = 0;
1223 } else {
1224 tcp_skb_pcount_set(skb, DIV_ROUND_UP(skb->len, mss_now));
1225 TCP_SKB_CB(skb)->tcp_gso_size = mss_now;
1226 }
1227 }
1228
1229
1230
1231
1232 static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int decr)
1233 {
1234 struct tcp_sock *tp = tcp_sk(sk);
1235
1236 tp->packets_out -= decr;
1237
1238 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
1239 tp->sacked_out -= decr;
1240 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
1241 tp->retrans_out -= decr;
1242 if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST)
1243 tp->lost_out -= decr;
1244
1245
1246 if (tcp_is_reno(tp) && decr > 0)
1247 tp->sacked_out -= min_t(u32, tp->sacked_out, decr);
1248
1249 if (tp->lost_skb_hint &&
1250 before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(tp->lost_skb_hint)->seq) &&
1251 (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
1252 tp->lost_cnt_hint -= decr;
1253
1254 tcp_verify_left_out(tp);
1255 }
1256
1257 static bool tcp_has_tx_tstamp(const struct sk_buff *skb)
1258 {
1259 return TCP_SKB_CB(skb)->txstamp_ack ||
1260 (skb_shinfo(skb)->tx_flags & SKBTX_ANY_TSTAMP);
1261 }
1262
1263 static void tcp_fragment_tstamp(struct sk_buff *skb, struct sk_buff *skb2)
1264 {
1265 struct skb_shared_info *shinfo = skb_shinfo(skb);
1266
1267 if (unlikely(tcp_has_tx_tstamp(skb)) &&
1268 !before(shinfo->tskey, TCP_SKB_CB(skb2)->seq)) {
1269 struct skb_shared_info *shinfo2 = skb_shinfo(skb2);
1270 u8 tsflags = shinfo->tx_flags & SKBTX_ANY_TSTAMP;
1271
1272 shinfo->tx_flags &= ~tsflags;
1273 shinfo2->tx_flags |= tsflags;
1274 swap(shinfo->tskey, shinfo2->tskey);
1275 TCP_SKB_CB(skb2)->txstamp_ack = TCP_SKB_CB(skb)->txstamp_ack;
1276 TCP_SKB_CB(skb)->txstamp_ack = 0;
1277 }
1278 }
1279
1280 static void tcp_skb_fragment_eor(struct sk_buff *skb, struct sk_buff *skb2)
1281 {
1282 TCP_SKB_CB(skb2)->eor = TCP_SKB_CB(skb)->eor;
1283 TCP_SKB_CB(skb)->eor = 0;
1284 }
1285
1286
1287 static void tcp_insert_write_queue_after(struct sk_buff *skb,
1288 struct sk_buff *buff,
1289 struct sock *sk,
1290 enum tcp_queue tcp_queue)
1291 {
1292 if (tcp_queue == TCP_FRAG_IN_WRITE_QUEUE)
1293 __skb_queue_after(&sk->sk_write_queue, skb, buff);
1294 else
1295 tcp_rbtree_insert(&sk->tcp_rtx_queue, buff);
1296 }
/* Split SKB into two segments at LEN bytes.  The first part keeps the
 * original sequence start; the remainder is moved to a freshly
 * allocated skb inserted right after it on the given queue (write or
 * retransmit), with flags, timestamps and packet-count accounting
 * fixed up accordingly.
 */
1303 int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
1304 struct sk_buff *skb, u32 len,
1305 unsigned int mss_now, gfp_t gfp)
1306 {
1307 struct tcp_sock *tp = tcp_sk(sk);
1308 struct sk_buff *buff;
1309 int nsize, old_factor;
1310 long limit;
1311 int nlen;
1312 u8 flags;
1313
1314 if (WARN_ON(len > skb->len))
1315 return -EINVAL;
1316
1317 nsize = skb_headlen(skb) - len;
1318 if (nsize < 0)
1319 nsize = 0;
1320
1321
1322
1323
1324
1325
1326 limit = sk->sk_sndbuf + 2 * SKB_TRUESIZE(GSO_MAX_SIZE);
1327 if (unlikely((sk->sk_wmem_queued >> 1) > limit &&
1328 tcp_queue != TCP_FRAG_IN_WRITE_QUEUE &&
1329 skb != tcp_rtx_queue_head(sk) &&
1330 skb != tcp_rtx_queue_tail(sk))) {
1331 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPWQUEUETOOBIG);
1332 return -ENOMEM;
1333 }
1334
1335 if (skb_unclone(skb, gfp))
1336 return -ENOMEM;
1337
1338
1339 buff = sk_stream_alloc_skb(sk, nsize, gfp, true);
1340 if (!buff)
1341 return -ENOMEM;
1342 skb_copy_decrypted(buff, skb);
1343
1344 sk_wmem_queued_add(sk, buff->truesize);
1345 sk_mem_charge(sk, buff->truesize);
1346 nlen = skb->len - len - nsize;
1347 buff->truesize += nlen;
1348 skb->truesize -= nlen;
1349
1350
1351 TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len;
1352 TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq;
1353 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;
1354
1355
1356 flags = TCP_SKB_CB(skb)->tcp_flags;
1357 TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
1358 TCP_SKB_CB(buff)->tcp_flags = flags;
1359 TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked;
1360 tcp_skb_fragment_eor(skb, buff);
1361
1362 skb_split(skb, buff, len);
1363
1364 buff->ip_summed = CHECKSUM_PARTIAL;
1365
1366 buff->tstamp = skb->tstamp;
1367 tcp_fragment_tstamp(skb, buff);
1368
1369 old_factor = tcp_skb_pcount(skb);
1370
1371
1372 tcp_set_skb_tso_segs(skb, mss_now);
1373 tcp_set_skb_tso_segs(buff, mss_now);
1374
1375
1376 TCP_SKB_CB(buff)->tx = TCP_SKB_CB(skb)->tx;
1377
1378
1379
1380
1381 if (!before(tp->snd_nxt, TCP_SKB_CB(buff)->end_seq)) {
1382 int diff = old_factor - tcp_skb_pcount(skb) -
1383 tcp_skb_pcount(buff);
1384
1385 if (diff)
1386 tcp_adjust_pcount(sk, skb, diff);
1387 }
1388
1389
1390 __skb_header_release(buff);
1391 tcp_insert_write_queue_after(skb, buff, sk, tcp_queue);
1392 if (tcp_queue == TCP_FRAG_IN_RTX_QUEUE)
1393 list_add(&buff->tcp_tsorted_anchor, &skb->tcp_tsorted_anchor);
1394
1395 return 0;
1396 }
1397
1398
1399
1400
1401 static int __pskb_trim_head(struct sk_buff *skb, int len)
1402 {
1403 struct skb_shared_info *shinfo;
1404 int i, k, eat;
1405
1406 eat = min_t(int, len, skb_headlen(skb));
1407 if (eat) {
1408 __skb_pull(skb, eat);
1409 len -= eat;
1410 if (!len)
1411 return 0;
1412 }
1413 eat = len;
1414 k = 0;
1415 shinfo = skb_shinfo(skb);
1416 for (i = 0; i < shinfo->nr_frags; i++) {
1417 int size = skb_frag_size(&shinfo->frags[i]);
1418
1419 if (size <= eat) {
1420 skb_frag_unref(skb, i);
1421 eat -= size;
1422 } else {
1423 shinfo->frags[k] = shinfo->frags[i];
1424 if (eat) {
1425 skb_frag_off_add(&shinfo->frags[k], eat);
1426 skb_frag_size_sub(&shinfo->frags[k], eat);
1427 eat = 0;
1428 }
1429 k++;
1430 }
1431 }
1432 shinfo->nr_frags = k;
1433
1434 skb->data_len -= len;
1435 skb->len = skb->data_len;
1436 return len;
1437 }
1438
1439
1440 int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
1441 {
1442 u32 delta_truesize;
1443
1444 if (skb_unclone(skb, GFP_ATOMIC))
1445 return -ENOMEM;
1446
1447 delta_truesize = __pskb_trim_head(skb, len);
1448
1449 TCP_SKB_CB(skb)->seq += len;
1450 skb->ip_summed = CHECKSUM_PARTIAL;
1451
1452 if (delta_truesize) {
1453 skb->truesize -= delta_truesize;
1454 sk_wmem_queued_add(sk, -delta_truesize);
1455 sk_mem_uncharge(sk, delta_truesize);
1456 sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
1457 }
1458
1459
1460 if (tcp_skb_pcount(skb) > 1)
1461 tcp_set_skb_tso_segs(skb, tcp_skb_mss(skb));
1462
1463 return 0;
1464 }
1465
1466
1467 static inline int __tcp_mtu_to_mss(struct sock *sk, int pmtu)
1468 {
1469 const struct tcp_sock *tp = tcp_sk(sk);
1470 const struct inet_connection_sock *icsk = inet_csk(sk);
1471 int mss_now;
1472
1473
1474
1475
1476 mss_now = pmtu - icsk->icsk_af_ops->net_header_len - sizeof(struct tcphdr);
1477
1478
1479 if (icsk->icsk_af_ops->net_frag_header_len) {
1480 const struct dst_entry *dst = __sk_dst_get(sk);
1481
1482 if (dst && dst_allfrag(dst))
1483 mss_now -= icsk->icsk_af_ops->net_frag_header_len;
1484 }
1485
1486
1487 if (mss_now > tp->rx_opt.mss_clamp)
1488 mss_now = tp->rx_opt.mss_clamp;
1489
1490
1491 mss_now -= icsk->icsk_ext_hdr_len;
1492
1493
1494 mss_now = max(mss_now, sock_net(sk)->ipv4.sysctl_tcp_min_snd_mss);
1495 return mss_now;
1496 }
1497
1498
1499 int tcp_mtu_to_mss(struct sock *sk, int pmtu)
1500 {
1501
1502 return __tcp_mtu_to_mss(sk, pmtu) -
1503 (tcp_sk(sk)->tcp_header_len - sizeof(struct tcphdr));
1504 }
1505
1506
1507 int tcp_mss_to_mtu(struct sock *sk, int mss)
1508 {
1509 const struct tcp_sock *tp = tcp_sk(sk);
1510 const struct inet_connection_sock *icsk = inet_csk(sk);
1511 int mtu;
1512
1513 mtu = mss +
1514 tp->tcp_header_len +
1515 icsk->icsk_ext_hdr_len +
1516 icsk->icsk_af_ops->net_header_len;
1517
1518
1519 if (icsk->icsk_af_ops->net_frag_header_len) {
1520 const struct dst_entry *dst = __sk_dst_get(sk);
1521
1522 if (dst && dst_allfrag(dst))
1523 mtu += icsk->icsk_af_ops->net_frag_header_len;
1524 }
1525 return mtu;
1526 }
1527 EXPORT_SYMBOL(tcp_mss_to_mtu);
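/* MSS/MTU arithmetic example (IPv4, no IP options, timestamps on):
 * __tcp_mtu_to_mss(1500) = 1500 - 20 (IP) - 20 (TCP) = 1460, and
 * tcp_mtu_to_mss() subtracts the 12 bytes of timestamp options carried
 * in tcp_header_len, giving the usual 1448-byte payload per segment.
 * tcp_mss_to_mtu() is the exact inverse of that computation.
 */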
1528
1529
1530 void tcp_mtup_init(struct sock *sk)
1531 {
1532 struct tcp_sock *tp = tcp_sk(sk);
1533 struct inet_connection_sock *icsk = inet_csk(sk);
1534 struct net *net = sock_net(sk);
1535
1536 icsk->icsk_mtup.enabled = net->ipv4.sysctl_tcp_mtu_probing > 1;
1537 icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp + sizeof(struct tcphdr) +
1538 icsk->icsk_af_ops->net_header_len;
1539 icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, net->ipv4.sysctl_tcp_base_mss);
1540 icsk->icsk_mtup.probe_size = 0;
1541 if (icsk->icsk_mtup.enabled)
1542 icsk->icsk_mtup.probe_timestamp = tcp_jiffies32;
1543 }
1544 EXPORT_SYMBOL(tcp_mtup_init);
/* Synchronize the cached send MSS (tp->mss_cache) with the current
 * path MTU and the peer-announced mss_clamp.  Called whenever the PMTU
 * changes; it also bounds the search window used by MTU probing and
 * stores the pmtu the result was derived from in icsk_pmtu_cookie, so
 * tcp_current_mss() can detect stale values.
 */
1568 unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
1569 {
1570 struct tcp_sock *tp = tcp_sk(sk);
1571 struct inet_connection_sock *icsk = inet_csk(sk);
1572 int mss_now;
1573
1574 if (icsk->icsk_mtup.search_high > pmtu)
1575 icsk->icsk_mtup.search_high = pmtu;
1576
1577 mss_now = tcp_mtu_to_mss(sk, pmtu);
1578 mss_now = tcp_bound_to_half_wnd(tp, mss_now);
1579
1580
1581 icsk->icsk_pmtu_cookie = pmtu;
1582 if (icsk->icsk_mtup.enabled)
1583 mss_now = min(mss_now, tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low));
1584 tp->mss_cache = mss_now;
1585
1586 return mss_now;
1587 }
1588 EXPORT_SYMBOL(tcp_sync_mss);
1589
1590
1591
1592
1593 unsigned int tcp_current_mss(struct sock *sk)
1594 {
1595 const struct tcp_sock *tp = tcp_sk(sk);
1596 const struct dst_entry *dst = __sk_dst_get(sk);
1597 u32 mss_now;
1598 unsigned int header_len;
1599 struct tcp_out_options opts;
1600 struct tcp_md5sig_key *md5;
1601
1602 mss_now = tp->mss_cache;
1603
1604 if (dst) {
1605 u32 mtu = dst_mtu(dst);
1606 if (mtu != inet_csk(sk)->icsk_pmtu_cookie)
1607 mss_now = tcp_sync_mss(sk, mtu);
1608 }
1609
1610 header_len = tcp_established_options(sk, NULL, &opts, &md5) +
1611 sizeof(struct tcphdr);
1612
1613
1614
1615
1616 if (header_len != tp->tcp_header_len) {
1617 int delta = (int) header_len - tp->tcp_header_len;
1618 mss_now -= delta;
1619 }
1620
1621 return mss_now;
1622 }
1623
1624
1625
1626
1627
1628 static void tcp_cwnd_application_limited(struct sock *sk)
1629 {
1630 struct tcp_sock *tp = tcp_sk(sk);
1631
1632 if (inet_csk(sk)->icsk_ca_state == TCP_CA_Open &&
1633 sk->sk_socket && !test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
1634
1635 u32 init_win = tcp_init_cwnd(tp, __sk_dst_get(sk));
1636 u32 win_used = max(tp->snd_cwnd_used, init_win);
1637 if (win_used < tp->snd_cwnd) {
1638 tp->snd_ssthresh = tcp_current_ssthresh(sk);
1639 tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1;
1640 }
1641 tp->snd_cwnd_used = 0;
1642 }
1643 tp->snd_cwnd_stamp = tcp_jiffies32;
1644 }
1645
1646 static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
1647 {
1648 const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
1649 struct tcp_sock *tp = tcp_sk(sk);
1650
1651
1652
1653
1654 if (!before(tp->snd_una, tp->max_packets_seq) ||
1655 tp->packets_out > tp->max_packets_out) {
1656 tp->max_packets_out = tp->packets_out;
1657 tp->max_packets_seq = tp->snd_nxt;
1658 tp->is_cwnd_limited = is_cwnd_limited;
1659 }
1660
1661 if (tcp_is_cwnd_limited(sk)) {
1662
1663 tp->snd_cwnd_used = 0;
1664 tp->snd_cwnd_stamp = tcp_jiffies32;
1665 } else {
1666
1667 if (tp->packets_out > tp->snd_cwnd_used)
1668 tp->snd_cwnd_used = tp->packets_out;
1669
1670 if (sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle &&
1671 (s32)(tcp_jiffies32 - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto &&
1672 !ca_ops->cong_control)
1673 tcp_cwnd_application_limited(sk);
1674
1675
1676
1677
1678
1679
1680
1681
1682 if (tcp_write_queue_empty(sk) && sk->sk_socket &&
1683 test_bit(SOCK_NOSPACE, &sk->sk_socket->flags) &&
1684 (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))
1685 tcp_chrono_start(sk, TCP_CHRONO_SNDBUF_LIMITED);
1686 }
1687 }
1688
1689
1690 static bool tcp_minshall_check(const struct tcp_sock *tp)
1691 {
1692 return after(tp->snd_sml, tp->snd_una) &&
1693 !after(tp->snd_sml, tp->snd_nxt);
1694 }
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704 static void tcp_minshall_update(struct tcp_sock *tp, unsigned int mss_now,
1705 const struct sk_buff *skb)
1706 {
1707 if (skb->len < tcp_skb_pcount(skb) * mss_now)
1708 tp->snd_sml = TCP_SKB_CB(skb)->end_seq;
1709 }
1710
1711
1712
1713
1714
1715
1716
1717
1718 static bool tcp_nagle_check(bool partial, const struct tcp_sock *tp,
1719 int nonagle)
1720 {
1721 return partial &&
1722 ((nonagle & TCP_NAGLE_CORK) ||
1723 (!nonagle && tp->packets_out && tcp_minshall_check(tp)));
1724 }
1725
1726
1727
1728
1729 static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
1730 int min_tso_segs)
1731 {
1732 u32 bytes, segs;
1733
1734 bytes = min_t(unsigned long,
1735 sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift),
1736 sk->sk_gso_max_size - 1 - MAX_TCP_HEADER);
1737
1738
1739
1740
1741
1742
1743 segs = max_t(u32, bytes / mss_now, min_tso_segs);
1744
1745 return segs;
1746 }
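/* Sizing example for the helper above (rates and the default pacing
 * shift of 10 are assumptions): at ~12.5 MB/s (100 Mbit/s) the budget
 * is about 12 KB per internally built chunk, i.e. roughly eight
 * 1448-byte segments; at very high rates the ~64 KB GSO limit caps the
 * burst instead.  min_tso_segs then provides the floor applied in
 * tcp_tso_segs().
 */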
1747
1748
1749
1750
1751 static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
1752 {
1753 const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
1754 u32 min_tso, tso_segs;
1755
1756 min_tso = ca_ops->min_tso_segs ?
1757 ca_ops->min_tso_segs(sk) :
1758 sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs;
1759
1760 tso_segs = tcp_tso_autosize(sk, mss_now, min_tso);
1761 return min_t(u32, tso_segs, sk->sk_gso_max_segs);
1762 }
1763
1764
1765 static unsigned int tcp_mss_split_point(const struct sock *sk,
1766 const struct sk_buff *skb,
1767 unsigned int mss_now,
1768 unsigned int max_segs,
1769 int nonagle)
1770 {
1771 const struct tcp_sock *tp = tcp_sk(sk);
1772 u32 partial, needed, window, max_len;
1773
1774 window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
1775 max_len = mss_now * max_segs;
1776
1777 if (likely(max_len <= window && skb != tcp_write_queue_tail(sk)))
1778 return max_len;
1779
1780 needed = min(skb->len, window);
1781
1782 if (max_len <= needed)
1783 return max_len;
1784
1785 partial = needed % mss_now;
1786
1787
1788
1789
1790 if (tcp_nagle_check(partial != 0, tp, nonagle))
1791 return needed - partial;
1792
1793 return needed;
1794 }
1795
1796
1797
1798
1799 static inline unsigned int tcp_cwnd_test(const struct tcp_sock *tp,
1800 const struct sk_buff *skb)
1801 {
1802 u32 in_flight, cwnd, halfcwnd;
1803
1804
1805 if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) &&
1806 tcp_skb_pcount(skb) == 1)
1807 return 1;
1808
1809 in_flight = tcp_packets_in_flight(tp);
1810 cwnd = tp->snd_cwnd;
1811 if (in_flight >= cwnd)
1812 return 0;
1813
1814
1815
1816
1817 halfcwnd = max(cwnd >> 1, 1U);
1818 return min(halfcwnd, cwnd - in_flight);
1819 }
1820
1821
1822
1823
1824
1825 static int tcp_init_tso_segs(struct sk_buff *skb, unsigned int mss_now)
1826 {
1827 int tso_segs = tcp_skb_pcount(skb);
1828
1829 if (!tso_segs || (tso_segs > 1 && tcp_skb_mss(skb) != mss_now)) {
1830 tcp_set_skb_tso_segs(skb, mss_now);
1831 tso_segs = tcp_skb_pcount(skb);
1832 }
1833 return tso_segs;
1834 }
1835
1836
1837
1838
1839
1840 static inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buff *skb,
1841 unsigned int cur_mss, int nonagle)
1842 {
1843
1844
1845
1846
1847
1848
1849 if (nonagle & TCP_NAGLE_PUSH)
1850 return true;
1851
1852
1853 if (tcp_urg_mode(tp) || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN))
1854 return true;
1855
1856 if (!tcp_nagle_check(skb->len < cur_mss, tp, nonagle))
1857 return true;
1858
1859 return false;
1860 }
1861
1862
1863 static bool tcp_snd_wnd_test(const struct tcp_sock *tp,
1864 const struct sk_buff *skb,
1865 unsigned int cur_mss)
1866 {
1867 u32 end_seq = TCP_SKB_CB(skb)->end_seq;
1868
1869 if (skb->len > cur_mss)
1870 end_seq = TCP_SKB_CB(skb)->seq + cur_mss;
1871
1872 return !after(end_seq, tcp_wnd_end(tp));
1873 }
/* Trim a TSO skb on the send queue to LEN bytes and put the remainder
 * into a new skb queued right after it.  This is the fast counterpart
 * of tcp_fragment(): it only handles never-transmitted, all-paged data
 * on the write queue, so no SACK/retransmit accounting needs fixing.
 */
1882 static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
1883 unsigned int mss_now, gfp_t gfp)
1884 {
1885 int nlen = skb->len - len;
1886 struct sk_buff *buff;
1887 u8 flags;
1888
1889
1890 if (skb->len != skb->data_len)
1891 return tcp_fragment(sk, TCP_FRAG_IN_WRITE_QUEUE,
1892 skb, len, mss_now, gfp);
1893
1894 buff = sk_stream_alloc_skb(sk, 0, gfp, true);
1895 if (unlikely(!buff))
1896 return -ENOMEM;
1897 skb_copy_decrypted(buff, skb);
1898
1899 sk_wmem_queued_add(sk, buff->truesize);
1900 sk_mem_charge(sk, buff->truesize);
1901 buff->truesize += nlen;
1902 skb->truesize -= nlen;
1903
1904
1905 TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len;
1906 TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq;
1907 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;
1908
1909
1910 flags = TCP_SKB_CB(skb)->tcp_flags;
1911 TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
1912 TCP_SKB_CB(buff)->tcp_flags = flags;
1913
1914
1915 TCP_SKB_CB(buff)->sacked = 0;
1916
1917 tcp_skb_fragment_eor(skb, buff);
1918
1919 buff->ip_summed = CHECKSUM_PARTIAL;
1920 skb_split(skb, buff, len);
1921 tcp_fragment_tstamp(skb, buff);
1922
1923
1924 tcp_set_skb_tso_segs(skb, mss_now);
1925 tcp_set_skb_tso_segs(buff, mss_now);
1926
1927
1928 __skb_header_release(buff);
1929 tcp_insert_write_queue_after(skb, buff, sk, TCP_FRAG_IN_WRITE_QUEUE);
1930
1931 return 0;
1932 }
1933
/* Decide whether to hold back a not-yet-full TSO skb for a short while
 * in the hope that more data (or more window/cwnd) becomes available,
 * so we can send fewer, larger TSO bursts.  Returns true to defer,
 * false to send now; also reports whether we are cwnd- or rwnd-limited.
 */
1939 static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
1940 bool *is_cwnd_limited,
1941 bool *is_rwnd_limited,
1942 u32 max_segs)
1943 {
1944 const struct inet_connection_sock *icsk = inet_csk(sk);
1945 u32 send_win, cong_win, limit, in_flight;
1946 struct tcp_sock *tp = tcp_sk(sk);
1947 struct sk_buff *head;
1948 int win_divisor;
1949 s64 delta;
1950
1951 if (icsk->icsk_ca_state >= TCP_CA_Recovery)
1952 goto send_now;
1953
1954
1955
1956
1957
1958
1959 delta = tp->tcp_clock_cache - tp->tcp_wstamp_ns - NSEC_PER_MSEC;
1960 if (delta > 0)
1961 goto send_now;
1962
1963 in_flight = tcp_packets_in_flight(tp);
1964
1965 BUG_ON(tcp_skb_pcount(skb) <= 1);
1966 BUG_ON(tp->snd_cwnd <= in_flight);
1967
1968 send_win = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
1969
1970
1971 cong_win = (tp->snd_cwnd - in_flight) * tp->mss_cache;
1972
1973 limit = min(send_win, cong_win);
1974
1975
1976 if (limit >= max_segs * tp->mss_cache)
1977 goto send_now;
1978
1979
1980 if ((skb != tcp_write_queue_tail(sk)) && (limit >= skb->len))
1981 goto send_now;
1982
1983 win_divisor = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tso_win_divisor);
1984 if (win_divisor) {
1985 u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache);
1986
1987
1988
1989
1990 chunk /= win_divisor;
1991 if (limit >= chunk)
1992 goto send_now;
1993 } else {
1994
1995
1996
1997
1998
1999 if (limit > tcp_max_tso_deferred_mss(tp) * tp->mss_cache)
2000 goto send_now;
2001 }
2002
2003
2004 head = tcp_rtx_queue_head(sk);
2005 if (!head)
2006 goto send_now;
2007 delta = tp->tcp_clock_cache - head->tstamp;
2008
2009 if ((s64)(delta - (u64)NSEC_PER_USEC * (tp->srtt_us >> 4)) < 0)
2010 goto send_now;
2011
2012
2013
2014
2015
2016
2017
2018 if (cong_win < send_win) {
2019 if (cong_win <= skb->len) {
2020 *is_cwnd_limited = true;
2021 return true;
2022 }
2023 } else {
2024 if (send_win <= skb->len) {
2025 *is_rwnd_limited = true;
2026 return true;
2027 }
2028 }
2029
2030
2031 if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) ||
2032 TCP_SKB_CB(skb)->eor)
2033 goto send_now;
2034
2035 return true;
2036
2037 send_now:
2038 return false;
2039 }
2040
2041 static inline void tcp_mtu_check_reprobe(struct sock *sk)
2042 {
2043 struct inet_connection_sock *icsk = inet_csk(sk);
2044 struct tcp_sock *tp = tcp_sk(sk);
2045 struct net *net = sock_net(sk);
2046 u32 interval;
2047 s32 delta;
2048
2049 interval = net->ipv4.sysctl_tcp_probe_interval;
2050 delta = tcp_jiffies32 - icsk->icsk_mtup.probe_timestamp;
2051 if (unlikely(delta >= interval * HZ)) {
2052 int mss = tcp_current_mss(sk);
2053
2054
2055 icsk->icsk_mtup.probe_size = 0;
2056 icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp +
2057 sizeof(struct tcphdr) +
2058 icsk->icsk_af_ops->net_header_len;
2059 icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
2060
2061
2062 icsk->icsk_mtup.probe_timestamp = tcp_jiffies32;
2063 }
2064 }
2065
2066 static bool tcp_can_coalesce_send_queue_head(struct sock *sk, int len)
2067 {
2068 struct sk_buff *skb, *next;
2069
2070 skb = tcp_send_head(sk);
2071 tcp_for_write_queue_from_safe(skb, next, sk) {
2072 if (len <= skb->len)
2073 break;
2074
2075 if (unlikely(TCP_SKB_CB(skb)->eor) || tcp_has_tx_tstamp(skb))
2076 return false;
2077
2078 len -= skb->len;
2079 }
2080
2081 return true;
2082 }
2083
/* Create a new MTU probe if we are ready (packetization-layer path MTU
 * discovery, RFC 4821).  Coalesces data from the head of the send
 * queue into one oversized segment sized halfway between the current
 * search bounds and transmits it.  Returns 0 if we should wait to
 * probe (no room in the cwnd/window right now), 1 when a probe was
 * sent and -1 otherwise.
 */
2093 static int tcp_mtu_probe(struct sock *sk)
2094 {
2095 struct inet_connection_sock *icsk = inet_csk(sk);
2096 struct tcp_sock *tp = tcp_sk(sk);
2097 struct sk_buff *skb, *nskb, *next;
2098 struct net *net = sock_net(sk);
2099 int probe_size;
2100 int size_needed;
2101 int copy, len;
2102 int mss_now;
2103 int interval;
2104
2105
2106
2107
2108
2109
2110 if (likely(!icsk->icsk_mtup.enabled ||
2111 icsk->icsk_mtup.probe_size ||
2112 inet_csk(sk)->icsk_ca_state != TCP_CA_Open ||
2113 tp->snd_cwnd < 11 ||
2114 tp->rx_opt.num_sacks || tp->rx_opt.dsack))
2115 return -1;
2116
2117
2118
2119
2120
2121 mss_now = tcp_current_mss(sk);
2122 probe_size = tcp_mtu_to_mss(sk, (icsk->icsk_mtup.search_high +
2123 icsk->icsk_mtup.search_low) >> 1);
2124 size_needed = probe_size + (tp->reordering + 1) * tp->mss_cache;
2125 interval = icsk->icsk_mtup.search_high - icsk->icsk_mtup.search_low;
2126
2127
2128
2129
2130 if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high) ||
2131 interval < net->ipv4.sysctl_tcp_probe_threshold) {
2132
2133
2134
2135 tcp_mtu_check_reprobe(sk);
2136 return -1;
2137 }
2138
2139
2140 if (tp->write_seq - tp->snd_nxt < size_needed)
2141 return -1;
2142
2143 if (tp->snd_wnd < size_needed)
2144 return -1;
2145 if (after(tp->snd_nxt + size_needed, tcp_wnd_end(tp)))
2146 return 0;
2147
2148
2149 if (tcp_packets_in_flight(tp) + 2 > tp->snd_cwnd) {
2150 if (!tcp_packets_in_flight(tp))
2151 return -1;
2152 else
2153 return 0;
2154 }
2155
2156 if (!tcp_can_coalesce_send_queue_head(sk, probe_size))
2157 return -1;
2158
2159
2160 nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC, false);
2161 if (!nskb)
2162 return -1;
2163 sk_wmem_queued_add(sk, nskb->truesize);
2164 sk_mem_charge(sk, nskb->truesize);
2165
2166 skb = tcp_send_head(sk);
2167 skb_copy_decrypted(nskb, skb);
2168
2169 TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq;
2170 TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size;
2171 TCP_SKB_CB(nskb)->tcp_flags = TCPHDR_ACK;
2172 TCP_SKB_CB(nskb)->sacked = 0;
2173 nskb->csum = 0;
2174 nskb->ip_summed = CHECKSUM_PARTIAL;
2175
2176 tcp_insert_write_queue_before(nskb, skb, sk);
2177 tcp_highest_sack_replace(sk, skb, nskb);
2178
2179 len = 0;
2180 tcp_for_write_queue_from_safe(skb, next, sk) {
2181 copy = min_t(int, skb->len, probe_size - len);
2182 skb_copy_bits(skb, 0, skb_put(nskb, copy), copy);
2183
2184 if (skb->len <= copy) {
2185
2186
2187 TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
2188
2189
2190
2191 TCP_SKB_CB(nskb)->eor = TCP_SKB_CB(skb)->eor;
2192 tcp_skb_collapse_tstamp(nskb, skb);
2193 tcp_unlink_write_queue(skb, sk);
2194 sk_wmem_free_skb(sk, skb);
2195 } else {
2196 TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags &
2197 ~(TCPHDR_FIN|TCPHDR_PSH);
2198 if (!skb_shinfo(skb)->nr_frags) {
2199 skb_pull(skb, copy);
2200 } else {
2201 __pskb_trim_head(skb, copy);
2202 tcp_set_skb_tso_segs(skb, mss_now);
2203 }
2204 TCP_SKB_CB(skb)->seq += copy;
2205 }
2206
2207 len += copy;
2208
2209 if (len >= probe_size)
2210 break;
2211 }
2212 tcp_init_tso_segs(nskb, nskb->len);
2213
2214
2215
2216
2217 if (!tcp_transmit_skb(sk, nskb, 1, GFP_ATOMIC)) {
2218
2219
2220 tp->snd_cwnd--;
2221 tcp_event_new_data_sent(sk, nskb);
2222
2223 icsk->icsk_mtup.probe_size = tcp_mss_to_mtu(sk, nskb->len);
2224 tp->mtu_probe.probe_seq_start = TCP_SKB_CB(nskb)->seq;
2225 tp->mtu_probe.probe_seq_end = TCP_SKB_CB(nskb)->end_seq;
2226
2227 return 1;
2228 }
2229
2230 return -1;
2231 }
2232
2233 static bool tcp_pacing_check(struct sock *sk)
2234 {
2235 struct tcp_sock *tp = tcp_sk(sk);
2236
2237 if (!tcp_needs_internal_pacing(sk))
2238 return false;
2239
2240 if (tp->tcp_wstamp_ns <= tp->tcp_clock_cache)
2241 return false;
2242
2243 if (!hrtimer_is_queued(&tp->pacing_timer)) {
2244 hrtimer_start(&tp->pacing_timer,
2245 ns_to_ktime(tp->tcp_wstamp_ns),
2246 HRTIMER_MODE_ABS_PINNED_SOFT);
2247 sock_hold(sk);
2248 }
2249 return true;
2250 }
/* TCP Small Queues check: allow no more than roughly 1 ms worth of
 * data at the current pacing rate (with a floor of two skbs) to sit in
 * the qdisc/device queues.  This gives better RTT estimates and ACK
 * clocking, faster recovery, and less bufferbloat.  Returns true if
 * the caller must stop sending for now; tcp_wfree() will reschedule
 * transmission once buffers are freed.
 */
2263 static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
2264 unsigned int factor)
2265 {
2266 unsigned long limit;
2267
2268 limit = max_t(unsigned long,
2269 2 * skb->truesize,
2270 sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift));
2271 if (sk->sk_pacing_status == SK_PACING_NONE)
2272 limit = min_t(unsigned long, limit,
2273 sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes);
2274 limit <<= factor;
2275
2276 if (static_branch_unlikely(&tcp_tx_delay_enabled) &&
2277 tcp_sk(sk)->tcp_tx_delay) {
2278 u64 extra_bytes = (u64)sk->sk_pacing_rate * tcp_sk(sk)->tcp_tx_delay;
2279
2280
2281
2282
2283
2284
2285 extra_bytes >>= (20 - 1);
2286 limit += extra_bytes;
2287 }
2288 if (refcount_read(&sk->sk_wmem_alloc) > limit) {
2289
2290
2291
2292
2293
2294 if (tcp_rtx_queue_empty(sk))
2295 return false;
2296
2297 set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags);
2298
2299
2300
2301
2302 smp_mb__after_atomic();
2303 if (refcount_read(&sk->sk_wmem_alloc) > limit)
2304 return true;
2305 }
2306 return false;
2307 }
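/* Limit example for the check above (illustrative figures): at a
 * pacing rate of 125 MB/s (1 Gbit/s) and the default pacing shift of
 * 10, roughly 122 KB may sit in the qdisc/device queues before TSQ
 * throttles the socket.  The 2 * skb->truesize term keeps at least two
 * buffers in flight for slow flows, and unpaced sockets are
 * additionally capped by sysctl_tcp_limit_output_bytes.
 */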
2308
2309 static void tcp_chrono_set(struct tcp_sock *tp, const enum tcp_chrono new)
2310 {
2311 const u32 now = tcp_jiffies32;
2312 enum tcp_chrono old = tp->chrono_type;
2313
2314 if (old > TCP_CHRONO_UNSPEC)
2315 tp->chrono_stat[old - 1] += now - tp->chrono_start;
2316 tp->chrono_start = now;
2317 tp->chrono_type = new;
2318 }
2319
2320 void tcp_chrono_start(struct sock *sk, const enum tcp_chrono type)
2321 {
2322 struct tcp_sock *tp = tcp_sk(sk);
2323
2324
2325
2326
2327
2328
2329 if (type > tp->chrono_type)
2330 tcp_chrono_set(tp, type);
2331 }
2332
2333 void tcp_chrono_stop(struct sock *sk, const enum tcp_chrono type)
2334 {
2335 struct tcp_sock *tp = tcp_sk(sk);
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345 if (tcp_rtx_and_write_queues_empty(sk))
2346 tcp_chrono_set(tp, TCP_CHRONO_UNSPEC);
2347 else if (type == tp->chrono_type)
2348 tcp_chrono_set(tp, TCP_CHRONO_BUSY);
2349 }
/* This routine writes packets to the network.  It advances the send
 * head and sends everything the congestion window, receiver window,
 * Nagle, pacing and TSQ checks permit, using MTU probing and TSO
 * deferral along the way.  Returns true if some segments remain queued
 * but nothing is in flight and nothing could be sent, so the caller
 * knows zero-window probing is needed.
 */
2365 static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
2366 int push_one, gfp_t gfp)
2367 {
2368 struct tcp_sock *tp = tcp_sk(sk);
2369 struct sk_buff *skb;
2370 unsigned int tso_segs, sent_pkts;
2371 int cwnd_quota;
2372 int result;
2373 bool is_cwnd_limited = false, is_rwnd_limited = false;
2374 u32 max_segs;
2375
2376 sent_pkts = 0;
2377
2378 tcp_mstamp_refresh(tp);
2379 if (!push_one) {
2380
2381 result = tcp_mtu_probe(sk);
2382 if (!result) {
2383 return false;
2384 } else if (result > 0) {
2385 sent_pkts = 1;
2386 }
2387 }
2388
2389 max_segs = tcp_tso_segs(sk, mss_now);
2390 while ((skb = tcp_send_head(sk))) {
2391 unsigned int limit;
2392
2393 if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) {
2394
2395 skb->skb_mstamp_ns = tp->tcp_wstamp_ns = tp->tcp_clock_cache;
2396 list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue);
2397 tcp_init_tso_segs(skb, mss_now);
2398 goto repair;
2399 }
2400
2401 if (tcp_pacing_check(sk))
2402 break;
2403
2404 tso_segs = tcp_init_tso_segs(skb, mss_now);
2405 BUG_ON(!tso_segs);
2406
2407 cwnd_quota = tcp_cwnd_test(tp, skb);
2408 if (!cwnd_quota) {
2409 if (push_one == 2)
2410
2411 cwnd_quota = 1;
2412 else
2413 break;
2414 }
2415
2416 if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now))) {
2417 is_rwnd_limited = true;
2418 break;
2419 }
2420
2421 if (tso_segs == 1) {
2422 if (unlikely(!tcp_nagle_test(tp, skb, mss_now,
2423 (tcp_skb_is_last(sk, skb) ?
2424 nonagle : TCP_NAGLE_PUSH))))
2425 break;
2426 } else {
2427 if (!push_one &&
2428 tcp_tso_should_defer(sk, skb, &is_cwnd_limited,
2429 &is_rwnd_limited, max_segs))
2430 break;
2431 }
2432
2433 limit = mss_now;
2434 if (tso_segs > 1 && !tcp_urg_mode(tp))
2435 limit = tcp_mss_split_point(sk, skb, mss_now,
2436 min_t(unsigned int,
2437 cwnd_quota,
2438 max_segs),
2439 nonagle);
2440
2441 if (skb->len > limit &&
2442 unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
2443 break;
2444
2445 if (tcp_small_queue_check(sk, skb, 0))
2446 break;
2447
2448
2449
2450
2451
2452
2453 if (TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq)
2454 break;
2455
2456 if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp)))
2457 break;
2458
2459 repair:
2460
2461
2462
2463 tcp_event_new_data_sent(sk, skb);
2464
2465 tcp_minshall_update(tp, mss_now, skb);
2466 sent_pkts += tcp_skb_pcount(skb);
2467
2468 if (push_one)
2469 break;
2470 }
2471
2472 if (is_rwnd_limited)
2473 tcp_chrono_start(sk, TCP_CHRONO_RWND_LIMITED);
2474 else
2475 tcp_chrono_stop(sk, TCP_CHRONO_RWND_LIMITED);
2476
2477 if (likely(sent_pkts)) {
2478 if (tcp_in_cwnd_reduction(sk))
2479 tp->prr_out += sent_pkts;
2480
2481
2482 if (push_one != 2)
2483 tcp_schedule_loss_probe(sk, false);
2484 is_cwnd_limited |= (tcp_packets_in_flight(tp) >= tp->snd_cwnd);
2485 tcp_cwnd_validate(sk, is_cwnd_limited);
2486 return false;
2487 }
2488 return !tp->packets_out && !tcp_write_queue_empty(sk);
2489 }
2490
2491 bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto)
2492 {
2493 struct inet_connection_sock *icsk = inet_csk(sk);
2494 struct tcp_sock *tp = tcp_sk(sk);
2495 u32 timeout, rto_delta_us;
2496 int early_retrans;
2497
2498 /* Don't do any loss probe on a Fast Open connection before the
2499  * 3WHS finishes.
2500  */
2501 if (rcu_access_pointer(tp->fastopen_rsk))
2502 return false;
2503
2504 early_retrans = sock_net(sk)->ipv4.sysctl_tcp_early_retrans;
2505
2506 /* Schedule a loss probe in 2*RTT for SACK capable connections in
2507  * Open or CWR state, when tcp_early_retrans is set to 3 or 4. */
2508 if ((early_retrans != 3 && early_retrans != 4) ||
2509 !tp->packets_out || !tcp_is_sack(tp) ||
2510 (icsk->icsk_ca_state != TCP_CA_Open &&
2511 icsk->icsk_ca_state != TCP_CA_CWR))
2512 return false;
2513
2514 /* Probe timeout is 2*rtt. Add minimum RTO to account
2515  * for delayed ack when there's one outstanding packet. If no RTT
2516  * sample is available then probe after TCP_TIMEOUT_INIT.
2517  */
2518 if (tp->srtt_us) {
2519 timeout = usecs_to_jiffies(tp->srtt_us >> 2);
2520 if (tp->packets_out == 1)
2521 timeout += TCP_RTO_MIN;
2522 else
2523 timeout += TCP_TIMEOUT_MIN;
2524 } else {
2525 timeout = TCP_TIMEOUT_INIT;
2526 }
2527
2528 /* If the RTO formula yields an earlier time, then use that time. */
2529 rto_delta_us = advancing_rto ?
2530 jiffies_to_usecs(inet_csk(sk)->icsk_rto) :
2531 tcp_rto_delta_us(sk);
2532 if (rto_delta_us > 0)
2533 timeout = min_t(u32, timeout, usecs_to_jiffies(rto_delta_us));
2534
2535 tcp_reset_xmit_timer(sk, ICSK_TIME_LOSS_PROBE, timeout,
2536 TCP_RTO_MAX, NULL);
2537 return true;
2538 }
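Because srtt_us stores eight times the smoothed RTT, the `tp->srtt_us >> 2` above is 2*SRTT; the additive term is TCP_RTO_MIN with a single packet outstanding (to ride out a delayed ACK) and the much smaller TCP_TIMEOUT_MIN otherwise, and the result is never allowed to fire later than the pending RTO. A hedged arithmetic sketch, assuming 200 ms and 2 ms for those two constants:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t srtt_us_field = 8 * 50000; /* smoothed RTT of 50 ms, stored as 8*SRTT */
	uint32_t packets_out = 1;
	uint32_t rto_min_us = 200000;       /* assumed TCP_RTO_MIN */
	uint32_t timeout_min_us = 2000;     /* assumed TCP_TIMEOUT_MIN */
	uint32_t rto_remaining_us = 350000; /* time left until the regular RTO would fire */

	uint32_t timeout = srtt_us_field >> 2;          /* 2 * SRTT = 100000 us */

	timeout += (packets_out == 1) ? rto_min_us : timeout_min_us;
	if (rto_remaining_us < timeout)                 /* never fire later than the RTO */
		timeout = rto_remaining_us;

	printf("TLP probe timeout: %u us\n", timeout);  /* 300000 us here */
	return 0;
}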
2539
2540 /* Thanks to skb fast clones, we can detect if a prior transmit of
2541  * a packet is still in a qdisc or driver queue.
2542  * In this case, there is very little point doing a retransmit !
2543  */
2544 static bool skb_still_in_host_queue(const struct sock *sk,
2545 const struct sk_buff *skb)
2546 {
2547 if (unlikely(skb_fclone_busy(sk, skb))) {
2548 NET_INC_STATS(sock_net(sk),
2549 LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES);
2550 return true;
2551 }
2552 return false;
2553 }
2554
2555 /* When probe timeout (PTO) fires, try send a new segment if possible, else
2556  * retransmit the last segment.
2557  */
2558 void tcp_send_loss_probe(struct sock *sk)
2559 {
2560 struct tcp_sock *tp = tcp_sk(sk);
2561 struct sk_buff *skb;
2562 int pcount;
2563 int mss = tcp_current_mss(sk);
2564
2565 skb = tcp_send_head(sk);
2566 if (skb && tcp_snd_wnd_test(tp, skb, mss)) {
2567 pcount = tp->packets_out;
2568 tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC);
2569 if (tp->packets_out > pcount)
2570 goto probe_sent;
2571 goto rearm_timer;
2572 }
2573 skb = skb_rb_last(&sk->tcp_rtx_queue);
2574 if (unlikely(!skb)) {
2575 WARN_ONCE(tp->packets_out,
2576 "invalid inflight: %u state %u cwnd %u mss %d\n",
2577 tp->packets_out, sk->sk_state, tp->snd_cwnd, mss);
2578 inet_csk(sk)->icsk_pending = 0;
2579 return;
2580 }
2581
2582 /* At most one outstanding TLP retransmission. */
2583 if (tp->tlp_high_seq)
2584 goto rearm_timer;
2585
2586 if (skb_still_in_host_queue(sk, skb))
2587 goto rearm_timer;
2588
2589 pcount = tcp_skb_pcount(skb);
2590 if (WARN_ON(!pcount))
2591 goto rearm_timer;
2592
2593 if ((pcount > 1) && (skb->len > (pcount - 1) * mss)) {
2594 if (unlikely(tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb,
2595 (pcount - 1) * mss, mss,
2596 GFP_ATOMIC)))
2597 goto rearm_timer;
2598 skb = skb_rb_next(skb);
2599 }
2600
2601 if (WARN_ON(!skb || !tcp_skb_pcount(skb)))
2602 goto rearm_timer;
2603
2604 if (__tcp_retransmit_skb(sk, skb, 1))
2605 goto rearm_timer;
2606
2607 /* Record snd_nxt for loss detection. */
2608 tp->tlp_high_seq = tp->snd_nxt;
2609
2610 probe_sent:
2611 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPLOSSPROBES);
2612 /* Reset s.t. tcp_rearm_rto will restart timer from now */
2613 inet_csk(sk)->icsk_pending = 0;
2614 rearm_timer:
2615 tcp_rearm_rto(sk);
2616 }
2617
2618 /* Push out any pending frames which were held back due to
2619  * TCP_CORK or attempt at coalescing tiny packets.
2620  * The socket must be locked by the caller.
2621  */
2622 void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
2623 int nonagle)
2624 {
2625 /* If we are closed, the bytes will have to remain here.
2626  * In time closedown will finish, we empty the write queue and
2627  * all will be happy.
2628  */
2629 if (unlikely(sk->sk_state == TCP_CLOSE))
2630 return;
2631
2632 if (tcp_write_xmit(sk, cur_mss, nonagle, 0,
2633 sk_gfp_mask(sk, GFP_ATOMIC)))
2634 tcp_check_probe_timer(sk);
2635 }
2636
2637 /* Send _single_ skb sitting at the send head. This function requires
2638  * true push pending frames to setup probe timer etc.
2639  */
2640 void tcp_push_one(struct sock *sk, unsigned int mss_now)
2641 {
2642 struct sk_buff *skb = tcp_send_head(sk);
2643
2644 BUG_ON(!skb || skb->len < mss_now);
2645
2646 tcp_write_xmit(sk, mss_now, TCP_NAGLE_PUSH, 1, sk->sk_allocation);
2647 }
2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666
2667
2668
2669
2670
2671
2672
2673
2674
2675
2676
2677
2678
2679
2680
2681
2682
2683
2684
2685
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701 u32 __tcp_select_window(struct sock *sk)
2702 {
2703 struct inet_connection_sock *icsk = inet_csk(sk);
2704 struct tcp_sock *tp = tcp_sk(sk);
2705
2706
2707
2708
2709
2710
2711 int mss = icsk->icsk_ack.rcv_mss;
2712 int free_space = tcp_space(sk);
2713 int allowed_space = tcp_full_space(sk);
2714 int full_space = min_t(int, tp->window_clamp, allowed_space);
2715 int window;
2716
2717 if (unlikely(mss > full_space)) {
2718 mss = full_space;
2719 if (mss <= 0)
2720 return 0;
2721 }
2722 if (free_space < (full_space >> 1)) {
2723 icsk->icsk_ack.quick = 0;
2724
2725 if (tcp_under_memory_pressure(sk))
2726 tp->rcv_ssthresh = min(tp->rcv_ssthresh,
2727 4U * tp->advmss);
2728
2729
2730
2731
2732 free_space = round_down(free_space, 1 << tp->rx_opt.rcv_wscale);
2733
2734
2735
2736
2737
2738
2739
2740
2741 if (free_space < (allowed_space >> 4) || free_space < mss)
2742 return 0;
2743 }
2744
2745 if (free_space > tp->rcv_ssthresh)
2746 free_space = tp->rcv_ssthresh;
2747
2748
2749
2750
2751 if (tp->rx_opt.rcv_wscale) {
2752 window = free_space;
2753
2754
2755
2756
2757
2758 window = ALIGN(window, (1 << tp->rx_opt.rcv_wscale));
2759 } else {
2760 window = tp->rcv_wnd;
2761
2762
2763
2764
2765
2766
2767
2768
2769 if (window <= free_space - mss || window > free_space)
2770 window = rounddown(free_space, mss);
2771 else if (mss == full_space &&
2772 free_space > window + (full_space >> 1))
2773 window = free_space;
2774 }
2775
2776 return window;
2777 }
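The final branch of __tcp_select_window() above is plain rounding: with window scaling in use the advertisement is aligned up to a multiple of 2^rcv_wscale (the window field is sent in those units), while without scaling it is rounded down to whole MSS units. A small sketch of the two roundings with hypothetical numbers:

#include <stdio.h>

/* Round up to a multiple of 'a' (a power of two), like the kernel's ALIGN(). */
static unsigned int align_up(unsigned int x, unsigned int a)
{
	return (x + a - 1) & ~(a - 1);
}

int main(void)
{
	unsigned int free_space = 60000;
	unsigned int mss = 1460;
	unsigned int rcv_wscale = 7;            /* window advertised in 128-byte units */

	unsigned int scaled = align_up(free_space, 1u << rcv_wscale);
	unsigned int unscaled = (free_space / mss) * mss; /* rounddown(free_space, mss) */

	printf("with wscale=7 : advertise %u\n", scaled);   /* 60032 */
	printf("without scale : advertise %u\n", unscaled); /* 59860 */
	return 0;
}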
2778
2779 void tcp_skb_collapse_tstamp(struct sk_buff *skb,
2780 const struct sk_buff *next_skb)
2781 {
2782 if (unlikely(tcp_has_tx_tstamp(next_skb))) {
2783 const struct skb_shared_info *next_shinfo =
2784 skb_shinfo(next_skb);
2785 struct skb_shared_info *shinfo = skb_shinfo(skb);
2786
2787 shinfo->tx_flags |= next_shinfo->tx_flags & SKBTX_ANY_TSTAMP;
2788 shinfo->tskey = next_shinfo->tskey;
2789 TCP_SKB_CB(skb)->txstamp_ack |=
2790 TCP_SKB_CB(next_skb)->txstamp_ack;
2791 }
2792 }
2793
2794
2795 static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
2796 {
2797 struct tcp_sock *tp = tcp_sk(sk);
2798 struct sk_buff *next_skb = skb_rb_next(skb);
2799 int next_skb_size;
2800
2801 next_skb_size = next_skb->len;
2802
2803 BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1);
2804
2805 if (next_skb_size) {
2806 if (next_skb_size <= skb_availroom(skb))
2807 skb_copy_bits(next_skb, 0, skb_put(skb, next_skb_size),
2808 next_skb_size);
2809 else if (!tcp_skb_shift(skb, next_skb, 1, next_skb_size))
2810 return false;
2811 }
2812 tcp_highest_sack_replace(sk, next_skb, skb);
2813
2814
2815 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq;
2816
2817
2818 TCP_SKB_CB(skb)->tcp_flags |= TCP_SKB_CB(next_skb)->tcp_flags;
2819
2820
2821
2822
2823 TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked & TCPCB_EVER_RETRANS;
2824 TCP_SKB_CB(skb)->eor = TCP_SKB_CB(next_skb)->eor;
2825
2826
2827 tcp_clear_retrans_hints_partial(tp);
2828 if (next_skb == tp->retransmit_skb_hint)
2829 tp->retransmit_skb_hint = skb;
2830
2831 tcp_adjust_pcount(sk, next_skb, tcp_skb_pcount(next_skb));
2832
2833 tcp_skb_collapse_tstamp(skb, next_skb);
2834
2835 tcp_rtx_queue_unlink_and_free(next_skb, sk);
2836 return true;
2837 }
2838
2839
2840 static bool tcp_can_collapse(const struct sock *sk, const struct sk_buff *skb)
2841 {
2842 if (tcp_skb_pcount(skb) > 1)
2843 return false;
2844 if (skb_cloned(skb))
2845 return false;
2846
2847 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
2848 return false;
2849
2850 return true;
2851 }
2852
2853
2854
2855
2856 static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
2857 int space)
2858 {
2859 struct tcp_sock *tp = tcp_sk(sk);
2860 struct sk_buff *skb = to, *tmp;
2861 bool first = true;
2862
2863 if (!sock_net(sk)->ipv4.sysctl_tcp_retrans_collapse)
2864 return;
2865 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
2866 return;
2867
2868 skb_rbtree_walk_from_safe(skb, tmp) {
2869 if (!tcp_can_collapse(sk, skb))
2870 break;
2871
2872 if (!tcp_skb_can_collapse_to(to))
2873 break;
2874
2875 space -= skb->len;
2876
2877 if (first) {
2878 first = false;
2879 continue;
2880 }
2881
2882 if (space < 0)
2883 break;
2884
2885 if (after(TCP_SKB_CB(skb)->end_seq, tcp_wnd_end(tp)))
2886 break;
2887
2888 if (!tcp_collapse_retrans(sk, to))
2889 break;
2890 }
2891 }
2892
2893
2894
2895
2896
2897 int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
2898 {
2899 struct inet_connection_sock *icsk = inet_csk(sk);
2900 struct tcp_sock *tp = tcp_sk(sk);
2901 unsigned int cur_mss;
2902 int diff, len, err;
2903
2904
2905
2906 if (icsk->icsk_mtup.probe_size)
2907 icsk->icsk_mtup.probe_size = 0;
2908
2909
2910
2911
2912 if (refcount_read(&sk->sk_wmem_alloc) >
2913 min_t(u32, sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2),
2914 sk->sk_sndbuf))
2915 return -EAGAIN;
2916
2917 if (skb_still_in_host_queue(sk, skb))
2918 return -EBUSY;
2919
2920 if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) {
2921 if (unlikely(before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))) {
2922 WARN_ON_ONCE(1);
2923 return -EINVAL;
2924 }
2925 if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
2926 return -ENOMEM;
2927 }
2928
2929 if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
2930 return -EHOSTUNREACH;
2931
2932 cur_mss = tcp_current_mss(sk);
2933
2934
2935
2936
2937
2938
2939 if (!before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp)) &&
2940 TCP_SKB_CB(skb)->seq != tp->snd_una)
2941 return -EAGAIN;
2942
2943 len = cur_mss * segs;
2944 if (skb->len > len) {
2945 if (tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb, len,
2946 cur_mss, GFP_ATOMIC))
2947 return -ENOMEM;
2948 } else {
2949 if (skb_unclone(skb, GFP_ATOMIC))
2950 return -ENOMEM;
2951
2952 diff = tcp_skb_pcount(skb);
2953 tcp_set_skb_tso_segs(skb, cur_mss);
2954 diff -= tcp_skb_pcount(skb);
2955 if (diff)
2956 tcp_adjust_pcount(sk, skb, diff);
2957 if (skb->len < cur_mss)
2958 tcp_retrans_try_collapse(sk, skb, cur_mss);
2959 }
2960
2961
2962 if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN_ECN) == TCPHDR_SYN_ECN)
2963 tcp_ecn_clear_syn(sk, skb);
2964
2965
2966 segs = tcp_skb_pcount(skb);
2967 TCP_ADD_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS, segs);
2968 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
2969 __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
2970 tp->total_retrans += segs;
2971 tp->bytes_retrans += skb->len;
2972
2973
2974
2975
2976
2977 if (unlikely((NET_IP_ALIGN && ((unsigned long)skb->data & 3)) ||
2978 skb_headroom(skb) >= 0xFFFF)) {
2979 struct sk_buff *nskb;
2980
2981 tcp_skb_tsorted_save(skb) {
2982 nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC);
2983 if (nskb) {
2984 nskb->dev = NULL;
2985 err = tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC);
2986 } else {
2987 err = -ENOBUFS;
2988 }
2989 } tcp_skb_tsorted_restore(skb);
2990
2991 if (!err) {
2992 tcp_update_skb_after_send(sk, skb, tp->tcp_wstamp_ns);
2993 tcp_rate_skb_sent(sk, skb);
2994 }
2995 } else {
2996 err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
2997 }
2998
2999
3000
3001
3002 TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS;
3003
3004 if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RETRANS_CB_FLAG))
3005 tcp_call_bpf_3arg(sk, BPF_SOCK_OPS_RETRANS_CB,
3006 TCP_SKB_CB(skb)->seq, segs, err);
3007
3008 if (likely(!err)) {
3009 trace_tcp_retransmit_skb(sk, skb);
3010 } else if (err != -EBUSY) {
3011 NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL, segs);
3012 }
3013 return err;
3014 }
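One guard in __tcp_retransmit_skb() worth spelling out is the early -EAGAIN: the retransmit is refused while bytes already charged to the socket by lower layers (sk_wmem_alloc) exceed the smaller of sk_sndbuf and sk_wmem_queued plus a 25% margin, so a congested qdisc cannot let retransmits inflate memory use. A standalone sketch of that admission test, with invented numbers:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Mirror of the "too much is already committed below us" test. */
static bool rtx_would_overcommit(uint32_t wmem_alloc, uint32_t wmem_queued, uint32_t sndbuf)
{
	uint32_t budget = wmem_queued + (wmem_queued >> 2); /* queued + 25% headroom */

	if (sndbuf < budget)
		budget = sndbuf;
	return wmem_alloc > budget;
}

int main(void)
{
	/* 300 KB already handed to the qdisc/driver, 200 KB queued in TCP, 256 KB sndbuf */
	printf("refuse retransmit: %d\n",
	       rtx_would_overcommit(300 * 1024, 200 * 1024, 256 * 1024));
	return 0;
}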
3015
3016 int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
3017 {
3018 struct tcp_sock *tp = tcp_sk(sk);
3019 int err = __tcp_retransmit_skb(sk, skb, segs);
3020
3021 if (err == 0) {
3022 #if FASTRETRANS_DEBUG > 0
3023 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
3024 net_dbg_ratelimited("retrans_out leaked\n");
3025 }
3026 #endif
3027 TCP_SKB_CB(skb)->sacked |= TCPCB_RETRANS;
3028 tp->retrans_out += tcp_skb_pcount(skb);
3029 }
3030
3031
3032 if (!tp->retrans_stamp)
3033 tp->retrans_stamp = tcp_skb_timestamp(skb);
3034
3035 if (tp->undo_retrans < 0)
3036 tp->undo_retrans = 0;
3037 tp->undo_retrans += tcp_skb_pcount(skb);
3038 return err;
3039 }
3040
3041
3042
3043
3044
3045
3046 void tcp_xmit_retransmit_queue(struct sock *sk)
3047 {
3048 const struct inet_connection_sock *icsk = inet_csk(sk);
3049 struct sk_buff *skb, *rtx_head, *hole = NULL;
3050 struct tcp_sock *tp = tcp_sk(sk);
3051 u32 max_segs;
3052 int mib_idx;
3053
3054 if (!tp->packets_out)
3055 return;
3056
3057 rtx_head = tcp_rtx_queue_head(sk);
3058 skb = tp->retransmit_skb_hint ?: rtx_head;
3059 max_segs = tcp_tso_segs(sk, tcp_current_mss(sk));
3060 skb_rbtree_walk_from(skb) {
3061 __u8 sacked;
3062 int segs;
3063
3064 if (tcp_pacing_check(sk))
3065 break;
3066
3067
3068 if (!hole)
3069 tp->retransmit_skb_hint = skb;
3070
3071 segs = tp->snd_cwnd - tcp_packets_in_flight(tp);
3072 if (segs <= 0)
3073 return;
3074 sacked = TCP_SKB_CB(skb)->sacked;
3075
3076
3077
3078 segs = min_t(int, segs, max_segs);
3079
3080 if (tp->retrans_out >= tp->lost_out) {
3081 break;
3082 } else if (!(sacked & TCPCB_LOST)) {
3083 if (!hole && !(sacked & (TCPCB_SACKED_RETRANS|TCPCB_SACKED_ACKED)))
3084 hole = skb;
3085 continue;
3086
3087 } else {
3088 if (icsk->icsk_ca_state != TCP_CA_Loss)
3089 mib_idx = LINUX_MIB_TCPFASTRETRANS;
3090 else
3091 mib_idx = LINUX_MIB_TCPSLOWSTARTRETRANS;
3092 }
3093
3094 if (sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))
3095 continue;
3096
3097 if (tcp_small_queue_check(sk, skb, 1))
3098 return;
3099
3100 if (tcp_retransmit_skb(sk, skb, segs))
3101 return;
3102
3103 NET_ADD_STATS(sock_net(sk), mib_idx, tcp_skb_pcount(skb));
3104
3105 if (tcp_in_cwnd_reduction(sk))
3106 tp->prr_out += tcp_skb_pcount(skb);
3107
3108 if (skb == rtx_head &&
3109 icsk->icsk_pending != ICSK_TIME_REO_TIMEOUT)
3110 tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
3111 inet_csk(sk)->icsk_rto,
3112 TCP_RTO_MAX,
3113 skb);
3114 }
3115 }
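Each iteration of the walk above recomputes its budget as the congestion window minus the packets currently in flight, capped by the TSO autosizing limit, and gives up entirely once the budget hits zero. A trivial sketch of that per-skb budget, with hypothetical values:

#include <stdio.h>

int main(void)
{
	int snd_cwnd = 10;
	int packets_in_flight = 7;   /* packets_out - (sacked + lost) + retrans */
	int max_segs = 4;            /* TSO autosizing cap from tcp_tso_segs() */

	int segs = snd_cwnd - packets_in_flight;

	if (segs <= 0) {
		printf("cwnd exhausted, stop retransmitting\n");
		return 0;
	}
	if (segs > max_segs)
		segs = max_segs;
	printf("may retransmit up to %d segment(s) from this skb\n", segs);
	return 0;
}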
3116
3117 /* We allow to exceed memory limits for FIN packets to expedite
3118  * connection tear down and (memory) recovery.
3119  * Otherwise tcp_send_fin() could be tempted to either delay FIN
3120  * or even be forced to close flow without any FIN.
3121  * In general, we want to allow one skb per socket to avoid hangs
3122  * with edge trigger epoll()
3123  */
3124 void sk_forced_mem_schedule(struct sock *sk, int size)
3125 {
3126 int amt;
3127
3128 if (size <= sk->sk_forward_alloc)
3129 return;
3130 amt = sk_mem_pages(size);
3131 sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
3132 sk_memory_allocated_add(sk, amt);
3133
3134 if (mem_cgroup_sockets_enabled && sk->sk_memcg)
3135 mem_cgroup_charge_skmem(sk->sk_memcg, amt);
3136 }
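The forced charge above rounds the request up to whole memory-accounting quanta before crediting sk_forward_alloc. A quick sketch of that rounding, assuming a 4 KiB quantum (SK_MEM_QUANTUM tracking the page size is an assumption here):

#include <stdio.h>

#define MEM_QUANTUM 4096u /* assumed page-sized accounting unit */

int main(void)
{
	unsigned int forward_alloc = 1024; /* credit already available on the socket */
	unsigned int size = 9000;          /* truesize of the skb we must account for */

	if (size > forward_alloc) {
		/* round up to whole quanta, like sk_mem_pages() */
		unsigned int pages = (size + MEM_QUANTUM - 1) / MEM_QUANTUM;

		forward_alloc += pages * MEM_QUANTUM;
		printf("charged %u quanta, forward_alloc now %u\n", pages, forward_alloc);
	}
	return 0;
}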
3137
3138 /* Send a FIN. The caller locks the socket for us.
3139  * We should try to send a FIN packet really hard, but eventually give up.
3140  */
3141 void tcp_send_fin(struct sock *sk)
3142 {
3143 struct sk_buff *skb, *tskb = tcp_write_queue_tail(sk);
3144 struct tcp_sock *tp = tcp_sk(sk);
3145
3146 /* Optimization, tack on the FIN if we have one skb in write queue and
3147  * this skb was not yet sent, or we are under memory pressure.
3148  * Note: in the latter case, FIN packet will be sent after a timeout,
3149  * as TCP stack thinks it has already been transmitted.
3150  */
3151 if (!tskb && tcp_under_memory_pressure(sk))
3152 tskb = skb_rb_last(&sk->tcp_rtx_queue);
3153
3154 if (tskb) {
3155 TCP_SKB_CB(tskb)->tcp_flags |= TCPHDR_FIN;
3156 TCP_SKB_CB(tskb)->end_seq++;
3157 tp->write_seq++;
3158 if (tcp_write_queue_empty(sk)) {
3159
3160
3161
3162
3163
3164
3165 WRITE_ONCE(tp->snd_nxt, tp->snd_nxt + 1);
3166 return;
3167 }
3168 } else {
3169 skb = alloc_skb_fclone(MAX_TCP_HEADER, sk->sk_allocation);
3170 if (unlikely(!skb))
3171 return;
3172
3173 INIT_LIST_HEAD(&skb->tcp_tsorted_anchor);
3174 skb_reserve(skb, MAX_TCP_HEADER);
3175 sk_forced_mem_schedule(sk, skb->truesize);
3176
3177 tcp_init_nondata_skb(skb, tp->write_seq,
3178 TCPHDR_ACK | TCPHDR_FIN);
3179 tcp_queue_skb(sk, skb);
3180 }
3181 __tcp_push_pending_frames(sk, tcp_current_mss(sk), TCP_NAGLE_OFF);
3182 }
3183
3184 /* We get here when a process closes a file descriptor (either due to
3185  * an explicit close() or as a byproduct of exit()'ing) and there
3186  * was unread data in the receive queue. This behavior is recommended
3187  * by RFC 2525, section 2.17.
3188  */
3189 void tcp_send_active_reset(struct sock *sk, gfp_t priority)
3190 {
3191 struct sk_buff *skb;
3192
3193 TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTRSTS);
3194
3195
3196 skb = alloc_skb(MAX_TCP_HEADER, priority);
3197 if (!skb) {
3198 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
3199 return;
3200 }
3201
3202
3203 skb_reserve(skb, MAX_TCP_HEADER);
3204 tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk),
3205 TCPHDR_ACK | TCPHDR_RST);
3206 tcp_mstamp_refresh(tcp_sk(sk));
3207
3208 if (tcp_transmit_skb(sk, skb, 0, priority))
3209 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
3210
3211
3212
3213
3214 trace_tcp_send_reset(sk, NULL);
3215 }
3216
3217 /* Send a crossed SYN-ACK during socket establishment.
3218  * WARNING: This routine must only be called when we have already sent
3219  * a SYN packet that crossed the incoming SYN that caused this routine
3220  * to get called. If this assumption fails then the initial rcv_wnd
3221  * and rcv_wscale values will not be correct.
3222  */
3223 int tcp_send_synack(struct sock *sk)
3224 {
3225 struct sk_buff *skb;
3226
3227 skb = tcp_rtx_queue_head(sk);
3228 if (!skb || !(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
3229 pr_err("%s: wrong queue state\n", __func__);
3230 return -EFAULT;
3231 }
3232 if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)) {
3233 if (skb_cloned(skb)) {
3234 struct sk_buff *nskb;
3235
3236 tcp_skb_tsorted_save(skb) {
3237 nskb = skb_copy(skb, GFP_ATOMIC);
3238 } tcp_skb_tsorted_restore(skb);
3239 if (!nskb)
3240 return -ENOMEM;
3241 INIT_LIST_HEAD(&nskb->tcp_tsorted_anchor);
3242 tcp_highest_sack_replace(sk, skb, nskb);
3243 tcp_rtx_queue_unlink_and_free(skb, sk);
3244 __skb_header_release(nskb);
3245 tcp_rbtree_insert(&sk->tcp_rtx_queue, nskb);
3246 sk_wmem_queued_add(sk, nskb->truesize);
3247 sk_mem_charge(sk, nskb->truesize);
3248 skb = nskb;
3249 }
3250
3251 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ACK;
3252 tcp_ecn_send_synack(sk, skb);
3253 }
3254 return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
3255 }
3256
3257 /**
3258  * tcp_make_synack - Allocate one skb and build a SYNACK packet.
3259  * @sk: listener socket
3260  * @dst: dst entry attached to the SYNACK; it is consumed by this
3261  *       function, so the caller must not use it afterwards.
3262  * @req: request_sock pointer
3263  * @foc: cookie for tcp fast open
3264  * @synack_type: Type of synack to prepare
3265  */
3266 struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
3267 struct request_sock *req,
3268 struct tcp_fastopen_cookie *foc,
3269 enum tcp_synack_type synack_type)
3270 {
3271 struct inet_request_sock *ireq = inet_rsk(req);
3272 const struct tcp_sock *tp = tcp_sk(sk);
3273 struct tcp_md5sig_key *md5 = NULL;
3274 struct tcp_out_options opts;
3275 struct sk_buff *skb;
3276 int tcp_header_size;
3277 struct tcphdr *th;
3278 int mss;
3279 u64 now;
3280
3281 skb = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
3282 if (unlikely(!skb)) {
3283 dst_release(dst);
3284 return NULL;
3285 }
3286
3287 skb_reserve(skb, MAX_TCP_HEADER);
3288
3289 switch (synack_type) {
3290 case TCP_SYNACK_NORMAL:
3291 skb_set_owner_w(skb, req_to_sk(req));
3292 break;
3293 case TCP_SYNACK_COOKIE:
3294
3295
3296
3297 break;
3298 case TCP_SYNACK_FASTOPEN:
3299
3300
3301
3302
3303 skb_set_owner_w(skb, (struct sock *)sk);
3304 break;
3305 }
3306 skb_dst_set(skb, dst);
3307
3308 mss = tcp_mss_clamp(tp, dst_metric_advmss(dst));
3309
3310 memset(&opts, 0, sizeof(opts));
3311 now = tcp_clock_ns();
3312 #ifdef CONFIG_SYN_COOKIES
3313 if (unlikely(req->cookie_ts))
3314 skb->skb_mstamp_ns = cookie_init_timestamp(req);
3315 else
3316 #endif
3317 {
3318 skb->skb_mstamp_ns = now;
3319 if (!tcp_rsk(req)->snt_synack)
3320 tcp_rsk(req)->snt_synack = tcp_skb_timestamp_us(skb);
3321 }
3322
3323 #ifdef CONFIG_TCP_MD5SIG
3324 rcu_read_lock();
3325 md5 = tcp_rsk(req)->af_specific->req_md5_lookup(sk, req_to_sk(req));
3326 #endif
3327 skb_set_hash(skb, tcp_rsk(req)->txhash, PKT_HASH_TYPE_L4);
3328 tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, md5,
3329 foc) + sizeof(*th);
3330
3331 skb_push(skb, tcp_header_size);
3332 skb_reset_transport_header(skb);
3333
3334 th = (struct tcphdr *)skb->data;
3335 memset(th, 0, sizeof(struct tcphdr));
3336 th->syn = 1;
3337 th->ack = 1;
3338 tcp_ecn_make_synack(req, th);
3339 th->source = htons(ireq->ir_num);
3340 th->dest = ireq->ir_rmt_port;
3341 skb->mark = ireq->ir_mark;
3342 skb->ip_summed = CHECKSUM_PARTIAL;
3343 th->seq = htonl(tcp_rsk(req)->snt_isn);
3344
3345 th->ack_seq = htonl(tcp_rsk(req)->rcv_nxt);
3346
3347
3348 th->window = htons(min(req->rsk_rcv_wnd, 65535U));
3349 tcp_options_write((__be32 *)(th + 1), NULL, &opts);
3350 th->doff = (tcp_header_size >> 2);
3351 __TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS);
3352
3353 #ifdef CONFIG_TCP_MD5SIG
3354
3355 if (md5)
3356 tcp_rsk(req)->af_specific->calc_md5_hash(opts.hash_location,
3357 md5, req_to_sk(req), skb);
3358 rcu_read_unlock();
3359 #endif
3360
3361 skb->skb_mstamp_ns = now;
3362 tcp_add_tx_delay(skb, tp);
3363
3364 return skb;
3365 }
3366 EXPORT_SYMBOL(tcp_make_synack);
3367
3368 static void tcp_ca_dst_init(struct sock *sk, const struct dst_entry *dst)
3369 {
3370 struct inet_connection_sock *icsk = inet_csk(sk);
3371 const struct tcp_congestion_ops *ca;
3372 u32 ca_key = dst_metric(dst, RTAX_CC_ALGO);
3373
3374 if (ca_key == TCP_CA_UNSPEC)
3375 return;
3376
3377 rcu_read_lock();
3378 ca = tcp_ca_find_key(ca_key);
3379 if (likely(ca && try_module_get(ca->owner))) {
3380 module_put(icsk->icsk_ca_ops->owner);
3381 icsk->icsk_ca_dst_locked = tcp_ca_dst_locked(dst);
3382 icsk->icsk_ca_ops = ca;
3383 }
3384 rcu_read_unlock();
3385 }
3386
3387 /* Do all connect socket setups that can be done AF independent. */
3388 static void tcp_connect_init(struct sock *sk)
3389 {
3390 const struct dst_entry *dst = __sk_dst_get(sk);
3391 struct tcp_sock *tp = tcp_sk(sk);
3392 __u8 rcv_wscale;
3393 u32 rcv_wnd;
3394
3395
3396
3397
3398 tp->tcp_header_len = sizeof(struct tcphdr);
3399 if (sock_net(sk)->ipv4.sysctl_tcp_timestamps)
3400 tp->tcp_header_len += TCPOLEN_TSTAMP_ALIGNED;
3401
3402 #ifdef CONFIG_TCP_MD5SIG
3403 if (tp->af_specific->md5_lookup(sk, sk))
3404 tp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED;
3405 #endif
3406
3407
3408 if (tp->rx_opt.user_mss)
3409 tp->rx_opt.mss_clamp = tp->rx_opt.user_mss;
3410 tp->max_window = 0;
3411 tcp_mtup_init(sk);
3412 tcp_sync_mss(sk, dst_mtu(dst));
3413
3414 tcp_ca_dst_init(sk, dst);
3415
3416 if (!tp->window_clamp)
3417 tp->window_clamp = dst_metric(dst, RTAX_WINDOW);
3418 tp->advmss = tcp_mss_clamp(tp, dst_metric_advmss(dst));
3419
3420 tcp_initialize_rcv_mss(sk);
3421
3422
3423 if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
3424 (tp->window_clamp > tcp_full_space(sk) || tp->window_clamp == 0))
3425 tp->window_clamp = tcp_full_space(sk);
3426
3427 rcv_wnd = tcp_rwnd_init_bpf(sk);
3428 if (rcv_wnd == 0)
3429 rcv_wnd = dst_metric(dst, RTAX_INITRWND);
3430
3431 tcp_select_initial_window(sk, tcp_full_space(sk),
3432 tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
3433 &tp->rcv_wnd,
3434 &tp->window_clamp,
3435 sock_net(sk)->ipv4.sysctl_tcp_window_scaling,
3436 &rcv_wscale,
3437 rcv_wnd);
3438
3439 tp->rx_opt.rcv_wscale = rcv_wscale;
3440 tp->rcv_ssthresh = tp->rcv_wnd;
3441
3442 sk->sk_err = 0;
3443 sock_reset_flag(sk, SOCK_DONE);
3444 tp->snd_wnd = 0;
3445 tcp_init_wl(tp, 0);
3446 tcp_write_queue_purge(sk);
3447 tp->snd_una = tp->write_seq;
3448 tp->snd_sml = tp->write_seq;
3449 tp->snd_up = tp->write_seq;
3450 WRITE_ONCE(tp->snd_nxt, tp->write_seq);
3451
3452 if (likely(!tp->repair))
3453 tp->rcv_nxt = 0;
3454 else
3455 tp->rcv_tstamp = tcp_jiffies32;
3456 tp->rcv_wup = tp->rcv_nxt;
3457 WRITE_ONCE(tp->copied_seq, tp->rcv_nxt);
3458
3459 inet_csk(sk)->icsk_rto = tcp_timeout_init(sk);
3460 inet_csk(sk)->icsk_retransmits = 0;
3461 tcp_clear_retrans(tp);
3462 }
3463
3464 static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb)
3465 {
3466 struct tcp_sock *tp = tcp_sk(sk);
3467 struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
3468
3469 tcb->end_seq += skb->len;
3470 __skb_header_release(skb);
3471 sk_wmem_queued_add(sk, skb->truesize);
3472 sk_mem_charge(sk, skb->truesize);
3473 WRITE_ONCE(tp->write_seq, tcb->end_seq);
3474 tp->packets_out += tcp_skb_pcount(skb);
3475 }
3476
3477 /* Build and send a SYN with data and (cached) Fast Open cookie. However,
3478  * queue a data-only packet after the regular SYN, such that regular SYNs
3479  * are retransmitted on timeouts. Also if the remote SYN-ACK acknowledges
3480  * only the SYN sequence, the data are retransmitted in the first ACK.
3481  * If cookie is not cached or other error occurs, falls back to send a
3482  * regular SYN with Fast Open cookie request option.
3483  */
3484 static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
3485 {
3486 struct tcp_sock *tp = tcp_sk(sk);
3487 struct tcp_fastopen_request *fo = tp->fastopen_req;
3488 int space, err = 0;
3489 struct sk_buff *syn_data;
3490
3491 tp->rx_opt.mss_clamp = tp->advmss;
3492 if (!tcp_fastopen_cookie_check(sk, &tp->rx_opt.mss_clamp, &fo->cookie))
3493 goto fallback;
3494
3495
3496
3497
3498
3499 tp->rx_opt.mss_clamp = tcp_mss_clamp(tp, tp->rx_opt.mss_clamp);
3500
3501 space = __tcp_mtu_to_mss(sk, inet_csk(sk)->icsk_pmtu_cookie) -
3502 MAX_TCP_OPTION_SPACE;
3503
3504 space = min_t(size_t, space, fo->size);
3505
3506
3507 space = min_t(size_t, space, SKB_MAX_HEAD(MAX_TCP_HEADER));
3508
3509 syn_data = sk_stream_alloc_skb(sk, space, sk->sk_allocation, false);
3510 if (!syn_data)
3511 goto fallback;
3512 syn_data->ip_summed = CHECKSUM_PARTIAL;
3513 memcpy(syn_data->cb, syn->cb, sizeof(syn->cb));
3514 if (space) {
3515 int copied = copy_from_iter(skb_put(syn_data, space), space,
3516 &fo->data->msg_iter);
3517 if (unlikely(!copied)) {
3518 tcp_skb_tsorted_anchor_cleanup(syn_data);
3519 kfree_skb(syn_data);
3520 goto fallback;
3521 }
3522 if (copied != space) {
3523 skb_trim(syn_data, copied);
3524 space = copied;
3525 }
3526 skb_zcopy_set(syn_data, fo->uarg, NULL);
3527 }
3528
3529 if (space == fo->size)
3530 fo->data = NULL;
3531 fo->copied = space;
3532
3533 tcp_connect_queue_skb(sk, syn_data);
3534 if (syn_data->len)
3535 tcp_chrono_start(sk, TCP_CHRONO_BUSY);
3536
3537 err = tcp_transmit_skb(sk, syn_data, 1, sk->sk_allocation);
3538
3539 syn->skb_mstamp_ns = syn_data->skb_mstamp_ns;
3540
3541
3542
3543
3544
3545
3546 TCP_SKB_CB(syn_data)->seq++;
3547 TCP_SKB_CB(syn_data)->tcp_flags = TCPHDR_ACK | TCPHDR_PSH;
3548 if (!err) {
3549 tp->syn_data = (fo->copied > 0);
3550 tcp_rbtree_insert(&sk->tcp_rtx_queue, syn_data);
3551 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT);
3552 goto done;
3553 }
3554
3555
3556 __skb_queue_tail(&sk->sk_write_queue, syn_data);
3557 tp->packets_out -= tcp_skb_pcount(syn_data);
3558
3559 fallback:
3560
3561 if (fo->cookie.len > 0)
3562 fo->cookie.len = 0;
3563 err = tcp_transmit_skb(sk, syn, 1, sk->sk_allocation);
3564 if (err)
3565 tp->syn_fastopen = 0;
3566 done:
3567 fo->cookie.len = -1;
3568 return err;
3569 }
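The payload that can ride on the SYN in tcp_send_syn_data() above is bounded three ways: the MSS derived from the cached path MTU minus the full 40-byte option budget (MAX_TCP_OPTION_SPACE), the amount the application actually supplied, and what fits linearly in one skb. A hedged arithmetic sketch; the linear-headroom figure below is a made-up placeholder for SKB_MAX_HEAD(MAX_TCP_HEADER):

#include <stdio.h>

static unsigned int min_u(unsigned int a, unsigned int b)
{
	return a < b ? a : b;
}

int main(void)
{
	unsigned int mss_from_pmtu = 1460;  /* __tcp_mtu_to_mss() result for a 1500-byte MTU path */
	unsigned int max_option_space = 40; /* MAX_TCP_OPTION_SPACE */
	unsigned int requested = 4000;      /* bytes the application passed with MSG_FASTOPEN */
	unsigned int linear_room = 2048;    /* placeholder for SKB_MAX_HEAD(MAX_TCP_HEADER) */

	unsigned int space = mss_from_pmtu - max_option_space; /* 1420 */

	space = min_u(space, requested);
	space = min_u(space, linear_room);

	printf("SYN can carry %u bytes of payload\n", space);  /* 1420 here */
	return 0;
}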
3570
3571 /* Build a SYN and send it off. */
3572 int tcp_connect(struct sock *sk)
3573 {
3574 struct tcp_sock *tp = tcp_sk(sk);
3575 struct sk_buff *buff;
3576 int err;
3577
3578 tcp_call_bpf(sk, BPF_SOCK_OPS_TCP_CONNECT_CB, 0, NULL);
3579
3580 if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
3581 return -EHOSTUNREACH;
3582
3583 tcp_connect_init(sk);
3584
3585 if (unlikely(tp->repair)) {
3586 tcp_finish_connect(sk, NULL);
3587 return 0;
3588 }
3589
3590 buff = sk_stream_alloc_skb(sk, 0, sk->sk_allocation, true);
3591 if (unlikely(!buff))
3592 return -ENOBUFS;
3593
3594 tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN);
3595 tcp_mstamp_refresh(tp);
3596 tp->retrans_stamp = tcp_time_stamp(tp);
3597 tcp_connect_queue_skb(sk, buff);
3598 tcp_ecn_send_syn(sk, buff);
3599 tcp_rbtree_insert(&sk->tcp_rtx_queue, buff);
3600
3601
3602 err = tp->fastopen_req ? tcp_send_syn_data(sk, buff) :
3603 tcp_transmit_skb(sk, buff, 1, sk->sk_allocation);
3604 if (err == -ECONNREFUSED)
3605 return err;
3606
3607
3608
3609
3610 WRITE_ONCE(tp->snd_nxt, tp->write_seq);
3611 tp->pushed_seq = tp->write_seq;
3612 buff = tcp_send_head(sk);
3613 if (unlikely(buff)) {
3614 WRITE_ONCE(tp->snd_nxt, TCP_SKB_CB(buff)->seq);
3615 tp->pushed_seq = TCP_SKB_CB(buff)->seq;
3616 }
3617 TCP_INC_STATS(sock_net(sk), TCP_MIB_ACTIVEOPENS);
3618
3619
3620 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
3621 inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
3622 return 0;
3623 }
3624 EXPORT_SYMBOL(tcp_connect);
3625
3626 /* Send out a delayed ack, the caller does the policy checking
3627  * to see if we should even be here. See tcp_input.c:tcp_ack_snd_check()
3628  * for details.
3629  */
3630 void tcp_send_delayed_ack(struct sock *sk)
3631 {
3632 struct inet_connection_sock *icsk = inet_csk(sk);
3633 int ato = icsk->icsk_ack.ato;
3634 unsigned long timeout;
3635
3636 if (ato > TCP_DELACK_MIN) {
3637 const struct tcp_sock *tp = tcp_sk(sk);
3638 int max_ato = HZ / 2;
3639
3640 if (inet_csk_in_pingpong_mode(sk) ||
3641 (icsk->icsk_ack.pending & ICSK_ACK_PUSHED))
3642 max_ato = TCP_DELACK_MAX;
3643
3644
3645
3646
3647
3648
3649
3650 if (tp->srtt_us) {
3651 int rtt = max_t(int, usecs_to_jiffies(tp->srtt_us >> 3),
3652 TCP_DELACK_MIN);
3653
3654 if (rtt < max_ato)
3655 max_ato = rtt;
3656 }
3657
3658 ato = min(ato, max_ato);
3659 }
3660
3661
3662 timeout = jiffies + ato;
3663
3664
3665 if (icsk->icsk_ack.pending & ICSK_ACK_TIMER) {
3666
3667
3668
3669 if (icsk->icsk_ack.blocked ||
3670 time_before_eq(icsk->icsk_ack.timeout, jiffies + (ato >> 2))) {
3671 tcp_send_ack(sk);
3672 return;
3673 }
3674
3675 if (!time_before(timeout, icsk->icsk_ack.timeout))
3676 timeout = icsk->icsk_ack.timeout;
3677 }
3678 icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
3679 icsk->icsk_ack.timeout = timeout;
3680 sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout);
3681 }
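In effect the delayed-ACK delay above is the pending ato clamped from above: by half a second normally, by TCP_DELACK_MAX for interactive (pingpong) flows or when an ACK was already pushed, and further by the measured RTT (never below TCP_DELACK_MIN). A small sketch of the clamping with assumed millisecond constants:

#include <stdio.h>

static unsigned int min_u(unsigned int a, unsigned int b)
{
	return a < b ? a : b;
}

int main(void)
{
	unsigned int ato_ms = 180;        /* pending quick-ack timeout */
	unsigned int delack_min_ms = 40;  /* assumed TCP_DELACK_MIN */
	unsigned int delack_max_ms = 200; /* assumed TCP_DELACK_MAX */
	unsigned int srtt_ms = 60;        /* smoothed RTT sample */
	int pingpong = 1;                 /* interactive flow */

	unsigned int max_ato = pingpong ? delack_max_ms : 500; /* HZ/2 otherwise */
	unsigned int rtt_cap = srtt_ms > delack_min_ms ? srtt_ms : delack_min_ms;

	if (rtt_cap < max_ato)            /* the RTT further caps the delay */
		max_ato = rtt_cap;

	printf("delayed ACK fires in %u ms\n", min_u(ato_ms, max_ato)); /* 60 ms */
	return 0;
}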
3682
3683
3684 void __tcp_send_ack(struct sock *sk, u32 rcv_nxt)
3685 {
3686 struct sk_buff *buff;
3687
3688
3689 if (sk->sk_state == TCP_CLOSE)
3690 return;
3691
3692
3693
3694
3695
3696 buff = alloc_skb(MAX_TCP_HEADER,
3697 sk_gfp_mask(sk, GFP_ATOMIC | __GFP_NOWARN));
3698 if (unlikely(!buff)) {
3699 inet_csk_schedule_ack(sk);
3700 inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
3701 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
3702 TCP_DELACK_MAX, TCP_RTO_MAX);
3703 return;
3704 }
3705
3706
3707 skb_reserve(buff, MAX_TCP_HEADER);
3708 tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPHDR_ACK);
3709
3710
3711
3712
3713
3714 skb_set_tcp_pure_ack(buff);
3715
3716
3717 __tcp_transmit_skb(sk, buff, 0, (__force gfp_t)0, rcv_nxt);
3718 }
3719 EXPORT_SYMBOL_GPL(__tcp_send_ack);
3720
3721 void tcp_send_ack(struct sock *sk)
3722 {
3723 __tcp_send_ack(sk, tcp_sk(sk)->rcv_nxt);
3724 }
3725
3726 /* This routine sends a packet with an out of date sequence
3727  * number. It assumes the other end will try to ack it.
3728  *
3729  * Question: what should we make while urgent mode?
3730  * 4.4BSD forces sending single byte of data. We cannot send
3731  * out of window data, because we have SND.NXT==SND.MAX...
3732  *
3733  * Current solution: to send TWO zero-sized segments in urgent mode:
3734  * one is with SEG.SEQ=SND.UNA to deliver urgent pointer, another is
3735  * out-of-date with SND.UNA-1 to probe window.
3736  */
3737 static int tcp_xmit_probe_skb(struct sock *sk, int urgent, int mib)
3738 {
3739 struct tcp_sock *tp = tcp_sk(sk);
3740 struct sk_buff *skb;
3741
3742
3743 skb = alloc_skb(MAX_TCP_HEADER,
3744 sk_gfp_mask(sk, GFP_ATOMIC | __GFP_NOWARN));
3745 if (!skb)
3746 return -1;
3747
3748
3749 skb_reserve(skb, MAX_TCP_HEADER);
3750
3751
3752
3753
3754 tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK);
3755 NET_INC_STATS(sock_net(sk), mib);
3756 return tcp_transmit_skb(sk, skb, 0, (__force gfp_t)0);
3757 }
3758
3759
3760 void tcp_send_window_probe(struct sock *sk)
3761 {
3762 if (sk->sk_state == TCP_ESTABLISHED) {
3763 tcp_sk(sk)->snd_wl1 = tcp_sk(sk)->rcv_nxt - 1;
3764 tcp_mstamp_refresh(tcp_sk(sk));
3765 tcp_xmit_probe_skb(sk, 0, LINUX_MIB_TCPWINPROBE);
3766 }
3767 }
3768
3769 /* Initiate keepalive or window probe from timer. */
3770 int tcp_write_wakeup(struct sock *sk, int mib)
3771 {
3772 struct tcp_sock *tp = tcp_sk(sk);
3773 struct sk_buff *skb;
3774
3775 if (sk->sk_state == TCP_CLOSE)
3776 return -1;
3777
3778 skb = tcp_send_head(sk);
3779 if (skb && before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp))) {
3780 int err;
3781 unsigned int mss = tcp_current_mss(sk);
3782 unsigned int seg_size = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
3783
3784 if (before(tp->pushed_seq, TCP_SKB_CB(skb)->end_seq))
3785 tp->pushed_seq = TCP_SKB_CB(skb)->end_seq;
3786
3787
3788
3789
3790
3791 if (seg_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq ||
3792 skb->len > mss) {
3793 seg_size = min(seg_size, mss);
3794 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
3795 if (tcp_fragment(sk, TCP_FRAG_IN_WRITE_QUEUE,
3796 skb, seg_size, mss, GFP_ATOMIC))
3797 return -1;
3798 } else if (!tcp_skb_pcount(skb))
3799 tcp_set_skb_tso_segs(skb, mss);
3800
3801 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
3802 err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
3803 if (!err)
3804 tcp_event_new_data_sent(sk, skb);
3805 return err;
3806 } else {
3807 if (between(tp->snd_up, tp->snd_una + 1, tp->snd_una + 0xFFFF))
3808 tcp_xmit_probe_skb(sk, 1, mib);
3809 return tcp_xmit_probe_skb(sk, 0, mib);
3810 }
3811 }
3812
3813 /* A window probe timeout has occurred. If window is not closed send
3814  * a partial packet else a zero probe.
3815  */
3816 void tcp_send_probe0(struct sock *sk)
3817 {
3818 struct inet_connection_sock *icsk = inet_csk(sk);
3819 struct tcp_sock *tp = tcp_sk(sk);
3820 struct net *net = sock_net(sk);
3821 unsigned long timeout;
3822 int err;
3823
3824 err = tcp_write_wakeup(sk, LINUX_MIB_TCPWINPROBE);
3825
3826 if (tp->packets_out || tcp_write_queue_empty(sk)) {
3827
3828 icsk->icsk_probes_out = 0;
3829 icsk->icsk_backoff = 0;
3830 return;
3831 }
3832
3833 icsk->icsk_probes_out++;
3834 if (err <= 0) {
3835 if (icsk->icsk_backoff < net->ipv4.sysctl_tcp_retries2)
3836 icsk->icsk_backoff++;
3837 timeout = tcp_probe0_when(sk, TCP_RTO_MAX);
3838 } else {
3839
3840
3841
3842 timeout = TCP_RESOURCE_PROBE_INTERVAL;
3843 }
3844 tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0, timeout, TCP_RTO_MAX, NULL);
3845 }
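On repeated failures the zero-window probe interval grows exponentially: assuming tcp_probe0_when() (defined in tcp.h) shifts an RTO-derived base left by icsk_backoff and clamps at TCP_RTO_MAX — the usual backoff shape — the schedule looks like the sketch below, with a 300 ms base and a 120 s ceiling as stand-in values:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t base_ms = 300;   /* assumed probe base derived from the RTO */
	uint64_t max_ms = 120000; /* assumed TCP_RTO_MAX of 120 s */
	unsigned int backoff;

	/* Exponential backoff of the zero-window probe timer, clamped at the max. */
	for (backoff = 0; backoff <= 10; backoff++) {
		uint64_t when = base_ms << backoff;

		if (when > max_ms)
			when = max_ms;
		printf("backoff %2u -> probe in %6llu ms\n",
		       backoff, (unsigned long long)when);
	}
	return 0;
}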
3846
3847 int tcp_rtx_synack(const struct sock *sk, struct request_sock *req)
3848 {
3849 const struct tcp_request_sock_ops *af_ops = tcp_rsk(req)->af_specific;
3850 struct flowi fl;
3851 int res;
3852
3853 tcp_rsk(req)->txhash = net_tx_rndhash();
3854 res = af_ops->send_synack(sk, NULL, &fl, req, NULL, TCP_SYNACK_NORMAL);
3855 if (!res) {
3856 __TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS);
3857 __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
3858 if (unlikely(tcp_passive_fastopen(sk)))
3859 tcp_sk(sk)->total_retrans++;
3860 trace_tcp_retransmit_synack(sk, req);
3861 }
3862 return res;
3863 }
3864 EXPORT_SYMBOL(tcp_rtx_synack);