This source file includes the following definitions:
- clean_acked_data_enable
- clean_acked_data_disable
- clean_acked_data_flush
- tcp_gro_dev_warn
- tcp_measure_rcv_mss
- tcp_incr_quickack
- tcp_enter_quickack_mode
- tcp_in_quickack_mode
- tcp_ecn_queue_cwr
- tcp_ecn_accept_cwr
- tcp_ecn_withdraw_cwr
- __tcp_ecn_check_ce
- tcp_ecn_check_ce
- tcp_ecn_rcv_synack
- tcp_ecn_rcv_syn
- tcp_ecn_rcv_ecn_echo
- tcp_sndbuf_expand
- __tcp_grow_window
- tcp_grow_window
- tcp_init_buffer_space
- tcp_clamp_window
- tcp_initialize_rcv_mss
- tcp_rcv_rtt_update
- tcp_rcv_rtt_measure
- tcp_rcv_rtt_measure_ts
- tcp_rcv_space_adjust
- tcp_event_data_recv
- tcp_rtt_estimator
- tcp_update_pacing_rate
- tcp_set_rto
- tcp_init_cwnd
- tcp_dsack_seen
- tcp_check_sack_reordering
- tcp_verify_retransmit_hint
- tcp_sum_lost
- tcp_skb_mark_lost
- tcp_skb_mark_lost_uncond_verify
- tcp_is_sackblock_valid
- tcp_check_dsack
- tcp_match_skb_to_sack
- tcp_sacktag_one
- tcp_shifted_skb
- tcp_skb_seglen
- skb_can_shift
- tcp_skb_shift
- tcp_shift_skb_data
- tcp_sacktag_walk
- tcp_sacktag_bsearch
- tcp_sacktag_skip
- tcp_maybe_skipping_dsack
- tcp_sack_cache_ok
- tcp_sacktag_write_queue
- tcp_limit_reno_sacked
- tcp_check_reno_reordering
- tcp_add_reno_sack
- tcp_remove_reno_sacks
- tcp_reset_reno_sack
- tcp_clear_retrans
- tcp_init_undo
- tcp_is_rack
- tcp_timeout_mark_lost
- tcp_enter_loss
- tcp_check_sack_reneging
- tcp_dupack_heuristics
- tcp_time_to_recover
- tcp_mark_head_lost
- tcp_update_scoreboard
- tcp_tsopt_ecr_before
- tcp_skb_spurious_retrans
- tcp_packet_delayed
- tcp_any_retrans_done
- DBGUNDO
- tcp_undo_cwnd_reduction
- tcp_may_undo
- tcp_try_undo_recovery
- tcp_try_undo_dsack
- tcp_try_undo_loss
- tcp_init_cwnd_reduction
- tcp_cwnd_reduction
- tcp_end_cwnd_reduction
- tcp_enter_cwr
- tcp_try_keep_open
- tcp_try_to_open
- tcp_mtup_probe_failed
- tcp_mtup_probe_success
- tcp_simple_retransmit
- tcp_enter_recovery
- tcp_process_loss
- tcp_try_undo_partial
- tcp_identify_packet_loss
- tcp_force_fast_retransmit
- tcp_fastretrans_alert
- tcp_update_rtt_min
- tcp_ack_update_rtt
- tcp_synack_rtt_meas
- tcp_cong_avoid
- tcp_rearm_rto
- tcp_set_xmit_timer
- tcp_tso_acked
- tcp_ack_tstamp
- tcp_clean_rtx_queue
- tcp_ack_probe
- tcp_ack_is_dubious
- tcp_may_raise_cwnd
- tcp_cong_control
- tcp_may_update_window
- tcp_snd_una_update
- tcp_rcv_nxt_update
- tcp_ack_update_window
- __tcp_oow_rate_limited
- tcp_oow_rate_limited
- tcp_send_challenge_ack
- tcp_store_ts_recent
- tcp_replace_ts_recent
- tcp_process_tlp_ack
- tcp_in_ack_event
- tcp_xmit_recovery
- tcp_newly_delivered
- tcp_ack
- tcp_parse_fastopen_option
- smc_parse_options
- tcp_parse_mss_option
- tcp_parse_options
- tcp_parse_aligned_timestamp
- tcp_fast_parse_options
- tcp_parse_md5sig_option
- tcp_disordered_ack
- tcp_paws_discard
- tcp_sequence
- tcp_reset
- tcp_fin
- tcp_sack_extend
- tcp_dsack_set
- tcp_dsack_extend
- tcp_rcv_spurious_retrans
- tcp_send_dupack
- tcp_sack_maybe_coalesce
- tcp_sack_new_ofo_skb
- tcp_sack_remove
- tcp_try_coalesce
- tcp_ooo_try_coalesce
- tcp_drop
- tcp_ofo_queue
- tcp_try_rmem_schedule
- tcp_data_queue_ofo
- tcp_queue_rcv
- tcp_send_rcvq
- tcp_data_ready
- tcp_data_queue
- tcp_skb_next
- tcp_collapse_one
- tcp_rbtree_insert
- tcp_collapse
- tcp_collapse_ofo_queue
- tcp_prune_ofo_queue
- tcp_prune_queue
- tcp_should_expand_sndbuf
- tcp_new_space
- tcp_check_space
- tcp_data_snd_check
- __tcp_ack_snd_check
- tcp_ack_snd_check
- tcp_check_urg
- tcp_urg
- tcp_reset_check
- tcp_validate_incoming
- tcp_rcv_established
- tcp_init_transfer
- tcp_finish_connect
- tcp_rcv_fastopen_synack
- smc_check_reset_syn
- tcp_try_undo_spurious_syn
- tcp_rcv_synsent_state_process
- tcp_rcv_synrecv_state_fastopen
- tcp_rcv_state_process
- pr_drop_req
- tcp_ecn_create_request
- tcp_openreq_init
- inet_reqsk_alloc
- tcp_syn_flood_action
- tcp_reqsk_record_syn
- tcp_get_syncookie_mss
- tcp_conn_request
65 #define pr_fmt(fmt) "TCP: " fmt
66
67 #include <linux/mm.h>
68 #include <linux/slab.h>
69 #include <linux/module.h>
70 #include <linux/sysctl.h>
71 #include <linux/kernel.h>
72 #include <linux/prefetch.h>
73 #include <net/dst.h>
74 #include <net/tcp.h>
75 #include <net/inet_common.h>
76 #include <linux/ipsec.h>
77 #include <asm/unaligned.h>
78 #include <linux/errqueue.h>
79 #include <trace/events/tcp.h>
80 #include <linux/jump_label_ratelimit.h>
81 #include <net/busy_poll.h>
82
83 int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
84
85 #define FLAG_DATA 0x01
86 #define FLAG_WIN_UPDATE 0x02
87 #define FLAG_DATA_ACKED 0x04
88 #define FLAG_RETRANS_DATA_ACKED 0x08
89 #define FLAG_SYN_ACKED 0x10
90 #define FLAG_DATA_SACKED 0x20
91 #define FLAG_ECE 0x40
92 #define FLAG_LOST_RETRANS 0x80
93 #define FLAG_SLOWPATH 0x100
94 #define FLAG_ORIG_SACK_ACKED 0x200
95 #define FLAG_SND_UNA_ADVANCED 0x400
96 #define FLAG_DSACKING_ACK 0x800
97 #define FLAG_SET_XMIT_TIMER 0x1000
98 #define FLAG_SACK_RENEGING 0x2000
99 #define FLAG_UPDATE_TS_RECENT 0x4000
100 #define FLAG_NO_CHALLENGE_ACK 0x8000
101 #define FLAG_ACK_MAYBE_DELAYED 0x10000
102
103 #define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED)
104 #define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
105 #define FLAG_CA_ALERT (FLAG_DATA_SACKED|FLAG_ECE|FLAG_DSACKING_ACK)
106 #define FLAG_FORWARD_PROGRESS (FLAG_ACKED|FLAG_DATA_SACKED)
107
108 #define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)
109 #define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH))
110
111 #define REXMIT_NONE 0
112 #define REXMIT_LOST 1
113 #define REXMIT_NEW 2
114
115 #if IS_ENABLED(CONFIG_TLS_DEVICE)
116 static DEFINE_STATIC_KEY_DEFERRED_FALSE(clean_acked_data_enabled, HZ);
117
118 void clean_acked_data_enable(struct inet_connection_sock *icsk,
119 void (*cad)(struct sock *sk, u32 ack_seq))
120 {
121 icsk->icsk_clean_acked = cad;
122 static_branch_deferred_inc(&clean_acked_data_enabled);
123 }
124 EXPORT_SYMBOL_GPL(clean_acked_data_enable);
125
126 void clean_acked_data_disable(struct inet_connection_sock *icsk)
127 {
128 static_branch_slow_dec_deferred(&clean_acked_data_enabled);
129 icsk->icsk_clean_acked = NULL;
130 }
131 EXPORT_SYMBOL_GPL(clean_acked_data_disable);
132
133 void clean_acked_data_flush(void)
134 {
135 static_key_deferred_flush(&clean_acked_data_enabled);
136 }
137 EXPORT_SYMBOL_GPL(clean_acked_data_flush);
138 #endif
139
140 static void tcp_gro_dev_warn(struct sock *sk, const struct sk_buff *skb,
141 unsigned int len)
142 {
143 static bool __once __read_mostly;
144
145 if (!__once) {
146 struct net_device *dev;
147
148 __once = true;
149
150 rcu_read_lock();
151 dev = dev_get_by_index_rcu(sock_net(sk), skb->skb_iif);
152 if (!dev || len >= dev->mtu)
153 pr_warn("%s: Driver has suspect GRO implementation, TCP performance may be compromised.\n",
154 dev ? dev->name : "Unknown driver");
155 rcu_read_unlock();
156 }
157 }
158
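/* Adapt icsk_ack.rcv_mss to the segment sizes actually seen on the wire.
 * The estimate drives the delayed-ACK and receive-buffer heuristics;
 * frames far larger than the estimate trigger the one-time GRO driver
 * warning above.
 */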
162 static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb)
163 {
164 struct inet_connection_sock *icsk = inet_csk(sk);
165 const unsigned int lss = icsk->icsk_ack.last_seg_size;
166 unsigned int len;
167
168 icsk->icsk_ack.last_seg_size = 0;
169
170
171
172
173 len = skb_shinfo(skb)->gso_size ? : skb->len;
174 if (len >= icsk->icsk_ack.rcv_mss) {
175 icsk->icsk_ack.rcv_mss = min_t(unsigned int, len,
176 tcp_sk(sk)->advmss);
177
178 if (unlikely(len > icsk->icsk_ack.rcv_mss +
179 MAX_TCP_OPTION_SPACE))
180 tcp_gro_dev_warn(sk, skb, len);
181 } else {
182
183
184
185
186
187 len += skb->data - skb_transport_header(skb);
188 if (len >= TCP_MSS_DEFAULT + sizeof(struct tcphdr) ||
189
190
191
192
193
194 (len >= TCP_MIN_MSS + sizeof(struct tcphdr) &&
195 !(tcp_flag_word(tcp_hdr(skb)) & TCP_REMNANT))) {
196
197
198
199
200 len -= tcp_sk(sk)->tcp_header_len;
201 icsk->icsk_ack.last_seg_size = len;
202 if (len == lss) {
203 icsk->icsk_ack.rcv_mss = len;
204 return;
205 }
206 }
207 if (icsk->icsk_ack.pending & ICSK_ACK_PUSHED)
208 icsk->icsk_ack.pending |= ICSK_ACK_PUSHED2;
209 icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
210 }
211 }
212
213 static void tcp_incr_quickack(struct sock *sk, unsigned int max_quickacks)
214 {
215 struct inet_connection_sock *icsk = inet_csk(sk);
216 unsigned int quickacks = tcp_sk(sk)->rcv_wnd / (2 * icsk->icsk_ack.rcv_mss);
217
218 if (quickacks == 0)
219 quickacks = 2;
220 quickacks = min(quickacks, max_quickacks);
221 if (quickacks > icsk->icsk_ack.quick)
222 icsk->icsk_ack.quick = quickacks;
223 }
224
225 void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
226 {
227 struct inet_connection_sock *icsk = inet_csk(sk);
228
229 tcp_incr_quickack(sk, max_quickacks);
230 inet_csk_exit_pingpong_mode(sk);
231 icsk->icsk_ack.ato = TCP_ATO_MIN;
232 }
233 EXPORT_SYMBOL(tcp_enter_quickack_mode);
234
235
236
237
238
239 static bool tcp_in_quickack_mode(struct sock *sk)
240 {
241 const struct inet_connection_sock *icsk = inet_csk(sk);
242 const struct dst_entry *dst = __sk_dst_get(sk);
243
244 return (dst && dst_metric(dst, RTAX_QUICKACK)) ||
245 (icsk->icsk_ack.quick && !inet_csk_in_pingpong_mode(sk));
246 }
247
248 static void tcp_ecn_queue_cwr(struct tcp_sock *tp)
249 {
250 if (tp->ecn_flags & TCP_ECN_OK)
251 tp->ecn_flags |= TCP_ECN_QUEUE_CWR;
252 }
253
254 static void tcp_ecn_accept_cwr(struct sock *sk, const struct sk_buff *skb)
255 {
256 if (tcp_hdr(skb)->cwr) {
257 tcp_sk(sk)->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
258
259
260
261
262
263 inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
264 }
265 }
266
267 static void tcp_ecn_withdraw_cwr(struct tcp_sock *tp)
268 {
269 tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR;
270 }
271
272 static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb)
273 {
274 struct tcp_sock *tp = tcp_sk(sk);
275
276 switch (TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK) {
277 case INET_ECN_NOT_ECT:
278
279
280
281
282 if (tp->ecn_flags & TCP_ECN_SEEN)
283 tcp_enter_quickack_mode(sk, 2);
284 break;
285 case INET_ECN_CE:
286 if (tcp_ca_needs_ecn(sk))
287 tcp_ca_event(sk, CA_EVENT_ECN_IS_CE);
288
289 if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) {
290
291 tcp_enter_quickack_mode(sk, 2);
292 tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
293 }
294 tp->ecn_flags |= TCP_ECN_SEEN;
295 break;
296 default:
297 if (tcp_ca_needs_ecn(sk))
298 tcp_ca_event(sk, CA_EVENT_ECN_NO_CE);
299 tp->ecn_flags |= TCP_ECN_SEEN;
300 break;
301 }
302 }
303
304 static void tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb)
305 {
306 if (tcp_sk(sk)->ecn_flags & TCP_ECN_OK)
307 __tcp_ecn_check_ce(sk, skb);
308 }
309
310 static void tcp_ecn_rcv_synack(struct tcp_sock *tp, const struct tcphdr *th)
311 {
312 if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || th->cwr))
313 tp->ecn_flags &= ~TCP_ECN_OK;
314 }
315
316 static void tcp_ecn_rcv_syn(struct tcp_sock *tp, const struct tcphdr *th)
317 {
318 if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || !th->cwr))
319 tp->ecn_flags &= ~TCP_ECN_OK;
320 }
321
322 static bool tcp_ecn_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr *th)
323 {
324 if (th->ece && !th->syn && (tp->ecn_flags & TCP_ECN_OK))
325 return true;
326 return false;
327 }
328
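/* Size sk_sndbuf so that a full congestion window (at least the initial
 * window, and at least reordering + 1 segments) of max-sized skbs fits,
 * honouring the per-netns tcp_wmem[2] limit.  The congestion module may
 * scale this via ->sndbuf_expand (default factor 2).
 */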
334 static void tcp_sndbuf_expand(struct sock *sk)
335 {
336 const struct tcp_sock *tp = tcp_sk(sk);
337 const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
338 int sndmem, per_mss;
339 u32 nr_segs;
340
341
342
343
344 per_mss = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) +
345 MAX_TCP_HEADER +
346 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
347
348 per_mss = roundup_pow_of_two(per_mss) +
349 SKB_DATA_ALIGN(sizeof(struct sk_buff));
350
351 nr_segs = max_t(u32, TCP_INIT_CWND, tp->snd_cwnd);
352 nr_segs = max_t(u32, nr_segs, tp->reordering + 1);
353
354
355
356
357
358 sndmem = ca_ops->sndbuf_expand ? ca_ops->sndbuf_expand(sk) : 2;
359 sndmem *= nr_segs * per_mss;
360
361 if (sk->sk_sndbuf < sndmem)
362 WRITE_ONCE(sk->sk_sndbuf,
363 min(sndmem, sock_net(sk)->ipv4.sysctl_tcp_wmem[2]));
364 }
365
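/* Slow-path check used by tcp_grow_window(): decide whether the
 * advertised-window ceiling (rcv_ssthresh) may keep growing for skbs
 * with this payload/truesize ratio, scaling both against tcp_rmem[2].
 * Returns the increment to apply (2 * rcv_mss) or 0.
 */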
392 static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
393 {
394 struct tcp_sock *tp = tcp_sk(sk);
395
396 int truesize = tcp_win_from_space(sk, skb->truesize) >> 1;
397 int window = tcp_win_from_space(sk, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1;
398
399 while (tp->rcv_ssthresh <= window) {
400 if (truesize <= skb->len)
401 return 2 * inet_csk(sk)->icsk_ack.rcv_mss;
402
403 truesize >>= 1;
404 window >>= 1;
405 }
406 return 0;
407 }
408
409 static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
410 {
411 struct tcp_sock *tp = tcp_sk(sk);
412 int room;
413
414 room = min_t(int, tp->window_clamp, tcp_space(sk)) - tp->rcv_ssthresh;
415
416
417 if (room > 0 && !tcp_under_memory_pressure(sk)) {
418 int incr;
419
420
421
422
423 if (tcp_win_from_space(sk, skb->truesize) <= skb->len)
424 incr = 2 * tp->advmss;
425 else
426 incr = __tcp_grow_window(sk, skb);
427
428 if (incr) {
429 incr = max_t(int, incr, 2 * skb->len);
430 tp->rcv_ssthresh += min(room, incr);
431 inet_csk(sk)->icsk_ack.quick |= 1;
432 }
433 }
434 }
435
436
437
438
439 void tcp_init_buffer_space(struct sock *sk)
440 {
441 int tcp_app_win = sock_net(sk)->ipv4.sysctl_tcp_app_win;
442 struct tcp_sock *tp = tcp_sk(sk);
443 int maxwin;
444
445 if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK))
446 tcp_sndbuf_expand(sk);
447
448 tp->rcvq_space.space = min_t(u32, tp->rcv_wnd, TCP_INIT_CWND * tp->advmss);
449 tcp_mstamp_refresh(tp);
450 tp->rcvq_space.time = tp->tcp_mstamp;
451 tp->rcvq_space.seq = tp->copied_seq;
452
453 maxwin = tcp_full_space(sk);
454
455 if (tp->window_clamp >= maxwin) {
456 tp->window_clamp = maxwin;
457
458 if (tcp_app_win && maxwin > 4 * tp->advmss)
459 tp->window_clamp = max(maxwin -
460 (maxwin >> tcp_app_win),
461 4 * tp->advmss);
462 }
463
464
465 if (tcp_app_win &&
466 tp->window_clamp > 2 * tp->advmss &&
467 tp->window_clamp + tp->advmss > maxwin)
468 tp->window_clamp = max(2 * tp->advmss, maxwin - tp->advmss);
469
470 tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp);
471 tp->snd_cwnd_stamp = tcp_jiffies32;
472 }
473
474
475 static void tcp_clamp_window(struct sock *sk)
476 {
477 struct tcp_sock *tp = tcp_sk(sk);
478 struct inet_connection_sock *icsk = inet_csk(sk);
479 struct net *net = sock_net(sk);
480
481 icsk->icsk_ack.quick = 0;
482
483 if (sk->sk_rcvbuf < net->ipv4.sysctl_tcp_rmem[2] &&
484 !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
485 !tcp_under_memory_pressure(sk) &&
486 sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) {
487 WRITE_ONCE(sk->sk_rcvbuf,
488 min(atomic_read(&sk->sk_rmem_alloc),
489 net->ipv4.sysctl_tcp_rmem[2]));
490 }
491 if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
492 tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss);
493 }
494
495
496
497
498
499
500
501
502 void tcp_initialize_rcv_mss(struct sock *sk)
503 {
504 const struct tcp_sock *tp = tcp_sk(sk);
505 unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache);
506
507 hint = min(hint, tp->rcv_wnd / 2);
508 hint = min(hint, TCP_MSS_DEFAULT);
509 hint = max(hint, TCP_MIN_MSS);
510
511 inet_csk(sk)->icsk_ack.rcv_mss = hint;
512 }
513 EXPORT_SYMBOL(tcp_initialize_rcv_mss);
514
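/* Receiver-side RTT estimator, used when the peer provides no usable
 * timestamps.  With win_dep == 0 the sample is folded in with an EWMA
 * (gain 1/8); with win_dep != 0 the estimate may only decrease (a min
 * filter), since these once-per-window samples can be inflated.
 */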
526 static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
527 {
528 u32 new_sample = tp->rcv_rtt_est.rtt_us;
529 long m = sample;
530
531 if (new_sample != 0) {
532
533
534
535
536
537
538
539
540
541
542 if (!win_dep) {
543 m -= (new_sample >> 3);
544 new_sample += m;
545 } else {
546 m <<= 3;
547 if (m < new_sample)
548 new_sample = m;
549 }
550 } else {
551
552 new_sample = m << 3;
553 }
554
555 tp->rcv_rtt_est.rtt_us = new_sample;
556 }
557
558 static inline void tcp_rcv_rtt_measure(struct tcp_sock *tp)
559 {
560 u32 delta_us;
561
562 if (tp->rcv_rtt_est.time == 0)
563 goto new_measure;
564 if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq))
565 return;
566 delta_us = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcv_rtt_est.time);
567 if (!delta_us)
568 delta_us = 1;
569 tcp_rcv_rtt_update(tp, delta_us, 1);
570
571 new_measure:
572 tp->rcv_rtt_est.seq = tp->rcv_nxt + tp->rcv_wnd;
573 tp->rcv_rtt_est.time = tp->tcp_mstamp;
574 }
575
576 static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
577 const struct sk_buff *skb)
578 {
579 struct tcp_sock *tp = tcp_sk(sk);
580
581 if (tp->rx_opt.rcv_tsecr == tp->rcv_rtt_last_tsecr)
582 return;
583 tp->rcv_rtt_last_tsecr = tp->rx_opt.rcv_tsecr;
584
585 if (TCP_SKB_CB(skb)->end_seq -
586 TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss) {
587 u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
588 u32 delta_us;
589
590 if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) {
591 if (!delta)
592 delta = 1;
593 delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
594 tcp_rcv_rtt_update(tp, delta_us, 0);
595 }
596 }
597 }
598
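/* Called when data is copied to user space: once per RTT, compare how
 * much the application consumed with rcvq_space.space and, if
 * tcp_moderate_rcvbuf allows it, grow sk_rcvbuf and window_clamp so the
 * sender is not limited by our advertised window.
 */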
603 void tcp_rcv_space_adjust(struct sock *sk)
604 {
605 struct tcp_sock *tp = tcp_sk(sk);
606 u32 copied;
607 int time;
608
609 trace_tcp_rcv_space_adjust(sk);
610
611 tcp_mstamp_refresh(tp);
612 time = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcvq_space.time);
613 if (time < (tp->rcv_rtt_est.rtt_us >> 3) || tp->rcv_rtt_est.rtt_us == 0)
614 return;
615
616
617 copied = tp->copied_seq - tp->rcvq_space.seq;
618 if (copied <= tp->rcvq_space.space)
619 goto new_measure;
620
621
622
623
624
625
626
627
628
629
630 if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf &&
631 !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
632 int rcvmem, rcvbuf;
633 u64 rcvwin, grow;
634
635
636
637
638 rcvwin = ((u64)copied << 1) + 16 * tp->advmss;
639
640
641 grow = rcvwin * (copied - tp->rcvq_space.space);
642 do_div(grow, tp->rcvq_space.space);
643 rcvwin += (grow << 1);
644
645 rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
646 while (tcp_win_from_space(sk, rcvmem) < tp->advmss)
647 rcvmem += 128;
648
649 do_div(rcvwin, tp->advmss);
650 rcvbuf = min_t(u64, rcvwin * rcvmem,
651 sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
652 if (rcvbuf > sk->sk_rcvbuf) {
653 WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);
654
655
656 tp->window_clamp = tcp_win_from_space(sk, rcvbuf);
657 }
658 }
659 tp->rcvq_space.space = copied;
660
661 new_measure:
662 tp->rcvq_space.seq = tp->copied_seq;
663 tp->rcvq_space.time = tp->tcp_mstamp;
664 }
665
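/* Per-segment receive bookkeeping for data packets: schedule an ACK,
 * refresh the rcv_mss estimate and receiver RTT, adapt the delayed-ACK
 * timeout (ato), run the ECN CE check and, for sufficiently large
 * segments, try to grow the advertised window.
 */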
676 static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
677 {
678 struct tcp_sock *tp = tcp_sk(sk);
679 struct inet_connection_sock *icsk = inet_csk(sk);
680 u32 now;
681
682 inet_csk_schedule_ack(sk);
683
684 tcp_measure_rcv_mss(sk, skb);
685
686 tcp_rcv_rtt_measure(tp);
687
688 now = tcp_jiffies32;
689
690 if (!icsk->icsk_ack.ato) {
691
692
693
694 tcp_incr_quickack(sk, TCP_MAX_QUICKACKS);
695 icsk->icsk_ack.ato = TCP_ATO_MIN;
696 } else {
697 int m = now - icsk->icsk_ack.lrcvtime;
698
699 if (m <= TCP_ATO_MIN / 2) {
700
701 icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + TCP_ATO_MIN / 2;
702 } else if (m < icsk->icsk_ack.ato) {
703 icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + m;
704 if (icsk->icsk_ack.ato > icsk->icsk_rto)
705 icsk->icsk_ack.ato = icsk->icsk_rto;
706 } else if (m > icsk->icsk_rto) {
707
708
709
710 tcp_incr_quickack(sk, TCP_MAX_QUICKACKS);
711 sk_mem_reclaim(sk);
712 }
713 }
714 icsk->icsk_ack.lrcvtime = now;
715
716 tcp_ecn_check_ce(sk, skb);
717
718 if (skb->len >= 128)
719 tcp_grow_window(sk, skb);
720 }
721
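/* RTT estimator in the style of Jacobson/Karels (see also RFC 6298):
 * maintain a smoothed RTT (srtt, stored << 3) and a mean deviation
 * (mdev/rttvar) that together determine the retransmission timeout.
 */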
731 static void tcp_rtt_estimator(struct sock *sk, long mrtt_us)
732 {
733 struct tcp_sock *tp = tcp_sk(sk);
734 long m = mrtt_us;
735 u32 srtt = tp->srtt_us;
736
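/* Classic Jacobson update: srtt (kept << 3) gains 1/8 of the error and
 * mdev (kept << 2) gains 1/4 of |error|.  Increases of mdev caused by
 * samples shorter than srtt are further damped (>> 3) so sudden short
 * RTTs do not distort rttvar; mdev_max_us tracks the largest deviation
 * seen in the current round trip and feeds rttvar_us.
 */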
753 if (srtt != 0) {
754 m -= (srtt >> 3);
755 srtt += m;
756 if (m < 0) {
757 m = -m;
758 m -= (tp->mdev_us >> 2);
759
760
761
762
763
764
765
766
767 if (m > 0)
768 m >>= 3;
769 } else {
770 m -= (tp->mdev_us >> 2);
771 }
772 tp->mdev_us += m;
773 if (tp->mdev_us > tp->mdev_max_us) {
774 tp->mdev_max_us = tp->mdev_us;
775 if (tp->mdev_max_us > tp->rttvar_us)
776 tp->rttvar_us = tp->mdev_max_us;
777 }
778 if (after(tp->snd_una, tp->rtt_seq)) {
779 if (tp->mdev_max_us < tp->rttvar_us)
780 tp->rttvar_us -= (tp->rttvar_us - tp->mdev_max_us) >> 2;
781 tp->rtt_seq = tp->snd_nxt;
782 tp->mdev_max_us = tcp_rto_min_us(sk);
783
784 tcp_bpf_rtt(sk);
785 }
786 } else {
787
788 srtt = m << 3;
789 tp->mdev_us = m << 1;
790 tp->rttvar_us = max(tp->mdev_us, tcp_rto_min_us(sk));
791 tp->mdev_max_us = tp->rttvar_us;
792 tp->rtt_seq = tp->snd_nxt;
793
794 tcp_bpf_rtt(sk);
795 }
796 tp->srtt_us = max(1U, srtt);
797 }
798
799 static void tcp_update_pacing_rate(struct sock *sk)
800 {
801 const struct tcp_sock *tp = tcp_sk(sk);
802 u64 rate;
803
804
805 rate = (u64)tp->mss_cache * ((USEC_PER_SEC / 100) << 3);
806
807
808
809
810
811
812
813
814
815 if (tp->snd_cwnd < tp->snd_ssthresh / 2)
816 rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ss_ratio;
817 else
818 rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ca_ratio;
819
820 rate *= max(tp->snd_cwnd, tp->packets_out);
821
822 if (likely(tp->srtt_us))
823 do_div(rate, tp->srtt_us);
824
825
826
827
828
829 WRITE_ONCE(sk->sk_pacing_rate, min_t(u64, rate,
830 sk->sk_max_pacing_rate));
831 }
832
833
834
835
836 static void tcp_set_rto(struct sock *sk)
837 {
838 const struct tcp_sock *tp = tcp_sk(sk);
839
840
841
842
843
844
845
846
847
848
849 inet_csk(sk)->icsk_rto = __tcp_set_rto(tp);
850
851
852
853
854
855
856
857
858
859
860 tcp_bound_rto(sk);
861 }
862
863 __u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst)
864 {
865 __u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0);
866
867 if (!cwnd)
868 cwnd = TCP_INIT_CWND;
869 return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
870 }
871
872
873 static void tcp_dsack_seen(struct tcp_sock *tp)
874 {
875 tp->rx_opt.sack_ok |= TCP_DSACK_SEEN;
876 tp->rack.dsack_seen = 1;
877 tp->dsack_dups++;
878 }
879
880
881
882
883
884 static void tcp_check_sack_reordering(struct sock *sk, const u32 low_seq,
885 const int ts)
886 {
887 struct tcp_sock *tp = tcp_sk(sk);
888 const u32 mss = tp->mss_cache;
889 u32 fack, metric;
890
891 fack = tcp_highest_sack_seq(tp);
892 if (!before(low_seq, fack))
893 return;
894
895 metric = fack - low_seq;
896 if ((metric > tp->reordering * mss) && mss) {
897 #if FASTRETRANS_DEBUG > 1
898 pr_debug("Disorder%d %d %u f%u s%u rr%d\n",
899 tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state,
900 tp->reordering,
901 0,
902 tp->sacked_out,
903 tp->undo_marker ? tp->undo_retrans : 0);
904 #endif
905 tp->reordering = min_t(u32, (metric + mss - 1) / mss,
906 sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
907 }
908
909
910 tp->reord_seen++;
911 NET_INC_STATS(sock_net(sk),
912 ts ? LINUX_MIB_TCPTSREORDER : LINUX_MIB_TCPSACKREORDER);
913 }
914
915
916 static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb)
917 {
918 if ((!tp->retransmit_skb_hint && tp->retrans_out >= tp->lost_out) ||
919 (tp->retransmit_skb_hint &&
920 before(TCP_SKB_CB(skb)->seq,
921 TCP_SKB_CB(tp->retransmit_skb_hint)->seq)))
922 tp->retransmit_skb_hint = skb;
923 }
924
925
926
927
928
929
930
931
932 static void tcp_sum_lost(struct tcp_sock *tp, struct sk_buff *skb)
933 {
934 __u8 sacked = TCP_SKB_CB(skb)->sacked;
935
936 if (!(sacked & TCPCB_LOST) ||
937 ((sacked & TCPCB_LOST) && (sacked & TCPCB_SACKED_RETRANS)))
938 tp->lost += tcp_skb_pcount(skb);
939 }
940
941 static void tcp_skb_mark_lost(struct tcp_sock *tp, struct sk_buff *skb)
942 {
943 if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) {
944 tcp_verify_retransmit_hint(tp, skb);
945
946 tp->lost_out += tcp_skb_pcount(skb);
947 tcp_sum_lost(tp, skb);
948 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
949 }
950 }
951
952 void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb)
953 {
954 tcp_verify_retransmit_hint(tp, skb);
955
956 tcp_sum_lost(tp, skb);
957 if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) {
958 tp->lost_out += tcp_skb_pcount(skb);
959 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
960 }
961 }
962
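/* Validate a SACK block received from the peer.  Ordinary blocks must
 * lie strictly above snd_una and must not reach beyond snd_nxt; D-SACK
 * blocks may additionally cover already-ACKed data back to undo_marker,
 * with a final sanity check against max_window for anything older.
 */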
1056 static bool tcp_is_sackblock_valid(struct tcp_sock *tp, bool is_dsack,
1057 u32 start_seq, u32 end_seq)
1058 {
1059
1060 if (after(end_seq, tp->snd_nxt) || !before(start_seq, end_seq))
1061 return false;
1062
1063
1064 if (!before(start_seq, tp->snd_nxt))
1065 return false;
1066
1067
1068
1069
1070 if (after(start_seq, tp->snd_una))
1071 return true;
1072
1073 if (!is_dsack || !tp->undo_marker)
1074 return false;
1075
1076
1077 if (after(end_seq, tp->snd_una))
1078 return false;
1079
1080 if (!before(start_seq, tp->undo_marker))
1081 return true;
1082
1083
1084 if (!after(end_seq, tp->undo_marker))
1085 return false;
1086
1087
1088
1089
1090 return !before(start_seq, end_seq - tp->max_window);
1091 }
1092
1093 static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
1094 struct tcp_sack_block_wire *sp, int num_sacks,
1095 u32 prior_snd_una)
1096 {
1097 struct tcp_sock *tp = tcp_sk(sk);
1098 u32 start_seq_0 = get_unaligned_be32(&sp[0].start_seq);
1099 u32 end_seq_0 = get_unaligned_be32(&sp[0].end_seq);
1100 bool dup_sack = false;
1101
1102 if (before(start_seq_0, TCP_SKB_CB(ack_skb)->ack_seq)) {
1103 dup_sack = true;
1104 tcp_dsack_seen(tp);
1105 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKRECV);
1106 } else if (num_sacks > 1) {
1107 u32 end_seq_1 = get_unaligned_be32(&sp[1].end_seq);
1108 u32 start_seq_1 = get_unaligned_be32(&sp[1].start_seq);
1109
1110 if (!after(end_seq_0, end_seq_1) &&
1111 !before(start_seq_0, start_seq_1)) {
1112 dup_sack = true;
1113 tcp_dsack_seen(tp);
1114 NET_INC_STATS(sock_net(sk),
1115 LINUX_MIB_TCPDSACKOFORECV);
1116 }
1117 }
1118
1119
1120 if (dup_sack && tp->undo_marker && tp->undo_retrans > 0 &&
1121 !after(end_seq_0, prior_snd_una) &&
1122 after(end_seq_0, tp->undo_marker))
1123 tp->undo_retrans--;
1124
1125 return dup_sack;
1126 }
1127
1128 struct tcp_sacktag_state {
1129 u32 reord;
1130
1131
1132
1133
1134 u64 first_sackt;
1135 u64 last_sackt;
1136 struct rate_sample *rate;
1137 int flag;
1138 unsigned int mss_now;
1139 };
1140
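/* Check how an skb in the retransmit queue relates to a SACK block.
 * If the skb is only partially covered, split it on an MSS boundary so
 * the covered part can be tagged on its own.  Returns 1 if the
 * (possibly trimmed) skb lies inside the block, 0 if not, < 0 on error.
 */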
1149 static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
1150 u32 start_seq, u32 end_seq)
1151 {
1152 int err;
1153 bool in_sack;
1154 unsigned int pkt_len;
1155 unsigned int mss;
1156
1157 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
1158 !before(end_seq, TCP_SKB_CB(skb)->end_seq);
1159
1160 if (tcp_skb_pcount(skb) > 1 && !in_sack &&
1161 after(TCP_SKB_CB(skb)->end_seq, start_seq)) {
1162 mss = tcp_skb_mss(skb);
1163 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq);
1164
1165 if (!in_sack) {
1166 pkt_len = start_seq - TCP_SKB_CB(skb)->seq;
1167 if (pkt_len < mss)
1168 pkt_len = mss;
1169 } else {
1170 pkt_len = end_seq - TCP_SKB_CB(skb)->seq;
1171 if (pkt_len < mss)
1172 return -EINVAL;
1173 }
1174
1175
1176
1177
1178 if (pkt_len > mss) {
1179 unsigned int new_len = (pkt_len / mss) * mss;
1180 if (!in_sack && new_len < pkt_len)
1181 new_len += mss;
1182 pkt_len = new_len;
1183 }
1184
1185 if (pkt_len >= skb->len && !in_sack)
1186 return 0;
1187
1188 err = tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb,
1189 pkt_len, mss, GFP_ATOMIC);
1190 if (err < 0)
1191 return err;
1192 }
1193
1194 return in_sack;
1195 }
1196
1197
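/* Mark a newly SACKed sequence range: clear LOST/RETRANS bookkeeping as
 * appropriate, record reordering and SACK RTT samples, and add the
 * range to sacked_out/delivered.  It works on a (start_seq, end_seq)
 * range rather than the skb itself because the payload may just have
 * been shifted into a neighbouring skb (see tcp_shifted_skb()).
 */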
1198 static u8 tcp_sacktag_one(struct sock *sk,
1199 struct tcp_sacktag_state *state, u8 sacked,
1200 u32 start_seq, u32 end_seq,
1201 int dup_sack, int pcount,
1202 u64 xmit_time)
1203 {
1204 struct tcp_sock *tp = tcp_sk(sk);
1205
1206
1207 if (dup_sack && (sacked & TCPCB_RETRANS)) {
1208 if (tp->undo_marker && tp->undo_retrans > 0 &&
1209 after(end_seq, tp->undo_marker))
1210 tp->undo_retrans--;
1211 if ((sacked & TCPCB_SACKED_ACKED) &&
1212 before(start_seq, state->reord))
1213 state->reord = start_seq;
1214 }
1215
1216
1217 if (!after(end_seq, tp->snd_una))
1218 return sacked;
1219
1220 if (!(sacked & TCPCB_SACKED_ACKED)) {
1221 tcp_rack_advance(tp, sacked, end_seq, xmit_time);
1222
1223 if (sacked & TCPCB_SACKED_RETRANS) {
1224
1225
1226
1227
1228 if (sacked & TCPCB_LOST) {
1229 sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
1230 tp->lost_out -= pcount;
1231 tp->retrans_out -= pcount;
1232 }
1233 } else {
1234 if (!(sacked & TCPCB_RETRANS)) {
1235
1236
1237
1238 if (before(start_seq,
1239 tcp_highest_sack_seq(tp)) &&
1240 before(start_seq, state->reord))
1241 state->reord = start_seq;
1242
1243 if (!after(end_seq, tp->high_seq))
1244 state->flag |= FLAG_ORIG_SACK_ACKED;
1245 if (state->first_sackt == 0)
1246 state->first_sackt = xmit_time;
1247 state->last_sackt = xmit_time;
1248 }
1249
1250 if (sacked & TCPCB_LOST) {
1251 sacked &= ~TCPCB_LOST;
1252 tp->lost_out -= pcount;
1253 }
1254 }
1255
1256 sacked |= TCPCB_SACKED_ACKED;
1257 state->flag |= FLAG_DATA_SACKED;
1258 tp->sacked_out += pcount;
1259 tp->delivered += pcount;
1260
1261
1262 if (tp->lost_skb_hint &&
1263 before(start_seq, TCP_SKB_CB(tp->lost_skb_hint)->seq))
1264 tp->lost_cnt_hint += pcount;
1265 }
1266
1267
1268
1269
1270
1271 if (dup_sack && (sacked & TCPCB_SACKED_RETRANS)) {
1272 sacked &= ~TCPCB_SACKED_RETRANS;
1273 tp->retrans_out -= pcount;
1274 }
1275
1276 return sacked;
1277 }
1278
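/* Account for 'shifted' bytes that were moved from 'skb' into the
 * preceding, already-SACKed 'prev': tag the range as SACKed, transfer
 * pcount/GSO state and hints, and free 'skb' once it has been emptied.
 */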
1282 static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *prev,
1283 struct sk_buff *skb,
1284 struct tcp_sacktag_state *state,
1285 unsigned int pcount, int shifted, int mss,
1286 bool dup_sack)
1287 {
1288 struct tcp_sock *tp = tcp_sk(sk);
1289 u32 start_seq = TCP_SKB_CB(skb)->seq;
1290 u32 end_seq = start_seq + shifted;
1291
1292 BUG_ON(!pcount);
1293
1294
1295
1296
1297
1298
1299
1300 tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked,
1301 start_seq, end_seq, dup_sack, pcount,
1302 tcp_skb_timestamp_us(skb));
1303 tcp_rate_skb_delivered(sk, skb, state->rate);
1304
1305 if (skb == tp->lost_skb_hint)
1306 tp->lost_cnt_hint += pcount;
1307
1308 TCP_SKB_CB(prev)->end_seq += shifted;
1309 TCP_SKB_CB(skb)->seq += shifted;
1310
1311 tcp_skb_pcount_add(prev, pcount);
1312 WARN_ON_ONCE(tcp_skb_pcount(skb) < pcount);
1313 tcp_skb_pcount_add(skb, -pcount);
1314
1315
1316
1317
1318
1319
1320 if (!TCP_SKB_CB(prev)->tcp_gso_size)
1321 TCP_SKB_CB(prev)->tcp_gso_size = mss;
1322
1323
1324 if (tcp_skb_pcount(skb) <= 1)
1325 TCP_SKB_CB(skb)->tcp_gso_size = 0;
1326
1327
1328 TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS);
1329
1330 if (skb->len > 0) {
1331 BUG_ON(!tcp_skb_pcount(skb));
1332 NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKSHIFTED);
1333 return false;
1334 }
1335
1336
1337
1338 if (skb == tp->retransmit_skb_hint)
1339 tp->retransmit_skb_hint = prev;
1340 if (skb == tp->lost_skb_hint) {
1341 tp->lost_skb_hint = prev;
1342 tp->lost_cnt_hint -= tcp_skb_pcount(prev);
1343 }
1344
1345 TCP_SKB_CB(prev)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
1346 TCP_SKB_CB(prev)->eor = TCP_SKB_CB(skb)->eor;
1347 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
1348 TCP_SKB_CB(prev)->end_seq++;
1349
1350 if (skb == tcp_highest_sack(sk))
1351 tcp_advance_highest_sack(sk, skb);
1352
1353 tcp_skb_collapse_tstamp(prev, skb);
1354 if (unlikely(TCP_SKB_CB(prev)->tx.delivered_mstamp))
1355 TCP_SKB_CB(prev)->tx.delivered_mstamp = 0;
1356
1357 tcp_rtx_queue_unlink_and_free(skb, sk);
1358
1359 NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKMERGED);
1360
1361 return true;
1362 }
1363
1364
1365
1366
1367 static int tcp_skb_seglen(const struct sk_buff *skb)
1368 {
1369 return tcp_skb_pcount(skb) == 1 ? skb->len : tcp_skb_mss(skb);
1370 }
1371
1372
1373 static int skb_can_shift(const struct sk_buff *skb)
1374 {
1375 return !skb_headlen(skb) && skb_is_nonlinear(skb);
1376 }
1377
1378 int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from,
1379 int pcount, int shiftlen)
1380 {
1381
1382
1383
1384
1385
1386 if (unlikely(to->len + shiftlen >= 65535 * TCP_MIN_GSO_SIZE))
1387 return 0;
1388 if (unlikely(tcp_skb_pcount(to) + pcount > 65535))
1389 return 0;
1390 return skb_shift(to, from, shiftlen);
1391 }
1392
1393
1394
1395
1396 static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
1397 struct tcp_sacktag_state *state,
1398 u32 start_seq, u32 end_seq,
1399 bool dup_sack)
1400 {
1401 struct tcp_sock *tp = tcp_sk(sk);
1402 struct sk_buff *prev;
1403 int mss;
1404 int pcount = 0;
1405 int len;
1406 int in_sack;
1407
1408
1409 if (!dup_sack &&
1410 (TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_RETRANS)) == TCPCB_SACKED_RETRANS)
1411 goto fallback;
1412 if (!skb_can_shift(skb))
1413 goto fallback;
1414
1415 if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
1416 goto fallback;
1417
1418
1419 prev = skb_rb_prev(skb);
1420 if (!prev)
1421 goto fallback;
1422
1423 if ((TCP_SKB_CB(prev)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED)
1424 goto fallback;
1425
1426 if (!tcp_skb_can_collapse_to(prev))
1427 goto fallback;
1428
1429 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
1430 !before(end_seq, TCP_SKB_CB(skb)->end_seq);
1431
1432 if (in_sack) {
1433 len = skb->len;
1434 pcount = tcp_skb_pcount(skb);
1435 mss = tcp_skb_seglen(skb);
1436
1437
1438
1439
1440 if (mss != tcp_skb_seglen(prev))
1441 goto fallback;
1442 } else {
1443 if (!after(TCP_SKB_CB(skb)->end_seq, start_seq))
1444 goto noop;
1445
1446
1447
1448
1449 if (tcp_skb_pcount(skb) <= 1)
1450 goto noop;
1451
1452 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq);
1453 if (!in_sack) {
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465 goto fallback;
1466 }
1467
1468 len = end_seq - TCP_SKB_CB(skb)->seq;
1469 BUG_ON(len < 0);
1470 BUG_ON(len > skb->len);
1471
1472
1473
1474
1475
1476 mss = tcp_skb_mss(skb);
1477
1478
1479
1480
1481 if (mss != tcp_skb_seglen(prev))
1482 goto fallback;
1483
1484 if (len == mss) {
1485 pcount = 1;
1486 } else if (len < mss) {
1487 goto noop;
1488 } else {
1489 pcount = len / mss;
1490 len = pcount * mss;
1491 }
1492 }
1493
1494
1495 if (!after(TCP_SKB_CB(skb)->seq + len, tp->snd_una))
1496 goto fallback;
1497
1498 if (!tcp_skb_shift(prev, skb, pcount, len))
1499 goto fallback;
1500 if (!tcp_shifted_skb(sk, prev, skb, state, pcount, len, mss, dup_sack))
1501 goto out;
1502
1503
1504
1505
1506 skb = skb_rb_next(prev);
1507 if (!skb)
1508 goto out;
1509
1510 if (!skb_can_shift(skb) ||
1511 ((TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED) ||
1512 (mss != tcp_skb_seglen(skb)))
1513 goto out;
1514
1515 len = skb->len;
1516 pcount = tcp_skb_pcount(skb);
1517 if (tcp_skb_shift(prev, skb, pcount, len))
1518 tcp_shifted_skb(sk, prev, skb, state, pcount,
1519 len, mss, 0);
1520
1521 out:
1522 return prev;
1523
1524 noop:
1525 return skb;
1526
1527 fallback:
1528 NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKSHIFTFALLBACK);
1529 return NULL;
1530 }
1531
1532 static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
1533 struct tcp_sack_block *next_dup,
1534 struct tcp_sacktag_state *state,
1535 u32 start_seq, u32 end_seq,
1536 bool dup_sack_in)
1537 {
1538 struct tcp_sock *tp = tcp_sk(sk);
1539 struct sk_buff *tmp;
1540
1541 skb_rbtree_walk_from(skb) {
1542 int in_sack = 0;
1543 bool dup_sack = dup_sack_in;
1544
1545
1546 if (!before(TCP_SKB_CB(skb)->seq, end_seq))
1547 break;
1548
1549 if (next_dup &&
1550 before(TCP_SKB_CB(skb)->seq, next_dup->end_seq)) {
1551 in_sack = tcp_match_skb_to_sack(sk, skb,
1552 next_dup->start_seq,
1553 next_dup->end_seq);
1554 if (in_sack > 0)
1555 dup_sack = true;
1556 }
1557
1558
1559
1560
1561
1562 if (in_sack <= 0) {
1563 tmp = tcp_shift_skb_data(sk, skb, state,
1564 start_seq, end_seq, dup_sack);
1565 if (tmp) {
1566 if (tmp != skb) {
1567 skb = tmp;
1568 continue;
1569 }
1570
1571 in_sack = 0;
1572 } else {
1573 in_sack = tcp_match_skb_to_sack(sk, skb,
1574 start_seq,
1575 end_seq);
1576 }
1577 }
1578
1579 if (unlikely(in_sack < 0))
1580 break;
1581
1582 if (in_sack) {
1583 TCP_SKB_CB(skb)->sacked =
1584 tcp_sacktag_one(sk,
1585 state,
1586 TCP_SKB_CB(skb)->sacked,
1587 TCP_SKB_CB(skb)->seq,
1588 TCP_SKB_CB(skb)->end_seq,
1589 dup_sack,
1590 tcp_skb_pcount(skb),
1591 tcp_skb_timestamp_us(skb));
1592 tcp_rate_skb_delivered(sk, skb, state->rate);
1593 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
1594 list_del_init(&skb->tcp_tsorted_anchor);
1595
1596 if (!before(TCP_SKB_CB(skb)->seq,
1597 tcp_highest_sack_seq(tp)))
1598 tcp_advance_highest_sack(sk, skb);
1599 }
1600 }
1601 return skb;
1602 }
1603
1604 static struct sk_buff *tcp_sacktag_bsearch(struct sock *sk, u32 seq)
1605 {
1606 struct rb_node *parent, **p = &sk->tcp_rtx_queue.rb_node;
1607 struct sk_buff *skb;
1608
1609 while (*p) {
1610 parent = *p;
1611 skb = rb_to_skb(parent);
1612 if (before(seq, TCP_SKB_CB(skb)->seq)) {
1613 p = &parent->rb_left;
1614 continue;
1615 }
1616 if (!before(seq, TCP_SKB_CB(skb)->end_seq)) {
1617 p = &parent->rb_right;
1618 continue;
1619 }
1620 return skb;
1621 }
1622 return NULL;
1623 }
1624
1625 static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk,
1626 u32 skip_to_seq)
1627 {
1628 if (skb && after(TCP_SKB_CB(skb)->seq, skip_to_seq))
1629 return skb;
1630
1631 return tcp_sacktag_bsearch(sk, skip_to_seq);
1632 }
1633
1634 static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,
1635 struct sock *sk,
1636 struct tcp_sack_block *next_dup,
1637 struct tcp_sacktag_state *state,
1638 u32 skip_to_seq)
1639 {
1640 if (!next_dup)
1641 return skb;
1642
1643 if (before(next_dup->start_seq, skip_to_seq)) {
1644 skb = tcp_sacktag_skip(skb, sk, next_dup->start_seq);
1645 skb = tcp_sacktag_walk(skb, sk, NULL, state,
1646 next_dup->start_seq, next_dup->end_seq,
1647 1);
1648 }
1649
1650 return skb;
1651 }
1652
1653 static int tcp_sack_cache_ok(const struct tcp_sock *tp, const struct tcp_sack_block *cache)
1654 {
1655 return cache < tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache);
1656 }
1657
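/* Main SACK processing: extract up to TCP_NUM_SACKS blocks from the
 * ACK, drop invalid or already-covered ones, sort the rest, and walk
 * the retransmit queue tagging skbs.  recv_sack_cache remembers the
 * blocks seen on the previous ACK so ranges already handled can be
 * skipped cheaply.
 */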
1658 static int
1659 tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
1660 u32 prior_snd_una, struct tcp_sacktag_state *state)
1661 {
1662 struct tcp_sock *tp = tcp_sk(sk);
1663 const unsigned char *ptr = (skb_transport_header(ack_skb) +
1664 TCP_SKB_CB(ack_skb)->sacked);
1665 struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2);
1666 struct tcp_sack_block sp[TCP_NUM_SACKS];
1667 struct tcp_sack_block *cache;
1668 struct sk_buff *skb;
1669 int num_sacks = min(TCP_NUM_SACKS, (ptr[1] - TCPOLEN_SACK_BASE) >> 3);
1670 int used_sacks;
1671 bool found_dup_sack = false;
1672 int i, j;
1673 int first_sack_index;
1674
1675 state->flag = 0;
1676 state->reord = tp->snd_nxt;
1677
1678 if (!tp->sacked_out)
1679 tcp_highest_sack_reset(sk);
1680
1681 found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire,
1682 num_sacks, prior_snd_una);
1683 if (found_dup_sack) {
1684 state->flag |= FLAG_DSACKING_ACK;
1685 tp->delivered++;
1686 }
1687
1688
1689
1690
1691
1692 if (before(TCP_SKB_CB(ack_skb)->ack_seq, prior_snd_una - tp->max_window))
1693 return 0;
1694
1695 if (!tp->packets_out)
1696 goto out;
1697
1698 used_sacks = 0;
1699 first_sack_index = 0;
1700 for (i = 0; i < num_sacks; i++) {
1701 bool dup_sack = !i && found_dup_sack;
1702
1703 sp[used_sacks].start_seq = get_unaligned_be32(&sp_wire[i].start_seq);
1704 sp[used_sacks].end_seq = get_unaligned_be32(&sp_wire[i].end_seq);
1705
1706 if (!tcp_is_sackblock_valid(tp, dup_sack,
1707 sp[used_sacks].start_seq,
1708 sp[used_sacks].end_seq)) {
1709 int mib_idx;
1710
1711 if (dup_sack) {
1712 if (!tp->undo_marker)
1713 mib_idx = LINUX_MIB_TCPDSACKIGNOREDNOUNDO;
1714 else
1715 mib_idx = LINUX_MIB_TCPDSACKIGNOREDOLD;
1716 } else {
1717
1718 if ((TCP_SKB_CB(ack_skb)->ack_seq != tp->snd_una) &&
1719 !after(sp[used_sacks].end_seq, tp->snd_una))
1720 continue;
1721 mib_idx = LINUX_MIB_TCPSACKDISCARD;
1722 }
1723
1724 NET_INC_STATS(sock_net(sk), mib_idx);
1725 if (i == 0)
1726 first_sack_index = -1;
1727 continue;
1728 }
1729
1730
1731 if (!after(sp[used_sacks].end_seq, prior_snd_una)) {
1732 if (i == 0)
1733 first_sack_index = -1;
1734 continue;
1735 }
1736
1737 used_sacks++;
1738 }
1739
1740
1741 for (i = used_sacks - 1; i > 0; i--) {
1742 for (j = 0; j < i; j++) {
1743 if (after(sp[j].start_seq, sp[j + 1].start_seq)) {
1744 swap(sp[j], sp[j + 1]);
1745
1746
1747 if (j == first_sack_index)
1748 first_sack_index = j + 1;
1749 }
1750 }
1751 }
1752
1753 state->mss_now = tcp_current_mss(sk);
1754 skb = NULL;
1755 i = 0;
1756
1757 if (!tp->sacked_out) {
1758
1759 cache = tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache);
1760 } else {
1761 cache = tp->recv_sack_cache;
1762
1763 while (tcp_sack_cache_ok(tp, cache) && !cache->start_seq &&
1764 !cache->end_seq)
1765 cache++;
1766 }
1767
1768 while (i < used_sacks) {
1769 u32 start_seq = sp[i].start_seq;
1770 u32 end_seq = sp[i].end_seq;
1771 bool dup_sack = (found_dup_sack && (i == first_sack_index));
1772 struct tcp_sack_block *next_dup = NULL;
1773
1774 if (found_dup_sack && ((i + 1) == first_sack_index))
1775 next_dup = &sp[i + 1];
1776
1777
1778 while (tcp_sack_cache_ok(tp, cache) &&
1779 !before(start_seq, cache->end_seq))
1780 cache++;
1781
1782
1783 if (tcp_sack_cache_ok(tp, cache) && !dup_sack &&
1784 after(end_seq, cache->start_seq)) {
1785
1786
1787 if (before(start_seq, cache->start_seq)) {
1788 skb = tcp_sacktag_skip(skb, sk, start_seq);
1789 skb = tcp_sacktag_walk(skb, sk, next_dup,
1790 state,
1791 start_seq,
1792 cache->start_seq,
1793 dup_sack);
1794 }
1795
1796
1797 if (!after(end_seq, cache->end_seq))
1798 goto advance_sp;
1799
1800 skb = tcp_maybe_skipping_dsack(skb, sk, next_dup,
1801 state,
1802 cache->end_seq);
1803
1804
1805 if (tcp_highest_sack_seq(tp) == cache->end_seq) {
1806
1807 skb = tcp_highest_sack(sk);
1808 if (!skb)
1809 break;
1810 cache++;
1811 goto walk;
1812 }
1813
1814 skb = tcp_sacktag_skip(skb, sk, cache->end_seq);
1815
1816 cache++;
1817 continue;
1818 }
1819
1820 if (!before(start_seq, tcp_highest_sack_seq(tp))) {
1821 skb = tcp_highest_sack(sk);
1822 if (!skb)
1823 break;
1824 }
1825 skb = tcp_sacktag_skip(skb, sk, start_seq);
1826
1827 walk:
1828 skb = tcp_sacktag_walk(skb, sk, next_dup, state,
1829 start_seq, end_seq, dup_sack);
1830
1831 advance_sp:
1832 i++;
1833 }
1834
1835
1836 for (i = 0; i < ARRAY_SIZE(tp->recv_sack_cache) - used_sacks; i++) {
1837 tp->recv_sack_cache[i].start_seq = 0;
1838 tp->recv_sack_cache[i].end_seq = 0;
1839 }
1840 for (j = 0; j < used_sacks; j++)
1841 tp->recv_sack_cache[i++] = sp[j];
1842
1843 if (inet_csk(sk)->icsk_ca_state != TCP_CA_Loss || tp->undo_marker)
1844 tcp_check_sack_reordering(sk, state->reord, 0);
1845
1846 tcp_verify_left_out(tp);
1847 out:
1848
1849 #if FASTRETRANS_DEBUG > 0
1850 WARN_ON((int)tp->sacked_out < 0);
1851 WARN_ON((int)tp->lost_out < 0);
1852 WARN_ON((int)tp->retrans_out < 0);
1853 WARN_ON((int)tcp_packets_in_flight(tp) < 0);
1854 #endif
1855 return state->flag;
1856 }
1857
1858
1859
1860
1861 static bool tcp_limit_reno_sacked(struct tcp_sock *tp)
1862 {
1863 u32 holes;
1864
1865 holes = max(tp->lost_out, 1U);
1866 holes = min(holes, tp->packets_out);
1867
1868 if ((tp->sacked_out + holes) > tp->packets_out) {
1869 tp->sacked_out = tp->packets_out - holes;
1870 return true;
1871 }
1872 return false;
1873 }
1874
1875
1876
1877
1878
1879 static void tcp_check_reno_reordering(struct sock *sk, const int addend)
1880 {
1881 struct tcp_sock *tp = tcp_sk(sk);
1882
1883 if (!tcp_limit_reno_sacked(tp))
1884 return;
1885
1886 tp->reordering = min_t(u32, tp->packets_out + addend,
1887 sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
1888 tp->reord_seen++;
1889 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRENOREORDER);
1890 }
1891
1892
1893
1894 static void tcp_add_reno_sack(struct sock *sk, int num_dupack)
1895 {
1896 if (num_dupack) {
1897 struct tcp_sock *tp = tcp_sk(sk);
1898 u32 prior_sacked = tp->sacked_out;
1899 s32 delivered;
1900
1901 tp->sacked_out += num_dupack;
1902 tcp_check_reno_reordering(sk, 0);
1903 delivered = tp->sacked_out - prior_sacked;
1904 if (delivered > 0)
1905 tp->delivered += delivered;
1906 tcp_verify_left_out(tp);
1907 }
1908 }
1909
1910
1911
1912 static void tcp_remove_reno_sacks(struct sock *sk, int acked)
1913 {
1914 struct tcp_sock *tp = tcp_sk(sk);
1915
1916 if (acked > 0) {
1917
1918 tp->delivered += max_t(int, acked - tp->sacked_out, 1);
1919 if (acked - 1 >= tp->sacked_out)
1920 tp->sacked_out = 0;
1921 else
1922 tp->sacked_out -= acked - 1;
1923 }
1924 tcp_check_reno_reordering(sk, acked);
1925 tcp_verify_left_out(tp);
1926 }
1927
1928 static inline void tcp_reset_reno_sack(struct tcp_sock *tp)
1929 {
1930 tp->sacked_out = 0;
1931 }
1932
1933 void tcp_clear_retrans(struct tcp_sock *tp)
1934 {
1935 tp->retrans_out = 0;
1936 tp->lost_out = 0;
1937 tp->undo_marker = 0;
1938 tp->undo_retrans = -1;
1939 tp->sacked_out = 0;
1940 }
1941
1942 static inline void tcp_init_undo(struct tcp_sock *tp)
1943 {
1944 tp->undo_marker = tp->snd_una;
1945
1946 tp->undo_retrans = tp->retrans_out ? : -1;
1947 }
1948
1949 static bool tcp_is_rack(const struct sock *sk)
1950 {
1951 return sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION;
1952 }
1953
1954
1955
1956
1957
1958 static void tcp_timeout_mark_lost(struct sock *sk)
1959 {
1960 struct tcp_sock *tp = tcp_sk(sk);
1961 struct sk_buff *skb, *head;
1962 bool is_reneg;
1963
1964 head = tcp_rtx_queue_head(sk);
1965 is_reneg = head && (TCP_SKB_CB(head)->sacked & TCPCB_SACKED_ACKED);
1966 if (is_reneg) {
1967 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSACKRENEGING);
1968 tp->sacked_out = 0;
1969
1970 tp->is_sack_reneg = 1;
1971 } else if (tcp_is_reno(tp)) {
1972 tcp_reset_reno_sack(tp);
1973 }
1974
1975 skb = head;
1976 skb_rbtree_walk_from(skb) {
1977 if (is_reneg)
1978 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED;
1979 else if (tcp_is_rack(sk) && skb != head &&
1980 tcp_rack_skb_timeout(tp, skb, 0) > 0)
1981 continue;
1982 tcp_mark_skb_lost(sk, skb);
1983 }
1984 tcp_verify_left_out(tp);
1985 tcp_clear_all_retrans_hints(tp);
1986 }
1987
1988
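/* Enter the Loss state after an RTO: mark outstanding segments lost,
 * remember prior cwnd/ssthresh for a possible undo, collapse cwnd to
 * packets-in-flight + 1 and, where permitted, arm F-RTO for spurious
 * RTO detection.
 */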
1989 void tcp_enter_loss(struct sock *sk)
1990 {
1991 const struct inet_connection_sock *icsk = inet_csk(sk);
1992 struct tcp_sock *tp = tcp_sk(sk);
1993 struct net *net = sock_net(sk);
1994 bool new_recovery = icsk->icsk_ca_state < TCP_CA_Recovery;
1995
1996 tcp_timeout_mark_lost(sk);
1997
1998
1999 if (icsk->icsk_ca_state <= TCP_CA_Disorder ||
2000 !after(tp->high_seq, tp->snd_una) ||
2001 (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) {
2002 tp->prior_ssthresh = tcp_current_ssthresh(sk);
2003 tp->prior_cwnd = tp->snd_cwnd;
2004 tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
2005 tcp_ca_event(sk, CA_EVENT_LOSS);
2006 tcp_init_undo(tp);
2007 }
2008 tp->snd_cwnd = tcp_packets_in_flight(tp) + 1;
2009 tp->snd_cwnd_cnt = 0;
2010 tp->snd_cwnd_stamp = tcp_jiffies32;
2011
2012
2013
2014
2015 if (icsk->icsk_ca_state <= TCP_CA_Disorder &&
2016 tp->sacked_out >= net->ipv4.sysctl_tcp_reordering)
2017 tp->reordering = min_t(unsigned int, tp->reordering,
2018 net->ipv4.sysctl_tcp_reordering);
2019 tcp_set_ca_state(sk, TCP_CA_Loss);
2020 tp->high_seq = tp->snd_nxt;
2021 tcp_ecn_queue_cwr(tp);
2022
2023
2024
2025
2026
2027 tp->frto = net->ipv4.sysctl_tcp_frto &&
2028 (new_recovery || icsk->icsk_retransmits) &&
2029 !inet_csk(sk)->icsk_mtup.probe_size;
2030 }
2031
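/* The receiver discarded data it had previously SACKed (reneging).
 * Rather than retransmitting immediately, re-arm the retransmission
 * timer with a short delay derived from srtt (at least 10 ms) and let
 * the normal RTO path sort things out.
 */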
2042 static bool tcp_check_sack_reneging(struct sock *sk, int flag)
2043 {
2044 if (flag & FLAG_SACK_RENEGING) {
2045 struct tcp_sock *tp = tcp_sk(sk);
2046 unsigned long delay = max(usecs_to_jiffies(tp->srtt_us >> 4),
2047 msecs_to_jiffies(10));
2048
2049 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
2050 delay, TCP_RTO_MAX);
2051 return true;
2052 }
2053 return false;
2054 }
2055
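/* Number of segments we consider "duplicate-ACKed": everything the
 * SACK scoreboard shows as sacked plus the current ACK itself.  Used
 * by tcp_time_to_recover() against the reordering threshold.
 */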
2067 static inline int tcp_dupack_heuristics(const struct tcp_sock *tp)
2068 {
2069 return tp->sacked_out + 1;
2070 }
2071
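/* Decide whether to start fast retransmit / enter the Recovery state:
 * either something has already been marked lost, or (when RACK is not
 * handling loss detection) the duplicate-ACK heuristic has exceeded the
 * current reordering estimate.
 */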
2169 static bool tcp_time_to_recover(struct sock *sk, int flag)
2170 {
2171 struct tcp_sock *tp = tcp_sk(sk);
2172
2173
2174 if (tp->lost_out)
2175 return true;
2176
2177
2178 if (!tcp_is_rack(sk) && tcp_dupack_heuristics(tp) > tp->reordering)
2179 return true;
2180
2181 return false;
2182 }
2183
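/* Mark up to 'packets' segments from the head of the retransmit queue
 * (resuming from lost_skb_hint) as lost, fragmenting the last skb when
 * only part of it falls under the threshold.  With mark_head set, only
 * the first segment is marked.
 */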
2190 static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
2191 {
2192 struct tcp_sock *tp = tcp_sk(sk);
2193 struct sk_buff *skb;
2194 int cnt, oldcnt, lost;
2195 unsigned int mss;
2196
2197 const u32 loss_high = tcp_is_sack(tp) ? tp->snd_nxt : tp->high_seq;
2198
2199 WARN_ON(packets > tp->packets_out);
2200 skb = tp->lost_skb_hint;
2201 if (skb) {
2202
2203 if (mark_head && after(TCP_SKB_CB(skb)->seq, tp->snd_una))
2204 return;
2205 cnt = tp->lost_cnt_hint;
2206 } else {
2207 skb = tcp_rtx_queue_head(sk);
2208 cnt = 0;
2209 }
2210
2211 skb_rbtree_walk_from(skb) {
2212
2213
2214 tp->lost_skb_hint = skb;
2215 tp->lost_cnt_hint = cnt;
2216
2217 if (after(TCP_SKB_CB(skb)->end_seq, loss_high))
2218 break;
2219
2220 oldcnt = cnt;
2221 if (tcp_is_reno(tp) ||
2222 (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
2223 cnt += tcp_skb_pcount(skb);
2224
2225 if (cnt > packets) {
2226 if (tcp_is_sack(tp) ||
2227 (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) ||
2228 (oldcnt >= packets))
2229 break;
2230
2231 mss = tcp_skb_mss(skb);
2232
2233 lost = (packets - oldcnt) * mss;
2234 if (lost < skb->len &&
2235 tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb,
2236 lost, mss, GFP_ATOMIC) < 0)
2237 break;
2238 cnt = packets;
2239 }
2240
2241 tcp_skb_mark_lost(tp, skb);
2242
2243 if (mark_head)
2244 break;
2245 }
2246 tcp_verify_left_out(tp);
2247 }
2248
2249
2250
2251 static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit)
2252 {
2253 struct tcp_sock *tp = tcp_sk(sk);
2254
2255 if (tcp_is_sack(tp)) {
2256 int sacked_upto = tp->sacked_out - tp->reordering;
2257 if (sacked_upto >= 0)
2258 tcp_mark_head_lost(sk, sacked_upto, 0);
2259 else if (fast_rexmit)
2260 tcp_mark_head_lost(sk, 1, 1);
2261 }
2262 }
2263
2264 static bool tcp_tsopt_ecr_before(const struct tcp_sock *tp, u32 when)
2265 {
2266 return tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
2267 before(tp->rx_opt.rcv_tsecr, when);
2268 }
2269
2270
2271
2272
2273 static bool tcp_skb_spurious_retrans(const struct tcp_sock *tp,
2274 const struct sk_buff *skb)
2275 {
2276 return (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) &&
2277 tcp_tsopt_ecr_before(tp, tcp_skb_timestamp(skb));
2278 }
2279
2280
2281
2282
2283 static inline bool tcp_packet_delayed(const struct tcp_sock *tp)
2284 {
2285 return tp->retrans_stamp &&
2286 tcp_tsopt_ecr_before(tp, tp->retrans_stamp);
2287 }
2288
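/* True if anything currently in the retransmit queue has ever been
 * retransmitted; used to decide whether retrans_stamp can safely be
 * cleared when trying to undo or re-open the connection state.
 */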
2305 static bool tcp_any_retrans_done(const struct sock *sk)
2306 {
2307 const struct tcp_sock *tp = tcp_sk(sk);
2308 struct sk_buff *skb;
2309
2310 if (tp->retrans_out)
2311 return true;
2312
2313 skb = tcp_rtx_queue_head(sk);
2314 if (unlikely(skb && TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS))
2315 return true;
2316
2317 return false;
2318 }
2319
2320 static void DBGUNDO(struct sock *sk, const char *msg)
2321 {
2322 #if FASTRETRANS_DEBUG > 1
2323 struct tcp_sock *tp = tcp_sk(sk);
2324 struct inet_sock *inet = inet_sk(sk);
2325
2326 if (sk->sk_family == AF_INET) {
2327 pr_debug("Undo %s %pI4/%u c%u l%u ss%u/%u p%u\n",
2328 msg,
2329 &inet->inet_daddr, ntohs(inet->inet_dport),
2330 tp->snd_cwnd, tcp_left_out(tp),
2331 tp->snd_ssthresh, tp->prior_ssthresh,
2332 tp->packets_out);
2333 }
2334 #if IS_ENABLED(CONFIG_IPV6)
2335 else if (sk->sk_family == AF_INET6) {
2336 pr_debug("Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n",
2337 msg,
2338 &sk->sk_v6_daddr, ntohs(inet->inet_dport),
2339 tp->snd_cwnd, tcp_left_out(tp),
2340 tp->snd_ssthresh, tp->prior_ssthresh,
2341 tp->packets_out);
2342 }
2343 #endif
2344 #endif
2345 }
2346
2347 static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
2348 {
2349 struct tcp_sock *tp = tcp_sk(sk);
2350
2351 if (unmark_loss) {
2352 struct sk_buff *skb;
2353
2354 skb_rbtree_walk(skb, &sk->tcp_rtx_queue) {
2355 TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
2356 }
2357 tp->lost_out = 0;
2358 tcp_clear_all_retrans_hints(tp);
2359 }
2360
2361 if (tp->prior_ssthresh) {
2362 const struct inet_connection_sock *icsk = inet_csk(sk);
2363
2364 tp->snd_cwnd = icsk->icsk_ca_ops->undo_cwnd(sk);
2365
2366 if (tp->prior_ssthresh > tp->snd_ssthresh) {
2367 tp->snd_ssthresh = tp->prior_ssthresh;
2368 tcp_ecn_withdraw_cwr(tp);
2369 }
2370 }
2371 tp->snd_cwnd_stamp = tcp_jiffies32;
2372 tp->undo_marker = 0;
2373 tp->rack.advanced = 1;
2374 }
2375
2376 static inline bool tcp_may_undo(const struct tcp_sock *tp)
2377 {
2378 return tp->undo_marker && (!tp->undo_retrans || tcp_packet_delayed(tp));
2379 }
2380
2381
2382 static bool tcp_try_undo_recovery(struct sock *sk)
2383 {
2384 struct tcp_sock *tp = tcp_sk(sk);
2385
2386 if (tcp_may_undo(tp)) {
2387 int mib_idx;
2388
2389
2390
2391
2392 DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans");
2393 tcp_undo_cwnd_reduction(sk, false);
2394 if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
2395 mib_idx = LINUX_MIB_TCPLOSSUNDO;
2396 else
2397 mib_idx = LINUX_MIB_TCPFULLUNDO;
2398
2399 NET_INC_STATS(sock_net(sk), mib_idx);
2400 } else if (tp->rack.reo_wnd_persist) {
2401 tp->rack.reo_wnd_persist--;
2402 }
2403 if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) {
2404
2405
2406
2407 if (!tcp_any_retrans_done(sk))
2408 tp->retrans_stamp = 0;
2409 return true;
2410 }
2411 tcp_set_ca_state(sk, TCP_CA_Open);
2412 tp->is_sack_reneg = 0;
2413 return false;
2414 }
2415
2416
2417 static bool tcp_try_undo_dsack(struct sock *sk)
2418 {
2419 struct tcp_sock *tp = tcp_sk(sk);
2420
2421 if (tp->undo_marker && !tp->undo_retrans) {
2422 tp->rack.reo_wnd_persist = min(TCP_RACK_RECOVERY_THRESH,
2423 tp->rack.reo_wnd_persist + 1);
2424 DBGUNDO(sk, "D-SACK");
2425 tcp_undo_cwnd_reduction(sk, false);
2426 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKUNDO);
2427 return true;
2428 }
2429 return false;
2430 }
2431
2432
2433 static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo)
2434 {
2435 struct tcp_sock *tp = tcp_sk(sk);
2436
2437 if (frto_undo || tcp_may_undo(tp)) {
2438 tcp_undo_cwnd_reduction(sk, true);
2439
2440 DBGUNDO(sk, "partial loss");
2441 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPLOSSUNDO);
2442 if (frto_undo)
2443 NET_INC_STATS(sock_net(sk),
2444 LINUX_MIB_TCPSPURIOUSRTOS);
2445 inet_csk(sk)->icsk_retransmits = 0;
2446 if (frto_undo || tcp_is_sack(tp)) {
2447 tcp_set_ca_state(sk, TCP_CA_Open);
2448 tp->is_sack_reneg = 0;
2449 }
2450 return true;
2451 }
2452 return false;
2453 }
2454
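/* Begin a cwnd reduction episode (CWR or Recovery) using Proportional
 * Rate Reduction (RFC 6937): snapshot prior_cwnd, reset the PRR
 * delivered/out counters and let the congestion module pick the new
 * ssthresh; tcp_cwnd_reduction() then paces cwnd down on each ACK.
 */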
2464 static void tcp_init_cwnd_reduction(struct sock *sk)
2465 {
2466 struct tcp_sock *tp = tcp_sk(sk);
2467
2468 tp->high_seq = tp->snd_nxt;
2469 tp->tlp_high_seq = 0;
2470 tp->snd_cwnd_cnt = 0;
2471 tp->prior_cwnd = tp->snd_cwnd;
2472 tp->prr_delivered = 0;
2473 tp->prr_out = 0;
2474 tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk);
2475 tcp_ecn_queue_cwr(tp);
2476 }
2477
2478 void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, int flag)
2479 {
2480 struct tcp_sock *tp = tcp_sk(sk);
2481 int sndcnt = 0;
2482 int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp);
2483
2484 if (newly_acked_sacked <= 0 || WARN_ON_ONCE(!tp->prior_cwnd))
2485 return;
2486
2487 tp->prr_delivered += newly_acked_sacked;
2488 if (delta < 0) {
2489 u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered +
2490 tp->prior_cwnd - 1;
2491 sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out;
2492 } else if ((flag & (FLAG_RETRANS_DATA_ACKED | FLAG_LOST_RETRANS)) ==
2493 FLAG_RETRANS_DATA_ACKED) {
2494 sndcnt = min_t(int, delta,
2495 max_t(int, tp->prr_delivered - tp->prr_out,
2496 newly_acked_sacked) + 1);
2497 } else {
2498 sndcnt = min(delta, newly_acked_sacked);
2499 }
2500
2501 sndcnt = max(sndcnt, (tp->prr_out ? 0 : 1));
2502 tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt;
2503 }
2504
2505 static inline void tcp_end_cwnd_reduction(struct sock *sk)
2506 {
2507 struct tcp_sock *tp = tcp_sk(sk);
2508
2509 if (inet_csk(sk)->icsk_ca_ops->cong_control)
2510 return;
2511
2512 /* Reset cwnd to ssthresh in CWR or Recovery (unless it is undone) */
2513 if (tp->snd_ssthresh < TCP_INFINITE_SSTHRESH &&
2514 (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR || tp->undo_marker)) {
2515 tp->snd_cwnd = tp->snd_ssthresh;
2516 tp->snd_cwnd_stamp = tcp_jiffies32;
2517 }
2518 tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
2519 }
2520
2521 /* Enter CWR state: congestion was signalled, so cwnd undo is disabled. */
2522 void tcp_enter_cwr(struct sock *sk)
2523 {
2524 struct tcp_sock *tp = tcp_sk(sk);
2525
2526 tp->prior_ssthresh = 0;
2527 if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
2528 tp->undo_marker = 0;
2529 tcp_init_cwnd_reduction(sk);
2530 tcp_set_ca_state(sk, TCP_CA_CWR);
2531 }
2532 }
2533 EXPORT_SYMBOL(tcp_enter_cwr);
2534
2535 static void tcp_try_keep_open(struct sock *sk)
2536 {
2537 struct tcp_sock *tp = tcp_sk(sk);
2538 int state = TCP_CA_Open;
2539
2540 if (tcp_left_out(tp) || tcp_any_retrans_done(sk))
2541 state = TCP_CA_Disorder;
2542
2543 if (inet_csk(sk)->icsk_ca_state != state) {
2544 tcp_set_ca_state(sk, state);
2545 tp->high_seq = tp->snd_nxt;
2546 }
2547 }
2548
2549 static void tcp_try_to_open(struct sock *sk, int flag)
2550 {
2551 struct tcp_sock *tp = tcp_sk(sk);
2552
2553 tcp_verify_left_out(tp);
2554
2555 if (!tcp_any_retrans_done(sk))
2556 tp->retrans_stamp = 0;
2557
2558 if (flag & FLAG_ECE)
2559 tcp_enter_cwr(sk);
2560
2561 if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
2562 tcp_try_keep_open(sk);
2563 }
2564 }
2565
2566 static void tcp_mtup_probe_failed(struct sock *sk)
2567 {
2568 struct inet_connection_sock *icsk = inet_csk(sk);
2569
2570 icsk->icsk_mtup.search_high = icsk->icsk_mtup.probe_size - 1;
2571 icsk->icsk_mtup.probe_size = 0;
2572 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMTUPFAIL);
2573 }
2574
2575 static void tcp_mtup_probe_success(struct sock *sk)
2576 {
2577 struct tcp_sock *tp = tcp_sk(sk);
2578 struct inet_connection_sock *icsk = inet_csk(sk);
2579
2580
2581 tp->prior_ssthresh = tcp_current_ssthresh(sk);
2582 tp->snd_cwnd = tp->snd_cwnd *
2583 tcp_mss_to_mtu(sk, tp->mss_cache) /
2584 icsk->icsk_mtup.probe_size;
2585 tp->snd_cwnd_cnt = 0;
2586 tp->snd_cwnd_stamp = tcp_jiffies32;
2587 tp->snd_ssthresh = tcp_current_ssthresh(sk);
2588
2589 icsk->icsk_mtup.search_low = icsk->icsk_mtup.probe_size;
2590 icsk->icsk_mtup.probe_size = 0;
2591 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
2592 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMTUPSUCCESS);
2593 }
2594
2595 /* Do a simple retransmit without using the backoff mechanisms in
2596  * tcp_timer.  This is used for path MTU discovery.
2597  * The socket is already locked here.
2598  */
2599 void tcp_simple_retransmit(struct sock *sk)
2600 {
2601 const struct inet_connection_sock *icsk = inet_csk(sk);
2602 struct tcp_sock *tp = tcp_sk(sk);
2603 struct sk_buff *skb;
2604 unsigned int mss = tcp_current_mss(sk);
2605
2606 skb_rbtree_walk(skb, &sk->tcp_rtx_queue) {
2607 if (tcp_skb_seglen(skb) > mss &&
2608 !(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
2609 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
2610 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
2611 tp->retrans_out -= tcp_skb_pcount(skb);
2612 }
2613 tcp_skb_mark_lost_uncond_verify(tp, skb);
2614 }
2615 }
2616
2617 tcp_clear_retrans_hints_partial(tp);
2618
2619 if (!tp->lost_out)
2620 return;
2621
2622 if (tcp_is_reno(tp))
2623 tcp_limit_reno_sacked(tp);
2624
2625 tcp_verify_left_out(tp);
2626
2627 /* Don't muck with the congestion window here.  We are not
2628  * increasing the amount of data in the network; only the unit
2629  * (the MSS) changed, so the effective cwnd and ssthresh are
2630  * really reduced already.
2631  */
2632 if (icsk->icsk_ca_state != TCP_CA_Loss) {
2633 tp->high_seq = tp->snd_nxt;
2634 tp->snd_ssthresh = tcp_current_ssthresh(sk);
2635 tp->prior_ssthresh = 0;
2636 tp->undo_marker = 0;
2637 tcp_set_ca_state(sk, TCP_CA_Loss);
2638 }
2639 tcp_xmit_retransmit_queue(sk);
2640 }
2641 EXPORT_SYMBOL(tcp_simple_retransmit);
2642
2643 void tcp_enter_recovery(struct sock *sk, bool ece_ack)
2644 {
2645 struct tcp_sock *tp = tcp_sk(sk);
2646 int mib_idx;
2647
2648 if (tcp_is_reno(tp))
2649 mib_idx = LINUX_MIB_TCPRENORECOVERY;
2650 else
2651 mib_idx = LINUX_MIB_TCPSACKRECOVERY;
2652
2653 NET_INC_STATS(sock_net(sk), mib_idx);
2654
2655 tp->prior_ssthresh = 0;
2656 tcp_init_undo(tp);
2657
2658 if (!tcp_in_cwnd_reduction(sk)) {
2659 if (!ece_ack)
2660 tp->prior_ssthresh = tcp_current_ssthresh(sk);
2661 tcp_init_cwnd_reduction(sk);
2662 }
2663 tcp_set_ca_state(sk, TCP_CA_Recovery);
2664 }
2665
2666 /* Process an ACK in CA_Loss state.  Move to CA_Open if the lost data
2667  * are recovered or the RTO proves spurious; otherwise retransmit more
2668  * on partial ACKs. */
2669 static void tcp_process_loss(struct sock *sk, int flag, int num_dupack,
2670 int *rexmit)
2671 {
2672 struct tcp_sock *tp = tcp_sk(sk);
2673 bool recovered = !before(tp->snd_una, tp->high_seq);
2674
2675 if ((flag & FLAG_SND_UNA_ADVANCED || rcu_access_pointer(tp->fastopen_rsk)) &&
2676 tcp_try_undo_loss(sk, false))
2677 return;
2678
2679 if (tp->frto) {
2680 /* F-RTO (RFC 5682 sec 3.1): the timeout was spurious if not all
2681  * data were lost, i.e. never-retransmitted data got (s)acked.
2682  */
2683 if ((flag & FLAG_ORIG_SACK_ACKED) &&
2684 tcp_try_undo_loss(sk, true))
2685 return;
2686
2687 if (after(tp->snd_nxt, tp->high_seq)) {
2688 if (flag & FLAG_DATA_SACKED || num_dupack)
2689 tp->frto = 0;
2690 } else if (flag & FLAG_SND_UNA_ADVANCED && !recovered) {
2691 tp->high_seq = tp->snd_nxt;
2692 /* Send new data if the window allows (deferred until cwnd
2693  * is updated at the end of tcp_ack()); otherwise clear frto
2694  * and fall back to retransmitting after the timeout.
2695  */
2696 if (!tcp_write_queue_empty(sk) &&
2697 after(tcp_wnd_end(tp), tp->snd_nxt)) {
2698 *rexmit = REXMIT_NEW;
2699 return;
2700 }
2701 tp->frto = 0;
2702 }
2703 }
2704
2705 if (recovered) {
2706
2707 tcp_try_undo_recovery(sk);
2708 return;
2709 }
2710 if (tcp_is_reno(tp)) {
2711 /* A Reno DUPACK means new data sent in the F-RTO step above
2712  * was delivered; lower the inflight estimate so (re)transmissions
2713  * can be clocked out. */
2714 if (after(tp->snd_nxt, tp->high_seq) && num_dupack)
2715 tcp_add_reno_sack(sk, num_dupack);
2716 else if (flag & FLAG_SND_UNA_ADVANCED)
2717 tcp_reset_reno_sack(tp);
2718 }
2719 *rexmit = REXMIT_LOST;
2720 }
2721
2722 /* Undo during fast recovery after a partial ACK. */
2723 static bool tcp_try_undo_partial(struct sock *sk, u32 prior_snd_una)
2724 {
2725 struct tcp_sock *tp = tcp_sk(sk);
2726
2727 if (tp->undo_marker && tcp_packet_delayed(tp)) {
2728 /* Plain luck!  The hole was filled by a delayed original
2729  * packet rather than by a retransmit, so check reordering.
2730  */
2731 tcp_check_sack_reordering(sk, prior_snd_una, 1);
2732
2733 /* We are getting evidence that the reordering degree is higher
2734  * than we realized.  If there are no retransmits out we can
2735  * undo; otherwise keep clocking out new packets but do not mark
2736  * more packets lost or retransmit more.
2737  */
2738 if (tp->retrans_out)
2739 return true;
2740
2741 if (!tcp_any_retrans_done(sk))
2742 tp->retrans_stamp = 0;
2743
2744 DBGUNDO(sk, "partial recovery");
2745 tcp_undo_cwnd_reduction(sk, true);
2746 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO);
2747 tcp_try_keep_open(sk);
2748 return true;
2749 }
2750 return false;
2751 }
2752
2753 static void tcp_identify_packet_loss(struct sock *sk, int *ack_flag)
2754 {
2755 struct tcp_sock *tp = tcp_sk(sk);
2756
2757 if (tcp_rtx_queue_empty(sk))
2758 return;
2759
2760 if (unlikely(tcp_is_reno(tp))) {
2761 tcp_newreno_mark_lost(sk, *ack_flag & FLAG_SND_UNA_ADVANCED);
2762 } else if (tcp_is_rack(sk)) {
2763 u32 prior_retrans = tp->retrans_out;
2764
2765 tcp_rack_mark_lost(sk);
2766 if (prior_retrans > tp->retrans_out)
2767 *ack_flag |= FLAG_LOST_RETRANS;
2768 }
2769 }
2770
2771 static bool tcp_force_fast_retransmit(struct sock *sk)
2772 {
2773 struct tcp_sock *tp = tcp_sk(sk);
2774
2775 return after(tcp_highest_sack_seq(tp),
2776 tp->snd_una + tp->reordering * tp->mss_cache);
2777 }
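/*
 * Illustration with made-up values: with the default tp->reordering of 3
 * and an mss_cache of 1000 bytes, a SACK whose highest sequence exceeds
 * snd_una + 3000 means at least three full-sized segments beyond the hole
 * have left the network, which is the classic dupack threshold expressed
 * in sequence space, so fast retransmit is forced.
 */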
2778
2779 /* Process an event which can update packets-in-flight not trivially.
2780  * The main goal of this function is to calculate a new estimate for
2781  * left_out, taking into account both packets sitting in the receiver's
2782  * buffer and packets which are in flight but not yet reported in the ACK.
2783  *
2784  * Besides that, it updates the congestion state when packet loss or ECN
2785  * is detected.  But it does not reduce the cwnd: that is done by the
2786  * congestion control later.
2787  *
2788  * It does _not_ decide what to send; that is done in
2789  * tcp_xmit_retransmit_queue().
2790  */
2791 static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
2792 int num_dupack, int *ack_flag, int *rexmit)
2793 {
2794 struct inet_connection_sock *icsk = inet_csk(sk);
2795 struct tcp_sock *tp = tcp_sk(sk);
2796 int fast_rexmit = 0, flag = *ack_flag;
2797 bool do_lost = num_dupack || ((flag & FLAG_DATA_SACKED) &&
2798 tcp_force_fast_retransmit(sk));
2799
2800 if (!tp->packets_out && tp->sacked_out)
2801 tp->sacked_out = 0;
2802
2803
2804
2805 if (flag & FLAG_ECE)
2806 tp->prior_ssthresh = 0;
2807
2808
2809 if (tcp_check_sack_reneging(sk, flag))
2810 return;
2811
2812
2813 tcp_verify_left_out(tp);
2814
2815
2816
2817 if (icsk->icsk_ca_state == TCP_CA_Open) {
2818 WARN_ON(tp->retrans_out != 0);
2819 tp->retrans_stamp = 0;
2820 } else if (!before(tp->snd_una, tp->high_seq)) {
2821 switch (icsk->icsk_ca_state) {
2822 case TCP_CA_CWR:
2823
2824
2825 if (tp->snd_una != tp->high_seq) {
2826 tcp_end_cwnd_reduction(sk);
2827 tcp_set_ca_state(sk, TCP_CA_Open);
2828 }
2829 break;
2830
2831 case TCP_CA_Recovery:
2832 if (tcp_is_reno(tp))
2833 tcp_reset_reno_sack(tp);
2834 if (tcp_try_undo_recovery(sk))
2835 return;
2836 tcp_end_cwnd_reduction(sk);
2837 break;
2838 }
2839 }
2840
2841
2842 switch (icsk->icsk_ca_state) {
2843 case TCP_CA_Recovery:
2844 if (!(flag & FLAG_SND_UNA_ADVANCED)) {
2845 if (tcp_is_reno(tp))
2846 tcp_add_reno_sack(sk, num_dupack);
2847 } else {
2848 if (tcp_try_undo_partial(sk, prior_snd_una))
2849 return;
2850
2851 do_lost = tcp_is_reno(tp) ||
2852 tcp_force_fast_retransmit(sk);
2853 }
2854 if (tcp_try_undo_dsack(sk)) {
2855 tcp_try_keep_open(sk);
2856 return;
2857 }
2858 tcp_identify_packet_loss(sk, ack_flag);
2859 break;
2860 case TCP_CA_Loss:
2861 tcp_process_loss(sk, flag, num_dupack, rexmit);
2862 tcp_identify_packet_loss(sk, ack_flag);
2863 if (!(icsk->icsk_ca_state == TCP_CA_Open ||
2864 (*ack_flag & FLAG_LOST_RETRANS)))
2865 return;
2866 /* Change state if cwnd is undone or retransmits are lost */
2867 /* fall through */
2868 default:
2869 if (tcp_is_reno(tp)) {
2870 if (flag & FLAG_SND_UNA_ADVANCED)
2871 tcp_reset_reno_sack(tp);
2872 tcp_add_reno_sack(sk, num_dupack);
2873 }
2874
2875 if (icsk->icsk_ca_state <= TCP_CA_Disorder)
2876 tcp_try_undo_dsack(sk);
2877
2878 tcp_identify_packet_loss(sk, ack_flag);
2879 if (!tcp_time_to_recover(sk, flag)) {
2880 tcp_try_to_open(sk, flag);
2881 return;
2882 }
2883
2884 /* MTU probe failure: don't reduce cwnd */
2885 if (icsk->icsk_ca_state < TCP_CA_CWR &&
2886 icsk->icsk_mtup.probe_size &&
2887 tp->snd_una == tp->mtu_probe.probe_seq_start) {
2888 tcp_mtup_probe_failed(sk);
2889
2890 tp->snd_cwnd++;
2891 tcp_simple_retransmit(sk);
2892 return;
2893 }
2894
2895
2896 tcp_enter_recovery(sk, (flag & FLAG_ECE));
2897 fast_rexmit = 1;
2898 }
2899
2900 if (!tcp_is_rack(sk) && do_lost)
2901 tcp_update_scoreboard(sk, fast_rexmit);
2902 *rexmit = REXMIT_LOST;
2903 }
2904
2905 static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us, const int flag)
2906 {
2907 u32 wlen = sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen * HZ;
2908 struct tcp_sock *tp = tcp_sk(sk);
2909
2910 if ((flag & FLAG_ACK_MAYBE_DELAYED) && rtt_us > tcp_min_rtt(tp)) {
2911 /* An ACK that may have been delayed inflates the RTT sample, so
2912  * ignore it unless it would still lower the windowed minimum; this
2913  * keeps min_rtt tracking genuine round trips rather than receiver
2914  * delayed-ACK timers. */
2915 return;
2916 }
2917 minmax_running_min(&tp->rtt_min, wlen, tcp_jiffies32,
2918 rtt_us ? : jiffies_to_usecs(1));
2919 }
2920
2921 static bool tcp_ack_update_rtt(struct sock *sk, const int flag,
2922 long seq_rtt_us, long sack_rtt_us,
2923 long ca_rtt_us, struct rate_sample *rs)
2924 {
2925 const struct tcp_sock *tp = tcp_sk(sk);
2926
2927 /* Prefer an RTT measured from the ACK's timing over TS-ECR, because
2928  * broken middleboxes or peers may corrupt the TS-ECR field.  But
2929  * Karn's algorithm forbids taking an RTT sample when retransmitted
2930  * data was acked (RFC 6298), so callers only supply seq/sack samples
2931  * taken from never-retransmitted skbs. */
2932 if (seq_rtt_us < 0)
2933 seq_rtt_us = sack_rtt_us;
2934
2935 /* RTTM rule: a TSecr value received in a segment is used to update
2936  * the averaged RTT measurement only if the segment acknowledges some
2937  * new data, i.e. only if it advances the left edge of the send window
2938  * (hence the FLAG_ACKED test below); otherwise a flood of pure ACKs
2939  * echoing an old timestamp could corrupt the estimator.
2940  */
2941 if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
2942 flag & FLAG_ACKED) {
2943 u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
2944
2945 if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) {
2946 seq_rtt_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
2947 ca_rtt_us = seq_rtt_us;
2948 }
2949 }
2950 rs->rtt_us = ca_rtt_us;
2951 if (seq_rtt_us < 0)
2952 return false;
2953
2954
2955
2956
2957
2958 tcp_update_rtt_min(sk, ca_rtt_us, flag);
2959 tcp_rtt_estimator(sk, seq_rtt_us);
2960 tcp_set_rto(sk);
2961
2962
2963 inet_csk(sk)->icsk_backoff = 0;
2964 return true;
2965 }
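/*
 * Sample-selection sketch (hypothetical numbers): if no untagged skb was
 * newly ACKed (seq_rtt_us < 0) and no SACK sample exists, but the ACK is
 * cumulative and echoes a timestamp, then with TCP_TS_HZ = 1000 a delta
 * of 12 ticks between tcp_time_stamp() and rcv_tsecr becomes
 * seq_rtt_us = 12 * 1000 = 12000, i.e. a 12 ms RTT sample fed to the
 * estimator and the RTO computation above.
 */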
2966
2967
2968 void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req)
2969 {
2970 struct rate_sample rs;
2971 long rtt_us = -1L;
2972
2973 if (req && !req->num_retrans && tcp_rsk(req)->snt_synack)
2974 rtt_us = tcp_stamp_us_delta(tcp_clock_us(), tcp_rsk(req)->snt_synack);
2975
2976 tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, rtt_us, -1L, rtt_us, &rs);
2977 }
2978
2979
2980 static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
2981 {
2982 const struct inet_connection_sock *icsk = inet_csk(sk);
2983
2984 icsk->icsk_ca_ops->cong_avoid(sk, ack, acked);
2985 tcp_sk(sk)->snd_cwnd_stamp = tcp_jiffies32;
2986 }
2987
2988 /* Restart the retransmit timer after forward progress on the
2989  * connection; RFC 6298 recommends re-arming it to now + RTO.
2990  */
2991 void tcp_rearm_rto(struct sock *sk)
2992 {
2993 const struct inet_connection_sock *icsk = inet_csk(sk);
2994 struct tcp_sock *tp = tcp_sk(sk);
2995
2996
2997
2998
2999 if (rcu_access_pointer(tp->fastopen_rsk))
3000 return;
3001
3002 if (!tp->packets_out) {
3003 inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
3004 } else {
3005 u32 rto = inet_csk(sk)->icsk_rto;
3006
3007 if (icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
3008 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
3009 s64 delta_us = tcp_rto_delta_us(sk);
3010
3011
3012
3013 rto = usecs_to_jiffies(max_t(int, delta_us, 1));
3014 }
3015 tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto,
3016 TCP_RTO_MAX, tcp_rtx_queue_head(sk));
3017 }
3018 }
3019
3020
3021 static void tcp_set_xmit_timer(struct sock *sk)
3022 {
3023 if (!tcp_schedule_loss_probe(sk, true))
3024 tcp_rearm_rto(sk);
3025 }
3026
3027
3028 static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
3029 {
3030 struct tcp_sock *tp = tcp_sk(sk);
3031 u32 packets_acked;
3032
3033 BUG_ON(!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una));
3034
3035 packets_acked = tcp_skb_pcount(skb);
3036 if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
3037 return 0;
3038 packets_acked -= tcp_skb_pcount(skb);
3039
3040 if (packets_acked) {
3041 BUG_ON(tcp_skb_pcount(skb) == 0);
3042 BUG_ON(!before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq));
3043 }
3044
3045 return packets_acked;
3046 }
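/*
 * Example with made-up numbers: an skb spanning seq 1000..4000 as three
 * 1000-byte segments (pcount == 3) partially acked by snd_una == 3000 has
 * its first 2000 bytes trimmed; the remaining pcount is 1, so the function
 * reports 3 - 1 = 2 segments as acked while the tail stays on the rtx queue.
 */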
3047
3048 static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
3049 u32 prior_snd_una)
3050 {
3051 const struct skb_shared_info *shinfo;
3052
3053
3054 if (likely(!TCP_SKB_CB(skb)->txstamp_ack))
3055 return;
3056
3057 shinfo = skb_shinfo(skb);
3058 if (!before(shinfo->tskey, prior_snd_una) &&
3059 before(shinfo->tskey, tcp_sk(sk)->snd_una)) {
3060 tcp_skb_tsorted_save(skb) {
3061 __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK);
3062 } tcp_skb_tsorted_restore(skb);
3063 }
3064 }
3065
3066 /* Remove acknowledged frames from the retransmission queue.  If a
3067  * packet lies entirely before the ack sequence it can be discarded,
3068  * as it is confirmed to have arrived at the other end.
3069  */
3070 static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
3071 u32 prior_snd_una,
3072 struct tcp_sacktag_state *sack)
3073 {
3074 const struct inet_connection_sock *icsk = inet_csk(sk);
3075 u64 first_ackt, last_ackt;
3076 struct tcp_sock *tp = tcp_sk(sk);
3077 u32 prior_sacked = tp->sacked_out;
3078 u32 reord = tp->snd_nxt;
3079 struct sk_buff *skb, *next;
3080 bool fully_acked = true;
3081 long sack_rtt_us = -1L;
3082 long seq_rtt_us = -1L;
3083 long ca_rtt_us = -1L;
3084 u32 pkts_acked = 0;
3085 u32 last_in_flight = 0;
3086 bool rtt_update;
3087 int flag = 0;
3088
3089 first_ackt = 0;
3090
3091 for (skb = skb_rb_first(&sk->tcp_rtx_queue); skb; skb = next) {
3092 struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
3093 const u32 start_seq = scb->seq;
3094 u8 sacked = scb->sacked;
3095 u32 acked_pcount;
3096
3097 tcp_ack_tstamp(sk, skb, prior_snd_una);
3098
3099
3100 if (after(scb->end_seq, tp->snd_una)) {
3101 if (tcp_skb_pcount(skb) == 1 ||
3102 !after(tp->snd_una, scb->seq))
3103 break;
3104
3105 acked_pcount = tcp_tso_acked(sk, skb);
3106 if (!acked_pcount)
3107 break;
3108 fully_acked = false;
3109 } else {
3110 acked_pcount = tcp_skb_pcount(skb);
3111 }
3112
3113 if (unlikely(sacked & TCPCB_RETRANS)) {
3114 if (sacked & TCPCB_SACKED_RETRANS)
3115 tp->retrans_out -= acked_pcount;
3116 flag |= FLAG_RETRANS_DATA_ACKED;
3117 } else if (!(sacked & TCPCB_SACKED_ACKED)) {
3118 last_ackt = tcp_skb_timestamp_us(skb);
3119 WARN_ON_ONCE(last_ackt == 0);
3120 if (!first_ackt)
3121 first_ackt = last_ackt;
3122
3123 last_in_flight = TCP_SKB_CB(skb)->tx.in_flight;
3124 if (before(start_seq, reord))
3125 reord = start_seq;
3126 if (!after(scb->end_seq, tp->high_seq))
3127 flag |= FLAG_ORIG_SACK_ACKED;
3128 }
3129
3130 if (sacked & TCPCB_SACKED_ACKED) {
3131 tp->sacked_out -= acked_pcount;
3132 } else if (tcp_is_sack(tp)) {
3133 tp->delivered += acked_pcount;
3134 if (!tcp_skb_spurious_retrans(tp, skb))
3135 tcp_rack_advance(tp, sacked, scb->end_seq,
3136 tcp_skb_timestamp_us(skb));
3137 }
3138 if (sacked & TCPCB_LOST)
3139 tp->lost_out -= acked_pcount;
3140
3141 tp->packets_out -= acked_pcount;
3142 pkts_acked += acked_pcount;
3143 tcp_rate_skb_delivered(sk, skb, sack->rate);
3144
3145 /* The initial outgoing SYN is put on the rtx queue just like
3146  * anything else we transmit.  It is not true data, though, and
3147  * if we told our callers that this ACK acked real data we would
3148  * erroneously exit connection-startup slow start one packet too
3149  * early.  So flag SYN acks separately and clear retrans_stamp,
3150  * since the handshake round trip is now complete.
3151  */
3152 if (likely(!(scb->tcp_flags & TCPHDR_SYN))) {
3153 flag |= FLAG_DATA_ACKED;
3154 } else {
3155 flag |= FLAG_SYN_ACKED;
3156 tp->retrans_stamp = 0;
3157 }
3158
3159 if (!fully_acked)
3160 break;
3161
3162 next = skb_rb_next(skb);
3163 if (unlikely(skb == tp->retransmit_skb_hint))
3164 tp->retransmit_skb_hint = NULL;
3165 if (unlikely(skb == tp->lost_skb_hint))
3166 tp->lost_skb_hint = NULL;
3167 tcp_highest_sack_replace(sk, skb, next);
3168 tcp_rtx_queue_unlink_and_free(skb, sk);
3169 }
3170
3171 if (!skb)
3172 tcp_chrono_stop(sk, TCP_CHRONO_BUSY);
3173
3174 if (likely(between(tp->snd_up, prior_snd_una, tp->snd_una)))
3175 tp->snd_up = tp->snd_una;
3176
3177 if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
3178 flag |= FLAG_SACK_RENEGING;
3179
3180 if (likely(first_ackt) && !(flag & FLAG_RETRANS_DATA_ACKED)) {
3181 seq_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, first_ackt);
3182 ca_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, last_ackt);
3183
3184 if (pkts_acked == 1 && last_in_flight < tp->mss_cache &&
3185 last_in_flight && !prior_sacked && fully_acked &&
3186 sack->rate->prior_delivered + 1 == tp->delivered &&
3187 !(flag & (FLAG_CA_ALERT | FLAG_SYN_ACKED))) {
3188 /* Conservatively mark this ACK as possibly delayed: it acked a
3189  * single runt packet, nothing was SACKed and nothing else was
3190  * delivered, the typical signature of a delayed ACK rather than
3191  * a full round trip. */
3192 flag |= FLAG_ACK_MAYBE_DELAYED;
3193 }
3194 }
3195 if (sack->first_sackt) {
3196 sack_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, sack->first_sackt);
3197 ca_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, sack->last_sackt);
3198 }
3199 rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us,
3200 ca_rtt_us, sack->rate);
3201
3202 if (flag & FLAG_ACKED) {
3203 flag |= FLAG_SET_XMIT_TIMER;
3204 if (unlikely(icsk->icsk_mtup.probe_size &&
3205 !after(tp->mtu_probe.probe_seq_end, tp->snd_una))) {
3206 tcp_mtup_probe_success(sk);
3207 }
3208
3209 if (tcp_is_reno(tp)) {
3210 tcp_remove_reno_sacks(sk, pkts_acked);
3211
3212 /* If any of the cumulatively ACKed segments was retransmitted,
3213  * the non-SACK case cannot confirm that progress was due to the
3214  * original transmission: without TCPCB_SACKED_ACKED bits we do
3215  * not know which packets arrived, even if some of them were in
3216  * fact never retransmitted.  So drop FLAG_ORIG_SACK_ACKED.
3217  */
3218 if (flag & FLAG_RETRANS_DATA_ACKED)
3219 flag &= ~FLAG_ORIG_SACK_ACKED;
3220 } else {
3221 int delta;
3222
3223
3224 if (before(reord, prior_fack))
3225 tcp_check_sack_reordering(sk, reord, 0);
3226
3227 delta = prior_sacked - tp->sacked_out;
3228 tp->lost_cnt_hint -= min(tp->lost_cnt_hint, delta);
3229 }
3230 } else if (skb && rtt_update && sack_rtt_us >= 0 &&
3231 sack_rtt_us > tcp_stamp_us_delta(tp->tcp_mstamp,
3232 tcp_skb_timestamp_us(skb))) {
3233 /* Do not re-arm the RTO if the SACK RTT is measured from data
3234  * sent after the head was last (re)transmitted; otherwise the
3235  * timeout may keep extending in loss recovery.  Here the sample
3236  * is older than the head, so arming the timer is safe. */
3237 flag |= FLAG_SET_XMIT_TIMER;
3238 }
3239
3240 if (icsk->icsk_ca_ops->pkts_acked) {
3241 struct ack_sample sample = { .pkts_acked = pkts_acked,
3242 .rtt_us = sack->rate->rtt_us,
3243 .in_flight = last_in_flight };
3244
3245 icsk->icsk_ca_ops->pkts_acked(sk, &sample);
3246 }
3247
3248 #if FASTRETRANS_DEBUG > 0
3249 WARN_ON((int)tp->sacked_out < 0);
3250 WARN_ON((int)tp->lost_out < 0);
3251 WARN_ON((int)tp->retrans_out < 0);
3252 if (!tp->packets_out && tcp_is_sack(tp)) {
3253 icsk = inet_csk(sk);
3254 if (tp->lost_out) {
3255 pr_debug("Leak l=%u %d\n",
3256 tp->lost_out, icsk->icsk_ca_state);
3257 tp->lost_out = 0;
3258 }
3259 if (tp->sacked_out) {
3260 pr_debug("Leak s=%u %d\n",
3261 tp->sacked_out, icsk->icsk_ca_state);
3262 tp->sacked_out = 0;
3263 }
3264 if (tp->retrans_out) {
3265 pr_debug("Leak r=%u %d\n",
3266 tp->retrans_out, icsk->icsk_ca_state);
3267 tp->retrans_out = 0;
3268 }
3269 }
3270 #endif
3271 return flag;
3272 }
3273
3274 static void tcp_ack_probe(struct sock *sk)
3275 {
3276 struct inet_connection_sock *icsk = inet_csk(sk);
3277 struct sk_buff *head = tcp_send_head(sk);
3278 const struct tcp_sock *tp = tcp_sk(sk);
3279
3280
3281 if (!head)
3282 return;
3283 if (!after(TCP_SKB_CB(head)->end_seq, tcp_wnd_end(tp))) {
3284 icsk->icsk_backoff = 0;
3285 inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0);
3286
3287
3288
3289 } else {
3290 unsigned long when = tcp_probe0_when(sk, TCP_RTO_MAX);
3291
3292 tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
3293 when, TCP_RTO_MAX, NULL);
3294 }
3295 }
3296
3297 static inline bool tcp_ack_is_dubious(const struct sock *sk, const int flag)
3298 {
3299 return !(flag & FLAG_NOT_DUP) || (flag & FLAG_CA_ALERT) ||
3300 inet_csk(sk)->icsk_ca_state != TCP_CA_Open;
3301 }
3302
3303
3304 static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag)
3305 {
3306 /* If reordering is high then always grow cwnd whenever data is
3307  * delivered, regardless of its ordering.  Otherwise stay
3308  * conservative and only grow cwnd on in-order delivery (RFC 5681).
3309  * A stretched ACK with a new SACK or ECE mark may first advance
3310  * cwnd here and later reduce it in tcp_fastretrans_alert() based
3311  * on more state. */
3312 if (tcp_sk(sk)->reordering > sock_net(sk)->ipv4.sysctl_tcp_reordering)
3313 return flag & FLAG_FORWARD_PROGRESS;
3314
3315 return flag & FLAG_DATA_ACKED;
3316 }
3317
3318 /* The "ultimate" congestion control function that aims to replace
3319  * the rigid cwnd increase and decrease control (tcp_cong_avoid,
3320  * tcp_cwnd_reduction).  It is called toward the end of processing
3321  * an ACK, with precise rate information; all transmission or
3322  * retransmission happens afterwards. */
3323 static void tcp_cong_control(struct sock *sk, u32 ack, u32 acked_sacked,
3324 int flag, const struct rate_sample *rs)
3325 {
3326 const struct inet_connection_sock *icsk = inet_csk(sk);
3327
3328 if (icsk->icsk_ca_ops->cong_control) {
3329 icsk->icsk_ca_ops->cong_control(sk, rs);
3330 return;
3331 }
3332
3333 if (tcp_in_cwnd_reduction(sk)) {
3334
3335 tcp_cwnd_reduction(sk, acked_sacked, flag);
3336 } else if (tcp_may_raise_cwnd(sk, flag)) {
3337
3338 tcp_cong_avoid(sk, ack, acked_sacked);
3339 }
3340 tcp_update_pacing_rate(sk);
3341 }
3342
3343 /* Check that a window update is acceptable.
3344  * The function assumes that snd_una <= ack <= snd_nxt.
3345  */
3346 static inline bool tcp_may_update_window(const struct tcp_sock *tp,
3347 const u32 ack, const u32 ack_seq,
3348 const u32 nwin)
3349 {
3350 return after(ack, tp->snd_una) ||
3351 after(ack_seq, tp->snd_wl1) ||
3352 (ack_seq == tp->snd_wl1 && nwin > tp->snd_wnd);
3353 }
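/*
 * Worked example (hypothetical numbers): with snd_wl1 = 100 and
 * snd_wnd = 1000, a duplicate ACK carrying ack_seq = 100 and nwin = 2000
 * passes only the third test and is still accepted as a pure window
 * update, while a reordered segment with ack_seq = 90 fails all three,
 * so a stale (possibly smaller) window can never overwrite a newer one;
 * this is the RFC 793 SND.WL1/SND.WL2 rule.
 */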
3354
3355
3356 static void tcp_snd_una_update(struct tcp_sock *tp, u32 ack)
3357 {
3358 u32 delta = ack - tp->snd_una;
3359
3360 sock_owned_by_me((struct sock *)tp);
3361 tp->bytes_acked += delta;
3362 tp->snd_una = ack;
3363 }
3364
3365
3366 static void tcp_rcv_nxt_update(struct tcp_sock *tp, u32 seq)
3367 {
3368 u32 delta = seq - tp->rcv_nxt;
3369
3370 sock_owned_by_me((struct sock *)tp);
3371 tp->bytes_received += delta;
3372 WRITE_ONCE(tp->rcv_nxt, seq);
3373 }
3374
3375
3376
3377
3378
3379
3380 static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32 ack,
3381 u32 ack_seq)
3382 {
3383 struct tcp_sock *tp = tcp_sk(sk);
3384 int flag = 0;
3385 u32 nwin = ntohs(tcp_hdr(skb)->window);
3386
3387 if (likely(!tcp_hdr(skb)->syn))
3388 nwin <<= tp->rx_opt.snd_wscale;
3389
3390 if (tcp_may_update_window(tp, ack, ack_seq, nwin)) {
3391 flag |= FLAG_WIN_UPDATE;
3392 tcp_update_wl(tp, ack_seq);
3393
3394 if (tp->snd_wnd != nwin) {
3395 tp->snd_wnd = nwin;
3396
3397
3398
3399
3400 tp->pred_flags = 0;
3401 tcp_fast_path_check(sk);
3402
3403 if (!tcp_write_queue_empty(sk))
3404 tcp_slow_start_after_idle_check(sk);
3405
3406 if (nwin > tp->max_window) {
3407 tp->max_window = nwin;
3408 tcp_sync_mss(sk, inet_csk(sk)->icsk_pmtu_cookie);
3409 }
3410 }
3411 }
3412
3413 tcp_snd_una_update(tp, ack);
3414
3415 return flag;
3416 }
3417
3418 static bool __tcp_oow_rate_limited(struct net *net, int mib_idx,
3419 u32 *last_oow_ack_time)
3420 {
3421 if (*last_oow_ack_time) {
3422 s32 elapsed = (s32)(tcp_jiffies32 - *last_oow_ack_time);
3423
3424 if (0 <= elapsed && elapsed < net->ipv4.sysctl_tcp_invalid_ratelimit) {
3425 NET_INC_STATS(net, mib_idx);
3426 return true;
3427 }
3428 }
3429
3430 *last_oow_ack_time = tcp_jiffies32;
3431
3432 return false;
3433 }
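/*
 * Example: with the default net.ipv4.tcp_invalid_ratelimit of 500 ms, two
 * out-of-window segments arriving 100 ms apart both reach this check, but
 * only the first gets a response; the second falls inside the
 * "0 <= elapsed < limit" window and is only counted in mib_idx.  The
 * signed comparison also tolerates tcp_jiffies32 wrap-around by falling
 * back to sending and re-arming the stamp.
 */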
3434
3435 /* Return true if we're currently rate-limiting out-of-window ACKs
3436  * and thus shouldn't send a dupack right now.  We rate-limit dupacks
3437  * in response to out-of-window SYNs or ACKs to mitigate ACK loops or
3438  * DoS attacks that send repeated SYNs or ACKs for the same
3439  * connection.  To do this we do not send a duplicate SYNACK or ACK
3440  * if the remote endpoint is sending them at a high rate.
3441  */
3442 bool tcp_oow_rate_limited(struct net *net, const struct sk_buff *skb,
3443 int mib_idx, u32 *last_oow_ack_time)
3444 {
3445
3446 if ((TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq) &&
3447 !tcp_hdr(skb)->syn)
3448 return false;
3449
3450 return __tcp_oow_rate_limited(net, mib_idx, last_oow_ack_time);
3451 }
3452
3453
3454 static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb)
3455 {
3456
3457 static u32 challenge_timestamp;
3458 static unsigned int challenge_count;
3459 struct tcp_sock *tp = tcp_sk(sk);
3460 struct net *net = sock_net(sk);
3461 u32 count, now;
3462
3463
3464 if (__tcp_oow_rate_limited(net,
3465 LINUX_MIB_TCPACKSKIPPEDCHALLENGE,
3466 &tp->last_oow_ack_time))
3467 return;
3468
3469
3470 now = jiffies / HZ;
3471 if (now != challenge_timestamp) {
3472 u32 ack_limit = net->ipv4.sysctl_tcp_challenge_ack_limit;
3473 u32 half = (ack_limit + 1) >> 1;
3474
3475 challenge_timestamp = now;
3476 WRITE_ONCE(challenge_count, half + prandom_u32_max(ack_limit));
3477 }
3478 count = READ_ONCE(challenge_count);
3479 if (count > 0) {
3480 WRITE_ONCE(challenge_count, count - 1);
3481 NET_INC_STATS(net, LINUX_MIB_TCPCHALLENGEACK);
3482 tcp_send_ack(sk);
3483 }
3484 }
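/*
 * Example: with an ack_limit of 1000 the per-second budget is drawn from
 * the range [500, 1499] (half = 500 plus prandom_u32_max(1000)); keeping
 * the budget random rather than a fixed global counter denies an off-path
 * attacker the precise shared state needed to probe sequence numbers via
 * challenge-ACK counting.
 */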
3485
3486 static void tcp_store_ts_recent(struct tcp_sock *tp)
3487 {
3488 tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval;
3489 tp->rx_opt.ts_recent_stamp = ktime_get_seconds();
3490 }
3491
3492 static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
3493 {
3494 if (tp->rx_opt.saw_tstamp && !after(seq, tp->rcv_wup)) {
3495
3496
3497
3498
3499
3500
3501
3502 if (tcp_paws_check(&tp->rx_opt, 0))
3503 tcp_store_ts_recent(tp);
3504 }
3505 }
3506
3507 /* This routine handles ACKs that arrive during a tail loss probe
3508  * (TLP) episode, ending the episode by clearing tlp_high_seq.  If
3509  * the probe repaired a real loss, a one-time CWR-style cwnd
3510  * reduction is applied so TLP does not mask the congestion signal.
3511  */
3512 static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)
3513 {
3514 struct tcp_sock *tp = tcp_sk(sk);
3515
3516 if (before(ack, tp->tlp_high_seq))
3517 return;
3518
3519 if (flag & FLAG_DSACKING_ACK) {
3520
3521 tp->tlp_high_seq = 0;
3522 } else if (after(ack, tp->tlp_high_seq)) {
3523
3524
3525
3526 tcp_init_cwnd_reduction(sk);
3527 tcp_set_ca_state(sk, TCP_CA_CWR);
3528 tcp_end_cwnd_reduction(sk);
3529 tcp_try_keep_open(sk);
3530 NET_INC_STATS(sock_net(sk),
3531 LINUX_MIB_TCPLOSSPROBERECOVERY);
3532 } else if (!(flag & (FLAG_SND_UNA_ADVANCED |
3533 FLAG_NOT_DUP | FLAG_DATA_SACKED))) {
3534
3535 tp->tlp_high_seq = 0;
3536 }
3537 }
3538
3539 static inline void tcp_in_ack_event(struct sock *sk, u32 flags)
3540 {
3541 const struct inet_connection_sock *icsk = inet_csk(sk);
3542
3543 if (icsk->icsk_ca_ops->in_ack_event)
3544 icsk->icsk_ca_ops->in_ack_event(sk, flags);
3545 }
3546
3547 /* Congestion control has already updated the cwnd, so if we are in
3548  * loss recovery do any new sends (for F-RTO) or retransmits (for
3549  * CA_Loss or CA_Recovery) that now make sense.
3550  */
3551 static void tcp_xmit_recovery(struct sock *sk, int rexmit)
3552 {
3553 struct tcp_sock *tp = tcp_sk(sk);
3554
3555 if (rexmit == REXMIT_NONE || sk->sk_state == TCP_SYN_SENT)
3556 return;
3557
3558 if (unlikely(rexmit == REXMIT_NEW)) {
3559 __tcp_push_pending_frames(sk, tcp_current_mss(sk),
3560 TCP_NAGLE_OFF);
3561 if (after(tp->snd_nxt, tp->high_seq))
3562 return;
3563 tp->frto = 0;
3564 }
3565 tcp_xmit_retransmit_queue(sk);
3566 }
3567
3568
3569 static u32 tcp_newly_delivered(struct sock *sk, u32 prior_delivered, int flag)
3570 {
3571 const struct net *net = sock_net(sk);
3572 struct tcp_sock *tp = tcp_sk(sk);
3573 u32 delivered;
3574
3575 delivered = tp->delivered - prior_delivered;
3576 NET_ADD_STATS(net, LINUX_MIB_TCPDELIVERED, delivered);
3577 if (flag & FLAG_ECE) {
3578 tp->delivered_ce += delivered;
3579 NET_ADD_STATS(net, LINUX_MIB_TCPDELIVEREDCE, delivered);
3580 }
3581 return delivered;
3582 }
3583
3584
3585 static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3586 {
3587 struct inet_connection_sock *icsk = inet_csk(sk);
3588 struct tcp_sock *tp = tcp_sk(sk);
3589 struct tcp_sacktag_state sack_state;
3590 struct rate_sample rs = { .prior_delivered = 0 };
3591 u32 prior_snd_una = tp->snd_una;
3592 bool is_sack_reneg = tp->is_sack_reneg;
3593 u32 ack_seq = TCP_SKB_CB(skb)->seq;
3594 u32 ack = TCP_SKB_CB(skb)->ack_seq;
3595 int num_dupack = 0;
3596 int prior_packets = tp->packets_out;
3597 u32 delivered = tp->delivered;
3598 u32 lost = tp->lost;
3599 int rexmit = REXMIT_NONE;
3600 u32 prior_fack;
3601
3602 sack_state.first_sackt = 0;
3603 sack_state.rate = &rs;
3604
3605
3606 prefetch(sk->tcp_rtx_queue.rb_node);
3607
3608
3609
3610
3611 if (before(ack, prior_snd_una)) {
3612
3613 if (before(ack, prior_snd_una - tp->max_window)) {
3614 if (!(flag & FLAG_NO_CHALLENGE_ACK))
3615 tcp_send_challenge_ack(sk, skb);
3616 return -1;
3617 }
3618 goto old_ack;
3619 }
3620
3621 /* If the ack includes data we haven't sent yet, discard
3622  * this segment (RFC 793 Section 3.9).
3623  */
3624 if (after(ack, tp->snd_nxt))
3625 return -1;
3626
3627 if (after(ack, prior_snd_una)) {
3628 flag |= FLAG_SND_UNA_ADVANCED;
3629 icsk->icsk_retransmits = 0;
3630
3631 #if IS_ENABLED(CONFIG_TLS_DEVICE)
3632 if (static_branch_unlikely(&clean_acked_data_enabled.key))
3633 if (icsk->icsk_clean_acked)
3634 icsk->icsk_clean_acked(sk, ack);
3635 #endif
3636 }
3637
3638 prior_fack = tcp_is_sack(tp) ? tcp_highest_sack_seq(tp) : tp->snd_una;
3639 rs.prior_in_flight = tcp_packets_in_flight(tp);
3640
3641
3642
3643
3644 if (flag & FLAG_UPDATE_TS_RECENT)
3645 tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
3646
3647 if ((flag & (FLAG_SLOWPATH | FLAG_SND_UNA_ADVANCED)) ==
3648 FLAG_SND_UNA_ADVANCED) {
3649
3650
3651
3652
3653 tcp_update_wl(tp, ack_seq);
3654 tcp_snd_una_update(tp, ack);
3655 flag |= FLAG_WIN_UPDATE;
3656
3657 tcp_in_ack_event(sk, CA_ACK_WIN_UPDATE);
3658
3659 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPACKS);
3660 } else {
3661 u32 ack_ev_flags = CA_ACK_SLOWPATH;
3662
3663 if (ack_seq != TCP_SKB_CB(skb)->end_seq)
3664 flag |= FLAG_DATA;
3665 else
3666 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPUREACKS);
3667
3668 flag |= tcp_ack_update_window(sk, skb, ack, ack_seq);
3669
3670 if (TCP_SKB_CB(skb)->sacked)
3671 flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
3672 &sack_state);
3673
3674 if (tcp_ecn_rcv_ecn_echo(tp, tcp_hdr(skb))) {
3675 flag |= FLAG_ECE;
3676 ack_ev_flags |= CA_ACK_ECE;
3677 }
3678
3679 if (flag & FLAG_WIN_UPDATE)
3680 ack_ev_flags |= CA_ACK_WIN_UPDATE;
3681
3682 tcp_in_ack_event(sk, ack_ev_flags);
3683 }
3684
3685
3686
3687
3688 sk->sk_err_soft = 0;
3689 icsk->icsk_probes_out = 0;
3690 tp->rcv_tstamp = tcp_jiffies32;
3691 if (!prior_packets)
3692 goto no_queue;
3693
3694
3695 flag |= tcp_clean_rtx_queue(sk, prior_fack, prior_snd_una, &sack_state);
3696
3697 tcp_rack_update_reo_wnd(sk, &rs);
3698
3699 if (tp->tlp_high_seq)
3700 tcp_process_tlp_ack(sk, ack, flag);
3701
3702 if (flag & FLAG_SET_XMIT_TIMER)
3703 tcp_set_xmit_timer(sk);
3704
3705 if (tcp_ack_is_dubious(sk, flag)) {
3706 if (!(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP))) {
3707 num_dupack = 1;
3708
3709 if (!(flag & FLAG_DATA))
3710 num_dupack = max_t(u16, 1, skb_shinfo(skb)->gso_segs);
3711 }
3712 tcp_fastretrans_alert(sk, prior_snd_una, num_dupack, &flag,
3713 &rexmit);
3714 }
3715
3716 if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP))
3717 sk_dst_confirm(sk);
3718
3719 delivered = tcp_newly_delivered(sk, delivered, flag);
3720 lost = tp->lost - lost;
3721 rs.is_ack_delayed = !!(flag & FLAG_ACK_MAYBE_DELAYED);
3722 tcp_rate_gen(sk, delivered, lost, is_sack_reneg, sack_state.rate);
3723 tcp_cong_control(sk, ack, delivered, flag, sack_state.rate);
3724 tcp_xmit_recovery(sk, rexmit);
3725 return 1;
3726
3727 no_queue:
3728
3729 if (flag & FLAG_DSACKING_ACK) {
3730 tcp_fastretrans_alert(sk, prior_snd_una, num_dupack, &flag,
3731 &rexmit);
3732 tcp_newly_delivered(sk, delivered, flag);
3733 }
3734
3735 /* If this ACK opens up a zero window, clear the backoff; it was
3736  * being used to time the probes and is probably far higher than
3737  * it needs to be for normal retransmission. */
3738 tcp_ack_probe(sk);
3739
3740 if (tp->tlp_high_seq)
3741 tcp_process_tlp_ack(sk, ack, flag);
3742 return 1;
3743
3744 old_ack:
3745
3746
3747
3748 if (TCP_SKB_CB(skb)->sacked) {
3749 flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
3750 &sack_state);
3751 tcp_fastretrans_alert(sk, prior_snd_una, num_dupack, &flag,
3752 &rexmit);
3753 tcp_newly_delivered(sk, delivered, flag);
3754 tcp_xmit_recovery(sk, rexmit);
3755 }
3756
3757 return 0;
3758 }
3759
3760 static void tcp_parse_fastopen_option(int len, const unsigned char *cookie,
3761 bool syn, struct tcp_fastopen_cookie *foc,
3762 bool exp_opt)
3763 {
3764
3765 if (!foc || !syn || len < 0 || (len & 1))
3766 return;
3767
3768 if (len >= TCP_FASTOPEN_COOKIE_MIN &&
3769 len <= TCP_FASTOPEN_COOKIE_MAX)
3770 memcpy(foc->val, cookie, len);
3771 else if (len != 0)
3772 len = -1;
3773 foc->len = len;
3774 foc->exp = exp_opt;
3775 }
3776
3777 static void smc_parse_options(const struct tcphdr *th,
3778 struct tcp_options_received *opt_rx,
3779 const unsigned char *ptr,
3780 int opsize)
3781 {
3782 #if IS_ENABLED(CONFIG_SMC)
3783 if (static_branch_unlikely(&tcp_have_smc)) {
3784 if (th->syn && !(opsize & 1) &&
3785 opsize >= TCPOLEN_EXP_SMC_BASE &&
3786 get_unaligned_be32(ptr) == TCPOPT_SMC_MAGIC)
3787 opt_rx->smc_ok = 1;
3788 }
3789 #endif
3790 }
3791
3792 /* Try to parse the MSS option from the TCP header.  Return 0 on
3793  * failure, or the (possibly user_mss-clamped) value on success.
3794  */
3795 static u16 tcp_parse_mss_option(const struct tcphdr *th, u16 user_mss)
3796 {
3797 const unsigned char *ptr = (const unsigned char *)(th + 1);
3798 int length = (th->doff * 4) - sizeof(struct tcphdr);
3799 u16 mss = 0;
3800
3801 while (length > 0) {
3802 int opcode = *ptr++;
3803 int opsize;
3804
3805 switch (opcode) {
3806 case TCPOPT_EOL:
3807 return mss;
3808 case TCPOPT_NOP:
3809 length--;
3810 continue;
3811 default:
3812 if (length < 2)
3813 return mss;
3814 opsize = *ptr++;
3815 if (opsize < 2)
3816 return mss;
3817 if (opsize > length)
3818 return mss;
3819 if (opcode == TCPOPT_MSS && opsize == TCPOLEN_MSS) {
3820 u16 in_mss = get_unaligned_be16(ptr);
3821
3822 if (in_mss) {
3823 if (user_mss && user_mss < in_mss)
3824 in_mss = user_mss;
3825 mss = in_mss;
3826 }
3827 }
3828 ptr += opsize - 2;
3829 length -= opsize;
3830 }
3831 }
3832 return mss;
3833 }
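/*
 * Example: the raw option bytes 02 04 05 b4 parse here as kind TCPOPT_MSS,
 * length TCPOLEN_MSS and the big-endian value 0x05b4 = 1460; if the caller
 * passed user_mss = 1200, the smaller of the two (1200) is returned.
 */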
3834
3835 /* Look for TCP options.  Normally only called on SYN and SYN-ACK
3836  * packets, but it can also be called on packets in the established
3837  * flow when the fast parsing below fails.
3838  */
3839 void tcp_parse_options(const struct net *net,
3840 const struct sk_buff *skb,
3841 struct tcp_options_received *opt_rx, int estab,
3842 struct tcp_fastopen_cookie *foc)
3843 {
3844 const unsigned char *ptr;
3845 const struct tcphdr *th = tcp_hdr(skb);
3846 int length = (th->doff * 4) - sizeof(struct tcphdr);
3847
3848 ptr = (const unsigned char *)(th + 1);
3849 opt_rx->saw_tstamp = 0;
3850
3851 while (length > 0) {
3852 int opcode = *ptr++;
3853 int opsize;
3854
3855 switch (opcode) {
3856 case TCPOPT_EOL:
3857 return;
3858 case TCPOPT_NOP:
3859 length--;
3860 continue;
3861 default:
3862 if (length < 2)
3863 return;
3864 opsize = *ptr++;
3865 if (opsize < 2)
3866 return;
3867 if (opsize > length)
3868 return;
3869 switch (opcode) {
3870 case TCPOPT_MSS:
3871 if (opsize == TCPOLEN_MSS && th->syn && !estab) {
3872 u16 in_mss = get_unaligned_be16(ptr);
3873 if (in_mss) {
3874 if (opt_rx->user_mss &&
3875 opt_rx->user_mss < in_mss)
3876 in_mss = opt_rx->user_mss;
3877 opt_rx->mss_clamp = in_mss;
3878 }
3879 }
3880 break;
3881 case TCPOPT_WINDOW:
3882 if (opsize == TCPOLEN_WINDOW && th->syn &&
3883 !estab && net->ipv4.sysctl_tcp_window_scaling) {
3884 __u8 snd_wscale = *(__u8 *)ptr;
3885 opt_rx->wscale_ok = 1;
3886 if (snd_wscale > TCP_MAX_WSCALE) {
3887 net_info_ratelimited("%s: Illegal window scaling value %d > %u received\n",
3888 __func__,
3889 snd_wscale,
3890 TCP_MAX_WSCALE);
3891 snd_wscale = TCP_MAX_WSCALE;
3892 }
3893 opt_rx->snd_wscale = snd_wscale;
3894 }
3895 break;
3896 case TCPOPT_TIMESTAMP:
3897 if ((opsize == TCPOLEN_TIMESTAMP) &&
3898 ((estab && opt_rx->tstamp_ok) ||
3899 (!estab && net->ipv4.sysctl_tcp_timestamps))) {
3900 opt_rx->saw_tstamp = 1;
3901 opt_rx->rcv_tsval = get_unaligned_be32(ptr);
3902 opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4);
3903 }
3904 break;
3905 case TCPOPT_SACK_PERM:
3906 if (opsize == TCPOLEN_SACK_PERM && th->syn &&
3907 !estab && net->ipv4.sysctl_tcp_sack) {
3908 opt_rx->sack_ok = TCP_SACK_SEEN;
3909 tcp_sack_reset(opt_rx);
3910 }
3911 break;
3912
3913 case TCPOPT_SACK:
3914 if ((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) &&
3915 !((opsize - TCPOLEN_SACK_BASE) % TCPOLEN_SACK_PERBLOCK) &&
3916 opt_rx->sack_ok) {
3917 TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th;
3918 }
3919 break;
3920 #ifdef CONFIG_TCP_MD5SIG
3921 case TCPOPT_MD5SIG:
3922 /*
3923  * The MD5 Hash has already been
3924  * checked (see tcp_v{4,6}_do_rcv()).
3925  */
3926 break;
3927 #endif
3928 case TCPOPT_FASTOPEN:
3929 tcp_parse_fastopen_option(
3930 opsize - TCPOLEN_FASTOPEN_BASE,
3931 ptr, th->syn, foc, false);
3932 break;
3933
3934 case TCPOPT_EXP:
3935
3936
3937
3938 if (opsize >= TCPOLEN_EXP_FASTOPEN_BASE &&
3939 get_unaligned_be16(ptr) ==
3940 TCPOPT_FASTOPEN_MAGIC)
3941 tcp_parse_fastopen_option(opsize -
3942 TCPOLEN_EXP_FASTOPEN_BASE,
3943 ptr + 2, th->syn, foc, true);
3944 else
3945 smc_parse_options(th, opt_rx, ptr,
3946 opsize);
3947 break;
3948
3949 }
3950 ptr += opsize-2;
3951 length -= opsize;
3952 }
3953 }
3954 }
3955 EXPORT_SYMBOL(tcp_parse_options);
3956
3957 static bool tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr *th)
3958 {
3959 const __be32 *ptr = (const __be32 *)(th + 1);
3960
3961 if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
3962 | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) {
3963 tp->rx_opt.saw_tstamp = 1;
3964 ++ptr;
3965 tp->rx_opt.rcv_tsval = ntohl(*ptr);
3966 ++ptr;
3967 if (*ptr)
3968 tp->rx_opt.rcv_tsecr = ntohl(*ptr) - tp->tsoffset;
3969 else
3970 tp->rx_opt.rcv_tsecr = 0;
3971 return true;
3972 }
3973 return false;
3974 }
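/*
 * The predicted layout matched above is the 32-bit word 0x0101080a in
 * network order: NOP (1), NOP (1), TCPOPT_TIMESTAMP (8),
 * TCPOLEN_TIMESTAMP (10), followed by the 4-byte TSval and TSecr.  Any
 * other option arrangement falls back to the generic tcp_parse_options()
 * walk in tcp_fast_parse_options() below.
 */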
3975
3976
3977
3978
3979 static bool tcp_fast_parse_options(const struct net *net,
3980 const struct sk_buff *skb,
3981 const struct tcphdr *th, struct tcp_sock *tp)
3982 {
3983
3984
3985
3986 if (th->doff == (sizeof(*th) / 4)) {
3987 tp->rx_opt.saw_tstamp = 0;
3988 return false;
3989 } else if (tp->rx_opt.tstamp_ok &&
3990 th->doff == ((sizeof(*th) + TCPOLEN_TSTAMP_ALIGNED) / 4)) {
3991 if (tcp_parse_aligned_timestamp(tp, th))
3992 return true;
3993 }
3994
3995 tcp_parse_options(net, skb, &tp->rx_opt, 1, NULL);
3996 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
3997 tp->rx_opt.rcv_tsecr -= tp->tsoffset;
3998
3999 return true;
4000 }
4001
4002 #ifdef CONFIG_TCP_MD5SIG
4003
4004
4005
4006 const u8 *tcp_parse_md5sig_option(const struct tcphdr *th)
4007 {
4008 int length = (th->doff << 2) - sizeof(*th);
4009 const u8 *ptr = (const u8 *)(th + 1);
4010
4011
4012 while (length >= TCPOLEN_MD5SIG) {
4013 int opcode = *ptr++;
4014 int opsize;
4015
4016 switch (opcode) {
4017 case TCPOPT_EOL:
4018 return NULL;
4019 case TCPOPT_NOP:
4020 length--;
4021 continue;
4022 default:
4023 opsize = *ptr++;
4024 if (opsize < 2 || opsize > length)
4025 return NULL;
4026 if (opcode == TCPOPT_MD5SIG)
4027 return opsize == TCPOLEN_MD5SIG ? ptr : NULL;
4028 }
4029 ptr += opsize - 2;
4030 length -= opsize;
4031 }
4032 return NULL;
4033 }
4034 EXPORT_SYMBOL(tcp_parse_md5sig_option);
4035 #endif
4036
4037
4038
4039
4040
4041
4042
4043
4044
4045
4046
4047
4048
4049
4050
4051
4052 /* A segment that fails the PAWS check can still be harmless if it is
4053  * a pure ACK carrying nothing new: it sits exactly at rcv_nxt, does
4054  * not advance snd_una, would not update the window, and its timestamp
4055  * is at most about one RTO older than ts_recent.  Such segments are
4056  * almost certainly delayed duplicates or retransmitted ACKs, so
4057  * tcp_paws_discard() below accepts them instead of dropping ACKs the
4058  * fast-retransmit machinery may still need.
4059  */
4060 static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb)
4061 {
4062 const struct tcp_sock *tp = tcp_sk(sk);
4063 const struct tcphdr *th = tcp_hdr(skb);
4064 u32 seq = TCP_SKB_CB(skb)->seq;
4065 u32 ack = TCP_SKB_CB(skb)->ack_seq;
4066
4067 return (
4068 (th->ack && seq == TCP_SKB_CB(skb)->end_seq && seq == tp->rcv_nxt) &&
4069
4070
4071 ack == tp->snd_una &&
4072
4073
4074 !tcp_may_update_window(tp, ack, seq, ntohs(th->window) << tp->rx_opt.snd_wscale) &&
4075
4076
4077 (s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) <= (inet_csk(sk)->icsk_rto * 1024) / HZ);
4078 }
4079
4080 static inline bool tcp_paws_discard(const struct sock *sk,
4081 const struct sk_buff *skb)
4082 {
4083 const struct tcp_sock *tp = tcp_sk(sk);
4084
4085 return !tcp_paws_check(&tp->rx_opt, TCP_PAWS_WINDOW) &&
4086 !tcp_disordered_ack(sk, skb);
4087 }
4088
4089
4090
4091
4092
4093
4094
4095
4096 /* Check the segment sequence number for validity: it is acceptable
4097  * if any part of it fits inside the receive window after truncation.
4098  * Note that rcv_wup (not rcv_nxt) forms the left edge, so segments the
4099  * peer sent before seeing our delayed ACK are still accepted;
4100  * acceptability of the data itself is checked separately, e.g. in
4101  * tcp_data_queue(). */
4102 static inline bool tcp_sequence(const struct tcp_sock *tp, u32 seq, u32 end_seq)
4103 {
4104 return !before(end_seq, tp->rcv_wup) &&
4105 !after(seq, tp->rcv_nxt + tcp_receive_window(tp));
4106 }
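/*
 * Worked example (hypothetical numbers): with rcv_wup = rcv_nxt = 1000 and
 * a 5000-byte receive window, a segment covering 500..900 fails the first
 * test (entirely below the window), one starting at 6500 fails the second
 * (entirely above it), while 800..1200 passes both because it overlaps the
 * window even though part of it is old data.
 */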
4107
4108
4109 void tcp_reset(struct sock *sk)
4110 {
4111 trace_tcp_receive_reset(sk);
4112
4113
4114 switch (sk->sk_state) {
4115 case TCP_SYN_SENT:
4116 sk->sk_err = ECONNREFUSED;
4117 break;
4118 case TCP_CLOSE_WAIT:
4119 sk->sk_err = EPIPE;
4120 break;
4121 case TCP_CLOSE:
4122 return;
4123 default:
4124 sk->sk_err = ECONNRESET;
4125 }
4126
4127 smp_wmb();
4128
4129 tcp_write_queue_purge(sk);
4130 tcp_done(sk);
4131
4132 if (!sock_flag(sk, SOCK_DEAD))
4133 sk->sk_error_report(sk);
4134 }
4135
4136 /*
4137  * Process the FIN bit.  The FIN takes effect only when it is
4138  * validly part of the sequence space, i.e. not before any holes
4139  * are filled.
4140  *
4141  * If we are ESTABLISHED, a received FIN moves us to CLOSE-WAIT
4142  * (and thence onto LAST-ACK and finally CLOSE; we never enter
4143  * TIME-WAIT from there).
4144  *
4145  * If we are in FIN-WAIT-1, a received FIN indicates a simultaneous
4146  * close and we go into CLOSING (and later onto TIME-WAIT).
4147  *
4148  * If we are in FIN-WAIT-2, a received FIN moves us to TIME-WAIT.
4149  */
4150 void tcp_fin(struct sock *sk)
4151 {
4152 struct tcp_sock *tp = tcp_sk(sk);
4153
4154 inet_csk_schedule_ack(sk);
4155
4156 sk->sk_shutdown |= RCV_SHUTDOWN;
4157 sock_set_flag(sk, SOCK_DONE);
4158
4159 switch (sk->sk_state) {
4160 case TCP_SYN_RECV:
4161 case TCP_ESTABLISHED:
4162
4163 tcp_set_state(sk, TCP_CLOSE_WAIT);
4164 inet_csk_enter_pingpong_mode(sk);
4165 break;
4166
4167 case TCP_CLOSE_WAIT:
4168 case TCP_CLOSING:
4169
4170
4171
4172 break;
4173 case TCP_LAST_ACK:
4174
4175 break;
4176
4177 case TCP_FIN_WAIT1:
4178 /* This case occurs when a simultaneous close happens:
4179  * we must ACK the received FIN and enter the CLOSING
4180  * state (RFC 793).
4181  */
4182 tcp_send_ack(sk);
4183 tcp_set_state(sk, TCP_CLOSING);
4184 break;
4185 case TCP_FIN_WAIT2:
4186
4187 tcp_send_ack(sk);
4188 tcp_time_wait(sk, TCP_TIME_WAIT, 0);
4189 break;
4190 default:
4191
4192
4193
4194 pr_err("%s: Impossible, sk->sk_state=%d\n",
4195 __func__, sk->sk_state);
4196 break;
4197 }
4198
4199 /* It is possible that we have data queued out-of-order *after* the
4200  * FIN; arguably we should reset, but for now just drop it all.
4201  */
4202 skb_rbtree_purge(&tp->out_of_order_queue);
4203 if (tcp_is_sack(tp))
4204 tcp_sack_reset(&tp->rx_opt);
4205 sk_mem_reclaim(sk);
4206
4207 if (!sock_flag(sk, SOCK_DEAD)) {
4208 sk->sk_state_change(sk);
4209
4210
4211 if (sk->sk_shutdown == SHUTDOWN_MASK ||
4212 sk->sk_state == TCP_CLOSE)
4213 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP);
4214 else
4215 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
4216 }
4217 }
4218
4219 static inline bool tcp_sack_extend(struct tcp_sack_block *sp, u32 seq,
4220 u32 end_seq)
4221 {
4222 if (!after(seq, sp->end_seq) && !after(sp->start_seq, end_seq)) {
4223 if (before(seq, sp->start_seq))
4224 sp->start_seq = seq;
4225 if (after(end_seq, sp->end_seq))
4226 sp->end_seq = end_seq;
4227 return true;
4228 }
4229 return false;
4230 }
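/*
 * Example (hypothetical sequence numbers): with sp = [1000, 2000), a new
 * block [2000, 3000) is adjacent (seq == sp->end_seq), so sp grows to
 * [1000, 3000) and true is returned; a block [3500, 4000) neither touches
 * nor overlaps, so false tells the caller to keep it as a separate SACK
 * block.
 */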
4231
4232 static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq)
4233 {
4234 struct tcp_sock *tp = tcp_sk(sk);
4235
4236 if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
4237 int mib_idx;
4238
4239 if (before(seq, tp->rcv_nxt))
4240 mib_idx = LINUX_MIB_TCPDSACKOLDSENT;
4241 else
4242 mib_idx = LINUX_MIB_TCPDSACKOFOSENT;
4243
4244 NET_INC_STATS(sock_net(sk), mib_idx);
4245
4246 tp->rx_opt.dsack = 1;
4247 tp->duplicate_sack[0].start_seq = seq;
4248 tp->duplicate_sack[0].end_seq = end_seq;
4249 }
4250 }
4251
4252 static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq)
4253 {
4254 struct tcp_sock *tp = tcp_sk(sk);
4255
4256 if (!tp->rx_opt.dsack)
4257 tcp_dsack_set(sk, seq, end_seq);
4258 else
4259 tcp_sack_extend(tp->duplicate_sack, seq, end_seq);
4260 }
4261
4262 static void tcp_rcv_spurious_retrans(struct sock *sk, const struct sk_buff *skb)
4263 {
4264 /* When the ACK path fails or drops most ACKs, the sender would
4265  * time out and spuriously retransmit the same segment repeatedly.
4266  * The receiver remembers and reflects this via DSACKs.  Leverage
4267  * that DSACK state and change the txhash to re-route the flow
4268  * speculatively. */
4269 if (TCP_SKB_CB(skb)->seq == tcp_sk(sk)->duplicate_sack[0].start_seq)
4270 sk_rethink_txhash(sk);
4271 }
4272
4273 static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
4274 {
4275 struct tcp_sock *tp = tcp_sk(sk);
4276
4277 if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
4278 before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
4279 NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
4280 tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
4281
4282 if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
4283 u32 end_seq = TCP_SKB_CB(skb)->end_seq;
4284
4285 tcp_rcv_spurious_retrans(sk, skb);
4286 if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))
4287 end_seq = tp->rcv_nxt;
4288 tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, end_seq);
4289 }
4290 }
4291
4292 tcp_send_ack(sk);
4293 }
4294
4295
4296
4297
4298 static void tcp_sack_maybe_coalesce(struct tcp_sock *tp)
4299 {
4300 int this_sack;
4301 struct tcp_sack_block *sp = &tp->selective_acks[0];
4302 struct tcp_sack_block *swalk = sp + 1;
4303
4304
4305
4306
4307 for (this_sack = 1; this_sack < tp->rx_opt.num_sacks;) {
4308 if (tcp_sack_extend(sp, swalk->start_seq, swalk->end_seq)) {
4309 int i;
4310
4311
4312
4313
4314 tp->rx_opt.num_sacks--;
4315 for (i = this_sack; i < tp->rx_opt.num_sacks; i++)
4316 sp[i] = sp[i + 1];
4317 continue;
4318 }
4319 this_sack++, swalk++;
4320 }
4321 }
4322
4323 static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
4324 {
4325 struct tcp_sock *tp = tcp_sk(sk);
4326 struct tcp_sack_block *sp = &tp->selective_acks[0];
4327 int cur_sacks = tp->rx_opt.num_sacks;
4328 int this_sack;
4329
4330 if (!cur_sacks)
4331 goto new_sack;
4332
4333 for (this_sack = 0; this_sack < cur_sacks; this_sack++, sp++) {
4334 if (tcp_sack_extend(sp, seq, end_seq)) {
4335
4336 for (; this_sack > 0; this_sack--, sp--)
4337 swap(*sp, *(sp - 1));
4338 if (cur_sacks > 1)
4339 tcp_sack_maybe_coalesce(tp);
4340 return;
4341 }
4342 }
4343
4344 /* Could not find an adjacent existing SACK: build a new one, put
4345  * it at the front, and shift everyone else down.  We always know
4346  * there is at least one SACK present already here.
4347  *
4348  * If the SACK array is full, forget about the last one.
4349  */
4350 if (this_sack >= TCP_NUM_SACKS) {
4351 if (tp->compressed_ack > TCP_FASTRETRANS_THRESH)
4352 tcp_send_ack(sk);
4353 this_sack--;
4354 tp->rx_opt.num_sacks--;
4355 sp--;
4356 }
4357 for (; this_sack > 0; this_sack--, sp--)
4358 *sp = *(sp - 1);
4359
4360 new_sack:
4361
4362 sp->start_seq = seq;
4363 sp->end_seq = end_seq;
4364 tp->rx_opt.num_sacks++;
4365 }
4366
4367
4368
4369 static void tcp_sack_remove(struct tcp_sock *tp)
4370 {
4371 struct tcp_sack_block *sp = &tp->selective_acks[0];
4372 int num_sacks = tp->rx_opt.num_sacks;
4373 int this_sack;
4374
4375
4376 if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
4377 tp->rx_opt.num_sacks = 0;
4378 return;
4379 }
4380
4381 for (this_sack = 0; this_sack < num_sacks;) {
4382
4383 if (!before(tp->rcv_nxt, sp->start_seq)) {
4384 int i;
4385
4386
4387 WARN_ON(before(tp->rcv_nxt, sp->end_seq));
4388
4389
4390 for (i = this_sack+1; i < num_sacks; i++)
4391 tp->selective_acks[i-1] = tp->selective_acks[i];
4392 num_sacks--;
4393 continue;
4394 }
4395 this_sack++;
4396 sp++;
4397 }
4398 tp->rx_opt.num_sacks = num_sacks;
4399 }
4400
4401 /**
4402  * tcp_try_coalesce - try to merge an skb into the previous one
4403  * @sk: socket
4404  * @to: prior buffer
4405  * @from: buffer to add to the queue
4406  * @fragstolen: pointer to boolean reported to kfree_skb_partial()
4407  *
4408  * Before queueing skb @from after @to, try to merge them to reduce
4409  * overall memory use and queue lengths, if cost is small.
4410  * Packets in the ofo or receive queues might stay a long time,
4411  * so better coalesce them right now to avoid future collapses.
4412  *
4413  * Returns true if the caller should free @from instead of queueing it.
4414  */
4415 static bool tcp_try_coalesce(struct sock *sk,
4416 struct sk_buff *to,
4417 struct sk_buff *from,
4418 bool *fragstolen)
4419 {
4420 int delta;
4421
4422 *fragstolen = false;
4423
4424
4425 if (TCP_SKB_CB(from)->seq != TCP_SKB_CB(to)->end_seq)
4426 return false;
4427
4428 #ifdef CONFIG_TLS_DEVICE
4429 if (from->decrypted != to->decrypted)
4430 return false;
4431 #endif
4432
4433 if (!skb_try_coalesce(to, from, fragstolen, &delta))
4434 return false;
4435
4436 atomic_add(delta, &sk->sk_rmem_alloc);
4437 sk_mem_charge(sk, delta);
4438 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVCOALESCE);
4439 TCP_SKB_CB(to)->end_seq = TCP_SKB_CB(from)->end_seq;
4440 TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq;
4441 TCP_SKB_CB(to)->tcp_flags |= TCP_SKB_CB(from)->tcp_flags;
4442
4443 if (TCP_SKB_CB(from)->has_rxtstamp) {
4444 TCP_SKB_CB(to)->has_rxtstamp = true;
4445 to->tstamp = from->tstamp;
4446 skb_hwtstamps(to)->hwtstamp = skb_hwtstamps(from)->hwtstamp;
4447 }
4448
4449 return true;
4450 }
4451
4452 static bool tcp_ooo_try_coalesce(struct sock *sk,
4453 struct sk_buff *to,
4454 struct sk_buff *from,
4455 bool *fragstolen)
4456 {
4457 bool res = tcp_try_coalesce(sk, to, from, fragstolen);
4458
4459
4460 if (res) {
4461 u32 gso_segs = max_t(u16, 1, skb_shinfo(to)->gso_segs) +
4462 max_t(u16, 1, skb_shinfo(from)->gso_segs);
4463
4464 skb_shinfo(to)->gso_segs = min_t(u32, gso_segs, 0xFFFF);
4465 }
4466 return res;
4467 }
4468
4469 static void tcp_drop(struct sock *sk, struct sk_buff *skb)
4470 {
4471 sk_drops_add(sk, skb);
4472 __kfree_skb(skb);
4473 }
4474
4475
4476
4477
4478 static void tcp_ofo_queue(struct sock *sk)
4479 {
4480 struct tcp_sock *tp = tcp_sk(sk);
4481 __u32 dsack_high = tp->rcv_nxt;
4482 bool fin, fragstolen, eaten;
4483 struct sk_buff *skb, *tail;
4484 struct rb_node *p;
4485
4486 p = rb_first(&tp->out_of_order_queue);
4487 while (p) {
4488 skb = rb_to_skb(p);
4489 if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
4490 break;
4491
4492 if (before(TCP_SKB_CB(skb)->seq, dsack_high)) {
4493 __u32 dsack = dsack_high;
4494 if (before(TCP_SKB_CB(skb)->end_seq, dsack_high))
4495 dsack_high = TCP_SKB_CB(skb)->end_seq;
4496 tcp_dsack_extend(sk, TCP_SKB_CB(skb)->seq, dsack);
4497 }
4498 p = rb_next(p);
4499 rb_erase(&skb->rbnode, &tp->out_of_order_queue);
4500
4501 if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) {
4502 tcp_drop(sk, skb);
4503 continue;
4504 }
4505
4506 tail = skb_peek_tail(&sk->sk_receive_queue);
4507 eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen);
4508 tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
4509 fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN;
4510 if (!eaten)
4511 __skb_queue_tail(&sk->sk_receive_queue, skb);
4512 else
4513 kfree_skb_partial(skb, fragstolen);
4514
4515 if (unlikely(fin)) {
4516 tcp_fin(sk);
4517
4518
4519
4520 break;
4521 }
4522 }
4523 }
4524
4525 static bool tcp_prune_ofo_queue(struct sock *sk);
4526 static int tcp_prune_queue(struct sock *sk);
4527
4528 static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb,
4529 unsigned int size)
4530 {
4531 if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
4532 !sk_rmem_schedule(sk, skb, size)) {
4533
4534 if (tcp_prune_queue(sk) < 0)
4535 return -1;
4536
4537 while (!sk_rmem_schedule(sk, skb, size)) {
4538 if (!tcp_prune_ofo_queue(sk))
4539 return -1;
4540 }
4541 }
4542 return 0;
4543 }
4544
4545 static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
4546 {
4547 struct tcp_sock *tp = tcp_sk(sk);
4548 struct rb_node **p, *parent;
4549 struct sk_buff *skb1;
4550 u32 seq, end_seq;
4551 bool fragstolen;
4552
4553 tcp_ecn_check_ce(sk, skb);
4554
4555 if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) {
4556 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFODROP);
4557 tcp_drop(sk, skb);
4558 return;
4559 }
4560
4561
4562 tp->pred_flags = 0;
4563 inet_csk_schedule_ack(sk);
4564
4565 tp->rcv_ooopack += max_t(u16, 1, skb_shinfo(skb)->gso_segs);
4566 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOQUEUE);
4567 seq = TCP_SKB_CB(skb)->seq;
4568 end_seq = TCP_SKB_CB(skb)->end_seq;
4569
4570 p = &tp->out_of_order_queue.rb_node;
4571 if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
4572
4573 if (tcp_is_sack(tp)) {
4574 tp->rx_opt.num_sacks = 1;
4575 tp->selective_acks[0].start_seq = seq;
4576 tp->selective_acks[0].end_seq = end_seq;
4577 }
4578 rb_link_node(&skb->rbnode, NULL, p);
4579 rb_insert_color(&skb->rbnode, &tp->out_of_order_queue);
4580 tp->ooo_last_skb = skb;
4581 goto end;
4582 }
4583
4584
4585
4586
4587 if (tcp_ooo_try_coalesce(sk, tp->ooo_last_skb,
4588 skb, &fragstolen)) {
4589 coalesce_done:
4590 tcp_grow_window(sk, skb);
4591 kfree_skb_partial(skb, fragstolen);
4592 skb = NULL;
4593 goto add_sack;
4594 }
4595
4596 if (!before(seq, TCP_SKB_CB(tp->ooo_last_skb)->end_seq)) {
4597 parent = &tp->ooo_last_skb->rbnode;
4598 p = &parent->rb_right;
4599 goto insert;
4600 }
4601
4602
4603 parent = NULL;
4604 while (*p) {
4605 parent = *p;
4606 skb1 = rb_to_skb(parent);
4607 if (before(seq, TCP_SKB_CB(skb1)->seq)) {
4608 p = &parent->rb_left;
4609 continue;
4610 }
4611 if (before(seq, TCP_SKB_CB(skb1)->end_seq)) {
4612 if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
4613
4614 NET_INC_STATS(sock_net(sk),
4615 LINUX_MIB_TCPOFOMERGE);
4616 tcp_drop(sk, skb);
4617 skb = NULL;
4618 tcp_dsack_set(sk, seq, end_seq);
4619 goto add_sack;
4620 }
4621 if (after(seq, TCP_SKB_CB(skb1)->seq)) {
4622
4623 tcp_dsack_set(sk, seq, TCP_SKB_CB(skb1)->end_seq);
4624 } else {
4625
4626
4627
4628 rb_replace_node(&skb1->rbnode, &skb->rbnode,
4629 &tp->out_of_order_queue);
4630 tcp_dsack_extend(sk,
4631 TCP_SKB_CB(skb1)->seq,
4632 TCP_SKB_CB(skb1)->end_seq);
4633 NET_INC_STATS(sock_net(sk),
4634 LINUX_MIB_TCPOFOMERGE);
4635 tcp_drop(sk, skb1);
4636 goto merge_right;
4637 }
4638 } else if (tcp_ooo_try_coalesce(sk, skb1,
4639 skb, &fragstolen)) {
4640 goto coalesce_done;
4641 }
4642 p = &parent->rb_right;
4643 }
4644 insert:
4645
4646 rb_link_node(&skb->rbnode, parent, p);
4647 rb_insert_color(&skb->rbnode, &tp->out_of_order_queue);
4648
4649 merge_right:
4650
4651 while ((skb1 = skb_rb_next(skb)) != NULL) {
4652 if (!after(end_seq, TCP_SKB_CB(skb1)->seq))
4653 break;
4654 if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
4655 tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
4656 end_seq);
4657 break;
4658 }
4659 rb_erase(&skb1->rbnode, &tp->out_of_order_queue);
4660 tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
4661 TCP_SKB_CB(skb1)->end_seq);
4662 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE);
4663 tcp_drop(sk, skb1);
4664 }
4665
4666 if (!skb1)
4667 tp->ooo_last_skb = skb;
4668
4669 add_sack:
4670 if (tcp_is_sack(tp))
4671 tcp_sack_new_ofo_skb(sk, seq, end_seq);
4672 end:
4673 if (skb) {
4674 tcp_grow_window(sk, skb);
4675 skb_condense(skb);
4676 skb_set_owner_r(skb, sk);
4677 }
4678 }
4679
4680 static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb,
4681 bool *fragstolen)
4682 {
4683 int eaten;
4684 struct sk_buff *tail = skb_peek_tail(&sk->sk_receive_queue);
4685
4686 eaten = (tail &&
4687 tcp_try_coalesce(sk, tail,
4688 skb, fragstolen)) ? 1 : 0;
4689 tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq);
4690 if (!eaten) {
4691 __skb_queue_tail(&sk->sk_receive_queue, skb);
4692 skb_set_owner_r(skb, sk);
4693 }
4694 return eaten;
4695 }
4696
4697 int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
4698 {
4699 struct sk_buff *skb;
4700 int err = -ENOMEM;
4701 int data_len = 0;
4702 bool fragstolen;
4703
4704 if (size == 0)
4705 return 0;
4706
4707 if (size > PAGE_SIZE) {
4708 int npages = min_t(size_t, size >> PAGE_SHIFT, MAX_SKB_FRAGS);
4709
4710 data_len = npages << PAGE_SHIFT;
4711 size = data_len + (size & ~PAGE_MASK);
4712 }
4713 skb = alloc_skb_with_frags(size - data_len, data_len,
4714 PAGE_ALLOC_COSTLY_ORDER,
4715 &err, sk->sk_allocation);
4716 if (!skb)
4717 goto err;
4718
4719 skb_put(skb, size - data_len);
4720 skb->data_len = data_len;
4721 skb->len = size;
4722
4723 if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) {
4724 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVQDROP);
4725 goto err_free;
4726 }
4727
4728 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
4729 if (err)
4730 goto err_free;
4731
4732 TCP_SKB_CB(skb)->seq = tcp_sk(sk)->rcv_nxt;
4733 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + size;
4734 TCP_SKB_CB(skb)->ack_seq = tcp_sk(sk)->snd_una - 1;
4735
4736 if (tcp_queue_rcv(sk, skb, &fragstolen)) {
4737 WARN_ON_ONCE(fragstolen);
4738 __kfree_skb(skb);
4739 }
4740 return size;
4741
4742 err_free:
4743 kfree_skb(skb);
4744 err:
4745 return err;
4746
4747 }
4748
4749 void tcp_data_ready(struct sock *sk)
4750 {
4751 const struct tcp_sock *tp = tcp_sk(sk);
4752 int avail = tp->rcv_nxt - tp->copied_seq;
4753
4754 if (avail < sk->sk_rcvlowat && !tcp_rmem_pressure(sk) &&
4755 !sock_flag(sk, SOCK_DONE))
4756 return;
4757
4758 sk->sk_data_ready(sk);
4759 }
4760
4761 static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
4762 {
4763 struct tcp_sock *tp = tcp_sk(sk);
4764 bool fragstolen;
4765 int eaten;
4766
4767 if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) {
4768 __kfree_skb(skb);
4769 return;
4770 }
4771 skb_dst_drop(skb);
4772 __skb_pull(skb, tcp_hdr(skb)->doff * 4);
4773
4774 tcp_ecn_accept_cwr(sk, skb);
4775
4776 tp->rx_opt.dsack = 0;
4777
4778 /*  Queue data for delivery to the user.
4779  *  Packets in sequence go to the receive queue;
4780  *  out-of-sequence packets go to the out_of_order queue.
4781  */
4782 if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
4783 if (tcp_receive_window(tp) == 0) {
4784 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPZEROWINDOWDROP);
4785 goto out_of_window;
4786 }
4787
4788
4789 queue_and_out:
4790 if (skb_queue_len(&sk->sk_receive_queue) == 0)
4791 sk_forced_mem_schedule(sk, skb->truesize);
4792 else if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) {
4793 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVQDROP);
4794 goto drop;
4795 }
4796
4797 eaten = tcp_queue_rcv(sk, skb, &fragstolen);
4798 if (skb->len)
4799 tcp_event_data_recv(sk, skb);
4800 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
4801 tcp_fin(sk);
4802
4803 if (!RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
4804 tcp_ofo_queue(sk);
4805
4806
4807
4808
4809 if (RB_EMPTY_ROOT(&tp->out_of_order_queue))
4810 inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
4811 }
4812
4813 if (tp->rx_opt.num_sacks)
4814 tcp_sack_remove(tp);
4815
4816 tcp_fast_path_check(sk);
4817
4818 if (eaten > 0)
4819 kfree_skb_partial(skb, fragstolen);
4820 if (!sock_flag(sk, SOCK_DEAD))
4821 tcp_data_ready(sk);
4822 return;
4823 }
4824
4825 if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
4826 tcp_rcv_spurious_retrans(sk, skb);
4827
4828 NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
4829 tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
4830
4831 out_of_window:
4832 tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
4833 inet_csk_schedule_ack(sk);
4834 drop:
4835 tcp_drop(sk, skb);
4836 return;
4837 }
4838
4839
4840 if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt + tcp_receive_window(tp)))
4841 goto out_of_window;
4842
4843 if (before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
4844
4845 tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, tp->rcv_nxt);
4846
4847
4848
4849
4850 if (!tcp_receive_window(tp)) {
4851 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPZEROWINDOWDROP);
4852 goto out_of_window;
4853 }
4854 goto queue_and_out;
4855 }
4856
4857 tcp_data_queue_ofo(sk, skb);
4858 }
4859
4860 static struct sk_buff *tcp_skb_next(struct sk_buff *skb, struct sk_buff_head *list)
4861 {
4862 if (list)
4863 return !skb_queue_is_last(list, skb) ? skb->next : NULL;
4864
4865 return skb_rb_next(skb);
4866 }
4867
4868 static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,
4869 struct sk_buff_head *list,
4870 struct rb_root *root)
4871 {
4872 struct sk_buff *next = tcp_skb_next(skb, list);
4873
4874 if (list)
4875 __skb_unlink(skb, list);
4876 else
4877 rb_erase(&skb->rbnode, root);
4878
4879 __kfree_skb(skb);
4880 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED);
4881
4882 return next;
4883 }
4884
4885 /* Insert skb into rb tree, ordered by TCP_SKB_CB(skb)->seq */
4886 void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb)
4887 {
4888 struct rb_node **p = &root->rb_node;
4889 struct rb_node *parent = NULL;
4890 struct sk_buff *skb1;
4891
4892 while (*p) {
4893 parent = *p;
4894 skb1 = rb_to_skb(parent);
4895 if (before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb1)->seq))
4896 p = &parent->rb_left;
4897 else
4898 p = &parent->rb_right;
4899 }
4900 rb_link_node(&skb->rbnode, parent, p);
4901 rb_insert_color(&skb->rbnode, root);
4902 }
4903
4904 /* Collapse contiguous sequence of skbs head..tail with
4905  * sequence numbers start..end.
4906  *
4907  * If tail is NULL, this means until the end of the queue.
4908  *
4909  * Segments with FIN/SYN are not collapsed (only because this
4910  * simplifies the code).
4911  */
4912 static void
4913 tcp_collapse(struct sock *sk, struct sk_buff_head *list, struct rb_root *root,
4914 struct sk_buff *head, struct sk_buff *tail, u32 start, u32 end)
4915 {
4916 struct sk_buff *skb = head, *n;
4917 struct sk_buff_head tmp;
4918 bool end_of_skbs;
4919
4920
4921
4922
4923 restart:
4924 for (end_of_skbs = true; skb != NULL && skb != tail; skb = n) {
4925 n = tcp_skb_next(skb, list);
4926
4927
4928 if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
4929 skb = tcp_collapse_one(sk, skb, list, root);
4930 if (!skb)
4931 break;
4932 goto restart;
4933 }
4934
4935
4936
4937
4938
4939
4940 if (!(TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)) &&
4941 (tcp_win_from_space(sk, skb->truesize) > skb->len ||
4942 before(TCP_SKB_CB(skb)->seq, start))) {
4943 end_of_skbs = false;
4944 break;
4945 }
4946
4947 if (n && n != tail &&
4948 TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(n)->seq) {
4949 end_of_skbs = false;
4950 break;
4951 }
4952
4953
4954 start = TCP_SKB_CB(skb)->end_seq;
4955 }
4956 if (end_of_skbs ||
4957 (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)))
4958 return;
4959
4960 __skb_queue_head_init(&tmp);
4961
4962 while (before(start, end)) {
4963 int copy = min_t(int, SKB_MAX_ORDER(0, 0), end - start);
4964 struct sk_buff *nskb;
4965
4966 nskb = alloc_skb(copy, GFP_ATOMIC);
4967 if (!nskb)
4968 break;
4969
4970 memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
4971 #ifdef CONFIG_TLS_DEVICE
4972 nskb->decrypted = skb->decrypted;
4973 #endif
4974 TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start;
4975 if (list)
4976 __skb_queue_before(list, skb, nskb);
4977 else
4978 __skb_queue_tail(&tmp, nskb);
4979 skb_set_owner_r(nskb, sk);
4980
4981
4982 while (copy > 0) {
4983 int offset = start - TCP_SKB_CB(skb)->seq;
4984 int size = TCP_SKB_CB(skb)->end_seq - start;
4985
4986 BUG_ON(offset < 0);
4987 if (size > 0) {
4988 size = min(copy, size);
4989 if (skb_copy_bits(skb, offset, skb_put(nskb, size), size))
4990 BUG();
4991 TCP_SKB_CB(nskb)->end_seq += size;
4992 copy -= size;
4993 start += size;
4994 }
4995 if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
4996 skb = tcp_collapse_one(sk, skb, list, root);
4997 if (!skb ||
4998 skb == tail ||
4999 (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)))
5000 goto end;
5001 #ifdef CONFIG_TLS_DEVICE
5002 if (skb->decrypted != nskb->decrypted)
5003 goto end;
5004 #endif
5005 }
5006 }
5007 }
5008 end:
5009 skb_queue_walk_safe(&tmp, skb, n)
5010 tcp_rbtree_insert(root, skb);
5011 }
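
/* Note on tcp_collapse(): each replacement skb allocated above copies at
 * most SKB_MAX_ORDER(0, 0) bytes (a bit less than one page of payload), so
 * a collapsed range is rebuilt from compact, page-sized buffers whose
 * truesize closely tracks the amount of in-sequence data they actually hold.
 */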
5012
5013 /* Collapse the out-of-order queue: pick contiguous ranges of skbs and
5014  * hand them to tcp_collapse() so their memory footprint shrinks.
5015  */
5016 static void tcp_collapse_ofo_queue(struct sock *sk)
5017 {
5018 struct tcp_sock *tp = tcp_sk(sk);
5019 u32 range_truesize, sum_tiny = 0;
5020 struct sk_buff *skb, *head;
5021 u32 start, end;
5022
5023 skb = skb_rb_first(&tp->out_of_order_queue);
5024 new_range:
5025 if (!skb) {
5026 tp->ooo_last_skb = skb_rb_last(&tp->out_of_order_queue);
5027 return;
5028 }
5029 start = TCP_SKB_CB(skb)->seq;
5030 end = TCP_SKB_CB(skb)->end_seq;
5031 range_truesize = skb->truesize;
5032
5033 for (head = skb;;) {
5034 skb = skb_rb_next(skb);
5035
5036
5037
5038
5039 if (!skb ||
5040 after(TCP_SKB_CB(skb)->seq, end) ||
5041 before(TCP_SKB_CB(skb)->end_seq, start)) {
5042
5043 if (range_truesize != head->truesize ||
5044 end - start >= SKB_WITH_OVERHEAD(SK_MEM_QUANTUM)) {
5045 tcp_collapse(sk, NULL, &tp->out_of_order_queue,
5046 head, skb, start, end);
5047 } else {
5048 sum_tiny += range_truesize;
5049 if (sum_tiny > sk->sk_rcvbuf >> 3)
5050 return;
5051 }
5052 goto new_range;
5053 }
5054
5055 range_truesize += skb->truesize;
5056 if (unlikely(before(TCP_SKB_CB(skb)->seq, start)))
5057 start = TCP_SKB_CB(skb)->seq;
5058 if (after(TCP_SKB_CB(skb)->end_seq, end))
5059 end = TCP_SKB_CB(skb)->end_seq;
5060 }
5061 }
5062
5063 /* Clean the out-of-order queue to make room.
5064  * We drop packets from the highest sequence numbers first in order to:
5065  * 1) leave a chance for the holes before them to be filled;
5066  * 2) avoid adding large latencies if thousands of packets sit there
5067  *    (but if the application shrinks SO_RCVBUF, we could still end up
5068  *    freeing the whole queue here);
5069  * 3) drop at least 12.5 % of sk_rcvbuf per pass, so a malicious peer
5070  *    cannot force us into this path too cheaply.
5071  *
5072  * Return true if the queue has shrunk.
5073  */
5074 static bool tcp_prune_ofo_queue(struct sock *sk)
5075 {
5076 struct tcp_sock *tp = tcp_sk(sk);
5077 struct rb_node *node, *prev;
5078 int goal;
5079
5080 if (RB_EMPTY_ROOT(&tp->out_of_order_queue))
5081 return false;
5082
5083 NET_INC_STATS(sock_net(sk), LINUX_MIB_OFOPRUNED);
5084 goal = sk->sk_rcvbuf >> 3;
5085 node = &tp->ooo_last_skb->rbnode;
5086 do {
5087 prev = rb_prev(node);
5088 rb_erase(node, &tp->out_of_order_queue);
5089 goal -= rb_to_skb(node)->truesize;
5090 tcp_drop(sk, rb_to_skb(node));
5091 if (!prev || goal <= 0) {
5092 sk_mem_reclaim(sk);
5093 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
5094 !tcp_under_memory_pressure(sk))
5095 break;
5096 goal = sk->sk_rcvbuf >> 3;
5097 }
5098 node = prev;
5099 } while (node);
5100 tp->ooo_last_skb = rb_to_skb(prev);
5101
5102
5103
5104
5105
5106
5107 if (tp->rx_opt.sack_ok)
5108 tcp_sack_reset(&tp->rx_opt);
5109 return true;
5110 }
5111
5112 /* Reduce allocated memory if we can, trying to get
5113  * the socket within its memory limits again.
5114  *
5115  * Return less than zero if we should start dropping frames
5116  * until the socket owning process reads some of the data
5117  * to stabilize the situation.
5118  */
5119 static int tcp_prune_queue(struct sock *sk)
5120 {
5121 struct tcp_sock *tp = tcp_sk(sk);
5122
5123 NET_INC_STATS(sock_net(sk), LINUX_MIB_PRUNECALLED);
5124
5125 if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
5126 tcp_clamp_window(sk);
5127 else if (tcp_under_memory_pressure(sk))
5128 tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
5129
5130 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
5131 return 0;
5132
5133 tcp_collapse_ofo_queue(sk);
5134 if (!skb_queue_empty(&sk->sk_receive_queue))
5135 tcp_collapse(sk, &sk->sk_receive_queue, NULL,
5136 skb_peek(&sk->sk_receive_queue),
5137 NULL,
5138 tp->copied_seq, tp->rcv_nxt);
5139 sk_mem_reclaim(sk);
5140
5141 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
5142 return 0;
5143
5144
5145
5146
5147 tcp_prune_ofo_queue(sk);
5148
5149 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
5150 return 0;
5151
5152
5153
5154
5155
5156 NET_INC_STATS(sock_net(sk), LINUX_MIB_RCVPRUNED);
5157
5158
5159 tp->pred_flags = 0;
5160 return -1;
5161 }
5162
5163 static bool tcp_should_expand_sndbuf(const struct sock *sk)
5164 {
5165 const struct tcp_sock *tp = tcp_sk(sk);
5166
5167
5168
5169
5170 if (sk->sk_userlocks & SOCK_SNDBUF_LOCK)
5171 return false;
5172
5173
5174 if (tcp_under_memory_pressure(sk))
5175 return false;
5176
5177
5178 if (sk_memory_allocated(sk) >= sk_prot_mem_limits(sk, 0))
5179 return false;
5180
5181
5182 if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
5183 return false;
5184
5185 return true;
5186 }
5187
5188 /* When an incoming ACK frees some skbs from the write queue, the event is
5189  * recorded in the SOCK_QUEUE_SHRUNK flag and the socket is woken up on exit
5190  * from the TCP input handler (see tcp_check_space() below).  tcp_new_space()
5191  * grows the send buffer when it is safe to do so and then calls
5192  * sk_write_space() so blocked writers can make progress.
5193  */
5194 static void tcp_new_space(struct sock *sk)
5195 {
5196 struct tcp_sock *tp = tcp_sk(sk);
5197
5198 if (tcp_should_expand_sndbuf(sk)) {
5199 tcp_sndbuf_expand(sk);
5200 tp->snd_cwnd_stamp = tcp_jiffies32;
5201 }
5202
5203 sk->sk_write_space(sk);
5204 }
5205
5206 static void tcp_check_space(struct sock *sk)
5207 {
5208 if (sock_flag(sk, SOCK_QUEUE_SHRUNK)) {
5209 sock_reset_flag(sk, SOCK_QUEUE_SHRUNK);
5210
5211 smp_mb();
5212 if (sk->sk_socket &&
5213 test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
5214 tcp_new_space(sk);
5215 if (!test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
5216 tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED);
5217 }
5218 }
5219 }
5220
5221 static inline void tcp_data_snd_check(struct sock *sk)
5222 {
5223 tcp_push_pending_frames(sk);
5224 tcp_check_space(sk);
5225 }
5226
5227 /*
5228  * Check if sending an ack is needed.
5229  */
5230 static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
5231 {
5232 struct tcp_sock *tp = tcp_sk(sk);
5233 unsigned long rtt, delay;
5234
5235
5236 if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss &&
5237
5238
5239
5240
5241
5242 (tp->rcv_nxt - tp->copied_seq < sk->sk_rcvlowat ||
5243 __tcp_select_window(sk) >= tp->rcv_wnd)) ||
5244
5245 tcp_in_quickack_mode(sk) ||
5246
5247 inet_csk(sk)->icsk_ack.pending & ICSK_ACK_NOW) {
5248 send_now:
5249 tcp_send_ack(sk);
5250 return;
5251 }
5252
5253 if (!ofo_possible || RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
5254 tcp_send_delayed_ack(sk);
5255 return;
5256 }
5257
5258 if (!tcp_is_sack(tp) ||
5259 tp->compressed_ack >= sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr)
5260 goto send_now;
5261
5262 if (tp->compressed_ack_rcv_nxt != tp->rcv_nxt) {
5263 tp->compressed_ack_rcv_nxt = tp->rcv_nxt;
5264 if (tp->compressed_ack > TCP_FASTRETRANS_THRESH)
5265 NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPACKCOMPRESSED,
5266 tp->compressed_ack - TCP_FASTRETRANS_THRESH);
5267 tp->compressed_ack = 0;
5268 }
5269
5270 if (++tp->compressed_ack <= TCP_FASTRETRANS_THRESH)
5271 goto send_now;
5272
5273 if (hrtimer_is_queued(&tp->compressed_ack_timer))
5274 return;
5275
5276
5277
5278 rtt = tp->rcv_rtt_est.rtt_us;
5279 if (tp->srtt_us && tp->srtt_us < rtt)
5280 rtt = tp->srtt_us;
5281
5282 delay = min_t(unsigned long, sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns,
5283 rtt * (NSEC_PER_USEC >> 3)/20);
5284 sock_hold(sk);
5285 hrtimer_start(&tp->compressed_ack_timer, ns_to_ktime(delay),
5286 HRTIMER_MODE_REL_PINNED_SOFT);
5287 }
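
/* Note on the compressed-ACK delay above: rcv_rtt_est.rtt_us and srtt_us are
 * stored left-shifted by 3 (1/8 us units), so multiplying by
 * (NSEC_PER_USEC >> 3) converts the estimate to nanoseconds, and the /20
 * arms the timer at roughly 5% of the RTT, clamped by the
 * tcp_comp_sack_delay_ns sysctl (1 ms by default).  For a 10 ms RTT this
 * yields min(1 ms, 0.5 ms) = 0.5 ms.
 */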
5288
5289 static inline void tcp_ack_snd_check(struct sock *sk)
5290 {
5291 if (!inet_csk_ack_scheduled(sk)) {
5292
5293 return;
5294 }
5295 __tcp_ack_snd_check(sk, 1);
5296 }
5297
5298 /* Handle the 'slow' part of urgent-data processing, called only when the
5299  * URG flag is set on an incoming segment: locate the urgent byte in
5300  * sequence space, signal SIGURG to the socket owner, and remember the
5301  * pending byte in tp->urg_data / tp->urg_seq.
5302  *
5303  * Only one byte of urgent data is tracked; a newly signalled urgent byte
5304  * replaces an older one that has not been read yet (the old byte at the
5305  * head of the receive queue is skipped), and the fast path is disabled
5306  * (pred_flags = 0) until the urgent byte has been consumed.
5307  */
5308 static void tcp_check_urg(struct sock *sk, const struct tcphdr *th)
5309 {
5310 struct tcp_sock *tp = tcp_sk(sk);
5311 u32 ptr = ntohs(th->urg_ptr);
5312
5313 if (ptr && !sock_net(sk)->ipv4.sysctl_tcp_stdurg)
5314 ptr--;
5315 ptr += ntohl(th->seq);
5316
5317
5318 if (after(tp->copied_seq, ptr))
5319 return;
5320
5321
5322
5323
5324
5325
5326
5327
5328
5329
5330
5331 if (before(ptr, tp->rcv_nxt))
5332 return;
5333
5334
5335 if (tp->urg_data && !after(ptr, tp->urg_seq))
5336 return;
5337
5338
5339 sk_send_sigurg(sk);
5340
5341
5342
5343
5344
5345
5346
5347
5348
5349
5350
5351
5352
5353
5354
5355
5356 if (tp->urg_seq == tp->copied_seq && tp->urg_data &&
5357 !sock_flag(sk, SOCK_URGINLINE) && tp->copied_seq != tp->rcv_nxt) {
5358 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
5359 tp->copied_seq++;
5360 if (skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq)) {
5361 __skb_unlink(skb, &sk->sk_receive_queue);
5362 __kfree_skb(skb);
5363 }
5364 }
5365
5366 tp->urg_data = TCP_URG_NOTYET;
5367 WRITE_ONCE(tp->urg_seq, ptr);
5368
5369
5370 tp->pred_flags = 0;
5371 }
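
/* Note on the urgent pointer adjustment in tcp_check_urg(): with the
 * tcp_stdurg sysctl disabled (the default, matching long-standing BSD
 * behaviour) the pointer is treated as indicating the byte following the
 * urgent byte, hence the ptr-- above; with tcp_stdurg enabled it is taken,
 * per the host-requirements interpretation, as pointing at the urgent byte
 * itself and is used unchanged.
 */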
5372
5373 /* This is the 'fast' part of urgent handling. */
5374 static void tcp_urg(struct sock *sk, struct sk_buff *skb, const struct tcphdr *th)
5375 {
5376 struct tcp_sock *tp = tcp_sk(sk);
5377
5378
5379 if (th->urg)
5380 tcp_check_urg(sk, th);
5381
5382
5383 if (tp->urg_data == TCP_URG_NOTYET) {
5384 u32 ptr = tp->urg_seq - ntohl(th->seq) + (th->doff * 4) -
5385 th->syn;
5386
5387
5388 if (ptr < skb->len) {
5389 u8 tmp;
5390 if (skb_copy_bits(skb, ptr, &tmp, 1))
5391 BUG();
5392 tp->urg_data = TCP_URG_VALID | tmp;
5393 if (!sock_flag(sk, SOCK_DEAD))
5394 sk->sk_data_ready(sk);
5395 }
5396 }
5397 }
5398
5399 /* Accept a RST whose sequence number is rcv_nxt - 1 while the connection
5400  * is in a state entered after receiving the peer's FIN (CLOSE_WAIT,
5401  * LAST_ACK or CLOSING).  Some stacks abort a connection by sending a RST
5402  * that reuses the sequence number of their FIN, which would otherwise
5403  * fail the exact-match check in tcp_validate_incoming() and only elicit
5404  * a rate-limited challenge ACK, leaving the connection lingering.
5405  * Treating this specific off-by-one RST as valid lets it be torn down.
5406  */
5407 static bool tcp_reset_check(const struct sock *sk, const struct sk_buff *skb)
5408 {
5409 struct tcp_sock *tp = tcp_sk(sk);
5410
5411 return unlikely(TCP_SKB_CB(skb)->seq == (tp->rcv_nxt - 1) &&
5412 (1 << sk->sk_state) & (TCPF_CLOSE_WAIT | TCPF_LAST_ACK |
5413 TCPF_CLOSING));
5414 }
5415
5416 /* Does PAWS and sequence-number based validation of an incoming segment;
5417  * returns false (and drops the skb) when the segment must not be processed
5418  * any further, e.g. out-of-window data or an unacceptable RST or SYN. */
5419 static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
5420 const struct tcphdr *th, int syn_inerr)
5421 {
5422 struct tcp_sock *tp = tcp_sk(sk);
5423 bool rst_seq_match = false;
5424
5425
5426 if (tcp_fast_parse_options(sock_net(sk), skb, th, tp) &&
5427 tp->rx_opt.saw_tstamp &&
5428 tcp_paws_discard(sk, skb)) {
5429 if (!th->rst) {
5430 NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
5431 if (!tcp_oow_rate_limited(sock_net(sk), skb,
5432 LINUX_MIB_TCPACKSKIPPEDPAWS,
5433 &tp->last_oow_ack_time))
5434 tcp_send_dupack(sk, skb);
5435 goto discard;
5436 }
5437
5438 }
5439
5440
5441 if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) {
5442
5443
5444
5445
5446
5447
5448 if (!th->rst) {
5449 if (th->syn)
5450 goto syn_challenge;
5451 if (!tcp_oow_rate_limited(sock_net(sk), skb,
5452 LINUX_MIB_TCPACKSKIPPEDSEQ,
5453 &tp->last_oow_ack_time))
5454 tcp_send_dupack(sk, skb);
5455 } else if (tcp_reset_check(sk, skb)) {
5456 tcp_reset(sk);
5457 }
5458 goto discard;
5459 }
5460
5461
5462 if (th->rst) {
5463
5464
5465
5466
5467
5468
5469
5470
5471
5472 if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt ||
5473 tcp_reset_check(sk, skb)) {
5474 rst_seq_match = true;
5475 } else if (tcp_is_sack(tp) && tp->rx_opt.num_sacks > 0) {
5476 struct tcp_sack_block *sp = &tp->selective_acks[0];
5477 int max_sack = sp[0].end_seq;
5478 int this_sack;
5479
5480 for (this_sack = 1; this_sack < tp->rx_opt.num_sacks;
5481 ++this_sack) {
5482 max_sack = after(sp[this_sack].end_seq,
5483 max_sack) ?
5484 sp[this_sack].end_seq : max_sack;
5485 }
5486
5487 if (TCP_SKB_CB(skb)->seq == max_sack)
5488 rst_seq_match = true;
5489 }
5490
5491 if (rst_seq_match)
5492 tcp_reset(sk);
5493 else {
5494
5495
5496
5497
5498 if (tp->syn_fastopen && !tp->data_segs_in &&
5499 sk->sk_state == TCP_ESTABLISHED)
5500 tcp_fastopen_active_disable(sk);
5501 tcp_send_challenge_ack(sk, skb);
5502 }
5503 goto discard;
5504 }
5505
5506
5507
5508
5509
5510
5511 if (th->syn) {
5512 syn_challenge:
5513 if (syn_inerr)
5514 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
5515 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNCHALLENGE);
5516 tcp_send_challenge_ack(sk, skb);
5517 goto discard;
5518 }
5519
5520 return true;
5521
5522 discard:
5523 tcp_drop(sk, skb);
5524 return false;
5525 }
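
/* Summary of the RST handling in tcp_validate_incoming() (RFC 5961 style):
 * a reset is accepted only when its sequence number exactly matches
 * rcv_nxt, matches the highest SACKed sequence, or hits the post-FIN
 * special case in tcp_reset_check(); an in-window but inexact RST only
 * triggers a rate-limited challenge ACK, so a blind attacker cannot tear
 * the connection down by guessing a sequence number somewhere in the
 * window.
 */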
5526
5527 /*
5528  *	TCP receive function for the ESTABLISHED state.
5529  *
5530  *	It is split into a fast path and a slow path. The fast path is
5531  *	disabled when:
5532  *	- A zero window was announced from us - zero window probing
5533  *	  is only handled properly on the slow path.
5534  *	- Out of order segments arrived.
5535  *	- Urgent data is expected.
5536  *	- There is no buffer space left.
5537  *	- Unexpected TCP flags/window values/header lengths are received
5538  *	  (detected by checking the TCP header against pred_flags).
5539  *	- Data is sent in both directions. The fast path only supports pure
5540  *	  senders or pure receivers (this means either the sequence number
5541  *	  or the ack value must stay constant).
5542  *	- An unexpected TCP option appears.
5543  *
5544  *	When these conditions are not satisfied it drops into a standard
5545  *	receive procedure patterned after RFC 793 to handle all cases.
5546  *	The first three cases are guaranteed by proper pred_flags setting,
5547  *	the rest is checked inline. Fast processing is turned on in
5548  *	tcp_data_queue when everything is OK.
5549  */
5550 void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
5551 {
5552 const struct tcphdr *th = (const struct tcphdr *)skb->data;
5553 struct tcp_sock *tp = tcp_sk(sk);
5554 unsigned int len = skb->len;
5555
5556
5557 trace_tcp_probe(sk, skb);
5558
5559 tcp_mstamp_refresh(tp);
5560 if (unlikely(!sk->sk_rx_dst))
5561 inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb);
5562
5563
5564
5565
5566
5567
5568
5569
5570
5571
5572
5573
5574
5575
5576
5577 tp->rx_opt.saw_tstamp = 0;
5578
5579
5580
5581
5582
5583
5584
5585
5586
5587
5588 if ((tcp_flag_word(th) & TCP_HP_BITS) == tp->pred_flags &&
5589 TCP_SKB_CB(skb)->seq == tp->rcv_nxt &&
5590 !after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt)) {
5591 int tcp_header_len = tp->tcp_header_len;
5592
5593
5594
5595
5596
5597
5598
5599 if (tcp_header_len == sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) {
5600
5601 if (!tcp_parse_aligned_timestamp(tp, th))
5602 goto slow_path;
5603
5604
5605 if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) < 0)
5606 goto slow_path;
5607
5608
5609
5610
5611
5612
5613 }
5614
5615 if (len <= tcp_header_len) {
5616
5617 if (len == tcp_header_len) {
5618
5619
5620
5621
5622 if (tcp_header_len ==
5623 (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
5624 tp->rcv_nxt == tp->rcv_wup)
5625 tcp_store_ts_recent(tp);
5626
5627
5628
5629
5630 tcp_ack(sk, skb, 0);
5631 __kfree_skb(skb);
5632 tcp_data_snd_check(sk);
5633
5634
5635
5636
5637 tp->rcv_rtt_last_tsecr = tp->rx_opt.rcv_tsecr;
5638 return;
5639 } else {
5640 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
5641 goto discard;
5642 }
5643 } else {
5644 int eaten = 0;
5645 bool fragstolen = false;
5646
5647 if (tcp_checksum_complete(skb))
5648 goto csum_error;
5649
5650 if ((int)skb->truesize > sk->sk_forward_alloc)
5651 goto step5;
5652
5653
5654
5655
5656
5657 if (tcp_header_len ==
5658 (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
5659 tp->rcv_nxt == tp->rcv_wup)
5660 tcp_store_ts_recent(tp);
5661
5662 tcp_rcv_rtt_measure_ts(sk, skb);
5663
5664 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPHITS);
5665
5666
5667 __skb_pull(skb, tcp_header_len);
5668 eaten = tcp_queue_rcv(sk, skb, &fragstolen);
5669
5670 tcp_event_data_recv(sk, skb);
5671
5672 if (TCP_SKB_CB(skb)->ack_seq != tp->snd_una) {
5673
5674 tcp_ack(sk, skb, FLAG_DATA);
5675 tcp_data_snd_check(sk);
5676 if (!inet_csk_ack_scheduled(sk))
5677 goto no_ack;
5678 }
5679
5680 __tcp_ack_snd_check(sk, 0);
5681 no_ack:
5682 if (eaten)
5683 kfree_skb_partial(skb, fragstolen);
5684 tcp_data_ready(sk);
5685 return;
5686 }
5687 }
5688
5689 slow_path:
5690 if (len < (th->doff << 2) || tcp_checksum_complete(skb))
5691 goto csum_error;
5692
5693 if (!th->ack && !th->rst && !th->syn)
5694 goto discard;
5695
5696
5697
5698
5699
5700 if (!tcp_validate_incoming(sk, skb, th, 1))
5701 return;
5702
5703 step5:
5704 if (tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT) < 0)
5705 goto discard;
5706
5707 tcp_rcv_rtt_measure_ts(sk, skb);
5708
5709
5710 tcp_urg(sk, skb, th);
5711
5712
5713 tcp_data_queue(sk, skb);
5714
5715 tcp_data_snd_check(sk);
5716 tcp_ack_snd_check(sk);
5717 return;
5718
5719 csum_error:
5720 TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
5721 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
5722
5723 discard:
5724 tcp_drop(sk, skb);
5725 }
5726 EXPORT_SYMBOL(tcp_rcv_established);
5727
5728 void tcp_init_transfer(struct sock *sk, int bpf_op)
5729 {
5730 struct inet_connection_sock *icsk = inet_csk(sk);
5731 struct tcp_sock *tp = tcp_sk(sk);
5732
5733 tcp_mtup_init(sk);
5734 icsk->icsk_af_ops->rebuild_header(sk);
5735 tcp_init_metrics(sk);
5736
5737 /* Cut the initial congestion window down to one segment if the SYN or
5738  * SYN-ACK had to be retransmitted more than once and the retransmission
5739  * was not detected as spurious (undo_marker still set): repeated handshake
5740  * retransmissions suggest the path is dropping our packets, so starting
5741  * with the full initial window would likely just add more losses.
5742  */
5743 if (tp->total_retrans > 1 && tp->undo_marker)
5744 tp->snd_cwnd = 1;
5745 else
5746 tp->snd_cwnd = tcp_init_cwnd(tp, __sk_dst_get(sk));
5747 tp->snd_cwnd_stamp = tcp_jiffies32;
5748
5749 tcp_call_bpf(sk, bpf_op, 0, NULL);
5750 tcp_init_congestion_control(sk);
5751 tcp_init_buffer_space(sk);
5752 }
5753
5754 void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
5755 {
5756 struct tcp_sock *tp = tcp_sk(sk);
5757 struct inet_connection_sock *icsk = inet_csk(sk);
5758
5759 tcp_set_state(sk, TCP_ESTABLISHED);
5760 icsk->icsk_ack.lrcvtime = tcp_jiffies32;
5761
5762 if (skb) {
5763 icsk->icsk_af_ops->sk_rx_dst_set(sk, skb);
5764 security_inet_conn_established(sk, skb);
5765 sk_mark_napi_id(sk, skb);
5766 }
5767
5768 tcp_init_transfer(sk, BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB);
5769
5770
5771
5772
5773 tp->lsndtime = tcp_jiffies32;
5774
5775 if (sock_flag(sk, SOCK_KEEPOPEN))
5776 inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp));
5777
5778 if (!tp->rx_opt.snd_wscale)
5779 __tcp_fast_path_on(tp, tp->snd_wnd);
5780 else
5781 tp->pred_flags = 0;
5782 }
5783
5784 static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
5785 struct tcp_fastopen_cookie *cookie)
5786 {
5787 struct tcp_sock *tp = tcp_sk(sk);
5788 struct sk_buff *data = tp->syn_data ? tcp_rtx_queue_head(sk) : NULL;
5789 u16 mss = tp->rx_opt.mss_clamp, try_exp = 0;
5790 bool syn_drop = false;
5791
5792 if (mss == tp->rx_opt.user_mss) {
5793 struct tcp_options_received opt;
5794
5795
5796 tcp_clear_options(&opt);
5797 opt.user_mss = opt.mss_clamp = 0;
5798 tcp_parse_options(sock_net(sk), synack, &opt, 0, NULL);
5799 mss = opt.mss_clamp;
5800 }
5801
5802 if (!tp->syn_fastopen) {
5803
5804 cookie->len = -1;
5805 } else if (tp->total_retrans) {
5806
5807
5808
5809
5810
5811 syn_drop = (cookie->len < 0 && data);
5812 } else if (cookie->len < 0 && !tp->syn_data) {
5813
5814
5815
5816
5817 try_exp = tp->syn_fastopen_exp ? 2 : 1;
5818 }
5819
5820 tcp_fastopen_cache_set(sk, mss, cookie, syn_drop, try_exp);
5821
5822 if (data) {
5823 skb_rbtree_walk_from(data) {
5824 if (__tcp_retransmit_skb(sk, data, 1))
5825 break;
5826 }
5827 tcp_rearm_rto(sk);
5828 NET_INC_STATS(sock_net(sk),
5829 LINUX_MIB_TCPFASTOPENACTIVEFAIL);
5830 return true;
5831 }
5832 tp->syn_data_acked = tp->syn_data;
5833 if (tp->syn_data_acked) {
5834 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVE);
5835
5836 if (tp->delivered > 1)
5837 --tp->delivered;
5838 }
5839
5840 tcp_fastopen_add_skb(sk, synack);
5841
5842 return false;
5843 }
5844
5845 static void smc_check_reset_syn(struct tcp_sock *tp)
5846 {
5847 #if IS_ENABLED(CONFIG_SMC)
5848 if (static_branch_unlikely(&tcp_have_smc)) {
5849 if (tp->syn_smc && !tp->rx_opt.smc_ok)
5850 tp->syn_smc = 0;
5851 }
5852 #endif
5853 }
5854
5855 static void tcp_try_undo_spurious_syn(struct sock *sk)
5856 {
5857 struct tcp_sock *tp = tcp_sk(sk);
5858 u32 syn_stamp;
5859
5860
5861
5862
5863
5864 syn_stamp = tp->retrans_stamp;
5865 if (tp->undo_marker && syn_stamp && tp->rx_opt.saw_tstamp &&
5866 syn_stamp == tp->rx_opt.rcv_tsecr)
5867 tp->undo_marker = 0;
5868 }
5869
5870 static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
5871 const struct tcphdr *th)
5872 {
5873 struct inet_connection_sock *icsk = inet_csk(sk);
5874 struct tcp_sock *tp = tcp_sk(sk);
5875 struct tcp_fastopen_cookie foc = { .len = -1 };
5876 int saved_clamp = tp->rx_opt.mss_clamp;
5877 bool fastopen_fail;
5878
5879 tcp_parse_options(sock_net(sk), skb, &tp->rx_opt, 0, &foc);
5880 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
5881 tp->rx_opt.rcv_tsecr -= tp->tsoffset;
5882
5883 if (th->ack) {
5884
5885
5886
5887
5888
5889
5890
5891
5892 if (!after(TCP_SKB_CB(skb)->ack_seq, tp->snd_una) ||
5893 after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt))
5894 goto reset_and_undo;
5895
5896 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
5897 !between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp,
5898 tcp_time_stamp(tp))) {
5899 NET_INC_STATS(sock_net(sk),
5900 LINUX_MIB_PAWSACTIVEREJECTED);
5901 goto reset_and_undo;
5902 }
5903
5904
5905
5906
5907
5908
5909
5910
5911
5912 if (th->rst) {
5913 tcp_reset(sk);
5914 goto discard;
5915 }
5916
5917
5918
5919
5920
5921
5922
5923
5924 if (!th->syn)
5925 goto discard_and_undo;
5926
5927
5928
5929
5930
5931
5932
5933
5934 tcp_ecn_rcv_synack(tp, th);
5935
5936 tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
5937 tcp_try_undo_spurious_syn(sk);
5938 tcp_ack(sk, skb, FLAG_SLOWPATH);
5939
5940
5941
5942
5943 WRITE_ONCE(tp->rcv_nxt, TCP_SKB_CB(skb)->seq + 1);
5944 tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
5945
5946
5947
5948
5949 tp->snd_wnd = ntohs(th->window);
5950
5951 if (!tp->rx_opt.wscale_ok) {
5952 tp->rx_opt.snd_wscale = tp->rx_opt.rcv_wscale = 0;
5953 tp->window_clamp = min(tp->window_clamp, 65535U);
5954 }
5955
5956 if (tp->rx_opt.saw_tstamp) {
5957 tp->rx_opt.tstamp_ok = 1;
5958 tp->tcp_header_len =
5959 sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
5960 tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
5961 tcp_store_ts_recent(tp);
5962 } else {
5963 tp->tcp_header_len = sizeof(struct tcphdr);
5964 }
5965
5966 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
5967 tcp_initialize_rcv_mss(sk);
5968
5969
5970
5971
5972 WRITE_ONCE(tp->copied_seq, tp->rcv_nxt);
5973
5974 smc_check_reset_syn(tp);
5975
5976 smp_mb();
5977
5978 tcp_finish_connect(sk, skb);
5979
5980 fastopen_fail = (tp->syn_fastopen || tp->syn_data) &&
5981 tcp_rcv_fastopen_synack(sk, skb, &foc);
5982
5983 if (!sock_flag(sk, SOCK_DEAD)) {
5984 sk->sk_state_change(sk);
5985 sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
5986 }
5987 if (fastopen_fail)
5988 return -1;
5989 if (sk->sk_write_pending ||
5990 icsk->icsk_accept_queue.rskq_defer_accept ||
5991 inet_csk_in_pingpong_mode(sk)) {
5992
5993
5994
5995
5996
5997
5998
5999 inet_csk_schedule_ack(sk);
6000 tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
6001 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
6002 TCP_DELACK_MAX, TCP_RTO_MAX);
6003
6004 discard:
6005 tcp_drop(sk, skb);
6006 return 0;
6007 } else {
6008 tcp_send_ack(sk);
6009 }
6010 return -1;
6011 }
6012
6013
6014
6015 if (th->rst) {
6016
6017
6018
6019
6020
6021
6022 goto discard_and_undo;
6023 }
6024
6025
6026 if (tp->rx_opt.ts_recent_stamp && tp->rx_opt.saw_tstamp &&
6027 tcp_paws_reject(&tp->rx_opt, 0))
6028 goto discard_and_undo;
6029
6030 if (th->syn) {
6031
6032
6033
6034
6035 tcp_set_state(sk, TCP_SYN_RECV);
6036
6037 if (tp->rx_opt.saw_tstamp) {
6038 tp->rx_opt.tstamp_ok = 1;
6039 tcp_store_ts_recent(tp);
6040 tp->tcp_header_len =
6041 sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
6042 } else {
6043 tp->tcp_header_len = sizeof(struct tcphdr);
6044 }
6045
6046 WRITE_ONCE(tp->rcv_nxt, TCP_SKB_CB(skb)->seq + 1);
6047 WRITE_ONCE(tp->copied_seq, tp->rcv_nxt);
6048 tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
6049
6050
6051
6052
6053 tp->snd_wnd = ntohs(th->window);
6054 tp->snd_wl1 = TCP_SKB_CB(skb)->seq;
6055 tp->max_window = tp->snd_wnd;
6056
6057 tcp_ecn_rcv_syn(tp, th);
6058
6059 tcp_mtup_init(sk);
6060 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
6061 tcp_initialize_rcv_mss(sk);
6062
6063 tcp_send_synack(sk);
6064 #if 0
6065
6066
6067
6068
6069
6070
6071
6072
6073
6074
6075
6076 return -1;
6077 #else
6078 goto discard;
6079 #endif
6080 }
6081
6082
6083
6084
6085 discard_and_undo:
6086 tcp_clear_options(&tp->rx_opt);
6087 tp->rx_opt.mss_clamp = saved_clamp;
6088 goto discard;
6089
6090 reset_and_undo:
6091 tcp_clear_options(&tp->rx_opt);
6092 tp->rx_opt.mss_clamp = saved_clamp;
6093 return 1;
6094 }
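
/* Return-value convention used by tcp_rcv_state_process() for the SYN_SENT
 * handler above (a reading of the call site, not a formal contract): 0 means
 * the segment was consumed here, a negative value asks the caller to finish
 * the remaining processing steps and free the skb itself, and 1
 * (reset_and_undo) makes the caller answer with a RST.
 */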
6095
6096 static void tcp_rcv_synrecv_state_fastopen(struct sock *sk)
6097 {
6098 struct request_sock *req;
6099
6100
6101
6102
6103 if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
6104 tcp_try_undo_loss(sk, false);
6105
6106
6107 tcp_sk(sk)->retrans_stamp = 0;
6108 inet_csk(sk)->icsk_retransmits = 0;
6109
6110
6111
6112
6113 req = rcu_dereference_protected(tcp_sk(sk)->fastopen_rsk,
6114 lockdep_sock_is_held(sk));
6115 reqsk_fastopen_remove(sk, req, false);
6116
6117
6118
6119
6120
6121
6122
6123
6124
6125 tcp_rearm_rto(sk);
6126 }
6127
6128 /*
6129  *	This function implements the receiving procedure of RFC 793 for
6130  *	all states except ESTABLISHED and TIME_WAIT.
6131  *	It's called from both tcp_v4_rcv and tcp_v6_rcv and should be
6132  *	address independent.
6133  *	A non-zero return value tells the caller to send a RST.
6134  */
6135 int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
6136 {
6137 struct tcp_sock *tp = tcp_sk(sk);
6138 struct inet_connection_sock *icsk = inet_csk(sk);
6139 const struct tcphdr *th = tcp_hdr(skb);
6140 struct request_sock *req;
6141 int queued = 0;
6142 bool acceptable;
6143
6144 switch (sk->sk_state) {
6145 case TCP_CLOSE:
6146 goto discard;
6147
6148 case TCP_LISTEN:
6149 if (th->ack)
6150 return 1;
6151
6152 if (th->rst)
6153 goto discard;
6154
6155 if (th->syn) {
6156 if (th->fin)
6157 goto discard;
6158
6159
6160
6161 rcu_read_lock();
6162 local_bh_disable();
6163 acceptable = icsk->icsk_af_ops->conn_request(sk, skb) >= 0;
6164 local_bh_enable();
6165 rcu_read_unlock();
6166
6167 if (!acceptable)
6168 return 1;
6169 consume_skb(skb);
6170 return 0;
6171 }
6172 goto discard;
6173
6174 case TCP_SYN_SENT:
6175 tp->rx_opt.saw_tstamp = 0;
6176 tcp_mstamp_refresh(tp);
6177 queued = tcp_rcv_synsent_state_process(sk, skb, th);
6178 if (queued >= 0)
6179 return queued;
6180
6181
6182 tcp_urg(sk, skb, th);
6183 __kfree_skb(skb);
6184 tcp_data_snd_check(sk);
6185 return 0;
6186 }
6187
6188 tcp_mstamp_refresh(tp);
6189 tp->rx_opt.saw_tstamp = 0;
6190 req = rcu_dereference_protected(tp->fastopen_rsk,
6191 lockdep_sock_is_held(sk));
6192 if (req) {
6193 bool req_stolen;
6194
6195 WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV &&
6196 sk->sk_state != TCP_FIN_WAIT1);
6197
6198 if (!tcp_check_req(sk, skb, req, true, &req_stolen))
6199 goto discard;
6200 }
6201
6202 if (!th->ack && !th->rst && !th->syn)
6203 goto discard;
6204
6205 if (!tcp_validate_incoming(sk, skb, th, 0))
6206 return 0;
6207
6208
6209 acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH |
6210 FLAG_UPDATE_TS_RECENT |
6211 FLAG_NO_CHALLENGE_ACK) > 0;
6212
6213 if (!acceptable) {
6214 if (sk->sk_state == TCP_SYN_RECV)
6215 return 1;
6216 tcp_send_challenge_ack(sk, skb);
6217 goto discard;
6218 }
6219 switch (sk->sk_state) {
6220 case TCP_SYN_RECV:
6221 tp->delivered++;
6222 if (!tp->srtt_us)
6223 tcp_synack_rtt_meas(sk, req);
6224
6225 if (req) {
6226 tcp_rcv_synrecv_state_fastopen(sk);
6227 } else {
6228 tcp_try_undo_spurious_syn(sk);
6229 tp->retrans_stamp = 0;
6230 tcp_init_transfer(sk, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB);
6231 WRITE_ONCE(tp->copied_seq, tp->rcv_nxt);
6232 }
6233 smp_mb();
6234 tcp_set_state(sk, TCP_ESTABLISHED);
6235 sk->sk_state_change(sk);
6236
6237
6238
6239
6240
6241 if (sk->sk_socket)
6242 sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
6243
6244 tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
6245 tp->snd_wnd = ntohs(th->window) << tp->rx_opt.snd_wscale;
6246 tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
6247
6248 if (tp->rx_opt.tstamp_ok)
6249 tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
6250
6251 if (!inet_csk(sk)->icsk_ca_ops->cong_control)
6252 tcp_update_pacing_rate(sk);
6253
6254
6255 tp->lsndtime = tcp_jiffies32;
6256
6257 tcp_initialize_rcv_mss(sk);
6258 tcp_fast_path_on(tp);
6259 break;
6260
6261 case TCP_FIN_WAIT1: {
6262 int tmo;
6263
6264 if (req)
6265 tcp_rcv_synrecv_state_fastopen(sk);
6266
6267 if (tp->snd_una != tp->write_seq)
6268 break;
6269
6270 tcp_set_state(sk, TCP_FIN_WAIT2);
6271 sk->sk_shutdown |= SEND_SHUTDOWN;
6272
6273 sk_dst_confirm(sk);
6274
6275 if (!sock_flag(sk, SOCK_DEAD)) {
6276
6277 sk->sk_state_change(sk);
6278 break;
6279 }
6280
6281 if (tp->linger2 < 0) {
6282 tcp_done(sk);
6283 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
6284 return 1;
6285 }
6286 if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
6287 after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
6288
6289 if (tp->syn_fastopen && th->fin)
6290 tcp_fastopen_active_disable(sk);
6291 tcp_done(sk);
6292 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
6293 return 1;
6294 }
6295
6296 tmo = tcp_fin_time(sk);
6297 if (tmo > TCP_TIMEWAIT_LEN) {
6298 inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN);
6299 } else if (th->fin || sock_owned_by_user(sk)) {
6300
6301
6302
6303
6304
6305
6306 inet_csk_reset_keepalive_timer(sk, tmo);
6307 } else {
6308 tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
6309 goto discard;
6310 }
6311 break;
6312 }
6313
6314 case TCP_CLOSING:
6315 if (tp->snd_una == tp->write_seq) {
6316 tcp_time_wait(sk, TCP_TIME_WAIT, 0);
6317 goto discard;
6318 }
6319 break;
6320
6321 case TCP_LAST_ACK:
6322 if (tp->snd_una == tp->write_seq) {
6323 tcp_update_metrics(sk);
6324 tcp_done(sk);
6325 goto discard;
6326 }
6327 break;
6328 }
6329
6330
6331 tcp_urg(sk, skb, th);
6332
6333
6334 switch (sk->sk_state) {
6335 case TCP_CLOSE_WAIT:
6336 case TCP_CLOSING:
6337 case TCP_LAST_ACK:
6338 if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
6339 break;
6340 /* fall through */
6341 case TCP_FIN_WAIT1:
6342 case TCP_FIN_WAIT2:
6343
6344
6345
6346
6347 if (sk->sk_shutdown & RCV_SHUTDOWN) {
6348 if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
6349 after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
6350 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
6351 tcp_reset(sk);
6352 return 1;
6353 }
6354 }
6355 /* fall through */
6356 case TCP_ESTABLISHED:
6357 tcp_data_queue(sk, skb);
6358 queued = 1;
6359 break;
6360 }
6361
6362
6363 if (sk->sk_state != TCP_CLOSE) {
6364 tcp_data_snd_check(sk);
6365 tcp_ack_snd_check(sk);
6366 }
6367
6368 if (!queued) {
6369 discard:
6370 tcp_drop(sk, skb);
6371 }
6372 return 0;
6373 }
6374 EXPORT_SYMBOL(tcp_rcv_state_process);
6375
6376 static inline void pr_drop_req(struct request_sock *req, __u16 port, int family)
6377 {
6378 struct inet_request_sock *ireq = inet_rsk(req);
6379
6380 if (family == AF_INET)
6381 net_dbg_ratelimited("drop open request from %pI4/%u\n",
6382 &ireq->ir_rmt_addr, port);
6383 #if IS_ENABLED(CONFIG_IPV6)
6384 else if (family == AF_INET6)
6385 net_dbg_ratelimited("drop open request from %pI6/%u\n",
6386 &ireq->ir_v6_rmt_addr, port);
6387 #endif
6388 }
6389
6390 /*
6391  * RFC 3168, section 6.1.1: SYN packets must not have ECT codepoints set.
6392  *
6393  * If we receive a SYN packet with these bits set, it means a
6394  * network is playing bad games with TOS bits. In order to
6395  * avoid possible false congestion notifications, we disable
6396  * TCP ECN negotiation.
6397  *
6398  * Exception: the congestion control wants ECN. This is required for
6399  * DCTCP-style congestion control: Linux DCTCP asserts ECT on all
6400  * packets, including the SYN; other implementations may not.
6401  *
6402  * Exception: at least one of the reserved bits of the TCP header
6403  * (th->res1) is set, indicating the use of a future TCP extension
6404  * (such as AccECN); RFC 8311 updates RFC 3168 to allow the
6405  * development of such extensions.
6406  */
6407 static void tcp_ecn_create_request(struct request_sock *req,
6408 const struct sk_buff *skb,
6409 const struct sock *listen_sk,
6410 const struct dst_entry *dst)
6411 {
6412 const struct tcphdr *th = tcp_hdr(skb);
6413 const struct net *net = sock_net(listen_sk);
6414 bool th_ecn = th->ece && th->cwr;
6415 bool ect, ecn_ok;
6416 u32 ecn_ok_dst;
6417
6418 if (!th_ecn)
6419 return;
6420
6421 ect = !INET_ECN_is_not_ect(TCP_SKB_CB(skb)->ip_dsfield);
6422 ecn_ok_dst = dst_feature(dst, DST_FEATURE_ECN_MASK);
6423 ecn_ok = net->ipv4.sysctl_tcp_ecn || ecn_ok_dst;
6424
6425 if (((!ect || th->res1) && ecn_ok) || tcp_ca_needs_ecn(listen_sk) ||
6426 (ecn_ok_dst & DST_FEATURE_ECN_CA) ||
6427 tcp_bpf_ca_needs_ecn((struct sock *)req))
6428 inet_rsk(req)->ecn_ok = 1;
6429 }
6430
6431 static void tcp_openreq_init(struct request_sock *req,
6432 const struct tcp_options_received *rx_opt,
6433 struct sk_buff *skb, const struct sock *sk)
6434 {
6435 struct inet_request_sock *ireq = inet_rsk(req);
6436
6437 req->rsk_rcv_wnd = 0;
6438 req->cookie_ts = 0;
6439 tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq;
6440 tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
6441 tcp_rsk(req)->snt_synack = 0;
6442 tcp_rsk(req)->last_oow_ack_time = 0;
6443 req->mss = rx_opt->mss_clamp;
6444 req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0;
6445 ireq->tstamp_ok = rx_opt->tstamp_ok;
6446 ireq->sack_ok = rx_opt->sack_ok;
6447 ireq->snd_wscale = rx_opt->snd_wscale;
6448 ireq->wscale_ok = rx_opt->wscale_ok;
6449 ireq->acked = 0;
6450 ireq->ecn_ok = 0;
6451 ireq->ir_rmt_port = tcp_hdr(skb)->source;
6452 ireq->ir_num = ntohs(tcp_hdr(skb)->dest);
6453 ireq->ir_mark = inet_request_mark(sk, skb);
6454 #if IS_ENABLED(CONFIG_SMC)
6455 ireq->smc_ok = rx_opt->smc_ok;
6456 #endif
6457 }
6458
6459 struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
6460 struct sock *sk_listener,
6461 bool attach_listener)
6462 {
6463 struct request_sock *req = reqsk_alloc(ops, sk_listener,
6464 attach_listener);
6465
6466 if (req) {
6467 struct inet_request_sock *ireq = inet_rsk(req);
6468
6469 ireq->ireq_opt = NULL;
6470 #if IS_ENABLED(CONFIG_IPV6)
6471 ireq->pktopts = NULL;
6472 #endif
6473 atomic64_set(&ireq->ir_cookie, 0);
6474 ireq->ireq_state = TCP_NEW_SYN_RECV;
6475 write_pnet(&ireq->ireq_net, sock_net(sk_listener));
6476 ireq->ireq_family = sk_listener->sk_family;
6477 }
6478
6479 return req;
6480 }
6481 EXPORT_SYMBOL(inet_reqsk_alloc);
6482
6483 /*
6484  * Return true if a syncookie should be sent.
6485  */
6486 static bool tcp_syn_flood_action(const struct sock *sk, const char *proto)
6487 {
6488 struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
6489 const char *msg = "Dropping request";
6490 bool want_cookie = false;
6491 struct net *net = sock_net(sk);
6492
6493 #ifdef CONFIG_SYN_COOKIES
6494 if (net->ipv4.sysctl_tcp_syncookies) {
6495 msg = "Sending cookies";
6496 want_cookie = true;
6497 __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
6498 } else
6499 #endif
6500 __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
6501
6502 if (!queue->synflood_warned &&
6503 net->ipv4.sysctl_tcp_syncookies != 2 &&
6504 xchg(&queue->synflood_warned, 1) == 0)
6505 net_info_ratelimited("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n",
6506 proto, sk->sk_num, msg);
6507
6508 return want_cookie;
6509 }
6510
6511 static void tcp_reqsk_record_syn(const struct sock *sk,
6512 struct request_sock *req,
6513 const struct sk_buff *skb)
6514 {
6515 if (tcp_sk(sk)->save_syn) {
6516 u32 len = skb_network_header_len(skb) + tcp_hdrlen(skb);
6517 u32 *copy;
6518
6519 copy = kmalloc(len + sizeof(u32), GFP_ATOMIC);
6520 if (copy) {
6521 copy[0] = len;
6522 memcpy(&copy[1], skb_network_header(skb), len);
6523 req->saved_syn = copy;
6524 }
6525 }
6526 }
6527
6528 /* If a SYN cookie is required and supported, return a clamped MSS value to
6529  * be used in the SYN cookie; return 0 when the SYN should instead be handled
6530  * (or dropped) by the normal connection-request path. */
6531 u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops,
6532 const struct tcp_request_sock_ops *af_ops,
6533 struct sock *sk, struct tcphdr *th)
6534 {
6535 struct tcp_sock *tp = tcp_sk(sk);
6536 u16 mss;
6537
6538 if (sock_net(sk)->ipv4.sysctl_tcp_syncookies != 2 &&
6539 !inet_csk_reqsk_queue_is_full(sk))
6540 return 0;
6541
6542 if (!tcp_syn_flood_action(sk, rsk_ops->slab_name))
6543 return 0;
6544
6545 if (sk_acceptq_is_full(sk)) {
6546 NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
6547 return 0;
6548 }
6549
6550 mss = tcp_parse_mss_option(th, tp->rx_opt.user_mss);
6551 if (!mss)
6552 mss = af_ops->mss_clamp;
6553
6554 return mss;
6555 }
6556 EXPORT_SYMBOL_GPL(tcp_get_syncookie_mss);
6557
6558 int tcp_conn_request(struct request_sock_ops *rsk_ops,
6559 const struct tcp_request_sock_ops *af_ops,
6560 struct sock *sk, struct sk_buff *skb)
6561 {
6562 struct tcp_fastopen_cookie foc = { .len = -1 };
6563 __u32 isn = TCP_SKB_CB(skb)->tcp_tw_isn;
6564 struct tcp_options_received tmp_opt;
6565 struct tcp_sock *tp = tcp_sk(sk);
6566 struct net *net = sock_net(sk);
6567 struct sock *fastopen_sk = NULL;
6568 struct request_sock *req;
6569 bool want_cookie = false;
6570 struct dst_entry *dst;
6571 struct flowi fl;
6572
6573
6574
6575
6576
6577 if ((net->ipv4.sysctl_tcp_syncookies == 2 ||
6578 inet_csk_reqsk_queue_is_full(sk)) && !isn) {
6579 want_cookie = tcp_syn_flood_action(sk, rsk_ops->slab_name);
6580 if (!want_cookie)
6581 goto drop;
6582 }
6583
6584 if (sk_acceptq_is_full(sk)) {
6585 NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
6586 goto drop;
6587 }
6588
6589 req = inet_reqsk_alloc(rsk_ops, sk, !want_cookie);
6590 if (!req)
6591 goto drop;
6592
6593 tcp_rsk(req)->af_specific = af_ops;
6594 tcp_rsk(req)->ts_off = 0;
6595
6596 tcp_clear_options(&tmp_opt);
6597 tmp_opt.mss_clamp = af_ops->mss_clamp;
6598 tmp_opt.user_mss = tp->rx_opt.user_mss;
6599 tcp_parse_options(sock_net(sk), skb, &tmp_opt, 0,
6600 want_cookie ? NULL : &foc);
6601
6602 if (want_cookie && !tmp_opt.saw_tstamp)
6603 tcp_clear_options(&tmp_opt);
6604
6605 if (IS_ENABLED(CONFIG_SMC) && want_cookie)
6606 tmp_opt.smc_ok = 0;
6607
6608 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
6609 tcp_openreq_init(req, &tmp_opt, skb, sk);
6610 inet_rsk(req)->no_srccheck = inet_sk(sk)->transparent;
6611
6612
6613 inet_rsk(req)->ir_iif = inet_request_bound_dev_if(sk, skb);
6614
6615 af_ops->init_req(req, sk, skb);
6616
6617 if (security_inet_conn_request(sk, skb, req))
6618 goto drop_and_free;
6619
6620 if (tmp_opt.tstamp_ok)
6621 tcp_rsk(req)->ts_off = af_ops->init_ts_off(net, skb);
6622
6623 dst = af_ops->route_req(sk, &fl, req);
6624 if (!dst)
6625 goto drop_and_free;
6626
6627 if (!want_cookie && !isn) {
6628
6629 if (!net->ipv4.sysctl_tcp_syncookies &&
6630 (net->ipv4.sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
6631 (net->ipv4.sysctl_max_syn_backlog >> 2)) &&
6632 !tcp_peer_is_proven(req, dst)) {
6633
6634
6635
6636
6637
6638
6639
6640 pr_drop_req(req, ntohs(tcp_hdr(skb)->source),
6641 rsk_ops->family);
6642 goto drop_and_release;
6643 }
6644
6645 isn = af_ops->init_seq(skb);
6646 }
6647
6648 tcp_ecn_create_request(req, skb, sk, dst);
6649
6650 if (want_cookie) {
6651 isn = cookie_init_sequence(af_ops, sk, skb, &req->mss);
6652 req->cookie_ts = tmp_opt.tstamp_ok;
6653 if (!tmp_opt.tstamp_ok)
6654 inet_rsk(req)->ecn_ok = 0;
6655 }
6656
6657 tcp_rsk(req)->snt_isn = isn;
6658 tcp_rsk(req)->txhash = net_tx_rndhash();
6659 tcp_openreq_init_rwin(req, sk, dst);
6660 sk_rx_queue_set(req_to_sk(req), skb);
6661 if (!want_cookie) {
6662 tcp_reqsk_record_syn(sk, req, skb);
6663 fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst);
6664 }
6665 if (fastopen_sk) {
6666 af_ops->send_synack(fastopen_sk, dst, &fl, req,
6667 &foc, TCP_SYNACK_FASTOPEN);
6668
6669 if (!inet_csk_reqsk_queue_add(sk, req, fastopen_sk)) {
6670 reqsk_fastopen_remove(fastopen_sk, req, false);
6671 bh_unlock_sock(fastopen_sk);
6672 sock_put(fastopen_sk);
6673 goto drop_and_free;
6674 }
6675 sk->sk_data_ready(sk);
6676 bh_unlock_sock(fastopen_sk);
6677 sock_put(fastopen_sk);
6678 } else {
6679 tcp_rsk(req)->tfo_listener = false;
6680 if (!want_cookie)
6681 inet_csk_reqsk_queue_hash_add(sk, req,
6682 tcp_timeout_init((struct sock *)req));
6683 af_ops->send_synack(sk, dst, &fl, req, &foc,
6684 !want_cookie ? TCP_SYNACK_NORMAL :
6685 TCP_SYNACK_COOKIE);
6686 if (want_cookie) {
6687 reqsk_free(req);
6688 return 0;
6689 }
6690 }
6691 reqsk_put(req);
6692 return 0;
6693
6694 drop_and_release:
6695 dst_release(dst);
6696 drop_and_free:
6697 __reqsk_free(req);
6698 drop:
6699 tcp_listendrop(sk);
6700 return 0;
6701 }
6702 EXPORT_SYMBOL(tcp_conn_request);