LCOV - code coverage report
Current view: top level - net/ipv4 - tcp_recovery.c (source / functions)
Test:         Real
Date:         2020-10-17 15:46:16
Legend:       Neither, QEMU, Real, Both

              Hit    Total    Coverage
Lines:         20       81      24.7 %
Functions:      0        9       0.0 %
Branches:       0        0           -

           Branch data     Line data    Source code
       1                 :            : // SPDX-License-Identifier: GPL-2.0
       2                 :            : #include <linux/tcp.h>
       3                 :            : #include <net/tcp.h>
       4                 :            : 
       5                 :          1 : void tcp_mark_skb_lost(struct sock *sk, struct sk_buff *skb) /* Mark @skb lost; if it was flagged as retransmitted, undo that retransmit accounting. */
       6                 :            : {
       7                 :            :         struct tcp_sock *tp = tcp_sk(sk);
       8                 :            : 
       9                 :          1 :         tcp_skb_mark_lost_uncond_verify(tp, skb);
      10                 :          1 :         if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
      11                 :            :                 /* Account for retransmits that are lost again */
      12                 :          0 :                 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; /* clear the retransmitted flag */
      13                 :          0 :                 tp->retrans_out -= tcp_skb_pcount(skb); /* take skb's tcp_skb_pcount() back out of retrans_out */
      14                 :          0 :                 NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT,
      15                 :            :                               tcp_skb_pcount(skb)); /* add the same count to the lost-retransmit MIB counter */
      16                 :            :         }
      17                 :          1 : }
      18                 :            : 
      19                 :            : static bool tcp_rack_sent_after(u64 t1, u64 t2, u32 seq1, u32 seq2) /* True when transmission (t1, seq1) is ordered after (t2, seq2). */
      20                 :            : {
      21                 :          0 :         return t1 > t2 || (t1 == t2 && after(seq1, seq2)); /* timestamps decide; equal timestamps fall back to after(seq1, seq2) */
      22                 :            : }
      23                 :            : 
      24                 :          0 : static u32 tcp_rack_reo_wnd(const struct sock *sk) /* Return the reordering window that tcp_rack_detect_loss() passes to tcp_rack_skb_timeout(); 0 means no extra settling delay. */
      25                 :            : {
      26                 :            :         struct tcp_sock *tp = tcp_sk(sk);
      27                 :            : 
      28                 :          0 :         if (!tp->reord_seen) {
      29                 :            :                 /* If reordering has not been observed, be aggressive during
      30                 :            :                  * the recovery or starting the recovery by DUPACK threshold.
      31                 :            :                  */
      32                 :          0 :                 if (inet_csk(sk)->icsk_ca_state >= TCP_CA_Recovery)
      33                 :            :                         return 0; /* already in TCP_CA_Recovery or a higher CA state */
      34                 :            : 
      35                 :          0 :                 if (tp->sacked_out >= tp->reordering &&
      36                 :          0 :                     !(sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_NO_DUPTHRESH))
      37                 :            :                         return 0; /* sacked_out reached tp->reordering and the TCP_RACK_NO_DUPTHRESH sysctl bit is clear */
      38                 :            :         }
      39                 :            : 
      40                 :            :         /* To be more reordering resilient, allow min_rtt/4 settling delay.
      41                 :            :          * Use min_rtt instead of the smoothed RTT because reordering is
      42                 :            :          * often a path property and less related to queuing or delayed ACKs.
      43                 :            :          * Upon receiving DSACKs, linearly increase the window up to the
      44                 :            :          * smoothed RTT.
      45                 :            :          */
      46                 :          0 :         return min((tcp_min_rtt(tp) >> 2) * tp->rack.reo_wnd_steps, /* (min_rtt / 4) * reo_wnd_steps ... */
      47                 :            :                    tp->srtt_us >> 3); /* ... capped at srtt_us >> 3 (presumably srtt_us stores 8x the smoothed RTT - confirm) */
      48                 :            : }
      49                 :            : 
      50                 :          1 : s32 tcp_rack_skb_timeout(struct tcp_sock *tp, struct sk_buff *skb, u32 reo_wnd) /* Signed time left before @skb is deemed lost; tcp_rack_detect_loss() treats <= 0 as expired. */
      51                 :            : {
      52                 :          1 :         return tp->rack.rtt_us + reo_wnd - /* allowed wait: rack.rtt_us plus the reordering window */
      53                 :          1 :                tcp_stamp_us_delta(tp->tcp_mstamp, tcp_skb_timestamp_us(skb)); /* minus the delta between tp->tcp_mstamp and skb's timestamp */
      54                 :            : }
      55                 :            : 
      56                 :            : /* RACK loss detection (IETF draft draft-ietf-tcpm-rack-01):
      57                 :            :  *
      58                 :            :  * Marks a packet lost, if some packet sent later has been (s)acked.
      59                 :            :  * The underlying idea is similar to the traditional dupthresh and FACK
      60                 :            :  * but they look at different metrics:
      61                 :            :  *
      62                 :            :  * dupthresh: 3 OOO packets delivered (packet count)
      63                 :            :  * FACK: sequence delta to highest sacked sequence (sequence space)
      64                 :            :  * RACK: sent time delta to the latest delivered packet (time domain)
      65                 :            :  *
      66                 :            :  * The advantage of RACK is it applies to both original and retransmitted
      67                 :            :  * packets and therefore is robust against tail losses. Another advantage
      68                 :            :  * is being more resilient to reordering by simply allowing some
      69                 :            :  * "settling delay", instead of tweaking the dupthresh.
      70                 :            :  *
      71                 :            :  * When tcp_rack_detect_loss() detects some packets are lost and we
      72                 :            :  * are not already in the CA_Recovery state, either tcp_rack_reo_timeout()
      73                 :            :  * or tcp_time_to_recover()'s "Trick#1: the loss is proven" code path will
      74                 :            :  * make us enter the CA_Recovery state.
      75                 :            :  */
      76                 :          0 : static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout) /* On return *reo_timeout holds the largest remaining wait among skbs not yet expired, or 0 if none. */
      77                 :            : {
      78                 :            :         struct tcp_sock *tp = tcp_sk(sk);
      79                 :            :         struct sk_buff *skb, *n;
      80                 :            :         u32 reo_wnd;
      81                 :            : 
      82                 :          0 :         *reo_timeout = 0;
      83                 :          0 :         reo_wnd = tcp_rack_reo_wnd(sk); /* computed once and reused for every skb in the loop */
      84                 :          0 :         list_for_each_entry_safe(skb, n, &tp->tsorted_sent_queue,
      85                 :            :                                  tcp_tsorted_anchor) { /* _safe variant: entries may be unlinked while iterating */
      86                 :            :                 struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
      87                 :            :                 s32 remaining;
      88                 :            : 
      89                 :            :                 /* Skip ones marked lost but not yet retransmitted */
      90                 :          0 :                 if ((scb->sacked & TCPCB_LOST) &&
      91                 :            :                     !(scb->sacked & TCPCB_SACKED_RETRANS))
      92                 :          0 :                         continue;
      93                 :            : 
      94                 :          0 :                 if (!tcp_rack_sent_after(tp->rack.mstamp,
      95                 :            :                                          tcp_skb_timestamp_us(skb),
      96                 :            :                                          tp->rack.end_seq, scb->end_seq))
      97                 :            :                         break; /* skb was not sent before the RACK reference (rack.mstamp, rack.end_seq); presumably the queue is send-time ordered, so stop here */
      98                 :            : 
      99                 :            :                 /* A packet is lost if it has not been s/acked beyond
     100                 :            :                  * the recent RTT plus the reordering window.
     101                 :            :                  */
     102                 :          0 :                 remaining = tcp_rack_skb_timeout(tp, skb, reo_wnd);
     103                 :          0 :                 if (remaining <= 0) {
     104                 :          0 :                         tcp_mark_skb_lost(sk, skb);
     105                 :            :                         list_del_init(&skb->tcp_tsorted_anchor); /* unlink the lost skb from tsorted_sent_queue */
     106                 :            :                 } else {
     107                 :            :                         /* Record maximum wait time */
     108                 :          0 :                         *reo_timeout = max_t(u32, *reo_timeout, remaining);
     109                 :            :                 }
     110                 :            :         }
     111                 :          0 : }
     112                 :            : 
     113                 :          0 : void tcp_rack_mark_lost(struct sock *sk) /* Run RACK loss detection if rack.advanced is set, then arm the reordering timer when some skbs still have time left. */
     114                 :            : {
     115                 :            :         struct tcp_sock *tp = tcp_sk(sk);
     116                 :            :         u32 timeout;
     117                 :            : 
     118                 :          0 :         if (!tp->rack.advanced)
     119                 :          0 :                 return;
     120                 :            : 
     121                 :            :         /* Reset the advanced flag to avoid unnecessary queue scanning */
     122                 :          0 :         tp->rack.advanced = 0;
     123                 :          0 :         tcp_rack_detect_loss(sk, &timeout); /* always writes timeout (0 when nothing is pending) */
     124                 :          0 :         if (timeout) {
     125                 :          0 :                 timeout = usecs_to_jiffies(timeout) + TCP_TIMEOUT_MIN; /* convert via usecs_to_jiffies() and add TCP_TIMEOUT_MIN */
     126                 :          0 :                 inet_csk_reset_xmit_timer(sk, ICSK_TIME_REO_TIMEOUT,
     127                 :          0 :                                           timeout, inet_csk(sk)->icsk_rto); /* icsk_rto is passed as the last argument (presumably an upper bound - confirm) */
     128                 :            :         }
     129                 :            : }
     130                 :            : 
     131                 :            : /* Record the most recently (re)sent time among the (s)acked packets.
     132                 :            :  * This is "Step 3: Advance RACK.xmit_time and update RACK.RTT" from
     133                 :            :  * draft-cheng-tcpm-rack-00.txt
     134                 :            :  */
     135                 :          0 : void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq,
     136                 :            :                       u64 xmit_time)
     137                 :            : {
     138                 :            :         u32 rtt_us;
     139                 :            : 
     140                 :          0 :         rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, xmit_time); /* delta between tp->tcp_mstamp and this packet's xmit_time */
     141                 :          0 :         if (rtt_us < tcp_min_rtt(tp) && (sacked & TCPCB_RETRANS)) {
     142                 :            :                 /* If the sacked packet was retransmitted, it's ambiguous
     143                 :            :                  * whether the retransmission or the original (or the prior
     144                 :            :                  * retransmission) was sacked.
     145                 :            :                  *
     146                 :            :                  * If the original is lost, there is no ambiguity. Otherwise
     147                 :            :                  * we assume the original can be delayed up to aRTT + min_rtt.
     148                 :            :                  * The aRTT term is bounded by the fast recovery or timeout,
     149                 :            :                  * so it's at least one RTT (i.e., retransmission is at least
     150                 :            :                  * an RTT later).
     151                 :            :                  */
     152                 :          0 :                 return; /* sample ignored: no RACK state is modified */
     153                 :            :         }
     154                 :          0 :         tp->rack.advanced = 1; /* tcp_rack_mark_lost() only scans when this is set */
     155                 :          0 :         tp->rack.rtt_us = rtt_us; /* updated for every accepted sample, even if mstamp/end_seq below are not */
     156                 :          0 :         if (tcp_rack_sent_after(xmit_time, tp->rack.mstamp,
     157                 :            :                                 end_seq, tp->rack.end_seq)) { /* only ever move the (mstamp, end_seq) reference forward */
     158                 :          0 :                 tp->rack.mstamp = xmit_time;
     159                 :          0 :                 tp->rack.end_seq = end_seq;
     160                 :            :         }
     161                 :            : }
     162                 :            : 
     163                 :            : /* We have waited long enough to accommodate reordering. Mark the expired
     164                 :            :  * packets lost and retransmit them.
     165                 :            :  */
     166                 :          0 : void tcp_rack_reo_timeout(struct sock *sk)
     167                 :            : {
     168                 :            :         struct tcp_sock *tp = tcp_sk(sk);
     169                 :            :         u32 timeout, prior_inflight;
     170                 :            : 
     171                 :            :         prior_inflight = tcp_packets_in_flight(tp); /* snapshot so a change made by loss detection can be noticed */
     172                 :          0 :         tcp_rack_detect_loss(sk, &timeout); /* timeout is written but not used afterwards in this function */
     173                 :          0 :         if (prior_inflight != tcp_packets_in_flight(tp)) { /* in-flight count changed, presumably because skbs were just marked lost */
     174                 :          0 :                 if (inet_csk(sk)->icsk_ca_state != TCP_CA_Recovery) {
     175                 :          0 :                         tcp_enter_recovery(sk, false);
     176                 :          0 :                         if (!inet_csk(sk)->icsk_ca_ops->cong_control) /* CA module has no cong_control hook */
     177                 :          0 :                                 tcp_cwnd_reduction(sk, 1, 0);
     178                 :            :                 }
     179                 :          0 :                 tcp_xmit_retransmit_queue(sk);
     180                 :            :         }
     181                 :          0 :         if (inet_csk(sk)->icsk_pending != ICSK_TIME_RETRANS) /* re-arm the RTO unless the pending timer is already ICSK_TIME_RETRANS */
     182                 :          0 :                 tcp_rearm_rto(sk);
     183                 :          0 : }
     184                 :            : 
     185                 :            : /* Update RACK's reo_wnd based on DSACKs and the number of recoveries.
     186                 :            :  *
     187                 :            :  * If a DSACK is received, increment reo_wnd by min_rtt/4 (upper bounded
     188                 :            :  * by srtt), since the spurious retransmission may have been caused by a
     189                 :            :  * reordering delay longer than reo_wnd.
     190                 :            :  *
     191                 :            :  * Persist the current reo_wnd value for TCP_RACK_RECOVERY_THRESH (16)
     192                 :            :  * successful recoveries (this accounts for a full DSACK-based loss
     193                 :            :  * recovery undo). After that, reset it to the default (min_rtt/4).
     194                 :            :  *
     195                 :            :  * reo_wnd is incremented at most once per RTT, so that the new DSACK
     196                 :            :  * we are reacting to is (approximately) due to a spurious retransmission
     197                 :            :  * sent after reo_wnd was last updated.
     198                 :            :  *
     199                 :            :  * reo_wnd is tracked in steps (of min_rtt/4) rather than as an
     200                 :            :  * absolute value, to account for changes in RTT.
     201                 :            :  */
     202                 :          1 : void tcp_rack_update_reo_wnd(struct sock *sk, struct rate_sample *rs)
     203                 :            : {
     204                 :            :         struct tcp_sock *tp = tcp_sk(sk);
     205                 :            : 
     206                 :          1 :         if (sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_STATIC_REO_WND ||
     207                 :          1 :             !rs->prior_delivered)
     208                 :          1 :                 return; /* TCP_RACK_STATIC_REO_WND sysctl bit set, or rs->prior_delivered is 0: leave the window alone */
     209                 :            : 
     210                 :            :         /* Disregard DSACK if a rtt has not passed since we adjusted reo_wnd */
     211                 :          1 :         if (before(rs->prior_delivered, tp->rack.last_delivered))
     212                 :          0 :                 tp->rack.dsack_seen = 0;
     213                 :            : 
     214                 :            :         /* Adjust the reo_wnd if update is pending */
     215                 :          1 :         if (tp->rack.dsack_seen) {
     216                 :          0 :                 tp->rack.reo_wnd_steps = min_t(u32, 0xFF,
     217                 :            :                                                tp->rack.reo_wnd_steps + 1); /* one more step, saturating at 0xFF */
     218                 :          0 :                 tp->rack.dsack_seen = 0;
     219                 :          0 :                 tp->rack.last_delivered = tp->delivered; /* reference point for the once-per-RTT check above */
     220                 :          0 :                 tp->rack.reo_wnd_persist = TCP_RACK_RECOVERY_THRESH; /* only set here; no decrement appears in this listing */
     221                 :          1 :         } else if (!tp->rack.reo_wnd_persist) {
     222                 :          1 :                 tp->rack.reo_wnd_steps = 1; /* persistence used up: back to a single step */
     223                 :            :         }
     224                 :            : }
     225                 :            : 
     226                 :            : /* RFC6582 NewReno recovery for non-SACK connection. It simply retransmits
     227                 :            :  * the next unacked packet upon receiving
     228                 :            :  * a) three or more DUPACKs to start the fast recovery
     229                 :            :  * b) an ACK acknowledging new data during the fast recovery.
     230                 :            :  */
     231                 :          1 : void tcp_newreno_mark_lost(struct sock *sk, bool snd_una_advanced)
     232                 :            : {
     233                 :          1 :         const u8 state = inet_csk(sk)->icsk_ca_state;
     234                 :            :         struct tcp_sock *tp = tcp_sk(sk);
     235                 :            : 
     236                 :          1 :         if ((state < TCP_CA_Recovery && tp->sacked_out >= tp->reordering) ||
     237                 :          1 :             (state == TCP_CA_Recovery && snd_una_advanced)) {
     238                 :            :                 struct sk_buff *skb = tcp_rtx_queue_head(sk); /* NOTE(review): dereferenced below without a NULL check - assumes the rtx queue is non-empty here; confirm */
     239                 :            :                 u32 mss;
     240                 :            : 
     241                 :          0 :                 if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST)
     242                 :          1 :                         return; /* head skb already carries TCPCB_LOST */
     243                 :            : 
     244                 :          0 :                 mss = tcp_skb_mss(skb);
     245                 :          0 :                 if (tcp_skb_pcount(skb) > 1 && skb->len > mss) /* more than one segment and longer than one MSS */
     246                 :          0 :                         tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb,
     247                 :            :                                      mss, mss, GFP_ATOMIC); /* presumably splits skb at mss; any result of the call is ignored here */
     248                 :            : 
     249                 :          0 :                 tcp_skb_mark_lost_uncond_verify(tp, skb); /* called directly, so the extra accounting in tcp_mark_skb_lost() is not applied on this path */
     250                 :            :         }
     251                 :            : }
    

Generated by: LCOV version 1.14