Commit 1fc4fa2a authored by David Howells
Browse files

rxrpc: Fix congestion management



rxrpc has a problem in its congestion management in that it saves the
congestion window size (cwnd) from one call to another, but if this is 0 at
the time it is saved, then the next call may never actually manage to
transmit anything.

To this end:

 (1) Don't save cwnd between calls, but rather reset back down to the
     initial cwnd and re-enter slow-start if data transmission is idle for
     more than an RTT.

 (2) Preserve ssthresh instead, as that is a handy estimate of pipe
     capacity.  Knowing roughly when to stop slow start and enter
     congestion avoidance can reduce the tendency to overshoot and drop
     larger amounts of packets when probing.

In future, cwnd growth also needs to be constrained when the window isn't
being filled due to being application limited.

Reported-by: Simon Wilkinson <sxw@auristor.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: linux-afs@lists.infradead.org
parent 6869ddb8
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -193,6 +193,7 @@
	EM(rxrpc_cong_new_low_nack,		" NewLowN") \
	EM(rxrpc_cong_no_change,		" -") \
	EM(rxrpc_cong_progress,			" Progres") \
	EM(rxrpc_cong_idle_reset,		" IdleRes") \
	EM(rxrpc_cong_retransmit_again,		" ReTxAgn") \
	EM(rxrpc_cong_rtt_window_end,		" RttWinE") \
	E_(rxrpc_cong_saw_nack,			" SawNack")
+5 −4
Original line number Diff line number Diff line
@@ -332,7 +332,7 @@ struct rxrpc_peer {
	u32			rto_j;		/* Retransmission timeout in jiffies */
	u8			backoff;	/* Backoff timeout */

	u8			cong_cwnd;	/* Congestion window size */
	u8			cong_ssthresh;	/* Congestion slow-start threshold */
};

/*
@@ -626,6 +626,7 @@ struct rxrpc_call {
	u16			tx_backoff;	/* Delay to insert due to Tx failure */
	u8			tx_winsize;	/* Maximum size of Tx window */
#define RXRPC_TX_MAX_WINDOW	128
	ktime_t			tx_last_sent;	/* Last time a transmission occurred */

	/* Received data tracking */
	struct sk_buff_head	recvmsg_queue;	/* Queue of packets ready for recvmsg() */
@@ -687,10 +688,10 @@ struct rxrpc_call {
 * Summary of a new ACK and the changes it made to the Tx buffer packet states.
 */
struct rxrpc_ack_summary {
	u16			nr_acks;		/* Number of ACKs in packet */
	u16			nr_new_acks;		/* Number of new ACKs in packet */
	u16			nr_rot_new_acks;	/* Number of rotated new ACKs */
	u8			ack_reason;
	u8			nr_acks;		/* Number of ACKs in packet */
	u8			nr_new_acks;		/* Number of new ACKs in packet */
	u8			nr_rot_new_acks;	/* Number of rotated new ACKs */
	bool			saw_nacks;		/* Saw NACKs in packet */
	bool			new_low_nack;		/* T if new low NACK found */
	bool			retrans_timeo;		/* T if reTx due to timeout happened */
+2 −1
Original line number Diff line number Diff line
@@ -324,7 +324,8 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx,
	call->security = conn->security;
	call->security_ix = conn->security_ix;
	call->peer = rxrpc_get_peer(conn->params.peer);
	call->cong_cwnd = call->peer->cong_cwnd;
	call->cong_ssthresh = call->peer->cong_ssthresh;
	call->tx_last_sent = ktime_get_real();
	return call;
}

+6 −1
Original line number Diff line number Diff line
@@ -166,7 +166,12 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp,
	call->rx_winsize = rxrpc_rx_window_size;
	call->tx_winsize = 16;

	if (RXRPC_TX_SMSS > 2190)
		call->cong_cwnd = 2;
	else if (RXRPC_TX_SMSS > 1095)
		call->cong_cwnd = 3;
	else
		call->cong_cwnd = 4;
	call->cong_ssthresh = RXRPC_TX_MAX_WINDOW;

	call->rxnet = rxnet;
+2 −1
Original line number Diff line number Diff line
@@ -363,7 +363,8 @@ static struct rxrpc_bundle *rxrpc_prep_call(struct rxrpc_sock *rx,
	if (!cp->peer)
		goto error;

	call->cong_cwnd = cp->peer->cong_cwnd;
	call->tx_last_sent = ktime_get_real();
	call->cong_ssthresh = cp->peer->cong_ssthresh;
	if (call->cong_cwnd >= call->cong_ssthresh)
		call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE;
	else
Loading