Commit 1e253eda authored by Ben Hutchings's avatar Ben Hutchings

Add TCP DoS fixes

parent 4ea46855
......@@ -15,6 +15,10 @@ linux (4.19.37-4) UNRELEASED; urgency=medium
* mm/mincore.c: make mincore() more conservative (CVE-2019-5489)
* mwifiex: Fix heap overflow in mwifiex_uap_parse_tail_ies()
(CVE-2019-10126)
* tcp: limit payload size of sacked skbs (CVE-2019-11477)
* tcp: tcp_fragment() should apply sane memory limits (CVE-2019-11478)
* tcp: add tcp_min_snd_mss sysctl (CVE-2019-11479)
* tcp: enforce tcp_min_snd_mss in tcp_mtu_probing()
[ Romain Perier ]
* [rt] Update to 4.19.37-rt20
......
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 17 Jun 2019 10:03:53 -0700
Subject: [PATCH net 3/4] tcp: add tcp_min_snd_mss sysctl
Origin: https://patchwork.ozlabs.org/patch/1117157/
Some TCP peers announce a very small MSS option in their SYN and/or
SYN/ACK messages.
This forces the stack to send packets with a very high network/cpu
overhead.
Linux has enforced a minimal value of 48. Since this value includes
the size of TCP options, and that the options can consume up to 40
bytes, this means that each segment can include only 8 bytes of payload.
In some cases, it can be useful to increase the minimal value
to a saner value.
We still let the default to 48 (TCP_MIN_SND_MSS), for compatibility
reasons.
Note that TCP_MAXSEG socket option enforces a minimal value
of (TCP_MIN_MSS). David Miller increased this minimal value
in commit c39508d6f118 ("tcp: Make TCP_MAXSEG minimum more correct.")
from 64 to 88.
We might in the future merge TCP_MIN_SND_MSS and TCP_MIN_MSS.
CVE-2019-11479 -- tcp mss hardcoded to 48
Signed-off-by: Eric Dumazet <edumazet@google.com>
Suggested-by: Jonathan Looney <jtl@netflix.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Tyler Hicks <tyhicks@canonical.com>
Cc: Bruce Curtis <brucec@netflix.com>
Cc: Jonathan Lemon <jonathan.lemon@gmail.com>
Acked-by: Jonathan Lemon <jonathan.lemon@gmail.com>
Acked-by: Tyler Hicks <tyhicks@canonical.com>
---
Documentation/networking/ip-sysctl.txt | 8 ++++++++
include/net/netns/ipv4.h | 1 +
net/ipv4/sysctl_net_ipv4.c | 11 +++++++++++
net/ipv4/tcp_ipv4.c | 1 +
net/ipv4/tcp_output.c | 3 +--
5 files changed, 22 insertions(+), 2 deletions(-)
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -250,6 +250,14 @@ tcp_base_mss - INTEGER
Path MTU discovery (MTU probing). If MTU probing is enabled,
this is the initial MSS used by the connection.
+tcp_min_snd_mss - INTEGER
+ TCP SYN and SYNACK messages usually advertise an ADVMSS option,
+ as described in RFC 1122 and RFC 6691.
+ If this ADVMSS option is smaller than tcp_min_snd_mss,
+ it is silently capped to tcp_min_snd_mss.
+
+ Default : 48 (at least 8 bytes of payload per segment)
+
tcp_congestion_control - STRING
Set the congestion control algorithm to be used for new
connections. The algorithm "reno" is always available, but
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -113,6 +113,7 @@ struct netns_ipv4 {
#endif
int sysctl_tcp_mtu_probing;
int sysctl_tcp_base_mss;
+ int sysctl_tcp_min_snd_mss;
int sysctl_tcp_probe_threshold;
u32 sysctl_tcp_probe_interval;
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -39,6 +39,8 @@ static int ip_local_port_range_min[] = {
static int ip_local_port_range_max[] = { 65535, 65535 };
static int tcp_adv_win_scale_min = -31;
static int tcp_adv_win_scale_max = 31;
+static int tcp_min_snd_mss_min = TCP_MIN_SND_MSS;
+static int tcp_min_snd_mss_max = 65535;
static int ip_privileged_port_min;
static int ip_privileged_port_max = 65535;
static int ip_ttl_min = 1;
@@ -737,6 +739,15 @@ static struct ctl_table ipv4_net_table[]
.proc_handler = proc_dointvec,
},
{
+ .procname = "tcp_min_snd_mss",
+ .data = &init_net.ipv4.sysctl_tcp_min_snd_mss,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &tcp_min_snd_mss_min,
+ .extra2 = &tcp_min_snd_mss_max,
+ },
+ {
.procname = "tcp_probe_threshold",
.data = &init_net.ipv4.sysctl_tcp_probe_threshold,
.maxlen = sizeof(int),
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2527,6 +2527,7 @@ static int __net_init tcp_sk_init(struct
net->ipv4.sysctl_tcp_ecn_fallback = 1;
net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS;
+ net->ipv4.sysctl_tcp_min_snd_mss = TCP_MIN_SND_MSS;
net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD;
net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL;
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1462,8 +1462,7 @@ static inline int __tcp_mtu_to_mss(struc
mss_now -= icsk->icsk_ext_hdr_len;
/* Then reserve room for full set of TCP options and 8 bytes of data */
- if (mss_now < TCP_MIN_SND_MSS)
- mss_now = TCP_MIN_SND_MSS;
+ mss_now = max(mss_now, sock_net(sk)->ipv4.sysctl_tcp_min_snd_mss);
return mss_now;
}
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 17 Jun 2019 10:03:54 -0700
Subject: [PATCH net 4/4] tcp: enforce tcp_min_snd_mss in tcp_mtu_probing()
Origin: https://patchwork.ozlabs.org/patch/1117158/
If mtu probing is enabled tcp_mtu_probing() could very well end up
with a too small MSS.
Use the new sysctl tcp_min_snd_mss to make sure MSS search
is performed in an acceptable range.
CVE-2019-11479 -- tcp mss hardcoded to 48
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Jonathan Lemon <jonathan.lemon@gmail.com>
Cc: Jonathan Looney <jtl@netflix.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Tyler Hicks <tyhicks@canonical.com>
Cc: Bruce Curtis <brucec@netflix.com>
Acked-by: Jonathan Lemon <jonathan.lemon@gmail.com>
Acked-by: Tyler Hicks <tyhicks@canonical.com>
---
net/ipv4/tcp_timer.c | 1 +
1 file changed, 1 insertion(+)
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -166,6 +166,7 @@ static void tcp_mtu_probing(struct inet_
mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1;
mss = min(net->ipv4.sysctl_tcp_base_mss, mss);
mss = max(mss, 68 - tcp_sk(sk)->tcp_header_len);
+ mss = max(mss, net->ipv4.sysctl_tcp_min_snd_mss);
icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
}
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 17 Jun 2019 10:03:51 -0700
Subject: [PATCH net 1/4] tcp: limit payload size of sacked skbs
Origin: https://patchwork.ozlabs.org/patch/1117155/
Jonathan Looney reported that TCP can trigger the following crash
in tcp_shifted_skb() :
BUG_ON(tcp_skb_pcount(skb) < pcount);
This can happen if the remote peer has advertized the smallest
MSS that linux TCP accepts : 48
An skb can hold 17 fragments, and each fragment can hold 32KB
on x86, or 64KB on PowerPC.
This means that the 16bit witdh of TCP_SKB_CB(skb)->tcp_gso_segs
can overflow.
Note that tcp_sendmsg() builds skbs with less than 64KB
of payload, so this problem needs SACK to be enabled.
SACK blocks allow TCP to coalesce multiple skbs in the retransmit
queue, thus filling the 17 fragments to maximal capacity.
CVE-2019-11477 -- u16 overflow of TCP_SKB_CB(skb)->tcp_gso_segs
Fixes: 832d11c5cd07 ("tcp: Try to restore large SKBs while SACK processing")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Jonathan Looney <jtl@netflix.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Reviewed-by: Tyler Hicks <tyhicks@canonical.com>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Bruce Curtis <brucec@netflix.com>
Cc: Jonathan Lemon <jonathan.lemon@gmail.com>
Acked-by: Jonathan Lemon <jonathan.lemon@gmail.com>
---
include/linux/tcp.h | 4 ++++
include/net/tcp.h | 2 ++
net/ipv4/tcp.c | 1 +
net/ipv4/tcp_input.c | 26 ++++++++++++++++++++------
net/ipv4/tcp_output.c | 6 +++---
5 files changed, 30 insertions(+), 9 deletions(-)
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -485,4 +485,8 @@ static inline u16 tcp_mss_clamp(const st
return (user_mss && user_mss < mss) ? user_mss : mss;
}
+
+int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from, int pcount,
+ int shiftlen);
+
#endif /* _LINUX_TCP_H */
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -55,6 +55,8 @@ void tcp_time_wait(struct sock *sk, int
#define MAX_TCP_HEADER (128 + MAX_HEADER)
#define MAX_TCP_OPTION_SPACE 40
+#define TCP_MIN_SND_MSS 48
+#define TCP_MIN_GSO_SIZE (TCP_MIN_SND_MSS - MAX_TCP_OPTION_SPACE)
/*
* Never offer a window over 32767 without using window scaling. Some
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3829,6 +3829,7 @@ void __init tcp_init(void)
unsigned long limit;
unsigned int i;
+ BUILD_BUG_ON(TCP_MIN_SND_MSS <= MAX_TCP_OPTION_SPACE);
BUILD_BUG_ON(sizeof(struct tcp_skb_cb) >
FIELD_SIZEOF(struct sk_buff, cb));
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1315,7 +1315,7 @@ static bool tcp_shifted_skb(struct sock
TCP_SKB_CB(skb)->seq += shifted;
tcp_skb_pcount_add(prev, pcount);
- BUG_ON(tcp_skb_pcount(skb) < pcount);
+ WARN_ON_ONCE(tcp_skb_pcount(skb) < pcount);
tcp_skb_pcount_add(skb, -pcount);
/* When we're adding to gso_segs == 1, gso_size will be zero,
@@ -1381,6 +1381,21 @@ static int skb_can_shift(const struct sk
return !skb_headlen(skb) && skb_is_nonlinear(skb);
}
+int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from,
+ int pcount, int shiftlen)
+{
+ /* TCP min gso_size is 8 bytes (TCP_MIN_GSO_SIZE)
+ * Since TCP_SKB_CB(skb)->tcp_gso_segs is 16 bits, we need
+ * to make sure not storing more than 65535 * 8 bytes per skb,
+ * even if current MSS is bigger.
+ */
+ if (unlikely(to->len + shiftlen >= 65535 * TCP_MIN_GSO_SIZE))
+ return 0;
+ if (unlikely(tcp_skb_pcount(to) + pcount > 65535))
+ return 0;
+ return skb_shift(to, from, shiftlen);
+}
+
/* Try collapsing SACK blocks spanning across multiple skbs to a single
* skb.
*/
@@ -1486,7 +1501,7 @@ static struct sk_buff *tcp_shift_skb_dat
if (!after(TCP_SKB_CB(skb)->seq + len, tp->snd_una))
goto fallback;
- if (!skb_shift(prev, skb, len))
+ if (!tcp_skb_shift(prev, skb, pcount, len))
goto fallback;
if (!tcp_shifted_skb(sk, prev, skb, state, pcount, len, mss, dup_sack))
goto out;
@@ -1504,11 +1519,10 @@ static struct sk_buff *tcp_shift_skb_dat
goto out;
len = skb->len;
- if (skb_shift(prev, skb, len)) {
- pcount += tcp_skb_pcount(skb);
- tcp_shifted_skb(sk, prev, skb, state, tcp_skb_pcount(skb),
+ pcount = tcp_skb_pcount(skb);
+ if (tcp_skb_shift(prev, skb, pcount, len))
+ tcp_shifted_skb(sk, prev, skb, state, pcount,
len, mss, 0);
- }
out:
return prev;
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1457,8 +1457,8 @@ static inline int __tcp_mtu_to_mss(struc
mss_now -= icsk->icsk_ext_hdr_len;
/* Then reserve room for full set of TCP options and 8 bytes of data */
- if (mss_now < 48)
- mss_now = 48;
+ if (mss_now < TCP_MIN_SND_MSS)
+ mss_now = TCP_MIN_SND_MSS;
return mss_now;
}
@@ -2727,7 +2727,7 @@ static bool tcp_collapse_retrans(struct
if (next_skb_size <= skb_availroom(skb))
skb_copy_bits(next_skb, 0, skb_put(skb, next_skb_size),
next_skb_size);
- else if (!skb_shift(skb, next_skb, next_skb_size))
+ else if (!tcp_skb_shift(skb, next_skb, 1, next_skb_size))
return false;
}
tcp_highest_sack_replace(sk, next_skb, skb);
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 17 Jun 2019 10:03:52 -0700
Subject: [PATCH net 2/4] tcp: tcp_fragment() should apply sane memory limits
Origin: https://patchwork.ozlabs.org/patch/1117156/
Jonathan Looney reported that a malicious peer can force a sender
to fragment its retransmit queue into tiny skbs, inflating memory
usage and/or overflow 32bit counters.
TCP allows an application to queue up to sk_sndbuf bytes,
so we need to give some allowance for non malicious splitting
of retransmit queue.
A new SNMP counter is added to monitor how many times TCP
did not allow to split an skb if the allowance was exceeded.
Note that this counter might increase in the case applications
use SO_SNDBUF socket option to lower sk_sndbuf.
CVE-2019-11478 : tcp_fragment, prevent fragmenting a packet when the
socket is already using more than half the allowed space
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Jonathan Looney <jtl@netflix.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Yuchung Cheng <ycheng@google.com>
Reviewed-by: Tyler Hicks <tyhicks@canonical.com>
Cc: Bruce Curtis <brucec@netflix.com>
Cc: Jonathan Lemon <jonathan.lemon@gmail.com>
Acked-by: Jonathan Lemon <jonathan.lemon@gmail.com>
---
include/uapi/linux/snmp.h | 1 +
net/ipv4/proc.c | 1 +
net/ipv4/tcp_output.c | 5 +++++
3 files changed, 7 insertions(+)
--- a/include/uapi/linux/snmp.h
+++ b/include/uapi/linux/snmp.h
@@ -282,6 +282,7 @@ enum
LINUX_MIB_TCPACKCOMPRESSED, /* TCPAckCompressed */
LINUX_MIB_TCPZEROWINDOWDROP, /* TCPZeroWindowDrop */
LINUX_MIB_TCPRCVQDROP, /* TCPRcvQDrop */
+ LINUX_MIB_TCPWQUEUETOOBIG, /* TCPWqueueTooBig */
__LINUX_MIB_MAX
};
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -290,6 +290,7 @@ static const struct snmp_mib snmp4_net_l
SNMP_MIB_ITEM("TCPAckCompressed", LINUX_MIB_TCPACKCOMPRESSED),
SNMP_MIB_ITEM("TCPZeroWindowDrop", LINUX_MIB_TCPZEROWINDOWDROP),
SNMP_MIB_ITEM("TCPRcvQDrop", LINUX_MIB_TCPRCVQDROP),
+ SNMP_MIB_ITEM("TCPWqueueTooBig", LINUX_MIB_TCPWQUEUETOOBIG),
SNMP_MIB_SENTINEL
};
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1299,6 +1299,11 @@ int tcp_fragment(struct sock *sk, enum t
if (nsize < 0)
nsize = 0;
+ if (unlikely((sk->sk_wmem_queued >> 1) > sk->sk_sndbuf)) {
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPWQUEUETOOBIG);
+ return -ENOMEM;
+ }
+
if (skb_unclone(skb, gfp))
return -ENOMEM;
......@@ -222,6 +222,10 @@ bugfix/all/mwifiex-abort-at-too-short-bss-descriptor-element.patch
bugfix/all/mwifiex-don-t-abort-on-small-spec-compliant-vendor-ies.patch
bugfix/all/mm-mincore.c-make-mincore-more-conservative.patch
bugfix/all/mwifiex-fix-heap-overflow-in-mwifiex_uap_parse_tail_.patch
bugfix/all/tcp-limit-payload-size-of-sacked-skbs.patch
bugfix/all/tcp-tcp_fragment-should-apply-sane-memory-limits.patch
bugfix/all/tcp-add-tcp_min_snd_mss-sysctl.patch
bugfix/all/tcp-enforce-tcp_min_snd_mss-in-tcp_mtu_probing.patch
# Fix exported symbol versions
bugfix/all/module-disable-matching-missing-version-crc.patch
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment