diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index 003b0ebbcfdda5347037dd6c3d07db3fbbe702db..47f4c544c91627816fd7a07e6c80f123c6fef2dc 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -24,7 +24,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
 	__be16 protocol = skb->protocol;
 	u16 mac_len = skb->mac_len;
 	int gre_offset, outer_hlen;
-	bool need_csum;
+	bool need_csum, ufo;
 
 	if (unlikely(skb_shinfo(skb)->gso_type &
 				~(SKB_GSO_TCPV4 |
@@ -58,8 +58,20 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
 	need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_GRE_CSUM);
 	skb->encap_hdr_csum = need_csum;
 
+	ufo = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP);
+
 	features &= skb->dev->hw_enc_features;
 
+	/* The only checksum offload we care about from here on out is the
+	 * outer one so strip the existing checksum feature flags based
+	 * on the fact that we will be computing our checksum in software.
+	 */
+	if (ufo) {
+		features &= ~NETIF_F_CSUM_MASK;
+		if (!need_csum)
+			features |= NETIF_F_HW_CSUM;
+	}
+
 	/* segment inner packet. */
 	segs = skb_mac_gso_segment(skb, features);
 	if (IS_ERR_OR_NULL(segs)) {
@@ -75,8 +87,11 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
 		struct gre_base_hdr *greh;
 		__be32 *pcsum;
 
-		skb_reset_inner_headers(skb);
-		skb->encapsulation = 1;
+		/* Set up inner headers if we are offloading inner checksum */
+		if (skb->ip_summed == CHECKSUM_PARTIAL) {
+			skb_reset_inner_headers(skb);
+			skb->encapsulation = 1;
+		}
 
 		skb->mac_len = mac_len;
 		skb->protocol = protocol;
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 56c4c8b88b28f2e96220ed68a67d7473aa40c567..f5abb1ae1358f974f8024c0599e6c9bdcca5be60 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -33,8 +33,8 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
 	__be16 new_protocol, bool is_ipv6)
 {
 	int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb);
+	bool remcsum, need_csum, offload_csum, ufo;
 	struct sk_buff *segs = ERR_PTR(-EINVAL);
-	bool remcsum, need_csum, offload_csum;
 	struct udphdr *uh = udp_hdr(skb);
 	u16 mac_offset = skb->mac_header;
 	__be16 protocol = skb->protocol;
@@ -62,6 +62,8 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
 	remcsum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TUNNEL_REMCSUM);
 	skb->remcsum_offload = remcsum;
 
+	ufo = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP);
+
 	/* Try to offload checksum if possible */
 	offload_csum = !!(need_csum &&
 			  (skb->dev->features &
@@ -74,9 +76,9 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
 	 * outer one so strip the existing checksum feature flags and
 	 * instead set the flag based on our outer checksum offload value.
 	 */
-	if (remcsum) {
+	if (remcsum || ufo) {
 		features &= ~NETIF_F_CSUM_MASK;
-		if (offload_csum)
+		if (!need_csum || offload_csum)
 			features |= NETIF_F_HW_CSUM;
 	}
 
@@ -230,6 +232,13 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
 
 	skb->ip_summed = CHECKSUM_NONE;
 
+	/* If there is no outer header we can fake a checksum offload
+	 * due to the fact that we have already done the checksum in
+	 * software prior to segmenting the frame.
+	 */
+	if (!skb->encap_hdr_csum)
+		features |= NETIF_F_HW_CSUM;
+
 	/* Fragment the skb. IP headers of the fragments are updated in
 	 * inet_gso_segment()
 	 */
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 7441e1e6389381a9ae65df5c433433d5cf63a3f2..2b0fbe6929e84e1bcd9cda057c98a0207a573cab 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -81,12 +81,18 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
 		csum = skb_checksum(skb, 0, skb->len, 0);
 		uh->check = udp_v6_check(skb->len, &ipv6h->saddr,
 					  &ipv6h->daddr, csum);
-
 		if (uh->check == 0)
 			uh->check = CSUM_MANGLED_0;
 
 		skb->ip_summed = CHECKSUM_NONE;
 
+		/* If there is no outer header we can fake a checksum offload
+		 * due to the fact that we have already done the checksum in
+		 * software prior to segmenting the frame.
+		 */
+		if (!skb->encap_hdr_csum)
+			features |= NETIF_F_HW_CSUM;
+
 		/* Check if there is enough headroom to insert fragment header. */
 		tnl_hlen = skb_tnl_header_len(skb);
 		if (skb->mac_header < (tnl_hlen + frag_hdr_sz)) {