Compare commits


8 Commits
v4.0 ... v4.0.1

Author SHA1 Message Date
1b0ebf2964 Linux 4.0.1 2015-04-29 10:22:30 +02:00
bdb29adaff fs: take i_mutex during prepare_binprm for set[ug]id executables
commit 8b01fc86b9 upstream.

This prevents a race between chown() and execve(), where chowning a
setuid-user binary to root would momentarily make the binary setuid
root.

This patch was mostly written by Linus Torvalds.

Signed-off-by: Jann Horn <jann@thejh.net>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2015-04-29 10:22:18 +02:00
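
A condensed sketch of the resulting bprm_fill_uid() flow may help here; the full hunk is in the fs/exec.c diff at the bottom of this compare, and this sketch omits the MNT_NOSUID, no_new_privs, namespace-mapping and per_clear handling:

static void bprm_fill_uid(struct linux_binprm *bprm)
{
	struct inode *inode = file_inode(bprm->file);
	unsigned int mode = READ_ONCE(inode->i_mode);
	kuid_t uid;
	kgid_t gid;

	/* start from the caller's plain credentials */
	bprm->cred->euid = current_euid();
	bprm->cred->egid = current_egid();

	if (!(mode & (S_ISUID | S_ISGID)))
		return;

	/* re-read mode/uid/gid as one snapshot under the same lock chown()
	 * holds, so we see either the old owner with the bit set or the new
	 * owner with the bit already cleared, never root plus setuid
	 */
	mutex_lock(&inode->i_mutex);
	mode = inode->i_mode;
	uid = inode->i_uid;
	gid = inode->i_gid;
	mutex_unlock(&inode->i_mutex);

	if (mode & S_ISUID)
		bprm->cred->euid = uid;
	if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP))
		bprm->cred->egid = gid;
}
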
d968c92507 skbuff: Do not scrub skb mark within the same name space
[ Upstream commit 213dd74aee ]

On Wed, Apr 15, 2015 at 05:41:26PM +0200, Nicolas Dichtel wrote:
> Le 15/04/2015 15:57, Herbert Xu a écrit :
> >On Wed, Apr 15, 2015 at 06:22:29PM +0800, Herbert Xu wrote:
> [snip]
> >Subject: skbuff: Do not scrub skb mark within the same name space
> >
> >The commit ea23192e8e ("tunnels:
> Maybe add a Fixes tag?
> Fixes: ea23192e8e ("tunnels: harmonize cleanup done on skb on rx path")
>
> >harmonize cleanup done on skb on rx path") broke anyone trying to
> >use netfilter marking across IPv4 tunnels.  While most of the
> >fields that are cleared by skb_scrub_packet don't matter, the
> >netfilter mark must be preserved.
> >
> >This patch rearranges skb_scurb_packet to preserve the mark field.
> nit: s/scurb/scrub
>
> Else it's fine for me.

Sure.

PS I used the wrong email for James the first time around.  So
let me repeat the question here.  Should secmark be preserved
or cleared across tunnels within the same name space? In fact,
do our security models even support name spaces?

---8<---
The commit ea23192e8e ("tunnels:
harmonize cleanup done on skb on rx path") broke anyone trying to
use netfilter marking across IPv4 tunnels.  While most of the
fields that are cleared by skb_scrub_packet don't matter, the
netfilter mark must be preserved.

This patch rearranges skb_scrub_packet to preserve the mark field.

Fixes: ea23192e8e ("tunnels: harmonize cleanup done on skb on rx path")
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Acked-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2015-04-29 10:22:18 +02:00
d44d126382 Revert "net: Reset secmark when scrubbing packet"
[ Upstream commit 4c0ee414e8 ]

This patch reverts commit b8fb4e0648
because the secmark must be preserved even when a packet crosses
namespace boundaries.  The reason is that security labels apply to
the system as a whole and are not per-namespace.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2015-04-29 10:22:18 +02:00
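
Taken together, these two skbuff changes leave skb_scrub_packet() looking roughly like the sketch below (reconstructed from the net/core/skbuff.c hunk at the bottom of this compare): fields that are always safe to clear are scrubbed unconditionally, while orphaning and the netfilter mark reset only happen when the packet actually crosses a name space boundary, and the secmark reset is gone entirely:

void skb_scrub_packet(struct sk_buff *skb, bool xnet)
{
	skb->tstamp.tv64 = 0;
	skb->pkt_type = PACKET_HOST;
	skb->skb_iif = 0;
	skb->ignore_df = 0;
	skb_dst_drop(skb);
	skb_sender_cpu_clear(skb);
	secpath_reset(skb);
	nf_reset(skb);
	nf_reset_trace(skb);

	if (!xnet)
		return;

	/* crossing a name space boundary: drop socket ownership and mark */
	skb_orphan(skb);
	skb->mark = 0;
}
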
32a08be658 bpf: fix verifier memory corruption
[ Upstream commit c3de6317d7 ]

Due to a missing bounds check, the DAG pass of the BPF verifier can corrupt
memory, which can cause random crashes during program loading:

[8.449451] BUG: unable to handle kernel paging request at ffffffffffffffff
[8.451293] IP: [<ffffffff811de33d>] kmem_cache_alloc_trace+0x8d/0x2f0
[8.452329] Oops: 0000 [#1] SMP
[8.452329] Call Trace:
[8.452329]  [<ffffffff8116cc82>] bpf_check+0x852/0x2000
[8.452329]  [<ffffffff8116b7e4>] bpf_prog_load+0x1e4/0x310
[8.452329]  [<ffffffff811b190f>] ? might_fault+0x5f/0xb0
[8.452329]  [<ffffffff8116c206>] SyS_bpf+0x806/0xa30

Fixes: f1bca824da ("bpf: add search pruning optimization to verifier")
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2015-04-29 10:22:18 +02:00
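
The fix itself is the one-line bounds check shown in the kernel/bpf/verifier.c hunk at the bottom of this compare: in check_cfg()'s DAG walk, the pruning mark for the fall-through instruction is only written while t + 1 is still a valid index into explored_states[] (one slot per instruction), instead of unconditionally:

	/* mark t + 1 as a pruning point only if that instruction exists;
	 * the unconditional write could previously land one slot past the
	 * end of explored_states[] and corrupt adjacent memory
	 */
	if (t + 1 < insn_cnt)
		env->explored_states[t + 1] = STATE_LIST_MARK;
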
db35b1ca6a bnx2x: Fix busy_poll vs netpoll
[ Upstream commit 074975d037 ]

Commit 9a2620c877 ("bnx2x: prevent WARN during driver unload")
switched the napi/busy_lock locking mechanism from spin_lock() into
spin_lock_bh(), breaking inter-operability with netconsole, as netpoll
disables interrupts prior to calling our napi mechanism.

This switches the driver into using atomic assignments instead of the
spinlock mechanisms previously employed.

Based on initial patch from Yuval Mintz & Ariel Elior

I basically added softirq starvation avoidance, and a mixture
of atomic operations, plain writes and barriers.

Note this slightly reduces the overhead for this driver when no
busy_poll sockets are in use.

Fixes: 9a2620c877 ("bnx2x: prevent WARN during driver unload")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2015-04-29 10:22:17 +02:00
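
A condensed sketch of the replacement scheme (the full version, including the unlock and disable paths, is in the bnx2x.h hunk further down): the per-queue spinlock becomes a single busy_poll_state word driven by atomic operations, so the NAPI path never needs spin_lock_bh() and stays safe when netpoll calls it with interrupts disabled. The lock_napi loop here is slightly simplified from the switch statement in the actual patch:

enum bnx2x_fp_state {
	BNX2X_STATE_FP_NAPI	= BIT(0),	/* NAPI handler owns the queue */
	BNX2X_STATE_FP_NAPI_REQ_BIT = 1,	/* NAPI would like to own the queue */
	BNX2X_STATE_FP_NAPI_REQ	= BIT(1),
	BNX2X_STATE_FP_POLL_BIT	= 2,
	BNX2X_STATE_FP_POLL	= BIT(2),	/* busy_poll owns the queue */
	BNX2X_STATE_FP_DISABLE_BIT = 3,		/* queue is dismantled */
};

/* NAPI path: claim the queue with cmpxchg(); if busy_poll owns it, leave a
 * "NAPI wants it" flag so busy_poll cannot starve the softirq, and back off.
 */
static inline bool bnx2x_fp_lock_napi(struct bnx2x_fastpath *fp)
{
	unsigned long prev, old = READ_ONCE(fp->busy_poll_state);

	while (1) {
		if (old & BNX2X_STATE_FP_POLL) {
			set_bit(BNX2X_STATE_FP_NAPI_REQ_BIT, &fp->busy_poll_state);
			return false;
		}
		prev = cmpxchg(&fp->busy_poll_state, old, BNX2X_STATE_FP_NAPI);
		if (prev == old)
			return true;
		old = prev;	/* lost a race; re-examine the new state */
	}
}

/* busy_poll path: only an idle queue can be claimed, and nothing blocks or
 * disables bottom halves, so netpoll's IRQ-off context is never a problem.
 */
static inline bool bnx2x_fp_lock_poll(struct bnx2x_fastpath *fp)
{
	return cmpxchg(&fp->busy_poll_state, 0, BNX2X_STATE_FP_POLL) == 0;
}
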
8e7e388769 tcp: tcp_make_synack() should clear skb->tstamp
[ Upstream commit b50edd7812 ]

I noticed tcpdump was giving funky timestamps for locally
generated SYNACK messages on the loopback interface.

11:42:46.938990 IP 127.0.0.1.48245 > 127.0.0.2.23850: S
945476042:945476042(0) win 43690 <mss 65495,nop,nop,sackOK,nop,wscale 7>

20:28:58.502209 IP 127.0.0.2.23850 > 127.0.0.1.48245: S
3160535375:3160535375(0) ack 945476043 win 43690 <mss
65495,nop,nop,sackOK,nop,wscale 7>

This is because we need to clear skb->tstamp before
entering the lower stack; otherwise net_timestamp_check()
does not set skb->tstamp.

Fixes: 7faee5c0d5 ("tcp: remove TCP_SKB_CB(skb)->when")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2015-04-29 10:22:17 +02:00
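
For context, a hedged sketch of why a stale value sticks (a paraphrase of the net_timestamp_check() logic in net/core/dev.c of this era, not the verbatim source): the stack only stamps an skb whose tstamp is still zero, so clearing it in tcp_make_synack() (see the tcp_output.c hunk at the bottom) lets the normal stamping path take over again:

	/* illustrative paraphrase */
	if (static_key_false(&netstamp_needed)) {
		if (!skb->tstamp.tv64)		/* skipped while a stale tstamp remains */
			__net_timestamp(skb);
	}
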
6906cf7373 udptunnels: Call handle_offloads after inserting vlan tag.
[ Upstream commit b736a623bd ]

handle_offloads() calls skb_reset_inner_headers() to store
the layer pointers to the encapsulated packet. However, we
currently push the vlan tag (if there is one) onto the packet
afterwards. This changes the MAC header for the encapsulated
packet but it is not reflected in skb->inner_mac_header, which
breaks GSO and drivers which attempt to use this for encapsulation
offloads.

Fixes: 1eaa8178 ("vxlan: Add tx-vlan offload support.")
Signed-off-by: Jesse Gross <jesse@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2015-04-29 10:22:17 +02:00
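
A condensed ordering sketch based on the vxlan_xmit_skb() hunk below (vlan_hwaccel_push_inside() is taken from the surrounding 4.0 code rather than from the hunk itself): the VLAN tag is pushed first, and only then is the offload setup run, so the inner MAC header it records matches what GSO will actually segment:

	/* headroom expansion and VLAN insertion happen first */
	skb = vlan_hwaccel_push_inside(skb);
	if (WARN_ON(!skb))
		return -ENOMEM;

	/* only now snapshot the inner headers for GSO/encapsulation offloads */
	skb = iptunnel_handle_offloads(skb, udp_sum, type);
	if (IS_ERR(skb))
		return PTR_ERR(skb);

	vxh = (struct vxlanhdr *)__skb_push(skb, sizeof(*vxh));
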
9 changed files with 128 additions and 137 deletions

View File

@ -1,6 +1,6 @@
VERSION = 4
PATCHLEVEL = 0
SUBLEVEL = 0
SUBLEVEL = 1
EXTRAVERSION =
NAME = Hurr durr I'ma sheep

View File

@ -531,20 +531,8 @@ struct bnx2x_fastpath {
struct napi_struct napi;
#ifdef CONFIG_NET_RX_BUSY_POLL
unsigned int state;
#define BNX2X_FP_STATE_IDLE 0
#define BNX2X_FP_STATE_NAPI (1 << 0) /* NAPI owns this FP */
#define BNX2X_FP_STATE_POLL (1 << 1) /* poll owns this FP */
#define BNX2X_FP_STATE_DISABLED (1 << 2)
#define BNX2X_FP_STATE_NAPI_YIELD (1 << 3) /* NAPI yielded this FP */
#define BNX2X_FP_STATE_POLL_YIELD (1 << 4) /* poll yielded this FP */
#define BNX2X_FP_OWNED (BNX2X_FP_STATE_NAPI | BNX2X_FP_STATE_POLL)
#define BNX2X_FP_YIELD (BNX2X_FP_STATE_NAPI_YIELD | BNX2X_FP_STATE_POLL_YIELD)
#define BNX2X_FP_LOCKED (BNX2X_FP_OWNED | BNX2X_FP_STATE_DISABLED)
#define BNX2X_FP_USER_PEND (BNX2X_FP_STATE_POLL | BNX2X_FP_STATE_POLL_YIELD)
/* protect state */
spinlock_t lock;
#endif /* CONFIG_NET_RX_BUSY_POLL */
unsigned long busy_poll_state;
#endif
union host_hc_status_block status_blk;
/* chip independent shortcuts into sb structure */
@ -619,104 +607,83 @@ struct bnx2x_fastpath {
#define bnx2x_fp_qstats(bp, fp) (&((bp)->fp_stats[(fp)->index].eth_q_stats))
#ifdef CONFIG_NET_RX_BUSY_POLL
static inline void bnx2x_fp_init_lock(struct bnx2x_fastpath *fp)
enum bnx2x_fp_state {
BNX2X_STATE_FP_NAPI = BIT(0), /* NAPI handler owns the queue */
BNX2X_STATE_FP_NAPI_REQ_BIT = 1, /* NAPI would like to own the queue */
BNX2X_STATE_FP_NAPI_REQ = BIT(1),
BNX2X_STATE_FP_POLL_BIT = 2,
BNX2X_STATE_FP_POLL = BIT(2), /* busy_poll owns the queue */
BNX2X_STATE_FP_DISABLE_BIT = 3, /* queue is dismantled */
};
static inline void bnx2x_fp_busy_poll_init(struct bnx2x_fastpath *fp)
{
spin_lock_init(&fp->lock);
fp->state = BNX2X_FP_STATE_IDLE;
WRITE_ONCE(fp->busy_poll_state, 0);
}
/* called from the device poll routine to get ownership of a FP */
static inline bool bnx2x_fp_lock_napi(struct bnx2x_fastpath *fp)
{
bool rc = true;
unsigned long prev, old = READ_ONCE(fp->busy_poll_state);
spin_lock_bh(&fp->lock);
if (fp->state & BNX2X_FP_LOCKED) {
WARN_ON(fp->state & BNX2X_FP_STATE_NAPI);
fp->state |= BNX2X_FP_STATE_NAPI_YIELD;
rc = false;
} else {
/* we don't care if someone yielded */
fp->state = BNX2X_FP_STATE_NAPI;
while (1) {
switch (old) {
case BNX2X_STATE_FP_POLL:
/* make sure bnx2x_fp_lock_poll() wont starve us */
set_bit(BNX2X_STATE_FP_NAPI_REQ_BIT,
&fp->busy_poll_state);
/* fallthrough */
case BNX2X_STATE_FP_POLL | BNX2X_STATE_FP_NAPI_REQ:
return false;
default:
break;
}
prev = cmpxchg(&fp->busy_poll_state, old, BNX2X_STATE_FP_NAPI);
if (unlikely(prev != old)) {
old = prev;
continue;
}
return true;
}
spin_unlock_bh(&fp->lock);
return rc;
}
/* returns true is someone tried to get the FP while napi had it */
static inline bool bnx2x_fp_unlock_napi(struct bnx2x_fastpath *fp)
static inline void bnx2x_fp_unlock_napi(struct bnx2x_fastpath *fp)
{
bool rc = false;
spin_lock_bh(&fp->lock);
WARN_ON(fp->state &
(BNX2X_FP_STATE_POLL | BNX2X_FP_STATE_NAPI_YIELD));
if (fp->state & BNX2X_FP_STATE_POLL_YIELD)
rc = true;
/* state ==> idle, unless currently disabled */
fp->state &= BNX2X_FP_STATE_DISABLED;
spin_unlock_bh(&fp->lock);
return rc;
smp_wmb();
fp->busy_poll_state = 0;
}
/* called from bnx2x_low_latency_poll() */
static inline bool bnx2x_fp_lock_poll(struct bnx2x_fastpath *fp)
{
bool rc = true;
spin_lock_bh(&fp->lock);
if ((fp->state & BNX2X_FP_LOCKED)) {
fp->state |= BNX2X_FP_STATE_POLL_YIELD;
rc = false;
} else {
/* preserve yield marks */
fp->state |= BNX2X_FP_STATE_POLL;
}
spin_unlock_bh(&fp->lock);
return rc;
return cmpxchg(&fp->busy_poll_state, 0, BNX2X_STATE_FP_POLL) == 0;
}
/* returns true if someone tried to get the FP while it was locked */
static inline bool bnx2x_fp_unlock_poll(struct bnx2x_fastpath *fp)
static inline void bnx2x_fp_unlock_poll(struct bnx2x_fastpath *fp)
{
bool rc = false;
spin_lock_bh(&fp->lock);
WARN_ON(fp->state & BNX2X_FP_STATE_NAPI);
if (fp->state & BNX2X_FP_STATE_POLL_YIELD)
rc = true;
/* state ==> idle, unless currently disabled */
fp->state &= BNX2X_FP_STATE_DISABLED;
spin_unlock_bh(&fp->lock);
return rc;
smp_mb__before_atomic();
clear_bit(BNX2X_STATE_FP_POLL_BIT, &fp->busy_poll_state);
}
/* true if a socket is polling, even if it did not get the lock */
/* true if a socket is polling */
static inline bool bnx2x_fp_ll_polling(struct bnx2x_fastpath *fp)
{
WARN_ON(!(fp->state & BNX2X_FP_OWNED));
return fp->state & BNX2X_FP_USER_PEND;
return READ_ONCE(fp->busy_poll_state) & BNX2X_STATE_FP_POLL;
}
/* false if fp is currently owned */
static inline bool bnx2x_fp_ll_disable(struct bnx2x_fastpath *fp)
{
int rc = true;
set_bit(BNX2X_STATE_FP_DISABLE_BIT, &fp->busy_poll_state);
return !bnx2x_fp_ll_polling(fp);
spin_lock_bh(&fp->lock);
if (fp->state & BNX2X_FP_OWNED)
rc = false;
fp->state |= BNX2X_FP_STATE_DISABLED;
spin_unlock_bh(&fp->lock);
return rc;
}
#else
static inline void bnx2x_fp_init_lock(struct bnx2x_fastpath *fp)
static inline void bnx2x_fp_busy_poll_init(struct bnx2x_fastpath *fp)
{
}
@ -725,9 +692,8 @@ static inline bool bnx2x_fp_lock_napi(struct bnx2x_fastpath *fp)
return true;
}
static inline bool bnx2x_fp_unlock_napi(struct bnx2x_fastpath *fp)
static inline void bnx2x_fp_unlock_napi(struct bnx2x_fastpath *fp)
{
return false;
}
static inline bool bnx2x_fp_lock_poll(struct bnx2x_fastpath *fp)
@ -735,9 +701,8 @@ static inline bool bnx2x_fp_lock_poll(struct bnx2x_fastpath *fp)
return false;
}
static inline bool bnx2x_fp_unlock_poll(struct bnx2x_fastpath *fp)
static inline void bnx2x_fp_unlock_poll(struct bnx2x_fastpath *fp)
{
return false;
}
static inline bool bnx2x_fp_ll_polling(struct bnx2x_fastpath *fp)

View File

@ -1849,7 +1849,7 @@ static void bnx2x_napi_enable_cnic(struct bnx2x *bp)
int i;
for_each_rx_queue_cnic(bp, i) {
bnx2x_fp_init_lock(&bp->fp[i]);
bnx2x_fp_busy_poll_init(&bp->fp[i]);
napi_enable(&bnx2x_fp(bp, i, napi));
}
}
@ -1859,7 +1859,7 @@ static void bnx2x_napi_enable(struct bnx2x *bp)
int i;
for_each_eth_queue(bp, i) {
bnx2x_fp_init_lock(&bp->fp[i]);
bnx2x_fp_busy_poll_init(&bp->fp[i]);
napi_enable(&bnx2x_fp(bp, i, napi));
}
}
@ -3191,9 +3191,10 @@ static int bnx2x_poll(struct napi_struct *napi, int budget)
}
}
bnx2x_fp_unlock_napi(fp);
/* Fall out from the NAPI loop if needed */
if (!bnx2x_fp_unlock_napi(fp) &&
!(bnx2x_has_rx_work(fp) || bnx2x_has_tx_work(fp))) {
if (!(bnx2x_has_rx_work(fp) || bnx2x_has_tx_work(fp))) {
/* No need to update SB for FCoE L2 ring as long as
* it's connected to the default SB and the SB

View File

@ -1713,12 +1713,6 @@ static int vxlan6_xmit_skb(struct dst_entry *dst, struct sk_buff *skb,
}
}
skb = iptunnel_handle_offloads(skb, udp_sum, type);
if (IS_ERR(skb)) {
err = -EINVAL;
goto err;
}
skb_scrub_packet(skb, xnet);
min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len
@ -1738,6 +1732,12 @@ static int vxlan6_xmit_skb(struct dst_entry *dst, struct sk_buff *skb,
goto err;
}
skb = iptunnel_handle_offloads(skb, udp_sum, type);
if (IS_ERR(skb)) {
err = -EINVAL;
goto err;
}
vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
vxh->vx_flags = htonl(VXLAN_HF_VNI);
vxh->vx_vni = md->vni;
@ -1798,10 +1798,6 @@ int vxlan_xmit_skb(struct rtable *rt, struct sk_buff *skb,
}
}
skb = iptunnel_handle_offloads(skb, udp_sum, type);
if (IS_ERR(skb))
return PTR_ERR(skb);
min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
+ VXLAN_HLEN + sizeof(struct iphdr)
+ (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);
@ -1817,6 +1813,10 @@ int vxlan_xmit_skb(struct rtable *rt, struct sk_buff *skb,
if (WARN_ON(!skb))
return -ENOMEM;
skb = iptunnel_handle_offloads(skb, udp_sum, type);
if (IS_ERR(skb))
return PTR_ERR(skb);
vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
vxh->vx_flags = htonl(VXLAN_HF_VNI);
vxh->vx_vni = md->vni;

View File

@ -1265,6 +1265,53 @@ static void check_unsafe_exec(struct linux_binprm *bprm)
spin_unlock(&p->fs->lock);
}
static void bprm_fill_uid(struct linux_binprm *bprm)
{
struct inode *inode;
unsigned int mode;
kuid_t uid;
kgid_t gid;
/* clear any previous set[ug]id data from a previous binary */
bprm->cred->euid = current_euid();
bprm->cred->egid = current_egid();
if (bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID)
return;
if (task_no_new_privs(current))
return;
inode = file_inode(bprm->file);
mode = READ_ONCE(inode->i_mode);
if (!(mode & (S_ISUID|S_ISGID)))
return;
/* Be careful if suid/sgid is set */
mutex_lock(&inode->i_mutex);
/* reload atomically mode/uid/gid now that lock held */
mode = inode->i_mode;
uid = inode->i_uid;
gid = inode->i_gid;
mutex_unlock(&inode->i_mutex);
/* We ignore suid/sgid if there are no mappings for them in the ns */
if (!kuid_has_mapping(bprm->cred->user_ns, uid) ||
!kgid_has_mapping(bprm->cred->user_ns, gid))
return;
if (mode & S_ISUID) {
bprm->per_clear |= PER_CLEAR_ON_SETID;
bprm->cred->euid = uid;
}
if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
bprm->per_clear |= PER_CLEAR_ON_SETID;
bprm->cred->egid = gid;
}
}
/*
* Fill the binprm structure from the inode.
* Check permissions, then read the first 128 (BINPRM_BUF_SIZE) bytes
@ -1273,36 +1320,9 @@ static void check_unsafe_exec(struct linux_binprm *bprm)
*/
int prepare_binprm(struct linux_binprm *bprm)
{
struct inode *inode = file_inode(bprm->file);
umode_t mode = inode->i_mode;
int retval;
/* clear any previous set[ug]id data from a previous binary */
bprm->cred->euid = current_euid();
bprm->cred->egid = current_egid();
if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) &&
!task_no_new_privs(current) &&
kuid_has_mapping(bprm->cred->user_ns, inode->i_uid) &&
kgid_has_mapping(bprm->cred->user_ns, inode->i_gid)) {
/* Set-uid? */
if (mode & S_ISUID) {
bprm->per_clear |= PER_CLEAR_ON_SETID;
bprm->cred->euid = inode->i_uid;
}
/* Set-gid? */
/*
* If setgid is set but no group execute bit then this
* is a candidate for mandatory locking, not a setgid
* executable.
*/
if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
bprm->per_clear |= PER_CLEAR_ON_SETID;
bprm->cred->egid = inode->i_gid;
}
}
bprm_fill_uid(bprm);
/* fill in binprm security blob */
retval = security_bprm_set_creds(bprm);

View File

@ -1380,7 +1380,8 @@ peek_stack:
/* tell verifier to check for equivalent states
* after every call and jump
*/
env->explored_states[t + 1] = STATE_LIST_MARK;
if (t + 1 < insn_cnt)
env->explored_states[t + 1] = STATE_LIST_MARK;
} else {
/* conditional jump with two edges */
ret = push_insn(t, t + 1, FALLTHROUGH, env);

View File

@ -4169,19 +4169,21 @@ EXPORT_SYMBOL(skb_try_coalesce);
*/
void skb_scrub_packet(struct sk_buff *skb, bool xnet)
{
if (xnet)
skb_orphan(skb);
skb->tstamp.tv64 = 0;
skb->pkt_type = PACKET_HOST;
skb->skb_iif = 0;
skb->ignore_df = 0;
skb_dst_drop(skb);
skb->mark = 0;
skb_sender_cpu_clear(skb);
skb_init_secmark(skb);
secpath_reset(skb);
nf_reset(skb);
nf_reset_trace(skb);
if (!xnet)
return;
skb_orphan(skb);
skb->mark = 0;
}
EXPORT_SYMBOL_GPL(skb_scrub_packet);

View File

@ -113,10 +113,6 @@ int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt,
int min_headroom;
int err;
skb = udp_tunnel_handle_offloads(skb, csum);
if (IS_ERR(skb))
return PTR_ERR(skb);
min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
+ GENEVE_BASE_HLEN + opt_len + sizeof(struct iphdr)
+ (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);
@ -131,6 +127,10 @@ int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt,
if (unlikely(!skb))
return -ENOMEM;
skb = udp_tunnel_handle_offloads(skb, csum);
if (IS_ERR(skb))
return PTR_ERR(skb);
gnvh = (struct genevehdr *)__skb_push(skb, sizeof(*gnvh) + opt_len);
geneve_build_header(gnvh, tun_flags, vni, opt_len, opt);

View File

@ -2929,6 +2929,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
}
#endif
/* Do not fool tcpdump (if any), clean our debris */
skb->tstamp.tv64 = 0;
return skb;
}
EXPORT_SYMBOL(tcp_make_synack);