Bug 176763 - [pf] [patch] Removing pf Source entries locks kernel.
Summary: [pf] [patch] Removing pf Source entries locks kernel.
Status: Closed FIXED
Alias: None
Product: Base System
Classification: Unclassified
Component: kern (show other bugs)
Version: 9.1-RELEASE
Hardware: Any Any
: Normal Affects Only Me
Assignee: Gleb Smirnoff
URL:
Keywords:
Depends on:
Blocks:
 
Reported: 2013-03-08 19:40 UTC by Kajetan Staszkiewicz
Modified: 2014-01-22 10:30 UTC (History)
0 users

See Also:


Attachments
file.diff (3.84 KB, patch)
2013-03-08 19:40 UTC, Kajetan Staszkiewicz
no flags Details | Diff
link-states-to-src-nodes.patch (10.88 KB, patch)
2013-11-18 17:12 UTC, Kajetan Staszkiewicz
no flags Details | Diff

Note You need to log in before you can comment on or make changes to this bug.
Description Kajetan Staszkiewicz 2013-03-08 19:40:01 UTC
The case will happen on a FreeBSD router with large amount of pf State and Source entries. Use pfctl to remove some Sources. For each matching source whole State table is searched element by element.

With hundreds of matching Sources (and hundreds of thousands of States to search through) this can freeze the kernel for a few seconds. Under some conditions (e.g. a DDoS attack hitting the IP address for which the Source entries will be removed), kernel will freeze permanently.

Fix: Create a list of State entries and attach it to Source. This way only this short list must be searched when Source is removed. List is maintained during State creation and removal.

Patch attached with submission follows:
How-To-Repeat: `pfctl -K`
Comment 1 Mark Linimon freebsd_committer freebsd_triage 2013-03-10 04:31:13 UTC
Responsible Changed
From-To: freebsd-bugs->freebsd-pf

Over to maintainer(s).
Comment 2 Kajetan Staszkiewicz 2013-11-18 17:12:36 UTC
The attached patch is for FreeBSD 10. It adds a new parameter "-c" to pfctl 
which when killing src_nodes, also kills states linked to the found nodes.

-- 
| pozdrawiam / greetings | powered by Debian, FreeBSD and CentOS |
|  Kajetan Staszkiewicz  | jabber,email: vegeta()tuxpowered net  |
|        Vegeta          | www: http://vegeta.tuxpowered.net     |
`------------------------^---------------------------------------'
Comment 3 Gleb Smirnoff freebsd_committer 2013-11-22 19:14:57 UTC
State Changed
From-To: open->patched

Fixed in head, thanks! 


Comment 4 Gleb Smirnoff freebsd_committer 2013-11-22 19:14:57 UTC
Responsible Changed
From-To: freebsd-pf->glebius

Fixed in head, thanks!
Comment 5 dfilter service freebsd_committer 2013-11-22 19:22:33 UTC
Author: glebius
Date: Fri Nov 22 19:22:26 2013
New Revision: 258480
URL: http://svnweb.freebsd.org/changeset/base/258480

Log:
  The DIOCKILLSRCNODES operation was implemented with O(m*n) complexity,
  where "m" is number of source nodes and "n" is number of states. Thus,
  on heavy loaded router its processing consumed a lot of CPU time.
  
  Reimplement it with O(m+n) complexity. We first scan through source
  nodes and disconnect matching ones, putting them on the freelist and
  marking with a cookie value in their expire field. Then we scan through
  the states, detecting references to source nodes with a cookie, and
  disconnect them as well. Then the freelist is passed to pf_free_src_nodes().
  
  In collaboration with:	Kajetan Staszkiewicz <kajetan.staszkiewicz innogames.de>
  PR:		kern/176763
  Sponsored by:	InnoGames GmbH
  Sponsored by:	Nginx, Inc.

Modified:
  head/sys/netpfil/pf/pf_ioctl.c

Modified: head/sys/netpfil/pf/pf_ioctl.c
==============================================================================
--- head/sys/netpfil/pf/pf_ioctl.c	Fri Nov 22 19:16:34 2013	(r258479)
+++ head/sys/netpfil/pf/pf_ioctl.c	Fri Nov 22 19:22:26 2013	(r258480)
@@ -155,6 +155,7 @@ struct cdev *pf_dev;
 static void		 pf_clear_states(void);
 static int		 pf_clear_tables(void);
 static void		 pf_clear_srcnodes(struct pf_src_node *);
+static void		 pf_kill_srcnodes(struct pfioc_src_node_kill *);
 static void		 pf_tbladdr_copyout(struct pf_addr_wrap *);
 
 /*
@@ -3143,45 +3144,9 @@ DIOCCHANGEADDR_error:
 		break;
 	}
 
-	case DIOCKILLSRCNODES: {
-		struct pfioc_src_node_kill *psnk =
-		    (struct pfioc_src_node_kill *)addr;
-		struct pf_srchash	*sh;
-		struct pf_src_node	*sn;
-		u_int			i, killed = 0;
-
-		for (i = 0, sh = V_pf_srchash; i < V_pf_srchashmask;
-		    i++, sh++) {
-		    /*
-		     * XXXGL: we don't ever acquire sources hash lock
-		     * but if we ever do, the below call to pf_clear_srcnodes()
-		     * would lead to a LOR.
-		     */
-		    PF_HASHROW_LOCK(sh);
-		    LIST_FOREACH(sn, &sh->nodes, entry)
-			if (PF_MATCHA(psnk->psnk_src.neg,
-				&psnk->psnk_src.addr.v.a.addr,
-				&psnk->psnk_src.addr.v.a.mask,
-				&sn->addr, sn->af) &&
-			    PF_MATCHA(psnk->psnk_dst.neg,
-				&psnk->psnk_dst.addr.v.a.addr,
-				&psnk->psnk_dst.addr.v.a.mask,
-				&sn->raddr, sn->af)) {
-				/* Handle state to src_node linkage */
-				if (sn->states != 0)
-					pf_clear_srcnodes(sn);
-				sn->expire = 1;
-				killed++;
-			}
-		    PF_HASHROW_UNLOCK(sh);
-		}
-
-		if (killed > 0)
-			pf_purge_expired_src_nodes();
-
-		psnk->psnk_killed = killed;
+	case DIOCKILLSRCNODES:
+		pf_kill_srcnodes((struct pfioc_src_node_kill *)addr);
 		break;
-	}
 
 	case DIOCSETHOSTID: {
 		u_int32_t	*hostid = (u_int32_t *)addr;
@@ -3400,6 +3365,59 @@ pf_clear_srcnodes(struct pf_src_node *n)
 		n->states = 0;
 	}
 }
+
+static void
+pf_kill_srcnodes(struct pfioc_src_node_kill *psnk)
+{
+	struct pf_src_node_list	 kill;
+
+	LIST_INIT(&kill);
+	for (int i = 0; i <= V_pf_srchashmask; i++) {
+		struct pf_srchash *sh = &V_pf_srchash[i];
+		struct pf_src_node *sn, *tmp;
+
+		PF_HASHROW_LOCK(sh);
+		LIST_FOREACH_SAFE(sn, &sh->nodes, entry, tmp)
+			if (PF_MATCHA(psnk->psnk_src.neg,
+			      &psnk->psnk_src.addr.v.a.addr,
+			      &psnk->psnk_src.addr.v.a.mask,
+			      &sn->addr, sn->af) &&
+			    PF_MATCHA(psnk->psnk_dst.neg,
+			      &psnk->psnk_dst.addr.v.a.addr,
+			      &psnk->psnk_dst.addr.v.a.mask,
+			      &sn->raddr, sn->af)) {
+				pf_unlink_src_node_locked(sn);
+				LIST_INSERT_HEAD(&kill, sn, entry);
+				sn->expire = 1;
+			}
+		PF_HASHROW_UNLOCK(sh);
+	}
+
+	for (int i = 0; i <= V_pf_hashmask; i++) {
+		struct pf_idhash *ih = &V_pf_idhash[i];
+		struct pf_state *s;
+
+		PF_HASHROW_LOCK(ih);
+		LIST_FOREACH(s, &ih->states, entry) {
+			if (s->src_node && s->src_node->expire == 1) {
+#ifdef INVARIANTS
+				s->src_node->states--;
+#endif
+				s->src_node = NULL;
+			}
+			if (s->nat_src_node && s->nat_src_node->expire == 1) {
+#ifdef INVARIANTS
+				s->nat_src_node->states--;
+#endif
+				s->nat_src_node = NULL;
+			}
+		}
+		PF_HASHROW_UNLOCK(ih);
+	}
+
+	psnk->psnk_killed = pf_free_src_nodes(&kill);
+}
+
 /*
  * XXX - Check for version missmatch!!!
  */
_______________________________________________
svn-src-all@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscribe@freebsd.org"
Comment 6 Gleb Smirnoff freebsd_committer 2014-01-22 10:29:18 UTC
State Changed
From-To: patched->closed

Merged to 10.0-STABLE.
Comment 7 dfilter service freebsd_committer 2014-01-22 10:29:24 UTC
Author: glebius
Date: Wed Jan 22 10:29:15 2014
New Revision: 261019
URL: http://svnweb.freebsd.org/changeset/base/261019

Log:
  Merge r258478, r258479, r258480, r259719: fixes related to mass source
  nodes removal.
  
  PR:		176763

Modified:
  stable/10/sys/net/pfvar.h
  stable/10/sys/netpfil/pf/pf.c
  stable/10/sys/netpfil/pf/pf_ioctl.c
Directory Properties:
  stable/10/   (props changed)

Modified: stable/10/sys/net/pfvar.h
==============================================================================
--- stable/10/sys/net/pfvar.h	Wed Jan 22 10:18:25 2014	(r261018)
+++ stable/10/sys/net/pfvar.h	Wed Jan 22 10:29:15 2014	(r261019)
@@ -1643,8 +1643,9 @@ struct pf_ifspeed {
 #define	DIOCGIFSPEED	_IOWR('D', 92, struct pf_ifspeed)
 
 #ifdef _KERNEL
+LIST_HEAD(pf_src_node_list, pf_src_node);
 struct pf_srchash {
-	LIST_HEAD(, pf_src_node)	nodes;
+	struct pf_src_node_list		nodes;
 	struct mtx			lock;
 };
 
@@ -1750,8 +1751,11 @@ pf_release_state(struct pf_state *s)
 extern struct pf_state		*pf_find_state_byid(uint64_t, uint32_t);
 extern struct pf_state		*pf_find_state_all(struct pf_state_key_cmp *,
 				    u_int, int *);
-struct pf_src_node		*pf_find_src_node(struct pf_addr *, struct pf_rule *,
-				    sa_family_t, int);
+extern struct pf_src_node	*pf_find_src_node(struct pf_addr *,
+				    struct pf_rule *, sa_family_t, int);
+extern void			 pf_unlink_src_node(struct pf_src_node *);
+extern void			 pf_unlink_src_node_locked(struct pf_src_node *);
+extern u_int			 pf_free_src_nodes(struct pf_src_node_list *);
 extern void			 pf_print_state(struct pf_state *);
 extern void			 pf_print_flags(u_int8_t);
 extern u_int16_t		 pf_cksum_fixup(u_int16_t, u_int16_t, u_int16_t,

Modified: stable/10/sys/netpfil/pf/pf.c
==============================================================================
--- stable/10/sys/netpfil/pf/pf.c	Wed Jan 22 10:18:25 2014	(r261018)
+++ stable/10/sys/netpfil/pf/pf.c	Wed Jan 22 10:29:15 2014	(r261019)
@@ -673,20 +673,53 @@ pf_insert_src_node(struct pf_src_node **
 	return (0);
 }
 
-static void
-pf_remove_src_node(struct pf_src_node *src)
+void
+pf_unlink_src_node_locked(struct pf_src_node *src)
 {
+#ifdef INVARIANTS
 	struct pf_srchash *sh;
 
 	sh = &V_pf_srchash[pf_hashsrc(&src->addr, src->af)];
-	PF_HASHROW_LOCK(sh);
+	PF_HASHROW_ASSERT(sh);
+#endif
 	LIST_REMOVE(src, entry);
-	PF_HASHROW_UNLOCK(sh);
-
+	if (src->rule.ptr)
+		src->rule.ptr->src_nodes--;
 	V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
 	V_pf_status.src_nodes--;
+}
 
-	uma_zfree(V_pf_sources_z, src);
+void
+pf_unlink_src_node(struct pf_src_node *src)
+{
+	struct pf_srchash *sh;
+
+	sh = &V_pf_srchash[pf_hashsrc(&src->addr, src->af)];
+	PF_HASHROW_LOCK(sh);
+	pf_unlink_src_node_locked(src);
+	PF_HASHROW_UNLOCK(sh);
+}
+
+static void
+pf_free_src_node(struct pf_src_node *sn)
+{
+
+	KASSERT(sn->states == 0, ("%s: %p has refs", __func__, sn));
+	uma_zfree(V_pf_sources_z, sn);
+}
+
+u_int
+pf_free_src_nodes(struct pf_src_node_list *head)
+{
+	struct pf_src_node *sn, *tmp;
+	u_int count = 0;
+
+	LIST_FOREACH_SAFE(sn, head, entry, tmp) {
+		pf_free_src_node(sn);
+		count++;
+	}
+
+	return (count);
 }
 
 /* Data storage structures initialization. */
@@ -1456,24 +1489,24 @@ pf_state_expires(const struct pf_state *
 void
 pf_purge_expired_src_nodes()
 {
+	struct pf_src_node_list	 freelist;
 	struct pf_srchash	*sh;
 	struct pf_src_node	*cur, *next;
 	int i;
 
+	LIST_INIT(&freelist);
 	for (i = 0, sh = V_pf_srchash; i <= V_pf_srchashmask; i++, sh++) {
 	    PF_HASHROW_LOCK(sh);
 	    LIST_FOREACH_SAFE(cur, &sh->nodes, entry, next)
 		if (cur->states == 0 && cur->expire <= time_uptime) {
-			if (cur->rule.ptr != NULL)
-				cur->rule.ptr->src_nodes--;
-			LIST_REMOVE(cur, entry);
-			V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
-			V_pf_status.src_nodes--;
-			uma_zfree(V_pf_sources_z, cur);
+			pf_unlink_src_node_locked(cur);
+			LIST_INSERT_HEAD(&freelist, cur, entry);
 		} else if (cur->rule.ptr != NULL)
 			cur->rule.ptr->rule_flag |= PFRULE_REFS;
 	    PF_HASHROW_UNLOCK(sh);
 	}
+
+	pf_free_src_nodes(&freelist);
 }
 
 static void
@@ -3609,11 +3642,15 @@ csfailed:
 	if (nk != NULL)
 		uma_zfree(V_pf_state_key_z, nk);
 
-	if (sn != NULL && sn->states == 0 && sn->expire == 0)
-		pf_remove_src_node(sn);
+	if (sn != NULL && sn->states == 0 && sn->expire == 0) {
+		pf_unlink_src_node(sn);
+		pf_free_src_node(sn);
+	}
 
-	if (nsn != sn && nsn != NULL && nsn->states == 0 && nsn->expire == 0)
-		pf_remove_src_node(nsn);
+	if (nsn != sn && nsn != NULL && nsn->states == 0 && nsn->expire == 0) {
+		pf_unlink_src_node(nsn);
+		pf_free_src_node(nsn);
+	}
 
 	return (PF_DROP);
 }

Modified: stable/10/sys/netpfil/pf/pf_ioctl.c
==============================================================================
--- stable/10/sys/netpfil/pf/pf_ioctl.c	Wed Jan 22 10:18:25 2014	(r261018)
+++ stable/10/sys/netpfil/pf/pf_ioctl.c	Wed Jan 22 10:29:15 2014	(r261019)
@@ -151,6 +151,7 @@ struct cdev *pf_dev;
 static void		 pf_clear_states(void);
 static int		 pf_clear_tables(void);
 static void		 pf_clear_srcnodes(struct pf_src_node *);
+static void		 pf_kill_srcnodes(struct pfioc_src_node_kill *);
 static void		 pf_tbladdr_copyout(struct pf_addr_wrap *);
 
 /*
@@ -3139,45 +3140,9 @@ DIOCCHANGEADDR_error:
 		break;
 	}
 
-	case DIOCKILLSRCNODES: {
-		struct pfioc_src_node_kill *psnk =
-		    (struct pfioc_src_node_kill *)addr;
-		struct pf_srchash	*sh;
-		struct pf_src_node	*sn;
-		u_int			i, killed = 0;
-
-		for (i = 0, sh = V_pf_srchash; i < V_pf_srchashmask;
-		    i++, sh++) {
-		    /*
-		     * XXXGL: we don't ever acquire sources hash lock
-		     * but if we ever do, the below call to pf_clear_srcnodes()
-		     * would lead to a LOR.
-		     */
-		    PF_HASHROW_LOCK(sh);
-		    LIST_FOREACH(sn, &sh->nodes, entry)
-			if (PF_MATCHA(psnk->psnk_src.neg,
-				&psnk->psnk_src.addr.v.a.addr,
-				&psnk->psnk_src.addr.v.a.mask,
-				&sn->addr, sn->af) &&
-			    PF_MATCHA(psnk->psnk_dst.neg,
-				&psnk->psnk_dst.addr.v.a.addr,
-				&psnk->psnk_dst.addr.v.a.mask,
-				&sn->raddr, sn->af)) {
-				/* Handle state to src_node linkage */
-				if (sn->states != 0)
-					pf_clear_srcnodes(sn);
-				sn->expire = 1;
-				killed++;
-			}
-		    PF_HASHROW_UNLOCK(sh);
-		}
-
-		if (killed > 0)
-			pf_purge_expired_src_nodes();
-
-		psnk->psnk_killed = killed;
+	case DIOCKILLSRCNODES:
+		pf_kill_srcnodes((struct pfioc_src_node_kill *)addr);
 		break;
-	}
 
 	case DIOCSETHOSTID: {
 		u_int32_t	*hostid = (u_int32_t *)addr;
@@ -3396,6 +3361,59 @@ pf_clear_srcnodes(struct pf_src_node *n)
 		n->states = 0;
 	}
 }
+
+static void
+pf_kill_srcnodes(struct pfioc_src_node_kill *psnk)
+{
+	struct pf_src_node_list	 kill;
+
+	LIST_INIT(&kill);
+	for (int i = 0; i <= V_pf_srchashmask; i++) {
+		struct pf_srchash *sh = &V_pf_srchash[i];
+		struct pf_src_node *sn, *tmp;
+
+		PF_HASHROW_LOCK(sh);
+		LIST_FOREACH_SAFE(sn, &sh->nodes, entry, tmp)
+			if (PF_MATCHA(psnk->psnk_src.neg,
+			      &psnk->psnk_src.addr.v.a.addr,
+			      &psnk->psnk_src.addr.v.a.mask,
+			      &sn->addr, sn->af) &&
+			    PF_MATCHA(psnk->psnk_dst.neg,
+			      &psnk->psnk_dst.addr.v.a.addr,
+			      &psnk->psnk_dst.addr.v.a.mask,
+			      &sn->raddr, sn->af)) {
+				pf_unlink_src_node_locked(sn);
+				LIST_INSERT_HEAD(&kill, sn, entry);
+				sn->expire = 1;
+			}
+		PF_HASHROW_UNLOCK(sh);
+	}
+
+	for (int i = 0; i <= V_pf_hashmask; i++) {
+		struct pf_idhash *ih = &V_pf_idhash[i];
+		struct pf_state *s;
+
+		PF_HASHROW_LOCK(ih);
+		LIST_FOREACH(s, &ih->states, entry) {
+			if (s->src_node && s->src_node->expire == 1) {
+#ifdef INVARIANTS
+				s->src_node->states--;
+#endif
+				s->src_node = NULL;
+			}
+			if (s->nat_src_node && s->nat_src_node->expire == 1) {
+#ifdef INVARIANTS
+				s->nat_src_node->states--;
+#endif
+				s->nat_src_node = NULL;
+			}
+		}
+		PF_HASHROW_UNLOCK(ih);
+	}
+
+	psnk->psnk_killed = pf_free_src_nodes(&kill);
+}
+
 /*
  * XXX - Check for version missmatch!!!
  */
_______________________________________________
svn-src-all@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscribe@freebsd.org"