From 252cac065e6f3e9343bbac004517f1b3144c8c0f Mon Sep 17 00:00:00 2001 From: Kajetan Staszkiewicz Date: Thu, 9 Aug 2018 14:42:27 +0200 Subject: [PATCH] Fix locking issues around pf_map_addr --- sys/net/pfvar.h | 8 +++-- sys/netpfil/pf/pf.c | 81 +++++++++++++++++++++++------------------------ sys/netpfil/pf/pf_ioctl.c | 2 ++ sys/netpfil/pf/pf_lb.c | 76 ++++++++++++++++++++++++++++++++------------ 4 files changed, 103 insertions(+), 64 deletions(-) diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h index 4076780caeb..1001a48126d 100644 --- a/sys/net/pfvar.h +++ b/sys/net/pfvar.h @@ -357,20 +357,21 @@ struct pf_poolhashkey { }; struct pf_pool { struct pf_palist list; struct pf_pooladdr *cur; struct pf_poolhashkey key; struct pf_addr counter; int tblidx; u_int16_t proxy_port[2]; u_int8_t opts; + struct mtx lock; }; /* A packed Operating System description for fingerprinting */ typedef u_int32_t pf_osfp_t; #define PF_OSFP_ANY ((pf_osfp_t)0) #define PF_OSFP_UNKNOWN ((pf_osfp_t)-1) #define PF_OSFP_NOMATCH ((pf_osfp_t)-2) struct pf_osfp_entry { @@ -626,20 +627,21 @@ struct pf_threshold { u_int32_t count; u_int32_t last; }; struct pf_src_node { LIST_ENTRY(pf_src_node) entry; struct pf_addr addr; struct pf_addr raddr; union pf_rule_ptr rule; struct pfi_kif *kif; + struct pfi_kif *rkif; u_int64_t bytes[2]; u_int64_t packets[2]; u_int32_t states; u_int32_t conn; struct pf_threshold conn_rate; u_int32_t creation; u_int32_t expire; sa_family_t af; u_int8_t ruletype; }; @@ -1578,21 +1580,22 @@ pf_release_state(struct pf_state *s) pf_free_state(s); return (1); } else return (0); } extern struct pf_state *pf_find_state_byid(uint64_t, uint32_t); extern struct pf_state *pf_find_state_all(struct pf_state_key_cmp *, u_int, int *); extern struct pf_src_node *pf_find_src_node(struct pf_addr *, - struct pf_rule *, sa_family_t, int); + struct pf_rule *, sa_family_t, + struct pf_srchash **); extern void pf_unlink_src_node(struct pf_src_node *); extern u_int pf_free_src_nodes(struct pf_src_node_list *); extern void pf_print_state(struct pf_state *); extern void pf_print_flags(u_int8_t); extern u_int16_t pf_cksum_fixup(u_int16_t, u_int16_t, u_int16_t, u_int8_t); extern u_int16_t pf_proto_cksum_fixup(struct mbuf *, u_int16_t, u_int16_t, u_int16_t, u_int8_t); VNET_DECLARE(struct ifnet *, sync_ifp); @@ -1767,21 +1770,22 @@ void pf_print_host(struct pf_addr *, u_int16_t, u_int8_t); void pf_step_into_anchor(struct pf_anchor_stackframe *, int *, struct pf_ruleset **, int, struct pf_rule **, struct pf_rule **, int *); int pf_step_out_of_anchor(struct pf_anchor_stackframe *, int *, struct pf_ruleset **, int, struct pf_rule **, struct pf_rule **, int *); int pf_map_addr(u_int8_t, struct pf_rule *, struct pf_addr *, struct pf_addr *, - struct pf_addr *, struct pf_src_node **); + struct pfi_kif **, struct pf_addr *, + struct pf_src_node **, int); struct pf_rule *pf_get_translation(struct pf_pdesc *, struct mbuf *, int, int, struct pfi_kif *, struct pf_src_node **, struct pf_state_key **, struct pf_state_key **, struct pf_addr *, struct pf_addr *, uint16_t, uint16_t, struct pf_anchor_stackframe *); struct pf_state_key *pf_state_key_setup(struct pf_pdesc *, struct pf_addr *, struct pf_addr *, u_int16_t, u_int16_t); struct pf_state_key *pf_state_key_clone(struct pf_state_key *); #endif /* _KERNEL */ diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c index 8d719bf8c36..cd9236107d9 100644 --- a/sys/netpfil/pf/pf.c +++ b/sys/netpfil/pf/pf.c @@ -275,21 +275,22 @@ static int pf_check_proto_cksum(struct mbuf *, int, int, u_int8_t, sa_family_t); static void pf_print_state_parts(struct pf_state *, struct pf_state_key *, struct pf_state_key *); static int pf_addr_wrap_neq(struct pf_addr_wrap *, struct pf_addr_wrap *); static struct pf_state *pf_find_state(struct pfi_kif *, struct pf_state_key_cmp *, u_int); static int pf_src_connlimit(struct pf_state **); static void pf_overload_task(void *v, int pending); static int pf_insert_src_node(struct pf_src_node **, - struct pf_rule *, struct pf_addr *, sa_family_t); + struct pf_rule *, struct pf_addr *, sa_family_t, + struct pf_srchash **); static u_int pf_purge_expired_states(u_int, int); static void pf_purge_unlinked_rules(void); static int pf_mtag_uminit(void *, int, int); static void pf_mtag_free(struct m_tag *); #ifdef INET static void pf_route(struct mbuf **, struct pf_rule *, int, struct ifnet *, struct pf_state *, struct pf_pdesc *, struct inpcb *); #endif /* INET */ #ifdef INET6 @@ -649,99 +650,93 @@ pf_overload_task(void *v, int pending) } SLIST_FOREACH_SAFE(pfoe, &queue, next, pfoe1) free(pfoe, M_PFTEMP); if (V_pf_status.debug >= PF_DEBUG_MISC) printf("%s: %u states killed", __func__, killed); CURVNET_RESTORE(); } /* - * Can return locked on failure, so that we can consistently - * allocate and insert a new one. + * Always returns locked, so that we can perform operations on found node or + * consistently allocate and insert a new one. */ struct pf_src_node * pf_find_src_node(struct pf_addr *src, struct pf_rule *rule, sa_family_t af, - int returnlocked) + struct pf_srchash **sh) { - struct pf_srchash *sh; struct pf_src_node *n; counter_u64_add(V_pf_status.scounters[SCNT_SRC_NODE_SEARCH], 1); - sh = &V_pf_srchash[pf_hashsrc(src, af)]; - PF_HASHROW_LOCK(sh); - LIST_FOREACH(n, &sh->nodes, entry) + *sh = &V_pf_srchash[pf_hashsrc(src, af)]; + PF_HASHROW_LOCK(*sh); + LIST_FOREACH(n, &(*sh)->nodes, entry) if (n->rule.ptr == rule && n->af == af && ((af == AF_INET && n->addr.v4.s_addr == src->v4.s_addr) || (af == AF_INET6 && bcmp(&n->addr, src, sizeof(*src)) == 0))) break; - if (n != NULL) { - n->states++; - PF_HASHROW_UNLOCK(sh); - } else if (returnlocked == 0) - PF_HASHROW_UNLOCK(sh); return (n); } +/* + * Returns locked on success and unlocked on failure. + */ static int pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule, - struct pf_addr *src, sa_family_t af) + struct pf_addr *src, sa_family_t af, struct pf_srchash **sh) { KASSERT((rule->rule_flag & PFRULE_RULESRCTRACK || rule->rpool.opts & PF_POOL_STICKYADDR), ("%s for non-tracking rule %p", __func__, rule)); if (*sn == NULL) - *sn = pf_find_src_node(src, rule, af, 1); + *sn = pf_find_src_node(src, rule, af, sh); if (*sn == NULL) { - struct pf_srchash *sh = &V_pf_srchash[pf_hashsrc(src, af)]; - - PF_HASHROW_ASSERT(sh); + PF_HASHROW_ASSERT(*sh); if (!rule->max_src_nodes || counter_u64_fetch(rule->src_nodes) < rule->max_src_nodes) (*sn) = uma_zalloc(V_pf_sources_z, M_NOWAIT | M_ZERO); else counter_u64_add(V_pf_status.lcounters[LCNT_SRCNODES], 1); if ((*sn) == NULL) { - PF_HASHROW_UNLOCK(sh); return (-1); } pf_init_threshold(&(*sn)->conn_rate, rule->max_src_conn_rate.limit, rule->max_src_conn_rate.seconds); (*sn)->af = af; (*sn)->rule.ptr = rule; PF_ACPY(&(*sn)->addr, src, af); - LIST_INSERT_HEAD(&sh->nodes, *sn, entry); + LIST_INSERT_HEAD(&(*sh)->nodes, *sn, entry); (*sn)->creation = time_uptime; (*sn)->ruletype = rule->action; (*sn)->states = 1; if ((*sn)->rule.ptr != NULL) counter_u64_add((*sn)->rule.ptr->src_nodes, 1); - PF_HASHROW_UNLOCK(sh); counter_u64_add(V_pf_status.scounters[SCNT_SRC_NODE_INSERT], 1); } else { if (rule->max_src_states && (*sn)->states >= rule->max_src_states) { counter_u64_add(V_pf_status.lcounters[LCNT_SRCSTATES], 1); return (-1); } } + (*sn)->states++; return (0); } void pf_unlink_src_node(struct pf_src_node *src) { PF_HASHROW_ASSERT(&V_pf_srchash[pf_hashsrc(&src->addr, src->af)]); LIST_REMOVE(src, entry); if (src->rule.ptr) @@ -3602,41 +3597,45 @@ pf_create_state(struct pf_rule *r, struct pf_rule *nr, struct pf_rule *a, struct pf_pdesc *pd, struct pf_src_node *nsn, struct pf_state_key *nk, struct pf_state_key *sk, struct mbuf *m, int off, u_int16_t sport, u_int16_t dport, int *rewrite, struct pfi_kif *kif, struct pf_state **sm, int tag, u_int16_t bproto_sum, u_int16_t bip_sum, int hdrlen) { struct pf_state *s = NULL; struct pf_src_node *sn = NULL; struct tcphdr *th = pd->hdr.tcp; u_int16_t mss = V_tcp_mssdflt; u_short reason; + struct pf_srchash *sn_sh = NULL; + struct pf_srchash *nsn_sh = NULL; /* check maximums */ if (r->max_states && (counter_u64_fetch(r->states_cur) >= r->max_states)) { counter_u64_add(V_pf_status.lcounters[LCNT_STATES], 1); REASON_SET(&reason, PFRES_MAXSTATES); goto csfailed; } /* src node for filter rule */ if ((r->rule_flag & PFRULE_SRCTRACK || r->rpool.opts & PF_POOL_STICKYADDR) && - pf_insert_src_node(&sn, r, pd->src, pd->af) != 0) { + pf_insert_src_node(&sn, r, pd->src, pd->af, &sn_sh) != 0) { REASON_SET(&reason, PFRES_SRCLIMIT); goto csfailed; } + /* src node for translation rule */ if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) && - pf_insert_src_node(&nsn, nr, &sk->addr[pd->sidx], pd->af)) { + pf_insert_src_node(&nsn, nr, &sk->addr[pd->sidx], pd->af, &nsn_sh)) { REASON_SET(&reason, PFRES_SRCLIMIT); goto csfailed; } + s = uma_zalloc(V_pf_state_z, M_NOWAIT | M_ZERO); if (s == NULL) { REASON_SET(&reason, PFRES_MEMORY); goto csfailed; } s->rule.ptr = r; s->nat_rule.ptr = nr; s->anchor.ptr = a; STATE_INC_COUNTERS(s); if (r->allow_opts) @@ -3694,39 +3693,44 @@ pf_create_state(struct pf_rule *r, struct pf_rule *nr, struct pf_rule *a, #endif s->timeout = PFTM_ICMP_FIRST_PACKET; break; default: s->src.state = PFOTHERS_SINGLE; s->dst.state = PFOTHERS_NO_TRAFFIC; s->timeout = PFTM_OTHER_FIRST_PACKET; } if (r->rt && r->rt != PF_FASTROUTE) { - if (pf_map_addr(pd->af, r, pd->src, &s->rt_addr, NULL, &sn)) { + if (pf_map_addr(pd->af, r, pd->src, &s->rt_addr, &s->rt_kif, + NULL, &sn, 1)) { REASON_SET(&reason, PFRES_MAPFAILED); pf_src_tree_remove_state(s); STATE_DEC_COUNTERS(s); uma_zfree(V_pf_state_z, s); goto csfailed; } - s->rt_kif = r->rpool.cur->kif; } s->creation = time_uptime; s->expire = time_uptime; - if (sn != NULL) + if (sn != NULL) { s->src_node = sn; + sn->states++; + PF_HASHROW_UNLOCK(sn_sh); + } if (nsn != NULL) { /* XXX We only modify one side for now. */ PF_ACPY(&nsn->raddr, &nk->addr[1], pd->af); s->nat_src_node = nsn; + nsn->states++; + PF_HASHROW_UNLOCK(nsn_sh); } if (pd->proto == IPPROTO_TCP) { if ((pd->flags & PFDESC_TCP_NORM) && pf_normalize_tcp_init(m, off, pd, th, &s->src, &s->dst)) { REASON_SET(&reason, PFRES_MEMORY); pf_src_tree_remove_state(s); STATE_DEC_COUNTERS(s); uma_zfree(V_pf_state_z, s); return (PF_DROP); } @@ -3811,46 +3815,40 @@ pf_create_state(struct pf_rule *r, struct pf_rule *nr, struct pf_rule *a, return (PF_PASS); csfailed: if (sk != NULL) uma_zfree(V_pf_state_key_z, sk); if (nk != NULL) uma_zfree(V_pf_state_key_z, nk); if (sn != NULL) { - struct pf_srchash *sh; - - sh = &V_pf_srchash[pf_hashsrc(&sn->addr, sn->af)]; - PF_HASHROW_LOCK(sh); if (--sn->states == 0 && sn->expire == 0) { pf_unlink_src_node(sn); uma_zfree(V_pf_sources_z, sn); counter_u64_add( V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS], 1); } - PF_HASHROW_UNLOCK(sh); } + if (sn_sh) + PF_HASHROW_UNLOCK(sn_sh); if (nsn != sn && nsn != NULL) { - struct pf_srchash *sh; - - sh = &V_pf_srchash[pf_hashsrc(&nsn->addr, nsn->af)]; - PF_HASHROW_LOCK(sh); if (--nsn->states == 0 && nsn->expire == 0) { pf_unlink_src_node(nsn); uma_zfree(V_pf_sources_z, nsn); counter_u64_add( V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS], 1); } - PF_HASHROW_UNLOCK(sh); } + if (nsn_sh) + PF_HASHROW_UNLOCK(nsn_sh); return (PF_DROP); } static int pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif, struct mbuf *m, void *h, struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm) { struct pf_rule *r, *a = NULL; @@ -5460,20 +5458,21 @@ pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif, } #ifdef INET static void pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, struct pf_state *s, struct pf_pdesc *pd, struct inpcb *inp) { struct mbuf *m0, *m1; struct sockaddr_in dst; struct ip *ip; + struct pfi_kif *rt_kif = NULL; struct ifnet *ifp = NULL; struct pf_addr naddr; struct pf_src_node *sn = NULL; int error = 0; uint16_t ip_len, ip_off; KASSERT(m && *m && r && oifp, ("%s: invalid parameters", __func__)); KASSERT(dir == PF_IN || dir == PF_OUT, ("%s: invalid direction", __func__)); @@ -5523,25 +5522,24 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, ifp = nh4.nh_ifp; dst.sin_addr = nh4.nh_addr; } else { if (TAILQ_EMPTY(&r->rpool.list)) { DPFPRINTF(PF_DEBUG_URGENT, ("%s: TAILQ_EMPTY(&r->rpool.list)\n", __func__)); goto bad_locked; } if (s == NULL) { pf_map_addr(AF_INET, r, (struct pf_addr *)&ip->ip_src, - &naddr, NULL, &sn); + &naddr, &rt_kif, NULL, &sn, 0); if (!PF_AZERO(&naddr, AF_INET)) dst.sin_addr.s_addr = naddr.v4.s_addr; - ifp = r->rpool.cur->kif ? - r->rpool.cur->kif->pfik_ifp : NULL; + ifp = rt_kif ? rt_kif->pfik_ifp : NULL; } else { if (!PF_AZERO(&s->rt_addr, AF_INET)) dst.sin_addr.s_addr = s->rt_addr.v4.s_addr; ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL; PF_STATE_UNLOCK(s); } } if (ifp == NULL) goto bad; @@ -5638,20 +5636,21 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, #endif /* INET */ #ifdef INET6 static void pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, struct pf_state *s, struct pf_pdesc *pd, struct inpcb *inp) { struct mbuf *m0; struct sockaddr_in6 dst; struct ip6_hdr *ip6; + struct pfi_kif *rt_kif = NULL; struct ifnet *ifp = NULL; struct pf_addr naddr; struct pf_src_node *sn = NULL; KASSERT(m && *m && r && oifp, ("%s: invalid parameters", __func__)); KASSERT(dir == PF_IN || dir == PF_OUT, ("%s: invalid direction", __func__)); if ((pd->pf_mtag == NULL && ((pd->pf_mtag = pf_get_mtag(*m)) == NULL)) || @@ -5693,25 +5692,25 @@ pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, return; } if (TAILQ_EMPTY(&r->rpool.list)) { DPFPRINTF(PF_DEBUG_URGENT, ("%s: TAILQ_EMPTY(&r->rpool.list)\n", __func__)); goto bad_locked; } if (s == NULL) { pf_map_addr(AF_INET6, r, (struct pf_addr *)&ip6->ip6_src, - &naddr, NULL, &sn); + &naddr, &rt_kif, NULL, &sn, 0); if (!PF_AZERO(&naddr, AF_INET6)) PF_ACPY((struct pf_addr *)&dst.sin6_addr, &naddr, AF_INET6); - ifp = r->rpool.cur->kif ? r->rpool.cur->kif->pfik_ifp : NULL; + ifp = rt_kif ? rt_kif->pfik_ifp : NULL; } else { if (!PF_AZERO(&s->rt_addr, AF_INET6)) PF_ACPY((struct pf_addr *)&dst.sin6_addr, &s->rt_addr, AF_INET6); ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL; } if (s) PF_STATE_UNLOCK(s); diff --git a/sys/netpfil/pf/pf_ioctl.c b/sys/netpfil/pf/pf_ioctl.c index babbd6069bb..bfad135ac10 100644 --- a/sys/netpfil/pf/pf_ioctl.c +++ b/sys/netpfil/pf/pf_ioctl.c @@ -413,20 +413,21 @@ pf_free_rule(struct pf_rule *rule) break; case PF_ADDR_TABLE: pfr_detach_table(rule->dst.addr.p.tbl); break; } if (rule->overload_tbl) pfr_detach_table(rule->overload_tbl); if (rule->kif) pfi_kif_unref(rule->kif); pf_anchor_remove(rule); + mtx_destroy(&rule->rpool.lock); pf_empty_pool(&rule->rpool.list); counter_u64_free(rule->states_cur); counter_u64_free(rule->states_tot); counter_u64_free(rule->src_nodes); free(rule, M_PFRULE); } static u_int16_t tagname2tag(struct pf_tags *head, char *tagname) { @@ -1165,20 +1166,21 @@ pfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, struct thread *td } #endif /* INET6 */ rule = malloc(sizeof(*rule), M_PFRULE, M_WAITOK); bcopy(&pr->rule, rule, sizeof(struct pf_rule)); if (rule->ifname[0]) kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK); rule->states_cur = counter_u64_alloc(M_WAITOK); rule->states_tot = counter_u64_alloc(M_WAITOK); rule->src_nodes = counter_u64_alloc(M_WAITOK); + mtx_init(&rule->rpool.lock, "pf_pool", NULL, MTX_DEF); rule->cuid = td->td_ucred->cr_ruid; rule->cpid = td->td_proc ? td->td_proc->p_pid : 0; TAILQ_INIT(&rule->rpool.list); #define ERROUT(x) { error = (x); goto DIOCADDRULE_error; } PF_RULES_WLOCK(); pr->anchor[sizeof(pr->anchor) - 1] = 0; ruleset = pf_find_ruleset(pr->anchor); if (ruleset == NULL) diff --git a/sys/netpfil/pf/pf_lb.c b/sys/netpfil/pf/pf_lb.c index 9f1b1dfbe26..1dcb50b8fd4 100644 --- a/sys/netpfil/pf/pf_lb.c +++ b/sys/netpfil/pf/pf_lb.c @@ -40,20 +40,21 @@ __FBSDID("$FreeBSD$"); #include "opt_pf.h" #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include +#include #include #include #include #include #define DPFPRINTF(n, x) if (V_pf_status.debug >= (n)) printf x static void pf_hash(struct pf_addr *, struct pf_addr *, struct pf_poolhashkey *, sa_family_t); @@ -213,21 +214,21 @@ pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off, static int pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r, struct pf_addr *saddr, uint16_t sport, struct pf_addr *daddr, uint16_t dport, struct pf_addr *naddr, uint16_t *nport, uint16_t low, uint16_t high, struct pf_src_node **sn) { struct pf_state_key_cmp key; struct pf_addr init_addr; bzero(&init_addr, sizeof(init_addr)); - if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn)) + if (pf_map_addr(af, r, saddr, naddr, NULL, &init_addr, sn, 0)) return (1); if (proto == IPPROTO_ICMP) { low = 1; high = 65535; } bzero(&key, sizeof(key)); key.af = af; key.proto = proto; @@ -285,92 +286,112 @@ pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r, NULL) { *nport = htons(tmp); return (0); } } } switch (r->rpool.opts & PF_POOL_TYPEMASK) { case PF_POOL_RANDOM: case PF_POOL_ROUNDROBIN: - if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn)) + if (pf_map_addr(af, r, saddr, naddr, NULL, &init_addr, + sn, 0)) return (1); break; case PF_POOL_NONE: case PF_POOL_SRCHASH: case PF_POOL_BITMASK: default: return (1); } } while (! PF_AEQ(&init_addr, naddr, af) ); return (1); /* none available */ } int pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr, - struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sn) + struct pf_addr *naddr, struct pfi_kif **rt_kif, struct pf_addr *init_addr, + struct pf_src_node **sn, int return_locked) { struct pf_pool *rpool = &r->rpool; struct pf_addr *raddr = NULL, *rmask = NULL; + bool pool_locked = false; + struct pf_srchash *sh = NULL; /* Try to find a src_node if none was given and this is a sticky-address rule. */ if (*sn == NULL && r->rpool.opts & PF_POOL_STICKYADDR && (r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) - *sn = pf_find_src_node(saddr, r, af, 0); + *sn = pf_find_src_node(saddr, r, af, &sh); /* If a src_node was found or explicitly given and it has a non-zero route address, use this address. A zeroed address is found if the src node was created just a moment ago in pf_create_state and it needs to be filled in with routing decision calculated here. */ if (*sn != NULL && !PF_AZERO(&(*sn)->raddr, af)) { PF_ACPY(naddr, &(*sn)->raddr, af); + if (rt_kif) + *rt_kif = (*sn)->rkif; if (V_pf_status.debug >= PF_DEBUG_MISC) { printf("pf_map_addr: src tracking maps "); pf_print_host(saddr, 0, af); printf(" to "); pf_print_host(naddr, 0, af); printf("\n"); } + if (!return_locked) + PF_HASHROW_UNLOCK(sh); return (0); } /* Find the route using chosen algorithm. Store the found route in src_node if it was given or found. */ - if (rpool->cur->addr.type == PF_ADDR_NOROUTE) + if (rpool->cur->addr.type == PF_ADDR_NOROUTE) { + if (sh && !return_locked) + PF_HASHROW_UNLOCK(sh); return (1); + } if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { switch (af) { #ifdef INET case AF_INET: if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 && (rpool->opts & PF_POOL_TYPEMASK) != - PF_POOL_ROUNDROBIN) + PF_POOL_ROUNDROBIN) { + if (sh && !return_locked) + PF_HASHROW_UNLOCK(sh); return (1); + } raddr = &rpool->cur->addr.p.dyn->pfid_addr4; rmask = &rpool->cur->addr.p.dyn->pfid_mask4; break; #endif /* INET */ #ifdef INET6 case AF_INET6: if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 && (rpool->opts & PF_POOL_TYPEMASK) != - PF_POOL_ROUNDROBIN) + PF_POOL_ROUNDROBIN) { + if (sh && !return_locked) + PF_HASHROW_UNLOCK(sh); return (1); + } raddr = &rpool->cur->addr.p.dyn->pfid_addr6; rmask = &rpool->cur->addr.p.dyn->pfid_mask6; break; #endif /* INET6 */ } } else if (rpool->cur->addr.type == PF_ADDR_TABLE) { - if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN) + if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN) { + if (sh && !return_locked) + PF_HASHROW_UNLOCK(sh); return (1); /* unsupported */ + } } else { raddr = &rpool->cur->addr.v.a.addr; rmask = &rpool->cur->addr.v.a.mask; } switch (rpool->opts & PF_POOL_TYPEMASK) { case PF_POOL_NONE: PF_ACPY(naddr, raddr, af); break; case PF_POOL_BITMASK: @@ -418,41 +439,33 @@ pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr, case PF_POOL_SRCHASH: { unsigned char hash[16]; pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af); PF_POOLMASK(naddr, raddr, rmask, (struct pf_addr *)&hash, af); break; } case PF_POOL_ROUNDROBIN: { + mtx_lock(&rpool->lock); + pool_locked = true; struct pf_pooladdr *acur = rpool->cur; /* * XXXGL: in the round-robin case we need to store * the round-robin machine state in the rule, thus * forwarding thread needs to modify rule. * - * This is done w/o locking, because performance is assumed - * more important than round-robin precision. - * * In the simpliest case we just update the "rpool->cur" * pointer. However, if pool contains tables or dynamic * addresses, then "tblidx" is also used to store machine - * state. Since "tblidx" is int, concurrent access to it can't - * lead to inconsistence, only to lost of precision. - * - * Things get worse, if table contains not hosts, but - * prefixes. In this case counter also stores machine state, - * and for IPv6 address, counter can't be updated atomically. - * Probably, using round-robin on a table containing IPv6 - * prefixes (or even IPv4) would cause a panic. + * state. */ if (rpool->cur->addr.type == PF_ADDR_TABLE) { if (!pfr_pool_get(rpool->cur->addr.p.tbl, &rpool->tblidx, &rpool->counter, af)) goto get_addr; } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, &rpool->tblidx, &rpool->counter, af)) goto get_addr; @@ -464,55 +477,76 @@ pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr, rpool->cur = TAILQ_FIRST(&rpool->list); else rpool->cur = TAILQ_NEXT(rpool->cur, entries); if (rpool->cur->addr.type == PF_ADDR_TABLE) { rpool->tblidx = -1; if (pfr_pool_get(rpool->cur->addr.p.tbl, &rpool->tblidx, &rpool->counter, af)) { /* table contains no address of type 'af' */ if (rpool->cur != acur) goto try_next; + mtx_unlock(&rpool->lock); + if (sh && !return_locked) + PF_HASHROW_UNLOCK(sh); return (1); } } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { rpool->tblidx = -1; if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, &rpool->tblidx, &rpool->counter, af)) { /* table contains no address of type 'af' */ if (rpool->cur != acur) goto try_next; + mtx_unlock(&rpool->lock); + if (sh && !return_locked) + PF_HASHROW_UNLOCK(sh); return (1); } } else { raddr = &rpool->cur->addr.v.a.addr; rmask = &rpool->cur->addr.v.a.mask; PF_ACPY(&rpool->counter, raddr, af); } get_addr: PF_ACPY(naddr, &rpool->counter, af); if (init_addr != NULL && PF_AZERO(init_addr, af)) PF_ACPY(init_addr, naddr, af); PF_AINC(&rpool->counter, af); + if (rt_kif) + *rt_kif = rpool->cur->kif; break; } } - if (*sn != NULL) + if (*sn != NULL) { PF_ACPY(&(*sn)->raddr, naddr, af); + (*sn)->rkif = rpool->cur->kif; + } if (V_pf_status.debug >= PF_DEBUG_MISC && (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) { printf("pf_map_addr: selected address "); pf_print_host(naddr, 0, af); + if (rpool->cur->kif) { + printf(" interface %s", rpool->cur->kif->pfik_name); + } + if (rpool->cur->addr.type == PF_ADDR_TABLE) { + printf(" table %s\n", + rpool->cur->addr.p.tbl->pfrkt_name); + } printf("\n"); } + if (pool_locked) + mtx_unlock(&rpool->lock); + if (sh && !return_locked) + PF_HASHROW_UNLOCK(sh); return (0); } struct pf_rule * pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction, struct pfi_kif *kif, struct pf_src_node **sn, struct pf_state_key **skp, struct pf_state_key **nkp, struct pf_addr *saddr, struct pf_addr *daddr, uint16_t sport, uint16_t dport, struct pf_anchor_stackframe *anchor_stack) { @@ -635,21 +669,21 @@ pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction, break; #endif /* INET6 */ } } else PF_POOLMASK(naddr, &r->src.addr.v.a.addr, &r->src.addr.v.a.mask, daddr, pd->af); break; } break; case PF_RDR: { - if (pf_map_addr(pd->af, r, saddr, naddr, NULL, sn)) + if (pf_map_addr(pd->af, r, saddr, naddr, NULL, NULL, sn, 0)) goto notrans; if ((r->rpool.opts & PF_POOL_TYPEMASK) == PF_POOL_BITMASK) PF_POOLMASK(naddr, naddr, &r->rpool.cur->addr.v.a.mask, daddr, pd->af); if (r->rpool.proxy_port[1]) { uint32_t tmp_nport; tmp_nport = ((ntohs(dport) - ntohs(r->dst.port[0])) % (r->rpool.proxy_port[1] - r->rpool.proxy_port[0] + -- 2.11.0