Skip to content

Commit de4c5ef

Browse files
committed
Florisn Westphal says: ==================== These are netfilter fixes for the *net* tree. First patch resolves a false-positive lockdep splat: rcu_dereference is used outside of rcu read lock. Let lockdep validate that the transaction mutex is locked. Second patch fixes a kdoc warning added in previous PR. Third patch fixes a memory leak: The catchall element isn't disabled correctly, this allows userspace to deactivate the element again. This results in refcount underflow which in turn prevents memory release. This was always broken since the feature was added in 5.13. Patch 4 fixes an incorrect change in the previous pull request: Adding a duplicate key to a set should work if the duplicate key has expired, restore this behaviour. All from myself. Patch #5 resolves an old historic artifact in sctp conntrack: a 300ms timeout for shutdown_ack. Increase this to 3s. From Xin Long. Patch #6 fixes a sysctl data race in ipvs, two threads can clobber the sysctl value, from Sishuai Gong. This is a day-0 bug that predates git history. Patches 7, 8 and 9, from Pablo Neira Ayuso, are also followups for the previous GC rework in nf_tables: The netlink notifier and the netns exit path must both increment the gc worker seqcount, else worker may encounter stale (free'd) pointers. ================ Signed-off-by: David S. Miller <davem@davemloft.net>
2 parents b35c968 + 23185c6 commit de4c5ef

File tree

7 files changed

+69
-31
lines changed

7 files changed

+69
-31
lines changed

Documentation/networking/nf_conntrack-sysctl.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -178,10 +178,10 @@ nf_conntrack_sctp_timeout_established - INTEGER (seconds)
178178
Default is set to (hb_interval * path_max_retrans + rto_max)
179179

180180
nf_conntrack_sctp_timeout_shutdown_sent - INTEGER (seconds)
181-
default 0.3
181+
default 3
182182

183183
nf_conntrack_sctp_timeout_shutdown_recd - INTEGER (seconds)
184-
default 0.3
184+
default 3
185185

186186
nf_conntrack_sctp_timeout_shutdown_ack_sent - INTEGER (seconds)
187187
default 3

include/net/netfilter/nf_tables.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -534,6 +534,7 @@ struct nft_set_elem_expr {
534534
* @expr: stateful expression
535535
* @ops: set ops
536536
* @flags: set flags
537+
* @dead: set will be freed, never cleared
537538
* @genmask: generation mask
538539
* @klen: key length
539540
* @dlen: data length

net/netfilter/ipvs/ip_vs_ctl.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1876,6 +1876,7 @@ static int
18761876
proc_do_sync_threshold(struct ctl_table *table, int write,
18771877
void *buffer, size_t *lenp, loff_t *ppos)
18781878
{
1879+
struct netns_ipvs *ipvs = table->extra2;
18791880
int *valp = table->data;
18801881
int val[2];
18811882
int rc;
@@ -1885,6 +1886,7 @@ proc_do_sync_threshold(struct ctl_table *table, int write,
18851886
.mode = table->mode,
18861887
};
18871888

1889+
mutex_lock(&ipvs->sync_mutex);
18881890
memcpy(val, valp, sizeof(val));
18891891
rc = proc_dointvec(&tmp, write, buffer, lenp, ppos);
18901892
if (write) {
@@ -1894,6 +1896,7 @@ proc_do_sync_threshold(struct ctl_table *table, int write,
18941896
else
18951897
memcpy(valp, val, sizeof(val));
18961898
}
1899+
mutex_unlock(&ipvs->sync_mutex);
18971900
return rc;
18981901
}
18991902

@@ -4321,6 +4324,7 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
43214324
ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD;
43224325
ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD;
43234326
tbl[idx].data = &ipvs->sysctl_sync_threshold;
4327+
tbl[idx].extra2 = ipvs;
43244328
tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
43254329
ipvs->sysctl_sync_refresh_period = DEFAULT_SYNC_REFRESH_PERIOD;
43264330
tbl[idx++].data = &ipvs->sysctl_sync_refresh_period;

net/netfilter/nf_conntrack_proto_sctp.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,8 @@ static const unsigned int sctp_timeouts[SCTP_CONNTRACK_MAX] = {
4949
[SCTP_CONNTRACK_COOKIE_WAIT] = 3 SECS,
5050
[SCTP_CONNTRACK_COOKIE_ECHOED] = 3 SECS,
5151
[SCTP_CONNTRACK_ESTABLISHED] = 210 SECS,
52-
[SCTP_CONNTRACK_SHUTDOWN_SENT] = 300 SECS / 1000,
53-
[SCTP_CONNTRACK_SHUTDOWN_RECD] = 300 SECS / 1000,
52+
[SCTP_CONNTRACK_SHUTDOWN_SENT] = 3 SECS,
53+
[SCTP_CONNTRACK_SHUTDOWN_RECD] = 3 SECS,
5454
[SCTP_CONNTRACK_SHUTDOWN_ACK_SENT] = 3 SECS,
5555
[SCTP_CONNTRACK_HEARTBEAT_SENT] = 30 SECS,
5656
};
@@ -105,7 +105,7 @@ static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = {
105105
{
106106
/* ORIGINAL */
107107
/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS */
108-
/* init */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCW},
108+
/* init */ {sCL, sCL, sCW, sCE, sES, sCL, sCL, sSA, sCW},
109109
/* init_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL},
110110
/* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
111111
/* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA, sCL},

net/netfilter/nf_tables_api.c

Lines changed: 39 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7091,6 +7091,7 @@ static int nft_set_catchall_flush(const struct nft_ctx *ctx,
70917091
ret = __nft_set_catchall_flush(ctx, set, &elem);
70927092
if (ret < 0)
70937093
break;
7094+
nft_set_elem_change_active(ctx->net, set, ext);
70947095
}
70957096

70967097
return ret;
@@ -9480,9 +9481,14 @@ struct nft_trans_gc *nft_trans_gc_alloc(struct nft_set *set,
94809481
if (!trans)
94819482
return NULL;
94829483

9484+
trans->net = maybe_get_net(net);
9485+
if (!trans->net) {
9486+
kfree(trans);
9487+
return NULL;
9488+
}
9489+
94839490
refcount_inc(&set->refs);
94849491
trans->set = set;
9485-
trans->net = get_net(net);
94869492
trans->seq = gc_seq;
94879493

94889494
return trans;
@@ -9738,6 +9744,22 @@ static void nft_set_commit_update(struct list_head *set_update_list)
97389744
}
97399745
}
97409746

9747+
static unsigned int nft_gc_seq_begin(struct nftables_pernet *nft_net)
9748+
{
9749+
unsigned int gc_seq;
9750+
9751+
/* Bump gc counter, it becomes odd, this is the busy mark. */
9752+
gc_seq = READ_ONCE(nft_net->gc_seq);
9753+
WRITE_ONCE(nft_net->gc_seq, ++gc_seq);
9754+
9755+
return gc_seq;
9756+
}
9757+
9758+
static void nft_gc_seq_end(struct nftables_pernet *nft_net, unsigned int gc_seq)
9759+
{
9760+
WRITE_ONCE(nft_net->gc_seq, ++gc_seq);
9761+
}
9762+
97419763
static int nf_tables_commit(struct net *net, struct sk_buff *skb)
97429764
{
97439765
struct nftables_pernet *nft_net = nft_pernet(net);
@@ -9823,9 +9845,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
98239845

98249846
WRITE_ONCE(nft_net->base_seq, base_seq);
98259847

9826-
/* Bump gc counter, it becomes odd, this is the busy mark. */
9827-
gc_seq = READ_ONCE(nft_net->gc_seq);
9828-
WRITE_ONCE(nft_net->gc_seq, ++gc_seq);
9848+
gc_seq = nft_gc_seq_begin(nft_net);
98299849

98309850
/* step 3. Start new generation, rules_gen_X now in use. */
98319851
net->nft.gencursor = nft_gencursor_next(net);
@@ -10038,7 +10058,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
1003810058
nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN);
1003910059
nf_tables_commit_audit_log(&adl, nft_net->base_seq);
1004010060

10041-
WRITE_ONCE(nft_net->gc_seq, ++gc_seq);
10061+
nft_gc_seq_end(nft_net, gc_seq);
1004210062
nf_tables_commit_release(net);
1004310063

1004410064
return 0;
@@ -11039,13 +11059,17 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event,
1103911059
struct net *net = n->net;
1104011060
unsigned int deleted;
1104111061
bool restart = false;
11062+
unsigned int gc_seq;
1104211063

1104311064
if (event != NETLINK_URELEASE || n->protocol != NETLINK_NETFILTER)
1104411065
return NOTIFY_DONE;
1104511066

1104611067
nft_net = nft_pernet(net);
1104711068
deleted = 0;
1104811069
mutex_lock(&nft_net->commit_mutex);
11070+
11071+
gc_seq = nft_gc_seq_begin(nft_net);
11072+
1104911073
if (!list_empty(&nf_tables_destroy_list))
1105011074
rcu_barrier();
1105111075
again:
@@ -11068,6 +11092,8 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event,
1106811092
if (restart)
1106911093
goto again;
1107011094
}
11095+
nft_gc_seq_end(nft_net, gc_seq);
11096+
1107111097
mutex_unlock(&nft_net->commit_mutex);
1107211098

1107311099
return NOTIFY_DONE;
@@ -11105,12 +11131,20 @@ static void __net_exit nf_tables_pre_exit_net(struct net *net)
1110511131
static void __net_exit nf_tables_exit_net(struct net *net)
1110611132
{
1110711133
struct nftables_pernet *nft_net = nft_pernet(net);
11134+
unsigned int gc_seq;
1110811135

1110911136
mutex_lock(&nft_net->commit_mutex);
11137+
11138+
gc_seq = nft_gc_seq_begin(nft_net);
11139+
1111011140
if (!list_empty(&nft_net->commit_list) ||
1111111141
!list_empty(&nft_net->module_list))
1111211142
__nf_tables_abort(net, NFNL_ABORT_NONE);
11143+
1111311144
__nft_release_tables(net);
11145+
11146+
nft_gc_seq_end(nft_net, gc_seq);
11147+
1111411148
mutex_unlock(&nft_net->commit_mutex);
1111511149
WARN_ON_ONCE(!list_empty(&nft_net->tables));
1111611150
WARN_ON_ONCE(!list_empty(&nft_net->module_list));

net/netfilter/nft_dynset.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,9 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
191191
if (IS_ERR(set))
192192
return PTR_ERR(set);
193193

194+
if (set->flags & NFT_SET_OBJECT)
195+
return -EOPNOTSUPP;
196+
194197
if (set->ops->update == NULL)
195198
return -EOPNOTSUPP;
196199

net/netfilter/nft_set_pipapo.c

Lines changed: 17 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -566,6 +566,8 @@ static struct nft_pipapo_elem *pipapo_get(const struct net *net,
566566
goto out;
567567

568568
if (last) {
569+
if (nft_set_elem_expired(&f->mt[b].e->ext))
570+
goto next_match;
569571
if ((genmask &&
570572
!nft_set_elem_active(&f->mt[b].e->ext, genmask)))
571573
goto next_match;
@@ -600,17 +602,8 @@ static struct nft_pipapo_elem *pipapo_get(const struct net *net,
600602
static void *nft_pipapo_get(const struct net *net, const struct nft_set *set,
601603
const struct nft_set_elem *elem, unsigned int flags)
602604
{
603-
struct nft_pipapo_elem *ret;
604-
605-
ret = pipapo_get(net, set, (const u8 *)elem->key.val.data,
605+
return pipapo_get(net, set, (const u8 *)elem->key.val.data,
606606
nft_genmask_cur(net));
607-
if (IS_ERR(ret))
608-
return ret;
609-
610-
if (nft_set_elem_expired(&ret->ext))
611-
return ERR_PTR(-ENOENT);
612-
613-
return ret;
614607
}
615608

616609
/**
@@ -1549,7 +1542,7 @@ static void nft_pipapo_gc_deactivate(struct net *net, struct nft_set *set,
15491542

15501543
/**
15511544
* pipapo_gc() - Drop expired entries from set, destroy start and end elements
1552-
* @set: nftables API set representation
1545+
* @_set: nftables API set representation
15531546
* @m: Matching data
15541547
*/
15551548
static void pipapo_gc(const struct nft_set *_set, struct nft_pipapo_match *m)
@@ -1697,6 +1690,17 @@ static void nft_pipapo_commit(const struct nft_set *set)
16971690
priv->clone = new_clone;
16981691
}
16991692

1693+
static bool nft_pipapo_transaction_mutex_held(const struct nft_set *set)
1694+
{
1695+
#ifdef CONFIG_PROVE_LOCKING
1696+
const struct net *net = read_pnet(&set->net);
1697+
1698+
return lockdep_is_held(&nft_pernet(net)->commit_mutex);
1699+
#else
1700+
return true;
1701+
#endif
1702+
}
1703+
17001704
static void nft_pipapo_abort(const struct nft_set *set)
17011705
{
17021706
struct nft_pipapo *priv = nft_set_priv(set);
@@ -1705,7 +1709,7 @@ static void nft_pipapo_abort(const struct nft_set *set)
17051709
if (!priv->dirty)
17061710
return;
17071711

1708-
m = rcu_dereference(priv->match);
1712+
m = rcu_dereference_protected(priv->match, nft_pipapo_transaction_mutex_held(set));
17091713

17101714
new_clone = pipapo_clone(m);
17111715
if (IS_ERR(new_clone))
@@ -1732,11 +1736,7 @@ static void nft_pipapo_activate(const struct net *net,
17321736
const struct nft_set *set,
17331737
const struct nft_set_elem *elem)
17341738
{
1735-
struct nft_pipapo_elem *e;
1736-
1737-
e = pipapo_get(net, set, (const u8 *)elem->key.val.data, 0);
1738-
if (IS_ERR(e))
1739-
return;
1739+
struct nft_pipapo_elem *e = elem->priv;
17401740

17411741
nft_set_elem_change_active(net, set, &e->ext);
17421742
}
@@ -1950,10 +1950,6 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set,
19501950

19511951
data = (const u8 *)nft_set_ext_key(&e->ext);
19521952

1953-
e = pipapo_get(net, set, data, 0);
1954-
if (IS_ERR(e))
1955-
return;
1956-
19571953
while ((rules_f0 = pipapo_rules_same_key(m->f, first_rule))) {
19581954
union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS];
19591955
const u8 *match_start, *match_end;

0 commit comments

Comments
 (0)