
Commit 03428ca

Florian Westphal authored and ummakynes committed
netfilter: conntrack: rework offload nf_conn timeout extension logic
Offload nf_conn entries may not see traffic for a very long time. To prevent incorrect 'ct is stale' checks during nf_conntrack table lookup, the gc worker extends the timeout of nf_conn entries marked for offload to a large value.

The existing logic suffers from a few problems.

Garbage collection runs without locks; it's unlikely but possible that @ct is removed right after the 'offload' bit test. In that case, the timeout of a new/reallocated nf_conn entry will be increased.

Prevent this by obtaining a reference count on the ct object and re-checking the confirmed and offload bits. If those are not set, the ct is being removed; skip the timeout extension in that case.

Parallel teardown is also problematic:

    cpu1                               cpu2
     gc_worker
                                       calls flow_offload_teardown()
     tests OFFLOAD bit, set
                                       clear OFFLOAD bit
                                       ct->timeout is repaired (e.g. set
                                       to timeout[UDP_CT_REPLIED])
     nf_ct_offload_timeout() called
     expire value is fetched
     <INTERRUPT>
      -> NF_CT_DAY timeout for flow that isn't offloaded
         (and might not see any further packets).

Use cmpxchg: if ct->timeout was repaired after the second 'offload bit' test passed, then ct->timeout will only be updated if it was not altered in between.

As we already have a gc worker for flowtable entries, ct->timeout repair can be handled from the flowtable gc worker. This avoids having flowtable-specific logic in the conntrack core and avoids checking entries that were never offloaded.

This allows removing the nf_ct_offload_timeout helper. It's safe to use in the add case, but not on teardown.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
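A minimal user-space sketch of the cmpxchg guard described above, for illustration only: the gc worker extends ct->timeout only if it still holds the value observed before the second 'offload bit' test, so a concurrent teardown that repairs the timeout wins the race. The struct, field names, and the GCC/Clang __atomic_compare_exchange_n builtin stand in for the kernel's nf_conn and cmpxchg(); none of this is part of the commit itself.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define NF_CT_DAY 86400u			/* one day, in pretend jiffies */

struct fake_ct {
	uint32_t timeout;			/* absolute expiry, like ct->timeout */
};

/* Mirror of the cmpxchg() step in nf_flow_table_extend_ct_timeout(): extend
 * the timeout only if it still equals 'expires', the value the gc worker
 * sampled earlier; if teardown or the datapath rewrote it in between, the
 * compare fails and the repaired value is kept.
 */
static void extend_timeout(struct fake_ct *ct, uint32_t expires, uint32_t new_abs)
{
	__atomic_compare_exchange_n(&ct->timeout, &expires, new_abs,
				    false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
}

int main(void)
{
	struct fake_ct ct = { .timeout = 30 };

	/* No concurrent writer: the extension is applied. */
	extend_timeout(&ct, 30, 100 + NF_CT_DAY);
	printf("extended:  %u\n", ct.timeout);	/* 100 + NF_CT_DAY */

	/* A 'teardown' already repaired the timeout to 180, so the stale
	 * expected value of 30 no longer matches and the update is skipped.
	 */
	ct.timeout = 180;
	extend_timeout(&ct, 30, 100 + NF_CT_DAY);
	printf("unchanged: %u\n", ct.timeout);	/* still 180 */

	return 0;
}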
1 parent 3176859 commit 03428ca

File tree

3 files changed: +103 -18 lines changed


include/net/netfilter/nf_conntrack.h

Lines changed: 0 additions & 10 deletions
@@ -312,16 +312,6 @@ static inline bool nf_ct_should_gc(const struct nf_conn *ct)
 
 #define NF_CT_DAY (86400 * HZ)
 
-/* Set an arbitrary timeout large enough not to ever expire, this save
- * us a check for the IPS_OFFLOAD_BIT from the packet path via
- * nf_ct_is_expired().
- */
-static inline void nf_ct_offload_timeout(struct nf_conn *ct)
-{
-	if (nf_ct_expires(ct) < NF_CT_DAY / 2)
-		WRITE_ONCE(ct->timeout, nfct_time_stamp + NF_CT_DAY);
-}
-
 struct kernel_param;
 
 int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp);

net/netfilter/nf_conntrack_core.c

Lines changed: 0 additions & 6 deletions
@@ -1544,12 +1544,6 @@ static void gc_worker(struct work_struct *work)
 
 		tmp = nf_ct_tuplehash_to_ctrack(h);
 
-		if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) {
-			nf_ct_offload_timeout(tmp);
-			if (!nf_conntrack_max95)
-				continue;
-		}
-
 		if (expired_count > GC_SCAN_EXPIRED_MAX) {
 			rcu_read_unlock();
net/netfilter/nf_flow_table_core.c

Lines changed: 103 additions & 2 deletions
@@ -304,7 +304,7 @@ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
 		return err;
 	}
 
-	nf_ct_offload_timeout(flow->ct);
+	nf_ct_refresh(flow->ct, NF_CT_DAY);
 
 	if (nf_flowtable_hw_offload(flow_table)) {
 		__set_bit(NF_FLOW_HW, &flow->flags);
@@ -424,15 +424,116 @@ static bool nf_flow_custom_gc(struct nf_flowtable *flow_table,
 	return flow_table->type->gc && flow_table->type->gc(flow);
 }
 
+/**
+ * nf_flow_table_tcp_timeout() - new timeout of offloaded tcp entry
+ * @ct: Flowtable offloaded tcp ct
+ *
+ * Return: number of seconds when ct entry should expire.
+ */
+static u32 nf_flow_table_tcp_timeout(const struct nf_conn *ct)
+{
+	u8 state = READ_ONCE(ct->proto.tcp.state);
+
+	switch (state) {
+	case TCP_CONNTRACK_SYN_SENT:
+	case TCP_CONNTRACK_SYN_RECV:
+		return 0;
+	case TCP_CONNTRACK_ESTABLISHED:
+		return NF_CT_DAY;
+	case TCP_CONNTRACK_FIN_WAIT:
+	case TCP_CONNTRACK_CLOSE_WAIT:
+	case TCP_CONNTRACK_LAST_ACK:
+	case TCP_CONNTRACK_TIME_WAIT:
+		return 5 * 60 * HZ;
+	case TCP_CONNTRACK_CLOSE:
+		return 0;
+	}
+
+	return 0;
+}
+
+/**
+ * nf_flow_table_extend_ct_timeout() - Extend ct timeout of offloaded conntrack entry
+ * @ct: Flowtable offloaded ct
+ *
+ * Datapath lookups in the conntrack table will evict nf_conn entries
+ * if they have expired.
+ *
+ * Once nf_conn entries have been offloaded, nf_conntrack might not see any
+ * packets anymore. Thus ct->timeout is no longer refreshed and ct can
+ * be evicted.
+ *
+ * To avoid the need for an additional check on the offload bit for every
+ * packet processed via nf_conntrack_in(), set an arbitrary timeout large
+ * enough not to ever expire, this save us a check for the IPS_OFFLOAD_BIT
+ * from the packet path via nf_ct_is_expired().
+ */
+static void nf_flow_table_extend_ct_timeout(struct nf_conn *ct)
+{
+	static const u32 min_timeout = 5 * 60 * HZ;
+	u32 expires = nf_ct_expires(ct);
+
+	/* normal case: large enough timeout, nothing to do. */
+	if (likely(expires >= min_timeout))
+		return;
+
+	/* must check offload bit after this, we do not hold any locks.
+	 * flowtable and ct entries could have been removed on another CPU.
+	 */
+	if (!refcount_inc_not_zero(&ct->ct_general.use))
+		return;
+
+	/* load ct->status after refcount increase */
+	smp_acquire__after_ctrl_dep();
+
+	if (nf_ct_is_confirmed(ct) &&
+	    test_bit(IPS_OFFLOAD_BIT, &ct->status)) {
+		u8 l4proto = nf_ct_protonum(ct);
+		u32 new_timeout = true;
+
+		switch (l4proto) {
+		case IPPROTO_UDP:
+			new_timeout = NF_CT_DAY;
+			break;
+		case IPPROTO_TCP:
+			new_timeout = nf_flow_table_tcp_timeout(ct);
+			break;
+		default:
+			WARN_ON_ONCE(1);
+			break;
+		}
+
+		/* Update to ct->timeout from nf_conntrack happens
+		 * without holding ct->lock.
+		 *
+		 * Use cmpxchg to ensure timeout extension doesn't
+		 * happen when we race with conntrack datapath.
+		 *
+		 * The inverse -- datapath updating ->timeout right
+		 * after this -- is fine, datapath is authoritative.
+		 */
+		if (new_timeout) {
+			new_timeout += nfct_time_stamp;
+			cmpxchg(&ct->timeout, expires, new_timeout);
+		}
+	}
+
+	nf_ct_put(ct);
+}
+
 static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table,
 				    struct flow_offload *flow, void *data)
 {
+	bool teardown = test_bit(NF_FLOW_TEARDOWN, &flow->flags);
+
 	if (nf_flow_has_expired(flow) ||
 	    nf_ct_is_dying(flow->ct) ||
 	    nf_flow_custom_gc(flow_table, flow))
 		flow_offload_teardown(flow);
+	else if (!teardown)
+		nf_flow_table_extend_ct_timeout(flow->ct);
 
-	if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) {
+	if (teardown) {
 		if (test_bit(NF_FLOW_HW, &flow->flags)) {
 			if (!test_bit(NF_FLOW_HW_DYING, &flow->flags))
 				nf_flow_offload_del(flow_table, flow);
