
Commit fdbaf51

netfilter: flowtable: add CLOSING state
A TCP rst/fin packet triggers an immediate teardown of the flow, which results in sending the flow back to the classic forwarding path. This behaviour was introduced by:

da5984e ("netfilter: nf_flow_table: add support for sending flows back to the slow path")
b6f27d3 ("netfilter: nf_flow_table: tear down TCP flows if RST or FIN was seen")

whose goal is to expedite the removal of flow entries from the hardware table. Before these patches, the flow was only released after the flow entry timed out. However, this approach leads to packet races when restoring the conntrack state, as well as to late flow re-offload situations while the TCP connection is ending.

This patch adds a new CLOSING state that is entered when a TCP rst/fin packet is seen. This allows for an early removal of the flow entry from the hardware table, while the flow entry still remains in software, so the TCP packets that shut down the flow are not sent back to the slow path.

If a syn packet is seen in this new CLOSING state, the flow enters the teardown state, the ct state is set to TCP_CONNTRACK_CLOSE, and the packet is sent to the slow path, so this TCP reopen scenario can be handled by conntrack. TCP_CONNTRACK_CLOSE provides a small timeout that aims at quickly releasing this stale entry from the conntrack table.

Moreover, skip hardware re-offload from the flowtable software packet path if the flow is in the CLOSING state.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
1 parent b8baac3 commit fdbaf51
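
As a rough summary of the behaviour described above, the packet-path transitions can be modelled as below. This is a minimal userspace sketch, not kernel code: the flow_state enum values and the handle_tcp_flags() helper are invented for illustration and only mirror the logic that nf_flow_state_check() implements in the diff further down.

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical model of the flow states involved in this patch. */
enum flow_state {
        FLOW_OFFLOADED, /* fast path: packets handled by the flowtable */
        FLOW_CLOSING,   /* fin/rst seen: hardware entry removed, software
                         * entry kept so shutdown packets stay offloaded */
        FLOW_TEARDOWN,  /* flow handed back to the classic forwarding path */
};

/* Mirrors the per-packet decision in nf_flow_state_check(): a syn while
 * CLOSING tears the flow down so conntrack can handle the TCP reopen,
 * while a fin/rst only marks the flow as CLOSING.
 */
static enum flow_state handle_tcp_flags(enum flow_state cur,
                                        bool syn, bool fin, bool rst)
{
        if (syn && cur == FLOW_CLOSING)
                return FLOW_TEARDOWN;
        if ((fin || rst) && cur == FLOW_OFFLOADED)
                return FLOW_CLOSING;
        return cur;
}

int main(void)
{
        enum flow_state st = FLOW_OFFLOADED;

        st = handle_tcp_flags(st, false, true, false);  /* fin seen */
        printf("after fin: %d\n", st);                  /* 1 = FLOW_CLOSING */
        st = handle_tcp_flags(st, true, false, false);  /* syn seen */
        printf("after syn: %d\n", st);                  /* 2 = FLOW_TEARDOWN */
        return 0;
}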

3 files changed (+58, -19 lines)

include/net/netfilter/nf_flow_table.h

Lines changed: 1 addition & 0 deletions
@@ -163,6 +163,7 @@ struct flow_offload_tuple_rhash {
 enum nf_flow_flags {
         NF_FLOW_SNAT,
         NF_FLOW_DNAT,
+        NF_FLOW_CLOSING,
         NF_FLOW_TEARDOWN,
         NF_FLOW_HW,
         NF_FLOW_HW_DYING,

net/netfilter/nf_flow_table_core.c

Lines changed: 52 additions & 18 deletions
@@ -161,11 +161,23 @@ void flow_offload_route_init(struct flow_offload *flow,
 }
 EXPORT_SYMBOL_GPL(flow_offload_route_init);
 
-static void flow_offload_fixup_tcp(struct nf_conn *ct)
+static inline bool nf_flow_has_expired(const struct flow_offload *flow)
+{
+        return nf_flow_timeout_delta(flow->timeout) <= 0;
+}
+
+static void flow_offload_fixup_tcp(struct nf_conn *ct, u8 tcp_state)
 {
         struct ip_ct_tcp *tcp = &ct->proto.tcp;
 
         spin_lock_bh(&ct->lock);
+        if (tcp->state != tcp_state)
+                tcp->state = tcp_state;
+
+        /* syn packet triggers the TCP reopen case from conntrack. */
+        if (tcp->state == TCP_CONNTRACK_CLOSE)
+                ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
+
         /* Conntrack state is outdated due to offload bypass.
          * Clear IP_CT_TCP_FLAG_MAXACK_SET, otherwise conntracks
          * TCP reset validation will fail.
@@ -177,36 +189,58 @@ static void flow_offload_fixup_tcp(struct nf_conn *ct)
         spin_unlock_bh(&ct->lock);
 }
 
-static void flow_offload_fixup_ct(struct nf_conn *ct)
+static void flow_offload_fixup_ct(struct flow_offload *flow)
 {
+        struct nf_conn *ct = flow->ct;
         struct net *net = nf_ct_net(ct);
         int l4num = nf_ct_protonum(ct);
+        bool expired, closing = false;
+        u32 offload_timeout = 0;
         s32 timeout;
 
         if (l4num == IPPROTO_TCP) {
-                struct nf_tcp_net *tn = nf_tcp_pernet(net);
+                const struct nf_tcp_net *tn = nf_tcp_pernet(net);
+                u8 tcp_state;
 
-                flow_offload_fixup_tcp(ct);
+                /* Enter CLOSE state if fin/rst packet has been seen, this
+                 * allows TCP reopen from conntrack. Otherwise, pick up from
+                 * the last seen TCP state.
+                 */
+                closing = test_bit(NF_FLOW_CLOSING, &flow->flags);
+                if (closing) {
+                        flow_offload_fixup_tcp(ct, TCP_CONNTRACK_CLOSE);
+                        timeout = READ_ONCE(tn->timeouts[TCP_CONNTRACK_CLOSE]);
+                        expired = false;
+                } else {
+                        tcp_state = READ_ONCE(ct->proto.tcp.state);
+                        flow_offload_fixup_tcp(ct, tcp_state);
+                        timeout = READ_ONCE(tn->timeouts[tcp_state]);
+                        expired = nf_flow_has_expired(flow);
+                }
+                offload_timeout = READ_ONCE(tn->offload_timeout);
 
-                timeout = tn->timeouts[ct->proto.tcp.state];
-                timeout -= tn->offload_timeout;
         } else if (l4num == IPPROTO_UDP) {
-                struct nf_udp_net *tn = nf_udp_pernet(net);
+                const struct nf_udp_net *tn = nf_udp_pernet(net);
                 enum udp_conntrack state =
                         test_bit(IPS_SEEN_REPLY_BIT, &ct->status) ?
                         UDP_CT_REPLIED : UDP_CT_UNREPLIED;
 
-                timeout = tn->timeouts[state];
-                timeout -= tn->offload_timeout;
+                timeout = READ_ONCE(tn->timeouts[state]);
+                expired = nf_flow_has_expired(flow);
+                offload_timeout = READ_ONCE(tn->offload_timeout);
         } else {
                 return;
         }
 
+        if (expired)
+                timeout -= offload_timeout;
+
         if (timeout < 0)
                 timeout = 0;
 
-        if (nf_flow_timeout_delta(READ_ONCE(ct->timeout)) > (__s32)timeout)
-                WRITE_ONCE(ct->timeout, nfct_time_stamp + timeout);
+        if (closing ||
+            nf_flow_timeout_delta(READ_ONCE(ct->timeout)) > (__s32)timeout)
+                nf_ct_refresh(ct, timeout);
 }
 
 static void flow_offload_route_release(struct flow_offload *flow)
@@ -326,18 +360,14 @@ void flow_offload_refresh(struct nf_flowtable *flow_table,
         else
                 return;
 
-        if (likely(!nf_flowtable_hw_offload(flow_table)))
+        if (likely(!nf_flowtable_hw_offload(flow_table)) ||
+            test_bit(NF_FLOW_CLOSING, &flow->flags))
                 return;
 
         nf_flow_offload_add(flow_table, flow);
 }
 EXPORT_SYMBOL_GPL(flow_offload_refresh);
 
-static inline bool nf_flow_has_expired(const struct flow_offload *flow)
-{
-        return nf_flow_timeout_delta(flow->timeout) <= 0;
-}
-
 static void flow_offload_del(struct nf_flowtable *flow_table,
                              struct flow_offload *flow)
 {
@@ -354,7 +384,7 @@ void flow_offload_teardown(struct flow_offload *flow)
 {
         clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status);
         set_bit(NF_FLOW_TEARDOWN, &flow->flags);
-        flow_offload_fixup_ct(flow->ct);
+        flow_offload_fixup_ct(flow);
 }
 EXPORT_SYMBOL_GPL(flow_offload_teardown);

@@ -542,6 +572,10 @@ static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table,
                 } else {
                         flow_offload_del(flow_table, flow);
                 }
+        } else if (test_bit(NF_FLOW_CLOSING, &flow->flags) &&
+                   test_bit(NF_FLOW_HW, &flow->flags) &&
+                   !test_bit(NF_FLOW_HW_DYING, &flow->flags)) {
+                nf_flow_offload_del(flow_table, flow);
         } else if (test_bit(NF_FLOW_HW, &flow->flags)) {
                 nf_flow_offload_stats(flow_table, flow);
         }
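
For clarity, the timeout that the reworked flow_offload_fixup_ct() hands back to conntrack can be reduced to the following sketch. It is a simplified userspace model, not the kernel code: the pick_ct_timeout() helper and its parameters are invented for illustration, and the real values come from the nf_tcp_pernet()/nf_udp_pernet() timeout tables shown in the hunk above.

#include <stdbool.h>

/*
 * Simplified model: a CLOSING flow always gets the short
 * TCP_CONNTRACK_CLOSE timeout; otherwise the per-state timeout is used,
 * and only a flow whose flowtable entry has already expired pays back
 * the offload grace period. Negative results clamp to zero.
 */
static int pick_ct_timeout(bool closing, bool expired,
                           int state_timeout, int close_timeout,
                           int offload_timeout)
{
        int timeout = closing ? close_timeout : state_timeout;

        if (!closing && expired)
                timeout -= offload_timeout;

        return timeout < 0 ? 0 : timeout;
}

The refresh rule changes accordingly: a CLOSING flow is unconditionally refreshed to this value via nf_ct_refresh(), whereas other flows are only adjusted when their remaining conntrack timeout exceeds the computed one.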

net/netfilter/nf_flow_table_ip.c

Lines changed: 5 additions & 1 deletion
@@ -28,11 +28,15 @@ static int nf_flow_state_check(struct flow_offload *flow, int proto,
                 return 0;
 
         tcph = (void *)(skb_network_header(skb) + thoff);
-        if (unlikely(tcph->fin || tcph->rst)) {
+        if (tcph->syn && test_bit(NF_FLOW_CLOSING, &flow->flags)) {
                 flow_offload_teardown(flow);
                 return -1;
         }
 
+        if ((tcph->fin || tcph->rst) &&
+            !test_bit(NF_FLOW_CLOSING, &flow->flags))
+                set_bit(NF_FLOW_CLOSING, &flow->flags);
+
         return 0;
 }
