A system crash caused by an nf_conn use-after-free
发表于 2025-08-15
nf_conn use-after-free 导致的系统crash
1. 问题现象
QA那边上报了一个系统crash问题,据说复现概率挺高,每次都crash在同样位置。 于是查看crash log,发现是expectation的timer超时处理中, nf_ct_unlink_expect_report 执行时,发生空指针引用导致crash。
2. 初步分析
日志内容如下
[ 1907.969774] ------------[ cut here ]------------
[ 1907.969776] WARNING: CPU: 5 PID: 0 at net/netfilter/nf_conntrack_expect.c:55 nf_ct_unlink_expect_report+0x6d/0x1f0
[ 1907.969792] CPU: 5 PID: 0 Comm: swapper/5 Kdump: loaded Tainted: G O 6.1 #1
[ 1907.969794] Hardware name: Supermicro SYS-2049P-TN8R-FI005/X11QPL, BIOS 3.3 02/19/2020
[ 1907.969795] RIP: 0010:nf_ct_unlink_expect_report+0x6d/0x1f0
[ 1907.969798] Code: b0 00 00 00 48 85 c0 75 25 eb 1f 44 8b 0d 13 57 fb 00 45 8b 50 1c 4c 01 c0 45 85 d2 74 df 45 39 ca 74 da 48 8b 99 b0 00 00 00 <0f> 0b 31 c0 48 83 bf 80 00 00 00 00 0f 85 13 01 00 00 48 8b 4f 10
[ 1907.969800] RSP: 0018:ffffc90008940e30 EFLAGS: 00010246
[ 1907.969802] RAX: 0000000000000000 RBX: ffffffff81e22440 RCX: ffff8882668de800
[ 1907.969803] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff888266150000
[ 1907.969804] RBP: ffffc90008940e58 R08: ffff88c84f792980 R09: 00000000ffffffc0
[ 1907.969806] R10: ffff8897e0c9cc08 R11: ffffffff80ec9470 R12: ffffffff80ec9470
[ 1907.969806] R13: 0000000000000000 R14: ffff888266150000 R15: ffff888266150078
[ 1907.969807] FS: 0000000000000000(0000) GS:ffff8897e0c80000(0000) knlGS:0000000000000000
[ 1907.969809] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 1907.969810] CR2: 00007ff70fb51e4c CR3: 0000000001c14003 CR4: 00000000007706e0
[ 1907.969812] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 1907.969813] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[ 1907.969814] PKRU: 55555554
[ 1907.969814] Call Trace:
[ 1907.969815] <IRQ>
[ 1907.969817] ? __warn+0x197/0x270
[ 1907.969819] ? nf_ct_unlink_expect_report+0x6d/0x1f0
[ 1907.969821] ? report_bug+0x1af/0x240
[ 1907.969825] ? nf_ct_unlink_expect_report+0x6d/0x1f0
[ 1907.969827] ? handle_bug+0x41/0x70
[ 1907.969830] ? exc_invalid_op+0x1b/0x50
[ 1907.969832] ? asm_exc_invalid_op+0x1b/0x20
[ 1907.969834] ? nf_ct_expect_dst_hash+0x120/0x120
[ 1907.969837] ? nf_ct_expect_dst_hash+0x120/0x120
[ 1907.969839] ? nf_ct_unlink_expect_report+0x6d/0x1f0
[ 1907.969840] nf_ct_expectation_timed_out+0x2b/0x90
[ 1907.969843] ? nf_ct_expect_dst_hash+0x120/0x120
[ 1907.969844] call_timer_fn+0x2f/0x110
[ 1907.969848] run_timer_softirq+0x616/0x700
[ 1907.969850] ? tick_sched_timer+0x129/0x290
[ 1907.969853] __do_softirq+0xdc/0x2ab
[ 1907.969855] irq_exit_rcu+0x6c/0xa0
[ 1907.969858] sysvec_apic_timer_interrupt+0x76/0x90
[ 1907.969861] </IRQ>
//省略无关
[ 1907.969890] BUG: kernel NULL pointer dereference, address: 0000000000000010
[ 1908.053228] #PF: supervisor write access in kernel mode
[ 1908.115727] #PF: error_code(0x0002) - not-present page
[ 1908.177195] PGD 0 P4D 0
[ 1908.207457] Oops: 0002 [#1] SMP NOPTI
[ 1908.251242] CPU: 5 PID: 0 Comm: swapper/5 Kdump: loaded Tainted: G W O 6.1 #1
[ 1908.351180] Hardware name: Supermicro SYS-2049P-TN8R-FI005/X11QPL, BIOS 3.3 02/19/2020
[ 1908.445924] RIP: 0010:nf_ct_unlink_expect_report+0xd4/0x1f0
[ 1908.512589] Code: 8b 08 0d 00 00 4f 8b 04 c1 41 ff 48 04 4c 8b 07 4c 8b 4f 08 4d 89 01 4d 85 c0 74 04 4d 89 48 08 48 89 4f 08 8b 8f a8 00 00 00 <fe> 4c 08 10 48 8b 47 70 48 8b 80 b0 00 00 00 48 8b 80 90 0c 00 00
[ 1908.737335] RSP: 0018:ffffc90008940e30 EFLAGS: 00010246
[ 1908.799836] RAX: 0000000000000000 RBX: ffffffff81e22440 RCX: 0000000000000000
[ 1908.885220] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff888266150000
[ 1908.970599] RBP: ffffc90008940e58 R08: 0000000000000000 R09: ffff88c84f7929a8
[ 1909.055984] R10: ffff8897e0c9cc08 R11: ffffffff80ec9470 R12: ffffffff80ec9470
[ 1909.141370] R13: 0000000000000000 R14: ffff888266150000 R15: ffff888266150078
[ 1909.226754] FS: 0000000000000000(0000) GS:ffff8897e0c80000(0000) knlGS:0000000000000000
[ 1909.323573] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 1909.392322] CR2: 0000000000000010 CR3: 0000000001c14003 CR4: 00000000007706e0
[ 1909.477704] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 1909.563084] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[ 1909.648468] PKRU: 55555554
[ 1909.680815] Call Trace:
[ 1909.710035] <IRQ>
[ 1909.734062] ? __die_body+0x82/0x130
[ 1909.776804] ? page_fault_oops+0x419/0x510
[ 1909.825789] ? do_user_addr_fault+0x4d6/0x6a0
[ 1909.877890] ? report_bug+0x1af/0x240
[ 1909.921676] ? ct_nmi_enter+0x94/0xc0
[ 1909.965457] ? exc_page_fault+0x4f/0xa0
[ 1910.011321] ? asm_exc_page_fault+0x27/0x30
[ 1910.061349] ? nf_ct_expect_dst_hash+0x120/0x120
[ 1910.116568] ? nf_ct_expect_dst_hash+0x120/0x120
[ 1910.171793] ? nf_ct_unlink_expect_report+0xd4/0x1f0
[ 1910.231178] nf_ct_expectation_timed_out+0x2b/0x90
[ 1910.288482] ? nf_ct_expect_dst_hash+0x120/0x120
[ 1910.343703] call_timer_fn+0x2f/0x110
[ 1910.387488] run_timer_softirq+0x616/0x700
[ 1910.436469] ? tick_sched_timer+0x129/0x290
[ 1910.486496] __do_softirq+0xdc/0x2ab
[ 1910.529241] irq_exit_rcu+0x6c/0xa0
[ 1910.570944] sysvec_apic_timer_interrupt+0x76/0x90
[ 1910.628249] </IRQ>
//省略无关
我们发现crash前有个warn, 正好对应代码中的 WARN_ON(!master_help), 也就是说 master_help指针为NULL。
/* nf_conntrack_expect helper functions */
void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
u32 portid, int report)
{
struct nf_conn_help *master_help = nfct_help(exp->master);
struct net *net = nf_ct_exp_net(exp);
struct nf_conntrack_net *cnet;
WARN_ON(!master_help); //master_help为NULL, 触发warn
WARN_ON(timer_pending(&exp->timeout));
hlist_del_rcu(&exp->hnode);
cnet = nf_ct_pernet(net);
cnet->expect_count--;
hlist_del_rcu(&exp->lnode);
master_help->expecting[exp->class]--; //空指针引用
nf_ct_expect_event_report(IPEXP_DESTROY, exp, portid, report);
nf_ct_expect_put(exp);
NF_CT_STAT_INC(net, expect_delete);
}
很容易就定位到造成问题的直接原因。但是为什么master_help为NULL呢?从代码逻辑上看这里是不应该为NULL的。
3. 分析coredump
使用crash工具打开coredump文件,查看相关变量
crash> kmem ffff888266150000 //exp
CACHE OBJSIZE ALLOCATED TOTAL SLABS SSIZE NAME
ffff8881044a0200 216 1 111 3 8k nf_conntrack_expect
SLAB MEMORY NODE TOTAL ALLOCATED FREE
ffffea0009985400 ffff888266150000 0 37 1 36
FREE / [ALLOCATED]
[ffff888266150000]
PAGE PHYSICAL MAPPING INDEX CNT FLAGS
ffffea0009985400 266150000 ffff8881044a0200 0 1 200000000010200 slab,head
crash> nf_conntrack_expect.master -x ffff888266150000 //master
master = 0xffff8882668de800,
crash> kmem 0xffff8882668de800
CACHE OBJSIZE ALLOCATED TOTAL SLABS SSIZE NAME
ffff888100042a00 512 9533 13216 413 16k kmalloc-512
SLAB MEMORY NODE TOTAL ALLOCATED FREE
ffffea00099a3700 ffff8882668dc000 0 32 14 18
FREE / [ALLOCATED]
ffff8882668de800 //master指向的nf_conn内存已被释放!!!
PAGE PHYSICAL MAPPING INDEX CNT FLAGS
ffffea00099a3780 2668de000 dead000000000400 0 0 200000000000000
crash> nf_conn.ext -x 0xffff8882668de800
ext = 0xffff88c84f792980,
crash> kmem 0xffff88c84f792980
CACHE OBJSIZE ALLOCATED TOTAL SLABS SSIZE NAME
ffff888100042700 128 16907 31872 996 4k kmalloc-128
SLAB MEMORY NODE TOTAL ALLOCATED FREE
ffffea01213de480 ffff88c84f792000 3 32 17 15
FREE / [ALLOCATED]
ffff88c84f792980 (cpu 11 cache) //master->ext指向的nf_ct_ext内存已被释放!!!
PAGE PHYSICAL MAPPING INDEX CNT FLAGS
ffffea01213de480 484f792000 ffff888100042700 ffff88c84f792b80 1 e00000000000200 slab
crash> nf_ct_ext -x ffff88c84f792980
struct nf_ct_ext {
offset = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x22, 0x0, 0x8, 0x0, 0x0}, //offset为0
len = 0x58,
gen_id = 0x1,
data = 0xffff88c84f7929a0 ""
}
也就是说因为 nf_conn和nf_ct_ext对应内存已经被释放,导致取到的offset[0]的值是0,master_help = nfct_help(exp->master)函数返回NULL。 进一步的原因是内存的use-after-free。
4. 验证推断
查看代码,在 nf_conntrack_free 之前,有调用 nf_ct_remove_expectations,会将所有的expectation的timer移除。
于是猜想是不是在移除timer时,timer刚好在执行,导致移除timer失败。这样nf_conn被释放后,timer继续执行出现use-after-free。 且不说这种概率超小,不应该这么频繁出现,而且在这种极端情况下nf_conn和nf_ct_ext内存还没有被改写,就算访问到也是原有数据。
但还是想着验证下吧,于是在nf_conntrack_alloc, nf_conntrack_free 中添加日志,编译debug image,期望能在复现后看到nf_conn已被释放的日志。
很快QA就又复现问题了,查看debug 日志,奇怪的是,根本没有问题nf_conn的释放日志,更奇怪的是也没有问题nf_conn的申请日志! 这就有点诡异了。
5. 迭代分析和验证
为了帮助定位,开启了SLUB_DEBUG编译选项,开启后会在slab的后面保存申请和释放的函数信息。
static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
unsigned long addr, struct kmem_cache_cpu *c, unsigned int orig_size)
{
//忽略无关
if (kmem_cache_debug(s)) {
freelist = alloc_single_from_new_slab(s, slab, orig_size);
if (unlikely(!freelist))
goto new_objects;
if (s->flags & SLAB_STORE_USER)
set_track(s, freelist, TRACK_ALLOC, addr); //会将申请函数地址存入
return freelist;
}
//忽略无关
}
再次复现后,查看问题 nf_conn的slab中的申请释放信息,发现对应内存在crash前曾被用作skb的buffer,没有有效信息可用。
追查代码,定时器是在 nf_ct_expect_insert 中添加的,于是在这里增加调用栈,编译debug image,再去复现。 复现后,发现在crash前,就只有一个 nf_ct_expect_insert ,后续的crash就是这个expectation。
为了查看nf_ct_expect_insert时的nf_conn信息,我在这个函数中调用了panic函数,让它在这里产生coredump。 再次复现后,分析coredump,在nf_conn的slab中找到了申请函数
crash> kmem ffff888136fcd000 //nf_conn
CACHE OBJSIZE ALLOCATED TOTAL SLABS SSIZE NAME
ffff888100048f40 512 14303 15666 746 32k kmalloc-512
SLAB MEMORY NODE TOTAL ALLOCATED FREE
ffffea0004dbf200 ffff888136fc8000 0 21 21 0
FREE / [ALLOCATED]
[ffff888136fcce00]
PAGE PHYSICAL MAPPING INDEX CNT FLAGS
ffffea0004dbf340 136fcd000 dead000000000400 0 0 200000000000000
crash> struct kmem_cache.offset,inuse -x ffff888100048f40
offset = 0x208,
inuse = 0x208,
object_size = 0x200,
crash> p (0xffff888136fcd000 + 0x208 + 0x8) -x
$4 = 0xffff888136fcd210 //track
crash> track ffff888136fcd210 -x
struct track {
addr = 0xffffffff80f13d8d,
handle = 0x7cf00be,
cpu = 0xd,
pid = 0x1247,
when = 0xfffc8e5c
}
crash> rd -s ffff888136fcd210
ffff888136fcd210: xt_ct_tg_check_v2+29 //申请函数
查看代码发现nf_conn的申请不是 nf_conntrack_alloc, 而是 nf_ct_tmpl_alloc。同样的,释放函数不是 nf_conntrack_free 而是 nf_ct_tmpl_free 。 到这里才了解为什么之前看不到释放日志。是因为没考虑到template的申请和释放。
并且nf_conn template 的释放并不会调用 nf_ct_remove_expectations 来删除对应的timer。这就会导致 nf_conn释放后,timer依然存在,并在超时后调用。最终出现use-after-free问题。
void nf_ct_destroy(struct nf_conntrack *nfct)
{
struct nf_conn *ct = (struct nf_conn *)nfct;
pr_debug("%s(%p)\n", __func__, ct);
WARN_ON(refcount_read(&nfct->use) != 0);
if (unlikely(nf_ct_is_template(ct))) {
nf_ct_tmpl_free(ct); //释放nf_conn template
return; //返回
}
if (unlikely(nf_ct_protonum(ct) == IPPROTO_GRE))
destroy_gre_conntrack(ct);
/* Expectations will have been removed in clean_from_lists,
* except TFTP can create an expectation on the first packet,
* before connection is in the list, so we need to clean here,
* too.
*/
nf_ct_remove_expectations(ct); //删除expectation的timer
if (ct->master)
nf_ct_put(ct->master);
pr_debug("%s: returning ct=%p to slab\n", __func__, ct);
nf_conntrack_free(ct);
}
重新在 nf_ct_tmpl_alloc 和 nf_ct_tmpl_free 中添加日志,并编译image复现。 复现后,查看日志,确实如此。
//申请
[ 1978.316487] nf_conntrack: [nf_ct_tmpl_alloc:580] nf_conn:ffff8881391e3800 ext:0
//插入
[ 2131.989389] [nf_ct_expect_insert:417] exp:ffff88823aac8008 master:ffff8881391e3800 ext:ffff888286a3c500 jiffies:4296796140 timeout:300 expires:4297096140
//释放
[ 2140.352889] nf_conntrack: [nf_ct_tmpl_free:594] nf_conn:ffff8881391e3800 ext:6b6b6b6b6b6b6b6b
[ 2140.352891] CPU: 0 PID: 4691 Comm: netd Kdump: loaded Tainted: G W O 6.1 #16
[ 2140.352892] Hardware name: Supermicro SYS-2049P-TN8R-FI005/X11QPL, BIOS 3.3 02/19/2020
[ 2140.352893] Call Trace:
[ 2140.352893] <TASK>
[ 2140.352894] nf_ct_tmpl_free+0x4f/0x60
[ 2140.352896] nf_ct_destroy+0xce/0x290
[ 2140.352898] xt_ct_tg_destroy+0x78/0xc0
[ 2140.352900] xt_ct_tg_destroy_v1+0x12/0x20
[ 2140.352902] cleanup_entry+0x115/0x1b0
[ 2140.352904] __do_replace+0x3ab/0x530
[ 2140.352906] ? do_ipt_set_ctl+0x5ef/0x6c0
[ 2140.352907] do_ipt_set_ctl+0x5ef/0x6c0
[ 2140.352909] nf_setsockopt+0x1a8/0x2e0
[ 2140.352911] raw_setsockopt+0x7b/0x120
[ 2140.352912] sock_common_setsockopt+0x18/0x30
[ 2140.352913] __sys_setsockopt+0xb9/0x130
[ 2140.352915] __x64_sys_setsockopt+0x21/0x30
[ 2140.352917] do_syscall_64+0x49/0xa0
[ 2140.352919] ? irqentry_exit+0x12/0x40
[ 2140.352920] entry_SYSCALL_64_after_hwframe+0x64/0xce
//crash
[ 2433.066066] general protection fault, probably for non-canonical address 0x6b6b6b6b6b6b6b6b: 0000 [#1] SMP NOPTI
[ 2433.187797] CPU: 10 PID: 66 Comm: ksoftirqd/10 Kdump: loaded Tainted: G W O 6.1 #16
[ 2433.293977] Hardware name: Supermicro SYS-2049P-TN8R-FI005/X11QPL, BIOS 3.3 02/19/2020
[ 2433.306651] nf_conntrack: [__nf_conntrack_alloc:1729] nf_conn:ffff8882a9268440 jiffies:4297097457
[ 2433.388722] RIP: 0010:nf_ct_unlink_expect_report+0x2d/0x1f0
[ 2433.388730] Code: 00 00 55 48 89 e5 41 56 53 48 83 ec 18 65 48 8b 04 25 28 00 00 00 48 89 45 e8 48 8b 4f 70 4c 8b 81 e8 00 00 00 4d 85 c0 74 39 <41> 0f b7 00 48 85 c0 74 30 41 83 78 1c 00 75 11 4c 01 c0 48 8b 99
[ 2433.388732] RSP: 0018:ffffc9000ce0fce0 EFLAGS: 00010202
[ 2433.848812] RAX: a79bfdc906a58200 RBX: ffff88823aac8088 RCX: ffff8881391e3800
[ 2433.934200] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff88823aac8008
[ 2434.019584] RBP: ffffc9000ce0fd08 R08: 6b6b6b6b6b6b6b6b R09: 0000000000000000
[ 2434.104964] R10: ffff8897e0f1cc00 R11: ffffffff80ee7e00 R12: ffffffff80ee7e00
[ 2434.190349] R13: 0000000000000000 R14: ffff88823aac8008 R15: ffff88823aac8088
[ 2434.275728] FS: 0000000000000000(0000) GS:ffff8897e0f00000(0000) knlGS:0000000000000000
[ 2434.372555] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 2434.441296] CR2: 00007f980bc97000 CR3: 0000000107734003 CR4: 00000000007706e0
[ 2434.526684] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 2434.612066] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[ 2434.697449] PKRU: 55555554
[ 2434.729791] Call Trace:
[ 2434.759017] <TASK>
[ 2434.784079] ? __die_body+0x82/0x130
[ 2434.826826] ? die_addr+0xaa/0xe0
[ 2434.866446] ? exc_general_protection+0x13a/0x1e0
[ 2434.922711] ? asm_exc_general_protection+0x27/0x30
[ 2434.981054] ? nf_ct_expect_dst_hash+0x120/0x120
[ 2435.036276] ? nf_ct_expect_dst_hash+0x120/0x120
[ 2435.091503] ? nf_ct_unlink_expect_report+0x2d/0x1f0
[ 2435.150885] nf_ct_expectation_timed_out+0x2b/0x90
[ 2435.208189] ? nf_ct_expect_dst_hash+0x120/0x120
[ 2435.263415] call_timer_fn+0x2f/0x110
[ 2435.307195] run_timer_softirq+0x616/0x700
[ 2435.356179] ? newidle_balance+0x299/0x320
[ 2435.405166] __do_softirq+0xdc/0x2ab
[ 2435.447904] run_ksoftirqd+0x1c/0x30
[ 2435.490649] smpboot_thread_fn+0xe8/0x1b0
[ 2435.538595] kthread+0x269/0x2a0
[ 2435.577179] ? __smpboot_create_thread+0x220/0x220
[ 2435.634479] ? kthreadd+0x380/0x380
[ 2435.676187] ret_from_fork+0x1f/0x30
[ 2435.718930] </TASK>
6. 深入分析
一个skb的nf_conn是template nf_conntrack,这不合理。 正常来说,template nf_conntrack在 nf_conntrack_in 处理时,会将其替换成真正的 nf_conntrack。
unsigned int
nf_conntrack_in(struct sk_buff *skb, const struct nf_hook_state *state)
{
//省略无关
tmpl = nf_ct_get(skb, &ctinfo);
if (tmpl || ctinfo == IP_CT_UNTRACKED) {
/* Previously seen (loopback or untracked)? Ignore. */
if ((tmpl && !nf_ct_is_template(tmpl)) ||
ctinfo == IP_CT_UNTRACKED)
return NF_ACCEPT;
skb->_nfct = 0; //template nf_conntrack 则 置空_nfct
}
//省略无关
ret = resolve_normal_ct(tmpl, skb, dataoff,
protonum, state);
}
在nf_conntrack_in中,如果skb->_nfct是 template conntrack,则置空 skb->_nfct。随后调用 resolve_normal_ct() –> init_conntrack() –> __nf_conntrack_alloc() –> ct = kmem_cache_alloc(nf_conntrack_cachep, gfp) 申请正式的nf_conntrack。并 调用 nf_ct_set() 赋予 skb->_nfct
也就是说nf_conntrack_in 处理后,skb->_nfct 不可能是template nf_conntrack了。
但是为什么测试环境会出现呢? 又看了一遍nf_conntrack_in 的代码,发现里面有个提前跳出逻辑。原意是为了提高性能,不再进行 nf_conntrack 处理。但是有点粗暴,导致template conntrack没有清理,最终导致了问题发生。
7. 问题修复
修复方案1
最初想的是将expectation的remove操作挪到 tmpl nf_conn的释放之前即可。这样即使expectation的master是template conntrack也能正确释放。
net/netfilter/nf_conntrack_core.c | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 344f88295976..7f6b95404907 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -577,6 +577,13 @@ void nf_ct_destroy(struct nf_conntrack *nfct)
WARN_ON(refcount_read(&nfct->use) != 0);
+ /* Expectations will have been removed in clean_from_lists,
+ * except TFTP can create an expectation on the first packet,
+ * before connection is in the list, so we need to clean here,
+ * too.
+ */
+ nf_ct_remove_expectations(ct);
+
if (unlikely(nf_ct_is_template(ct))) {
nf_ct_tmpl_free(ct);
return;
@@ -585,13 +592,6 @@ void nf_ct_destroy(struct nf_conntrack *nfct)
if (unlikely(nf_ct_protonum(ct) == IPPROTO_GRE))
destroy_gre_conntrack(ct);
- /* Expectations will have been removed in clean_from_lists,
- * except TFTP can create an expectation on the first packet,
- * before connection is in the list, so we need to clean here,
- * too.
- */
- nf_ct_remove_expectations(ct);
-
if (ct->master)
nf_ct_put(ct->master);
base-commit: 01792bc3e5bdafa171dd83c7073f00e7de93a653
修复方案2
经过更深入地了解产品代码和nf_conntrack的整体逻辑后,发现将处理前置是更好的办法,这样能避免很多不必要的操作,例如expectation的创建和删除。
只需要将 提前跳出逻辑放到 template conntrack的判断之后。
unsigned int
nf_conntrack_in(struct sk_buff *skb, const struct nf_hook_state *state)
{
//省略无关
tmpl = nf_ct_get(skb, &ctinfo);
if (tmpl || ctinfo == IP_CT_UNTRACKED) {
/* Previously seen (loopback or untracked)? Ignore. */
if ((tmpl && !nf_ct_is_template(tmpl)) ||
ctinfo == IP_CT_UNTRACKED)
return NF_ACCEPT;
skb->_nfct = 0; //template nf_conntrack 则 置空_nfct
}
//提前跳出逻辑
if (SPECIAL_CONDITION)
return ACCEPT;
}
出image,测试验证通过。
8. 后记
在软件工程中,有个名词叫“前置”,意思是达到同样的效果,处理越靠前,越高效,越能降低成本。
例如本例中的处理。 方案1也能达到同样效果,但是在此之前,系统为数据包创建了无效的expectation,添加了定时器,最终又将其删除。这一系列操作都是不必要的,浪费了CPU时间片。 方案2在最开始就直接将skb->_nfct置空,也就没有后续的无效操作。更简洁高效。
类似的还有测试, 如果问题在单元测试发现,比在集成测试发现成本要低。在集成测试发现,比在客户环境发现成本更低。
本文访问次数:... 次