Skip to the content.

[iptables]Series 4: iptables initialization and rule configuration

发表于 2025-04-07

iptables的初始化和配置下发

为了更好的熟悉iptables规则,我们先了解下相关表项在内核的初始化。 以filter为例。

1. filter模块的初始化

#define FILTER_VALID_HOOKS ((1 << NF_INET_LOCAL_IN) | \
			    (1 << NF_INET_FORWARD) | \
			    (1 << NF_INET_LOCAL_OUT))    //在 INPUT/FORWARD/OUTPUT 3个hook链上。

static const struct xt_table packet_filter = {
	.name		= "filter",
	.valid_hooks	= FILTER_VALID_HOOKS,
	.me		= THIS_MODULE,
	.af		= NFPROTO_IPV4,
	.priority	= NF_IP_PRI_FILTER,    //优先级,值越小优先级越高。NF_IP_PRI_FILTER=0
};

static int __init iptable_filter_init(void)
{
	int ret = xt_register_template(&packet_filter,
				       iptable_filter_table_init);    //将xt_template注册到xt_templates[af]链表,后续第一次用到时会调用iptable_filter_table_init创建默认表

	if (ret < 0)
		return ret;

	filter_ops = xt_hook_ops_alloc(&packet_filter, ipt_do_table);    //创建nf_hook_ops数组filter_ops,hook=ipt_do_table。
	if (IS_ERR(filter_ops)) {
		xt_unregister_template(&packet_filter);
		return PTR_ERR(filter_ops);
	}

	ret = register_pernet_subsys(&iptable_filter_net_ops);    //注册net_namespace初始化回调函数。
	if (ret < 0) {
		xt_unregister_template(&packet_filter);
		kfree(filter_ops);
		return ret;
	}

	return 0;
}

xt_templates全局链表数组

iptable_filter_init --> xt_register_template --> list_add(&t->list, &xt_templates[af]) 后续查找xt_table失败时,会通过xt_templates[af]链表找到对应的xt_template,并调用iptable_filter_table_init创建并注册

filter_ops全局nf_hook_ops数组

iptable_filter_init --> xt_hook_ops_alloc --> ops = kcalloc(num_hooks, sizeof(*ops), GFP_KERNEL) 通过FILTER_VALID_HOOKS得到num_hooks=3 。 ops[i].hook = ipt_do_table ops[i].priority = NF_IP_PRI_FILTER = 0 这写ops后续会注册到对应的nf_hook_entry数组,按照priority插入到数组对应位置。

2. filter表的初始化

filter模块初始化后,尚未创建filter表。当我们查询或添加规则时,会触发fitlre表的初始化。

iptables查询操作

前面我们已经简单梳理过iptables的查询,这里只侧重表的初始化路径。 do_ipt_get_ctl --> [IPT_SO_GET_INFO]get_info --> xt_request_find_table_lock --> xt_find_table_lock

do_ipt_get_ctl --> [IPT_SO_GET_ENTRIES]get_info --> xt_find_table_lock

iptables配置操作

系统调用setsockopt(7, SOL_IP, IPT_SO_SET_REPLACE, "filter\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 13064)

这里只侧重表的初始化路径。 对应 do_ipt_set_ctl --> [IPT_SO_SET_REPLACE]do_replace --> __do_replace --> xt_request_find_table_lock --> xt_find_table_lock

xt_find_table_lock函数

/* Find table by name, grabs mutex & ref.  Returns ERR_PTR on error. */
struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af,
				    const char *name)
{
	struct xt_pernet *xt_net = net_generic(net, xt_pernet_id);  //得到xt_net
	struct module *owner = NULL;
	struct xt_template *tmpl;
	struct xt_table *t;

	mutex_lock(&xt[af].mutex);
	list_for_each_entry(t, &xt_net->tables[af], list)    //遍历链表,查找filter的xt_table
		if (strcmp(t->name, name) == 0 && try_module_get(t->me))
			return t;

	/* Table doesn't exist in this netns, check larval list */
	list_for_each_entry(tmpl, &xt_templates[af], list) {  //没有找到xt_table, 从xt_templates[af]链表中查找xt_template
		int err;

		if (strcmp(tmpl->name, name))
			continue;
		if (!try_module_get(tmpl->me))
			goto out;

		owner = tmpl->me;

		mutex_unlock(&xt[af].mutex);
		err = tmpl->table_init(net);    //执行表初始化 iptable_filter_table_init
		if (err < 0) {
			module_put(owner);
			return ERR_PTR(err);
		}

		mutex_lock(&xt[af].mutex);
		break;
	}

	/* and once again: */
	list_for_each_entry(t, &xt_net->tables[af], list)  //初始化后,会将filter的xt_table加入到链表。再次查找
		if (strcmp(t->name, name) == 0)
			return t;

	module_put(owner);
 out:
	mutex_unlock(&xt[af].mutex);
	return ERR_PTR(-ENOENT);
}

调用iptable_filter_table_init 初始化filter表

static int iptable_filter_table_init(struct net *net)
{
	struct ipt_replace *repl;
	int err;

	repl = ipt_alloc_initial_table(&packet_filter);  //创建默认的空ipt_replace
	if (repl == NULL)
		return -ENOMEM;
	/* Entry 1 is the FORWARD hook */
	((struct ipt_standard *)repl->entries)[1].target.verdict =
		forward ? -NF_ACCEPT - 1 : -NF_DROP - 1;

	err = ipt_register_table(net, &packet_filter, repl, filter_ops);
	kfree(repl);
	return err;
}

int ipt_register_table(struct net *net, const struct xt_table *table,
		       const struct ipt_replace *repl,
		       const struct nf_hook_ops *template_ops)
{
	struct nf_hook_ops *ops;
	unsigned int num_ops;
	int ret, i;
	struct xt_table_info *newinfo;
	struct xt_table_info bootstrap = {0};
	void *loc_cpu_entry;
	struct xt_table *new_table;

	newinfo = xt_alloc_table_info(repl->size);    //申请xt_table_info
	if (!newinfo)
		return -ENOMEM;

	loc_cpu_entry = newinfo->entries;
	memcpy(loc_cpu_entry, repl->entries, repl->size);

	ret = translate_table(net, newinfo, loc_cpu_entry, repl);  //
	if (ret != 0) {
		xt_free_table_info(newinfo);
		return ret;
	}

	new_table = xt_register_table(net, table, &bootstrap, newinfo);  //注册filter的xt_table到链上
	if (IS_ERR(new_table)) {
		struct ipt_entry *iter;

		xt_entry_foreach(iter, loc_cpu_entry, newinfo->size)
			cleanup_entry(iter, net);
		xt_free_table_info(newinfo);
		return PTR_ERR(new_table);
	}

	/* No template? No need to do anything. This is used by 'nat' table, it registers
	 * with the nat core instead of the netfilter core.
	 */
	if (!template_ops)
		return 0;

	num_ops = hweight32(table->valid_hooks);
	if (num_ops == 0) {
		ret = -EINVAL;
		goto out_free;
	}

	ops = kmemdup(template_ops, sizeof(*ops) * num_ops, GFP_KERNEL);  //dup filter_ops
	if (!ops) {
		ret = -ENOMEM;
		goto out_free;
	}

	for (i = 0; i < num_ops; i++)
		ops[i].priv = new_table;

	new_table->ops = ops;

	ret = nf_register_net_hooks(net, ops, num_ops);  //注册到对应hook的链上。
	if (ret != 0)
		goto out_free;

	return ret;

out_free:
	__ipt_unregister_table(net, new_table);
	return ret;
}

初始化并添加filter的xt_table

struct xt_table *xt_register_table(struct net *net,
				   const struct xt_table *input_table,
				   struct xt_table_info *bootstrap,
				   struct xt_table_info *newinfo)
{
	struct xt_pernet *xt_net = net_generic(net, xt_pernet_id);
	struct xt_table_info *private;
	struct xt_table *t, *table;
	int ret;

	/* Don't add one object to multiple lists. */
	table = kmemdup(input_table, sizeof(struct xt_table), GFP_KERNEL);
	if (!table) {
		ret = -ENOMEM;
		goto out;
	}

	mutex_lock(&xt[table->af].mutex);
	/* Simplifies replace_table code. */
	table->private = bootstrap;

	if (!xt_replace_table(table, 0, newinfo, &ret))    //替换table->private为newinfo
		goto unlock;

	private = table->private;
	pr_debug("table->private->number = %u\n", private->number);

	/* save number of initial entries */
	private->initial_entries = private->number;

	list_add(&table->list, &xt_net->tables[table->af]);  //将filter的xt_table添加到xt_net->tables[table->af]链表中
	mutex_unlock(&xt[table->af].mutex);
	return table;

unlock:
	mutex_unlock(&xt[table->af].mutex);
	kfree(table);
out:
	return ERR_PTR(ret);
}

注册filter_ops

nf_register_net_hooks --> nf_register_net_hook --> __nf_register_net_hook -->

static int __nf_register_net_hook(struct net *net, int pf,
				  const struct nf_hook_ops *reg)
{
	struct nf_hook_entries *p, *new_hooks;
	struct nf_hook_entries __rcu **pp;
	int err;
//省略无关
	pp = nf_hook_entry_head(net, pf, reg->hooknum, reg->dev);  //pp=net->nf.hooks_ipv4 + hooknum

	mutex_lock(&nf_hook_mutex);
	p = nf_entry_dereference(*pp);
	new_hooks = nf_hook_entries_grow(p, reg);    //扩展nf_hook_entries可变结构,并将filter的ops按优先级放到对应位置。

	if (!IS_ERR(new_hooks)) {
		hooks_validate(new_hooks);
		rcu_assign_pointer(*pp, new_hooks);    //更新net->nf.hooks_ipv4 + hooknum
	}

	mutex_unlock(&nf_hook_mutex);
	if (IS_ERR(new_hooks))
		return PTR_ERR(new_hooks);

//省略无关
	nf_static_key_inc(reg, pf);

	BUG_ON(p == new_hooks);
	nf_hook_entries_free(p);
	return 0;
}

本文访问次数:...