Skip to the content.

ipset command fails to execute after upgrading the kernel

发表于 2025-05-29

升级内核后,ipset执行失败的问题分析

1. 问题现象

这段时间负责产品内核升级。由于内核升级跨度大,冲突特别多,代码特别乱。 终于内核能够稳定运行后,测试发现ipset不能正常执行。

2. 初步分析

初步分析发现新内核的ipset协议版本升级到7了,而应用程序ipset 6.38 支持的协议版本还是6。 于是下载了最新的ipset-7.24。由于我们的ipset是定制过的,添加了一些自定义字段,所以还得将旧代码做diff生成patch,再应用到新ipset中,并修复冲突。

一切完成之后,编译版本。本以为万事大吉,没想到运行失败。

ipset v7.24: Kernel error received: ipset protocol error

3. 继续深入

3.1 ipset debug

为了方便定位,将ipset的debug开关打开了

#define IPSET_DEBUG 1  //开启debug打印

3.2 kernel ipset debug

同时内核中将可能返回 -IPSET_ERR_PROTOCOL 的地方都加了打印。

3.3 调试迭代

先是发现是 ip_set_ad 这个函数返回的 -IPSET_ERR_PROTOCOL

May 29 10:31:19 kern.info kernel: [  157.433117] [ip_set_ad:2017]

对应函数下面部分:

static int ip_set_ad(struct net *net, struct sock *ctnl,
		     struct sk_buff *skb,
		     enum ipset_adt adt,
		     const struct nlmsghdr *nlh,
		     const struct nlattr * const attr[],
		     struct netlink_ext_ack *extack)
{
//省略无关

	set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
	if (!set)
		return -ENOENT;

	use_lineno = !!attr[IPSET_ATTR_LINENO];
	if (attr[IPSET_ATTR_DATA]) {
		if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX,
				     attr[IPSET_ATTR_DATA],
				     set->type->adt_policy, NULL)) {
				     
			pr_info("[%s:%d] \n", __func__, __LINE__);    //这里
			return -IPSET_ERR_PROTOCOL;
				     	}
		ret = call_ad(net, ctnl, skb, set, tb, adt, flags,
			      use_lineno);
	} else {
//省略无关
	}
	return ret;
}

也就是说,命令执行失败是因为 nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, attr[IPSET_ATTR_DATA], set->type->adt_policy, NULL) 在解析嵌套的 IPSET_ATTR_DATA 属性时出错。 找到突破点后,继续在 nla_parse_nested 的调用路径中添加调试日志,编译、运行后,可以清晰发现错误位置。

  • ipset命令的debug 输出
 # ipset add ip_set_gp1 203.203.203.203,udp:65535 priv1 1
//省略无关部分
Message header: sent cmd  ADD (9) flags: 0x205
	len 92
	flag none
	seq 1748540363
	Command attributes:
	PROTOCOL: 7
	SETNAME: ip_set_gp1
		ADT attributes:
		IP: 203.203.203.203
		PORT: 65535
		PROTO: 17
		CADT_LINENO: 0
		PRIV1: 1
Message header: received msg ERROR, flags: 0
	len 112
	errcode -4097
	seq 1748540363
session.c: callback_error:  called, cmd ADD
session.c: callback_error: nlmsgerr error: 4097
session.c: decode_errmsg: nlsmg_len: 92
session.c: generic_data_attr_cb: attr type: 1, len 5
session.c: generic_data_attr_cb: attr type: 2, len 15
session.c: generic_data_attr_cb: attr type: 7, len 48
mnl.c: ipset_mnl_query: nfln_cb_run2, ret: -1, errno 0
session.c: ipset_commit: ret: -1
session.c: ipset_cmd: reset data
ipset.c: ipset_parse_argv: ret -1
ipset v7.24: Kernel error received: ipset protocol error
ipset.c: default_custom_error: status: 4
  • 内核日志部分
May 29 10:39:20  kern.info kernel: [  638.073733] [__nla_validate_parse:653] OK type:1
May 29 10:39:20  kern.info kernel: [  638.073836] [__nla_validate_parse:653] OK type:1
May 29 10:39:20  kern.info kernel: [  638.073840] [__nla_validate_parse:653] OK type:2
May 29 10:39:20  kern.info kernel: [  638.074255] [__nla_validate_parse:653] OK type:1
May 29 10:39:20  kern.info kernel: [  638.074260] [__nla_validate_parse:653] OK type:2
May 29 10:39:20  kern.info kernel: [  638.074263] [__nla_validate_parse:653] OK type:7
May 29 10:39:20  kern.info kernel: [  638.074266] [__nla_validate_parse:653] OK type:1
May 29 10:39:20  kern.info kernel: [  638.074268] [__nla_validate_parse:653] OK type:4
May 29 10:39:20  kern.info kernel: [  638.074271] [__nla_validate_parse:653] OK type:7
May 29 10:39:20  kern.info kernel: [  638.074273] [validate_nla:545] 
May 29 10:39:20  kern.info kernel: [  638.074277] [__nla_validate_parse:646]type:12 err:-22
May 29 10:39:20  kern.info kernel: [  638.074281] [ip_set_ad:2017] 

3.4 代码分析

从内核日志上看, 是 __nla_validate_parse 中 调用 validate_nla 去验证 nla时出错导致。

static int __nla_validate_parse(const struct nlattr *head, int len, int maxtype,
				const struct nla_policy *policy,
				unsigned int validate,
				struct netlink_ext_ack *extack,
				struct nlattr **tb, unsigned int depth)
{
//省略无关

	nla_for_each_attr(nla, head, len, rem) {
		u16 type = nla_type(nla);

		if (type == 0 || type > maxtype) {
			if (validate & NL_VALIDATE_MAXTYPE) {
				NL_SET_ERR_MSG_ATTR(extack, nla,
						    "Unknown attribute type");
				pr_info("[%s:%d] \n", __func__, __LINE__);
				return -EINVAL;
			}
			continue;
		}
		type = array_index_nospec(type, maxtype + 1);
		if (policy) {
			int err = validate_nla(nla, maxtype, policy,
					       validate, extack, depth);

			if (err < 0) {
				pr_info("[%s:%d]type:%u err:%d\n", __func__, __LINE__, type, err);  //这里
				return err;
				}
		}

//省略无关
	return 0;
}

static int validate_nla(const struct nlattr *nla, int maxtype,
			const struct nla_policy *policy, unsigned int validate,
			struct netlink_ext_ack *extack, unsigned int depth)
{
//省略无关

	switch (pt->type) {
//省略无关
	case NLA_UNSPEC: 
		if (validate & NL_VALIDATE_UNSPEC) {
			NL_SET_ERR_MSG_ATTR(extack, nla,
					    "Unsupported attribute");
			pr_info("[%s:%d] \n", __func__, __LINE__);  //这里
			return -EINVAL;
		}
		if (attrlen < pt->len) {
			pr_info("[%s:%d] \n", __func__, __LINE__);
			goto out_err;
			}
		break;

	default:
		if (pt->len)
			minlen = pt->len;
		else
			minlen = nla_attr_minlen[pt->type];

		if (attrlen < minlen) {
			pr_info("[%s:%d] \n", __func__, __LINE__);
			goto out_err;
			}
	}
//省略无关

	return 0;
out_err:
	NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt,
				"Attribute failed policy validation");
	return err;
}

从log上知道 验证的是type=12,也就是我们自定义的PRIV1 参数。从代码上看是进入了 NLA_UNSPEC 的处理。为了知道 type=12对应的policy,我们使用crash工具实时调试内核。

crash> ip_set_net_id
ip_set_net_id = $1 = 35
crash> p init_net.gen.ptr[35]
$2 = (void *) 0xffff88810195e6d0
crash> ip_set_net -x 0xffff88810195e6d0 
struct ip_set_net {
  ip_set_list = 0xffff88810e8cf800,
  ip_set_max = 0x100,
  is_deleted = 0x0,
  is_destroyed = 0x0
}
crash> ip_set_net -x
struct ip_set_net {
    struct ip_set **ip_set_list;
    ip_set_id_t ip_set_max;
    bool is_deleted;
    bool is_destroyed;
}
SIZE: 0x10
crash> rd 0xffff88810e8cf800 6
ffff88810e8cf800:  ffff888101444d80 ffff888112f22fc0   .MD....../......
ffff88810e8cf810:  0000000000000000 0000000000000000   ................
ffff88810e8cf820:  0000000000000000 0000000000000000   ................

crash> ip_set ffff888112f22fc0
struct ip_set {
  name = "ip_set_gp1\000",
  lock = {
    {
      rlock = {
        raw_lock = {
          {
            val = {
              counter = 0
            },
            {
              locked = 0 '\000',
              pending = 0 '\000'
            },
            {
              locked_pending = 0,
              tail = 0
            }
          }
        },
        magic = 3735899821,
        owner_cpu = 4294967295,
        owner = 0xffffffffffffffff
      }
    }
  },
  ref = 0,
  ref_netlink = 0,
  type = 0xffffffff81e6fe20 <hash_ipport_type>,
  variant = 0xffffffff8198be20 <hash_ipport4_variant>,
  family = 2 '\002',
  revision = 6 '\006',
  extensions = 0 '\000',
  flags = 2 '\002',
  timeout = 4294967295,
  elements = 0,
  ext_size = 0,
  dsize = 16,
  offset = {0, 0, 0, 0},
  data = 0xffff888112f23080
}

crash> 
crash> p hash_ipport_type.adt_policy[12]
$6 = {
  type = 0 '\000',          //NLA_UNSPEC
  validation_type = 0 '\000',
  len = 0,
  {
    strict_start_type = 0,
    bitfield32_valid = 0,
    mask = 0,
    reject_message = 0x0,
    nested_policy = 0x0,
    range = 0x0,
    range_signed = 0x0,
    {
      min = 0,
      max = 0
    },
    validate = 0x0
  }
}

policy[12]的type确实是 NLA_UNSPEC, 那么传入的validate呢? 我们查到是 NL_VALIDATE_STRICT

/*
 * Strict validation mask: the OR of the five validation flags below.
 * Note that it includes NL_VALIDATE_UNSPEC, the flag that validate_nla()
 * tests in its NLA_UNSPEC case before returning -EINVAL.
 */
#define NL_VALIDATE_STRICT (NL_VALIDATE_TRAILING |\
			    NL_VALIDATE_MAXTYPE |\
			    NL_VALIDATE_UNSPEC |\
			    NL_VALIDATE_STRICT_ATTRS |\
			    NL_VALIDATE_NESTED)

/*
 * Parse the attributes nested inside @nla.
 *
 * Returns -EINVAL when @nla does not carry the NLA_F_NESTED flag; otherwise
 * returns the result of __nla_parse() on the payload of @nla.
 * Validation is always requested with NL_VALIDATE_STRICT; the caller cannot
 * choose a more lenient mode through this helper.
 */
static inline int nla_parse_nested(struct nlattr *tb[], int maxtype,
				   const struct nlattr *nla,
				   const struct nla_policy *policy,
				   struct netlink_ext_ack *extack)
{
	/* The container attribute itself must be flagged as nested. */
	if (!(nla->nla_type & NLA_F_NESTED)) {
		NL_SET_ERR_MSG_ATTR(extack, nla, "NLA_F_NESTED is missing");
		/* Debug print added for this investigation. */
		pr_info("[%s:%d] \n", __func__, __LINE__);
		return -EINVAL;
	}

	/* Hand the nested payload (data pointer + length) to the parser. */
	return __nla_parse(tb, maxtype, nla_data(nla), nla_len(nla), policy,
			   NL_VALIDATE_STRICT, extack);
}

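由于传入的 validate 是 NL_VALIDATE_STRICT(包含 NL_VALIDATE_UNSPEC 标志),而 policy[12] 的 type 又是 NLA_UNSPEC,所以 validate_nla 会在 NLA_UNSPEC 分支中判定为错误并返回 -EINVAL(即日志中的 err:-22)。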

3.5 根因复现

综上,adt_policy 中没有为自定义的 PRIV1 属性定义 policy,导致其 type 为默认的 NLA_UNSPEC,从而校验失败。 检查代码,确实没有 type=12 即 IPSET_ATTR_PRIVATE1 的policy,这是导致问题的根本原因。

/*
 * Set type descriptor for "hash:ip,port".
 *
 * .adt_policy is the table that ip_set_ad() passes to nla_parse_nested()
 * (as set->type->adt_policy) when parsing the nested IPSET_ATTR_DATA
 * attributes. An index with no entry here is zero-initialized, i.e. its
 * .type is 0 (NLA_UNSPEC), as the crash dump of adt_policy[12] shows.
 * The line prefixed with '+' is the fix described in this article.
 */
static struct ip_set_type hash_ipport_type __read_mostly = {
	.name		= "hash:ip,port",
	.protocol	= IPSET_PROTOCOL,
	.features	= IPSET_TYPE_IP | IPSET_TYPE_PORT,
	.dimension	= IPSET_DIM_TWO,
	.family		= NFPROTO_UNSPEC,
	.revision_min	= IPSET_TYPE_REV_MIN,
	.revision_max	= IPSET_TYPE_REV_MAX,
	.create_flags[IPSET_TYPE_REV_MAX] = IPSET_CREATE_FLAG_BUCKETSIZE,
	.create		= hash_ipport_create,
	.create_policy	= {
		[IPSET_ATTR_HASHSIZE]	= { .type = NLA_U32 },
		[IPSET_ATTR_MAXELEM]	= { .type = NLA_U32 },
		[IPSET_ATTR_INITVAL]	= { .type = NLA_U32 },
		[IPSET_ATTR_BUCKETSIZE]	= { .type = NLA_U8 },
		[IPSET_ATTR_RESIZE]	= { .type = NLA_U8  },
		[IPSET_ATTR_PROTO]	= { .type = NLA_U8 },
		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },
		[IPSET_ATTR_CADT_FLAGS]	= { .type = NLA_U32 },
	},
	.adt_policy	= {
		[IPSET_ATTR_IP]		= { .type = NLA_NESTED },
		[IPSET_ATTR_IP_TO]	= { .type = NLA_NESTED },
		[IPSET_ATTR_PORT]	= { .type = NLA_U16 },
		[IPSET_ATTR_PORT_TO]	= { .type = NLA_U16 },
		[IPSET_ATTR_CIDR]	= { .type = NLA_U8 },
		[IPSET_ATTR_PROTO]	= { .type = NLA_U8 },
		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },
		[IPSET_ATTR_LINENO]	= { .type = NLA_U32 },
		[IPSET_ATTR_BYTES]	= { .type = NLA_U64 },
		[IPSET_ATTR_PACKETS]	= { .type = NLA_U64 },
		[IPSET_ATTR_COMMENT]	= { .type = NLA_NUL_STRING,
					    .len  = IPSET_MAX_COMMENT_SIZE },
		[IPSET_ATTR_SKBMARK]	= { .type = NLA_U64 },
		[IPSET_ATTR_SKBPRIO]	= { .type = NLA_U32 },
		[IPSET_ATTR_SKBQUEUE]	= { .type = NLA_U16 },
+		[IPSET_ATTR_PRIVATE1]	= { .type = NLA_U32 },	//with this policy entry for type=12 added, the command works
	},
	.me		= THIS_MODULE,
};

那么为什么升级前是好的呢? 对比代码发现,旧内核代码并没有检查 type 为 NLA_UNSPEC 的policy,也就是直接忽略了。而新内核检查更严格,只要收到未在policy中定义的属性,就会判定为错误。

4. 总结

这次升级内核遇到了好多因为检查不通过而失败的问题了。归根结底是内核为了安全,检查越来越严格了。

本文访问次数:...