Merge branch 'bpf-int128-btf'

Yonghong Song says: ==================== Previous maximum supported integer bit width is 64. But the __int128 type has been supported by most (if not all) 64bit architectures including bpf for both gcc and clang. The kernel itself uses __int128 for x64 and arm64. Some bcc tools are using __int128 in bpf programs to describe ipv6 addresses. Without 128bit int support, the vmlinux BTF won't work and those bpf programs using __int128 cannot utilize BTF. This patch set therefore implements BTF __int128 support. ==================== Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>

Merge branch 'bpf-int128-btf'
Yonghong Song says: ==================== Previous maximum supported integer bit width is 64. But the __int128 type has been supported by most (if not all) 64bit architectures including bpf for both gcc and clang. The kernel itself uses __int128 for x64 and arm64. Some bcc tools are using __int128 in bpf programs to describe ipv6 addresses. Without 128bit int support, the vmlinux BTF won't work and those bpf programs using __int128 cannot utilize BTF. This patch set therefore implements BTF __int128 support. ==================== Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
e13279e2 · Daniel Borkmann · eeedd352 · e86e5138 · e13279e2 · e13279e2
Commit e13279e2 authored Jan 16, 2019 by Daniel Borkmann
Showing with 418 additions and 68 deletions

kernel/bpf/btf.c kernel/bpf/btf.c +85 -19

tools/bpf/bpftool/btf_dumper.c tools/bpf/bpftool/btf_dumper.c +86 -12

tools/testing/selftests/bpf/test_btf.c tools/testing/selftests/bpf/test_btf.c +247 -37

No files found.
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -157,7 +157,7 @@
 *
 */

-#define BITS_PER_U64 (sizeof(u64) * BITS_PER_BYTE)
+#define BITS_PER_U128 (sizeof(u64) * BITS_PER_BYTE * 2)
 #define BITS_PER_BYTE_MASK (BITS_PER_BYTE - 1)
 #define BITS_PER_BYTE_MASKED(bits) ((bits) & BITS_PER_BYTE_MASK)
 #define BITS_ROUNDDOWN_BYTES(bits) ((bits) >> 3)
@@ -525,7 +525,7 @@ const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id)

 /*
 * Regular int is not a bit field and it must be either
- * u8/u16/u32/u64.
+ * u8/u16/u32/u64 or __int128.
 */
 static bool btf_type_int_is_regular(const struct btf_type *t)
 {
@@ -538,7 +538,8 @@ static bool btf_type_int_is_regular(const struct btf_type *t)
 	if (BITS_PER_BYTE_MASKED(nr_bits) ||
 	    BTF_INT_OFFSET(int_data) ||
 	    (nr_bytes != sizeof(u8) && nr_bytes != sizeof(u16) &&
-	     nr_bytes != sizeof(u32) && nr_bytes != sizeof(u64))) {
+	     nr_bytes != sizeof(u32) && nr_bytes != sizeof(u64) &&
+	     nr_bytes != (2 * sizeof(u64)))) {
 		return false;
 	}

@@ -1063,9 +1064,9 @@ static int btf_int_check_member(struct btf_verifier_env *env,
 	nr_copy_bits = BTF_INT_BITS(int_data) +
 		BITS_PER_BYTE_MASKED(struct_bits_off);

-	if (nr_copy_bits > BITS_PER_U64) {
+	if (nr_copy_bits > BITS_PER_U128) {
 		btf_verifier_log_member(env, struct_type, member,
-					"nr_copy_bits exceeds 64");
+					"nr_copy_bits exceeds 128");
 		return -EINVAL;
 	}

@@ -1119,9 +1120,9 @@ static int btf_int_check_kflag_member(struct btf_verifier_env *env,

 	bytes_offset = BITS_ROUNDDOWN_BYTES(struct_bits_off);
 	nr_copy_bits = nr_bits + BITS_PER_BYTE_MASKED(struct_bits_off);
-	if (nr_copy_bits > BITS_PER_U64) {
+	if (nr_copy_bits > BITS_PER_U128) {
 		btf_verifier_log_member(env, struct_type, member,
-					"nr_copy_bits exceeds 64");
+					"nr_copy_bits exceeds 128");
 		return -EINVAL;
 	}

@@ -1168,9 +1169,9 @@ static s32 btf_int_check_meta(struct btf_verifier_env *env,

 	nr_bits = BTF_INT_BITS(int_data) + BTF_INT_OFFSET(int_data);

-	if (nr_bits > BITS_PER_U64) {
+	if (nr_bits > BITS_PER_U128) {
 		btf_verifier_log_type(env, t, "nr_bits exceeds %zu",
-				      BITS_PER_U64);
+				      BITS_PER_U128);
 		return -EINVAL;
 	}

@@ -1211,31 +1212,93 @@ static void btf_int_log(struct btf_verifier_env *env,
 			 btf_int_encoding_str(BTF_INT_ENCODING(int_data)));
 }

+static void btf_int128_print(struct seq_file *m, void *data)
+{
+	/* data points to a __int128 number.
+	 * Suppose
+	 *     int128_num = *(__int128 *)data;
+	 * The below formulas shows what upper_num and lower_num represents:
+	 *     upper_num = int128_num >> 64;
+	 *     lower_num = int128_num & 0xffffffffFFFFFFFFULL;
+	 */
+	u64 upper_num, lower_num;
+
+#ifdef __BIG_ENDIAN_BITFIELD
+	upper_num = *(u64 *)data;
+	lower_num = *(u64 *)(data + 8);
+#else
+	upper_num = *(u64 *)(data + 8);
+	lower_num = *(u64 *)data;
+#endif
+	if (upper_num == 0)
+		seq_printf(m, "0x%llx", lower_num);
+	else
+		seq_printf(m, "0x%llx%016llx", upper_num, lower_num);
+}
+
+static void btf_int128_shift(u64 *print_num, u16 left_shift_bits,
+			     u16 right_shift_bits)
+{
+	u64 upper_num, lower_num;
+
+#ifdef __BIG_ENDIAN_BITFIELD
+	upper_num = print_num[0];
+	lower_num = print_num[1];
+#else
+	upper_num = print_num[1];
+	lower_num = print_num[0];
+#endif
+
+	/* shake out un-needed bits by shift/or operations */
+	if (left_shift_bits >= 64) {
+		upper_num = lower_num << (left_shift_bits - 64);
+		lower_num = 0;
+	} else {
+		upper_num = (upper_num << left_shift_bits) |
+			    (lower_num >> (64 - left_shift_bits));
+		lower_num = lower_num << left_shift_bits;
+	}
+
+	if (right_shift_bits >= 64) {
+		lower_num = upper_num >> (right_shift_bits - 64);
+		upper_num = 0;
+	} else {
+		lower_num = (lower_num >> right_shift_bits) |
+			    (upper_num << (64 - right_shift_bits));
+		upper_num = upper_num >> right_shift_bits;
+	}
+
+#ifdef __BIG_ENDIAN_BITFIELD
+	print_num[0] = upper_num;
+	print_num[1] = lower_num;
+#else
+	print_num[0] = lower_num;
+	print_num[1] = upper_num;
+#endif
+}
+
 static void btf_bitfield_seq_show(void *data, u8 bits_offset,
 				  u8 nr_bits, struct seq_file *m)
 {
 	u16 left_shift_bits, right_shift_bits;
 	u8 nr_copy_bytes;
 	u8 nr_copy_bits;
-	u64 print_num;
+	u64 print_num[2] = {};

 	nr_copy_bits = nr_bits + bits_offset;
 	nr_copy_bytes = BITS_ROUNDUP_BYTES(nr_copy_bits);

-	print_num = 0;
-	memcpy(&print_num, data, nr_copy_bytes);
+	memcpy(print_num, data, nr_copy_bytes);

 #ifdef __BIG_ENDIAN_BITFIELD
 	left_shift_bits = bits_offset;
 #else
-	left_shift_bits = BITS_PER_U64 - nr_copy_bits;
+	left_shift_bits = BITS_PER_U128 - nr_copy_bits;
 #endif
-	right_shift_bits = BITS_PER_U64 - nr_bits;
-
-	print_num <<= left_shift_bits;
-	print_num >>= right_shift_bits;
+	right_shift_bits = BITS_PER_U128 - nr_bits;

-	seq_printf(m, "0x%llx", print_num);
+	btf_int128_shift(print_num, left_shift_bits, right_shift_bits);
+	btf_int128_print(m, print_num);
 }


@@ -1250,7 +1313,7 @@ static void btf_int_bits_seq_show(const struct btf *btf,

 	/*
 	 * bits_offset is at most 7.
-	 * BTF_INT_OFFSET() cannot exceed 64 bits.
+	 * BTF_INT_OFFSET() cannot exceed 128 bits.
 	 */
 	total_bits_offset = bits_offset + BTF_INT_OFFSET(int_data);
 	data += BITS_ROUNDDOWN_BYTES(total_bits_offset);
@@ -1274,6 +1337,9 @@ static void btf_int_seq_show(const struct btf *btf, const struct btf_type *t,
 	}

 	switch (nr_bits) {
+	case 128:
+		btf_int128_print(m, data);
+		break;
 	case 64:
 		if (sign)
 			seq_printf(m, "%lld", *(s64 *)data);

--- a/tools/bpf/bpftool/btf_dumper.c
+++ b/tools/bpf/bpftool/btf_dumper.c
@@ -73,35 +73,104 @@ static int btf_dumper_array(const struct btf_dumper *d, __u32 type_id,
 	return ret;
 }

+static void btf_int128_print(json_writer_t *jw, const void *data,
+			     bool is_plain_text)
+{
+	/* data points to a __int128 number.
+	 * Suppose
+	 *     int128_num = *(__int128 *)data;
+	 * The below formulas shows what upper_num and lower_num represents:
+	 *     upper_num = int128_num >> 64;
+	 *     lower_num = int128_num & 0xffffffffFFFFFFFFULL;
+	 */
+	__u64 upper_num, lower_num;
+
+#ifdef __BIG_ENDIAN_BITFIELD
+	upper_num = *(__u64 *)data;
+	lower_num = *(__u64 *)(data + 8);
+#else
+	upper_num = *(__u64 *)(data + 8);
+	lower_num = *(__u64 *)data;
+#endif
+
+	if (is_plain_text) {
+		if (upper_num == 0)
+			jsonw_printf(jw, "0x%llx", lower_num);
+		else
+			jsonw_printf(jw, "0x%llx%016llx", upper_num, lower_num);
+	} else {
+		if (upper_num == 0)
+			jsonw_printf(jw, "\"0x%llx\"", lower_num);
+		else
+			jsonw_printf(jw, "\"0x%llx%016llx\"", upper_num, lower_num);
+	}
+}
+
+static void btf_int128_shift(__u64 *print_num, u16 left_shift_bits,
+			     u16 right_shift_bits)
+{
+	__u64 upper_num, lower_num;
+
+#ifdef __BIG_ENDIAN_BITFIELD
+	upper_num = print_num[0];
+	lower_num = print_num[1];
+#else
+	upper_num = print_num[1];
+	lower_num = print_num[0];
+#endif
+
+	/* shake out un-needed bits by shift/or operations */
+	if (left_shift_bits >= 64) {
+		upper_num = lower_num << (left_shift_bits - 64);
+		lower_num = 0;
+	} else {
+		upper_num = (upper_num << left_shift_bits) |
+			    (lower_num >> (64 - left_shift_bits));
+		lower_num = lower_num << left_shift_bits;
+	}
+
+	if (right_shift_bits >= 64) {
+		lower_num = upper_num >> (right_shift_bits - 64);
+		upper_num = 0;
+	} else {
+		lower_num = (lower_num >> right_shift_bits) |
+			    (upper_num << (64 - right_shift_bits));
+		upper_num = upper_num >> right_shift_bits;
+	}
+
+#ifdef __BIG_ENDIAN_BITFIELD
+	print_num[0] = upper_num;
+	print_num[1] = lower_num;
+#else
+	print_num[0] = lower_num;
+	print_num[1] = upper_num;
+#endif
+}
+
 static void btf_dumper_bitfield(__u32 nr_bits, __u8 bit_offset,
 				const void *data, json_writer_t *jw,
 				bool is_plain_text)
 {
 	int left_shift_bits, right_shift_bits;
+	__u64 print_num[2] = {};
 	int bytes_to_copy;
 	int bits_to_copy;
-	__u64 print_num;

 	bits_to_copy = bit_offset + nr_bits;
 	bytes_to_copy = BITS_ROUNDUP_BYTES(bits_to_copy);

-	print_num = 0;
-	memcpy(&print_num, data, bytes_to_copy);
+	memcpy(print_num, data, bytes_to_copy);
 #if defined(__BIG_ENDIAN_BITFIELD)
 	left_shift_bits = bit_offset;
 #elif defined(__LITTLE_ENDIAN_BITFIELD)
-	left_shift_bits = 64 - bits_to_copy;
+	left_shift_bits = 128 - bits_to_copy;
 #else
 #error neither big nor little endian
 #endif
-	right_shift_bits = 64 - nr_bits;
+	right_shift_bits = 128 - nr_bits;

-	print_num <<= left_shift_bits;
-	print_num >>= right_shift_bits;
-	if (is_plain_text)
-		jsonw_printf(jw, "0x%llx", print_num);
-	else
-		jsonw_printf(jw, "%llu", print_num);
+	btf_int128_shift(print_num, left_shift_bits, right_shift_bits);
+	btf_int128_print(jw, print_num, is_plain_text);
 }


@@ -113,7 +182,7 @@ static void btf_dumper_int_bits(__u32 int_type, __u8 bit_offset,
 	int total_bits_offset;

 	/* bits_offset is at most 7.
-	 * BTF_INT_OFFSET() cannot exceed 64 bits.
+	 * BTF_INT_OFFSET() cannot exceed 128 bits.
 	 */
 	total_bits_offset = bit_offset + BTF_INT_OFFSET(int_type);
 	data += BITS_ROUNDDOWN_BYTES(total_bits_offset);
@@ -139,6 +208,11 @@ static int btf_dumper_int(const struct btf_type *t, __u8 bit_offset,
 		return 0;
 	}

+	if (nr_bits == 128) {
+		btf_int128_print(jw, data, is_plain_text);
+		return 0;
+	}
+
 	switch (BTF_INT_ENCODING(*int_type)) {
 	case 0:
 		if (BTF_INT_BITS(*int_type) == 64)

--- a/tools/testing/selftests/bpf/test_btf.c
+++ b/tools/testing/selftests/bpf/test_btf.c