b.liu | e958203 | 2025-04-17 19:18:16 +0800 | [diff] [blame^] | 1 | // SPDX-License-Identifier: GPL-2.0-only |
| 2 | /* |
| 3 | * Copyright 2013 Google Inc. |
| 4 | * Author: Willem de Bruijn (willemb@google.com) |
| 5 | * |
| 6 | * A basic test of packet socket fanout behavior. |
| 7 | * |
| 8 | * Control: |
| 9 | * - create fanout fails as expected with illegal flag combinations |
| 10 | * - join fanout fails as expected with diverging types or flags |
| 11 | * |
| 12 | * Datapath: |
| 13 | * Open a pair of packet sockets and a pair of INET sockets, send a known |
| 14 | * number of packets across the two INET sockets and count the number of |
| 15 | * packets enqueued onto the two packet sockets. |
| 16 | * |
| 17 | * The test currently runs for |
| 18 | * - PACKET_FANOUT_HASH |
| 19 | * - PACKET_FANOUT_HASH with PACKET_FANOUT_FLAG_ROLLOVER |
| 20 | * - PACKET_FANOUT_LB |
| 21 | * - PACKET_FANOUT_CPU |
| 22 | * - PACKET_FANOUT_ROLLOVER |
| 23 | * - PACKET_FANOUT_CBPF |
| 24 | * - PACKET_FANOUT_EBPF |
| 25 | * |
| 26 | * Todo: |
| 27 | * - functionality: PACKET_FANOUT_FLAG_DEFRAG |
| 28 | */ |
| 29 | |
| 30 | #define _GNU_SOURCE /* for sched_setaffinity */ |
| 31 | |
| 32 | #include <arpa/inet.h> |
| 33 | #include <errno.h> |
| 34 | #include <fcntl.h> |
| 35 | #include <linux/unistd.h> /* for __NR_bpf */ |
| 36 | #include <linux/filter.h> |
| 37 | #include <linux/bpf.h> |
| 38 | #include <linux/if_packet.h> |
| 39 | #include <net/if.h> |
| 40 | #include <net/ethernet.h> |
| 41 | #include <netinet/ip.h> |
| 42 | #include <netinet/udp.h> |
| 43 | #include <poll.h> |
| 44 | #include <sched.h> |
| 45 | #include <stdint.h> |
| 46 | #include <stdio.h> |
| 47 | #include <stdlib.h> |
| 48 | #include <string.h> |
| 49 | #include <sys/mman.h> |
| 50 | #include <sys/socket.h> |
| 51 | #include <sys/stat.h> |
| 52 | #include <sys/types.h> |
| 53 | #include <unistd.h> |
| 54 | |
| 55 | #include "psock_lib.h" |
| 56 | |
| 57 | #define RING_NUM_FRAMES 20 |
| 58 | |
/* Open a packet socket bound to "lo" and join it to a fanout group.
 *
 * @typeflags: fanout mode and PACKET_FANOUT_FLAG_* bits, placed in the
 *             upper 16 bits of the PACKET_FANOUT option word
 * @group_id:  fanout group id, placed in the lower 16 bits
 *
 * Exits the process if the socket cannot be created, "lo" cannot be
 * resolved, bind fails, or closing a rejected socket fails.
 *
 * @return -1 if the PACKET_FANOUT request is rejected (the socket is closed
 *         first), a valid socket otherwise */
static int sock_fanout_open(uint16_t typeflags, uint16_t group_id)
{
	struct sockaddr_ll addr = {0};
	unsigned int ifindex;
	uint32_t val;
	int fd;

	fd = socket(PF_PACKET, SOCK_RAW, 0);
	if (fd < 0) {
		perror("socket packet");
		exit(1);
	}

	/* helper from psock_lib.h; presumably restricts the socket to the
	 * test's UDP traffic -- confirm against psock_lib.h */
	pair_udp_setfilter(fd);

	ifindex = if_nametoindex("lo");
	if (ifindex == 0) {
		perror("if_nametoindex");
		exit(1);
	}

	addr.sll_family = AF_PACKET;
	addr.sll_protocol = htons(ETH_P_IP);
	addr.sll_ifindex = ifindex;
	if (bind(fd, (void *) &addr, sizeof(addr))) {
		perror("bind packet");
		exit(1);
	}

	/* Compose the option word in an unsigned type: shifting typeflags
	 * with bit 15 set into the sign bit of an int is undefined. */
	val = (((uint32_t) typeflags) << 16) | group_id;
	if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &val, sizeof(val))) {
		if (close(fd)) {
			perror("close packet");
			exit(1);
		}
		return -1;
	}

	return fd;
}
| 97 | |
/* Attach a two-instruction classic BPF program to @fd through
 * PACKET_FANOUT_DATA. The program returns the byte found at packet
 * offset 80. Exits the process if the setsockopt call fails. */
static void sock_fanout_set_cbpf(int fd)
{
	struct sock_filter insns[] = {
		BPF_STMT(BPF_LD+BPF_B+BPF_ABS, 80),	/* A = byte at [80] */
		BPF_STMT(BPF_RET+BPF_A, 0),		/* return A */
	};
	struct sock_fprog fprog = {
		.len = sizeof(insns) / sizeof(insns[0]),
		.filter = insns,
	};

	if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT_DATA,
		       &fprog, sizeof(fprog)) == 0)
		return;

	perror("fanout data cbpf");
	exit(1);
}
| 115 | |
/* Read back the PACKET_FANOUT option of @fd and split it into its halves.
 *
 * @typeflags: receives the upper 16 bits of the option word
 * @group_id:  receives the lower 16 bits of the option word
 *
 * Exits the process if getsockopt fails. */
static void sock_fanout_getopts(int fd, uint16_t *typeflags, uint16_t *group_id)
{
	int sockopt;
	socklen_t sockopt_len = sizeof(sockopt);
	uint32_t word;

	if (getsockopt(fd, SOL_PACKET, PACKET_FANOUT,
		       &sockopt, &sockopt_len)) {
		perror("failed to getsockopt");
		exit(1);
	}

	/* Decode as unsigned so the shift never acts on a negative value,
	 * and mask exactly the 16 bits that fit in *group_id. */
	word = (uint32_t) sockopt;
	*typeflags = word >> 16;
	*group_id = word & 0xffff;
}
| 129 | |
| 130 | static void sock_fanout_set_ebpf(int fd) |
| 131 | { |
| 132 | static char log_buf[65536]; |
| 133 | |
| 134 | const int len_off = __builtin_offsetof(struct __sk_buff, len); |
| 135 | struct bpf_insn prog[] = { |
| 136 | { BPF_ALU64 | BPF_MOV | BPF_X, 6, 1, 0, 0 }, |
| 137 | { BPF_LDX | BPF_W | BPF_MEM, 0, 6, len_off, 0 }, |
| 138 | { BPF_JMP | BPF_JGE | BPF_K, 0, 0, 1, DATA_LEN }, |
| 139 | { BPF_JMP | BPF_JA | BPF_K, 0, 0, 4, 0 }, |
| 140 | { BPF_LD | BPF_B | BPF_ABS, 0, 0, 0, 0x50 }, |
| 141 | { BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 2, DATA_CHAR }, |
| 142 | { BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 1, DATA_CHAR_1 }, |
| 143 | { BPF_ALU | BPF_MOV | BPF_K, 0, 0, 0, 0 }, |
| 144 | { BPF_JMP | BPF_EXIT, 0, 0, 0, 0 } |
| 145 | }; |
| 146 | union bpf_attr attr; |
| 147 | int pfd; |
| 148 | |
| 149 | memset(&attr, 0, sizeof(attr)); |
| 150 | attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; |
| 151 | attr.insns = (unsigned long) prog; |
| 152 | attr.insn_cnt = sizeof(prog) / sizeof(prog[0]); |
| 153 | attr.license = (unsigned long) "GPL"; |
| 154 | attr.log_buf = (unsigned long) log_buf, |
| 155 | attr.log_size = sizeof(log_buf), |
| 156 | attr.log_level = 1, |
| 157 | |
| 158 | pfd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr)); |
| 159 | if (pfd < 0) { |
| 160 | perror("bpf"); |
| 161 | fprintf(stderr, "bpf verifier:\n%s\n", log_buf); |
| 162 | exit(1); |
| 163 | } |
| 164 | |
| 165 | if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT_DATA, &pfd, sizeof(pfd))) { |
| 166 | perror("fanout data ebpf"); |
| 167 | exit(1); |
| 168 | } |
| 169 | |
| 170 | if (close(pfd)) { |
| 171 | perror("close ebpf"); |
| 172 | exit(1); |
| 173 | } |
| 174 | } |
| 175 | |
| 176 | static char *sock_fanout_open_ring(int fd) |
| 177 | { |
| 178 | struct tpacket_req req = { |
| 179 | .tp_block_size = getpagesize(), |
| 180 | .tp_frame_size = getpagesize(), |
| 181 | .tp_block_nr = RING_NUM_FRAMES, |
| 182 | .tp_frame_nr = RING_NUM_FRAMES, |
| 183 | }; |
| 184 | char *ring; |
| 185 | int val = TPACKET_V2; |
| 186 | |
| 187 | if (setsockopt(fd, SOL_PACKET, PACKET_VERSION, (void *) &val, |
| 188 | sizeof(val))) { |
| 189 | perror("packetsock ring setsockopt version"); |
| 190 | exit(1); |
| 191 | } |
| 192 | if (setsockopt(fd, SOL_PACKET, PACKET_RX_RING, (void *) &req, |
| 193 | sizeof(req))) { |
| 194 | perror("packetsock ring setsockopt"); |
| 195 | exit(1); |
| 196 | } |
| 197 | |
| 198 | ring = mmap(0, req.tp_block_size * req.tp_block_nr, |
| 199 | PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); |
| 200 | if (ring == MAP_FAILED) { |
| 201 | perror("packetsock ring mmap"); |
| 202 | exit(1); |
| 203 | } |
| 204 | |
| 205 | return ring; |
| 206 | } |
| 207 | |
| 208 | static int sock_fanout_read_ring(int fd, void *ring) |
| 209 | { |
| 210 | struct tpacket2_hdr *header = ring; |
| 211 | int count = 0; |
| 212 | |
| 213 | while (count < RING_NUM_FRAMES && header->tp_status & TP_STATUS_USER) { |
| 214 | count++; |
| 215 | header = ring + (count * getpagesize()); |
| 216 | } |
| 217 | |
| 218 | return count; |
| 219 | } |
| 220 | |
/* Compare the fill level of both rings against an expected pair.
 *
 * The pair matches in either order: (fds[0], fds[1]) may hold
 * (expect[0], expect[1]) or (expect[1], expect[0]).
 *
 * @return 0 on a match, 1 (after printing a warning) otherwise */
static int sock_fanout_read(int fds[], char *rings[], const int expect[])
{
	const int first = sock_fanout_read_ring(fds[0], rings[0]);
	const int second = sock_fanout_read_ring(fds[1], rings[1]);
	int in_order, swapped;

	fprintf(stderr, "info: count=%d,%d, expect=%d,%d\n",
		first, second, expect[0], expect[1]);

	in_order = (first == expect[0]) && (second == expect[1]);
	swapped = (first == expect[1]) && (second == expect[0]);
	if (in_order || swapped)
		return 0;

	fprintf(stderr, "warning: incorrect queue lengths\n");
	return 1;
}
| 239 | |
/* Check that an illegal mode + flag combination is refused: rollover mode
 * together with the rollover flag must not yield a socket. */
static void test_control_single(void)
{
	int fd;

	fprintf(stderr, "test: control single socket\n");

	fd = sock_fanout_open(PACKET_FANOUT_ROLLOVER |
			      PACKET_FANOUT_FLAG_ROLLOVER, 0);
	if (fd == -1)
		return;

	fprintf(stderr, "ERROR: opened socket with dual rollover\n");
	exit(1);
}
| 251 | |
/* Check that joining an existing group with different modes or flags is
 * refused, while joining with identical settings succeeds. */
static void test_control_group(void)
{
	/* join attempts on group 0 that must fail, tried in this order */
	static const struct {
		uint16_t typeflags;
		const char *errmsg;
	} rejected[] = {
		{ PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_DEFRAG,
		  "ERROR: joined group with wrong flag defrag\n" },
		{ PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_ROLLOVER,
		  "ERROR: joined group with wrong flag ro\n" },
		{ PACKET_FANOUT_CPU,
		  "ERROR: joined group with wrong mode\n" },
	};
	int owner, joiner;
	size_t i;

	fprintf(stderr, "test: control multiple sockets\n");

	owner = sock_fanout_open(PACKET_FANOUT_HASH, 0);
	if (owner == -1) {
		fprintf(stderr, "ERROR: failed to open HASH socket\n");
		exit(1);
	}

	for (i = 0; i < sizeof(rejected) / sizeof(rejected[0]); i++) {
		if (sock_fanout_open(rejected[i].typeflags, 0) == -1)
			continue;
		fputs(rejected[i].errmsg, stderr);
		exit(1);
	}

	joiner = sock_fanout_open(PACKET_FANOUT_HASH, 0);
	if (joiner == -1) {
		fprintf(stderr, "ERROR: failed to join group\n");
		exit(1);
	}

	if (close(joiner) || close(owner)) {
		fprintf(stderr, "ERROR: closing sockets\n");
		exit(1);
	}
}
| 288 | |
/* Test creating fanout groups with unique ids.
 *
 * Creates a group with PACKET_FANOUT_FLAG_UNIQUEID, checks the reported
 * typeflags, then verifies that the assigned id can be joined with the same
 * mode but not with a different one. A second unique-id group is created,
 * and a request combining the unique-id flag with that group's explicit id
 * is expected to fail. */
static void test_unique_fanout_group_ids(void)
{
	uint16_t typeflags, first_group_id, second_group_id;
	int creator, member, other;

	fprintf(stderr, "test: unique ids\n");

	creator = sock_fanout_open(PACKET_FANOUT_HASH |
				   PACKET_FANOUT_FLAG_UNIQUEID, 0);
	if (creator == -1) {
		fprintf(stderr, "ERROR: failed to create a unique id group.\n");
		exit(1);
	}

	sock_fanout_getopts(creator, &typeflags, &first_group_id);
	if (typeflags != PACKET_FANOUT_HASH) {
		fprintf(stderr, "ERROR: unexpected typeflags %x\n", typeflags);
		exit(1);
	}

	/* a different mode must not be able to join the new group */
	if (sock_fanout_open(PACKET_FANOUT_CPU, first_group_id) != -1) {
		fprintf(stderr, "ERROR: joined group with wrong type.\n");
		exit(1);
	}

	member = sock_fanout_open(PACKET_FANOUT_HASH, first_group_id);
	if (member == -1) {
		fprintf(stderr,
			"ERROR: failed to join previously created group.\n");
		exit(1);
	}

	other = sock_fanout_open(PACKET_FANOUT_HASH |
				 PACKET_FANOUT_FLAG_UNIQUEID, 0);
	if (other == -1) {
		fprintf(stderr,
			"ERROR: failed to create a second unique id group.\n");
		exit(1);
	}

	/* unique-id flag plus an explicit group id must be refused */
	sock_fanout_getopts(other, &typeflags, &second_group_id);
	if (sock_fanout_open(PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_UNIQUEID,
			     second_group_id) != -1) {
		fprintf(stderr,
			"ERROR: specified a group id when requesting unique id\n");
		exit(1);
	}

	if (close(creator) || close(member) || close(other)) {
		fprintf(stderr, "ERROR: closing sockets\n");
		exit(1);
	}
}
| 343 | |
| 344 | static int test_datapath(uint16_t typeflags, int port_off, |
| 345 | const int expect1[], const int expect2[]) |
| 346 | { |
| 347 | const int expect0[] = { 0, 0 }; |
| 348 | char *rings[2]; |
| 349 | uint8_t type = typeflags & 0xFF; |
| 350 | int fds[2], fds_udp[2][2], ret; |
| 351 | |
| 352 | fprintf(stderr, "\ntest: datapath 0x%hx ports %hu,%hu\n", |
| 353 | typeflags, (uint16_t)PORT_BASE, |
| 354 | (uint16_t)(PORT_BASE + port_off)); |
| 355 | |
| 356 | fds[0] = sock_fanout_open(typeflags, 0); |
| 357 | fds[1] = sock_fanout_open(typeflags, 0); |
| 358 | if (fds[0] == -1 || fds[1] == -1) { |
| 359 | fprintf(stderr, "ERROR: failed open\n"); |
| 360 | exit(1); |
| 361 | } |
| 362 | if (type == PACKET_FANOUT_CBPF) |
| 363 | sock_fanout_set_cbpf(fds[0]); |
| 364 | else if (type == PACKET_FANOUT_EBPF) |
| 365 | sock_fanout_set_ebpf(fds[0]); |
| 366 | |
| 367 | rings[0] = sock_fanout_open_ring(fds[0]); |
| 368 | rings[1] = sock_fanout_open_ring(fds[1]); |
| 369 | pair_udp_open(fds_udp[0], PORT_BASE); |
| 370 | pair_udp_open(fds_udp[1], PORT_BASE + port_off); |
| 371 | sock_fanout_read(fds, rings, expect0); |
| 372 | |
| 373 | /* Send data, but not enough to overflow a queue */ |
| 374 | pair_udp_send(fds_udp[0], 15); |
| 375 | pair_udp_send_char(fds_udp[1], 5, DATA_CHAR_1); |
| 376 | ret = sock_fanout_read(fds, rings, expect1); |
| 377 | |
| 378 | /* Send more data, overflow the queue */ |
| 379 | pair_udp_send_char(fds_udp[0], 15, DATA_CHAR_1); |
| 380 | /* TODO: ensure consistent order between expect1 and expect2 */ |
| 381 | ret |= sock_fanout_read(fds, rings, expect2); |
| 382 | |
| 383 | if (munmap(rings[1], RING_NUM_FRAMES * getpagesize()) || |
| 384 | munmap(rings[0], RING_NUM_FRAMES * getpagesize())) { |
| 385 | fprintf(stderr, "close rings\n"); |
| 386 | exit(1); |
| 387 | } |
| 388 | if (close(fds_udp[1][1]) || close(fds_udp[1][0]) || |
| 389 | close(fds_udp[0][1]) || close(fds_udp[0][0]) || |
| 390 | close(fds[1]) || close(fds[0])) { |
| 391 | fprintf(stderr, "close datapath\n"); |
| 392 | exit(1); |
| 393 | } |
| 394 | |
| 395 | return ret; |
| 396 | } |
| 397 | |
/* Pin the calling process to the single CPU @cpuid.
 *
 * Exits the process if sched_setaffinity fails with anything but EINVAL.
 *
 * @return 0 on success, 1 if sched_setaffinity failed with EINVAL */
static int set_cpuaffinity(int cpuid)
{
	cpu_set_t cpus;

	CPU_ZERO(&cpus);
	CPU_SET(cpuid, &cpus);

	if (sched_setaffinity(0, sizeof(cpus), &cpus) == 0)
		return 0;

	if (errno == EINVAL)
		return 1;

	fprintf(stderr, "setaffinity %d\n", cpuid);
	exit(1);
}
| 414 | |
/* Run the control tests, then the datapath test once per fanout mode.
 *
 * Each expect_* table holds two pairs of ring fill levels: row 0 is checked
 * after the first send phase of test_datapath, row 1 after the second.
 *
 * @return 0 if every datapath test matched, 1 otherwise */
int main(int argc, char **argv)
{
	const int expect_hash[2][2]	= { { 15, 5 },  { 20, 5 } };
	const int expect_hash_rb[2][2]	= { { 15, 5 },  { 20, 15 } };
	const int expect_lb[2][2]	= { { 10, 10 }, { 18, 17 } };
	const int expect_rb[2][2]	= { { 15, 5 },  { 20, 15 } };
	const int expect_cpu0[2][2]	= { { 20, 0 },  { 20, 0 } };
	const int expect_cpu1[2][2]	= { { 0, 20 },  { 0, 20 } };
	const int expect_bpf[2][2]	= { { 15, 5 },  { 15, 20 } };
	const int expect_uniqueid[2][2] = { { 20, 20},  { 20, 20 } };
	int port_off = 2, tries = 20, ret;

	(void) argc;
	(void) argv;

	test_control_single();
	test_control_group();
	test_unique_fanout_group_ids();

	/* find a set of ports that do not collide onto the same socket */
	ret = test_datapath(PACKET_FANOUT_HASH, port_off,
			    expect_hash[0], expect_hash[1]);
	while (ret) {
		fprintf(stderr, "info: trying alternate ports (%d)\n", tries);
		ret = test_datapath(PACKET_FANOUT_HASH, ++port_off,
				    expect_hash[0], expect_hash[1]);
		/* give up only when the retry itself failed; a successful
		 * final attempt must not be reported as a collision */
		if (ret && !--tries) {
			fprintf(stderr, "too many collisions\n");
			return 1;
		}
	}

	ret |= test_datapath(PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_ROLLOVER,
			     port_off, expect_hash_rb[0], expect_hash_rb[1]);
	ret |= test_datapath(PACKET_FANOUT_LB,
			     port_off, expect_lb[0], expect_lb[1]);
	ret |= test_datapath(PACKET_FANOUT_ROLLOVER,
			     port_off, expect_rb[0], expect_rb[1]);

	ret |= test_datapath(PACKET_FANOUT_CBPF,
			     port_off, expect_bpf[0], expect_bpf[1]);
	ret |= test_datapath(PACKET_FANOUT_EBPF,
			     port_off, expect_bpf[0], expect_bpf[1]);

	set_cpuaffinity(0);
	ret |= test_datapath(PACKET_FANOUT_CPU, port_off,
			     expect_cpu0[0], expect_cpu0[1]);
	/* the second CPU run is skipped when pinning to CPU 1 is refused */
	if (!set_cpuaffinity(1))
		/* TODO: test that choice alternates with previous */
		ret |= test_datapath(PACKET_FANOUT_CPU, port_off,
				     expect_cpu1[0], expect_cpu1[1]);

	ret |= test_datapath(PACKET_FANOUT_FLAG_UNIQUEID, port_off,
			     expect_uniqueid[0], expect_uniqueid[1]);

	if (ret)
		return 1;

	printf("OK. All tests passed\n");
	return 0;
}