Commit f52403b6, authored by Kui-Feng Lee and committed by Martin KaFai Lau
Browse files

selftests/bpf: Add traffic monitor functions.



Add functions that capture packets and print log in the background. They
are supposed to be used for debugging flaky network test cases. A monitored
test case should call traffic_monitor_start() to start a thread to capture
packets in the background for a given namespace and call
traffic_monitor_stop() to stop capturing. (Alternatively, use the '-m'
option implemented by later patches.)

    lo      In  IPv4 127.0.0.1:40265 > 127.0.0.1:55907: TCP, length 68, SYN
    lo      In  IPv4 127.0.0.1:55907 > 127.0.0.1:40265: TCP, length 60, SYN, ACK
    lo      In  IPv4 127.0.0.1:40265 > 127.0.0.1:55907: TCP, length 60, ACK
    lo      In  IPv4 127.0.0.1:55907 > 127.0.0.1:40265: TCP, length 52, ACK
    lo      In  IPv4 127.0.0.1:40265 > 127.0.0.1:55907: TCP, length 52, FIN, ACK
    lo      In  IPv4 127.0.0.1:55907 > 127.0.0.1:40265: TCP, length 52, RST, ACK
    Packet file: packets-2173-86-select_reuseport:sockhash_IPv4_TCP_LOOPBACK_test_detach_bpf-test.log
    #280/87 select_reuseport/sockhash IPv4/TCP LOOPBACK test_detach_bpf:OK

The above is the output of an example. It shows the packets of a connection
and the name of the file that contains captured packets in the directory
/tmp/tmon_pcap. The file can be loaded by tcpdump or wireshark.

This feature only works if libpcap is available (as detected by pkg-config).

Acked-by: Stanislav Fomichev <sdf@fomichev.me>
Signed-off-by: Kui-Feng Lee <thinker.li@gmail.com>
Link: https://lore.kernel.org/r/20240815053254.470944-2-thinker.li@gmail.com


Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
parent b97ce547
Loading
Loading
Loading
Loading
+4 −0
Original line number Diff line number Diff line
@@ -41,6 +41,10 @@ CFLAGS += -g $(OPT_FLAGS) -rdynamic \
LDFLAGS += $(SAN_LDFLAGS)
LDLIBS += $(LIBELF_LIBS) -lz -lrt -lpthread

# Optional libpcap support for the traffic monitor.  pkg-config supplies
# the link and compile flags, and TRAFFIC_MONITOR is defined only when
# "pkg-config --exists libpcap" succeeds.  Errors from pkg-config are
# discarded, so a missing libpcap simply leaves the feature compiled out.
LDLIBS += $(shell $(PKG_CONFIG) --libs libpcap 2>/dev/null)
CFLAGS += $(shell $(PKG_CONFIG) --cflags libpcap 2>/dev/null)
CFLAGS += $(shell $(PKG_CONFIG) --exists libpcap 2>/dev/null && echo "-DTRAFFIC_MONITOR=1")

# The following tests perform type punning and they may break strict
# aliasing rules, which are exploited by both GCC and clang by default
# while optimizing.  This can lead to broken programs.
+454 −0
Original line number Diff line number Diff line
@@ -11,17 +11,31 @@
#include <arpa/inet.h>
#include <sys/mount.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/un.h>
#include <sys/eventfd.h>

#include <linux/err.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/limits.h>

#include <linux/ip.h>
#include <linux/udp.h>
#include <netinet/tcp.h>
#include <net/if.h>

#include "bpf_util.h"
#include "network_helpers.h"
#include "test_progs.h"

#ifdef TRAFFIC_MONITOR
/* Prevent pcap.h from including pcap/bpf.h and causing conflicts */
#define PCAP_DONT_INCLUDE_PCAP_BPF_H 1
#include <pcap/pcap.h>
#include <pcap/dlt.h>
#endif

#ifndef IPPROTO_MPTCP
#define IPPROTO_MPTCP 262
#endif
@@ -660,3 +674,443 @@ int send_recv_data(int lfd, int fd, uint32_t total_bytes)

	return err;
}

#ifdef TRAFFIC_MONITOR
/* State of one running traffic monitor.  It is allocated by
 * traffic_monitor_start(), used by the capture thread, and torn down by
 * traffic_monitor_stop().
 */
struct tmonitor_ctx {
	/* Capture handle created by traffic_monitor_prepare_pcap() */
	pcap_t *pcap;
	/* Writer of the packet file named by pkt_fname */
	pcap_dumper_t *dumper;
	/* Thread running traffic_monitor_thread() */
	pthread_t thread;
	/* eventfd written by traffic_monitor_stop() to wake up the thread */
	int wake_fd;

	/* Set to true by traffic_monitor_stop(); the thread checks it at the
	 * top of every loop iteration.
	 * NOTE(review): volatile gives no inter-thread ordering guarantee by
	 * itself; an atomic flag may be more appropriate - confirm.
	 */
	volatile bool done;
	/* Full path of the packet file */
	char pkt_fname[PATH_MAX];
	/* Selectable fd obtained from pcap_get_selectable_fd() */
	int pcap_fd;
};

/* Tell whether the protocol type field of an SLL2 header holds an
 * Ethernet protocol type.
 *
 * packet: first byte of the LINKTYPE_LINUX_SLL2 header. Bytes 8 and 9 are
 *         read as the ARPHRD_ type in network byte order.
 *
 * Returns false, after printing the ARPHRD_ type, for the link types whose
 * protocol type field has another meaning, and true for everything else.
 *
 * https://www.tcpdump.org/linktypes/LINKTYPE_LINUX_SLL2.html
 */
static bool is_ethernet(const u_char *packet)
{
	bool non_ethernet;
	u16 hatype;

	memcpy(&hatype, packet + 8, sizeof(hatype));
	hatype = ntohs(hatype);

	non_ethernet = hatype == 770 ||	/* ARPHRD_FRAD */
		       hatype == 778 ||	/* ARPHRD_IPGRE */
		       hatype == 803;	/* ARPHRD_IEEE80211_RADIOTAP */
	if (!non_ethernet)
		return true;

	printf("Packet captured: arphdr_type=%d\n", hatype);
	return false;
}

/* Short labels for packet type values, indexed by the value itself */
static const char * const pkt_types[] = {
	[0] = "In",
	[1] = "B",	/* Broadcast */
	[2] = "M",	/* Multicast */
	[3] = "C",	/* Captured with the promiscuous mode */
	[4] = "Out",
};

/* Return the label of a packet type, or "Unknown" if it has no entry in
 * pkt_types.
 */
static const char *pkt_type_str(u16 pkt_type)
{
	return pkt_type < ARRAY_SIZE(pkt_types) ? pkt_types[pkt_type] : "Unknown";
}

/* Print a one-line summary of the transport layer of a captured packet.
 *
 * packet: first byte of the transport header.
 * len: length to report, as read from the IP header by the caller.
 * ifindex: index of the interface the packet was captured on. If it
 *          cannot be translated to a name, "unknown(<ifindex>)" is shown.
 * src_addr, dst_addr: printable source and destination addresses.
 * proto: IPPROTO_* value from the IP header.
 * ipv6: true for an IPv6 packet, false for IPv4.
 * pkt_type: packet type value, rendered by pkt_type_str().
 *
 * TCP and UDP lines show the ports, and TCP lines also the FIN, SYN, RST
 * and ACK flags. ICMP and ICMPv6 lines show the type and code. Any other
 * protocol is reported by its number only.
 */
static void show_transport(const u_char *packet, u16 len, u32 ifindex,
			   const char *src_addr, const char *dst_addr,
			   u16 proto, bool ipv6, u8 pkt_type)
{
	char *ifname, _ifname[IF_NAMESIZE];
	/* Stays NULL unless the packet is TCP, so the flag printing below
	 * never reads an unset pointer.
	 */
	const struct tcphdr *tcp = NULL;
	const struct udphdr *udp;
	const char *transport_str;
	u16 src_port, dst_port;

	ifname = if_indextoname(ifindex, _ifname);
	if (!ifname) {
		/* ifindex is unsigned, so it is formatted with %u */
		snprintf(_ifname, sizeof(_ifname), "unknown(%u)", ifindex);
		ifname = _ifname;
	}

	switch (proto) {
	case IPPROTO_UDP:
		udp = (const struct udphdr *)packet;
		src_port = ntohs(udp->source);
		dst_port = ntohs(udp->dest);
		transport_str = "UDP";
		break;
	case IPPROTO_TCP:
		tcp = (const struct tcphdr *)packet;
		src_port = ntohs(tcp->source);
		dst_port = ntohs(tcp->dest);
		transport_str = "TCP";
		break;
	case IPPROTO_ICMP:
		/* The first two bytes of the message are type and code */
		printf("%-7s %-3s IPv4 %s > %s: ICMP, length %d, type %d, code %d\n",
		       ifname, pkt_type_str(pkt_type), src_addr, dst_addr, len,
		       packet[0], packet[1]);
		return;
	case IPPROTO_ICMPV6:
		printf("%-7s %-3s IPv6 %s > %s: ICMPv6, length %d, type %d, code %d\n",
		       ifname, pkt_type_str(pkt_type), src_addr, dst_addr, len,
		       packet[0], packet[1]);
		return;
	default:
		printf("%-7s %-3s %s %s > %s: protocol %d\n",
		       ifname, pkt_type_str(pkt_type), ipv6 ? "IPv6" : "IPv4",
		       src_addr, dst_addr, proto);
		return;
	}

	/* TCP or UDP: the line is built by several printf() calls, so hold
	 * the stdout lock to keep other threads from interleaving output.
	 */
	flockfile(stdout);
	if (ipv6)
		printf("%-7s %-3s IPv6 %s.%d > %s.%d: %s, length %d",
		       ifname, pkt_type_str(pkt_type), src_addr, src_port,
		       dst_addr, dst_port, transport_str, len);
	else
		printf("%-7s %-3s IPv4 %s:%d > %s:%d: %s, length %d",
		       ifname, pkt_type_str(pkt_type), src_addr, src_port,
		       dst_addr, dst_port, transport_str, len);

	if (tcp) {
		if (tcp->fin)
			printf(", FIN");
		if (tcp->syn)
			printf(", SYN");
		if (tcp->rst)
			printf(", RST");
		if (tcp->ack)
			printf(", ACK");
	}

	printf("\n");
	funlockfile(stdout);
}

/* Decode the fixed IPv6 header at packet and pass what follows it to
 * show_transport().
 *
 * The reported length is the payload length field and the protocol is the
 * next header field of the IPv6 header. An address that cannot be
 * formatted is shown as "<invalid>".
 */
static void show_ipv6_packet(const u_char *packet, u32 ifindex, u8 pkt_type)
{
	char saddr_str[INET6_ADDRSTRLEN], daddr_str[INET6_ADDRSTRLEN];
	const struct ipv6hdr *ip6 = (const struct ipv6hdr *)packet;
	const char *src = "<invalid>";
	const char *dst = "<invalid>";

	if (inet_ntop(AF_INET6, &ip6->saddr, saddr_str, sizeof(saddr_str)))
		src = saddr_str;
	if (inet_ntop(AF_INET6, &ip6->daddr, daddr_str, sizeof(daddr_str)))
		dst = daddr_str;

	show_transport(packet + sizeof(*ip6), ntohs(ip6->payload_len),
		       ifindex, src, dst, ip6->nexthdr, true, pkt_type);
}

/* Decode the IPv4 header at packet and pass what follows it to
 * show_transport().
 *
 * The reported length is the total length field and the protocol is the
 * protocol field of the IPv4 header. An address that cannot be formatted
 * is shown as "<invalid>".
 */
static void show_ipv4_packet(const u_char *packet, u32 ifindex, u8 pkt_type)
{
	char src_buf[INET_ADDRSTRLEN], dst_buf[INET_ADDRSTRLEN];
	const struct iphdr *pkt = (const struct iphdr *)packet;
	const char *src, *dst;
	size_t hdr_len;
	u_char proto;

	src = inet_ntop(AF_INET, &pkt->saddr, src_buf, sizeof(src_buf));
	if (!src)
		src = "<invalid>";
	dst = inet_ntop(AF_INET, &pkt->daddr, dst_buf, sizeof(dst_buf));
	if (!dst)
		dst = "<invalid>";
	proto = pkt->protocol;

	/* IHL counts 32-bit words and is larger than 5 when the header
	 * carries IP options, so the transport header does not always sit
	 * right behind the fixed part of the header. A value below the
	 * minimum is malformed; fall back to the fixed header size then.
	 */
	hdr_len = pkt->ihl * 4;
	if (hdr_len < sizeof(struct iphdr))
		hdr_len = sizeof(struct iphdr);

	show_transport(packet + hdr_len,
		       ntohs(pkt->tot_len),
		       ifindex, src, dst, proto, false, pkt_type);
}

static void *traffic_monitor_thread(void *arg)
{
	char *ifname, _ifname[IF_NAMESIZE];
	const u_char *packet, *payload;
	struct tmonitor_ctx *ctx = arg;
	pcap_dumper_t *dumper = ctx->dumper;
	int fd = ctx->pcap_fd, nfds, r;
	int wake_fd = ctx->wake_fd;
	struct pcap_pkthdr header;
	pcap_t *pcap = ctx->pcap;
	u32 ifindex;
	fd_set fds;
	u16 proto;
	u8 ptype;

	nfds = (fd > wake_fd ? fd : wake_fd) + 1;
	FD_ZERO(&fds);

	while (!ctx->done) {
		FD_SET(fd, &fds);
		FD_SET(wake_fd, &fds);
		r = select(nfds, &fds, NULL, NULL, NULL);
		if (!r)
			continue;
		if (r < 0) {
			if (errno == EINTR)
				continue;
			log_err("Fail to select on pcap fd and wake fd");
			break;
		}

		/* This instance of pcap is non-blocking */
		packet = pcap_next(pcap, &header);
		if (!packet)
			continue;

		/* According to the man page of pcap_dump(), first argument
		 * is the pcap_dumper_t pointer even it's argument type is
		 * u_char *.
		 */
		pcap_dump((u_char *)dumper, &header, packet);

		/* Not sure what other types of packets look like. Here, we
		 * parse only Ethernet and compatible packets.
		 */
		if (!is_ethernet(packet))
			continue;

		/* Skip SLL2 header
		 * https://www.tcpdump.org/linktypes/LINKTYPE_LINUX_SLL2.html
		 *
		 * Although the document doesn't mention that, the payload
		 * doesn't include the Ethernet header. The payload starts
		 * from the first byte of the network layer header.
		 */
		payload = packet + 20;

		memcpy(&proto, packet, 2);
		proto = ntohs(proto);
		memcpy(&ifindex, packet + 4, 4);
		ifindex = ntohl(ifindex);
		ptype = packet[10];

		if (proto == ETH_P_IPV6) {
			show_ipv6_packet(payload, ifindex, ptype);
		} else if (proto == ETH_P_IP) {
			show_ipv4_packet(payload, ifindex, ptype);
		} else {
			ifname = if_indextoname(ifindex, _ifname);
			if (!ifname) {
				snprintf(_ifname, sizeof(_ifname), "unknown(%d)", ifindex);
				ifname = _ifname;
			}

			printf("%-7s %-3s Unknown network protocol type 0x%x\n",
			       ifname, pkt_type_str(ptype), proto);
		}
	}

	return NULL;
}

/* Prepare the pcap handle to capture packets.
 *
 * This pcap is non-blocking and immediate mode is enabled to receive
 * captured packets as soon as possible.  The snaplen is set to 1024 bytes
 * to limit the size of captured content. The format of the link-layer
 * header is set to DLT_LINUX_SLL2 to enable handling various link-layer
 * technologies.
 *
 * Returns the activated handle, or NULL after logging the reason if any
 * step fails. The caller releases the handle with pcap_close().
 */
static pcap_t *traffic_monitor_prepare_pcap(void)
{
	char errbuf[PCAP_ERRBUF_SIZE];
	pcap_t *pcap;
	int r;

	/* Listen on all NICs in the namespace */
	pcap = pcap_create("any", errbuf);
	if (!pcap) {
		log_err("Failed to open pcap: %s", errbuf);
		return NULL;
	}
	/* Limit the size of the packet (first N bytes) */
	r = pcap_set_snaplen(pcap, 1024);
	if (r) {
		log_err("Failed to set snaplen: %s", pcap_geterr(pcap));
		goto error;
	}
	/* To receive packets as fast as possible */
	r = pcap_set_immediate_mode(pcap, 1);
	if (r) {
		log_err("Failed to set immediate mode: %s", pcap_geterr(pcap));
		goto error;
	}
	r = pcap_setnonblock(pcap, 1, errbuf);
	if (r) {
		log_err("Failed to set nonblock: %s", errbuf);
		goto error;
	}
	/* pcap_activate() returns a negative value on error, but a positive
	 * value when it succeeded with a warning. The handle is usable in
	 * the latter case, so only negative values are treated as failures.
	 */
	r = pcap_activate(pcap);
	if (r < 0) {
		log_err("Failed to activate pcap: %s", pcap_geterr(pcap));
		goto error;
	}
	/* Determine the format of the link-layer header */
	r = pcap_set_datalink(pcap, DLT_LINUX_SLL2);
	if (r) {
		log_err("Failed to set datalink: %s", pcap_geterr(pcap));
		goto error;
	}

	return pcap;
error:
	pcap_close(pcap);
	return NULL;
}

/* Build a file-name friendly label for a test.
 *
 * buf, len: destination buffer and its size; the result is truncated to
 *           fit.
 * test_name: name of the test.
 * subtest_name: name of the subtest, appended after "__", or NULL.
 *
 * Every '/' and ' ' in the result is replaced by '_'.
 */
static void encode_test_name(char *buf, size_t len, const char *test_name, const char *subtest_name)
{
	char *c;

	if (!subtest_name)
		snprintf(buf, len, "%s", test_name);
	else
		snprintf(buf, len, "%s__%s", test_name, subtest_name);

	for (c = buf; *c; c++) {
		if (*c == '/' || *c == ' ')
			*c = '_';
	}
}

/* Directory that receives the packet files */
#define PCAP_DIR "/tmp/tmon_pcap"

/* Start to monitor the network traffic in the given network namespace.
 *
 * netns: the name of the network namespace to monitor. If NULL, the
 *        current network namespace is monitored and the packet file is
 *        labelled "unknown".
 * test_name: the name of the running test.
 * subtest_name: the name of the running subtest if there is one. It
 *               should be NULL if it is not a subtest.
 *
 * This function starts a thread that captures packets going through the
 * NICs in the given network namespace. It returns a context to pass to
 * traffic_monitor_stop(), or NULL on failure.
 */
struct tmonitor_ctx *traffic_monitor_start(const char *netns, const char *test_name,
					   const char *subtest_name)
{
	const char *ns_label = netns ? netns : "unknown";
	struct nstoken *nstoken = NULL;
	struct tmonitor_ctx *ctx;
	char test_name_buf[64];
	static int tmon_seq;
	int err;

	if (netns) {
		nstoken = open_netns(netns);
		if (!nstoken)
			return NULL;
	}

	/* The context starts out zero-filled */
	ctx = calloc(1, sizeof(*ctx));
	if (!ctx) {
		log_err("Failed to malloc ctx");
		goto fail_ctx;
	}

	/* The file name carries the pid and a per-process sequence number */
	encode_test_name(test_name_buf, sizeof(test_name_buf), test_name, subtest_name);
	snprintf(ctx->pkt_fname, sizeof(ctx->pkt_fname),
		 PCAP_DIR "/packets-%d-%d-%s-%s.log", getpid(), tmon_seq++,
		 test_name_buf, ns_label);

	/* An already existing directory is fine */
	if (mkdir(PCAP_DIR, 0755) && errno != EEXIST) {
		log_err("Failed to create " PCAP_DIR);
		goto fail_pcap;
	}

	ctx->pcap = traffic_monitor_prepare_pcap();
	if (!ctx->pcap)
		goto fail_pcap;

	ctx->pcap_fd = pcap_get_selectable_fd(ctx->pcap);
	if (ctx->pcap_fd < 0) {
		log_err("Failed to get pcap fd");
		goto fail_dumper;
	}

	/* Create a packet file */
	ctx->dumper = pcap_dump_open(ctx->pcap, ctx->pkt_fname);
	if (!ctx->dumper) {
		log_err("Failed to open pcap dump: %s", ctx->pkt_fname);
		goto fail_dumper;
	}

	/* Create an eventfd to wake up the monitor thread */
	ctx->wake_fd = eventfd(0, 0);
	if (ctx->wake_fd < 0) {
		log_err("Failed to create eventfd");
		goto fail_eventfd;
	}

	err = pthread_create(&ctx->thread, NULL, traffic_monitor_thread, ctx);
	if (err) {
		log_err("Failed to create thread");
		goto fail;
	}

	close_netns(nstoken);

	return ctx;

	/* Each label undoes the steps completed before the failing one */
fail:
	close(ctx->wake_fd);

fail_eventfd:
	pcap_dump_close(ctx->dumper);
	unlink(ctx->pkt_fname);

fail_dumper:
	pcap_close(ctx->pcap);

fail_pcap:
	free(ctx);

fail_ctx:
	close_netns(nstoken);

	return NULL;
}

static void traffic_monitor_release(struct tmonitor_ctx *ctx)
{
	pcap_close(ctx->pcap);
	pcap_dump_close(ctx->dumper);

	close(ctx->wake_fd);

	free(ctx);
}

/* Stop the network traffic monitor.
 *
 * ctx: the context returned by traffic_monitor_start(). NULL is accepted
 *      and ignored.
 *
 * Asks the capture thread to exit, waits for it, prints the name of the
 * packet file and releases the context. ctx must not be used afterwards.
 */
void traffic_monitor_stop(struct tmonitor_ctx *ctx)
{
	__u64 w = 1;
	ssize_t n;

	if (!ctx)
		return;

	/* Stop the monitor thread */
	ctx->done = true;
	/* Wake up the background thread. Retry if a signal interrupts the
	 * write, and report any other failure: without the wake-up the
	 * thread only notices the request once the pcap fd becomes readable.
	 */
	do {
		n = write(ctx->wake_fd, &w, sizeof(w));
	} while (n < 0 && errno == EINTR);
	if (n != (ssize_t)sizeof(w))
		log_err("Failed to wake up the traffic monitor thread");
	pthread_join(ctx->thread, NULL);

	/* Only the file name is printed; the directory is PCAP_DIR */
	printf("Packet file: %s\n", strrchr(ctx->pkt_fname, '/') + 1);

	traffic_monitor_release(ctx);
}
#endif /* TRAFFIC_MONITOR */
+18 −0
Original line number Diff line number Diff line
@@ -136,4 +136,22 @@ static inline __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
	return csum_fold((__u32)s);
}

/* Opaque handle of a running traffic monitor */
struct tmonitor_ctx;

#ifdef TRAFFIC_MONITOR
/* Start capturing packets in the network namespace netns (the current one
 * if NULL) on behalf of the given test and optional subtest. Returns a
 * context for traffic_monitor_stop(), or NULL on failure.
 */
struct tmonitor_ctx *traffic_monitor_start(const char *netns, const char *test_name,
					   const char *subtest_name);
/* Stop the monitor and release ctx. A NULL ctx is ignored. */
void traffic_monitor_stop(struct tmonitor_ctx *ctx);
#else
/* Stubs used when TRAFFIC_MONITOR is not defined: starting always yields
 * NULL and stopping does nothing, so callers need no #ifdef of their own.
 */
static inline struct tmonitor_ctx *traffic_monitor_start(const char *netns, const char *test_name,
							 const char *subtest_name)
{
	return NULL;
}

static inline void traffic_monitor_stop(struct tmonitor_ctx *ctx)
{
}
#endif

#endif