在 Ubuntu 22.04 上,我设置了以下 iptables 规则:
iptables -I OUTPUT -d 192.168.0.0/16 -j LOG --log-prefix "CHECK1 "
iptables -I FORWARD -d 192.168.0.0/16 -j LOG --log-prefix "CHECK2 "
为了验证我的设置是否正确,我在浏览器中导航到 website.com(位于 192.168.0.0/16 子网中),并看到一条 CHECK1 消息显示在 /var/log/kern.log 中。
然后,我运行了一个 C 程序,该程序创建了一个 IP 地址为 172.30.0.1 的 tun 接口,并从中发出了一个手工制作的 TCP-SYN 数据包。该数据包的源地址为 172.30.0.1,目标地址为 192.168.255.8(website.com 的地址)。它出现在 Wireshark 中。但是,在 /var/log/kern.log 中没有相应的日志消息。
这个数据包发生了什么?
这是 iptables-save 的输出:
*filter
:INPUT ACCEPT [0:0]
:FORWARD DROP [0:0]
:OUTPUT ACCEPT [0:0]
:DOCKER - [0:0]
:DOCKER-ISOLATION-STAGE-1 - [0:0]
:DOCKER-ISOLATION-STAGE-2 - [0:0]
:DOCKER-USER - [0:0]
-A FORWARD -d 192.168.0.0/16 -j LOG --log-prefix "CHECK2 "
-A FORWARD -j DOCKER-USER
-A FORWARD -j DOCKER-ISOLATION-STAGE-1
-A FORWARD -o docker0 -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT
-A FORWARD -o docker0 -j DOCKER
-A FORWARD -i docker0 ! -o docker0 -j ACCEPT
-A FORWARD -i docker0 -o docker0 -j ACCEPT
-A OUTPUT -d 192.168.0.0/16 -j LOG --log-prefix "CHECK1 "
-A DOCKER-ISOLATION-STAGE-1 -i docker0 ! -o docker0 -j DOCKER-ISOLATION-STAGE-2
-A DOCKER-ISOLATION-STAGE-1 -j RETURN
-A DOCKER-ISOLATION-STAGE-2 -o docker0 -j DROP
-A DOCKER-ISOLATION-STAGE-2 -j RETURN
-A DOCKER-USER -j RETURN
COMMIT
*nat
:PREROUTING ACCEPT [0:0]
:INPUT ACCEPT [0:0]
:OUTPUT ACCEPT [0:0]
:POSTROUTING ACCEPT [0:0]
:DOCKER - [0:0]
-A PREROUTING -m addrtype --dst-type LOCAL -j DOCKER
-A OUTPUT ! -d 127.0.0.0/8 -m addrtype --dst-type LOCAL -j DOCKER
-A POSTROUTING -s 172.17.0.0/16 ! -o docker0 -j MASQUERADE
-A DOCKER -i docker0 -j RETURN
COMMIT
这是 C 代码(为了简洁起见,我省略了错误检查):
#include <arpa/inet.h>
#include <fcntl.h>
#include <linux/if.h>
#include <linux/if_tun.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
/*
 * Open /dev/net/tun and issue TUNSETIFF for an interface named "tun0" with
 * the flags IFF_TUN | IFF_NO_PI, then issue TUNSETOWNER with the caller's
 * effective UID.
 *
 * Returns whatever open() returned; none of the calls is checked for
 * failure.
 */
static int tunAlloc(void)
{
    struct ifreq request = {
        .ifr_name = "tun0",
        .ifr_flags = IFF_TUN | IFF_NO_PI,
    };
    int tun_fd = open("/dev/net/tun", O_RDWR);

    ioctl(tun_fd, TUNSETIFF, (void *)&request);
    ioctl(tun_fd, TUNSETOWNER, geteuid());

    return tun_fd;
}
/*
 * Assign the IPv4 address 172.30.0.1 to "tun0" and add IFF_UP | IFF_RUNNING
 * to its flags.
 *
 * The requests are issued on a temporary AF_INET datagram socket that is
 * closed before returning. No return value is checked.
 */
static void bringInterfaceUp(void)
{
    struct ifreq request = {.ifr_name = "tun0"};
    struct sockaddr_in ipv4 = {.sin_family = AF_INET};
    int ctl;

    /* Stage the address inside the request before issuing SIOCSIFADDR. */
    inet_aton("172.30.0.1", &ipv4.sin_addr);
    memcpy(&request.ifr_addr, &ipv4, sizeof(struct sockaddr));

    ctl = socket(AF_INET, SOCK_DGRAM, 0);
    ioctl(ctl, SIOCSIFADDR, &request);

    /* Fetch the current flags so the two new bits are OR-ed into them. */
    ioctl(ctl, SIOCGIFFLAGS, &request);
    request.ifr_flags |= IFF_UP | IFF_RUNNING;
    ioctl(ctl, SIOCSIFFLAGS, &request);

    close(ctl);
}
/*
 * Write one hand-built 60-byte IPv4/TCP SYN
 * (172.30.0.1:41626 -> 192.168.255.8:10001) to tap_fd in a single write().
 * The result of write() is ignored.
 */
static void emitPacket(int tap_fd)
{
    unsigned char syn[] = {
        /* IPv4 header (20 bytes) */
        0x45, 0x00, 0x00, 0x3c, /* version 4, IHL 5, total length 60 */
        0xd8, 0x6f, 0x40, 0x00, /* identification, DF flag */
        0x3f, 0x06, 0x08, 0x91, /* TTL 63, protocol 6 (TCP), checksum field */
        172, 30, 0, 1,          /* source address */
        192, 168, 255, 8,       /* destination address */

        /* TCP header (20 bytes) */
        0xa2, 0x9a, 0x27, 0x11, /* source port 41626, destination port 10001 */
        0x80, 0x0b, 0x63, 0x79, /* sequence number */
        0x00, 0x00, 0x00, 0x00, /* acknowledgement number */
        0xa0, 0x02, 0xfa, 0xf0, /* data offset 10 words, SYN, window 64240 */
        0x89, 0xd8, 0x00, 0x00, /* checksum field, urgent pointer */

        /* TCP options (20 bytes) */
        0x02, 0x04, 0x05, 0xb4, /* MSS 1460 */
        0x04, 0x02,             /* SACK permitted */
        0x08, 0x0a, 0x5b, 0x76, 0x5f, 0xd4, 0x00, 0x00, 0x00, 0x00, /* timestamps */
        0x01,                   /* NOP */
        0x03, 0x03, 0x07,       /* window scale 7 */
    };

    write(tap_fd, syn, sizeof syn);
}
/*
 * Program entry point: obtain the descriptor from tunAlloc(), configure the
 * interface via bringInterfaceUp(), hand the descriptor to emitPacket(), and
 * close it. Always returns 0.
 */
int main()
{
    int tun_fd = tunAlloc();

    bringInterfaceUp();
    emitPacket(tun_fd);
    close(tun_fd);

    return 0;
}
答案1
使用您的示例代码,我看到了您所描述的相同问题。
“Packet, where are you?”(pwru)是一个很好的诊断工具,适用于数据包似乎神秘消失的情况。当我运行:
# Run the cilium/pwru image as a privileged container that shares the host
# PID namespace and has the host's /sys/kernel/debug mounted, reporting only
# packets that match port 10001 (the destination port of the test SYN).
docker run --privileged --rm -t --pid=host \
-v /sys/kernel/debug/:/sys/kernel/debug/ \
cilium/pwru --filter-port 10001
当您的代码发出数据包时,我看到以下内容:
0xffff8cd640b80500 5 [sendpacket] netif_receive_skb
0xffff8cd640b80500 5 [sendpacket] skb_defer_rx_timestamp
0xffff8cd640b80500 5 [sendpacket] __netif_receive_skb
0xffff8cd640b80500 5 [sendpacket] __netif_receive_skb_one_core
0xffff8cd640b80500 5 [sendpacket] ip_rcv
0xffff8cd640b80500 5 [sendpacket] ip_rcv_core
0xffff8cd640b80500 5 [sendpacket] kfree_skb_reason(SKB_DROP_REASON_IP_CSUM)
0xffff8cd640b80500 5 [sendpacket] skb_release_head_state
0xffff8cd640b80500 5 [sendpacket] sock_wfree
0xffff8cd640b80500 5 [sendpacket] skb_release_data
0xffff8cd640b80500 5 [sendpacket] skb_free_head
0xffff8cd640b80500 5 [sendpacket] kfree_skbmem
这表明数据包已被丢弃,因为它包含不正确的校验和。
如果我们用 Wireshark 检查抓到的数据包,它会告诉我们正确的校验和应该是多少。修正这些校验和之后,代码变为:
/*
 * Write one hand-built 60-byte IPv4/TCP SYN
 * (172.30.0.1:41626 -> 192.168.255.8:10001) to tap_fd.
 *
 * Unlike the original packet, the IPv4 header checksum (0xf77b) and the TCP
 * checksum (0x78c3) are the correct values for these header contents.
 * A failed or short write is reported on stderr via perror().
 */
static void emitPacket(int tap_fd) {
  unsigned char packet[] = {
      /* IPv4 header (20 bytes) */
      0x45, 0x00, 0x00, 0x3c, /* version 4, IHL 5, total length 60 */
      0xd8, 0x6f, 0x40, 0x00, /* identification, DF flag */
      0x3f, 0x06, 0xf7, 0x7b, /* TTL 63, protocol 6 (TCP), header checksum */
      172, 30, 0, 1,          /* source address */
      192, 168, 255, 8,       /* destination address */

      /* TCP header (20 bytes) */
      0xa2, 0x9a, 0x27, 0x11, /* source port 41626, destination port 10001 */
      0x80, 0x0b, 0x63, 0x79, /* sequence number */
      0x00, 0x00, 0x00, 0x00, /* acknowledgement number */
      0xa0, 0x02, 0xfa, 0xf0, /* data offset 10 words, SYN, window 64240 */
      0x78, 0xc3, 0x00, 0x00, /* TCP checksum, urgent pointer */

      /* TCP options (20 bytes) */
      0x02, 0x04, 0x05, 0xb4, /* MSS 1460 */
      0x04, 0x02,             /* SACK permitted */
      0x08, 0x0a, 0x5b, 0x76, 0x5f, 0xd4, 0x00, 0x00, 0x00, 0x00, /* timestamps */
      0x01,                   /* NOP */
      0x03, 0x03, 0x07,       /* window scale 7 */
  };

  if (write(tap_fd, packet, sizeof(packet)) != (ssize_t)sizeof(packet)) {
    perror("write");
  }
}
但这在路由逻辑中失败了:
0xffff8cd8bca97100 0 [sendpacket] netif_receive_skb
0xffff8cd8bca97100 0 [sendpacket] skb_defer_rx_timestamp
0xffff8cd8bca97100 0 [sendpacket] __netif_receive_skb
0xffff8cd8bca97100 0 [sendpacket] __netif_receive_skb_one_core
0xffff8cd8bca97100 0 [sendpacket] ip_rcv
0xffff8cd8bca97100 0 [sendpacket] ip_rcv_core
0xffff8cd8bca97100 0 [sendpacket] sock_wfree
0xffff8cd8bca97100 0 [sendpacket] nf_hook_slow
0xffff8cd8bca97100 0 [sendpacket] nf_checksum
0xffff8cd8bca97100 0 [sendpacket] nf_ip_checksum
0xffff8cd8bca97100 0 [sendpacket] __skb_checksum_complete
0xffff8cd8bca97100 0 [sendpacket] tcp_v4_early_demux
0xffff8cd8bca97100 0 [sendpacket] ip_route_input_noref
0xffff8cd8bca97100 0 [sendpacket] ip_route_input_slow
0xffff8cd8bca97100 0 [sendpacket] fib_validate_source
0xffff8cd8bca97100 0 [sendpacket] __fib_validate_source
0xffff8cd8bca97100 0 [sendpacket] ip_handle_martian_source
0xffff8cd8bca97101 0 [sendpacket] kfree_skb_reason(SKB_DROP_REASON_NOT_SPECIFIED)
0xffff8cd8bca97100 0 [sendpacket] skb_release_head_state
0xffff8cd8bca97100 0 [sendpacket] skb_release_data
0xffff8cd8bca97100 0 [sendpacket] skb_free_head
0xffff8cd8bca97100 0 [sendpacket] kfree_skbmem
事实上,如果你已经在系统上启用了 log_martians,你会看到:
Feb 14 12:14:03 madhatter kernel: IPv4: martian source 192.168.255.8 from
172.30.0.1, on dev tun0
……我们得到该错误,是因为有一个数据包从 tun0 进入,而它的源地址与该接口自身的地址相同,这是有问题的(我们永远无法正确响应它)。
如果我们修改您的代码,(a) 为接口设置一个与数据包源地址不同的地址,并且 (b) 使用 /24 网络掩码,以便自动获得到相应网络的路由:
/*
 * Configure "tun0" with address 172.30.0.10 and netmask 255.255.255.0, then
 * add IFF_UP | IFF_RUNNING to its flags.
 *
 * The address deliberately differs from the 172.30.0.1 source address of
 * the injected packet. Every ioctl() result is passed to must(); the
 * results of socket() and inet_aton() are not checked.
 */
static void bringInterfaceUp(void) {
  struct ifreq request = {.ifr_name = "tun0"};
  struct sockaddr_in ipv4 = {.sin_family = AF_INET};
  int ctl;

  /* Interface address. */
  inet_aton("172.30.0.10", &ipv4.sin_addr);
  memcpy(&request.ifr_addr, &ipv4, sizeof(struct sockaddr));
  ctl = socket(AF_INET, SOCK_DGRAM, 0);
  must(ioctl(ctl, SIOCSIFADDR, &request));

  /*
   * Netmask. netdevice(7) names ifr_netmask as the field used by
   * SIOCSIFNETMASK; it is a member of the same union inside struct ifreq
   * as ifr_addr, so the sockaddr is staged the same way as the address.
   */
  inet_aton("255.255.255.0", &ipv4.sin_addr);
  memcpy(&request.ifr_netmask, &ipv4, sizeof(struct sockaddr));
  must(ioctl(ctl, SIOCSIFNETMASK, &request));

  /* Fetch the current flags so the two new bits are OR-ed into them. */
  must(ioctl(ctl, SIOCGIFFLAGS, &request));
  request.ifr_flags |= IFF_UP | IFF_RUNNING;
  must(ioctl(ctl, SIOCSIFFLAGS, &request));

  close(ctl);
}
我们现在可以看到数据包在 eth0 上发出:
$ sudo tcpdump -i any -nn port 10001
tcpdump: data link type LINUX_SLL2
dropped privs to tcpdump
tcpdump: verbose output suppressed, use -v[v]... for full protocol decode
listening on any, link-type LINUX_SLL2 (Linux cooked v2), snapshot length 262144 bytes
12:33:26.877514 tun0 In IP 172.30.0.1.41626 > 192.168.255.8.10001: Flags [S], seq 2148230009, win 64240, options [mss 1460,sackOK,TS val 1534484436 ecr 0,nop,wscale 7], length 0
12:33:26.877536 eth0 Out IP 172.30.0.1.41626 > 192.168.255.8.10001: Flags [S], seq 2148230009, win 64240, options [mss 1460,sackOK,TS val 1534484436 ecr 0,nop,wscale 7], length 0