SO_BINDTODEVICE 後連線 local server 失敗

讓機器連線到自己開的 server 時,只要 client 先用 SO_BINDTODEVICE 綁定網路介面,無論目的位址設定成機器本身的 IP 還是 127.0.0.1,都會連線失敗。以下是 client 連線部分的程式:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include <arpa/inet.h>
#include <fcntl.h>
#include <netinet/in.h>
#include <sys/select.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <unistd.h>

/*
 * Binds sockfd to the "eth1" interface with SO_BINDTODEVICE, switches it to
 * non-blocking mode and connects to 127.0.0.1:502, waiting at most one
 * second for the connection to complete.
 *
 * Returns 0 when the connection is established and -1 on any failure.
 * The socket is never closed here; that is left to the caller.
 */
static int connect_bound_client(int sockfd)
{
    struct sockaddr_in info = {0};
    info.sin_family = AF_INET;
    info.sin_port = htons(502);
    if (inet_pton(AF_INET, "127.0.0.1", &info.sin_addr) != 1)
    {
        fprintf(stderr, "invalid server address\n");
        return -1;
    }

    /* A failed bind makes the rest of the experiment meaningless, so it is
     * reported and treated as fatal instead of being silently ignored. */
    const char *ifname = "eth1";
    if (setsockopt(sockfd, SOL_SOCKET, SO_BINDTODEVICE,
                   ifname, (socklen_t)(strlen(ifname) + 1)) == -1)
    {
        perror("setsockopt(SO_BINDTODEVICE)");
        return -1;
    }

    int flags = fcntl(sockfd, F_GETFL, 0);
    if (flags == -1)
    {
        perror("fcntl(F_GETFL)");
        return -1;
    }
    if (fcntl(sockfd, F_SETFL, flags | O_NONBLOCK) == -1)
    {
        perror("fcntl(F_SETFL)");
        return -1;
    }

    printf("connect...\n");
    if (connect(sockfd, (const struct sockaddr *)&info, sizeof(info)) == 0)
    {
        printf("connect ok\n");
        return 0;
    }
    if (errno != EINPROGRESS)
    {
        /* perror first: printf may overwrite errno. */
        perror("connect");
        printf("connect fail\n");
        return -1;
    }

    /* Non-blocking connect in progress: the socket becomes writable once the
     * attempt has finished, successfully or not. */
    fd_set wset;
    FD_ZERO(&wset);
    FD_SET(sockfd, &wset);
    struct timeval timeout = { .tv_sec = 1, .tv_usec = 0 };

    int ready = select(sockfd + 1, NULL, &wset, NULL, &timeout);
    if (ready < 0)
    {
        perror("select");
        printf("select fail\n");
        return -1;
    }
    if (ready == 0)
    {
        /* Timeout: no answer arrived within one second. */
        fprintf(stderr, "select: connect timed out\n");
        printf("select fail\n");
        return -1;
    }
    if (!FD_ISSET(sockfd, &wset))
    {
        printf("fd is not set\n");
        return -1;
    }

    /* Writability alone does not mean success; SO_ERROR carries the final
     * result of the asynchronous connect. */
    int so_error = 0;
    socklen_t len = sizeof(so_error);
    if (getsockopt(sockfd, SOL_SOCKET, SO_ERROR, &so_error, &len) == -1)
    {
        perror("getsockopt(SO_ERROR)");
        return -1;
    }
    if (so_error != 0)
    {
        printf("err is not zero, err %d\n", so_error);
        fprintf(stderr, "connect: %s\n", strerror(so_error));
        return -1;
    }

    printf("tcp_open, connected, socket %d\n", sockfd);
    return 0;
}

/*
 * Creates a TCP socket, runs the bound-interface connection attempt and
 * closes the socket again.
 *
 * Returns EXIT_SUCCESS when the connection was established, EXIT_FAILURE
 * otherwise.
 */
int main(int argc , char *argv[])
{
    (void)argc;
    (void)argv;

    int sockfd = socket(PF_INET, SOCK_STREAM | SOCK_CLOEXEC, IPPROTO_TCP);
    if (sockfd == -1)
    {
        perror("socket");
        printf("Fail to create a socket.\n");
        return EXIT_FAILURE;
    }
    printf("socket ok\n");

    int rc = connect_bound_client(sockfd);

    printf("close Socket\n");
    close(sockfd);
    return rc == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
}

重點是 client 會用 SO_BINDTODEVICE 綁定(bind)使用者指定的網路介面,這邊是 eth1。然而,無論目的位址是機器本身的 IP 還是 127.0.0.1,都無法連線成功。透過 perf 觀察,實際收送封包的網路介面似乎都是 lo:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
# perf_4.19 trace --no-syscalls --event 'net:*' ./socket
start
socket ok
connect...
sh: 1: /perf-read-vdso32: not found
     0.000 net:net_dev_queue:dev=lo skbaddr=0xffff9907f9bbcee8 len=74
     0.140 net:net_dev_start_xmit:dev=lo queue_mapping=0 skbaddr=0xffff9907f9bbcee8 vlan_tagged=0 vlan_proto=0x0000 vlan_tci=0x0000 protocol=0x0800 ip_summed=3 len=74 data_len=0 network_offset=14 transport_offset_valid=1 transport_offset=34 tx_flags=0 gso_size=0 gso_segs=1 gso_type=0x1
     0.164 net:netif_rx_entry:dev=lo napi_id=0x2 queue_mapping=0 skbaddr=0xffff9907f9bbcee8 vlan_tagged=0 vlan_proto=0x0000 vlan_tci=0x0000 protocol=0x0800 ip_summed=3 hash=0xfca661f3 l4_hash=1 len=60 data_len=0 truesize=1280 mac_header_valid=1 mac_header=-14 nr_frags=0 gso_size=0 gso_type=0x1
     0.186 net:netif_rx:dev=lo skbaddr=0xffff9907f9bbcee8 len=60
     0.208 net:net_dev_xmit:dev=lo skbaddr=0xffff9907f9bbcee8 len=74 rc=0
root@RootfsDebian:~# select ret 0
select fail
close Socket

如果把 client 程式改成不做 SO_BINDTODEVICE,就能連線成功。perf 的結果如下:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
# perf_4.19 trace --no-syscalls --event 'net:*' ./socket
start
socket ok
connect...
select ret 1
tcp_open, connected, socket 3
close Socket
sh: 1: /perf-read-vdso32: not found
     0.000 net:net_dev_queue:dev=lo skbaddr=0xffff9907f83a1ee8 len=74
     1.436 net:net_dev_start_xmit:dev=lo queue_mapping=0 skbaddr=0xffff9907f83a1ee8 vlan_tagged=0 vlan_proto=0x0000 vlan_tci=0x0000 protocol=0x0800 ip_summed=3 len=74 data_len=0 network_offset=14 transport_offset_valid=1 transport_offset=34 tx_flags=0 gso_size=0 gso_segs=1 gso_type=0x1
     1.486 net:netif_rx_entry:dev=lo napi_id=0x2 queue_mapping=0 skbaddr=0xffff9907f83a1ee8 vlan_tagged=0 vlan_proto=0x0000 vlan_tci=0x0000 protocol=0x0800 ip_summed=3 hash=0xd4094cf6 l4_hash=1 len=60 data_len=0 truesize=1280 mac_header_valid=1 mac_header=-14 nr_frags=0 gso_size=0 gso_type=0x1
     1.508 net:netif_rx:dev=lo skbaddr=0xffff9907f83a1ee8 len=60
     1.529 net:net_dev_xmit:dev=lo skbaddr=0xffff9907f83a1ee8 len=74 rc=0
     1.630 net:net_dev_queue:dev=lo skbaddr=0xffff9907f5eb8600 len=66
     1.653 net:net_dev_start_xmit:dev=lo queue_mapping=0 skbaddr=0xffff9907f5eb8600 vlan_tagged=0 vlan_proto=0x0000 vlan_tci=0x0000 protocol=0x0800 ip_summed=3 len=66 data_len=0 network_offset=14 transport_offset_valid=1 transport_offset=34 tx_flags=0 gso_size=0 gso_segs=1 gso_type=0x1
     1.673 net:netif_rx_entry:dev=lo napi_id=0x2 queue_mapping=0 skbaddr=0xffff9907f5eb8600 vlan_tagged=0 vlan_proto=0x0000 vlan_tci=0x0000 protocol=0x0800 ip_summed=3 hash=0xd4094cf6 l4_hash=1 len=52 data_len=0 truesize=2 mac_header_valid=1 mac_header=-14 nr_frags=0 gso_size=0 gso_type=0x1
     1.692 net:netif_rx:dev=lo skbaddr=0xffff9907f5eb8600 len=52
     1.712 net:net_dev_xmit:dev=lo skbaddr=0xffff9907f5eb8600 len=66 rc=0
     1.950 net:net_dev_queue:dev=lo skbaddr=0xffff9907f83a1ee8 len=66
     1.977 net:net_dev_start_xmit:dev=lo queue_mapping=0 skbaddr=0xffff9907f83a1ee8 vlan_tagged=0 vlan_proto=0x0000 vlan_tci=0x0000 protocol=0x0800 ip_summed=3 len=66 data_len=0 network_offset=14 transport_offset_valid=1 transport_offset=34 tx_flags=0 gso_size=0 gso_segs=1 gso_type=0x1
     1.998 net:netif_rx_entry:dev=lo napi_id=0x2 queue_mapping=0 skbaddr=0xffff9907f83a1ee8 vlan_tagged=0 vlan_proto=0x0000 vlan_tci=0x0000 protocol=0x0800 ip_summed=3 hash=0xd4094cf6 l4_hash=1 len=52 data_len=0 truesize=1280 mac_header_valid=1 mac_header=-14 nr_frags=0 gso_size=0 gso_type=0x1
     2.016 net:netif_rx:dev=lo skbaddr=0xffff9907f83a1ee8 len=52
     2.039 net:net_dev_xmit:dev=lo skbaddr=0xffff9907f83a1ee8 len=66 rc=0

實際原理我還沒有完全確認,推測是:目的位址屬於本機時,系統固定經由 lo 收送封包;但 socket 又用 SO_BINDTODEVICE 綁在 eth1 上,依照 socket(7) 的說明,這樣的 socket 只會處理從該介面收到的封包,於是從 lo 回來的回應送不到這個 socket,連線就卡住直到逾時。

或許處理方式是:判斷目的位址是 127.0.0.1 或機器本身的 IP 時,就不要做 SO_BINDTODEVICE。

留言

熱門文章