`
simohayha
  • 浏览: 1386527 次
  • 性别: Icon_minigender_1
  • 来自: 火星
社区版块
存档分类
最新评论

tcp connection setup的实现(三)

阅读更多
先来看下accept的实现.

其实accept的作用很简单,就是从accept队列中取出三次握手完成的socket,并将它关联到vfs上(其实操作和调用sys_socket时新建一个socket类似),然后返回.这里还有个要注意的地方:如果传递给accept的socket是非阻塞的,那么就算accept队列为空,也会直接返回;如果是阻塞的,进程就会休眠,等accept队列有数据后再被唤醒.

接下来我们就来看它的实现,accept对应的系统调用是 sys_accept,而他则会调用do_accept,因此我们直接来看do_accept:


/*
 * do_accept - accept a pending connection on the listening socket behind fd.
 *
 * Excerpt: dotted lines mark code elided by the article (the out_fd and
 * out_fd_simple labels jumped to below live in the elided part).
 *
 * Returns the newly installed file descriptor on success, or a negative
 * errno value on failure.
 */
long do_accept(int fd, struct sockaddr __user *upeer_sockaddr,
	       int __user *upeer_addrlen, int flags)
{
	struct socket *sock, *newsock;
	struct file *newfile;
	int err, len, newfd, fput_needed;
	struct sockaddr_storage address;
.............................................
/// Resolve fd to its struct socket; err is filled in on failure.
	sock = sockfd_lookup_light(fd, &err, &fput_needed);
	if (!sock)
		goto out;

	err = -ENFILE;
/// Allocate the struct socket that this call will hand back. Only a struct
/// socket is created here, not a struct sock (the sock is attached later by
/// the protocol's accept handler).
	if (!(newsock = sock_alloc()))
		goto out_put;

	newsock->type = sock->type;
	newsock->ops = sock->ops;

	/*
	 * We don't need try_module_get here, as the listening socket (sock)
	 * has the protocol module (sock->ops->owner) held.
	 */
	__module_get(newsock->ops->owner);
/// Reserve an unused fd together with a struct file to pair with newsock.
	newfd = sock_alloc_fd(&newfile, flags & O_CLOEXEC);
	if (unlikely(newfd < 0)) {
		err = newfd;
		sock_release(newsock);
		goto out_put;
	}
/// Bind newsock to newfile (the same step taken when a socket is first created).
	err = sock_attach_fd(newsock, newfile, flags & O_NONBLOCK);
	if (err < 0)
		goto out_fd_simple;

	err = security_socket_accept(sock, newsock);
	if (err)
		goto out_fd;
/// Dispatch to the protocol's accept handler (inet_accept for inet sockets,
/// per the surrounding article).
	err = sock->ops->accept(sock, newsock, sock->file->f_flags);
	if (err < 0)
		goto out_fd;
/// If the caller asked for it, fetch the address of the accepted socket and
/// copy it out to upeer_sockaddr / upeer_addrlen.
	if (upeer_sockaddr) {
		if (newsock->ops->getname(newsock, (struct sockaddr *)&address,
					  &len, 2) < 0) {
			err = -ECONNABORTED;
			goto out_fd;
		}
		err = move_addr_to_user((struct sockaddr *)&address,
					len, upeer_sockaddr, upeer_addrlen);
		if (err < 0)
			goto out_fd;
	}

	/* File flags are not inherited via accept() unlike another OSes. */
/// Install newfile at newfd; from here on the fd is the value to return.
	fd_install(newfd, newfile);
	err = newfd;

	security_socket_post_accept(sock, newsock);

out_put:
/// Drop the reference on sock->file obtained by the lookup above, if one was
/// taken (fput_needed). NOTE(review): this releases a reference; it does not
/// increment the count.
	fput_light(sock->file, fput_needed);
out:
/// Return the new fd on success, or the negative errno stored in err.
	return err;
.......................................
}


可以看到流程很简单,最终的实现都集中在inet_accept中了.而inet_accept主要做的就是

1 调用inet_csk_accept来进行对accept队列的操作.它会返回取得的sock.

2 将从inet_csk_accept返回的sock链接到传递进来的(也就是在do_accept中new的socket)中.这里就知道我们上面为什么只需要new一个socket而不是sock了.因为sock我们是直接从accept队列中取得的.


3 设置新的socket的状态为SS_CONNECTED.

int inet_accept(struct socket *sock, struct socket *newsock, int flags)
{
	struct sock *sk1 = sock->sk;
	int err = -EINVAL;
///调用inet_csk_accept.
	struct sock *sk2 = sk1->sk_prot->accept(sk1, flags, &err);

	if (!sk2)
		goto do_err;

	lock_sock(sk2);
///测试tcp连接的状态.
	WARN_ON(!((1 << sk2->sk_state) &
		  (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_CLOSE)));
///将返回的sock链接到socket.
	sock_graft(sk2, newsock);
///设置状态.
	newsock->state = SS_CONNECTED;
	err = 0;
	release_sock(sk2);
do_err:
	return err;
}


inet_csk_accept就是从accept队列中取出sock然后返回.

在看他的源码之前先来看几个相关函数的实现:

首先是reqsk_queue_empty,他用来判断accept队列是否为空:

/* Return non-zero when the accept queue has no entry at its head. */
static inline int reqsk_queue_empty(struct request_sock_queue *queue)
{
	return !queue->rskq_accept_head;
}


然后是reqsk_queue_get_child,他主要是从accept队列中得到一个sock:

/*
 * reqsk_queue_get_child - take one request off the accept queue and
 * return the sock it carries.
 *
 * @queue:  accept queue to dequeue from
 * @parent: listening sock whose accept-queue accounting is updated
 *
 * The request itself is freed here; only its sock is handed back.
 */
static inline struct sock *reqsk_queue_get_child(struct request_sock_queue *queue,
						 struct sock *parent)
{
	struct request_sock *entry;
	struct sock *accepted;

	entry = reqsk_queue_remove(queue);
	/* Must be read before entry is freed below. */
	accepted = entry->sk;

	WARN_ON(!accepted);
	/* Account for the entry leaving the parent's accept queue. */
	sk_acceptq_removed(parent);
	__reqsk_free(entry);

	return accepted;
}


这里还有一个inet_csk_wait_for_connect,它用来在accept队列为空的情况下休眠一段时间.每个socket都有一个等待队列(等待队列的用法请google,我这里就不阐述了):每个调用的进程都会声明一个wait队列项,将它链接到主socket的等待队列链表中,然后休眠,直到被唤醒.

/*
 * inet_csk_wait_for_connect - sleep until the accept queue of sk becomes
 * non-empty, the timeout runs out, a signal is pending, or sk leaves
 * TCP_LISTEN.
 *
 * Excerpt: the dotted line marks code elided by the article.
 *
 * The sock lock is released around the sleep and re-taken afterwards, so
 * the visible code expects the caller to hold it on entry.
 *
 * Returns 0 when the queue has an entry, -EINVAL when sk is no longer
 * listening, the value of sock_intr_errno(timeo) when a signal is pending,
 * or -EAGAIN when timeo reached 0.
 */
static int inet_csk_wait_for_connect(struct sock *sk, long timeo)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
/// Declare the wait-queue entry for the calling task.
	DEFINE_WAIT(wait);
	int err;
..................................................
	for (;;) {
/// Queue this entry (exclusively) on sk->sk_sleep, the wait queue of the
/// listening sock, and mark the task TASK_INTERRUPTIBLE.
		prepare_to_wait_exclusive(sk->sk_sleep, &wait,
					  TASK_INTERRUPTIBLE);
		release_sock(sk);
/// Re-check the accept queue now that the lock has been dropped.
		if (reqsk_queue_empty(&icsk->icsk_accept_queue))
/// Still empty: sleep via schedule_timeout(); its return value becomes the
/// new timeo. The task is expected to be woken through sk->sk_sleep when
/// the accept queue gets an entry.
			timeo = schedule_timeout(timeo);
		lock_sock(sk);
		err = 0;
/// Queue is non-empty: leave the loop with err == 0.
		if (!reqsk_queue_empty(&icsk->icsk_accept_queue))
			break;
		err = -EINVAL;
		if (sk->sk_state != TCP_LISTEN)
			break;
		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			break;
/// Error reported when the timeout has run out.
		err = -EAGAIN;
/// No time left: give up.
		if (!timeo)
			break;
	}
/// Take the wait-queue entry off sk->sk_sleep again.
	finish_wait(sk->sk_sleep, &wait);
	return err;
}


然后来看inet_csk_accept的源码,这里有个阻塞和非阻塞的问题.非阻塞的话,就算accept队列为空也会直接返回,这个时候错误号被设置为-EAGAIN.

/*
 * inet_csk_accept - dequeue one established child sock from a listening
 * sock's accept queue.
 *
 * @sk:    the listening sock
 * @flags: file flags; only O_NONBLOCK is looked at here
 * @err:   written only on failure, with a negative errno
 *
 * Returns the child sock, or NULL on failure:
 *   -EINVAL  sk is not in TCP_LISTEN
 *   -EAGAIN  the queue is empty and the receive timeout is 0
 *   otherwise the error reported by inet_csk_wait_for_connect().
 */
struct sock *inet_csk_accept(struct sock *sk, int flags, int *err)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct sock *child;
	int rc;

	lock_sock(sk);

	/* The socket must be listening ... */
	rc = -EINVAL;
	if (sk->sk_state != TCP_LISTEN)
		goto fail;

	/* ... and must have something pending, waiting for it if allowed. */
	if (reqsk_queue_empty(&icsk->icsk_accept_queue)) {
		long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);

		/* A zero timeout means: do not sleep, fail straight away. */
		rc = -EAGAIN;
		if (timeo == 0)
			goto fail;

		rc = inet_csk_wait_for_connect(sk, timeo);
		if (rc != 0)
			goto fail;
	}

	/* Remove the head of the accept queue and take its sock. */
	child = reqsk_queue_get_child(&icsk->icsk_accept_queue, sk);
	WARN_ON(child->sk_state == TCP_SYN_RECV);

	release_sock(sk);
	return child;

fail:
	*err = rc;
	release_sock(sk);
	return NULL;
}


最后来大概分析下connect的实现.它的具体流程是:

1 由fd得到socket,并且将地址复制到内核空间

2 调用inet_stream_connect进行主要的处理.

这里要注意connect也有个阻塞和非阻塞的区别,阻塞的话调用inet_wait_for_connect休眠,等待握手完成,否则直接返回.


/*
 * sys_connect - connect the socket behind fd to a user-supplied address.
 *
 * @fd:        file descriptor of the socket
 * @uservaddr: user-space pointer to the destination address
 * @addrlen:   length of that address
 *
 * Returns the result of the socket's connect operation, or the negative
 * error from the fd lookup, the address copy, or the security hook.
 */
asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr,
			    int addrlen)
{
	struct sockaddr_storage kaddr;
	struct sockaddr *dest = (struct sockaddr *)&kaddr;
	struct socket *sock;
	int fput_needed;
	int err;

	/* Resolve fd to its socket; err is filled in on failure. */
	sock = sockfd_lookup_light(fd, &err, &fput_needed);
	if (sock == NULL)
		return err;

	/* Copy the destination address into kernel space. */
	err = move_addr_to_kernel(uservaddr, addrlen, dest);
	if (err >= 0) {
		err = security_socket_connect(sock, dest, addrlen);
		if (err == 0)
			/* Hand over to the protocol's connect handler. */
			err = sock->ops->connect(sock, dest, addrlen,
						 sock->file->f_flags);
	}

	fput_light(sock->file, fput_needed);
	return err;
}



然后来看inet_stream_connect,他的主要工作是:

1 判断socket的状态.只有当为SS_UNCONNECTED也就是非连接状态时才调用tcp_v4_connect来进行连接处理.

2 判断tcp的状态sk_state只能为TCPF_SYN_SENT或者TCPF_SYN_RECV,才进入相关处理.

3 如果状态合适并且socket为阻塞模式则调用inet_wait_for_connect进入休眠等待握手完成,否则直接返回,并设置错误号为EINPROGRESS.


/*
 * inet_stream_connect - socket-level connect for stream sockets.
 *
 * Excerpt: the dotted line marks code elided by the article.
 *
 * Starts a connection when the socket is SS_UNCONNECTED, then either
 * returns immediately (timeo == 0) or waits for the handshake to finish.
 *
 * Returns 0 once the socket is SS_CONNECTED, otherwise a negative errno:
 * -EINVAL, -EISCONN, -EALREADY, -EINPROGRESS, the protocol connect error,
 * the value of sock_intr_errno(timeo), or the pending socket error
 * (-ECONNABORTED if none).
 */
int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			int addr_len, int flags)
{
	struct sock *sk = sock->sk;
	int err;
	long timeo;

	lock_sock(sk);
............................................

	switch (sock->state) {
	default:
		err = -EINVAL;
		goto out;
	case SS_CONNECTED:
		err = -EISCONN;
		goto out;
	case SS_CONNECTING:
		err = -EALREADY;
		/* Fall out of switch with err, set for this state */
		break;
	case SS_UNCONNECTED:
		err = -EISCONN;
		if (sk->sk_state != TCP_CLOSE)
			goto out;
/// Protocol connect hook (tcp_v4_connect for TCP, which mainly sends the
/// SYN, per the surrounding article).
		err = sk->sk_prot->connect(sk, uaddr, addr_len);
		if (err < 0)
			goto out;
/// Mark the socket as connecting.
		sock->state = SS_CONNECTING;
/// Error reported if we return before the handshake has completed.
		err = -EINPROGRESS;
		break;
	}
/// Send timeout for this call; presumably 0 when O_NONBLOCK is set, as with
/// the receive timeout used on the accept path.
	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
/// Return right away with the current err when timeo is 0 or when
/// inet_wait_for_connect() returns 0; otherwise carry on below.
/// inet_wait_for_connect() is not shown; the article describes it as
/// similar to inet_csk_wait_for_connect().
		if (!timeo || !inet_wait_for_connect(sk, timeo))
			goto out;

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
	}

	/* Connection was closed by RST, timeout, ICMP error
	 * or another process disconnected us.
	 */
	if (sk->sk_state == TCP_CLOSE)
		goto sock_error;
/// Handshake finished: mark the socket connected.
	sock->state = SS_CONNECTED;
	err = 0;
out:
	release_sock(sk);
	return err;

sock_error:
	err = sock_error(sk) ? : -ECONNABORTED;
	sock->state = SS_UNCONNECTED;
	if (sk->sk_prot->disconnect(sk, flags))
		sock->state = SS_DISCONNECTING;
	goto out;
}


tcp_v4_connect的源码就不分析了,我这里只大概的介绍下他的流程:

1 判断地址的一些合法性.

2 调用ip_route_connect来查找出去的路由(包括查找临时端口等等).

3 设置sock的状态为TCP_SYN_SENT,并调用inet_hash_connect来查找一个临时端口(也就是我们出去的端口),并加入到对应的hash链表(具体操作和get_port很相似).

4 调用tcp_connect来完成最终的操作.这个函数主要用来初始化将要发送的syn包(包括窗口大小isn等等),然后将这个sk_buffer加入到socket的写队列.最终调用tcp_transmit_skb传输到3层.再往下的操作就可以看我前面的blog了.

最后来看下3次握手的客户端的状态变化,还是看tcp_rcv_state_process函数,这里我们进来的socket假设就是TCP_SYN_SENT状态,也就是在等待syn和ack分节:


/*
 * tcp_rcv_state_process - excerpt showing only the TCP_SYN_SENT branch.
 * Dotted lines mark code elided by the article; the rest of the function
 * is not shown.
 */
int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
			  struct tcphdr *th, unsigned len)
{
..........................................

	switch (sk->sk_state) {
	case TCP_CLOSE:
		goto discard;

	case TCP_LISTEN:
		..................................

	case TCP_SYN_SENT:
/// Hand the segment to the SYN_SENT state handler; a return value >= 0 is
/// passed straight back to the caller.
		queued = tcp_rcv_synsent_state_process(sk, skb, th, len);
		if (queued >= 0)
			return queued;

		/* Do step6 onward by hand. */
		tcp_urg(sk, skb, th);
		__kfree_skb(skb);
		tcp_data_snd_check(sk);
		return 0;
	}



然后来看tcp_rcv_synsent_state_process中的状态变化:


/*
 * tcp_rcv_synsent_state_process - excerpt showing the state transitions
 * made while the sock is in TCP_SYN_SENT. Dotted lines mark code elided by
 * the article (which is also why a bare #endif appears below).
 */
static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
					 struct tcphdr *th, unsigned len)
{
..................

	if (th->ack) {
....................................
/// RST set: handle it through tcp_reset() and discard the segment.
		if (th->rst) {
			tcp_reset(sk);
			goto discard;
		}
/// ACK without SYN: discard the segment.
		if (!th->syn)
			goto discard_and_undo;

..................................................
/// Checks (elided above) passed: enter TCP_ESTABLISHED. Per the article the
/// final ACK of the handshake is sent in the elided code that follows.
		tcp_set_state(sk, TCP_ESTABLISHED);

		.......................................
	}

....................................................

	if (th->syn) {
/// SYN seen here (per the article: SYN only, without ACK): enter
/// TCP_SYN_RECV.
		tcp_set_state(sk, TCP_SYN_RECV);

...................................
/// Answer the peer through tcp_send_synack(), i.e. with a SYN-ACK.
		tcp_send_synack(sk);
		goto discard;
#endif
	}
...................
}


这里如果只接收到syn,则三次握手还没完成,我们还在等待最后一个ack,因此此时有数据报到来的话,会再次进入tcp_rcv_state_process函数:


/// Fragment from tcp_rcv_state_process (per the article): ACK handling when
/// the sock is in TCP_SYN_RECV. Surrounding code is not shown.
if (th->ack) {
/// Result of tcp_ack() for this segment: whether the ACK is acceptable.
		int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH);

		switch (sk->sk_state) {
		case TCP_SYN_RECV:
			if (acceptable) {

				tp->copied_seq = tp->rcv_nxt;
				smp_mb();
/// Enter TCP_ESTABLISHED: the three-way handshake is complete.
				tcp_set_state(sk, TCP_ESTABLISHED);
				sk->sk_state_change(sk);
/// Send the asynchronous notification (SOCK_WAKE_IO / POLL_OUT) when a
/// struct socket is attached. NOTE(review): the original note said this
/// wakes the task sleeping in connect(); presumably sk_state_change()
/// above does that — confirm.
				if (sk->sk_socket)
					sk_wake_async(sk,
						      SOCK_WAKE_IO, POLL_OUT);

			........................................
			} else {
				return 1;
			}
			break;

分享到:
评论

相关推荐

    modjn:Netty用Java实现Modbus

    使用Netty 4.x的Java中的Modbus TCP客户端/服务器实现 当前实现的Modbus功能 读线圈| 0x01 读取离散输入| 0x02 阅读保存寄存器| 0x03 读取输入寄存器| 0x04 写单线圈| 0x05 写单寄存器| 0x06 写多个线圈| 0x0F...

    PICO2000品科监控卡软件

    PICO2000监控软件,含客户端软件 **********************************...** Please refer to Section 4 of The installation guide for the detailed information about the TCP/IP setup for both SERVER and CLIENT.

    win 3.11 for workgroup tcpip支持

    mechanism is used only with TCP (connection-oriented traffic). Therefore, utilities like PING will only use the first default gateway. Notice that t his only applies to IP datagrams that have to be ...

    SecureBridge v6.2.3 for Delphi & BCB Full Source

    It protects any TCP traffic using SSH or SSL secure transport layer protocols, that provide authentication for both client and server, strong data encryption, and data integrity verification....

    SecureBridge v5.5.1 Full Source for XE5

    SecureBridge is a set of network security and data protection solutions that can protect any TCP traffic using SSH or SSL secure transport layer protocols that provide authentication for both client ...

    iperf-3.9-win64.zip

    最新版本iperf,官网下载直接编译,适合win10系统。... --connect-timeout # timeout for control connection setup (ms) -b, --bitrate #[KMG][/#] target bitrate in bits/sec (0 for unlimited)

    WCDMA KPI监控和优化指导书

    4.5.4 UE收到RRC Connection Setup消息没有发出RRC Setup Complete消息 41 4.5.5 UE发出RRC Setup Complete消息RNC没有收到 41 4.6 鉴权问题分析 41 4.6.1 MAC Failure 41 4.6.2 Sync Failure 42 4.7 安全模式问题...

    S7A驱动720版本

    - The setup program created a wrong e-doc link in Windows start menu. Now the proper link to S7A.CHM will be installed - From the S7A power tool it wasn't possible to open the online help file. ...

    CMU_Application_Testing_with_CMU200.pdf

    Although the option of a live internet connection is supported for each of the network simulations, this document describes a test setup which, instead, uses a "Server PC" to provide the various ...

    计算机网络第六版答案

    22. Five generic tasks are error control, flow control, segmentation and reassembly, multiplexing, and connection setup. Yes, these tasks can be duplicated at different layers. For example, error ...

    ezyfox-server-android-client:ezyfox-server-android-client

    1.创建一个TCP客户端 val clients = EzyClients .getInstance() val client = clients.newClient(config) 2.设置客户端 val setup = client.setup() setup.addEventHandler( EzyEventType . CONNECTION_SUCCESS , ...

    pySTR4500:带有 SimPLEX 的 STR4500 GPSSBAS 模拟器的实用程序

    带有 SimPLEX over TCP 的 STR4500 GPS/SBAS 模拟器的实用程序。 开发构建和设置 希望, sudo pip install ElementTree --allow-external ElementTree --allow-unverified ElementTree python setup.py install ...

    Windows MicroXP 0.82[Microsoft Windows XP SP3原版加工成的微型XP系统,=99.9%个完整XP]

    TCP/IP NetBIOS Helper Telephony Windows Audio Windows Installer Wireless Zero Configuration All these services are enabled except "rint Spooler" which you will need to enable in "services.msc". To ...

    Linux Networking Subsystem

    2.1 Function do basic setup() . . . . . . . . . . . . . . . . . . . . 3 2.2 Function sock init() . . . . . . . . . . . . . . . . . . . . . . . . 5 2.2.1 Function sk init() . . . . . . . . . . . . . . ...

    基恩士条码枪

    Click Properties, then double click [internet protocol V4 (TCP/IP)], enable “use static IP address”. And set its IP address as below image. Click ok. 17 | Confidential © 2013 Cognex Corporation FTP...

    SIP - Understanding the Session Initiation Protocol, 2nd Ed - 1459

    2.5.2 TCP Transport 40 2.5.3 TLS Transport 40 2.5.4 SCTP Transport 41 References 42 3 SIP Clients and Servers 43 3.1 SIP User Agents 43 3.2 Presence Agents 44 3.3 Back-to-Back User Agents 45 3.4 SIP ...

    VB编程资源大全(英文源码 网络)

    You can send messages with a client/server type setup.&lt;END&gt;&lt;br&gt;57 , al40.zip Apparently, if you use AOL to connect to the Internet and you do not touch it for 45 minutes it will timeout and drop ...

    a project model for the FreeBSD Project.7z

    A project model for the FreeBSD Project Niklas Saers ... ...Table of Contents Foreword 1 Overview 2 Definitions ...3-1....3-2....4-1....4-2....4-3....5-1....6-1....6-2....6-3....6-4....6-5....6-6....6-7....6-8....6-9....8-1....Up until now, the FreeBSD ...

    BURNINTEST--硬件检测工具

    - A network connection and the TCP/IP networking software installed for the Network Tests Pro version only: - A serial port loop back plug for the serial port test. - A parallel port loop back plug...

    python3.6.5参考手册 chm

    Python参考手册,官方正式版参考手册,chm版。以下摘取部分内容:Navigation index modules | next | Python » 3.6.5 Documentation » Python Documentation contents What’s New in Python ...

Global site tag (gtag.js) - Google Analytics