svcsock.c 42.1 KB
Newer Older
Linus Torvalds's avatar
Linus Torvalds committed
1 2 3 4 5 6 7
/*
 * linux/net/sunrpc/svcsock.c
 *
 * These are the RPC server socket internals.
 *
 * The server scheduling algorithm does not always distribute the load
 * evenly when servicing a single client. May need to modify the
 * svc_xprt_enqueue procedure...
 *
 * TCP support is largely untested and may be a little slow. The problem
 * is that we currently do two separate recvfrom's, one for the 4-byte
 * record length, and the second for the actual record. This could possibly
 * be improved by always reading a minimum size of around 100 bytes and
 * tucking any superfluous bytes away in a temporary store. Still, that
 * leaves write requests out in the rain. An alternative may be to peek at
 * the first skb in the queue, and if it matches the next TCP sequence
 * number, to extract the record marker. Yuck.
 *
 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
 */

22
#include <linux/kernel.h>
Linus Torvalds's avatar
Linus Torvalds committed
23
#include <linux/sched.h>
24
#include <linux/module.h>
Linus Torvalds's avatar
Linus Torvalds committed
25 26 27 28 29 30
#include <linux/errno.h>
#include <linux/fcntl.h>
#include <linux/net.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/udp.h>
31
#include <linux/tcp.h>
Linus Torvalds's avatar
Linus Torvalds committed
32 33 34 35
#include <linux/unistd.h>
#include <linux/slab.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
36
#include <linux/file.h>
37
#include <linux/freezer.h>
Linus Torvalds's avatar
Linus Torvalds committed
38 39 40
#include <net/sock.h>
#include <net/checksum.h>
#include <net/ip.h>
41
#include <net/ipv6.h>
42
#include <net/udp.h>
43
#include <net/tcp.h>
44
#include <net/tcp_states.h>
45
#include <linux/uaccess.h>
Linus Torvalds's avatar
Linus Torvalds committed
46
#include <asm/ioctls.h>
47
#include <trace/events/skb.h>
Linus Torvalds's avatar
Linus Torvalds committed
48 49

#include <linux/sunrpc/types.h>
50
#include <linux/sunrpc/clnt.h>
Linus Torvalds's avatar
Linus Torvalds committed
51
#include <linux/sunrpc/xdr.h>
52
#include <linux/sunrpc/msg_prot.h>
Linus Torvalds's avatar
Linus Torvalds committed
53 54
#include <linux/sunrpc/svcsock.h>
#include <linux/sunrpc/stats.h>
55
#include <linux/sunrpc/xprt.h>
Linus Torvalds's avatar
Linus Torvalds committed
56

57 58
#include "sunrpc.h"

59
#define RPCDBG_FACILITY	RPCDBG_SVCXPRT
Linus Torvalds's avatar
Linus Torvalds committed
60 61 62


static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *,
63
					 int flags);
Linus Torvalds's avatar
Linus Torvalds committed
64 65
static int		svc_udp_recvfrom(struct svc_rqst *);
static int		svc_udp_sendto(struct svc_rqst *);
66
static void		svc_sock_detach(struct svc_xprt *);
67
static void		svc_tcp_sock_detach(struct svc_xprt *);
68
static void		svc_sock_free(struct svc_xprt *);
Linus Torvalds's avatar
Linus Torvalds committed
69

70
static struct svc_xprt *svc_create_socket(struct svc_serv *, int,
71 72
					  struct net *, struct sockaddr *,
					  int, int);
73
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
74 75 76 77
static struct svc_xprt *svc_bc_create_socket(struct svc_serv *, int,
					     struct net *, struct sockaddr *,
					     int, int);
static void svc_bc_sock_free(struct svc_xprt *xprt);
78
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
79

80 81 82 83
#ifdef CONFIG_DEBUG_LOCK_ALLOC
/* Lock class keys, one slot per address family: [0] AF_INET, [1] AF_INET6. */
static struct lock_class_key svc_key[2];
static struct lock_class_key svc_slock_key[2];

/*
 * Re-initialise the lock classes of a server socket with NFSD-specific
 * names, selected by the socket's address family.  Does nothing (apart
 * from a one-time warning) if the socket may not be reclassified, and
 * BUG()s on any family other than AF_INET/AF_INET6.
 */
static void svc_reclassify_socket(struct socket *sock)
{
	struct sock *sk = sock->sk;

	if (WARN_ON_ONCE(!sock_allow_reclassification(sk)))
		return;

	if (sk->sk_family == AF_INET) {
		sock_lock_init_class_and_name(sk, "slock-AF_INET-NFSD",
					      &svc_slock_key[0],
					      "sk_xprt.xpt_lock-AF_INET-NFSD",
					      &svc_key[0]);
	} else if (sk->sk_family == AF_INET6) {
		sock_lock_init_class_and_name(sk, "slock-AF_INET6-NFSD",
					      &svc_slock_key[1],
					      "sk_xprt.xpt_lock-AF_INET6-NFSD",
					      &svc_key[1]);
	} else {
		BUG();
	}
}
#else
/* Without CONFIG_DEBUG_LOCK_ALLOC there is nothing to reclassify. */
static void svc_reclassify_socket(struct socket *sock)
{
}
#endif

Linus Torvalds's avatar
Linus Torvalds committed
116 117 118
/*
 * Release an skbuff after use
 */
119
static void svc_release_skb(struct svc_rqst *rqstp)
Linus Torvalds's avatar
Linus Torvalds committed
120
{
121
	struct sk_buff *skb = rqstp->rq_xprt_ctxt;
Linus Torvalds's avatar
Linus Torvalds committed
122 123

	if (skb) {
124 125
		struct svc_sock *svsk =
			container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
126
		rqstp->rq_xprt_ctxt = NULL;
Linus Torvalds's avatar
Linus Torvalds committed
127 128

		dprintk("svc: service %p, releasing skb %p\n", rqstp, skb);
129
		skb_free_datagram_locked(svsk->sk_sk, skb);
Linus Torvalds's avatar
Linus Torvalds committed
130 131 132
	}
}

133 134 135 136 137 138 139 140 141 142 143 144
/*
 * UDP flavour of the release hook: detach the skb stored in
 * rq_xprt_ctxt (if any) and hand it to consume_skb().
 */
static void svc_release_udp_skb(struct svc_rqst *rqstp)
{
	struct sk_buff *skb = rqstp->rq_xprt_ctxt;

	if (!skb)
		return;

	rqstp->rq_xprt_ctxt = NULL;

	dprintk("svc: service %p, releasing skb %p\n", rqstp, skb);
	consume_skb(skb);
}

145 146 147 148
/*
 * Large enough to hold either the IPv4 or the IPv6 packet-info payload;
 * used only to size the control-message buffers below.
 */
union svc_pktinfo_u {
	struct in_pktinfo	pkti;
	struct in6_pktinfo	pkti6;
};

/* Control buffer space needed for one packet-info control message. */
#define SVC_PKTINFO_SPACE	CMSG_SPACE(sizeof(union svc_pktinfo_u))
151 152 153

static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh)
{
154 155 156
	struct svc_sock *svsk =
		container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
	switch (svsk->sk_sk->sk_family) {
157 158 159 160 161 162
	case AF_INET: {
			struct in_pktinfo *pki = CMSG_DATA(cmh);

			cmh->cmsg_level = SOL_IP;
			cmh->cmsg_type = IP_PKTINFO;
			pki->ipi_ifindex = 0;
163 164
			pki->ipi_spec_dst.s_addr =
				 svc_daddr_in(rqstp)->sin_addr.s_addr;
165 166 167
			cmh->cmsg_len = CMSG_LEN(sizeof(*pki));
		}
		break;
168

169 170
	case AF_INET6: {
			struct in6_pktinfo *pki = CMSG_DATA(cmh);
171
			struct sockaddr_in6 *daddr = svc_daddr_in6(rqstp);
172 173 174

			cmh->cmsg_level = SOL_IPV6;
			cmh->cmsg_type = IPV6_PKTINFO;
175
			pki->ipi6_ifindex = daddr->sin6_scope_id;
176
			pki->ipi6_addr = daddr->sin6_addr;
177 178 179 180 181 182
			cmh->cmsg_len = CMSG_LEN(sizeof(*pki));
		}
		break;
	}
}

Linus Torvalds's avatar
Linus Torvalds committed
183
/*
184
 * send routine intended to be shared by the fore- and back-channel
Linus Torvalds's avatar
Linus Torvalds committed
185
 */
186 187 188
int svc_send_common(struct socket *sock, struct xdr_buf *xdr,
		    struct page *headpage, unsigned long headoffset,
		    struct page *tailpage, unsigned long tailoffset)
Linus Torvalds's avatar
Linus Torvalds committed
189 190 191 192 193 194
{
	int		result;
	int		size;
	struct page	**ppage = xdr->pages;
	size_t		base = xdr->page_base;
	unsigned int	pglen = xdr->page_len;
195
	unsigned int	flags = MSG_MORE | MSG_SENDPAGE_NOTLAST;
196 197
	int		slen;
	int		len = 0;
Linus Torvalds's avatar
Linus Torvalds committed
198 199 200 201 202 203

	slen = xdr->len;

	/* send head */
	if (slen == xdr->head[0].iov_len)
		flags = 0;
204
	len = kernel_sendpage(sock, headpage, headoffset,
205
				  xdr->head[0].iov_len, flags);
Linus Torvalds's avatar
Linus Torvalds committed
206 207 208 209 210 211 212 213 214 215 216
	if (len != xdr->head[0].iov_len)
		goto out;
	slen -= xdr->head[0].iov_len;
	if (slen == 0)
		goto out;

	/* send page data */
	size = PAGE_SIZE - base < pglen ? PAGE_SIZE - base : pglen;
	while (pglen > 0) {
		if (slen == size)
			flags = 0;
217
		result = kernel_sendpage(sock, *ppage, base, size, flags);
Linus Torvalds's avatar
Linus Torvalds committed
218 219 220 221 222 223 224 225 226 227
		if (result > 0)
			len += result;
		if (result != size)
			goto out;
		slen -= size;
		pglen -= size;
		size = PAGE_SIZE < pglen ? PAGE_SIZE : pglen;
		base = 0;
		ppage++;
	}
228

Linus Torvalds's avatar
Linus Torvalds committed
229 230
	/* send tail */
	if (xdr->tail[0].iov_len) {
231 232
		result = kernel_sendpage(sock, tailpage, tailoffset,
				   xdr->tail[0].iov_len, 0);
Linus Torvalds's avatar
Linus Torvalds committed
233 234 235
		if (result > 0)
			len += result;
	}
236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270

out:
	return len;
}


/*
 * Generic sendto routine
 */
static int svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr)
{
	struct svc_sock	*svsk =
		container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
	struct socket	*sock = svsk->sk_sock;
	union {
		struct cmsghdr	hdr;
		long		all[SVC_PKTINFO_SPACE / sizeof(long)];
	} buffer;
	struct cmsghdr *cmh = &buffer.hdr;
	int		len = 0;
	unsigned long tailoff;
	unsigned long headoff;
	RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);

	if (rqstp->rq_prot == IPPROTO_UDP) {
		struct msghdr msg = {
			.msg_name	= &rqstp->rq_addr,
			.msg_namelen	= rqstp->rq_addrlen,
			.msg_control	= cmh,
			.msg_controllen	= sizeof(buffer),
			.msg_flags	= MSG_MORE,
		};

		svc_set_cmsg_data(rqstp, cmh);

271
		if (sock_sendmsg(sock, &msg) < 0)
272 273 274 275 276 277 278 279
			goto out;
	}

	tailoff = ((unsigned long)xdr->tail[0].iov_base) & (PAGE_SIZE-1);
	headoff = 0;
	len = svc_send_common(sock, xdr, rqstp->rq_respages[0], headoff,
			       rqstp->rq_respages[0], tailoff);

Linus Torvalds's avatar
Linus Torvalds committed
280
out:
281
	dprintk("svc: socket %p sendto([%p %zu... ], %d) = %d (addr %s)\n",
282
		svsk, xdr->head[0].iov_base, xdr->head[0].iov_len,
283
		xdr->len, len, svc_print_addr(rqstp, buf, sizeof(buf)));
Linus Torvalds's avatar
Linus Torvalds committed
284 285 286 287

	return len;
}

288 289 290
/*
 * Report socket names for nfsdfs
 */
291
static int svc_one_sock_name(struct svc_sock *svsk, char *buf, int remaining)
292
{
293 294 295
	const struct sock *sk = svsk->sk_sk;
	const char *proto_name = sk->sk_protocol == IPPROTO_UDP ?
							"udp" : "tcp";
296 297
	int len;

298
	switch (sk->sk_family) {
299 300
	case PF_INET:
		len = snprintf(buf, remaining, "ipv4 %s %pI4 %d\n",
301
				proto_name,
302 303
				&inet_sk(sk)->inet_rcv_saddr,
				inet_sk(sk)->inet_num);
304
		break;
305
#if IS_ENABLED(CONFIG_IPV6)
306 307
	case PF_INET6:
		len = snprintf(buf, remaining, "ipv6 %s %pI6 %d\n",
308
				proto_name,
309
				&sk->sk_v6_rcv_saddr,
310
				inet_sk(sk)->inet_num);
311
		break;
312
#endif
313
	default:
314
		len = snprintf(buf, remaining, "*unknown-%d*\n",
315
				sk->sk_family);
316
	}
317 318 319 320

	if (len >= remaining) {
		*buf = '\0';
		return -ENAMETOOLONG;
321 322 323 324
	}
	return len;
}

Linus Torvalds's avatar
Linus Torvalds committed
325 326 327
/*
 * Generic recvfrom routine.
 */
328 329
static int svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr,
			int buflen)
Linus Torvalds's avatar
Linus Torvalds committed
330
{
331 332
	struct svc_sock *svsk =
		container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
333 334 335 336
	struct msghdr msg = {
		.msg_flags	= MSG_DONTWAIT,
	};
	int len;
Linus Torvalds's avatar
Linus Torvalds committed
337

338 339
	rqstp->rq_xprt_hlen = 0;

340
	clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
341 342
	len = kernel_recvmsg(svsk->sk_sock, &msg, iov, nr, buflen,
				msg.msg_flags);
343 344 345 346 347
	/* If we read a full record, then assume there may be more
	 * data to read (stream based sockets only!)
	 */
	if (len == buflen)
		set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
Linus Torvalds's avatar
Linus Torvalds committed
348

349
	dprintk("svc: socket %p recvfrom(%p, %zu) = %d\n",
350
		svsk, iov[0].iov_base, iov[0].iov_len, len);
Linus Torvalds's avatar
Linus Torvalds committed
351 352 353
	return len;
}

354 355 356 357 358
static int svc_partial_recvfrom(struct svc_rqst *rqstp,
				struct kvec *iov, int nr,
				int buflen, unsigned int base)
{
	size_t save_iovlen;
359
	void *save_iovbase;
360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380
	unsigned int i;
	int ret;

	if (base == 0)
		return svc_recvfrom(rqstp, iov, nr, buflen);

	for (i = 0; i < nr; i++) {
		if (iov[i].iov_len > base)
			break;
		base -= iov[i].iov_len;
	}
	save_iovlen = iov[i].iov_len;
	save_iovbase = iov[i].iov_base;
	iov[i].iov_len -= base;
	iov[i].iov_base += base;
	ret = svc_recvfrom(rqstp, &iov[i], nr - i, buflen);
	iov[i].iov_len = save_iovlen;
	iov[i].iov_base = save_iovbase;
	return ret;
}

Linus Torvalds's avatar
Linus Torvalds committed
381 382 383
/*
 * Set socket snd and rcv buffer lengths
 */
384 385
static void svc_sock_setbufsize(struct socket *sock, unsigned int snd,
				unsigned int rcv)
Linus Torvalds's avatar
Linus Torvalds committed
386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402
{
#if 0
	mm_segment_t	oldfs;
	oldfs = get_fs(); set_fs(KERNEL_DS);
	sock_setsockopt(sock, SOL_SOCKET, SO_SNDBUF,
			(char*)&snd, sizeof(snd));
	sock_setsockopt(sock, SOL_SOCKET, SO_RCVBUF,
			(char*)&rcv, sizeof(rcv));
#else
	/* sock_setsockopt limits use to sysctl_?mem_max,
	 * which isn't acceptable.  Until that is made conditional
	 * on not having CAP_SYS_RESOURCE or similar, we go direct...
	 * DaveM said I could!
	 */
	lock_sock(sock->sk);
	sock->sk->sk_sndbuf = snd * 2;
	sock->sk->sk_rcvbuf = rcv * 2;
403
	sock->sk->sk_write_space(sock->sk);
Linus Torvalds's avatar
Linus Torvalds committed
404 405 406
	release_sock(sock->sk);
#endif
}
407 408 409 410 411 412

/* Report whether the request's source address uses a privileged port. */
static int svc_sock_secure_port(struct svc_rqst *rqstp)
{
	struct sockaddr *peer = svc_addr(rqstp);

	return svc_port_is_privileged(peer);
}

Linus Torvalds's avatar
Linus Torvalds committed
413 414 415
/*
 * INET callback when data has been received on the socket.
 */
416
static void svc_data_ready(struct sock *sk)
Linus Torvalds's avatar
Linus Torvalds committed
417
{
418
	struct svc_sock	*svsk = (struct svc_sock *)sk->sk_user_data;
Linus Torvalds's avatar
Linus Torvalds committed
419

420
	if (svsk) {
421 422
		dprintk("svc: socket %p(inet %p), busy=%d\n",
			svsk, sk,
423
			test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags));
424 425 426

		/* Refer to svc_setup_socket() for details. */
		rmb();
427
		svsk->sk_odata(sk);
428 429
		if (!test_and_set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags))
			svc_xprt_enqueue(&svsk->sk_xprt);
430
	}
Linus Torvalds's avatar
Linus Torvalds committed
431 432 433 434 435
}

/*
 * INET callback when space is newly available on the socket.
 */
436
static void svc_write_space(struct sock *sk)
Linus Torvalds's avatar
Linus Torvalds committed
437 438 439 440 441
{
	struct svc_sock	*svsk = (struct svc_sock *)(sk->sk_user_data);

	if (svsk) {
		dprintk("svc: socket %p(inet %p), write_space busy=%d\n",
442
			svsk, sk, test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags));
443 444 445

		/* Refer to svc_setup_socket() for details. */
		rmb();
446
		svsk->sk_owspace(sk);
447
		svc_xprt_enqueue(&svsk->sk_xprt);
Linus Torvalds's avatar
Linus Torvalds committed
448 449 450
	}
}

451 452
static int svc_tcp_has_wspace(struct svc_xprt *xprt)
{
453
	struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
454 455 456

	if (test_bit(XPT_LISTENER, &xprt->xpt_flags))
		return 1;
457
	return !test_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
458 459
}

460 461 462 463 464 465 466 467 468 469 470 471 472 473 474
/*
 * Enable SO_LINGER with a zero linger time on a temporary TCP
 * transport's socket.
 */
static void svc_tcp_kill_temp_xprt(struct svc_xprt *xprt)
{
	struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
	struct linger no_linger = {
		.l_onoff = 1,
		.l_linger = 0,
	};

	kernel_setsockopt(svsk->sk_sock, SOL_SOCKET, SO_LINGER,
			  (char *)&no_linger, sizeof(no_linger));
}

475 476 477 478 479 480 481
/*
 * See net/ipv4/ip_sockglue.c : ip_cmsg_recv_pktinfo
 */
static int svc_udp_get_dest_address4(struct svc_rqst *rqstp,
				     struct cmsghdr *cmh)
{
	struct in_pktinfo *pki = CMSG_DATA(cmh);
482 483
	struct sockaddr_in *daddr = svc_daddr_in(rqstp);

484 485
	if (cmh->cmsg_type != IP_PKTINFO)
		return 0;
486 487 488

	daddr->sin_family = AF_INET;
	daddr->sin_addr.s_addr = pki->ipi_spec_dst.s_addr;
489 490 491 492
	return 1;
}

/*
493
 * See net/ipv6/datagram.c : ip6_datagram_recv_ctl
494 495 496 497 498
 */
static int svc_udp_get_dest_address6(struct svc_rqst *rqstp,
				     struct cmsghdr *cmh)
{
	struct in6_pktinfo *pki = CMSG_DATA(cmh);
499 500
	struct sockaddr_in6 *daddr = svc_daddr_in6(rqstp);

501 502
	if (cmh->cmsg_type != IPV6_PKTINFO)
		return 0;
503 504

	daddr->sin6_family = AF_INET6;
505
	daddr->sin6_addr = pki->ipi6_addr;
506
	daddr->sin6_scope_id = pki->ipi6_ifindex;
507 508 509
	return 1;
}

510 511 512 513 514 515 516
/*
 * Copy the UDP datagram's destination address to the rqstp structure.
 * The 'destination' address in this case is the address to which the
 * peer sent the datagram, i.e. our local address. For multihomed
 * hosts, this can change from msg to msg. Note that only the IP
 * address changes, the port number should remain the same.
 */
517 518
static int svc_udp_get_dest_address(struct svc_rqst *rqstp,
				    struct cmsghdr *cmh)
519
{
520 521 522 523 524
	switch (cmh->cmsg_level) {
	case SOL_IP:
		return svc_udp_get_dest_address4(rqstp, cmh);
	case SOL_IPV6:
		return svc_udp_get_dest_address6(rqstp, cmh);
525
	}
526 527

	return 0;
528 529
}

Linus Torvalds's avatar
Linus Torvalds committed
530 531 532
/*
 * Receive a datagram from a UDP socket.
 */
533
static int svc_udp_recvfrom(struct svc_rqst *rqstp)
Linus Torvalds's avatar
Linus Torvalds committed
534
{
535 536
	struct svc_sock	*svsk =
		container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
537
	struct svc_serv	*serv = svsk->sk_xprt.xpt_server;
Linus Torvalds's avatar
Linus Torvalds committed
538
	struct sk_buff	*skb;
539 540 541 542 543
	union {
		struct cmsghdr	hdr;
		long		all[SVC_PKTINFO_SPACE / sizeof(long)];
	} buffer;
	struct cmsghdr *cmh = &buffer.hdr;
544 545 546 547 548 549
	struct msghdr msg = {
		.msg_name = svc_addr(rqstp),
		.msg_control = cmh,
		.msg_controllen = sizeof(buffer),
		.msg_flags = MSG_DONTWAIT,
	};
550 551
	size_t len;
	int err;
Linus Torvalds's avatar
Linus Torvalds committed
552

553
	if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags))
Linus Torvalds's avatar
Linus Torvalds committed
554 555 556
	    /* udp sockets need large rcvbuf as all pending
	     * requests are still in that buffer.  sndbuf must
	     * also be large enough that there is enough space
557 558 559 560
	     * for one reply per thread.  We count all threads
	     * rather than threads in a particular pool, which
	     * provides an upper bound on the number of threads
	     * which will access the socket.
Linus Torvalds's avatar
Linus Torvalds committed
561 562
	     */
	    svc_sock_setbufsize(svsk->sk_sock,
563 564
				(serv->sv_nrthreads+3) * serv->sv_max_mesg,
				(serv->sv_nrthreads+3) * serv->sv_max_mesg);
Linus Torvalds's avatar
Linus Torvalds committed
565

566
	clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
567 568 569 570
	skb = NULL;
	err = kernel_recvmsg(svsk->sk_sock, &msg, NULL,
			     0, 0, MSG_PEEK | MSG_DONTWAIT);
	if (err >= 0)
571
		skb = skb_recv_udp(svsk->sk_sk, 0, 1, &err);
572 573 574 575 576

	if (skb == NULL) {
		if (err != -EAGAIN) {
			/* possibly an icmp error */
			dprintk("svc: recvfrom returned error %d\n", -err);
577
			set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
Linus Torvalds's avatar
Linus Torvalds committed
578
		}
579
		return 0;
Linus Torvalds's avatar
Linus Torvalds committed
580
	}
581 582
	len = svc_addr_len(svc_addr(rqstp));
	rqstp->rq_addrlen = len;
583
	if (skb->tstamp == 0) {
584
		skb->tstamp = ktime_get_real();
585
		/* Don't enable netstamp, sunrpc doesn't
Linus Torvalds's avatar
Linus Torvalds committed
586 587
		   need that much accuracy */
	}
588
	sock_write_timestamp(svsk->sk_sk, skb->tstamp);
589
	set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); /* there may be more data... */
Linus Torvalds's avatar
Linus Torvalds committed
590

591
	len  = skb->len;
Linus Torvalds's avatar
Linus Torvalds committed
592 593
	rqstp->rq_arg.len = len;

594
	rqstp->rq_prot = IPPROTO_UDP;
595

596
	if (!svc_udp_get_dest_address(rqstp, cmh)) {
597 598
		net_warn_ratelimited("svc: received unknown control message %d/%d; dropping RPC reply datagram\n",
				     cmh->cmsg_level, cmh->cmsg_type);
599
		goto out_free;
600
	}
601
	rqstp->rq_daddrlen = svc_addr_len(svc_daddr(rqstp));
Linus Torvalds's avatar
Linus Torvalds committed
602 603 604 605 606 607 608

	if (skb_is_nonlinear(skb)) {
		/* we have to copy */
		local_bh_disable();
		if (csum_partial_copy_to_xdr(&rqstp->rq_arg, skb)) {
			local_bh_enable();
			/* checksum error */
609
			goto out_free;
Linus Torvalds's avatar
Linus Torvalds committed
610 611
		}
		local_bh_enable();
612
		consume_skb(skb);
Linus Torvalds's avatar
Linus Torvalds committed
613 614
	} else {
		/* we can use it in-place */
615
		rqstp->rq_arg.head[0].iov_base = skb->data;
Linus Torvalds's avatar
Linus Torvalds committed
616
		rqstp->rq_arg.head[0].iov_len = len;
617 618
		if (skb_checksum_complete(skb))
			goto out_free;
619
		rqstp->rq_xprt_ctxt = skb;
Linus Torvalds's avatar
Linus Torvalds committed
620 621 622 623 624 625
	}

	rqstp->rq_arg.page_base = 0;
	if (len <= rqstp->rq_arg.head[0].iov_len) {
		rqstp->rq_arg.head[0].iov_len = len;
		rqstp->rq_arg.page_len = 0;
626
		rqstp->rq_respages = rqstp->rq_pages+1;
Linus Torvalds's avatar
Linus Torvalds committed
627 628
	} else {
		rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len;
629
		rqstp->rq_respages = rqstp->rq_pages + 1 +
630
			DIV_ROUND_UP(rqstp->rq_arg.page_len, PAGE_SIZE);
Linus Torvalds's avatar
Linus Torvalds committed
631
	}
632
	rqstp->rq_next_page = rqstp->rq_respages+1;
Linus Torvalds's avatar
Linus Torvalds committed
633 634 635 636 637

	if (serv->sv_stats)
		serv->sv_stats->netudpcnt++;

	return len;
638
out_free:
639
	kfree_skb(skb);
640
	return 0;
Linus Torvalds's avatar
Linus Torvalds committed
641 642 643 644 645 646 647 648 649 650 651 652 653 654 655
}

/*
 * Send the reply in rq_res over UDP.  A result of -ECONNREFUSED is
 * treated as a stale error from an earlier request and the send is
 * retried once.
 */
static int
svc_udp_sendto(struct svc_rqst *rqstp)
{
	int rc = svc_sendto(rqstp, &rqstp->rq_res);

	if (rc != -ECONNREFUSED)
		return rc;

	/* ICMP error on earlier request. */
	return svc_sendto(rqstp, &rqstp->rq_res);
}

Tom Tucker's avatar
Tom Tucker committed
656 657 658 659
/*
 * xpo_prep_reply_hdr implementation for the UDP transport (see
 * svc_udp_ops).  Deliberately empty: nothing is added to the reply here.
 */
static void svc_udp_prep_reply_hdr(struct svc_rqst *rqstp)
{
}

660 661 662
static int svc_udp_has_wspace(struct svc_xprt *xprt)
{
	struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
663
	struct svc_serv	*serv = xprt->xpt_server;
664 665 666 667 668 669 670
	unsigned long required;

	/*
	 * Set the SOCK_NOSPACE flag before checking the available
	 * sock space.
	 */
	set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
671
	required = atomic_read(&svsk->sk_xprt.xpt_reserved) + serv->sv_max_mesg;
672 673 674 675 676 677
	if (required*2 > sock_wspace(svsk->sk_sk))
		return 0;
	clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
	return 1;
}

678 679 680 681 682 683
/*
 * xpo_accept implementation for the UDP transport (see svc_udp_ops).
 * Calling it is treated as a fatal programming error: it hits BUG().
 * The return statement only satisfies the function's signature.
 */
static struct svc_xprt *svc_udp_accept(struct svc_xprt *xprt)
{
	BUG();
	return NULL;
}

684 685 686 687
/*
 * xpo_kill_temp_xprt implementation for the UDP transport (see
 * svc_udp_ops).  Deliberately empty: no action is taken.
 */
static void svc_udp_kill_temp_xprt(struct svc_xprt *xprt)
{
}

688
static struct svc_xprt *svc_udp_create(struct svc_serv *serv,
689
				       struct net *net,
690 691 692
				       struct sockaddr *sa, int salen,
				       int flags)
{
693
	return svc_create_socket(serv, IPPROTO_UDP, net, sa, salen, flags);
694 695
}

696
static const struct svc_xprt_ops svc_udp_ops = {
697
	.xpo_create = svc_udp_create,
698 699
	.xpo_recvfrom = svc_udp_recvfrom,
	.xpo_sendto = svc_udp_sendto,
700
	.xpo_release_rqst = svc_release_udp_skb,
701 702
	.xpo_detach = svc_sock_detach,
	.xpo_free = svc_sock_free,
Tom Tucker's avatar
Tom Tucker committed
703
	.xpo_prep_reply_hdr = svc_udp_prep_reply_hdr,
704
	.xpo_has_wspace = svc_udp_has_wspace,
705
	.xpo_accept = svc_udp_accept,
706
	.xpo_secure_port = svc_sock_secure_port,
707
	.xpo_kill_temp_xprt = svc_udp_kill_temp_xprt,
708 709 710 711
};

static struct svc_xprt_class svc_udp_class = {
	.xcl_name = "udp",
712
	.xcl_owner = THIS_MODULE,
713
	.xcl_ops = &svc_udp_ops,
714
	.xcl_max_payload = RPCSVC_MAXPAYLOAD_UDP,
715
	.xcl_ident = XPRT_TRANSPORT_UDP,
716 717
};

718
static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv)
Linus Torvalds's avatar
Linus Torvalds committed
719
{
720
	int err, level, optname, one = 1;
721

722 723
	svc_xprt_init(sock_net(svsk->sk_sock->sk), &svc_udp_class,
		      &svsk->sk_xprt, serv);
724
	clear_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags);
725
	svsk->sk_sk->sk_data_ready = svc_data_ready;
Linus Torvalds's avatar
Linus Torvalds committed
726 727 728
	svsk->sk_sk->sk_write_space = svc_write_space;

	/* initialise setting must have enough space to
729
	 * receive and respond to one request.
Linus Torvalds's avatar
Linus Torvalds committed
730 731 732
	 * svc_udp_recvfrom will re-adjust if necessary
	 */
	svc_sock_setbufsize(svsk->sk_sock,
733 734
			    3 * svsk->sk_xprt.xpt_server->sv_max_mesg,
			    3 * svsk->sk_xprt.xpt_server->sv_max_mesg);
Linus Torvalds's avatar
Linus Torvalds committed
735

736 737
	/* data might have come in before data_ready set up */
	set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
738
	set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags);
739 740

	/* make sure we get destination address info */
741 742 743 744 745 746 747 748 749 750 751 752 753 754 755
	switch (svsk->sk_sk->sk_family) {
	case AF_INET:
		level = SOL_IP;
		optname = IP_PKTINFO;
		break;
	case AF_INET6:
		level = SOL_IPV6;
		optname = IPV6_RECVPKTINFO;
		break;
	default:
		BUG();
	}
	err = kernel_setsockopt(svsk->sk_sock, level, optname,
					(char *)&one, sizeof(one));
	dprintk("svc: kernel_setsockopt returned %d\n", err);
Linus Torvalds's avatar
Linus Torvalds committed
756 757 758 759 760 761
}

/*
 * A data_ready event on a listening socket means there's a connection
 * pending. Do not use state_change as a substitute for it.
 */
762
static void svc_tcp_listen_data_ready(struct sock *sk)
Linus Torvalds's avatar
Linus Torvalds committed
763
{
764
	struct svc_sock	*svsk = (struct svc_sock *)sk->sk_user_data;
Linus Torvalds's avatar
Linus Torvalds committed
765 766

	dprintk("svc: socket %p TCP (listen) state change %d\n",
767
		sk, sk->sk_state);
Linus Torvalds's avatar
Linus Torvalds committed
768

769 770 771
	if (svsk) {
		/* Refer to svc_setup_socket() for details. */
		rmb();
772
		svsk->sk_odata(sk);
773 774
	}

775 776 777 778 779 780 781 782 783 784 785 786
	/*
	 * This callback may called twice when a new connection
	 * is established as a child socket inherits everything
	 * from a parent LISTEN socket.
	 * 1) data_ready method of the parent socket will be called
	 *    when one of child sockets become ESTABLISHED.
	 * 2) data_ready method of the child socket may be called
	 *    when it receives data before the socket is accepted.
	 * In case of 2, we should ignore it silently.
	 */
	if (sk->sk_state == TCP_LISTEN) {
		if (svsk) {
787
			set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
788
			svc_xprt_enqueue(&svsk->sk_xprt);
789 790
		} else
			printk("svc: socket %p: no user data\n", sk);
Linus Torvalds's avatar
Linus Torvalds committed
791 792 793 794 795 796
	}
}

/*
 * A state change on a connected socket means it's dying or dead.
 */
797
static void svc_tcp_state_change(struct sock *sk)
Linus Torvalds's avatar
Linus Torvalds committed
798
{
799
	struct svc_sock	*svsk = (struct svc_sock *)sk->sk_user_data;
Linus Torvalds's avatar
Linus Torvalds committed
800 801

	dprintk("svc: socket %p TCP (connected) state change %d (svsk %p)\n",
802
		sk, sk->sk_state, sk->sk_user_data);
Linus Torvalds's avatar
Linus Torvalds committed
803

804
	if (!svsk)
Linus Torvalds's avatar
Linus Torvalds committed
805
		printk("svc: socket %p: no user data\n", sk);
806
	else {
807 808
		/* Refer to svc_setup_socket() for details. */
		rmb();
809
		svsk->sk_ostate(sk);
810 811 812 813
		if (sk->sk_state != TCP_ESTABLISHED) {
			set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
			svc_xprt_enqueue(&svsk->sk_xprt);
		}
Linus Torvalds's avatar
Linus Torvalds committed
814 815 816 817 818 819
	}
}

/*
 * Accept a TCP connection
 */
820
static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
Linus Torvalds's avatar
Linus Torvalds committed
821
{
822
	struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
823 824
	struct sockaddr_storage addr;
	struct sockaddr	*sin = (struct sockaddr *) &addr;
825
	struct svc_serv	*serv = svsk->sk_xprt.xpt_server;
Linus Torvalds's avatar
Linus Torvalds committed
826 827 828 829
	struct socket	*sock = svsk->sk_sock;
	struct socket	*newsock;
	struct svc_sock	*newsvsk;
	int		err, slen;
830
	RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
Linus Torvalds's avatar
Linus Torvalds committed
831 832 833

	dprintk("svc: tcp_accept %p sock %p\n", svsk, sock);
	if (!sock)
834
		return NULL;
Linus Torvalds's avatar
Linus Torvalds committed
835

836
	clear_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
837 838
	err = kernel_accept(sock, &newsock, O_NONBLOCK);
	if (err < 0) {
Linus Torvalds's avatar
Linus Torvalds committed
839 840 841
		if (err == -ENOMEM)
			printk(KERN_WARNING "%s: no more sockets!\n",
			       serv->sv_name);
842 843 844
		else if (err != -EAGAIN)
			net_warn_ratelimited("%s: accept failed (err %d)!\n",
					     serv->sv_name, -err);
845
		return NULL;
Linus Torvalds's avatar
Linus Torvalds committed
846
	}
847
	set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
Linus Torvalds's avatar
Linus Torvalds committed
848

849
	err = kernel_getpeername(newsock, sin, &slen);
Linus Torvalds's avatar
Linus Torvalds committed
850
	if (err < 0) {
851 852
		net_warn_ratelimited("%s: peername failed (err %d)!\n",
				     serv->sv_name, -err);
Linus Torvalds's avatar
Linus Torvalds committed
853 854 855 856
		goto failed;		/* aborted connection or whatever */
	}

	/* Ideally, we would want to reject connections from unauthorized
857 858
	 * hosts here, but when we get encryption, the IP of the host won't
	 * tell us anything.  For now just warn about unpriv connections.
Linus Torvalds's avatar
Linus Torvalds committed
859
	 */
860
	if (!svc_port_is_privileged(sin)) {
861
		dprintk("%s: connect from unprivileged port: %s\n",
862
			serv->sv_name,
863
			__svc_print_addr(sin, buf, sizeof(buf)));
Linus Torvalds's avatar
Linus Torvalds committed
864
	}
865
	dprintk("%s: connect from %s\n", serv->sv_name,
866
		__svc_print_addr(sin, buf, sizeof(buf)));
Linus Torvalds's avatar
Linus Torvalds committed
867

868 869 870 871 872
	/* Reset the inherited callbacks before calling svc_setup_socket */
	newsock->sk->sk_state_change = svsk->sk_ostate;
	newsock->sk->sk_data_ready = svsk->sk_odata;
	newsock->sk->sk_write_space = svsk->sk_owspace;

Linus Torvalds's avatar
Linus Torvalds committed
873 874 875 876 877
	/* make sure that a write doesn't block forever when
	 * low on memory
	 */
	newsock->sk->sk_sndtimeo = HZ*30;

878 879 880
	newsvsk = svc_setup_socket(serv, newsock,
				 (SVC_SOCK_ANONYMOUS | SVC_SOCK_TEMPORARY));
	if (IS_ERR(newsvsk))
Linus Torvalds's avatar
Linus Torvalds committed
881
		goto failed;
882
	svc_xprt_set_remote(&newsvsk->sk_xprt, sin, slen);
883 884 885 886 887
	err = kernel_getsockname(newsock, sin, &slen);
	if (unlikely(err < 0)) {
		dprintk("svc_tcp_accept: kernel_getsockname error %d\n", -err);
		slen = offsetof(struct sockaddr, sa_data);
	}
888
	svc_xprt_set_local(&newsvsk->sk_xprt, sin, slen);
889

890 891 892 893
	if (sock_is_loopback(newsock->sk))
		set_bit(XPT_LOCAL, &newsvsk->sk_xprt.xpt_flags);
	else
		clear_bit(XPT_LOCAL, &newsvsk->sk_xprt.xpt_flags);
894 895 896 897 898 899 900 901 902 903
	if (serv->sv_stats)
		serv->sv_stats->nettcpconn++;

	return &newsvsk->sk_xprt;

failed:
	sock_release(newsock);
	return NULL;
}

904 905 906 907
static unsigned int svc_tcp_restore_pages(struct svc_sock *svsk, struct svc_rqst *rqstp)
{
	unsigned int i, len, npages;

908
	if (svsk->sk_datalen == 0)
909
		return 0;
910
	len = svsk->sk_datalen;
911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926
	npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
	for (i = 0; i < npages; i++) {
		if (rqstp->rq_pages[i] != NULL)
			put_page(rqstp->rq_pages[i]);
		BUG_ON(svsk->sk_pages[i] == NULL);
		rqstp->rq_pages[i] = svsk->sk_pages[i];
		svsk->sk_pages[i] = NULL;
	}
	rqstp->rq_arg.head[0].iov_base = page_address(rqstp->rq_pages[0]);
	return len;
}

static void svc_tcp_save_pages(struct svc_sock *svsk, struct svc_rqst *rqstp)
{
	unsigned int i, len, npages;

927
	if (svsk->sk_datalen == 0)
928
		return;
929
	len = svsk->sk_datalen;
930 931 932 933 934 935 936 937 938 939 940
	npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
	for (i = 0; i < npages; i++) {
		svsk->sk_pages[i] = rqstp->rq_pages[i];
		rqstp->rq_pages[i] = NULL;
	}
}

static void svc_tcp_clear_pages(struct svc_sock *svsk)
{
	unsigned int i, len, npages;

941
	if (svsk->sk_datalen == 0)
942
		goto out;
943
	len = svsk->sk_datalen;
944 945
	npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
	for (i = 0; i < npages; i++) {
946 947 948 949
		if (svsk->sk_pages[i] == NULL) {
			WARN_ON_ONCE(1);
			continue;
		}
950 951 952 953 954
		put_page(svsk->sk_pages[i]);
		svsk->sk_pages[i] = NULL;
	}
out:
	svsk->sk_tcplen = 0;
955
	svsk->sk_datalen = 0;
956 957
}

Linus Torvalds's avatar
Linus Torvalds committed
958
/*
959
 * Receive fragment record header.
960
 * If we haven't gotten the record length yet, get the next four bytes.
Linus Torvalds's avatar
Linus Torvalds committed
961
 */
962
static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp)
Linus Torvalds's avatar
Linus Torvalds committed
963
{
964
	struct svc_serv	*serv = svsk->sk_xprt.xpt_server;
965
	unsigned int want;
966
	int len;
Linus Torvalds's avatar
Linus Torvalds committed
967

968
	if (svsk->sk_tcplen < sizeof(rpc_fraghdr)) {
Linus Torvalds's avatar
Linus Torvalds committed
969 970
		struct kvec	iov;

971
		want = sizeof(rpc_fraghdr) - svsk->sk_tcplen;
Linus Torvalds's avatar
Linus Torvalds committed
972 973 974 975 976 977 978
		iov.iov_base = ((char *) &svsk->sk_reclen) + svsk->sk_tcplen;
		iov.iov_len  = want;
		if ((len = svc_recvfrom(rqstp, &iov, 1, want)) < 0)
			goto error;
		svsk->sk_tcplen += len;

		if (len < want) {
979 980
			dprintk("svc: short recvfrom while reading record "
				"length (%d of %d)\n", len, want);
981
			return -EAGAIN;
Linus Torvalds's avatar
Linus Torvalds committed
982 983
		}

984
		dprintk("svc: TCP record, %d bytes\n", svc_sock_reclen(svsk));
985 986
		if (svc_sock_reclen(svsk) + svsk->sk_datalen >
							serv->sv_max_mesg) {
J. Bruce Fields's avatar
J. Bruce Fields committed
987 988
			net_notice_ratelimited("RPC: fragment too large: %d\n",
					svc_sock_reclen(svsk));
Linus Torvalds's avatar
Linus Torvalds committed
989 990 991 992
			goto err_delete;
		}
	}

993
	return svc_sock_reclen(svsk);
994 995
error:
	dprintk("RPC: TCP recv_record got %d\n", len);
996
	return len;
997
err_delete:
998 999 1000 1001
	set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
	return -EAGAIN;
}

1002
static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp)
1003
{
1004
	struct rpc_xprt *bc_xprt = svsk->sk_xprt.xpt_bc_xprt;
1005
	struct rpc_rqst *req = NULL;
1006 1007
	struct kvec *src, *dst;
	__be32 *p = (__be32 *)rqstp->rq_arg.head[0].iov_base;
1008 1009
	__be32 xid;
	__be32 calldir;
1010 1011 1012 1013

	xid = *p++;
	calldir = *p;

1014
	if (!bc_xprt)
1015
		return -EAGAIN;
1016
	spin_lock(&bc_xprt->recv_lock);
1017 1018 1019
	req = xprt_lookup_rqst(bc_xprt, xid);
	if (!req)
		goto unlock_notfound;
1020 1021 1022 1023 1024 1025 1026 1027 1028 1029

	memcpy(&req->rq_private_buf, &req->rq_rcv_buf, sizeof(struct xdr_buf));
	/*
	 * XXX!: cheating for now!  Only copying HEAD.
	 * But we know this is good enough for now (in fact, for any
	 * callback reply in the forseeable future).
	 */
	dst = &req->rq_private_buf.head[0];
	src = &rqstp->rq_arg.head[0];
	if (dst->iov_len < src->iov_len)
1030
		goto unlock_eagain; /* whatever; just giving up. */
1031
	memcpy(dst->iov_base, src->iov_base, src->iov_len);
1032
	xprt_complete_rqst(req->rq_task, rqstp->rq_arg.len);
1033
	rqstp->rq_arg.len = 0;
1034
	spin_unlock(&bc_xprt->recv_lock);
1035
	return 0;
1036 1037 1038 1039 1040 1041 1042
unlock_notfound:
	printk(KERN_NOTICE
		"%s: Got unrecognized reply: "
		"calldir 0x%x xpt_bc_xprt %p xid %08x\n",
		__func__, ntohl(calldir),
		bc_xprt, ntohl(xid));
unlock_eagain:
1043
	spin_unlock(&bc_xprt->recv_lock);
1044
	return -EAGAIN;
1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058
}

/*
 * Describe enough whole pages to cover @len bytes as kvec entries.
 *
 * Entry n of @vec is pointed at the address of pages[n] with a length
 * of PAGE_SIZE.  Returns the number of entries filled in, which is 0
 * when @len is not positive.
 */
static int copy_pages_to_kvecs(struct kvec *vec, struct page **pages, int len)
{
	int nvecs = 0;
	int covered;

	for (covered = 0; covered < len; covered += PAGE_SIZE) {
		vec[nvecs].iov_base = page_address(pages[nvecs]);
		vec[nvecs].iov_len = PAGE_SIZE;
		nvecs++;
	}

	return nvecs;
}

1061 1062 1063 1064 1065 1066 1067 1068 1069
/*
 * A complete fragment has arrived: log whether it was the final one and
 * its length, then clear the per-fragment counters (sk_reclen and
 * sk_tcplen) on the socket.
 */
static void svc_tcp_fragment_received(struct svc_sock *svsk)
{
	/* must be logged before sk_reclen is cleared below */
	dprintk("svc: TCP %s record (%d bytes)\n",
		svc_sock_final_rec(svsk) ? "final" : "nonfinal",
		svc_sock_reclen(svsk));

	svsk->sk_reclen = 0;
	svsk->sk_tcplen = 0;
}
1070

1071 1072 1073 1074 1075 1076 1077 1078 1079 1080
/*
 * Receive data from a TCP socket.
 */
static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
{
	struct svc_sock	*svsk =
		container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
	struct svc_serv	*serv = svsk->sk_xprt.xpt_server;
	int		len;
	struct kvec *vec;
1081
	unsigned int want, base;
1082 1083
	__be32 *p;
	__be32 calldir;
1084
	int pnum;
1085 1086 1087 1088 1089 1090 1091 1092 1093 1094

	dprintk("svc: tcp_recv %p data %d conn %d close %d\n",
		svsk, test_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags),
		test_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags),
		test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags));

	len = svc_tcp_recv_record(svsk, rqstp);
	if (len < 0)
		goto error;

1095
	base = svc_tcp_restore_pages(svsk, rqstp);
1096
	want = svc_sock_reclen(svsk) - (svsk->sk_tcplen - sizeof(rpc_fraghdr));
1097

1098
	vec = rqstp->rq_vec;
1099

1100
	pnum = copy_pages_to_kvecs(&vec[0], &rqstp->rq_pages[0],
1101
						svsk->sk_datalen + want);
1102

1103
	rqstp->rq_respages = &rqstp->rq_pages[pnum];
1104
	rqstp->rq_next_page = rqstp->rq_respages + 1;
Linus Torvalds's avatar
Linus Torvalds committed
1105 1106

	/* Now receive data */
1107
	len = svc_partial_recvfrom(rqstp, vec, pnum, want, base);
1108
	if (len >= 0) {
1109
		svsk->sk_tcplen += len;
1110 1111
		svsk->sk_datalen += len;
	}
1112
	if (len != want || !svc_sock_final_rec(svsk)) {
1113
		svc_tcp_save_pages(svsk, rqstp);
1114
		if (len < 0 && len != -EAGAIN)
1115
			goto err_delete;
1116 1117 1118
		if (len == want)
			svc_tcp_fragment_received(svsk);
		else
J. Bruce Fields's avatar
J. Bruce Fields committed
1119 1120
			dprintk("svc: incomplete TCP record (%d of %d)\n",
				(int)(svsk->sk_tcplen - sizeof(rpc_fraghdr)),
1121
				svc_sock_reclen(svsk));
1122 1123
		goto err_noclose;
	}
Linus Torvalds's avatar
Linus Torvalds committed
1124

1125
	if (svsk->sk_datalen < 8) {
1126
		svsk->sk_datalen = 0;
1127
		goto err_delete; /* client is nuts. */
1128
	}