[prev in list] [next in list] [prev in thread] [next in thread] 

List:       linux-virtualization
Subject:    Re: [RFC] virtio: orphan skbs if we're relying on timer to free them
From:       Rusty Russell <rusty () rustcorp ! com ! au>
Date:       2009-05-25 11:13:47
Message-ID: 200905252031.47868.rusty () rustcorp ! com ! au
[Download RAW message or body]

On Thu, 21 May 2009 04:45:03 pm David Miller wrote:
> Probably the most profitable avenue is to see if this is a real issue
> afterall (see above).  If we can get away with having the socket
> buffer represent socket --> device space only, that's the most ideal
> solution.  It will probably also improve performance a lot across the
> board, especially on NUMA/SMP boxes as our TX complete events tend to
> be in difference places than the SKB producer.

OK, hacked in skb_orphan() and couldn't see any behavior change here.

Details: I wrote a little test program to do a 1 byte ping pong on a TCP
socket while doing 1k writes to a number of udp sockets.  We never completely
starve the TCP connection, though that may be because it begins at the same
time as the others and gets some traffic before the buffers fill up.

Running it with 0 to 20 sockets, from my gigabit laptop to my 100Mbit
desktop.  Sorry about the attachment, but sometimes a graph is worth about
1k words.

Not sure why write rate shoots up on serious overload though.  Perhaps we're
discarding earlier in the path?

Cheers,
Rusty.

diff --git a/net/core/dev.c b/net/core/dev.c
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1677,6 +1677,8 @@ int dev_hard_start_xmit(struct sk_buff *
 	const struct net_device_ops *ops = dev->netdev_ops;
 	int rc;
 
+	skb_orphan(skb);
+
 	if (likely(!skb->next)) {
 		if (!list_empty(&ptype_all))
 			dev_queue_xmit_nit(skb, dev);

Sender: for i in `seq 0 20`; do ~/devel/junk/udpstress 9999 $i debussy 5; sleep 1; done
Receiver: for i in `seq 0 20`; do ./udpstress -r 9999 $i; done

/* udpstress.c: Simple server & client to test multiple UDP streams. */
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/sendfile.h>
#include <sys/time.h>
#include <time.h>
#include <netinet/in.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sys/wait.h>
#include <sys/mman.h>
#include <err.h>
#include <netdb.h>

static volatile unsigned int bytes;

static void dumpstats(int signal)
{
	char buffer[128];

	sprintf(buffer, "%u\n", bytes);
	write(STDOUT_FILENO, buffer, strlen(buffer));
	_exit(0);
}

static void tcp_dumpstats(int signal)
{
	char buffer[128];

	sprintf(buffer, "TCP: %u\n", bytes);
	write(STDOUT_FILENO, buffer, strlen(buffer));
	_exit(0);
}

static void udp_receiver(unsigned int port)
{
	struct sockaddr_in saddr;
	int sock, ret;
	char buffer[65536];
	int val = 1;

	sock = socket(PF_INET, SOCK_DGRAM, 0);
	if (sock < 0)
		err(1, "creating socket");

	if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(&val)) != 0)
		err(1, "setsockopt(SO_REUSEADDR)");

	/* Wait for incoming. */
	saddr.sin_family = AF_INET;
	saddr.sin_port = htons(port);
	saddr.sin_addr.s_addr = INADDR_ANY;
	if (bind(sock, (struct sockaddr *)&saddr, sizeof(saddr)) != 0)
		err(1, "bind to port %u", port);

	/* Read packets until we get signal. */
	signal(SIGALRM, dumpstats);

	while ((ret = read(sock, buffer, sizeof(buffer))) > 0)
		bytes += ret;
	err(1, "Short read %i", ret);
}

static void udp_client(unsigned int ip, unsigned int port, int fd)
{
	/* We're going to send UDP packets to that addr. */
	struct sockaddr_in saddr;
	int sock, ret;
	char buffer[1024];

	sock = socket(PF_INET, SOCK_DGRAM, 0);
	if (sock < 0)
		err(1, "creating socket");

	/* Connect to remote. */
	saddr.sin_family = AF_INET;
	saddr.sin_port = htons(port);
	saddr.sin_addr.s_addr = ip;
	if (connect(sock, (struct sockaddr *)&saddr, sizeof(saddr)))
		err(1, "connecting socket");

	/* Write packets until we get signal. */
	signal(SIGALRM, dumpstats);

	/* Wait for "go" signal. */
	if (read(fd, buffer, 1) != 1)
		err(1, "Reading go signal");

	while ((ret = write(sock, buffer, sizeof(buffer))) > 0)
		bytes += ret;
	err(1, "Short write %i", ret);
}

/* Exits when tcp socket closes. */
static void tcp_receiver(unsigned int port)
{
	/* We're going to send TCP packets to that addr. */
	struct sockaddr_in saddr;
	int sock;
	char c;
	int val = 1;

	sock = socket(PF_INET, SOCK_STREAM, 0);
	if (sock < 0)
		err(1, "creating socket");

	if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(&val)) != 0)
		err(1, "setsockopt(SO_REUSEADDR)");

	/* Wait for incoming. */
	saddr.sin_family = AF_INET;
	saddr.sin_port = htons(port);
	saddr.sin_addr.s_addr = INADDR_ANY;
	if (bind(sock, (struct sockaddr *)&saddr, sizeof(saddr)) != 0)
		err(1, "bind to port %u", port);

	if (listen(sock, 1) != 0)
		err(1, "listen");

	sock = accept(sock, NULL, NULL);
	if (sock < 0)
		err(1, "Accepting");

	while (read(sock, &c, 1) > 0) {
		bytes++;
		if (write(sock, &c, 1) != 1)
			break;
	}
}

static void tcp_client(unsigned int ip, unsigned int port, int fd)
{
	/* We're going to send TCP packets to that addr. */
	struct sockaddr_in saddr;
	int sock;
	char c;

	sock = socket(PF_INET, SOCK_STREAM, 0);
	if (sock < 0)
		err(1, "creating socket");

	/* Connect to remote. */
	saddr.sin_family = AF_INET;
	saddr.sin_port = htons(port);
	saddr.sin_addr.s_addr = ip;
	if (connect(sock, (struct sockaddr *)&saddr, sizeof(saddr)))
		err(1, "connecting socket");

	/* Write packets until we get signal. */
	signal(SIGALRM, tcp_dumpstats);

	/* Wait for "go" signal. */
	if (read(fd, &c, 1) != 1)
		err(1, "Reading go signal");

	while (write(sock, &c, 1) > 0) {
		bytes++;
		if (read(sock, &c, 1) != 1)
			err(1, "Short read");
	}
	err(1, "Short write");
}

static unsigned int host_to_ipaddr(const char *name)
{
	struct hostent *host;
	unsigned int ip = 0;

	host = gethostbyname(name);
	if (!host)
		err(1, "gethostbyname(%s)", name);

	if (host->h_addrtype != AF_INET ||
	    host->h_length != sizeof(struct in_addr))
		errx(1, "gethostbyname gave wrong type");

	memcpy(&ip, host->h_addr_list[0], 4);
	return ip;
}

static void usage(void)
{
	errx(1,
	     "Usage: udpstress -r portnum number OR\n"
	     "	udpstress portnum number ipaddr timeout");
}

int main(int argc, char *argv[])
{
	unsigned int ip, i, port, kids;
	int fds[2];

	if (argc < 4)
		usage();

	if (pipe(fds) != 0)
		err(1, "Creating pipe");

	signal(SIGALRM, SIG_IGN);
	setsid();

	if (strcmp(argv[1], "-r") == 0) {
		port = atoi(argv[2]);
		kids = atoi(argv[3]);
		for (i = 0; i < kids; i++) {
			switch (fork()) {
			case 0:
				udp_receiver(port + i);
			case -1:
				err(1, "forking");
			}
		}

		/* Finally, receive TCP until the sender finishes. */
		tcp_receiver(port);
		{
			char buffer[128];

			/* Don't just use printf here; gets overwritten by
			 * other children when writing to file. */
			sprintf(buffer, "TCP: %u\n", bytes);
			write(STDOUT_FILENO, buffer, strlen(buffer));
		}
	} else {
		char buffer[kids = atoi(argv[2])+1];
		port = atoi(argv[1]);
		ip = host_to_ipaddr(argv[3]);
		for (i = 0; i < kids - 1; i++) {
			switch (fork()) {
			case 0:
				udp_client(ip, port + i, fds[0]);
			case -1:
				err(1, "forking");
			}
		}
		/* Finally, TCP sender. */
		switch (fork()) {
		case 0:
			tcp_client(ip, port, fds[0]);
		case -1:
			err(1, "forking");
		}

		/* Wait for them to settle. */
		sleep(1);

		if (write(fds[1], buffer, kids) != kids)
			errx(1, "Short write on pipe");
		sleep(atoi(argv[4]));
	}

	/* Kill kids. */
	kill(-getpid(), SIGALRM);

	/* We want to exit last. */
	for (i = 0; i < kids; i++)
		wait(NULL);
	return 0;
}


["results.gnumeric" (application/x-gnumeric)]

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/virtualization

[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic