[prev in list] [next in list] [prev in thread] [next in thread] 

List:       openbsd-tech
Subject:    Re: mgre(4): point-to-multipoint gre tunnels
From:       David Gwynne <david () gwynne ! id ! au>
Date:       2018-02-27 4:55:53
Message-ID: 20180227045553.GM20611 () animata ! net
[Download RAW message or body]

On Mon, Feb 26, 2018 at 04:12:26PM +0100, Martin Pieuchot wrote:
> On 26/02/18(Mon) 11:32, David Gwynne wrote:

> > mgre on the other hand is more like an ethernet interface, but with
> > ip addresses instead of mac addresses. mgre(4) only needs configuration
> > of a local tunnel address (but takes src and dst at the moment cos
> > we have no way of only configuring a local address), and you configure
> > a subnet on the interface: eg, to configure mgre0 instead of gre0:
> > 
> > # ifconfig mgre0 tunnel 192.0.2.1 192.0.2.1 # dst is accepted but ignored
> 
> I think this should be changed based on the IFF_MULTIPOINT suggestion
> claudio@ made privately.  If we don't need a destination then let's not
> require one ;)

the diff below fixes that. it changes ifconfig and mgre so only the
local address can be configured via SIOCSLIFPHYADDR:

dlg@m4k1 ~$ ifconfig mgre0                                                   
mgre0: flags=0<> mtu 1476
	index 9 priority 0 llprio 3
	encap: vnetid none
	groups: mgre
	tunnel: (unset) ttl 64 nodf
	inet 192.168.0.1 netmask 0xffffff00
dlg@m4k1 ~$ sudo ifconfig mgre0 tunnel 192.0.2.1 198.51.100.1
ifconfig: SIOCSLIFPHYADDR: Invalid argument
dlg@m4k1 ~$ ifconfig mgre0                                                     
mgre0: flags=0<> mtu 1476
	index 9 priority 0 llprio 3
	encap: vnetid none
	groups: mgre
	tunnel: (unset) ttl 64 nodf
	inet 192.168.0.1 netmask 0xffffff00
dlg@m4k1 ~$ sudo ifconfig mgre0 tunneladdr 192.0.2.1
dlg@m4k1 ~$ ifconfig mgre0                                                     
mgre0: flags=0<> mtu 1476
	index 9 priority 0 llprio 3
	encap: vnetid none
	groups: mgre
	tunnel: inet 192.0.2.1 ttl 64 nodf
	inet 192.168.0.1 netmask 0xffffff00

> 
> > Index: if_gre.c
> > ===================================================================
> > RCS file: /cvs/src/sys/net/if_gre.c,v
> > retrieving revision 1.114
> > diff -u -p -r1.114 if_gre.c
> > --- if_gre.c	25 Feb 2018 01:52:25 -0000	1.114
> > +++ if_gre.c	25 Feb 2018 21:42:54 -0000
> > @@ -187,6 +187,9 @@ struct gre_tunnel {
> >  };
> >  
> >  static int
> > +		gre_cmp_src(const struct gre_tunnel *,
> > +		    const struct gre_tunnel *);
> > +static int
> >  		gre_cmp(const struct gre_tunnel *, const struct gre_tunnel *);
> >  
> >  static int	gre_set_tunnel(struct gre_tunnel *, struct if_laddrreq *, int);
> > @@ -217,11 +220,11 @@ static int	gre_tunnel_ioctl(struct ifnet
> >   */
> >  
> >  struct gre_softc {
> > -	struct ifnet		sc_if;
> > -
> > -	struct gre_tunnel	sc_tunnel;
> > +	struct gre_tunnel	sc_tunnel; /* must be first */
> 
> This scares me a bit.  All other interfaces start with a `struct ifnet'.
> I'm afraid we might have some code assuming it is the first element.

driver with ifnet as the first element of their softc are in the
minority, by far. most network drivers are for hardware, which must
have struct device up front.

> It looks like a micro-optimisation for a memcmp() that I wouldn't do.

it's so l3 input handling can get at the tunnel properties for gre
and mgre in a common way, which is used for the inbound ttl and
flowid mapping.

> 
> >  	TAILQ_ENTRY(gre_softc)	sc_entry;
> >  
> > +	struct ifnet		sc_if;
> > +
> >  	struct timeout		sc_ka_send;
> >  	struct timeout		sc_ka_hold;
> >  
> > @@ -264,13 +267,48 @@ static void	gre_link_state(struct gre_so
> >  
> >  static int	gre_input_key(struct mbuf **, int *, int, int,
> >  		    struct gre_tunnel *);
> > -static struct gre_softc *
> > -		gre_find(const struct gre_tunnel *);
> >  
> >  static void	gre_keepalive_send(void *);
> >  static void	gre_keepalive_recv(struct ifnet *ifp, struct mbuf *);
> >  static void	gre_keepalive_hold(void *);
> >  
> > +static struct mbuf *
> > +		gre_l3_encap_dst(const struct gre_tunnel *, const void *,
> > +		    struct mbuf *m, sa_family_t);
> > +
> > +#define gre_l3_encap(_t, _m, _af) \
> > +		gre_l3_encap_dst((_t), &(_t)->t_dst, (_m), (_af))
> > +
> > +struct mgre_softc {
> > +	struct gre_tunnel	sc_tunnel; /* must be first */
> > +	RBT_ENTRY(mgre_softc)	sc_entry;
> > +
> > +	struct ifnet		sc_if;
> 
> Obviously same here.

before i sent this diff out i tried putting gre_tunnel and struct
ifnet together into struct grecom, like arpcom for ethernet. it
made the diff huge, and the code was uglier imo.

> > @@ -498,6 +537,59 @@ gre_clone_destroy(struct ifnet *ifp)
> >  }
> >  
> >  static int
> > +mgre_clone_create(struct if_clone *ifc, int unit)
> > +{
> > +	struct mgre_softc *sc;
> > +	struct ifnet *ifp;
> > +
> > +	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
> > +	ifp = &sc->sc_if;
> > +
> > +	snprintf(ifp->if_xname, sizeof(ifp->if_xname),
> > +	    "%s%d", ifc->ifc_name, unit);
> > +
> > +	ifp->if_softc = sc;
> > +	ifp->if_type = IFT_L3IPVLAN;
> > +	ifp->if_hdrlen = 24; /* IP + GRE */
> 
> Define maybe?

sure. if_hdrlen isnt used in the kernel, so part of me thinks we
should just get rid of it.

> > +	ifp->if_mtu = GREMTU;
> > +	ifp->if_flags = 0; /* it's not p2p, and can't mcast or bcast */
> > +	ifp->if_xflags = IFXF_CLONED;
> > +	ifp->if_rtrequest = p2p_rtrequest; /* maybe? */;
> 
> Better have an empty mgre_rtrequest() to not inherit old hacks.  Plus
> if you plan to keep packets on a hold list while an entry is being
> resolve you already have the function to empty the list when the
> entry is added to the kernel.

ill work on that next.

> > @@ -681,20 +773,32 @@ gre_find(const struct gre_tunnel *key)
> >  		if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING))
> >  			continue;
> >  
> > -		return (sc);
> > +		return (&sc->sc_if);
> >  	}
> >  
> >  	return (NULL);
> >  }
> >  
> > +static inline struct ifnet *
> > +mgre_find(const struct gre_tunnel *key)
> > +{
> > +	struct mgre_softc *sc;
> 
> I'd appreciate if functions like this one, accessing a global data
> structure currently protected by the NET_LOCK() would contain a
> NET_ASSERT_LOCKED() ;)

ok. there's a few of those i can tweak.

> > +	if (ISSET(m->m_flags, M_MCAST|M_BCAST)) {
> > +		error = ENETUNREACH;
> > +		goto drop;
> > +	}
> > +
> > +	rt = rt_getll(rt0);
> 
> You only need rt_getll() if `rt0' can be a RTF_GATEWAY entry.  You're
> doping them below, so you can simply "s/rt0/rt/" ;)

that makes it hard to do the right return code for RTF_REJECT routes,
just below:

> > +	/* chech rt_expire? */
> > +	if (ISSET(rt->rt_flags, RTF_REJECT)) {
> > +		error = (rt == rt0) ? EHOSTDOWN : EHOSTUNREACH;
> > +		goto drop;
> > +	}

here's the ifconfig tunneladdr diff:

Index: sbin/ifconfig/ifconfig.c
===================================================================
RCS file: /cvs/src/sbin/ifconfig/ifconfig.c,v
retrieving revision 1.360
diff -u -p -r1.360 ifconfig.c
--- sbin/ifconfig/ifconfig.c	20 Feb 2018 15:33:16 -0000	1.360
+++ sbin/ifconfig/ifconfig.c	27 Feb 2018 04:39:16 -0000
@@ -200,6 +200,7 @@ void	unsetifnwflag(const char *, int);
 void	setifnetmask(const char *, int);
 void	setifprefixlen(const char *, int);
 void	settunnel(const char *, const char *);
+void	settunneladdr(const char *, int);
 void	deletetunnel(const char *, int);
 void	settunnelinst(const char *, int);
 void	settunnelttl(const char *, int);
@@ -451,6 +452,7 @@ const struct	cmd {
 	{ "defer",	1,		0,		setpfsync_defer },
 	{ "-defer",	0,		0,		setpfsync_defer },
 	{ "tunnel",	NEXTARG2,	0,		NULL, settunnel },
+	{ "tunneladdr",	NEXTARG,	0,		settunneladdr },
 	{ "-tunnel",	0,		0,		deletetunnel },
 	/* deletetunnel is for backward compat, remove during 6.4-current */
 	{ "deletetunnel",  0,		0,		deletetunnel },
@@ -2734,42 +2736,76 @@ print_media_word(uint64_t ifmw, int prin
 		printf(" instance %lld", IFM_INST(ifmw));
 }
 
-/* ARGSUSED */
 static void
-phys_status(int force)
+print_tunnel(const struct if_laddrreq *req)
 {
 	char psrcaddr[NI_MAXHOST];
 	char pdstaddr[NI_MAXHOST];
 	const char *ver = "";
 	const int niflag = NI_NUMERICHOST;
-	struct if_laddrreq req;
-	in_port_t dstport = 0;
+
+	if (req == NULL) {
+		printf("(unset)");
+		return;
+	}
 
 	psrcaddr[0] = pdstaddr[0] = '\0';
 
-	memset(&req, 0, sizeof(req));
-	(void) strlcpy(req.iflr_name, name, sizeof(req.iflr_name));
-	if (ioctl(s, SIOCGLIFPHYADDR, (caddr_t)&req) < 0)
-		return;
-	if (getnameinfo((struct sockaddr *)&req.addr, req.addr.ss_len,
+	if (getnameinfo((struct sockaddr *)&req->addr, req->addr.ss_len,
 	    psrcaddr, sizeof(psrcaddr), 0, 0, niflag) != 0)
 		strlcpy(psrcaddr, "<error>", sizeof(psrcaddr));
-	if (req.addr.ss_family == AF_INET6)
+	if (req->addr.ss_family == AF_INET6)
 		ver = "6";
 
-	if (req.dstaddr.ss_family == AF_INET)
-		dstport = ((struct sockaddr_in *)&req.dstaddr)->sin_port;
-	else if (req.dstaddr.ss_family == AF_INET6)
-		dstport = ((struct sockaddr_in6 *)&req.dstaddr)->sin6_port;
-	if (getnameinfo((struct sockaddr *)&req.dstaddr, req.dstaddr.ss_len,
-	    pdstaddr, sizeof(pdstaddr), 0, 0, niflag) != 0)
-		strlcpy(pdstaddr, "<error>", sizeof(pdstaddr));
+	printf("inet%s %s", ver, psrcaddr);
+
+	if (req->dstaddr.ss_family != AF_UNSPEC) {
+		in_port_t dstport = 0;
+		const struct sockaddr_in *sin;
+		const struct sockaddr_in6 *sin6;
+
+		if (getnameinfo((struct sockaddr *)&req->dstaddr,
+		    req->dstaddr.ss_len, pdstaddr, sizeof(pdstaddr),
+		    0, 0, niflag) != 0)
+			strlcpy(pdstaddr, "<error>", sizeof(pdstaddr));
+
+		printf(" -> %s", pdstaddr);
+
+		switch (req->dstaddr.ss_family) {
+		case AF_INET:
+			sin = (const struct sockaddr_in *)&req->dstaddr;
+			dstport = sin->sin_port;
+			break;
+		case AF_INET6:
+			sin6 = (const struct sockaddr_in6 *)&req->dstaddr;
+			dstport = sin6->sin6_port;
+			break;
+		}
+
+		if (dstport)
+			printf(":%u", ntohs(dstport));
+	}
+}
+
+/* ARGSUSED */
+static void
+phys_status(int force)
+{
+	struct if_laddrreq req;
+	struct if_laddrreq *r = &req;
+
+	memset(&req, 0, sizeof(req));
+	(void) strlcpy(req.iflr_name, name, sizeof(req.iflr_name));
+	if (ioctl(s, SIOCGLIFPHYADDR, (caddr_t)&req) < 0) {
+		if (errno != EADDRNOTAVAIL)
+			return;
 
-	printf("\ttunnel: inet%s %s -> %s", ver,
-	    psrcaddr, pdstaddr);
+		r = NULL;
+	}
+
+	printf("\ttunnel: ");
+	print_tunnel(r);
 
-	if (dstport)
-		printf(":%u", ntohs(dstport));
 	if (ioctl(s, SIOCGLIFPHYTTL, (caddr_t)&ifr) == 0) {
 		if (ifr.ifr_ttl == -1)
 			printf(" ttl copy");
@@ -3269,6 +3305,40 @@ settunnel(const char *src, const char *d
 
 	freeaddrinfo(srcres);
 	freeaddrinfo(dstres);
+}
+
+void
+settunneladdr(const char *addr, int ignored)
+{
+	struct addrinfo hints, *res;
+	struct if_laddrreq req;
+	ssize_t len;
+	int rv;
+
+	memset(&hints, 0, sizeof(hints));
+	hints.ai_family = AF_UNSPEC;
+	hints.ai_socktype = SOCK_DGRAM;
+	hints.ai_protocol = 0;
+	hints.ai_flags = AI_PASSIVE;
+
+	rv = getaddrinfo(addr, NULL, &hints, &res);
+	if (rv != 0)
+		errx(1, "tunneladdr %s: %s", addr, gai_strerror(rv));
+
+	memset(&req, 0, sizeof(req));
+	len = strlcpy(req.iflr_name, name, sizeof(req.iflr_name));
+	if (len >= sizeof(req.iflr_name))
+		errx(1, "%s: Interface name too long", name);
+
+	memcpy(&req.addr, res->ai_addr, res->ai_addrlen);
+
+	req.dstaddr.ss_len = 2;
+	req.dstaddr.ss_family = AF_UNSPEC;
+
+	if (ioctl(s, SIOCSLIFPHYADDR, &req) < 0)
+		warn("tunneladdr %s", addr);
+
+	freeaddrinfo(res);
 }
 
 /* ARGSUSED */
Index: sys/net/if_gre.c
===================================================================
RCS file: /cvs/src/sys/net/if_gre.c,v
retrieving revision 1.115
diff -u -p -r1.115 if_gre.c
--- sys/net/if_gre.c	27 Feb 2018 04:36:18 -0000	1.115
+++ sys/net/if_gre.c	27 Feb 2018 04:39:16 -0000
@@ -1,4 +1,4 @@
-/*	$OpenBSD: if_gre.c,v 1.115 2018/02/27 04:36:18 dlg Exp $ */
+/*	$OpenBSD: if_gre.c,v 1.114 2018/02/25 01:52:25 dlg Exp $ */
 /*	$NetBSD: if_gre.c,v 1.9 1999/10/25 19:18:11 drochner Exp $ */
 
 /*
@@ -304,6 +304,8 @@ static int	mgre_output(struct ifnet *, s
 static void	mgre_start(struct ifnet *);
 static int	mgre_ioctl(struct ifnet *, u_long, caddr_t);
 
+static int	mgre_set_tunnel(struct mgre_softc *, struct if_laddrreq *);
+static int	mgre_get_tunnel(struct mgre_softc *, struct if_laddrreq *);
 static int	mgre_up(struct mgre_softc *);
 static int	mgre_down(struct mgre_softc *);
 
@@ -2026,7 +2028,16 @@ mgre_ioctl(struct ifnet *ifp, u_long cmd
 		break;
 
 	case SIOCSLIFPHYADDR:
-		/* XXX */
+		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
+			error = EBUSY;
+			break;
+		}
+		error = mgre_set_tunnel(sc, (struct if_laddrreq *)data);
+		break;
+	case SIOCGLIFPHYADDR:
+		error = mgre_get_tunnel(sc, (struct if_laddrreq *)data);
+		break;
+
 	case SIOCSVNETID:
 	case SIOCDVNETID:
 	case SIOCDIFPHYADDR:
@@ -2043,6 +2054,104 @@ mgre_ioctl(struct ifnet *ifp, u_long cmd
  	}
 
 	return (error);
+}
+
+static int
+mgre_set_tunnel(struct mgre_softc *sc, struct if_laddrreq *req)
+{
+	struct gre_tunnel *tunnel = &sc->sc_tunnel;
+	struct sockaddr *addr = (struct sockaddr *)&req->addr;
+	struct sockaddr *dstaddr = (struct sockaddr *)&req->dstaddr;
+	struct sockaddr_in *addr4;
+#ifdef INET6
+	struct sockaddr_in6 *addr6;
+	int error;
+#endif
+
+	if (dstaddr->sa_family != AF_UNSPEC)
+		return (EINVAL);
+
+	/* validate */
+	switch (addr->sa_family) {
+	case AF_INET:
+		if (addr->sa_len != sizeof(*addr4))
+			return (EINVAL);
+
+		addr4 = (struct sockaddr_in *)addr;
+		if (in_nullhost(addr4->sin_addr) ||
+		    IN_MULTICAST(addr4->sin_addr.s_addr))
+			return (EINVAL);
+
+		tunnel->t_src4 = addr4->sin_addr;
+		tunnel->t_dst4.s_addr = INADDR_ANY;
+
+		break;
+#ifdef INET6
+	case AF_INET6:
+		if (addr->sa_len != sizeof(*addr6))
+			return (EINVAL);
+
+		addr6 = (struct sockaddr_in6 *)addr;
+		if (IN6_IS_ADDR_UNSPECIFIED(&addr6->sin6_addr) ||
+		    IN6_IS_ADDR_MULTICAST(&addr6->sin6_addr))
+			return (EINVAL);
+
+		error = in6_embedscope(&tunnel->t_src6, addr6, NULL);
+		if (error != 0)
+			return (error);
+
+		memset(&tunnel->t_dst6, 0, sizeof(tunnel->t_dst6));
+
+		break;
+#endif
+	default:
+		return (EAFNOSUPPORT);
+	}
+
+	/* commit */
+	tunnel->t_af = addr->sa_family;
+
+	return (0);
+}
+
+static int
+mgre_get_tunnel(struct mgre_softc *sc, struct if_laddrreq *req)
+{
+	struct gre_tunnel *tunnel = &sc->sc_tunnel;
+	struct sockaddr *dstaddr = (struct sockaddr *)&req->dstaddr;
+	struct sockaddr_in *sin;
+#ifdef INET6
+	struct sockaddr_in6 *sin6;
+#endif
+
+	switch (tunnel->t_af) {
+	case AF_UNSPEC:
+		return (EADDRNOTAVAIL);
+	case AF_INET:
+		sin = (struct sockaddr_in *)&req->addr;
+		memset(sin, 0, sizeof(*sin));
+		sin->sin_family = AF_INET;
+		sin->sin_len = sizeof(*sin);
+		sin->sin_addr = tunnel->t_src4;
+		break;
+
+#ifdef INET6
+	case AF_INET6:
+		sin6 = (struct sockaddr_in6 *)&req->addr;
+		memset(sin6, 0, sizeof(*sin6));
+		sin6->sin6_family = AF_INET6;
+		sin6->sin6_len = sizeof(*sin6);
+		in6_recoverscope(sin6, &tunnel->t_src6);
+		break;
+#endif
+	default:
+		unhandled_af(tunnel->t_af);
+	}
+
+	dstaddr->sa_len = 2;
+	dstaddr->sa_family = AF_UNSPEC;
+
+	return (0);
 }
 
 static int

[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic