[prev in list] [next in list] [prev in thread] [next in thread] 

List:       openbsd-tech
Subject:    Re: dwqe Rx IP header checksum offload
From:       Mark Kettenis <mark.kettenis () xs4all ! nl>
Date:       2024-04-30 22:59:39
Message-ID: 87o79qe9qk.fsf () bloch ! sibelius ! xs4all ! nl
[Download RAW message or body]

> Date: Wed, 01 May 2024 00:56:48 +0200
> From: Mark Kettenis <mark.kettenis@xs4all.nl>
> 
> > Date: Wed, 24 Apr 2024 14:47:46 +0200
> > From: Stefan Sperling <stsp@stsp.name>
> > 
> > On Wed, Apr 24, 2024 at 11:55:12AM +0200, Stefan Sperling wrote:
> > > On Sat, Apr 13, 2024 at 02:16:42AM +0200, Uwe Stuehler wrote:
> > > > Here's an updated diff that checks if the information in rxd->sd_tdes1
> > > > is valid and then whether (RDES1_IP_HDR_ERROR | RDES1_IP_CSUM_ERROR) is
> > > > set before marking the packet as hardware checksum failed and that all
> > > > three bits are clear before it marks the packet as hardware checksummed.
> > > 
> > > I would like to start with a simpler change: Let's adjust our macros
> > > for Rx descriptors. They are currently a confusing mix between the
> > > read and writeback formats. And they are incomplete for purposes
> > > such as checksum offloading.
> > > 
> > > This patch does not change the driver's behaviour but should help
> > > with making sense of things going forward.
> > 
> > New diff, built on top of the previous dwqereg.h diff, which
> > adds checksumming offload support.
> > 
> > Seems to work in my testing, based on debug prints I added locally.
> > I did not try to trigger the error path yet.
> > 
> > dwqe_rx_csum: UDP payload detected
> > dwqe_rx_csum: IPv4 checksum OK
> > dwqe_rx_csum: UDP checksum OK
> > dwqe_rx_csum: UDP payload detected
> > dwqe_rx_csum: IPv4 checksum OK
> > dwqe_rx_csum: UDP checksum OK
> > dwqe_rx_csum: checksumming engine bypassed
> > dwqe_rx_csum: UDP payload detected
> > dwqe_rx_csum: IPv4 checksum OK
> > dwqe_rx_csum: UDP checksum OK
> > dwqe_rx_csum: checksumming engine bypassed
> > 
> >  M  sys/dev/ic/dwqe.c     |  67+  0-
> >  M  sys/dev/ic/dwqereg.h  |   3+  0-
> > 
> > 2 files changed, 70 insertions(+), 0 deletions(-)
> 
> Tested on a RK3566 board, which does implement the feature.  Not an
> expert in this area, but looks reasonable to me.
> 
> ok kettenis@

Oh, and it does help a bit with tcpbench throughput going from around
560 Mbps to 660 Mbps.

> > diff refs/heads/dwqe-rxbits 454ea1e979b249c662541436b74f4168b86c9ab0
> > commit - d5cd332729e0a5bb8c36ede324cb8340cfa43a8f
> > commit + 454ea1e979b249c662541436b74f4168b86c9ab0
> > blob - 0467a4c72472b70f889e8fcdced0660fa6665a8f
> > blob + d06711527881f5a175d360e1e77b6bf47790dce6
> > --- sys/dev/ic/dwqe.c
> > +++ sys/dev/ic/dwqe.c
> > @@ -641,7 +641,67 @@ dwqe_tx_proc(struct dwqe_softc *sc)
> >  	}
> >  }
> >  
> > +int
> > +dwqe_have_rx_csum_offload(struct dwqe_softc *sc)
> > +{
> > +	return (sc->sc_hw_feature[0] & GMAC_MAC_HW_FEATURE0_RXCOESEL);
> > +}
> > +
> >  void
> > +dwqe_rx_csum(struct dwqe_softc *sc, struct mbuf *m, struct dwqe_desc *rxd)
> > +{
> > +	uint16_t csum_flags = 0;
> > +
> > +	/*
> > +	 * Checksum offload must be supported, the Last-Descriptor bit
> > +	 * must be set, RDES1 must be valid, and checksumming must not
> > +	 * have been bypassed (happens for unknown packet types), and
> > +	 * an IP header must have been detected.
> > +	 */
> > +	if (!dwqe_have_rx_csum_offload(sc) ||
> > +	    (rxd->sd_tdes3 & RDES3_LD) == 0 ||
> > +	    (rxd->sd_tdes3 & RDES3_RDES1_VALID) == 0 ||
> > +	    (rxd->sd_tdes1 & RDES1_IP_CSUM_BYPASS) ||
> > +	    (rxd->sd_tdes1 & (RDES1_IPV4_HDR | RDES1_IPV6_HDR)) == 0)
> > +		return;
> > +
> > +	/* If the IP header checksum is invalid then the payload is ignored. */
> > +	if (rxd->sd_tdes1 & RDES1_IP_HDR_ERROR) {
> > +		if (rxd->sd_tdes1 & RDES1_IPV4_HDR)
> > +			csum_flags |= M_IPV4_CSUM_IN_BAD;
> > +	} else {
> > +		if (rxd->sd_tdes1 & RDES1_IPV4_HDR)
> > +			csum_flags |= M_IPV4_CSUM_IN_OK;
> > +
> > +		/* Detect payload type and corresponding checksum errors. */
> > +		switch (rxd->sd_tdes1 & RDES1_IP_PAYLOAD_TYPE) {
> > +		case RDES1_IP_PAYLOAD_UDP:
> > +			if (rxd->sd_tdes1 & RDES1_IP_PAYLOAD_ERROR)
> > +				csum_flags |= M_UDP_CSUM_IN_BAD;
> > +			else
> > +				csum_flags |= M_UDP_CSUM_IN_OK;
> > +			break;
> > +		case RDES1_IP_PAYLOAD_TCP:
> > +			if (rxd->sd_tdes1 & RDES1_IP_PAYLOAD_ERROR)
> > +				csum_flags |= M_TCP_CSUM_IN_BAD;
> > +			else
> > +				csum_flags |= M_TCP_CSUM_IN_OK;
> > +			break;
> > +		case RDES1_IP_PAYLOAD_ICMP:
> > +			if (rxd->sd_tdes1 & RDES1_IP_PAYLOAD_ERROR)
> > +				csum_flags |= M_ICMP_CSUM_IN_BAD;
> > +			else
> > +				csum_flags |= M_ICMP_CSUM_IN_OK;
> > +			break;
> > +		default:
> > +			break;
> > +		}
> > +	}
> > +
> > +	m->m_pkthdr.csum_flags |= csum_flags;
> > +}
> > +
> > +void
> >  dwqe_rx_proc(struct dwqe_softc *sc)
> >  {
> >  	struct ifnet *ifp = &sc->sc_ac.ac_if;
> > @@ -689,6 +749,7 @@ dwqe_rx_proc(struct dwqe_softc *sc)
> >  
> >  			m->m_pkthdr.len = m->m_len = len;
> >  
> > +			dwqe_rx_csum(sc, m, rxd);
> >  			ml_enqueue(&ml, m);
> >  		}
> >  
> > @@ -864,6 +925,12 @@ dwqe_up(struct dwqe_softc *sc)
> >  
> >  	if (!sc->sc_fixed_link)
> >  		timeout_add_sec(&sc->sc_phy_tick, 1);
> > +
> > +	if (dwqe_have_rx_csum_offload(sc)) {
> > +		reg = dwqe_read(sc, GMAC_MAC_CONF);
> > +		reg |= GMAC_MAC_CONF_IPC;
> > +		dwqe_write(sc, GMAC_MAC_CONF, reg);
> > +	}
> >  }
> >  
> >  void
> > blob - f51d3b0a4717e4e0fd6dd1dd33b9301e27ff261f
> > blob + 532cbe3c60f46e507b7a1fd1770a0dc1744eb2d3
> > --- sys/dev/ic/dwqereg.h
> > +++ sys/dev/ic/dwqereg.h
> > @@ -17,6 +17,7 @@
> >   */
> >  
> >  #define GMAC_MAC_CONF		0x0000
> > +#define  GMAC_MAC_CONF_IPC		(1 << 27)
> >  #define  GMAC_MAC_CONF_CST		(1 << 21)
> >  #define  GMAC_MAC_CONF_ACS		(1 << 20)
> >  #define  GMAC_MAC_CONF_BE		(1 << 18)
> > @@ -61,6 +62,8 @@
> >  #define GMAC_VERSION		0x0110
> >  #define  GMAC_VERSION_SNPS_MASK		0xff
> >  #define GMAC_MAC_HW_FEATURE(x)	(0x011c + (x) * 0x4)
> > +#define  GMAC_MAC_HW_FEATURE0_TXCOESEL	(1 << 14)
> > +#define  GMAC_MAC_HW_FEATURE0_RXCOESEL	(1 << 16)
> >  #define  GMAC_MAC_HW_FEATURE1_TXFIFOSIZE(x) (((x) >> 6) & 0x1f)
> >  #define  GMAC_MAC_HW_FEATURE1_RXFIFOSIZE(x) (((x) >> 0) & 0x1f)
> >  #define GMAC_MAC_MDIO_ADDR	0x0200
> > 
> > 
> 
> 

[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic