[prev in list] [next in list] [prev in thread] [next in thread] 

List:       openbsd-tech
Subject:    bge: reliable tx with msi and tagged status (tests needed)
From:       Mike Belopuhov <mike () belopuhov ! com>
Date:       2013-06-25 15:52:25
Message-ID: 20130625155225.GD17485 () gir ! theapt ! org
[Download RAW message or body]

Hi,

This is a mail I've sent to dlg@ and kettenis@ and didn't
get an objection, but I would like a more widespread testing
to happen with this diff.

Please try it on all bge's you can. Your help is much
appreciated!

--------8<--------

I find the transmit path on 5719 here to be of questionable
reliability and it's certainly not something we can use in
production here.  If I run tcpbench in different rdomains
on the same machine with BCM5719 against an Intel 82580, I
can see the following picture for nearly every TCP stream
that originates from the bge:  http://pastie.org/8004262

I have verified that doing so between two 82580's or from
82580 to a bnx on the other machine doesn't produce this
result.  In fact even sending from the em towards bge
works fine, but if bge wants to transmit a lot I loose.

The culprit seems to be that the BGE_STATFLAG_UPDATED bit
in the statusword in the bge status block doesn't get
properly updated every time and we sometimes end up with
a stray interrupt.

Other systems don't use this since they're doing MSI for
newer controllers which implies tagged status and this bit
is no longer relevant there.  Making us do the same does
fix the issue for me.

The bge_intr chunk of the diff looks complicated, but in
fact it's due to the indentation change.  The condition
to return without acknowledging the interrupt is changed
to check the last status AND read the pci interrupt status
(like Linux does) for the MSI case and nothing is changed
for the !MSI case.

--------8<--------

diff --git sys/dev/pci/if_bge.c sys/dev/pci/if_bge.c
index 05bfaa4..f87b1fe 100644
--- sys/dev/pci/if_bge.c
+++ sys/dev/pci/if_bge.c
@@ -1623,16 +1623,19 @@ bge_phy_addr(struct bge_softc *sc)
  */
 void
 bge_chipinit(struct bge_softc *sc)
 {
 	struct pci_attach_args	*pa = &(sc->bge_pa);
-	u_int32_t dma_rw_ctl, mode_ctl;
+	u_int32_t dma_rw_ctl, misc_ctl, mode_ctl;
 	int i;
 
 	/* Set endianness before we access any non-PCI registers. */
+	misc_ctl = BGE_INIT;
+	if (sc->bge_flags & BGE_TAGGED_STATUS)
+		misc_ctl |= BGE_PCIMISCCTL_TAGGED_STATUS;
 	pci_conf_write(pa->pa_pc, pa->pa_tag, BGE_PCI_MISC_CTL,
-	    BGE_INIT);
+	    misc_ctl);
 
 	/*
 	 * Clear the MAC statistics block in the NIC's
 	 * internal memory.
 	 */
@@ -2485,19 +2488,10 @@ bge_attach(struct device *parent, struct device *self, void *aux)
 	    &sc->bge_bhandle, NULL, &size, 0)) {
 		printf(": can't find mem space\n");
 		return;
 	}
 
-	DPRINTFN(5, ("pci_intr_map\n"));
-	if (pci_intr_map(pa, &ih)) {
-		printf(": couldn't map interrupt\n");
-		goto fail_1;
-	}
-
-	DPRINTFN(5, ("pci_intr_string\n"));
-	intrstr = pci_intr_string(pc, ih);
-
 	/*
 	 * Kludge for 5700 Bx bug: a hardware bug (PCIX byte enable?)
 	 * can clobber the chip's PCI config-space power control registers,
 	 * leaving the card in D3 powersave state.
 	 * We do not have memory-mapped registers in this state,
@@ -2749,10 +2743,21 @@ bge_attach(struct device *parent, struct device *self, void *aux)
 	    BGE_ASICREV(sc->bge_chipid) == BGE_ASICREV_BCM5761 ||
 	    BGE_ASICREV(sc->bge_chipid) == BGE_ASICREV_BCM5785 ||
 	    BGE_ASICREV(sc->bge_chipid) == BGE_ASICREV_BCM57780)
 		sc->bge_flags |= BGE_CPMU_PRESENT;
 
+	DPRINTFN(5, ("pci_intr_map\n"));
+	if (BGE_IS_5717_PLUS(sc) && pci_intr_map_msi(pa, &ih) == 0)
+		sc->bge_flags |= BGE_TAGGED_STATUS;
+	else if (pci_intr_map(pa, &ih)) {
+		printf(": couldn't map interrupt\n");
+		goto fail_1;
+	}
+
+	DPRINTFN(5, ("pci_intr_string\n"));
+	intrstr = pci_intr_string(pc, ih);
+
 	/* Try to reset the chip. */
 	DPRINTFN(5, ("bge_reset\n"));
 	bge_sig_pre_reset(sc, BGE_RESET_START);
 	bge_reset(sc);
 
@@ -2936,10 +2941,18 @@ bge_attach(struct device *parent, struct device *self, void *aux)
 		    sc->bge_flags |= BGE_PHY_FIBER_TBI;
 		else
 		    sc->bge_flags |= BGE_PHY_FIBER_MII;
 	}
 
+	/* Take advantage of single-shot MSI. */
+	if ((BGE_IS_5717_PLUS(sc) && sc->bge_flags & BGE_TAGGED_STATUS)) {
+		reg = CSR_READ_4(sc, BGE_MSI_MODE);
+		reg &= ~BGE_MSIMODE_ONE_SHOT_DISABLE;
+		reg |= BGE_MSIMODE_ENABLE;
+		CSR_WRITE_4(sc, BGE_MSI_MODE, reg);
+	}
+
 	/* Hookup IRQ last. */
 	DPRINTFN(5, ("pci_intr_establish\n"));
 	sc->bge_intrhand = pci_intr_establish(pc, ih, IPL_NET, bge_intr, sc,
 	    sc->bge_dev.dv_xname);
 	if (sc->bge_intrhand == NULL) {
@@ -3505,68 +3518,68 @@ bge_txeof(struct bge_softc *sc)
 int
 bge_intr(void *xsc)
 {
 	struct bge_softc *sc;
 	struct ifnet *ifp;
-	u_int32_t statusword;
-	u_int32_t intrmask = BGE_PCISTATE_INTR_NOT_ACTIVE;
+	u_int32_t statusword, statustag;
 
 	sc = xsc;
 	ifp = &sc->arpcom.ac_if;
 
-	if (BGE_IS_5717_PLUS(sc))
-		intrmask = 0;
-
-	/* It is possible for the interrupt to arrive before
-	 * the status block is updated prior to the interrupt.
-	 * Reading the PCI State register will confirm whether the
-	 * interrupt is ours and will flush the status block.
-	 */
-
 	/* read status word from status block */
 	bus_dmamap_sync(sc->bge_dmatag, sc->bge_ring_map,
 	    offsetof(struct bge_ring_data, bge_status_block),
 	    sizeof (struct bge_status_block),
 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
 
 	statusword = sc->bge_rdata->bge_status_block.bge_status;
+	statustag = sc->bge_rdata->bge_status_block.bge_status_tag << 24;
 
-	if ((statusword & BGE_STATFLAG_UPDATED) ||
-	    (~CSR_READ_4(sc, BGE_PCI_PCISTATE) & intrmask)) {
-
+	if (sc->bge_flags & BGE_TAGGED_STATUS) {
+		if (sc->bge_lasttag == statustag &&
+		    (CSR_READ_4(sc, BGE_PCI_PCISTATE) &
+		     BGE_PCISTATE_INTR_NOT_ACTIVE))
+			return (0);
+		sc->bge_lasttag = statustag;
+	} else {
+		if (!(statusword & BGE_STATFLAG_UPDATED) &&
+		    (CSR_READ_4(sc, BGE_PCI_PCISTATE) &
+		     BGE_PCISTATE_INTR_NOT_ACTIVE))
+			return (0);
 		/* Ack interrupt and stop others from occurring. */
 		bge_writembx(sc, BGE_MBX_IRQ0_LO, 1);
-			
-		/* clear status word */
-		sc->bge_rdata->bge_status_block.bge_status = 0;
+		statustag = 0;
+	}
 
-		bus_dmamap_sync(sc->bge_dmatag, sc->bge_ring_map,
-		    offsetof(struct bge_ring_data, bge_status_block),
-		    sizeof (struct bge_status_block),
-		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
+	/* clear status word */
+	sc->bge_rdata->bge_status_block.bge_status = 0;
 
-		if (BGE_ASICREV(sc->bge_chipid) == BGE_ASICREV_BCM5700 ||
-		    statusword & BGE_STATFLAG_LINKSTATE_CHANGED ||
-		    BGE_STS_BIT(sc, BGE_STS_LINK_EVT))
-			bge_link_upd(sc);
+	bus_dmamap_sync(sc->bge_dmatag, sc->bge_ring_map,
+	    offsetof(struct bge_ring_data, bge_status_block),
+	    sizeof (struct bge_status_block),
+	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 
-		if (ifp->if_flags & IFF_RUNNING) {
-			/* Check RX return ring producer/consumer */
-			bge_rxeof(sc);
+	if (BGE_ASICREV(sc->bge_chipid) == BGE_ASICREV_BCM5700 ||
+	    statusword & BGE_STATFLAG_LINKSTATE_CHANGED ||
+	    BGE_STS_BIT(sc, BGE_STS_LINK_EVT))
+		bge_link_upd(sc);
 
-			/* Check TX ring producer/consumer */
-			bge_txeof(sc);
-		}
+	/* Re-enable interrupts. */
+	bge_writembx(sc, BGE_MBX_IRQ0_LO, statustag);
 
-		/* Re-enable interrupts. */
-		bge_writembx(sc, BGE_MBX_IRQ0_LO, 0);
+	if (ifp->if_flags & IFF_RUNNING) {
+		/* Check RX return ring producer/consumer */
+		bge_rxeof(sc);
 
-		bge_start(ifp);
+		/* Check TX ring producer/consumer */
+		bge_txeof(sc);
 
-		return (1);
-	} else
-		return (0);
+		if (!IFQ_IS_EMPTY(&ifp->if_snd))
+			bge_start(ifp);
+	}
+
+	return (1);
 }
 
 void
 bge_tick(void *xsc)
 {
diff --git sys/dev/pci/if_bgereg.h sys/dev/pci/if_bgereg.h
index 3685f14..72eba41 100644
--- sys/dev/pci/if_bgereg.h
+++ sys/dev/pci/if_bgereg.h
@@ -223,10 +223,11 @@
 #define	BGE_PCIMISCCTL_ENDIAN_WORDSWAP	0x00000008
 #define	BGE_PCIMISCCTL_PCISTATE_RW	0x00000010
 #define	BGE_PCIMISCCTL_CLOCKCTL_RW	0x00000020
 #define	BGE_PCIMISCCTL_REG_WORDSWAP	0x00000040
 #define	BGE_PCIMISCCTL_INDIRECT_ACCESS	0x00000080
+#define	BGE_PCIMISCCTL_TAGGED_STATUS	0x00000200
 #define	BGE_PCIMISCCTL_ASICREV		0xFFFF0000
 #define	BGE_PCIMISCCTL_ASICREV_SHIFT	16
 
 #if BYTE_ORDER == LITTLE_ENDIAN
 #define	BGE_DMA_SWAP_OPTIONS \
@@ -1917,12 +1918,11 @@
 #define	BGE_MSIMODE_RESET		0x00000001
 #define	BGE_MSIMODE_ENABLE		0x00000002
 #define	BGE_MSIMODE_PCI_TGT_ABRT_ATTN	0x00000004
 #define	BGE_MSIMODE_PCI_MSTR_ABRT_ATTN	0x00000008
 #define	BGE_MSIMODE_PCI_PERR_ATTN	0x00000010
-#define	BGE_MSIMODE_MSI_FIFOUFLOW_ATTN	0x00000020
-#define	BGE_MSIMODE_MSI_FIFOOFLOW_ATTN	0x00000040
+#define	BGE_MSIMODE_ONE_SHOT_DISABLE	0x00000020
 
 /* MSI status register */
 #define	BGE_MSISTAT_PCI_TGT_ABRT_ATTN	0x00000004
 #define	BGE_MSISTAT_PCI_MSTR_ABRT_ATTN	0x00000008
 #define	BGE_MSISTAT_PCI_PERR_ATTN	0x00000010
@@ -2399,11 +2399,11 @@ struct bge_sts_idx {
 #endif
 };
 
 struct bge_status_block {
 	u_int32_t		bge_status;
-	u_int32_t		bge_rsvd0;
+	u_int32_t		bge_status_tag;
 #if BYTE_ORDER == LITTLE_ENDIAN
 	u_int16_t		bge_rx_jumbo_cons_idx;
 	u_int16_t		bge_rx_std_cons_idx;
 	u_int16_t		bge_rx_mini_cons_idx;
 	u_int16_t		bge_rsvd1;
@@ -2808,10 +2808,11 @@ struct bge_softc {
 	struct mii_data		bge_mii;
 	struct ifmedia		bge_ifmedia;	/* media info */
 	u_int32_t		bge_expcap;
 	u_int32_t		bge_mps;
 	u_int32_t		bge_expmrq;
+	u_int32_t		bge_lasttag;
 	u_int32_t		bge_flags;
 #define	BGE_TXRING_VALID	0x00000001
 #define	BGE_RXRING_VALID	0x00000002
 #define	BGE_JUMBO_RXRING_VALID	0x00000004
 #define	BGE_RX_ALIGNBUG		0x00000008
@@ -2839,10 +2840,11 @@ struct bge_softc {
 #define	BGE_5700_FAMILY		0x02000000
 #define	BGE_5717_PLUS		0x04000000
 #define	BGE_57765_PLUS		0x08000000
 #define	BGE_APE			0x10000000
 #define	BGE_CPMU_PRESENT	0x20000000
+#define	BGE_TAGGED_STATUS	0x40000000
 
 	bus_dma_tag_t		bge_dmatag;
 	u_int32_t		bge_mfw_flags;  /* Management F/W flags */
 #define	BGE_MFW_ON_RXCPU	0x00000001
 #define	BGE_MFW_ON_APE		0x00000002

[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic