[prev in list] [next in list] [prev in thread] [next in thread]
List: openbsd-tech
Subject: bge: reliable tx with msi and tagged status (tests needed)
From: Mike Belopuhov <mike () belopuhov ! com>
Date: 2013-06-25 15:52:25
Message-ID: 20130625155225.GD17485 () gir ! theapt ! org
[Download RAW message or body]
Hi,
This is a mail I've sent to dlg@ and kettenis@ and didn't
get an objection, but I would like a more widespread testing
to happen with this diff.
Please try it on all bge's you can. Your help is much
appreciated!
--------8<--------
I find the transmit path on 5719 here to be of questionable
reliability and it's certainly not something we can use in
production here. If I run tcpbench in different rdomains
on the same machine with BCM5719 against an Intel 82580, I
can see the following picture for nearly every TCP stream
that originates from the bge: http://pastie.org/8004262
I have verified that doing so between two 82580's or from
82580 to a bnx on the other machine doesn't produce this
result. In fact even sending from the em towards bge
works fine, but if bge wants to transmit a lot I loose.
The culprit seems to be that the BGE_STATFLAG_UPDATED bit
in the statusword in the bge status block doesn't get
properly updated every time and we sometimes end up with
a stray interrupt.
Other systems don't use this since they're doing MSI for
newer controllers which implies tagged status and this bit
is no longer relevant there. Making us do the same does
fix the issue for me.
The bge_intr chunk of the diff looks complicated, but in
fact it's due to the indentation change. The condition
to return without acknowledging the interrupt is changed
to check the last status AND read the pci interrupt status
(like Linux does) for the MSI case and nothing is changed
for the !MSI case.
--------8<--------
diff --git sys/dev/pci/if_bge.c sys/dev/pci/if_bge.c
index 05bfaa4..f87b1fe 100644
--- sys/dev/pci/if_bge.c
+++ sys/dev/pci/if_bge.c
@@ -1623,16 +1623,19 @@ bge_phy_addr(struct bge_softc *sc)
*/
void
bge_chipinit(struct bge_softc *sc)
{
struct pci_attach_args *pa = &(sc->bge_pa);
- u_int32_t dma_rw_ctl, mode_ctl;
+ u_int32_t dma_rw_ctl, misc_ctl, mode_ctl;
int i;
/* Set endianness before we access any non-PCI registers. */
+ misc_ctl = BGE_INIT;
+ if (sc->bge_flags & BGE_TAGGED_STATUS)
+ misc_ctl |= BGE_PCIMISCCTL_TAGGED_STATUS;
pci_conf_write(pa->pa_pc, pa->pa_tag, BGE_PCI_MISC_CTL,
- BGE_INIT);
+ misc_ctl);
/*
* Clear the MAC statistics block in the NIC's
* internal memory.
*/
@@ -2485,19 +2488,10 @@ bge_attach(struct device *parent, struct device *self, void *aux)
&sc->bge_bhandle, NULL, &size, 0)) {
printf(": can't find mem space\n");
return;
}
- DPRINTFN(5, ("pci_intr_map\n"));
- if (pci_intr_map(pa, &ih)) {
- printf(": couldn't map interrupt\n");
- goto fail_1;
- }
-
- DPRINTFN(5, ("pci_intr_string\n"));
- intrstr = pci_intr_string(pc, ih);
-
/*
* Kludge for 5700 Bx bug: a hardware bug (PCIX byte enable?)
* can clobber the chip's PCI config-space power control registers,
* leaving the card in D3 powersave state.
* We do not have memory-mapped registers in this state,
@@ -2749,10 +2743,21 @@ bge_attach(struct device *parent, struct device *self, void *aux)
BGE_ASICREV(sc->bge_chipid) == BGE_ASICREV_BCM5761 ||
BGE_ASICREV(sc->bge_chipid) == BGE_ASICREV_BCM5785 ||
BGE_ASICREV(sc->bge_chipid) == BGE_ASICREV_BCM57780)
sc->bge_flags |= BGE_CPMU_PRESENT;
+ DPRINTFN(5, ("pci_intr_map\n"));
+ if (BGE_IS_5717_PLUS(sc) && pci_intr_map_msi(pa, &ih) == 0)
+ sc->bge_flags |= BGE_TAGGED_STATUS;
+ else if (pci_intr_map(pa, &ih)) {
+ printf(": couldn't map interrupt\n");
+ goto fail_1;
+ }
+
+ DPRINTFN(5, ("pci_intr_string\n"));
+ intrstr = pci_intr_string(pc, ih);
+
/* Try to reset the chip. */
DPRINTFN(5, ("bge_reset\n"));
bge_sig_pre_reset(sc, BGE_RESET_START);
bge_reset(sc);
@@ -2936,10 +2941,18 @@ bge_attach(struct device *parent, struct device *self, void *aux)
sc->bge_flags |= BGE_PHY_FIBER_TBI;
else
sc->bge_flags |= BGE_PHY_FIBER_MII;
}
+ /* Take advantage of single-shot MSI. */
+ if ((BGE_IS_5717_PLUS(sc) && sc->bge_flags & BGE_TAGGED_STATUS)) {
+ reg = CSR_READ_4(sc, BGE_MSI_MODE);
+ reg &= ~BGE_MSIMODE_ONE_SHOT_DISABLE;
+ reg |= BGE_MSIMODE_ENABLE;
+ CSR_WRITE_4(sc, BGE_MSI_MODE, reg);
+ }
+
/* Hookup IRQ last. */
DPRINTFN(5, ("pci_intr_establish\n"));
sc->bge_intrhand = pci_intr_establish(pc, ih, IPL_NET, bge_intr, sc,
sc->bge_dev.dv_xname);
if (sc->bge_intrhand == NULL) {
@@ -3505,68 +3518,68 @@ bge_txeof(struct bge_softc *sc)
int
bge_intr(void *xsc)
{
struct bge_softc *sc;
struct ifnet *ifp;
- u_int32_t statusword;
- u_int32_t intrmask = BGE_PCISTATE_INTR_NOT_ACTIVE;
+ u_int32_t statusword, statustag;
sc = xsc;
ifp = &sc->arpcom.ac_if;
- if (BGE_IS_5717_PLUS(sc))
- intrmask = 0;
-
- /* It is possible for the interrupt to arrive before
- * the status block is updated prior to the interrupt.
- * Reading the PCI State register will confirm whether the
- * interrupt is ours and will flush the status block.
- */
-
/* read status word from status block */
bus_dmamap_sync(sc->bge_dmatag, sc->bge_ring_map,
offsetof(struct bge_ring_data, bge_status_block),
sizeof (struct bge_status_block),
BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
statusword = sc->bge_rdata->bge_status_block.bge_status;
+ statustag = sc->bge_rdata->bge_status_block.bge_status_tag << 24;
- if ((statusword & BGE_STATFLAG_UPDATED) ||
- (~CSR_READ_4(sc, BGE_PCI_PCISTATE) & intrmask)) {
-
+ if (sc->bge_flags & BGE_TAGGED_STATUS) {
+ if (sc->bge_lasttag == statustag &&
+ (CSR_READ_4(sc, BGE_PCI_PCISTATE) &
+ BGE_PCISTATE_INTR_NOT_ACTIVE))
+ return (0);
+ sc->bge_lasttag = statustag;
+ } else {
+ if (!(statusword & BGE_STATFLAG_UPDATED) &&
+ (CSR_READ_4(sc, BGE_PCI_PCISTATE) &
+ BGE_PCISTATE_INTR_NOT_ACTIVE))
+ return (0);
/* Ack interrupt and stop others from occurring. */
bge_writembx(sc, BGE_MBX_IRQ0_LO, 1);
-
- /* clear status word */
- sc->bge_rdata->bge_status_block.bge_status = 0;
+ statustag = 0;
+ }
- bus_dmamap_sync(sc->bge_dmatag, sc->bge_ring_map,
- offsetof(struct bge_ring_data, bge_status_block),
- sizeof (struct bge_status_block),
- BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
+ /* clear status word */
+ sc->bge_rdata->bge_status_block.bge_status = 0;
- if (BGE_ASICREV(sc->bge_chipid) == BGE_ASICREV_BCM5700 ||
- statusword & BGE_STATFLAG_LINKSTATE_CHANGED ||
- BGE_STS_BIT(sc, BGE_STS_LINK_EVT))
- bge_link_upd(sc);
+ bus_dmamap_sync(sc->bge_dmatag, sc->bge_ring_map,
+ offsetof(struct bge_ring_data, bge_status_block),
+ sizeof (struct bge_status_block),
+ BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
- if (ifp->if_flags & IFF_RUNNING) {
- /* Check RX return ring producer/consumer */
- bge_rxeof(sc);
+ if (BGE_ASICREV(sc->bge_chipid) == BGE_ASICREV_BCM5700 ||
+ statusword & BGE_STATFLAG_LINKSTATE_CHANGED ||
+ BGE_STS_BIT(sc, BGE_STS_LINK_EVT))
+ bge_link_upd(sc);
- /* Check TX ring producer/consumer */
- bge_txeof(sc);
- }
+ /* Re-enable interrupts. */
+ bge_writembx(sc, BGE_MBX_IRQ0_LO, statustag);
- /* Re-enable interrupts. */
- bge_writembx(sc, BGE_MBX_IRQ0_LO, 0);
+ if (ifp->if_flags & IFF_RUNNING) {
+ /* Check RX return ring producer/consumer */
+ bge_rxeof(sc);
- bge_start(ifp);
+ /* Check TX ring producer/consumer */
+ bge_txeof(sc);
- return (1);
- } else
- return (0);
+ if (!IFQ_IS_EMPTY(&ifp->if_snd))
+ bge_start(ifp);
+ }
+
+ return (1);
}
void
bge_tick(void *xsc)
{
diff --git sys/dev/pci/if_bgereg.h sys/dev/pci/if_bgereg.h
index 3685f14..72eba41 100644
--- sys/dev/pci/if_bgereg.h
+++ sys/dev/pci/if_bgereg.h
@@ -223,10 +223,11 @@
#define BGE_PCIMISCCTL_ENDIAN_WORDSWAP 0x00000008
#define BGE_PCIMISCCTL_PCISTATE_RW 0x00000010
#define BGE_PCIMISCCTL_CLOCKCTL_RW 0x00000020
#define BGE_PCIMISCCTL_REG_WORDSWAP 0x00000040
#define BGE_PCIMISCCTL_INDIRECT_ACCESS 0x00000080
+#define BGE_PCIMISCCTL_TAGGED_STATUS 0x00000200
#define BGE_PCIMISCCTL_ASICREV 0xFFFF0000
#define BGE_PCIMISCCTL_ASICREV_SHIFT 16
#if BYTE_ORDER == LITTLE_ENDIAN
#define BGE_DMA_SWAP_OPTIONS \
@@ -1917,12 +1918,11 @@
#define BGE_MSIMODE_RESET 0x00000001
#define BGE_MSIMODE_ENABLE 0x00000002
#define BGE_MSIMODE_PCI_TGT_ABRT_ATTN 0x00000004
#define BGE_MSIMODE_PCI_MSTR_ABRT_ATTN 0x00000008
#define BGE_MSIMODE_PCI_PERR_ATTN 0x00000010
-#define BGE_MSIMODE_MSI_FIFOUFLOW_ATTN 0x00000020
-#define BGE_MSIMODE_MSI_FIFOOFLOW_ATTN 0x00000040
+#define BGE_MSIMODE_ONE_SHOT_DISABLE 0x00000020
/* MSI status register */
#define BGE_MSISTAT_PCI_TGT_ABRT_ATTN 0x00000004
#define BGE_MSISTAT_PCI_MSTR_ABRT_ATTN 0x00000008
#define BGE_MSISTAT_PCI_PERR_ATTN 0x00000010
@@ -2399,11 +2399,11 @@ struct bge_sts_idx {
#endif
};
struct bge_status_block {
u_int32_t bge_status;
- u_int32_t bge_rsvd0;
+ u_int32_t bge_status_tag;
#if BYTE_ORDER == LITTLE_ENDIAN
u_int16_t bge_rx_jumbo_cons_idx;
u_int16_t bge_rx_std_cons_idx;
u_int16_t bge_rx_mini_cons_idx;
u_int16_t bge_rsvd1;
@@ -2808,10 +2808,11 @@ struct bge_softc {
struct mii_data bge_mii;
struct ifmedia bge_ifmedia; /* media info */
u_int32_t bge_expcap;
u_int32_t bge_mps;
u_int32_t bge_expmrq;
+ u_int32_t bge_lasttag;
u_int32_t bge_flags;
#define BGE_TXRING_VALID 0x00000001
#define BGE_RXRING_VALID 0x00000002
#define BGE_JUMBO_RXRING_VALID 0x00000004
#define BGE_RX_ALIGNBUG 0x00000008
@@ -2839,10 +2840,11 @@ struct bge_softc {
#define BGE_5700_FAMILY 0x02000000
#define BGE_5717_PLUS 0x04000000
#define BGE_57765_PLUS 0x08000000
#define BGE_APE 0x10000000
#define BGE_CPMU_PRESENT 0x20000000
+#define BGE_TAGGED_STATUS 0x40000000
bus_dma_tag_t bge_dmatag;
u_int32_t bge_mfw_flags; /* Management F/W flags */
#define BGE_MFW_ON_RXCPU 0x00000001
#define BGE_MFW_ON_APE 0x00000002
[prev in list] [next in list] [prev in thread] [next in thread]
Configure |
About |
News |
Add a list |
Sponsored by KoreLogic