[prev in list] [next in list] [prev in thread] [next in thread] 

List:       linux-pci
Subject:    Re: [PATCH V10 08/12] powerpc/powernv: Support EEH reset for VF PE
From:       Alexey Kardashevskiy <aik () ozlabs ! ru>
Date:       2015-10-30 8:05:05
Message-ID: 563324B1.7040009 () ozlabs ! ru
[Download RAW message or body]

On 10/30/2015 06:18 PM, Wei Yang wrote:
> On Fri, Oct 30, 2015 at 03:11:20PM +1100, Alexey Kardashevskiy wrote:
> > On 10/26/2015 02:15 PM, Wei Yang wrote:
> > > PEs for VFs don't have primary bus. So they have to have their own reset
> > > backend, which is used during EEH recovery. The patch implements the reset
> > > backend for VF's PE by issuing FLR or AF FLR to the VFs, which are contained
> > > in the PE.
> > > 
> > > [gwshan: changelog and code refactoring]
> > > Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
> > > Acked-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
> > > ---
> > > arch/powerpc/include/asm/eeh.h               |   1 +
> > > arch/powerpc/platforms/powernv/eeh-powernv.c | 134 ++++++++++++++++++++++++++-
> > > 2 files changed, 134 insertions(+), 1 deletion(-)
> > > 
> > > diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
> > > index ec21f8f..331c856 100644
> > > --- a/arch/powerpc/include/asm/eeh.h
> > > +++ b/arch/powerpc/include/asm/eeh.h
> > > @@ -136,6 +136,7 @@ struct eeh_dev {
> > > 	int pcix_cap;			/* Saved PCIx capability	*/
> > > 	int pcie_cap;			/* Saved PCIe capability	*/
> > > 	int aer_cap;			/* Saved AER capability		*/
> > > +	int af_cap;			/* Saved AF capability		*/
> > > 	struct eeh_pe *pe;		/* Associated PE		*/
> > > 	struct list_head list;		/* Form link list in the PE	*/
> > > 	struct pci_controller *phb;	/* Associated PHB		*/
> > > diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c \
> > > b/arch/powerpc/platforms/powernv/eeh-powernv.c index cfd55dd..017cd72 100644
> > > --- a/arch/powerpc/platforms/powernv/eeh-powernv.c
> > > +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
> > > @@ -404,6 +404,7 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data)
> > > 	edev->pcix_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_PCIX);
> > > 	edev->pcie_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_EXP);
> > > 	edev->aer_cap  = pnv_eeh_find_ecap(pdn, PCI_EXT_CAP_ID_ERR);
> > > +	edev->af_cap   = pnv_eeh_find_cap(pdn, PCI_CAP_ID_AF);
> > > 	if ((edev->class_code >> 8) == PCI_CLASS_BRIDGE_PCI) {
> > > 		edev->mode |= EEH_DEV_BRIDGE;
> > > 		if (edev->pcie_cap) {
> > > @@ -893,6 +894,127 @@ static int pnv_eeh_bridge_reset(struct pci_dev *dev, int \
> > > option)  return 0;
> > > }
> > > 
> > > +static void pnv_eeh_wait_for_pending(struct pci_dn *pdn, int pos,
> > > +				     u16 mask, bool af_flr_rst)

Missed this - @af_flr_rst is only used for warnings so better do:
s/bool af_flr_rst/const char *reset_type/
to make it explicit.


> > > +{
> > > +	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
> > > +	int status, i;
> > > +
> > > +	/* Wait for Transaction Pending bit to be cleared */
> > > +	for (i = 0; i < 4; i++) {
> > > +		eeh_ops->read_config(pdn, pos, 2, &status);
> > 
> > 
> > gcc should have complained on using uninitialized @status here.
> > 
> 
> I remove the obj file and re-compile the file, not the warning.

Hm. Does not warn me either.

> And took a look at other places where read_config() is called. The laster
> parameter is not initialized before called.

So? It does not make it right.

> You see the error during build?

Why does it matter? We have an undefined behavior here which we should not. 
You could test the return values from read_config() but you do not so at 
least initialize local variables.


> 
> > 
> > > +		if (!(status & mask))
> > > +			return;
> > > +
> > > +		msleep((1 << i) * 100);
> > > +	}
> > > +
> > > +	pr_warn("%s: Pending transaction while issuing %s FLR to "
> > > +		"%04x:%02x:%02x.%01x\n",
> > 
> > Do not wrap user-visible strings.
> > 
> 
> Will change this.
> 
> > 
> > > +		__func__, af_flr_rst ? "AF" : "",
> > > +		edev->phb->global_number, pdn->busno,
> > > +		PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
> > > +}
> > > +
> > > +static int pnv_eeh_do_flr(struct pci_dn *pdn, int option)
> > > +{
> > > +	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
> > > +	u32 reg;
> > > +
> > > +	if (!edev->pcie_cap)
> > > +		return -ENOTTY;
> > 
> > 
> > Can pnv_eeh_do_flr() be really called on a non PCIe device, can we get that
> > far? WARN_ON_ONCE() may be?
> > 
> 
> So you suggest to add a WARN_ON_ONCE() in this condition, right?

I am asking a question here whether it makes sense or not to add a 
WARN_ON_ONCE or replace "if" with WARN_ON_ONCE or not having pcie_cap 
initialized is possible in this code - which one is it?


> 
> > 
> > > +
> > > +	eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCAP, 4, &reg);
> > 
> > 
> > ... and here about uninitialized @reg.
> > 
> > 
> > > +	if (!(reg & PCI_EXP_DEVCAP_FLR))
> > > +		return -ENOTTY;
> > > +
> > > +	switch (option) {
> > > +	case EEH_RESET_HOT:
> > > +	case EEH_RESET_FUNDAMENTAL:
> > > +		pnv_eeh_wait_for_pending(pdn, edev->pcie_cap + PCI_EXP_DEVSTA,
> > > +					 PCI_EXP_DEVSTA_TRPND, false);
> > > +		eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
> > > +				     4, &reg);
> > > +		reg |= PCI_EXP_DEVCTL_BCR_FLR;
> > > +		eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
> > > +				      4, reg);
> > > +		msleep(EEH_PE_RST_HOLD_TIME);
> > > +		break;
> > > +	case EEH_RESET_DEACTIVATE:
> > > +		eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
> > > +				     4, &reg);
> > > +		reg &= ~PCI_EXP_DEVCTL_BCR_FLR;
> > > +		eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
> > > +				      4, reg);
> > > +		msleep(EEH_PE_RST_SETTLE_TIME);
> > > +		break;
> > > +	}
> > > +
> > > +	return 0;
> > > +}
> > > +
> > > +static int pnv_eeh_do_af_flr(struct pci_dn *pdn, int option)
> > > +{
> > > +	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
> > > +	u32 cap;
> > > +
> > > +	if (!edev->af_cap)
> > > +		return -ENOTTY;
> > > +
> > > +	eeh_ops->read_config(pdn, edev->af_cap + PCI_AF_CAP, 1, &cap);
> > 
> > 
> > ... and here about @cap.
> > 
> > > +	if (!(cap & PCI_AF_CAP_TP) || !(cap & PCI_AF_CAP_FLR))
> > > +		return -ENOTTY;
> > > +
> > > +	switch (option) {
> > > +	case EEH_RESET_HOT:
> > > +	case EEH_RESET_FUNDAMENTAL:
> > > +		/*
> > > +		 * Wait for Transaction Pending bit to clear. A word-aligned
> > > +		 * test is used, so we use the conrol offset rather than status
> > > +		 * and shift the test bit to match.
> > 
> > 
> > Why word-aligned (not byte or double word)?
> > 
> 
> I copied this words from pci_af_flr(). Actually, I don't tried to understand
> this reason.

Ok. I looked at pci_af_flr().

In this patch, the comment before pnv_eeh_wait_for_pending() is missing, 
something like "pnv_eeh_wait_for_pending() uses a word-size accessor so 
@pos must be work-aligned".


> 
> > > +		 */
> > > +		pnv_eeh_wait_for_pending(pdn, edev->af_cap + PCI_AF_CTRL,
> > > +					 PCI_AF_STATUS_TP << 8, true);
> > > +		eeh_ops->write_config(pdn, edev->af_cap + PCI_AF_CTRL,
> > > +				      1, PCI_AF_CTRL_FLR);
> > > +		msleep(EEH_PE_RST_HOLD_TIME);
> > > +		break;
> > > +	case EEH_RESET_DEACTIVATE:
> > > +		eeh_ops->write_config(pdn, edev->af_cap + PCI_AF_CTRL, 1, 0);
> > > +		msleep(EEH_PE_RST_SETTLE_TIME);
> > 
> > 
> > btw there is an unrelated issue with EEH_PE_RST_SETTLE_TIME which is defined
> > as 1800 which is A LOT (+250ms from EEH_PE_RST_HOLD_TIME and for some reason
> > this is actually doubled so there is another reset somewhere).
> > 
> 
> I don't know the reason for this value. This code keeps aligned with other
> reset functions, like pnv_eeh_bridge_reset().


Are they all in POWERNV/EEH or generic PCI uses same values on, for 
example, x86?



> > Booting a guest with 63 VFs takes 6 minutes or so, is there a good reason for
> > such a huge timeout?
> > 
> > 
> > > +		break;
> > > +	}
> > > +
> > > +	return 0;
> > > +}
> > > +
> > > +static int pnv_eeh_reset_vf(struct pci_dn *pdn, int option)
> > > +{
> > > +	int ret;
> > > +
> > > +	ret = pnv_eeh_do_flr(pdn, option);
> > > +	if (ret != -ENOTTY)
> > > +		return ret;
> > > +
> > > +	return pnv_eeh_do_af_flr(pdn, option);
> > > +}
> > > +
> > > +static int pnv_eeh_vf_pe_reset(struct eeh_pe *pe, int option)
> > > +{
> > > +	struct eeh_dev *edev, *tmp;
> > > +	struct pci_dn *pdn;
> > > +	int ret;
> > > +
> > > +	eeh_pe_for_each_dev(pe, edev, tmp) {
> > > +		pdn = eeh_dev_to_pdn(edev);
> > > +		ret = pnv_eeh_reset_vf(pdn, option);
> > > +		if (ret)
> > > +			return ret;
> > > +	}
> > > +
> > > +	return 0;
> > > +}
> > > +
> > > void pnv_pci_reset_secondary_bus(struct pci_dev *dev)
> > > {
> > > 	struct pci_controller *hose;
> > > @@ -968,7 +1090,9 @@ static int pnv_eeh_reset(struct eeh_pe *pe, int option)
> > > 		}
> > > 
> > > 		bus = eeh_pe_bus_get(pe);
> > > -		if (pci_is_root_bus(bus) ||
> > > +		if (pe->type & EEH_PE_VF)
> > > +			ret = pnv_eeh_vf_pe_reset(pe, option);
> > > +		else if (pci_is_root_bus(bus) ||
> > > 			pci_is_root_bus(bus->parent))
> > > 			ret = pnv_eeh_root_reset(hose, option);
> > > 		else
> > > @@ -1108,6 +1232,14 @@ static inline bool pnv_eeh_cfg_blocked(struct pci_dn \
> > > *pdn)  if (!edev || !edev->pe)
> > > 		return false;
> > > 
> > > +	/*
> > > +	 * We will issue FLR or AF FLR to all VFs, which are contained
> > > +	 * in VF PE. It relies on the EEH PCI config accessors. So we
> > > +	 * can't block them during the window.
> > > +	 */
> > > +	if ((edev->physfn) && (edev->pe->state & EEH_PE_RESET))
> > 
> > 
> > Extra braces around edev->physfn.
> > 
> 
> Will remove it.
> 
> > 
> > 
> > > +		return false;
> > > +
> > > 	if (edev->pe->state & EEH_PE_CFG_BLOCKED)
> > > 		return true;
> > > 
> > > 


-- 
Alexey
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic