[prev in list] [next in list] [prev in thread] [next in thread] 

List:       linux-edac
Subject:    Re: [PATCH] powerpc/edac: Call work queue clean up routine only in polling mode
From:       Borislav Petkov <bp () alien8 ! de>
Date:       2016-01-27 9:26:51
Message-ID: 20160127092651.GE30712 () pd ! tnic
[Download RAW message or body]

On Wed, Jan 27, 2016 at 09:58:57AM +0100, Borislav Petkov wrote:
> Are you fixing a bug you're seeing where the box explodes because the
> workqueue is not initialized?

If so, you could try the below diff (only build-tested). But I've
been meaning to unify the PCI and MC workqueue handling for a while
now so I got rid of the setup/teardown functions and do that directly
in the ->add function where we decide whether we should be polling
(OP_RUNNING_POLL) or not and based on that decision we setup the
workqueue.

The same decision should be used in the ->del function to decide whether
there's even a workqueue which needs to be deleted. This makes the code
much more balanced and straightforward (that might be not that visible
from the diff itself but when it gets applied, it is clear - I'll split
it in smaller hunks if it turns out to make sense. It does at least to
me now).

Thanks.

---
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index 8adfc167c2e3..2d1f0091d3da 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -535,57 +535,18 @@ static void edac_mc_workq_function(struct work_struct *work_req)
 
 	mutex_lock(&mem_ctls_mutex);
 
-	/* if this control struct has movd to offline state, we are done */
-	if (mci->op_state == OP_OFFLINE) {
+	/* if this control struct has moved to offline state, we are done */
+	if (mci->op_state != OP_RUNNING_POLL) {
 		mutex_unlock(&mem_ctls_mutex);
 		return;
 	}
 
-	/* Only poll controllers that are running polled and have a check */
-	if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL))
+	if (edac_mc_assert_error_check_and_clear() && mci->edac_check)
 		mci->edac_check(mci);
 
-	mutex_unlock(&mem_ctls_mutex);
-
-	/* Reschedule */
 	edac_queue_work(&mci->work, msecs_to_jiffies(edac_mc_get_poll_msec()));
-}
-
-/*
- * edac_mc_workq_setup
- *	initialize a workq item for this mci
- *	passing in the new delay period in msec
- *
- *	locking model:
- *
- *		called with the mem_ctls_mutex held
- */
-static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec)
-{
-	edac_dbg(0, "\n");
-
-	/* if this instance is not in the POLL state, then simply return */
-	if (mci->op_state != OP_RUNNING_POLL)
-		return;
-
-	INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
-
-	edac_queue_work(&mci->work, msecs_to_jiffies(msec));
-}
 
-/*
- * edac_mc_workq_teardown
- *	stop the workq processing on this mci
- *
- *	locking model:
- *
- *		called WITHOUT lock held
- */
-static void edac_mc_workq_teardown(struct mem_ctl_info *mci)
-{
-	mci->op_state = OP_OFFLINE;
-
-	edac_stop_work(&mci->work);
+	mutex_unlock(&mem_ctls_mutex);
 }
 
 /*
@@ -776,7 +737,8 @@ int edac_mc_add_mc_with_groups(struct mem_ctl_info *mci,
 		/* This instance is NOW RUNNING */
 		mci->op_state = OP_RUNNING_POLL;
 
-		edac_mc_workq_setup(mci, edac_mc_get_poll_msec());
+		INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
+		edac_queue_work(&mci->work, msecs_to_jiffies(edac_mc_get_poll_msec()));
 	} else {
 		mci->op_state = OP_RUNNING_INTERRUPT;
 	}
@@ -823,15 +785,15 @@ struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
 		return NULL;
 	}
 
+	mci->op_state = OP_OFFLINE;
+
 	if (!del_mc_from_global_list(mci))
 		edac_mc_owner = NULL;
-	mutex_unlock(&mem_ctls_mutex);
 
-	/* flush workq processes */
-	edac_mc_workq_teardown(mci);
+	mutex_unlock(&mem_ctls_mutex);
 
-	/* marking MCI offline */
-	mci->op_state = OP_OFFLINE;
+	if (mci->edac_check)
+		edac_stop_work(&mci->work);
 
 	/* remove from sysfs */
 	edac_remove_sysfs_mci_device(mci);
diff --git a/drivers/edac/edac_pci.c b/drivers/edac/edac_pci.c
index 99685388d3fb..e152fda87bff 100644
--- a/drivers/edac/edac_pci.c
+++ b/drivers/edac/edac_pci.c
@@ -195,55 +195,24 @@ static void edac_pci_workq_function(struct work_struct *work_req)
 
 	mutex_lock(&edac_pci_ctls_mutex);
 
-	if (pci->op_state == OP_RUNNING_POLL) {
-		/* we might be in POLL mode, but there may NOT be a poll func
-		 */
-		if ((pci->edac_check != NULL) && edac_pci_get_check_errors())
-			pci->edac_check(pci);
-
-		/* if we are on a one second period, then use round */
-		msec = edac_pci_get_poll_msec();
-		if (msec == 1000)
-			delay = round_jiffies_relative(msecs_to_jiffies(msec));
-		else
-			delay = msecs_to_jiffies(msec);
-
-		/* Reschedule only if we are in POLL mode */
-		edac_queue_work(&pci->work, delay);
+	if (pci->op_state != OP_RUNNING_POLL) {
+		mutex_unlock(&edac_pci_ctls_mutex);
+		return;
 	}
 
-	mutex_unlock(&edac_pci_ctls_mutex);
-}
-
-/*
- * edac_pci_workq_setup()
- * 	initialize a workq item for this edac_pci instance
- * 	passing in the new delay period in msec
- *
- *	locking model:
- *		called when 'edac_pci_ctls_mutex' is locked
- */
-static void edac_pci_workq_setup(struct edac_pci_ctl_info *pci,
-				 unsigned int msec)
-{
-	edac_dbg(0, "\n");
-
-	INIT_DELAYED_WORK(&pci->work, edac_pci_workq_function);
+	if (pci->edac_check && edac_pci_get_check_errors())
+		pci->edac_check(pci);
 
-	edac_queue_work(&pci->work, msecs_to_jiffies(edac_pci_get_poll_msec()));
-}
+	/* if we are on a one second period, then use round */
+	msec = edac_pci_get_poll_msec();
+	if (msec == 1000)
+		delay = round_jiffies_relative(msecs_to_jiffies(msec));
+	else
+		delay = msecs_to_jiffies(msec);
 
-/*
- * edac_pci_workq_teardown()
- * 	stop the workq processing on this edac_pci instance
- */
-static void edac_pci_workq_teardown(struct edac_pci_ctl_info *pci)
-{
-	edac_dbg(0, "\n");
-
-	pci->op_state = OP_OFFLINE;
+	edac_queue_work(&pci->work, delay);
 
-	edac_stop_work(&pci->work);
+	mutex_unlock(&edac_pci_ctls_mutex);
 }
 
 /*
@@ -292,7 +261,8 @@ int edac_pci_add_device(struct edac_pci_ctl_info *pci, int edac_idx)
 	if (pci->edac_check != NULL) {
 		pci->op_state = OP_RUNNING_POLL;
 
-		edac_pci_workq_setup(pci, 1000);
+		INIT_DELAYED_WORK(&pci->work, edac_pci_workq_function);
+		edac_queue_work(&pci->work, msecs_to_jiffies(edac_pci_get_poll_msec()));
 	} else {
 		pci->op_state = OP_RUNNING_INTERRUPT;
 	}
@@ -350,8 +320,8 @@ struct edac_pci_ctl_info *edac_pci_del_device(struct device *dev)
 
 	mutex_unlock(&edac_pci_ctls_mutex);
 
-	/* stop the workq timer */
-	edac_pci_workq_teardown(pci);
+	if (pci->edac_check)
+		edac_stop_work(&pci->work);
 
 	edac_printk(KERN_INFO, EDAC_PCI,
 		"Removed device %d for %s %s: DEV %s\n",

-- 
Regards/Gruss,
    Boris.

ECO tip #101: Trim your mails when you reply.
--
To unsubscribe from this list: send the line "unsubscribe linux-edac" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic