[prev in list] [next in list] [prev in thread] [next in thread]
List: mesos-commits
Subject: [3/3] git commit: Fixed master to remove and rescind offers when a slave is disconnected.
From: vinodkone () apache ! org
Date: 2014-05-29 0:20:28
Message-ID: 77934da81461461bbc650c10b8e8f49c () git ! apache ! org
[Download RAW message or body]
Fixed master to remove and rescind offers when a slave is
disconnected.
Review: https://reviews.apache.org/r/21961
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/60865b2f
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/60865b2f
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/60865b2f
Branch: refs/heads/master
Commit: 60865b2f473d9898f1a18eee10f9a59a4862893a
Parents: 0b8f30e
Author: Vinod Kone <vinod@twitter.com>
Authored: Mon May 26 22:25:19 2014 -0700
Committer: Vinod Kone <vinod@twitter.com>
Committed: Wed May 28 17:17:20 2014 -0700
----------------------------------------------------------------------
src/master/master.cpp | 43 ++++++++++++--------------------
src/master/master.hpp | 5 ++--
src/tests/fault_tolerance_tests.cpp | 8 ++++++
3 files changed, 26 insertions(+), 30 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/60865b2f/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index fcbbc26..766a0e3 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -720,7 +720,21 @@ void Master::exited(const UPID& pid)
} else if (!slave->disconnected) {
// Checkpointing slaves can just be disconnected.
disconnect(slave);
- removeFrameworksAndOffers(slave);
+
+ // Remove all non-checkpointing frameworks from the slave.
+ hashset<FrameworkID> frameworkIds =
+ slave->tasks.keys() | slave->executors.keys();
+
+ foreach (const FrameworkID& frameworkId, frameworkIds) {
+ Framework* framework = getFramework(frameworkId);
+ if (framework != NULL && !framework->info.checkpoint()) {
+ LOG(INFO) << "Removing framework " << frameworkId
+ << " from disconnected slave " << *slave
+ << " because the framework is not checkpointing";
+
+ removeFramework(slave, framework);
+ }
+ }
} else {
LOG(WARNING) << "Ignoring duplicate exited() notification for "
<< "checkpointing slave " << *slave;
@@ -1336,37 +1350,12 @@ void Master::disconnect(Slave* slave)
// Remove the slave from authenticated. This is safe because
// a slave will always reauthenticate before (re-)registering.
authenticated.erase(slave->pid);
-}
-
-
-void Master::removeFrameworksAndOffers(Slave* slave)
-{
- CHECK_NOTNULL(slave);
-
- // If a slave is checkpointing, remove all non-checkpointing
- // frameworks from the slave. If the slave is not checkpointing,
- // remove all of its frameworks.
- hashset<FrameworkID> frameworkIds =
- slave->tasks.keys() | slave->executors.keys();
-
- foreach (const FrameworkID& frameworkId, frameworkIds) {
- Framework* framework = getFramework(frameworkId);
- if (framework != NULL &&
- (!framework->info.checkpoint() || !slave->info.checkpoint())) {
- LOG(INFO) << "Removing framework " << frameworkId
- << " from disconnected slave " << *slave << " because "
- << (!slave->info.checkpoint() ? "slave" : "framework")
- << " is not checkpointing";
-
- removeFramework(slave, framework);
- }
- }
+ // Remove and rescind offers.
foreach (Offer* offer, utils::copy(slave->offers)) {
allocator->resourcesRecovered(
offer->framework_id(), slave->id, offer->resources());
- // Remove and rescind offers.
removeOffer(offer, true); // Rescind!
}
}
http://git-wip-us.apache.org/repos/asf/mesos/blob/60865b2f/src/master/master.hpp
----------------------------------------------------------------------
diff --git a/src/master/master.hpp b/src/master/master.hpp
index 4c21d9e..d4ef4be 100644
--- a/src/master/master.hpp
+++ b/src/master/master.hpp
@@ -286,14 +286,13 @@ protected:
// reschedule offers that were assigned to this framework.
void removeFramework(Framework* framework);
- // Remove a framework from the slave, i.e., kill all of its tasks,
- // remove its offers and reallocate its resources.
+ // Remove a framework from the slave, i.e., remove its tasks and
+ // executors and recover the resources.
void removeFramework(Slave* slave, Framework* framework);
// TODO(adam-mesos): Rename deactivate to disconnect, or v.v.
void deactivate(Framework* framework);
void disconnect(Slave* slave);
- void removeFrameworksAndOffers(Slave* slave);
// Add a slave.
void addSlave(Slave* slave, bool reregister = false);
http://git-wip-us.apache.org/repos/asf/mesos/blob/60865b2f/src/tests/fault_tolerance_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/fault_tolerance_tests.cpp b/src/tests/fault_tolerance_tests.cpp
index e484a8a..4c6a5c4 100644
--- a/src/tests/fault_tolerance_tests.cpp
+++ b/src/tests/fault_tolerance_tests.cpp
@@ -1834,6 +1834,10 @@ TEST_F(FaultToleranceTest, SlaveReregisterOnZKExpiration)
AWAIT_READY(resourceOffers);
+ Future<Nothing> offerRescinded;
+ EXPECT_CALL(sched, offerRescinded(_, _))
+ .WillOnce(FutureSatisfy(&offerRescinded));
+
Future<SlaveReregisteredMessage> slaveReregisteredMessage =
FUTURE_PROTOBUF(SlaveReregisteredMessage(), _, _);
@@ -1841,6 +1845,10 @@ TEST_F(FaultToleranceTest, SlaveReregisterOnZKExpiration)
// expiration) at the slave.
detector.appoint(master.get());
+ // Since re-registration of an authenticating slave results in the
+ // slave being disconnected, its outstanding offers should be rescinded.
+ AWAIT_READY(offerRescinded);
+
AWAIT_READY(slaveReregisteredMessage);
driver.stop();
[prev in list] [next in list] [prev in thread] [next in thread]
Configure |
About |
News |
Add a list |
Sponsored by KoreLogic