[prev in list] [next in list] [prev in thread] [next in thread] 

List:       mesos-commits
Subject:    [3/3] git commit: Fixed master to remove and rescind offers when a slave is disconnected.
From:       vinodkone () apache ! org
Date:       2014-05-29 0:20:28
Message-ID: 77934da81461461bbc650c10b8e8f49c () git ! apache ! org
[Download RAW message or body]

Fixed master to remove and rescind offers when a slave is
disconnected.

Review: https://reviews.apache.org/r/21961


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/60865b2f
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/60865b2f
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/60865b2f

Branch: refs/heads/master
Commit: 60865b2f473d9898f1a18eee10f9a59a4862893a
Parents: 0b8f30e
Author: Vinod Kone <vinod@twitter.com>
Authored: Mon May 26 22:25:19 2014 -0700
Committer: Vinod Kone <vinod@twitter.com>
Committed: Wed May 28 17:17:20 2014 -0700

----------------------------------------------------------------------
 src/master/master.cpp               | 43 ++++++++++++--------------------
 src/master/master.hpp               |  5 ++--
 src/tests/fault_tolerance_tests.cpp |  8 ++++++
 3 files changed, 26 insertions(+), 30 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/60865b2f/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index fcbbc26..766a0e3 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -720,7 +720,21 @@ void Master::exited(const UPID& pid)
       } else if (!slave->disconnected) {
         // Checkpointing slaves can just be disconnected.
         disconnect(slave);
-        removeFrameworksAndOffers(slave);
+
+        // Remove all non-checkpointing frameworks.
+        hashset<FrameworkID> frameworkIds =
+          slave->tasks.keys() | slave->executors.keys();
+
+        foreach (const FrameworkID& frameworkId, frameworkIds) {
+          Framework* framework = getFramework(frameworkId);
+          if (framework != NULL && !framework->info.checkpoint()) {
+            LOG(INFO) << "Removing framework " << frameworkId
+                      << " from disconnected slave " << *slave
+                      << " because the framework is not checkpointing";
+
+            removeFramework(slave, framework);
+          }
+        }
       } else {
         LOG(WARNING) << "Ignoring duplicate exited() notification for "
                      << "checkpointing slave " << *slave;
@@ -1336,37 +1350,12 @@ void Master::disconnect(Slave* slave)
   // Remove the slave from authenticated. This is safe because
   // a slave will always reauthenticate before (re-)registering.
   authenticated.erase(slave->pid);
-}
-
-
-void Master::removeFrameworksAndOffers(Slave* slave)
-{
-  CHECK_NOTNULL(slave);
-
-  // If a slave is checkpointing, remove all non-checkpointing
-  // frameworks from the slave. If the slave is not checkpointing,
-  // remove all of its frameworks.
-  hashset<FrameworkID> frameworkIds =
-    slave->tasks.keys() | slave->executors.keys();
-
-  foreach (const FrameworkID& frameworkId, frameworkIds) {
-    Framework* framework = getFramework(frameworkId);
-    if (framework != NULL &&
-        (!framework->info.checkpoint() || !slave->info.checkpoint())) {
-      LOG(INFO) << "Removing framework " << frameworkId
-                << " from disconnected slave " << *slave << " because "
-                << (!slave->info.checkpoint() ? "slave" : "framework")
-                << " is not checkpointing";
-
-      removeFramework(slave, framework);
-    }
-  }
 
+  // Remove and rescind offers.
   foreach (Offer* offer, utils::copy(slave->offers)) {
     allocator->resourcesRecovered(
         offer->framework_id(), slave->id, offer->resources());
 
-    // Remove and rescind offers.
     removeOffer(offer, true); // Rescind!
   }
 }

http://git-wip-us.apache.org/repos/asf/mesos/blob/60865b2f/src/master/master.hpp
----------------------------------------------------------------------
diff --git a/src/master/master.hpp b/src/master/master.hpp
index 4c21d9e..d4ef4be 100644
--- a/src/master/master.hpp
+++ b/src/master/master.hpp
@@ -286,14 +286,13 @@ protected:
   // reschedule offers that were assigned to this framework.
   void removeFramework(Framework* framework);
 
-  // Remove a framework from the slave, i.e., kill all of its tasks,
-  // remove its offers and reallocate its resources.
+  // Remove a framework from the slave, i.e., remove its tasks and
+  // executors and recover the resources.
   void removeFramework(Slave* slave, Framework* framework);
 
   // TODO(adam-mesos): Rename deactivate to disconnect, or v.v.
   void deactivate(Framework* framework);
   void disconnect(Slave* slave);
-  void removeFrameworksAndOffers(Slave* slave);
 
   // Add a slave.
   void addSlave(Slave* slave, bool reregister = false);

http://git-wip-us.apache.org/repos/asf/mesos/blob/60865b2f/src/tests/fault_tolerance_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/fault_tolerance_tests.cpp b/src/tests/fault_tolerance_tests.cpp
index e484a8a..4c6a5c4 100644
--- a/src/tests/fault_tolerance_tests.cpp
+++ b/src/tests/fault_tolerance_tests.cpp
@@ -1834,6 +1834,10 @@ TEST_F(FaultToleranceTest, SlaveReregisterOnZKExpiration)
 
   AWAIT_READY(resourceOffers);
 
+  Future<Nothing> offerRescinded;
+  EXPECT_CALL(sched, offerRescinded(_, _))
+    .WillOnce(FutureSatisfy(&offerRescinded));
+
   Future<SlaveReregisteredMessage> slaveReregisteredMessage =
     FUTURE_PROTOBUF(SlaveReregisteredMessage(), _, _);
 
@@ -1841,6 +1845,10 @@ TEST_F(FaultToleranceTest, SlaveReregisterOnZKExpiration)
   // expiration) at the slave.
   detector.appoint(master.get());
 
+  // Since an authenticating slave re-registration results in
+  // disconnecting the slave, its outstanding offers should be rescinded.
+  AWAIT_READY(offerRescinded);
+
   AWAIT_READY(slaveReregisteredMessage);
 
   driver.stop();


[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic