[prev in list] [next in list] [prev in thread] [next in thread] 

List:       ceph-commit
Subject:    [ceph-commit] branch wip-kefu-testing updated. v9.0.0-1146-gbb38f2e
From:       ceph-commit () ceph ! com (ceph ! git)
Date:       2015-05-31 17:31:06
Message-ID: 20150531173107.C0CE13F528 () ds3426 ! dreamservers ! com
[Download RAW message or body]

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "".

The branch, wip-kefu-testing has been updated
  discards  564d8287c56abcf1a52bd618e68f87c8b6d07514 (commit)
  discards  15b196f170e5f0b253424bd75230f9f2fbd6733a (commit)
  discards  11f438c188da52039408950bfe9f3e7dd2f98e6b (commit)
  discards  f96ce4b8513fe105b98787c3e0cee04ae278617b (commit)
  discards  9187343d00a0dc4bd8488a5675b9adfd8054f1a2 (commit)
  discards  7180deac4cbdfb25c5469e8f9719a19bbfffb774 (commit)
  discards  77244853a98924f1350c9c1eb373b719f0b1cba7 (commit)
  discards  ee85652d21b3f33abe4db20ce505707be39d25e1 (commit)
  discards  cb672434d7004036b7fb544ea80db3877332ef8a (commit)
       via  bb38f2e1f362259ddce00c6abfeeca817ac29d01 (commit)
       via  c6e634875316cf17368d497e6dc4f6f4b5dd65d2 (commit)
       via  9381d53acdce85fcbff828926b911e050ba36e51 (commit)
       via  22e6bd6e01d5df3f3e897562597e22ca1737f8c8 (commit)
       via  d0658dd3cdf072b2a7c2a1986f8785a697c591ee (commit)
       via  e640d89240017956b8c7411babb86be0f1e2b172 (commit)
       via  a955f36a509e5412b1f72632a1a956d99e768e35 (commit)
       via  d6b46d4c7b722945ce24ac2930381a109b1e3dda (commit)
       via  b75384d73958faf81d45847a7dfa56f4fa347e6f (commit)
       via  c8705e5ae50a4bc3d13489febca03d0ba6395c35 (commit)
       via  f5a95804af1bcf04f3e9d3e863243531fd77373b (commit)
       via  4487d13cc6a737d36f6cfafdf7c28ef6423e17c1 (commit)
       via  5652518cf87b35ac5e73ac8b24b37437da6ef7c4 (commit)
       via  55b9e4e29bdfa2dc0442a27921773a2cc260a5bf (commit)
       via  a6479e4efec5f5d58ad3e1c9d76d371f0d216779 (commit)
       via  6fcf162deadd3749fbf4c69407e0a1cea90f0755 (commit)
       via  830054b419be7f2a8097ae2856e9a09add4b58ee (commit)
       via  17855b4288260d3b00d01e5e310f1a37ab1a1add (commit)
       via  8199e0064bdc2f2cb0fdd59547f796391445093d (commit)
       via  013f9af82c201646bd228467d436d81a905700c0 (commit)
       via  bbf75f811cdc701cb6920edb1dde68b9534d36cf (commit)
       via  cbc96a08d0dae4ad748c222a265a80bccb49ed24 (commit)
       via  64096b870960d021ab5001b6a5cf3a999a9abeb7 (commit)
       via  33150c509d33ca0fad8f19d2d9e43fe7dad57b24 (commit)
       via  d07100b6606e791c878a1e4e1ec804f5438a1f5c (commit)
       via  8a95f13a69397bc2c8ee7dbf13da99c9998ad7d2 (commit)
       via  315260a58c64585b3236cf1b8fc199f1aed87e49 (commit)
       via  be873eb8da7b29ecefaa5a99b88de7ddcca711ee (commit)
       via  5c2b795724423ed484ab451de855ddcfc085342b (commit)
       via  f9ce2d78cf5b5ccfd599dac531c8eadfdfbfdbf1 (commit)
       via  d7fabebf847add671e5dd7fbefc1460233f44207 (commit)
       via  2b28150765ca160e34570f17ddbfd178c6517903 (commit)
       via  3e072c9032f51af4b099812854bbb4fa95a654d3 (commit)
       via  891e9b17dc5eaa5ba995887f32a4c8212363f9bb (commit)
       via  a10bd899d1f8eddaf4cec5c6d55bc9a80f5081d9 (commit)
       via  8a6d626c632b4b827fcb69ce80f488898fcbf7ed (commit)
       via  b3555e9c328633c9e1fbc27d652c004b30535e5b (commit)
       via  83f88e764528c18bf8939b08c53bad4b592e8590 (commit)
       via  ddcbb66d4dad58f5b269b98145ec3d7630a8fc1a (commit)
       via  77f322b63c95580cfa2553b91727069e67d00550 (commit)
       via  70e069d272f74b9cc60435f25e726a77f841e981 (commit)
       via  28820d1cd630ec466ed1069a437d740a025fb9aa (commit)
       via  a808c814b029dd7ddf4bb70a485bad94fbbdfb61 (commit)
       via  aa62dcbe39f003c599688f6a3003c746773fdd86 (commit)
       via  90eb7768f99ea249952df195a844a3a7c9a59b78 (commit)
       via  6051e255ac062985ada1989edb7f23cd750915e2 (commit)
       via  ff79959c037a7145f7104b06d9e6a64492fdb95f (commit)
       via  4fe7d2abdff2fce359e5e992206644cc03825ee0 (commit)
       via  121aa3bc612b86281535ac3bcfe98bc99bc99ace (commit)
       via  c2d17b927f8a222164b3bf2922a4ff337696f566 (commit)
       via  886f5a91f35d04cd4a721f1a66a2ef4a2c39fd5e (commit)
       via  b2cd80c0e030fe19a4ec8955237dd029fec62935 (commit)
       via  c6cdb4081e366f471b372102905a1192910ab2da (commit)
       via  8614dcebf384b74b283cd352001a65fa26ba925c (commit)
       via  11b7801bb57cb25cd2d26d58722d49691747725b (commit)
       via  13c0fca646e0d57468bcd4def119b6d255630bc5 (commit)
       via  bef09e0cdb274cb1c87335a2af9ee532d14a4596 (commit)
       via  bbec53edf9e585af4e20bbc9ba9057d6fdfda342 (commit)
       via  2738d02bef7972f55a3df45b5b1c841bd7738f40 (commit)
       via  fc51ce2a837e8e878d46d8ca54531aa7bd5d01b2 (commit)
       via  193f1e33b3bf4b40f7d59b1822d01c735daa9c32 (commit)
       via  3fdace649d6eb239146585be156153de60c9f567 (commit)
       via  e89ee9e0e1a91b15ecb00d41185de1d34c68104c (commit)
       via  6150757dbe0fa11cceb14460865b859a7c8164c7 (commit)
       via  a45a698372def1623323470c6a1c4eb70e0bb79f (commit)
       via  c7702bf85d3617b3e1c6619b8ebeff34932fc3e4 (commit)
       via  f6d76f948049a66214339d36f8835d88db99001a (commit)
       via  ee8c50b66b38cd8c639c90fc2ec83b938c0fdcfc (commit)
       via  45094ffe84a09af327d05f015111040fbfbbe1d2 (commit)
       via  573d2cc6dc52158bcdb00f0c001518603b9bfed6 (commit)
       via  6b054fcc99c4befa87e988c7fe6d3d1a47d1e2ab (commit)

This update added new revisions after undoing existing revisions.  That is
to say, the old revision is not a strict subset of the new revision.  This
situation occurs when you --force push a change and generate a repository
containing something like this:

 * -- * -- B -- O -- O -- O (564d8287c56abcf1a52bd618e68f87c8b6d07514)
            \
             N -- N -- N (bb38f2e1f362259ddce00c6abfeeca817ac29d01)

When this happens we assume that you've already had alert emails for all
of the O revisions, and so we here report only the revisions in the N
branch from the common base, B.

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit bb38f2e1f362259ddce00c6abfeeca817ac29d01
Merge: c6e6348 2b23327 b3555e9 e1f1c56
Author: Kefu Chai <kchai at redhat.com>
Date:   Mon Jun 1 01:28:23 2015 +0800

    Merge branches 'wip-4726', 'wip-4760', 'wip-4702' and 'wip-4643' into wip-kefu-testing

commit c6e634875316cf17368d497e6dc4f6f4b5dd65d2
Author: Kefu Chai <kchai at redhat.com>
Date:   Tue May 26 18:11:59 2015 +0800

    mon: add "--check" to CrushTester::test_with_crushtool()
    
    so we don't need to call CrushTester::check_name_maps() in OSDMonitor.cc
    anymore.
    
    Fixes: #11680
    Signed-off-by: Kefu Chai <kchai at redhat.com>

commit 9381d53acdce85fcbff828926b911e050ba36e51
Author: Kefu Chai <kchai at redhat.com>
Date:   Tue May 26 17:51:50 2015 +0800

    crushtool: rename "--check-names" to "--check"
    
    * because "--check" also checks for the max_id
    
    Signed-off-by: Kefu Chai <kchai at redhat.com>

commit 22e6bd6e01d5df3f3e897562597e22ca1737f8c8
Author: Kefu Chai <kchai at redhat.com>
Date:   Tue May 26 16:58:23 2015 +0800

    mon: check the new crush map against osdmap.max_osd
    
    Fixes: #11680
    Signed-off-by: Kefu Chai <kchai at redhat.com>

commit d0658dd3cdf072b2a7c2a1986f8785a697c591ee
Author: Kefu Chai <kchai at redhat.com>
Date:   Tue May 26 15:35:10 2015 +0800

    crushtool: enable check against max_id
    
    add an argument "max_id" for "--check-names" to check if any item
    has an id greater than or equal to the given "max_id" in the crush map.
    
    Signed-off-by: Kefu Chai <kchai at redhat.com>

commit e640d89240017956b8c7411babb86be0f1e2b172
Author: Kefu Chai <kchai at redhat.com>
Date:   Tue May 26 15:34:33 2015 +0800

    crush/CrushTester: check if any item id is too large
    
    Signed-off-by: Kefu Chai <kchai at redhat.com>

commit a955f36a509e5412b1f72632a1a956d99e768e35
Author: Kefu Chai <kchai at redhat.com>
Date:   Mon May 25 20:14:32 2015 +0800

    mon: validate new crush for unknown names
    
    * the "osd tree dump" command enumerates all buckets/osds found in either the
      crush map or the osd map. but the newly set crushmap is not validated for
      dangling references, so when a new crush map is sent to the monitor, we
      need to check whether any item in it references an unknown type/name,
      and reject the map if so.
    
    Fixes: #11680
    Signed-off-by: Kefu Chai <kchai at redhat.com>

commit d6b46d4c7b722945ce24ac2930381a109b1e3dda
Author: Kefu Chai <kchai at redhat.com>
Date:   Tue May 26 12:08:36 2015 +0800

    crushtool: add the "--check-names" option
    
    * so one is able to verify that the "ceph osd tree" won't choke on the
      new crush map because of dangling name/type references
    
    Signed-off-by: Kefu Chai <kchai at redhat.com>

commit b75384d73958faf81d45847a7dfa56f4fa347e6f
Author: Kefu Chai <kchai at redhat.com>
Date:   Tue May 26 12:08:09 2015 +0800

    crush/CrushTester: add check_name_maps() method
    
    * check for dangling bucket name or type names referenced by the
      buckets/items in the crush map.
    * also check for the references from Item(0, 0, 0) which does not
      necessarily exist in the crush map under testing. the rationale
      behind this is: the "ceph osd tree" will also print stray OSDs
      whose id is greater than or equal to 0. so it would be useful to
      check if the crush map offers the type name indexed by "0"
      (the name of OSDs is always "OSD.{id}", so we don't need to
      look up the name of an OSD item in the crushmap).
    
    Signed-off-by: Kefu Chai <kchai at redhat.com>

commit b3555e9c328633c9e1fbc27d652c004b30535e5b
Author: Kefu Chai <kchai at redhat.com>
Date:   Fri May 15 22:50:36 2015 +0800

    mon: always reply mdsbeacon
    
    the MDS (Beacon) always expects a reply to the mdsbeacon messages from
    the lead mon, and it uses the delay as a metric for the laggy-ness of the
    Beacon. when it comes to the MDSMonitor on a peon, it will remove the route
    session upon seeing a reply (route message) from the leader, so a reply to
    mdsbeacon will stop the peon from resending the mdsbeacon request to the
    leader.
    
    if the MDSMonitor re-forwards the unreplied requests after they are
    outdated, there is a chance that requests reflecting an old or even wrong
    state of the MDSs will mislead the lead monitor. for example, the MDSs which
    sent the outdated messages could be dead.
    
    Fixes: #11590
    Signed-off-by: Kefu Chai <kchai at redhat.com>

-----------------------------------------------------------------------

Summary of changes:
 install-deps.sh                                    |   29 +++-
 qa/workunits/cephtool/test.sh                      |   17 +++
 src/common/ceph_argparse.cc                        |    6 +
 src/common/xattr.h                                 |    8 +
 src/crush/CrushTester.cc                           |   60 ++++++++-
 src/crush/CrushTester.h                            |    9 ++
 src/messages/MOSDECSubOpWrite.h                    |    7 +-
 src/mon/MDSMonitor.cc                              |   10 +-
 src/mon/OSDMonitor.cc                              |   16 ++-
 src/os/WBThrottle.cc                               |   38 ++---
 src/os/WBThrottle.h                                |    9 ++
 src/os/chain_xattr.cc                              |   59 ++++++--
 src/os/chain_xattr.h                               |    6 +
 src/osd/ECBackend.cc                               |   11 +-
 src/osd/ECMsgTypes.h                               |   19 +++
 src/osd/OSDMap.cc                                  |    9 +-
 src/osd/OSDMap.h                                   |    2 +-
 src/osd/PGBackend.h                                |    4 +
 src/osd/ReplicatedBackend.cc                       |   28 ++--
 src/osd/ReplicatedPG.cc                            |  145 ++++++++++++--------
 src/osd/ReplicatedPG.h                             |   18 +++-
 src/osd/osd_types.cc                               |   24 ++++
 src/osd/osd_types.h                                |    9 +-
 src/osdc/Journaler.h                               |   10 +-
 src/osdc/Objecter.cc                               |    4 +
 src/osdc/Objecter.h                                |    2 +
 src/rgw/rgw_main.cc                                |    3 -
 src/test/ceph-helpers.sh                           |    4 +-
 src/test/ceph_argparse.cc                          |   24 ++++
 .../cli/crushtool/check-names.empty.crushmap.txt   |   11 ++
 src/test/cli/crushtool/check-names.empty.t         |    4 +
 src/test/cli/crushtool/check-names.max-id.t        |    7 +
 src/test/cli/crushtool/help.t                      |    1 +
 src/test/cli/osdmaptool/tree.t                     |   19 +++
 src/test/encoding/types.h                          |    2 +-
 src/test/librados/io.cc                            |   50 -------
 src/test/librados/snapshots.cc                     |   68 +++++++++
 src/test/librados/tier.cc                          |   63 +--------
 src/test/mon/osd-crush.sh                          |   13 ++
 src/test/objectstore/chain_xattr.cc                |   72 ++++++++++
 src/test/osd/RadosModel.h                          |   21 +++-
 src/test/osd/TestRados.cc                          |   23 +++-
 src/test/osd/osd-scrub-repair.sh                   |  111 ++++++++++------
 src/test/perf_local.cc                             |   14 ++-
 src/tools/crushtool.cc                             |   15 ++-
 src/tools/osdmaptool.cc                            |   20 ++-
 46 files changed, 792 insertions(+), 312 deletions(-)
 create mode 100644 src/test/cli/crushtool/check-names.empty.crushmap.txt
 create mode 100644 src/test/cli/crushtool/check-names.empty.t
 create mode 100644 src/test/cli/crushtool/check-names.max-id.t
 create mode 100644 src/test/cli/osdmaptool/tree.t

diff --git a/install-deps.sh b/install-deps.sh
index 00de548..4df059a 100755
--- a/install-deps.sh
+++ b/install-deps.sh
@@ -42,7 +42,7 @@ Ubuntu|Debian|Devuan)
         packages=$(dpkg-checkbuilddeps --admindir=$DIR debian/control 2>&1 | \
             perl -p -e 's/.*Unmet build dependencies: *//;' \
             -e 's/build-essential:native/build-essential/;' \
-            -e 's/\|//g;' \
+            -e 's/\s*\|\s*/\|/g;' \
             -e 's/\(.*?\)//g;' \
             -e 's/ +/\n/g;' | sort)
         case $(lsb_release -sc) in
@@ -52,7 +52,7 @@ Ubuntu|Debian|Devuan)
                 ;;
         esac
         packages=$(echo $packages) # change newlines into spaces
-        $SUDO bash -c "DEBIAN_FRONTEND=noninteractive apt-get install $backports -y \
$packages" || exit 1 +        $SUDO env DEBIAN_FRONTEND=noninteractive apt-get \
install $backports -y $packages || exit 1  ;;
 CentOS|Fedora|RedHatEnterpriseServer)
         case $(lsb_release -si) in
@@ -84,6 +84,23 @@ CentOS|Fedora|RedHatEnterpriseServer)
         ;;
 esac
 
+function get_pip_and_wheel() {
+    local install=$1
+
+    # Ubuntu-12.04 and Python 2.7.3 require this line
+    pip --timeout 300 $install 'distribute >= 0.7.3' || return 1
+    # although pip comes with virtualenv, having a recent version
+    # of pip matters when it comes to using wheel packages
+    pip --timeout 300 $install 'setuptools >= 0.8' 'pip >= 7.0' 'wheel >= 0.24' || \
return 1 +}
+
+# use pip cache if possible but do not store it outside of the source
+# tree
+# see https://pip.pypa.io/en/stable/reference/pip_install.html#caching
+mkdir -p install-deps-cache
+top_srcdir=$(pwd)
+export XDG_CACHE_HOME=$top_srcdir/install-deps-cache
+
 #
 # preload python modules so that tox can run without network access
 #
@@ -92,12 +109,11 @@ for interpreter in python2.7 python3 ; do
     if ! test -d install-deps-$interpreter ; then
         virtualenv --python $interpreter install-deps-$interpreter
         . install-deps-$interpreter/bin/activate
-        pip --timeout 300 install wheel || exit 1
+        get_pip_and_wheel install || exit 1
     fi
 done
 
 find . -name tox.ini | while read ini ; do
-    top_srcdir=$(pwd)
     (
         cd $(dirname $ini)
         require=$(ls *requirements.txt 2>/dev/null | sed -e 's/^/-r /')
@@ -105,9 +121,8 @@ find . -name tox.ini | while read ini ; do
             for interpreter in python2.7 python3 ; do
                 type $interpreter > /dev/null 2>&1 || continue
                 . $top_srcdir/install-deps-$interpreter/bin/activate
-                # although pip comes with virtualenv, having a recent version
-                # of pip matters when it comes to using wheel packages
-                pip --timeout 300 wheel $require 'setuptools >= 0.7' 'pip >= 6.1' || \
exit 1 +                get_pip_and_wheel wheel || exit 1
+                pip --timeout 300 wheel $require || exit 1
             done
         fi
     )
diff --git a/qa/workunits/cephtool/test.sh b/qa/workunits/cephtool/test.sh
index 2d243f1..06c5ff4 100755
--- a/qa/workunits/cephtool/test.sh
+++ b/qa/workunits/cephtool/test.sh
@@ -314,6 +314,23 @@ function test_tiering()
   ceph osd pool delete cache cache --yes-i-really-really-mean-it
   ceph osd pool delete cache2 cache2 --yes-i-really-really-mean-it
 
+  # make sure we can't clobber snapshot state
+  ceph osd pool create snap_base 2
+  ceph osd pool create snap_cache 2
+  rbd -p snap_cache create foo --size 10
+  rbd -p snap_cache snap create foo --snap snap1
+  rbd -p snap_cache snap rm foo --snap snap1
+  expect_false ceph osd tier add snap_base snap_cache --force-nonempty
+  ceph osd pool delete snap_base snap_base --yes-i-really-really-mean-it
+  ceph osd pool delete snap_cache snap_cache --yes-i-really-really-mean-it
+
+  # make sure we can't create an ec pool tier
+  ceph osd pool create eccache 2 2 erasure
+  ceph osd pool create repbase 2
+  expect_false ceph osd tier add repbase eccache
+  ceph osd pool delete repbase repbase --yes-i-really-really-mean-it
+  ceph osd pool delete eccache eccache --yes-i-really-really-mean-it
+
   # convenient add-cache command
   ceph osd pool create cache3 2
   ceph osd tier add-cache slow cache3 1024000
diff --git a/src/common/ceph_argparse.cc b/src/common/ceph_argparse.cc
index 400edee..924b680 100644
--- a/src/common/ceph_argparse.cc
+++ b/src/common/ceph_argparse.cc
@@ -154,6 +154,7 @@ void vec_to_argv(const char *argv0, std::vector<const char*>& \
args,  void ceph_arg_value_type(const char * nextargstr, bool *bool_option, bool \
*bool_numeric)  {
   bool is_numeric = true;
+  bool is_float = false;
   bool is_option;
 
   if (nextargstr == NULL) {
@@ -173,6 +174,11 @@ void ceph_arg_value_type(const char * nextargstr, bool \
*bool_option, bool *bool_  if (nextargstr[0] == '-')
 	  continue;
       }
+      if ( (nextargstr[i] == '.') && (is_float == false) ) {
+        is_float = true;
+        continue;
+      }
+        
       is_numeric = false;
       break;
     }
diff --git a/src/common/xattr.h b/src/common/xattr.h
index 30b0485..147a23c 100644
--- a/src/common/xattr.h
+++ b/src/common/xattr.h
@@ -13,11 +13,19 @@
 #define CEPH_EXTATTR_H
 
 #include <sys/types.h>
+#include <errno.h>
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
+// Almost everyone defines ENOATTR, except for Linux,
+// which does #define ENOATTR ENODATA.  It seems that occasionally that
+// isn't defined, though, so let's make sure.
+#ifndef ENOATTR
+# define ENOATTR ENODATA
+#endif
+
 int ceph_os_setxattr(const char *path, const char *name,
                   const void *value, size_t size);
 int ceph_os_fsetxattr(int fd, const char *name, const void *value,
diff --git a/src/crush/CrushTester.cc b/src/crush/CrushTester.cc
index 036c07b..2a3ae8f 100644
--- a/src/crush/CrushTester.cc
+++ b/src/crush/CrushTester.cc
@@ -3,10 +3,11 @@
 
 #include "include/stringify.h"
 #include "CrushTester.h"
+#include "CrushTreeDumper.h"
 
 #include <algorithm>
 #include <stdlib.h>
-
+#include <boost/lexical_cast.hpp>
 #include <common/SubProcess.h>
 
 void CrushTester::set_device_weight(int dev, float f)
@@ -354,10 +355,11 @@ void \
CrushTester::write_integer_indexed_scalar_data_string(vector<string> &dst,  \
dst.push_back( data_buffer.str() );  }
 
-int CrushTester::test_with_crushtool(const char *crushtool_cmd, int timeout)
+int CrushTester::test_with_crushtool(const char *crushtool_cmd, int max_id, int \
timeout)  {
   SubProcessTimed crushtool(crushtool_cmd, true, false, true, timeout);
-  crushtool.add_cmd_args("-i", "-", "--test", NULL);
+  string opt_max_id = boost::lexical_cast<string>(max_id);
+  crushtool.add_cmd_args("-i", "-", "--test", "--check", opt_max_id.c_str(), NULL);
   int ret = crushtool.spawn();
   if (ret != 0) {
     err << "failed run crushtool: " << crushtool.err();
@@ -383,6 +385,58 @@ int CrushTester::test_with_crushtool(const char *crushtool_cmd, \
int timeout)  return 0;
 }
 
+namespace {
+  class BadCrushMap : public std::runtime_error {
+  public:
+    int item;
+    BadCrushMap(const char* msg, int id)
+      : std::runtime_error(msg), item(id) {}
+  };
+  // throws if any node in the crush fail to print
+  class CrushWalker : public CrushTreeDumper::Dumper<void> {
+    typedef void DumbFormatter;
+    typedef CrushTreeDumper::Dumper<DumbFormatter> Parent;
+    unsigned max_id;
+  public:
+    CrushWalker(const CrushWrapper *crush, unsigned max_id)
+      : Parent(crush), max_id(max_id) {}
+    void dump_item(const CrushTreeDumper::Item &qi, DumbFormatter *) {
+      int type = -1;
+      if (qi.is_bucket()) {
+	if (!crush->get_item_name(qi.id)) {
+	  throw BadCrushMap("unknown item name", qi.id);
+	}
+	type = crush->get_bucket_type(qi.id);
+      } else {
+	if (max_id > 0 && qi.id >= max_id) {
+	  throw BadCrushMap("item id too large", qi.id);
+	}
+	type = 0;
+      }
+      if (!crush->get_type_name(type)) {
+	throw BadCrushMap("unknown type name", qi.id);
+      }
+    }
+  };
+}
+
+bool CrushTester::check_name_maps(unsigned max_id) const
+{
+  CrushWalker crush_walker(&crush, max_id);
+  try {
+    // walk through the crush, to see if its self-contained
+    crush_walker.dump(NULL);
+    // and see if the maps is also able to handle straying OSDs, whose id >= 0.
+    // "ceph osd tree" will try to print them, even they are not listed in the
+    // crush map.
+    crush_walker.dump_item(CrushTreeDumper::Item(0, 0, 0), NULL);
+  } catch (const BadCrushMap& e) {
+    err << e.what() << ": item#" << e.item << std::endl;
+    return false;
+  }
+  return true;
+}
+
 int CrushTester::test()
 {
   if (min_rule < 0 || max_rule < 0) {
diff --git a/src/crush/CrushTester.h b/src/crush/CrushTester.h
index 2cca2ad..ed14761 100644
--- a/src/crush/CrushTester.h
+++ b/src/crush/CrushTester.h
@@ -333,8 +333,17 @@ public:
     min_rule = max_rule = rule;
   }
 
+  /**
+   * check if any bucket/nodes is referencing an unknown name or type
+   * @param max_id rejects any non-bucket items with id >= this number,
+   *               pass 0 to disable this check
+   * @return false if a dangling name/type is referenced or an item id is too
+   *         large, true otherwise
+   */
+  bool check_name_maps(unsigned max_id = 0) const;
   int test();
   int test_with_crushtool(const char *crushtool_cmd = "crushtool",
+			  int max_id = -1,
 			  int timeout = 0);
 };
 
diff --git a/src/messages/MOSDECSubOpWrite.h b/src/messages/MOSDECSubOpWrite.h
index a47bcef..b3a8e3c 100644
--- a/src/messages/MOSDECSubOpWrite.h
+++ b/src/messages/MOSDECSubOpWrite.h
@@ -35,9 +35,10 @@ public:
   MOSDECSubOpWrite()
     : Message(MSG_OSD_EC_WRITE, HEAD_VERSION, COMPAT_VERSION)
     {}
-  MOSDECSubOpWrite(ECSubWrite &op)
-  : Message(MSG_OSD_EC_WRITE, HEAD_VERSION, COMPAT_VERSION),
-    op(op) {}
+  MOSDECSubOpWrite(ECSubWrite &in_op)
+    : Message(MSG_OSD_EC_WRITE, HEAD_VERSION, COMPAT_VERSION) {
+    op.claim(in_op);
+  }
 
   virtual void decode_payload() {
     bufferlist::iterator p = payload.begin();
diff --git a/src/mon/MDSMonitor.cc b/src/mon/MDSMonitor.cc
index 7dc6c92..524fcc9 100644
--- a/src/mon/MDSMonitor.cc
+++ b/src/mon/MDSMonitor.cc
@@ -320,7 +320,7 @@ bool MDSMonitor::preprocess_beacon(MMDSBeacon *m)
     goto out;
   }
   // is there a state change here?
-  if (info.state != state) {    
+  if (info.state != state) {
     // legal state change?
     if ((info.state == MDSMap::STATE_STANDBY ||
 	 info.state == MDSMap::STATE_STANDBY_REPLAY ||
@@ -639,8 +639,14 @@ void MDSMonitor::_updated(MMDSBeacon *m)
   if (m->get_state() == MDSMap::STATE_STOPPED) {
     // send the map manually (they're out of the map, so they won't get it \
automatic)  mon->send_reply(m, new MMDSMap(mon->monmap->fsid, &mdsmap));
+  } else {
+    mon->send_reply(m, new MMDSBeacon(mon->monmap->fsid,
+				      m->get_global_id(),
+				      m->get_name(),
+				      mdsmap.get_epoch(),
+				      m->get_state(),
+				      m->get_seq()));
   }
-
   m->put();
 }
 
diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc
index 1e8723d..dacd4b6 100644
--- a/src/mon/OSDMonitor.cc
+++ b/src/mon/OSDMonitor.cc
@@ -2945,11 +2945,11 @@ bool OSDMonitor::preprocess_command(MMonCommand *m)
     } else if (prefix == "osd tree") {
       if (f) {
 	f->open_object_section("tree");
-	p->print_tree(NULL, f.get());
+	p->print_tree(f.get(), NULL);
 	f->close_section();
 	f->flush(ds);
       } else {
-	p->print_tree(&ds, NULL);
+	p->print_tree(NULL, &ds);
       }
       rdata.append(ds);
     } else if (prefix == "osd getmap") {
@@ -4846,6 +4846,7 @@ bool OSDMonitor::prepare_command_impl(MMonCommand *m,
     // then we would consistently trigger an election before the command
     // finishes, having a flapping monitor unable to hold quorum.
     int r = tester.test_with_crushtool(g_conf->crushtool.c_str(),
+				       osdmap.get_max_osd(),
 				       g_conf->mon_lease);
     if (r < 0) {
       derr << "error on crush map: " << ess.str() << dendl;
@@ -6464,6 +6465,17 @@ done:
       err = -ENOTEMPTY;
       goto reply;
     }
+    if (tp->ec_pool()) {
+      ss << "tier pool '" << tierpoolstr
+	 << "' is an ec pool, which cannot be a tier";
+      err = -ENOTSUP;
+      goto reply;
+    }
+    if (!tp->removed_snaps.empty() || !tp->snaps.empty()) {
+      ss << "tier pool '" << tierpoolstr << "' has snapshot state; it cannot be \
added as a tier without breaking the pool"; +      err = -ENOTEMPTY;
+      goto reply;
+    }
     // go
     pg_pool_t *np = pending_inc.get_new_pool(pool_id, p);
     pg_pool_t *ntp = pending_inc.get_new_pool(tierpool_id, tp);
diff --git a/src/os/WBThrottle.cc b/src/os/WBThrottle.cc
index 0337456..bd2b767 100644
--- a/src/os/WBThrottle.cc
+++ b/src/os/WBThrottle.cc
@@ -135,10 +135,7 @@ bool WBThrottle::get_next_should_flush(
 {
   assert(lock.is_locked());
   assert(next);
-  while (!stopping &&
-         cur_ios < io_limits.first &&
-         pending_wbs.size() < fd_limits.first &&
-         cur_size < size_limits.first)
+  while (!stopping && !beyond_limit())
          cond.Wait(lock);
   if (stopping)
     return false;
@@ -159,6 +156,14 @@ void *WBThrottle::entry()
   boost::tuple<ghobject_t, FDRef, PendingWB> wb;
   while (get_next_should_flush(&wb)) {
     clearing = wb.get<0>();
+    cur_ios -= wb.get<2>().ios;
+    logger->dec(l_wbthrottle_ios_dirtied, wb.get<2>().ios);
+    logger->inc(l_wbthrottle_ios_wb, wb.get<2>().ios);
+    cur_size -= wb.get<2>().size;
+    logger->dec(l_wbthrottle_bytes_dirtied, wb.get<2>().size);
+    logger->inc(l_wbthrottle_bytes_wb, wb.get<2>().size);
+    logger->dec(l_wbthrottle_inodes_dirtied);
+    logger->inc(l_wbthrottle_inodes_wb);
     lock.Unlock();
 #ifdef HAVE_FDATASYNC
     ::fdatasync(**wb.get<1>());
@@ -173,14 +178,6 @@ void *WBThrottle::entry()
 #endif
     lock.Lock();
     clearing = ghobject_t();
-    cur_ios -= wb.get<2>().ios;
-    logger->dec(l_wbthrottle_ios_dirtied, wb.get<2>().ios);
-    logger->inc(l_wbthrottle_ios_wb, wb.get<2>().ios);
-    cur_size -= wb.get<2>().size;
-    logger->dec(l_wbthrottle_bytes_dirtied, wb.get<2>().size);
-    logger->inc(l_wbthrottle_bytes_wb, wb.get<2>().size);
-    logger->dec(l_wbthrottle_inodes_dirtied);
-    logger->inc(l_wbthrottle_inodes_wb);
     cond.Signal();
     wb = boost::tuple<ghobject_t, FDRef, PendingWB>();
   }
@@ -212,7 +209,8 @@ void WBThrottle::queue_wb(
 
   wbiter->second.first.add(nocache, len, 1);
   insert_object(hoid);
-  cond.Signal();
+  if (beyond_limit())
+    cond.Signal();
 }
 
 void WBThrottle::clear()
@@ -229,12 +227,11 @@ void WBThrottle::clear()
     }
 #endif
 
-    cur_ios -= i->second.first.ios;
-    logger->dec(l_wbthrottle_ios_dirtied, i->second.first.ios);
-    cur_size -= i->second.first.size;
-    logger->dec(l_wbthrottle_bytes_dirtied, i->second.first.size);
-    logger->dec(l_wbthrottle_inodes_dirtied);
   }
+  cur_ios = cur_size = 0;
+  logger->set(l_wbthrottle_ios_dirtied, 0);
+  logger->set(l_wbthrottle_bytes_dirtied, 0);
+  logger->set(l_wbthrottle_inodes_dirtied, 0);
   pending_wbs.clear();
   lru.clear();
   rev_lru.clear();
@@ -264,10 +261,7 @@ void WBThrottle::clear_object(const ghobject_t &hoid)
 void WBThrottle::throttle()
 {
   Mutex::Locker l(lock);
-  while (!stopping && !(
-	   cur_ios < io_limits.second &&
-	   pending_wbs.size() < fd_limits.second &&
-	   cur_size < size_limits.second)) {
+  while (!stopping && beyond_limit()) {
     cond.Wait(lock);
   }
 }
diff --git a/src/os/WBThrottle.h b/src/os/WBThrottle.h
index b3fd9e0..d951943 100644
--- a/src/os/WBThrottle.h
+++ b/src/os/WBThrottle.h
@@ -129,6 +129,15 @@ private:
   FS fs;
 
   void set_from_conf();
+  bool beyond_limit() const {
+    if (cur_ios < io_limits.first &&
+	pending_wbs.size() < fd_limits.first &&
+	cur_size < size_limits.first)
+      return false;
+    else
+      return true;
+  }
+
 public:
   WBThrottle(CephContext *cct);
   ~WBThrottle();
diff --git a/src/os/chain_xattr.cc b/src/os/chain_xattr.cc
index 80cd514..24b4634 100644
--- a/src/os/chain_xattr.cc
+++ b/src/os/chain_xattr.cc
@@ -116,7 +116,8 @@ static int getxattr_len(const char *fn, const char *name)
       break;
     total += r;
     i++;
-  } while (r == CHAIN_XATTR_MAX_BLOCK_LEN);
+  } while (r == CHAIN_XATTR_MAX_BLOCK_LEN ||
+	   r == CHAIN_XATTR_SHORT_BLOCK_LEN);
 
   return total;
 }
@@ -135,25 +136,32 @@ int chain_getxattr(const char *fn, const char *name, void *val, \
size_t size)  do {
     chunk_size = (size < CHAIN_XATTR_MAX_BLOCK_LEN ? size : \
CHAIN_XATTR_MAX_BLOCK_LEN);  get_raw_xattr_name(name, i, raw_name, sizeof(raw_name));
-    size -= chunk_size;
 
     r = sys_getxattr(fn, raw_name, (char *)val + pos, chunk_size);
+    if (i && r == -ENOATTR) {
+      ret = pos;
+      break;
+    }
     if (r < 0) {
       ret = r;
       break;
     }
 
-    if (r > 0)
+    if (r > 0) {
       pos += r;
+      size -= r;
+    }
 
     i++;
-  } while (size && r == CHAIN_XATTR_MAX_BLOCK_LEN);
+  } while (size && (r == CHAIN_XATTR_MAX_BLOCK_LEN ||
+		    r == CHAIN_XATTR_SHORT_BLOCK_LEN));
 
   if (r >= 0) {
     ret = pos;
     /* is there another chunk? that can happen if the last read size span over
        exactly one block */
-    if (chunk_size == CHAIN_XATTR_MAX_BLOCK_LEN) {
+    if (chunk_size == CHAIN_XATTR_MAX_BLOCK_LEN ||
+	chunk_size == CHAIN_XATTR_SHORT_BLOCK_LEN) {
       get_raw_xattr_name(name, i, raw_name, sizeof(raw_name));
       r = sys_getxattr(fn, raw_name, 0, 0);
       if (r > 0) { // there's another chunk.. the original buffer was too small
@@ -179,7 +187,8 @@ static int chain_fgetxattr_len(int fd, const char *name)
       break;
     total += r;
     i++;
-  } while (r == CHAIN_XATTR_MAX_BLOCK_LEN);
+  } while (r == CHAIN_XATTR_MAX_BLOCK_LEN ||
+	   r == CHAIN_XATTR_SHORT_BLOCK_LEN);
 
   return total;
 }
@@ -198,25 +207,32 @@ int chain_fgetxattr(int fd, const char *name, void *val, size_t size)
   do {
     chunk_size = (size < CHAIN_XATTR_MAX_BLOCK_LEN ? size : CHAIN_XATTR_MAX_BLOCK_LEN);
     get_raw_xattr_name(name, i, raw_name, sizeof(raw_name));
-    size -= chunk_size;
 
     r = sys_fgetxattr(fd, raw_name, (char *)val + pos, chunk_size);
+    if (i && r == -ENOATTR) {
+      ret = pos;
+      break;
+    }
     if (r < 0) {
       ret = r;
       break;
     }
 
-    if (r > 0)
+    if (r > 0) {
       pos += r;
+      size -= r;
+    }
 
     i++;
-  } while (size && r == CHAIN_XATTR_MAX_BLOCK_LEN);
+  } while (size && (r == CHAIN_XATTR_MAX_BLOCK_LEN ||
+		    r == CHAIN_XATTR_SHORT_BLOCK_LEN));
 
   if (r >= 0) {
     ret = pos;
     /* is there another chunk? that can happen if the last read size span over
        exactly one block */
-    if (chunk_size == CHAIN_XATTR_MAX_BLOCK_LEN) {
+    if (chunk_size == CHAIN_XATTR_MAX_BLOCK_LEN ||
+	chunk_size == CHAIN_XATTR_SHORT_BLOCK_LEN) {
       get_raw_xattr_name(name, i, raw_name, sizeof(raw_name));
       r = sys_fgetxattr(fd, raw_name, 0, 0);
       if (r > 0) { // there's another chunk.. the original buffer was too small
@@ -230,14 +246,24 @@ int chain_fgetxattr(int fd, const char *name, void *val, size_t size)
 
 // setxattr
 
+static int get_xattr_block_size(size_t size)
+{
+  if (size <= CHAIN_XATTR_SHORT_LEN_THRESHOLD)
+    // this may fit in the inode; stripe over short attrs so that XFS
+    // won't kick it out.
+    return CHAIN_XATTR_SHORT_BLOCK_LEN;
+  return CHAIN_XATTR_MAX_BLOCK_LEN;
+}
+
 int chain_setxattr(const char *fn, const char *name, const void *val, size_t size)
 {
   int i = 0, pos = 0;
   char raw_name[CHAIN_XATTR_MAX_NAME_LEN * 2 + 16];
   int ret = 0;
+  size_t max_chunk_size = get_xattr_block_size(size);
 
   do {
-    size_t chunk_size = (size < CHAIN_XATTR_MAX_BLOCK_LEN ? size : CHAIN_XATTR_MAX_BLOCK_LEN);
+    size_t chunk_size = (size < max_chunk_size ? size : max_chunk_size);
     get_raw_xattr_name(name, i, raw_name, sizeof(raw_name));
     size -= chunk_size;
 
@@ -256,10 +282,10 @@ int chain_setxattr(const char *fn, const char *name, const void *val, size_t siz
     do {
       get_raw_xattr_name(name, i, raw_name, sizeof(raw_name));
       r = sys_removexattr(fn, raw_name);
-      if (r < 0 && r != -ENODATA)
+      if (r < 0 && r != -ENOATTR)
 	ret = r;
       i++;
-    } while (r != -ENODATA);
+    } while (r != -ENOATTR);
   }
   
   return ret;
@@ -270,9 +296,10 @@ int chain_fsetxattr(int fd, const char *name, const void *val, size_t size)
   int i = 0, pos = 0;
   char raw_name[CHAIN_XATTR_MAX_NAME_LEN * 2 + 16];
   int ret = 0;
+  size_t max_chunk_size = get_xattr_block_size(size);
 
   do {
-    size_t chunk_size = (size < CHAIN_XATTR_MAX_BLOCK_LEN ? size : CHAIN_XATTR_MAX_BLOCK_LEN);
+    size_t chunk_size = (size < max_chunk_size ? size : max_chunk_size);
     get_raw_xattr_name(name, i, raw_name, sizeof(raw_name));
     size -= chunk_size;
 
@@ -291,10 +318,10 @@ int chain_fsetxattr(int fd, const char *name, const void *val, size_t size)
     do {
       get_raw_xattr_name(name, i, raw_name, sizeof(raw_name));
       r = sys_fremovexattr(fd, raw_name);
-      if (r < 0 && r != -ENODATA)
+      if (r < 0 && r != -ENOATTR)
 	ret = r;
       i++;
-    } while (r != -ENODATA);
+    } while (r != -ENOATTR);
   }
   
   return ret;
diff --git a/src/os/chain_xattr.h b/src/os/chain_xattr.h
index 7e8312f..2d77568 100644
--- a/src/os/chain_xattr.h
+++ b/src/os/chain_xattr.h
@@ -11,6 +11,12 @@
 #define CHAIN_XATTR_MAX_NAME_LEN  128
 #define CHAIN_XATTR_MAX_BLOCK_LEN 2048
 
+/*
+ * XFS will only inline xattrs < 255 bytes, so for xattrs that are
+ * likely to fit in the inode, stripe over short xattrs.
+ */
+#define CHAIN_XATTR_SHORT_BLOCK_LEN 250
+#define CHAIN_XATTR_SHORT_LEN_THRESHOLD 1000
 
 // wrappers to hide annoying errno handling.
 
diff --git a/src/osd/ECBackend.cc b/src/osd/ECBackend.cc
index d2c06aa..b04776c 100644
--- a/src/osd/ECBackend.cc
+++ b/src/osd/ECBackend.cc
@@ -478,7 +478,7 @@ void ECBackend::continue_recovery_op(
 	op.hoid, want, true, &to_read);
       if (r != 0) {
 	// we must have lost a recovery source
-	assert(op.recovery_progress.first);
+	assert(!op.recovery_progress.first);
 	dout(10) << __func__ << ": canceling recovery op for obj " << op.hoid
 		 << dendl;
 	get_parent()->cancel_pull(op.hoid);
@@ -841,7 +841,6 @@ void ECBackend::handle_sub_write(
       get_parent()->whoami_shard().shard >= ec_impl->get_data_chunk_count())
     op.t.set_fadvise_flag(CEPH_OSD_OP_FLAG_FADVISE_DONTNEED);
 
-  localt->append(op.t);
   if (on_local_applied_sync) {
     dout(10) << "Queueing onreadable_sync: " << on_local_applied_sync << dendl;
     localt->register_on_applied_sync(on_local_applied_sync);
@@ -857,7 +856,13 @@ void ECBackend::handle_sub_write(
       new SubWriteApplied(this, msg, op.tid, op.at_version)));
   localt->register_on_applied(
     new ObjectStore::C_DeleteTransaction(localt));
-  get_parent()->queue_transaction(localt, msg);
+  list<ObjectStore::Transaction*> tls;
+  tls.push_back(localt);
+  tls.push_back(new ObjectStore::Transaction);
+  tls.back()->swap(op.t);
+  tls.back()->register_on_complete(
+    new ObjectStore::C_DeleteTransaction(tls.back()));
+  get_parent()->queue_transactions(tls, msg);
 }
 
 void ECBackend::handle_sub_read(
diff --git a/src/osd/ECMsgTypes.h b/src/osd/ECMsgTypes.h
index 7819383..2d5dc75 100644
--- a/src/osd/ECMsgTypes.h
+++ b/src/osd/ECMsgTypes.h
@@ -57,10 +57,29 @@ struct ECSubWrite {
       temp_added(temp_added),
       temp_removed(temp_removed),
       updated_hit_set_history(updated_hit_set_history) {}
+  void claim(ECSubWrite &other) {
+    from = other.from;
+    tid = other.tid;
+    reqid = other.reqid;
+    soid = other.soid;
+    stats = other.stats;
+    t.swap(other.t);
+    at_version = other.at_version;
+    trim_to = other.trim_to;
+    trim_rollback_to = other.trim_rollback_to;
+    log_entries.swap(other.log_entries);
+    temp_added.swap(other.temp_added);
+    temp_removed.swap(other.temp_removed);
+    updated_hit_set_history = other.updated_hit_set_history;
+  }
   void encode(bufferlist &bl) const;
   void decode(bufferlist::iterator &bl);
   void dump(Formatter *f) const;
   static void generate_test_instances(list<ECSubWrite*>& o);
+private:
+  // no outside copying -- slow
+  ECSubWrite(ECSubWrite& other);
+  const ECSubWrite& operator=(const ECSubWrite& other);
 };
 WRITE_CLASS_ENCODER(ECSubWrite)
 
diff --git a/src/osd/OSDMap.cc b/src/osd/OSDMap.cc
index 4ce8984..8d29147 100644
--- a/src/osd/OSDMap.cc
+++ b/src/osd/OSDMap.cc
@@ -2561,15 +2561,16 @@ private:
   const OSDMap *osdmap;
 };
 
-void OSDMap::print_tree(ostream *out, Formatter *f) const
+void OSDMap::print_tree(Formatter *f, ostream *out) const
 {
-  if (out) {
+  if (f)
+    OSDTreeFormattingDumper(crush.get(), this).dump(f);
+  else {
+    assert(out);
     TextTable tbl;
     OSDTreePlainDumper(crush.get(), this).dump(&tbl);
     *out << tbl;
   }
-  if (f)
-    OSDTreeFormattingDumper(crush.get(), this).dump(f);
 }
 
 void OSDMap::print_summary(Formatter *f, ostream& out) const
diff --git a/src/osd/OSDMap.h b/src/osd/OSDMap.h
index 3e17d30..643ee10 100644
--- a/src/osd/OSDMap.h
+++ b/src/osd/OSDMap.h
@@ -842,7 +842,7 @@ public:
   void print_pools(ostream& out) const;
   void print_summary(Formatter *f, ostream& out) const;
   void print_oneline_summary(ostream& out) const;
-  void print_tree(ostream *out, Formatter *f) const;
+  void print_tree(Formatter *f, ostream *out) const;
 
   string get_flag_string() const;
   static string get_flag_string(unsigned flags);
diff --git a/src/osd/PGBackend.h b/src/osd/PGBackend.h
index d1bf105..aa9b6ca 100644
--- a/src/osd/PGBackend.h
+++ b/src/osd/PGBackend.h
@@ -109,6 +109,10 @@
        ObjectStore::Transaction *t,
        OpRequestRef op = OpRequestRef()
        ) = 0;
+     virtual void queue_transactions(
+       list<ObjectStore::Transaction*>& tls,
+       OpRequestRef op = OpRequestRef()
+       ) = 0;
      virtual epoch_t get_epoch() const = 0;
 
      virtual const set<pg_shard_t> &get_actingbackfill_shards() const = 0;
diff --git a/src/osd/ReplicatedBackend.cc b/src/osd/ReplicatedBackend.cc
index 48bface..19c3ee4 100644
--- a/src/osd/ReplicatedBackend.cc
+++ b/src/osd/ReplicatedBackend.cc
@@ -600,10 +600,10 @@ void ReplicatedBackend::submit_transaction(
     &op,
     op_t);
 
-  ObjectStore::Transaction local_t;
-  local_t.set_use_tbl(op_t->get_use_tbl());
+  ObjectStore::Transaction *local_t = new ObjectStore::Transaction;
+  local_t->set_use_tbl(op_t->get_use_tbl());
   if (!(t->get_temp_added().empty())) {
-    get_temp_coll(&local_t);
+    get_temp_coll(local_t);
     add_temp_objs(t->get_temp_added());
   }
   clear_temp_objs(t->get_temp_cleared());
@@ -614,10 +614,7 @@ void ReplicatedBackend::submit_transaction(
     trim_to,
     trim_rollback_to,
     true,
-    &local_t);
-
-  local_t.append(*op_t);
-  local_t.swap(*op_t);
+    local_t);
   
   op_t->register_on_applied_sync(on_local_applied_sync);
   op_t->register_on_applied(
@@ -625,11 +622,16 @@ void ReplicatedBackend::submit_transaction(
       new C_OSD_OnOpApplied(this, &op)));
   op_t->register_on_applied(
     new ObjectStore::C_DeleteTransaction(op_t));
+  op_t->register_on_applied(
+    new ObjectStore::C_DeleteTransaction(local_t));
   op_t->register_on_commit(
     parent->bless_context(
       new C_OSD_OnOpCommit(this, &op)));
-      
-  parent->queue_transaction(op_t, op.op);
+
+  list<ObjectStore::Transaction*> tls;
+  tls.push_back(local_t);
+  tls.push_back(op_t);
+  parent->queue_transactions(tls, op.op);
   delete t;
 }
 
@@ -1201,14 +1203,16 @@ void ReplicatedBackend::sub_op_modify_impl(OpRequestRef op)
 
   op->mark_started();
 
-  rm->localt.append(rm->opt);
-  rm->localt.register_on_commit(
+  rm->opt.register_on_commit(
     parent->bless_context(
       new C_OSD_RepModifyCommit(this, rm)));
   rm->localt.register_on_applied(
     parent->bless_context(
       new C_OSD_RepModifyApply(this, rm)));
-  parent->queue_transaction(&(rm->localt), op);
+  list<ObjectStore::Transaction*> tls;
+  tls.push_back(&(rm->localt));
+  tls.push_back(&(rm->opt));
+  parent->queue_transactions(tls, op);
   // op is cleaned up by oncommit/onapply when both are executed
 }
 
diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc
index 0043dad..6fbf139 100644
--- a/src/osd/ReplicatedPG.cc
+++ b/src/osd/ReplicatedPG.cc
@@ -2865,7 +2865,10 @@ ReplicatedPG::RepGather *ReplicatedPG::trim_object(const \
hobject_t &coid)  ctx->log.back().mod_desc.mark_unrollbackable();
     }
   } else {
-    dout(10) << coid << " updating snapset on " << snapoid << dendl;
+    dout(10) << coid << " filtering snapset on " << snapoid << dendl;
+    snapset.filter(pool.info);
+    dout(10) << coid << " writing updated snapset on " << snapoid
+	     << ", snapset is " << snapset << dendl;
     ctx->log.push_back(
       pg_log_entry_t(
 	pg_log_entry_t::MODIFY,
@@ -4210,8 +4213,10 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& \
ops)  write_update_size_and_usage(ctx->delta_stats, oi, ctx->modified_ranges,
 				    op.extent.offset, op.extent.length, true);
 	maybe_create_new_object(ctx);
-	if (op.extent.offset == 0 && op.extent.length == oi.size)
+	if (op.extent.offset == 0 && op.extent.length >= oi.size)
 	  obs.oi.set_data_digest(osd_op.indata.crc32c(-1));
+	else if (op.extent.offset == oi.size && obs.oi.is_data_digest())
+	  obs.oi.set_data_digest(osd_op.indata.crc32c(obs.oi.data_digest));
 	else
 	  obs.oi.clear_data_digest();
       }
@@ -4562,10 +4567,6 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& \
ops)  string aname;
 	bp.copy(op.xattr.name_len, aname);
 	tracepoint(osd, do_osd_op_pre_rmxattr, soid.oid.name.c_str(), soid.snap.val, \
                aname.c_str());
-	if (!obs.exists || oi.is_whiteout()) {
-	  result = -ENOENT;
-	  break;
-	}
 	string name = "_" + aname;
 	if (pool.info.require_rollback()) {
 	  map<string, boost::optional<bufferlist> > to_set;
@@ -4720,7 +4721,7 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& \
ops)  tracepoint(osd, do_osd_op_pre_omapgetkeys, soid.oid.name.c_str(), \
soid.snap.val, start_after.c_str(), max_return);  set<string> out_set;
 
-	if (pool.info.supports_omap()) {
+	if (!pool.info.require_rollback()) {
 	  ObjectMap::ObjectMapIterator iter = osd->store->get_omap_iterator(
 	    coll, soid
 	    );
@@ -4757,7 +4758,7 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& \
ops)  tracepoint(osd, do_osd_op_pre_omapgetvals, soid.oid.name.c_str(), \
soid.snap.val, start_after.c_str(), max_return, filter_prefix.c_str());  map<string, \
bufferlist> out_set;  
-	if (pool.info.supports_omap()) {
+	if (!pool.info.require_rollback()) {
 	  ObjectMap::ObjectMapIterator iter = osd->store->get_omap_iterator(
 	    coll, soid
 	    );
@@ -4783,7 +4784,7 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& \
ops)  
     case CEPH_OSD_OP_OMAPGETHEADER:
       tracepoint(osd, do_osd_op_pre_omapgetheader, soid.oid.name.c_str(), \
                soid.snap.val);
-      if (!pool.info.supports_omap()) {
+      if (pool.info.require_rollback()) {
 	// return empty header
 	break;
       }
@@ -4809,7 +4810,7 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& \
ops)  }
 	tracepoint(osd, do_osd_op_pre_omapgetvalsbykeys, soid.oid.name.c_str(), \
soid.snap.val, list_entries(keys_to_get).c_str());  map<string, bufferlist> out;
-	if (pool.info.supports_omap()) {
+	if (!pool.info.require_rollback()) {
 	  osd->store->omap_get_values(coll, soid, keys_to_get, &out);
 	} // else return empty omap entries
 	::encode(out, osd_op.outdata);
@@ -4839,7 +4840,7 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& \
ops)  
 	map<string, bufferlist> out;
 
-	if (pool.info.supports_omap()) {
+	if (!pool.info.require_rollback()) {
 	  set<string> to_get;
 	  for (map<string, pair<bufferlist, int> >::iterator i = assertions.begin();
 	       i != assertions.end();
@@ -4893,7 +4894,7 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& \
ops)  
       // OMAP Write ops
     case CEPH_OSD_OP_OMAPSETVALS:
-      if (!pool.info.supports_omap()) {
+      if (pool.info.require_rollback()) {
 	result = -EOPNOTSUPP;
 	tracepoint(osd, do_osd_op_pre_omapsetvals, soid.oid.name.c_str(), soid.snap.val, \
"???");  break;
@@ -4929,7 +4930,7 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& \
ops)  
     case CEPH_OSD_OP_OMAPSETHEADER:
       tracepoint(osd, do_osd_op_pre_omapsetheader, soid.oid.name.c_str(), \
                soid.snap.val);
-      if (!pool.info.supports_omap()) {
+      if (pool.info.require_rollback()) {
 	result = -EOPNOTSUPP;
 	break;
       }
@@ -4950,7 +4951,7 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& \
ops)  
     case CEPH_OSD_OP_OMAPCLEAR:
       tracepoint(osd, do_osd_op_pre_omapclear, soid.oid.name.c_str(), \
                soid.snap.val);
-      if (!pool.info.supports_omap()) {
+      if (pool.info.require_rollback()) {
 	result = -EOPNOTSUPP;
 	break;
       }
@@ -4970,7 +4971,7 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& \
ops)  break;
 
     case CEPH_OSD_OP_OMAPRMKEYS:
-      if (!pool.info.supports_omap()) {
+      if (pool.info.require_rollback()) {
 	result = -EOPNOTSUPP;
 	tracepoint(osd, do_osd_op_pre_omaprmkeys, soid.oid.name.c_str(), soid.snap.val, \
"???");  break;
@@ -5745,7 +5746,13 @@ void ReplicatedPG::finish_ctx(OpContext *ctx, int log_op_type, \
bool maintain_ssc  
       if (!ctx->snapset_obc)
 	ctx->snapset_obc = get_object_context(snapoid, true);
-      bool got = ctx->snapset_obc->get_write_greedy(ctx->op);
+      bool got = false;
+      if (ctx->lock_to_release == OpContext::W_LOCK) {
+	got = ctx->snapset_obc->get_write_greedy(ctx->op);
+      } else {
+	assert(ctx->lock_to_release == OpContext::E_LOCK);
+	got = ctx->snapset_obc->get_excl(ctx->op);
+      }
       assert(got);
       dout(20) << " got greedy write on snapset_obc " << *ctx->snapset_obc << dendl;
       ctx->release_snapset_obc = true;
@@ -6260,10 +6267,11 @@ void ReplicatedPG::process_copy_chunk(hobject_t oid, \
ceph_tid_t tid, int r)  return;
   }
 
-  if (cop->omap_data.length())
+  if (cop->omap_data.length() || cop->omap_header.length())
     cop->results.has_omap = true;
 
-  if (r >= 0 && !pool.info.supports_omap() && cop->omap_data.length()) {
+  if (r >= 0 && pool.info.require_rollback() &&
+      (cop->omap_data.length() || cop->omap_header.length())) {
     r = -EOPNOTSUPP;
   }
   cop->objecter_tid = 0;
@@ -6324,14 +6332,16 @@ void ReplicatedPG::process_copy_chunk(hobject_t oid, \
ceph_tid_t tid, int r)  _build_finish_copy_transaction(cop, cop->results.final_tx);
 
   // verify digests?
-  dout(20) << __func__ << std::hex
-	   << " got digest: rx data 0x" << cop->results.data_digest
-	   << " omap 0x" << cop->results.omap_digest
-	   << ", source: data 0x" << cop->results.source_data_digest
-	   << " omap 0x" <<  cop->results.source_omap_digest
-	   << std::dec
-	   << " flags " << cop->results.flags
-	   << dendl;
+  if (cop->results.is_data_digest() || cop->results.is_omap_digest()) {
+    dout(20) << __func__ << std::hex
+      << " got digest: rx data 0x" << cop->results.data_digest
+      << " omap 0x" << cop->results.omap_digest
+      << ", source: data 0x" << cop->results.source_data_digest
+      << " omap 0x" <<  cop->results.source_omap_digest
+      << std::dec
+      << " flags " << cop->results.flags
+      << dendl;
+  }
   if (cop->results.is_data_digest() &&
       cop->results.data_digest != cop->results.source_data_digest) {
     derr << __func__ << std::hex << " data digest 0x" << cop->results.data_digest
@@ -6376,13 +6386,11 @@ void ReplicatedPG::process_copy_chunk(hobject_t oid, \
ceph_tid_t tid, int r)  cobc->stop_block();
 
   // cancel and requeue proxy reads on this object
-  if (!r) {
-    kick_proxy_read_blocked(cobc->obs.oi.soid);
-    for (map<ceph_tid_t, ProxyReadOpRef>::iterator it = proxyread_ops.begin();
-	it != proxyread_ops.end(); ++it) {
-      if (it->second->soid == cobc->obs.oi.soid) {
-	cancel_proxy_read(it->second);
-      }
+  kick_proxy_read_blocked(cobc->obs.oi.soid);
+  for (map<ceph_tid_t, ProxyReadOpRef>::iterator it = proxyread_ops.begin();
+      it != proxyread_ops.end(); ++it) {
+    if (it->second->soid == cobc->obs.oi.soid) {
+      cancel_proxy_read(it->second);
     }
   }
 
@@ -6569,10 +6577,38 @@ void ReplicatedPG::finish_promote(int r, CopyResults \
*results,  return;
   }
 
-  if (r < 0 && results->started_temp_obj) {
+  if (r != -ENOENT && soid.is_snap()) {
+    if (results->snaps.empty()) {
+      // we must have read "snap" content from the head object in
+      // the base pool.  use snap_seq to construct what snaps should
+      // be for this clone (what is was before we evicted the clean
+      // clone from this pool, and what it will be when we flush and
+      // the clone eventually happens in the base pool).
+      SnapSet& snapset = obc->ssc->snapset;
+      vector<snapid_t>::iterator p = snapset.snaps.begin();
+      while (p != snapset.snaps.end() && *p > soid.snap)
+	++p;
+      while (p != snapset.snaps.end() && *p > results->snap_seq) {
+	results->snaps.push_back(*p);
+	++p;
+      }
+    }
+
+    dout(20) << __func__ << " snaps " << results->snaps << dendl;
+    filter_snapc(results->snaps);
+
+    dout(20) << __func__ << " filtered snaps " << results->snaps << dendl;
+    if (results->snaps.empty()) {
+      dout(20) << __func__
+	       << " snaps are empty, clone is invalid,"
+	       << " setting r to ENOENT" << dendl;
+      r = -ENOENT;
+    }
+  }
+
+  if (r == -ENOENT && results->started_temp_obj) {
     dout(10) << __func__ << " abort; will clean up partial work" << dendl;
-    ObjectContextRef tempobc = get_object_context(results->temp_oid, false);
-    assert(tempobc);
+    ObjectContextRef tempobc = get_object_context(results->temp_oid, true);
     RepGather *repop = simple_repop_create(tempobc);
     repop->ctx->op_t->remove(results->temp_oid);
     simple_repop_submit(repop);
@@ -6672,27 +6708,14 @@ void ReplicatedPG::finish_promote(int r, CopyResults \
*results,  }
     tctx->new_obs.oi.size = results->object_size;
     tctx->new_obs.oi.user_version = results->user_version;
+    // Don't care whether the src object has a data or omap digest
+    if (results->object_size)
+      tctx->new_obs.oi.set_data_digest(results->data_digest);
+    if (results->has_omap)
+      tctx->new_obs.oi.set_omap_digest(results->omap_digest);
 
     if (soid.snap != CEPH_NOSNAP) {
-      if (!results->snaps.empty()) {
-	tctx->new_obs.oi.snaps = results->snaps;
-      } else {
-	// we must have read "snap" content from the head object in
-	// the base pool.  use snap_seq to construct what snaps should
-	// be for this clone (what is was before we evicted the clean
-	// clone from this pool, and what it will be when we flush and
-	// the clone eventually happens in the base pool).
-	SnapSet& snapset = obc->ssc->snapset;
-	vector<snapid_t>::iterator p = snapset.snaps.begin();
-	while (p != snapset.snaps.end() && *p > soid.snap)
-	  ++p;
-	assert(p != snapset.snaps.end());
-	do {
-	  tctx->new_obs.oi.snaps.push_back(*p);
-	  ++p;
-	} while (p != snapset.snaps.end() && *p > results->snap_seq);
-      }
-      dout(20) << __func__ << " snaps " << tctx->new_obs.oi.snaps << dendl;
+      tctx->new_obs.oi.snaps = results->snaps;
       assert(!tctx->new_obs.oi.snaps.empty());
       assert(obc->ssc->snapset.clone_size.count(soid.snap));
       assert(obc->ssc->snapset.clone_size[soid.snap] ==
@@ -6831,8 +6854,10 @@ int ReplicatedPG::start_flush(
 	   << " " << (blocking ? "blocking" : "non-blocking/best-effort")
 	   << dendl;
 
+  // get a filtered snapset, need to remove removed snaps
+  SnapSet snapset = obc->ssc->snapset.get_filtered(pool.info);
+
   // verify there are no (older) check for dirty clones
-  SnapSet& snapset = obc->ssc->snapset;
   {
     dout(20) << " snapset " << snapset << dendl;
     vector<snapid_t>::reverse_iterator p = snapset.clones.rbegin();
@@ -10620,8 +10645,8 @@ bool ReplicatedPG::agent_work(int start_max)
     }
 
     // be careful flushing omap to an EC pool.
-    if (!base_pool->supports_omap() &&
-	obc->obs.oi.is_omap()) {
+    if (base_pool->is_erasure() &&
+	obc->obs.oi.test_flag(object_info_t::FLAG_OMAP)) {
       dout(20) << __func__ << " skip (omap to EC) " << obc->obs.oi << dendl;
       osd->logger->inc(l_osd_agent_skip);
       continue;
@@ -10958,7 +10983,7 @@ bool ReplicatedPG::agent_choose_mode(bool restart, \
OpRequestRef op)  // also exclude omap objects if ec backing pool
   const pg_pool_t *base_pool = get_osdmap()->get_pg_pool(pool.info.tier_of);
   assert(base_pool);
-  if (!base_pool->supports_omap())
+  if (base_pool->is_erasure())
     unflushable += info.stats.stats.sum.num_objects_omap;
 
   uint64_t num_user_objects = info.stats.stats.sum.num_objects;
@@ -10973,7 +10998,7 @@ bool ReplicatedPG::agent_choose_mode(bool restart, \
OpRequestRef op)  
   // also reduce the num_dirty by num_objects_omap
   int64_t num_dirty = info.stats.stats.sum.num_objects_dirty;
-  if (!base_pool->supports_omap()) {
+  if (base_pool->is_erasure()) {
     if (num_dirty > info.stats.stats.sum.num_objects_omap)
       num_dirty -= info.stats.stats.sum.num_objects_omap;
     else
diff --git a/src/osd/ReplicatedPG.h b/src/osd/ReplicatedPG.h
index f2dfca9..3aab489 100644
--- a/src/osd/ReplicatedPG.h
+++ b/src/osd/ReplicatedPG.h
@@ -339,6 +339,9 @@ public:
   void queue_transaction(ObjectStore::Transaction *t, OpRequestRef op) {
     osd->store->queue_transaction(osr.get(), t, 0, 0, 0, op);
   }
+  void queue_transactions(list<ObjectStore::Transaction*>& tls, OpRequestRef op) {
+    osd->store->queue_transactions(osr.get(), tls, 0, 0, 0, op);
+  }
   epoch_t get_epoch() const {
     return get_osdmap()->get_epoch();
   }
@@ -866,7 +869,20 @@ protected:
 	requeue_snaptrimmer_clone ||
 	requeue_snaptrimmer_snapset)
       queue_snap_trim();
-    requeue_ops(to_req);
+
+    if (!to_req.empty()) {
+      assert(ctx->obc);
+      // requeue at front of scrub blocking queue if we are blocked by scrub
+      if (scrubber.write_blocked_by_scrub(ctx->obc->obs.oi.soid.get_head())) {
+	waiting_for_active.splice(
+	  waiting_for_active.begin(),
+	  to_req,
+	  to_req.begin(),
+	  to_req.end());
+      } else {
+	requeue_ops(to_req);
+      }
+    }
   }
 
   // replica ops
diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc
index 3703ae6..74c733e 100644
--- a/src/osd/osd_types.cc
+++ b/src/osd/osd_types.cc
@@ -2547,6 +2547,8 @@ bool pg_interval_t::is_new_interval(
   int new_up_primary,
   const vector<int> &old_up,
   const vector<int> &new_up,
+  int old_size,
+  int new_size,
   int old_min_size,
   int new_min_size,
   unsigned old_pg_num,
@@ -2557,6 +2559,7 @@ bool pg_interval_t::is_new_interval(
     old_up_primary != new_up_primary ||
     new_up != old_up ||
     old_min_size != new_min_size ||
+    old_size != new_size ||
     pgid.is_split(old_pg_num, new_pg_num, 0);
 }
 
@@ -2581,6 +2584,8 @@ bool pg_interval_t::is_new_interval(
 		    new_up_primary,
 		    old_up,
 		    new_up,
+		    lastmap->get_pools().find(pgid.pool())->second.size,
+		    osdmap->get_pools().find(pgid.pool())->second.size,
 		    lastmap->get_pools().find(pgid.pool())->second.min_size,
 		    osdmap->get_pools().find(pgid.pool())->second.min_size,
 		    lastmap->get_pg_num(pgid.pool()),
@@ -4080,6 +4085,25 @@ uint64_t SnapSet::get_clone_bytes(snapid_t clone) const
   return size;
 }
 
+void SnapSet::filter(const pg_pool_t &pinfo)
+{
+  vector<snapid_t> oldsnaps;
+  oldsnaps.swap(snaps);
+  for (vector<snapid_t>::const_iterator i = oldsnaps.begin();
+       i != oldsnaps.end();
+       ++i) {
+    if (!pinfo.is_removed_snap(*i))
+      snaps.push_back(*i);
+  }
+}
+
+SnapSet SnapSet::get_filtered(const pg_pool_t &pinfo) const
+{
+  SnapSet ss = *this;
+  ss.filter(pinfo);
+  return ss;
+}
+
 // -- watch_info_t --
 
 void watch_info_t::encode(bufferlist& bl) const
diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h
index 59e4abc..09d67f2 100644
--- a/src/osd/osd_types.h
+++ b/src/osd/osd_types.h
@@ -1092,10 +1092,6 @@ public:
   bool is_replicated()   const { return get_type() == TYPE_REPLICATED; }
   bool is_erasure() const { return get_type() == TYPE_ERASURE; }
 
-  bool supports_omap() const {
-    return !(get_type() == TYPE_ERASURE || has_flag(FLAG_DEBUG_FAKE_EC_POOL));
-  }
-
   bool requires_aligned_append() const { return is_erasure(); }
   uint64_t required_alignment() const { return stripe_width; }
 
@@ -1850,6 +1846,8 @@ struct pg_interval_t {
     int new_up_primary,
     const vector<int> &old_up,
     const vector<int> &new_up,
+    int old_size,
+    int new_size,
     int old_min_size,
     int new_min_size,
     unsigned old_pg_num,
@@ -2771,6 +2769,9 @@ struct SnapSet {
     }
     return max;
   }
+
+  SnapSet get_filtered(const pg_pool_t &pinfo) const;
+  void filter(const pg_pool_t &pinfo);
 };
 WRITE_CLASS_ENCODER(SnapSet)
 
diff --git a/src/osdc/Journaler.h b/src/osdc/Journaler.h
index c4e9b2f..d7ae92d 100644
--- a/src/osdc/Journaler.h
+++ b/src/osdc/Journaler.h
@@ -164,11 +164,11 @@ public:
 	f->open_object_section("layout");
 	{
 	  f->dump_unsigned("stripe_unit", layout.fl_stripe_unit);
-	  f->dump_unsigned("stripe_count", layout.fl_stripe_unit);
-	  f->dump_unsigned("object_size", layout.fl_stripe_unit);
-	  f->dump_unsigned("cas_hash", layout.fl_stripe_unit);
-	  f->dump_unsigned("object_stripe_unit", layout.fl_stripe_unit);
-	  f->dump_unsigned("pg_pool", layout.fl_stripe_unit);
+	  f->dump_unsigned("stripe_count", layout.fl_stripe_count);
+	  f->dump_unsigned("object_size", layout.fl_object_size);
+	  f->dump_unsigned("cas_hash", layout.fl_cas_hash);
+	  f->dump_unsigned("object_stripe_unit", layout.fl_object_stripe_unit);
+	  f->dump_unsigned("pg_pool", layout.fl_pg_pool);
 	}
 	f->close_section(); // layout
       }
diff --git a/src/osdc/Objecter.cc b/src/osdc/Objecter.cc
index dcb360e..f82f6c7 100644
--- a/src/osdc/Objecter.cc
+++ b/src/osdc/Objecter.cc
@@ -2474,6 +2474,7 @@ int Objecter::_calc_target(op_target_t *t, epoch_t \
*last_force_resend,  bool any  }
   }
 
+  int size = pi->size;
   int min_size = pi->min_size;
   unsigned pg_num = pi->get_pg_num();
   int up_primary, acting_primary;
@@ -2489,6 +2490,8 @@ int Objecter::_calc_target(op_target_t *t, epoch_t \
*last_force_resend,  bool any  up_primary,
 	  t->up,
 	  up,
+	  t->size,
+	  size,
 	  t->min_size,
 	  min_size,
 	  t->pg_num,
@@ -2515,6 +2518,7 @@ int Objecter::_calc_target(op_target_t *t, epoch_t \
*last_force_resend,  bool any  t->acting_primary = acting_primary;
     t->up_primary = up_primary;
     t->up = up;
+    t->size = size;
     t->min_size = min_size;
     t->pg_num = pg_num;
     ldout(cct, 10) << __func__ << " "
diff --git a/src/osdc/Objecter.h b/src/osdc/Objecter.h
index 03c91d7..76cadf6 100644
--- a/src/osdc/Objecter.h
+++ b/src/osdc/Objecter.h
@@ -1126,6 +1126,7 @@ public:
     vector<int> acting;  ///< set of acting osds for last pg we mapped to
     int up_primary;      ///< primary for last pg we mapped to based on the up set
     int acting_primary;  ///< primary for last pg we mapped to based on the acting set
+    int size;        ///< the size of the pool when we were last mapped
     int min_size;        ///< the min size of the pool when were were last mapped
 
     bool used_replica;
@@ -1141,6 +1142,7 @@ public:
 	pg_num(0),
 	up_primary(-1),
 	acting_primary(-1),
+	size(-1),
 	min_size(-1),
 	used_replica(false),
 	paused(false),
diff --git a/src/rgw/rgw_main.cc b/src/rgw/rgw_main.cc
index 2a247e4..9a8aa5f 100644
--- a/src/rgw/rgw_main.cc
+++ b/src/rgw/rgw_main.cc
@@ -716,9 +716,6 @@ static int civetweb_callback(struct mg_connection *conn) {
   RGWRequest *req = new RGWRequest(store->get_new_req_id());
   RGWMongoose client_io(conn, pe->port);
 
-  client_io.init(g_ceph_context);
-
-
   int ret = process_request(store, rest, req, &client_io, olog);
   if (ret < 0) {
     /* we don't really care about return code */
diff --git a/src/test/ceph-helpers.sh b/src/test/ceph-helpers.sh
index 64667a4..4adb515 100755
--- a/src/test/ceph-helpers.sh
+++ b/src/test/ceph-helpers.sh
@@ -192,7 +192,7 @@ function kill_daemons() {
     local dir=$1
     local signal=${2:-KILL}
     local name_prefix=$3 # optional, osd, mon, osd.1
-    local delays=${4:-0 1 1 1 2 3 5 5 5 10 10 20 60}
+    local delays=${4:-0 0 1 1 1 2 3 5 5 5 10 10 20 60}
 
     local status=0
     for pidfile in $(find $dir | grep $name_prefix'[^/]*\.pid') ; do
@@ -200,6 +200,7 @@ function kill_daemons() {
         local send_signal=$signal
         local kill_complete=false
         for try in $delays ; do
+            sleep $try
             if kill -$send_signal $pid 2> /dev/null ; then
                 kill_complete=false
             else
@@ -207,7 +208,6 @@ function kill_daemons() {
                 break
             fi
             send_signal=0
-            sleep $try
         done
         if ! $kill_complete ; then
             status=1
diff --git a/src/test/ceph_argparse.cc b/src/test/ceph_argparse.cc
index f48f387..f846db7 100644
--- a/src/test/ceph_argparse.cc
+++ b/src/test/ceph_argparse.cc
@@ -280,6 +280,30 @@ TEST(CephArgParse, WithDashesAndUnderscores) {
   ASSERT_EQ(found_baz, "");
 }
 
+TEST(CephArgParse, WithFloat) {
+  const char *BAZSTUFF1[] = { "./myprog", "--foo", "50.5", "--bar", "52", NULL };
+
+  VectorContainer bazstuff1(BAZSTUFF1);
+  ostringstream err;
+  float foo;
+  int bar = -1;
+  for (std::vector<const char*>::iterator i = bazstuff1.arr.begin();
+       i != bazstuff1.arr.end(); )
+  {
+    if (ceph_argparse_double_dash(bazstuff1.arr, i)) {
+      break;
+    } else if (ceph_argparse_witharg(bazstuff1.arr, i, &foo, err, "--foo", (char*)NULL)) {
+      ASSERT_EQ(string(""), err.str());
+    } else if (ceph_argparse_witharg(bazstuff1.arr, i, &bar, err, "--bar", (char*)NULL)) {
+      ASSERT_EQ(string(""), err.str());
+    }
+    else {
+      ++i;
+    }
+  }
+  ASSERT_EQ(foo, 50.5);
+  ASSERT_EQ(bar, 52);
+}
 
 TEST(CephArgParse, WithInt) {
   const char *BAZSTUFF1[] = { "./myprog", "--foo", "50", "--bar", "52", NULL };
diff --git a/src/test/cli/crushtool/check-names.empty.crushmap.txt b/src/test/cli/crushtool/check-names.empty.crushmap.txt
new file mode 100644
index 0000000..6ba00cd
--- /dev/null
+++ b/src/test/cli/crushtool/check-names.empty.crushmap.txt
@@ -0,0 +1,11 @@
+# begin crush map
+
+# devices
+
+# types
+
+# buckets
+
+# rules
+
+# end crush map
diff --git a/src/test/cli/crushtool/check-names.empty.t b/src/test/cli/crushtool/check-names.empty.t
new file mode 100644
index 0000000..9e30790
--- /dev/null
+++ b/src/test/cli/crushtool/check-names.empty.t
@@ -0,0 +1,4 @@
+  $ crushtool -c "$TESTDIR/check-names.empty.crushmap.txt" -o "$TESTDIR/check-names.empty.crushmap"
+  $ crushtool -i "$TESTDIR/check-names.empty.crushmap" --check-names
+  unknown type name: item#0
+  $ rm -f "$TESTDIR/check-names.empty.crushmap"
diff --git a/src/test/cli/crushtool/check-names.max-id.t b/src/test/cli/crushtool/check-names.max-id.t
new file mode 100644
index 0000000..18724ff
--- /dev/null
+++ b/src/test/cli/crushtool/check-names.max-id.t
@@ -0,0 +1,7 @@
+  $ crushtool -i "$TESTDIR/simple.template" --add-item 0 1.0 device0 --loc host host0 --loc cluster cluster0 -o check-names.crushmap > /dev/null
+  $ crushtool -i check-names.crushmap       --add-item 1 1.0 device1 --loc host host0 --loc cluster cluster0 -o check-names.crushmap > /dev/null
+  $ crushtool -i check-names.crushmap --check 2
+  $ crushtool -i check-names.crushmap       --add-item 2 1.0 device2 --loc host host0 --loc cluster cluster0 -o check-names.crushmap > /dev/null
+  $ crushtool -i check-names.crushmap --check 2
+  item id too large: item#2
+  $ crushtool -i check-names.crushmap --check
diff --git a/src/test/cli/crushtool/help.t b/src/test/cli/crushtool/help.t
index cd7d4a7..4c21912 100644
--- a/src/test/cli/crushtool/help.t
+++ b/src/test/cli/crushtool/help.t
@@ -59,6 +59,7 @@
   Options for the display/test stage
   
      --tree                print map summary as a tree
+     --check [max_id]      check if any item is referencing an unknown name/type
      -i mapfn --show-location id
                            show location for given device id
      -i mapfn --test       test a range of inputs on the map
diff --git a/src/test/cli/osdmaptool/tree.t b/src/test/cli/osdmaptool/tree.t
new file mode 100644
index 0000000..00eb0be
--- /dev/null
+++ b/src/test/cli/osdmaptool/tree.t
@@ -0,0 +1,19 @@
+  $ osdmaptool --createsimple 3 om
+  osdmaptool: osdmap file 'om'
+  osdmaptool: writing epoch 1 to om
+
+  $ osdmaptool --tree=plain om
+  osdmaptool: osdmap file 'om'
+  ID WEIGHT  TYPE NAME              UP/DOWN REWEIGHT PRIMARY-AFFINITY 
+  -1 3.00000 root default                                             
+  -3 3.00000     rack localrack                                       
+  -2 3.00000         host localhost                                   
+   0 1.00000             osd.0          DNE        0                  
+   1 1.00000             osd.1          DNE        0                  
+   2 1.00000             osd.2          DNE        0                  
+
+  $ osdmaptool --tree=json om
+  osdmaptool: osdmap file 'om'
+  {"nodes":[{"id":-1,"name":"default","type":"root","type_id":10,"children":[-3]},{"id":-3,"name":"localrack","type":"rack","type_id":3,"children":[-2]},{"id":-2,"name":"localhost","type":"host","type_id":1,"children":[2,1,0]},{"id":0,"name":"osd.0","type":"osd","type_id":0,"crush_weight":1.000000,"depth":3,"exists":0,"status":"down","reweight":0.000000,"primary_affinity":1.000000},{"id":1,"name":"osd.1","type":"osd","type_id":0,"crush_weight":1.000000,"depth":3,"exists":0,"status":"down","reweight":0.000000,"primary_affinity":1.000000},{"id":2,"name":"osd.2","type":"osd","type_id":0,"crush_weight":1.000000,"depth":3,"exists":0,"status":"down","reweight":0.000000,"primary_affinity":1.000000}],"stray":[]}
+  $ rm -f om
+
diff --git a/src/test/encoding/types.h b/src/test/encoding/types.h
index ce099eb..cb06b15 100644
--- a/src/test/encoding/types.h
+++ b/src/test/encoding/types.h
@@ -93,7 +93,7 @@ TYPE(PushReplyOp)
 TYPE(ECUtil::HashInfo)
 
 #include "osd/ECMsgTypes.h"
-TYPE(ECSubWrite)
+TYPE_NOCOPY(ECSubWrite)
 TYPE(ECSubWriteReply)
 TYPE_FEATUREFUL(ECSubRead)
 TYPE(ECSubReadReply)
diff --git a/src/test/librados/io.cc b/src/test/librados/io.cc
index 0976422..d01f090 100644
--- a/src/test/librados/io.cc
+++ b/src/test/librados/io.cc
@@ -463,17 +463,6 @@ TEST_F(LibRadosIo, RmXattr) {
       rados_setxattr(ioctx, "foo", attr1, attr1_buf, sizeof(attr1_buf)));
   ASSERT_EQ(0, rados_rmxattr(ioctx, "foo", attr1));
   ASSERT_EQ(-ENODATA, rados_getxattr(ioctx, "foo", attr1, buf, sizeof(buf)));
-
-  // Test rmxattr on a removed object
-  char buf2[128];
-  char attr2[] = "attr2";
-  char attr2_buf[] = "foo bar baz";
-  memset(buf2, 0xbb, sizeof(buf2));
-  ASSERT_EQ(0, rados_write(ioctx, "foo_rmxattr", buf2, sizeof(buf2), 0));
-  ASSERT_EQ(0,
-      rados_setxattr(ioctx, "foo_rmxattr", attr2, attr2_buf, sizeof(attr2_buf)));
-  ASSERT_EQ(0, rados_remove(ioctx, "foo_rmxattr"));
-  ASSERT_EQ(-ENOENT, rados_rmxattr(ioctx, "foo_rmxattr", attr2));
 }
 
 TEST_F(LibRadosIoPP, RmXattrPP) {
@@ -490,20 +479,6 @@ TEST_F(LibRadosIoPP, RmXattrPP) {
   ASSERT_EQ(0, ioctx.rmxattr("foo", attr1));
   bufferlist bl3;
   ASSERT_EQ(-ENODATA, ioctx.getxattr("foo", attr1, bl3));
-
-  // Test rmxattr on a removed object
-  char buf2[128];
-  char attr2[] = "attr2";
-  char attr2_buf[] = "foo bar baz";
-  memset(buf2, 0xbb, sizeof(buf2));
-  bufferlist bl21;
-  bl21.append(buf, sizeof(buf));
-  ASSERT_EQ(0, ioctx.write("foo_rmxattr", bl21, sizeof(buf2), 0));
-  bufferlist bl22;
-  bl22.append(attr2_buf, sizeof(attr2_buf));
-  ASSERT_EQ(0, ioctx.setxattr("foo_rmxattr", attr2, bl22));
-  ASSERT_EQ(0, ioctx.remove("foo_rmxattr"));
-  ASSERT_EQ(-ENOENT, ioctx.rmxattr("foo_rmxattr", attr2));
 }
 
 TEST_F(LibRadosIo, XattrIter) {
@@ -985,17 +960,6 @@ TEST_F(LibRadosIoEC, RmXattr) {
       rados_setxattr(ioctx, "foo", attr1, attr1_buf, sizeof(attr1_buf)));
   ASSERT_EQ(0, rados_rmxattr(ioctx, "foo", attr1));
   ASSERT_EQ(-ENODATA, rados_getxattr(ioctx, "foo", attr1, buf, sizeof(buf)));
-
-  // Test rmxattr on a removed object
-  char buf2[128];
-  char attr2[] = "attr2";
-  char attr2_buf[] = "foo bar baz";
-  memset(buf2, 0xbb, sizeof(buf2));
-  ASSERT_EQ(0, rados_write(ioctx, "foo_rmxattr", buf2, sizeof(buf2), 0));
-  ASSERT_EQ(0,
-      rados_setxattr(ioctx, "foo_rmxattr", attr2, attr2_buf, sizeof(attr2_buf)));
-  ASSERT_EQ(0, rados_remove(ioctx, "foo_rmxattr"));
-  ASSERT_EQ(-ENOENT, rados_rmxattr(ioctx, "foo_rmxattr", attr2));
 }
 
 TEST_F(LibRadosIoECPP, RmXattrPP) {
@@ -1012,20 +976,6 @@ TEST_F(LibRadosIoECPP, RmXattrPP) {
   ASSERT_EQ(0, ioctx.rmxattr("foo", attr1));
   bufferlist bl3;
   ASSERT_EQ(-ENODATA, ioctx.getxattr("foo", attr1, bl3));
-
-  // Test rmxattr on a removed object
-  char buf2[128];
-  char attr2[] = "attr2";
-  char attr2_buf[] = "foo bar baz";
-  memset(buf2, 0xbb, sizeof(buf2));
-  bufferlist bl21;
-  bl21.append(buf, sizeof(buf));
-  ASSERT_EQ(0, ioctx.write("foo_rmxattr", bl21, sizeof(buf2), 0));
-  bufferlist bl22;
-  bl22.append(attr2_buf, sizeof(attr2_buf));
-  ASSERT_EQ(0, ioctx.setxattr("foo_rmxattr", attr2, bl22));
-  ASSERT_EQ(0, ioctx.remove("foo_rmxattr"));
-  ASSERT_EQ(-ENOENT, ioctx.rmxattr("foo_rmxattr", attr2));
 }
 
 TEST_F(LibRadosIoEC, XattrIter) {
diff --git a/src/test/librados/snapshots.cc b/src/test/librados/snapshots.cc
index 880b84d..3eb4e72 100644
--- a/src/test/librados/snapshots.cc
+++ b/src/test/librados/snapshots.cc
@@ -485,6 +485,40 @@ TEST_F(LibRadosSnapshotsSelfManagedPP, SnapOverlapPP) {
   readioctx.close();
 }
 
+TEST_F(LibRadosSnapshotsSelfManagedPP, Bug11677) {
+  std::vector<uint64_t> my_snaps;
+  my_snaps.push_back(-2);
+  ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps.back()));
+  ::std::reverse(my_snaps.begin(), my_snaps.end());
+  ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0], my_snaps));
+  ::std::reverse(my_snaps.begin(), my_snaps.end());
+
+  int bsize = 1<<20;
+  char *buf = (char *)new char[bsize];
+  memset(buf, 0xcc, bsize);
+  bufferlist bl1;
+  bl1.append(buf, bsize);
+  ASSERT_EQ(0, ioctx.write("foo", bl1, bsize, 0));
+
+  my_snaps.push_back(-2);
+  ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps.back()));
+  ::std::reverse(my_snaps.begin(), my_snaps.end());
+  ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0], my_snaps));
+  ::std::reverse(my_snaps.begin(), my_snaps.end());
+
+  librados::ObjectWriteOperation *op = new librados::ObjectWriteOperation();
+  op->assert_exists();
+  op->remove();
+  ASSERT_EQ(0, ioctx.operate("foo", op));
+
+  ASSERT_EQ(0, ioctx.selfmanaged_snap_remove(my_snaps.back()));
+  my_snaps.pop_back();
+  ASSERT_EQ(0, ioctx.selfmanaged_snap_remove(my_snaps.back()));
+  my_snaps.pop_back();
+  ioctx.snap_set_read(LIBRADOS_SNAP_HEAD);
+  delete[] buf;
+}
+
 // EC testing
 TEST_F(LibRadosSnapshotsEC, SnapList) {
   char buf[bufsize];
@@ -816,3 +850,37 @@ TEST_F(LibRadosSnapshotsSelfManagedECPP, RollbackPP) {
   delete[] buf2;
 }
 
+TEST_F(LibRadosSnapshotsSelfManagedECPP, Bug11677) {
+  std::vector<uint64_t> my_snaps;
+  my_snaps.push_back(-2);
+  ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps.back()));
+  ::std::reverse(my_snaps.begin(), my_snaps.end());
+  ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0], my_snaps));
+  ::std::reverse(my_snaps.begin(), my_snaps.end());
+
+  int bsize = alignment;
+  char *buf = (char *)new char[bsize];
+  memset(buf, 0xcc, bsize);
+  bufferlist bl1;
+  bl1.append(buf, bsize);
+  ASSERT_EQ(0, ioctx.write("foo", bl1, bsize, 0));
+
+  my_snaps.push_back(-2);
+  ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps.back()));
+  ::std::reverse(my_snaps.begin(), my_snaps.end());
+  ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0], my_snaps));
+  ::std::reverse(my_snaps.begin(), my_snaps.end());
+
+  librados::ObjectWriteOperation *op = new librados::ObjectWriteOperation();
+  op->assert_exists();
+  op->remove();
+  ASSERT_EQ(0, ioctx.operate("foo", op));
+
+  ASSERT_EQ(0, ioctx.selfmanaged_snap_remove(my_snaps.back()));
+  my_snaps.pop_back();
+  ASSERT_EQ(0, ioctx.selfmanaged_snap_remove(my_snaps.back()));
+  my_snaps.pop_back();
+  ioctx.snap_set_read(LIBRADOS_SNAP_HEAD);
+  delete[] buf;
+}
+
diff --git a/src/test/librados/tier.cc b/src/test/librados/tier.cc
index 60b56f9..93cd42d 100644
--- a/src/test/librados/tier.cc
+++ b/src/test/librados/tier.cc
@@ -76,16 +76,15 @@ protected:
   static void SetUpTestCase() {
     pool_name = get_temp_pool_name();
     ASSERT_EQ("", create_one_pool_pp(pool_name, s_cluster));
-    cache_pool_name = get_temp_pool_name();
-    ASSERT_EQ(0, s_cluster.pool_create(cache_pool_name.c_str()));
   }
   static void TearDownTestCase() {
-    ASSERT_EQ(0, s_cluster.pool_delete(cache_pool_name.c_str()));
     ASSERT_EQ(0, destroy_one_pool_pp(pool_name, s_cluster));
   }
   static std::string cache_pool_name;
 
   virtual void SetUp() {
+    cache_pool_name = get_temp_pool_name();
+    ASSERT_EQ(0, s_cluster.pool_create(cache_pool_name.c_str()));
     RadosTestPP::SetUp();
     ASSERT_EQ(0, cluster.ioctx_create(cache_pool_name.c_str(), cache_ioctx));
     cache_ioctx.set_namespace(nspace);
@@ -114,6 +113,7 @@ protected:
     cleanup_namespace(cache_ioctx, nspace);
 
     cache_ioctx.close();
+    ASSERT_EQ(0, s_cluster.pool_delete(cache_pool_name.c_str()));
   }
   librados::IoCtx cache_ioctx;
 };
@@ -2399,16 +2399,15 @@ protected:
   static void SetUpTestCase() {
     pool_name = get_temp_pool_name();
     ASSERT_EQ("", create_one_ec_pool_pp(pool_name, s_cluster));
-    cache_pool_name = get_temp_pool_name();
-    ASSERT_EQ(0, s_cluster.pool_create(cache_pool_name.c_str()));
   }
   static void TearDownTestCase() {
-    ASSERT_EQ(0, s_cluster.pool_delete(cache_pool_name.c_str()));
     ASSERT_EQ(0, destroy_one_ec_pool_pp(pool_name, s_cluster));
   }
   static std::string cache_pool_name;
 
   virtual void SetUp() {
+    cache_pool_name = get_temp_pool_name();
+    ASSERT_EQ(0, s_cluster.pool_create(cache_pool_name.c_str()));
     RadosTestECPP::SetUp();
     ASSERT_EQ(0, cluster.ioctx_create(cache_pool_name.c_str(), cache_ioctx));
     cache_ioctx.set_namespace(nspace);
@@ -2437,6 +2436,7 @@ protected:
     cleanup_namespace(cache_ioctx, nspace);
 
     cache_ioctx.close();
+    ASSERT_EQ(0, s_cluster.pool_delete(cache_pool_name.c_str()));
   }
 
   librados::IoCtx cache_ioctx;
@@ -4439,57 +4439,6 @@ TEST_F(LibRadosTwoPoolsECPP, ProxyRead) {
   cluster.wait_for_latest_osdmap();
 }
 
-//Make ecpool as cache pool; no-ecpool as data pool
-//Judge promote object which has omap from no-ecpool into ecpool.
-TEST_F(LibRadosTwoPoolsECPP, OmapOperation) {
-  // create object
-  {
-    bufferlist bl;
-    bl.append("hi there");
-    ASSERT_EQ(0, cache_ioctx.omap_set_header("foo", bl));
-  }
-
-  // configure cache.
-  bufferlist inbl;
-  ASSERT_EQ(0, cluster.mon_command(
-    "{\"prefix\": \"osd tier add\", \"pool\": \"" + cache_pool_name +
-    "\", \"tierpool\": \"" + pool_name +
-    "\", \"force_nonempty\": \"--force-nonempty\" }",
-    inbl, NULL, NULL));
-  ASSERT_EQ(0, cluster.mon_command(
-    "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + cache_pool_name +
-    "\", \"overlaypool\": \"" + pool_name + "\"}",
-    inbl, NULL, NULL));
-  ASSERT_EQ(0, cluster.mon_command(
-    "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + pool_name +
-    "\", \"mode\": \"writeback\"}",
-    inbl, NULL, NULL));
-
-
-  // wait for maps to settle
-  cluster.wait_for_latest_osdmap();
-
-  {
-    bufferlist got;
-    ObjectReadOperation o;
-    o.omap_get_header(&got, NULL);
-    ASSERT_EQ(-EOPNOTSUPP, ioctx.operate("foo", &o, NULL));
-
-  }
-  // tear down tiers
-  ASSERT_EQ(0, cluster.mon_command(
-    "{\"prefix\": \"osd tier remove-overlay\", \"pool\": \"" + cache_pool_name +
-    "\"}",
-    inbl, NULL, NULL));
-  ASSERT_EQ(0, cluster.mon_command(
-    "{\"prefix\": \"osd tier remove\", \"pool\": \"" + cache_pool_name +
-    "\", \"tierpool\": \"" + pool_name + "\"}",
-    inbl, NULL, NULL));
-
-  // wait for maps to settle before next test
-  cluster.wait_for_latest_osdmap();
-}
-
 int main(int argc, char **argv)
 {
   ::testing::InitGoogleTest(&argc, argv);
diff --git a/src/test/mon/osd-crush.sh b/src/test/mon/osd-crush.sh
index 4857ca2..8abfd87 100755
--- a/src/test/mon/osd-crush.sh
+++ b/src/test/mon/osd-crush.sh
@@ -205,6 +205,19 @@ function TEST_crush_rename_bucket() {
     ./ceph osd crush rename-bucket nonexistent something 2>&1 | grep "Error ENOENT" || return 1
 }
 
+function TEST_crush_reject_empty() {
+    local dir=$1
+    run_mon $dir a || return 1
+    # should have at least one OSD
+    run_osd $dir 0 || return 1
+
+    local empty_map=$dir/empty_map
+    :> $empty_map.txt
+    ./crushtool -c $empty_map.txt -o $empty_map.map || return 1
+    expect_failure $dir "Error EINVAL" \
+        ./ceph osd setcrushmap -i $empty_map.map || return 1
+}
+
 main osd-crush "$@"
 
 # Local Variables:
diff --git a/src/test/objectstore/chain_xattr.cc b/src/test/objectstore/chain_xattr.cc
index 5c77b95..15190e7 100644
--- a/src/test/objectstore/chain_xattr.cc
+++ b/src/test/objectstore/chain_xattr.cc
@@ -148,6 +148,78 @@ TEST(chain_xattr, get_and_set) {
   ::unlink(file);
 }
 
+TEST(chain_xattr, chunk_aligned) {
+  const char* file = FILENAME;
+  ::unlink(file);
+  int fd = ::open(file, O_CREAT|O_WRONLY|O_TRUNC, 0700);
+  const string user("user.");
+
+  // set N* chunk size
+  const string name = "user.foo";
+  const string name2 = "user.bar";
+
+  for (int len = CHAIN_XATTR_MAX_BLOCK_LEN - 10;
+       len < CHAIN_XATTR_MAX_BLOCK_LEN + 10;
+       ++len) {
+    cout << len << std::endl;
+    const string x(len, 'x');
+    char buf[len*2];
+    ASSERT_EQ(len, chain_setxattr(file, name.c_str(), x.c_str(), len));
+    char attrbuf[4096];
+    int l = ceph_os_listxattr(file, attrbuf, sizeof(attrbuf));
+    for (char *p = attrbuf; p - attrbuf < l; p += strlen(p) + 1) {
+      cout << "  attr " << p << std::endl;
+    }
+    ASSERT_EQ(len, chain_getxattr(file, name.c_str(), buf, len*2));
+    ASSERT_EQ(0, chain_removexattr(file, name.c_str()));
+
+    ASSERT_EQ(len, chain_fsetxattr(fd, name2.c_str(), x.c_str(), len));
+    l = ceph_os_flistxattr(fd, attrbuf, sizeof(attrbuf));
+    for (char *p = attrbuf; p - attrbuf < l; p += strlen(p) + 1) {
+      cout << "  attr " << p << std::endl;
+    }
+    ASSERT_EQ(len, chain_fgetxattr(fd, name2.c_str(), buf, len*2));
+    ASSERT_EQ(0, chain_fremovexattr(fd, name2.c_str()));
+  }
+
+  for (int len = CHAIN_XATTR_SHORT_BLOCK_LEN - 10;
+       len < CHAIN_XATTR_SHORT_BLOCK_LEN + 10;
+       ++len) {
+    cout << len << std::endl;
+    const string x(len, 'x');
+    char buf[len*2];
+    ASSERT_EQ(len, chain_setxattr(file, name.c_str(), x.c_str(), len));
+    char attrbuf[4096];
+    int l = ceph_os_listxattr(file, attrbuf, sizeof(attrbuf));
+    for (char *p = attrbuf; p - attrbuf < l; p += strlen(p) + 1) {
+      cout << "  attr " << p << std::endl;
+    }
+    ASSERT_EQ(len, chain_getxattr(file, name.c_str(), buf, len*2));
+  }
+
+  {
+    // test tail path in chain_getxattr
+    const char *aname = "user.baz";
+    char buf[CHAIN_XATTR_SHORT_BLOCK_LEN*3];
+    memset(buf, 'x', sizeof(buf));
+    ASSERT_EQ(sizeof(buf), chain_setxattr(file, aname, buf, sizeof(buf)));
+    ASSERT_EQ(-ERANGE, chain_getxattr(file, aname, buf,
+				      CHAIN_XATTR_SHORT_BLOCK_LEN*2));
+  }
+  {
+    // test tail path in chain_fgetxattr
+    const char *aname = "user.biz";
+    char buf[CHAIN_XATTR_SHORT_BLOCK_LEN*3];
+    memset(buf, 'x', sizeof(buf));
+    ASSERT_EQ(sizeof(buf), chain_fsetxattr(fd, aname, buf, sizeof(buf)));
+    ASSERT_EQ(-ERANGE, chain_fgetxattr(fd, aname, buf,
+				       CHAIN_XATTR_SHORT_BLOCK_LEN*2));
+  }
+
+  ::close(fd);
+  ::unlink(file);
+}
+
 TEST(chain_xattr, listxattr) {
   const char* file = FILENAME;
   ::unlink(file);
diff --git a/src/test/osd/RadosModel.h b/src/test/osd/RadosModel.h
index f9c0f9b..8d6889e 100644
--- a/src/test/osd/RadosModel.h
+++ b/src/test/osd/RadosModel.h
@@ -47,6 +47,7 @@ typename T::iterator rand_choose(T &cont) {
 enum TestOpType {
   TEST_OP_READ,
   TEST_OP_WRITE,
+  TEST_OP_WRITE_EXCL,
   TEST_OP_DELETE,
   TEST_OP_SNAP_CREATE,
   TEST_OP_SNAP_REMOVE,
@@ -61,7 +62,8 @@ enum TestOpType {
   TEST_OP_CACHE_FLUSH,
   TEST_OP_CACHE_TRY_FLUSH,
   TEST_OP_CACHE_EVICT,
-  TEST_OP_APPEND
+  TEST_OP_APPEND,
+  TEST_OP_APPEND_EXCL
 };
 
 class TestWatchContext : public librados::WatchCtx2 {
@@ -716,14 +718,17 @@ public:
   bufferlist rbuffer;
 
   bool do_append;
+  bool do_excl;
 
   WriteOp(int n,
 	  RadosTestContext *context,
 	  const string &oid,
 	  bool do_append,
+	  bool do_excl,
 	  TestOpStat *stat = 0)
     : TestOp(n, context, stat),
-      oid(oid), waiting_on(0), last_acked_tid(0), do_append(do_append)
+      oid(oid), waiting_on(0), last_acked_tid(0), do_append(do_append),
+      do_excl(do_excl)
   {}
 		
   void _begin()
@@ -795,6 +800,8 @@ public:
       } else {
 	op.write(i->first, to_write);
       }
+      if (do_excl && tid == 1)
+	op.assert_exists();
       context->io_ctx.aio_operate(
 	context->prefix+oid, completion,
 	&op);
@@ -937,7 +944,15 @@ public:
     interval_set<uint64_t> ranges;
     context->state_lock.Unlock();
 
-    int r = context->io_ctx.remove(context->prefix+oid);
+    int r = 0;
+    if (rand() % 2) {
+      librados::ObjectWriteOperation op;
+      op.assert_exists();
+      op.remove();
+      r = context->io_ctx.operate(context->prefix+oid, &op);
+    } else {
+      r = context->io_ctx.remove(context->prefix+oid);
+    }
     if (r && !(r == -ENOENT && !present)) {
       cerr << "r is " << r << " while deleting " << oid << " and present is " << present << std::endl;
       assert(0);
diff --git a/src/test/osd/TestRados.cc b/src/test/osd/TestRados.cc
index 4d8b45c..e8eb0db 100644
--- a/src/test/osd/TestRados.cc
+++ b/src/test/osd/TestRados.cc
@@ -55,9 +55,9 @@ public:
       cout << m_op << ": write initial oid " << oid.str() << std::endl;
       context.oid_not_flushing.insert(oid.str());
       if (m_ec_pool) {
-	return new WriteOp(m_op, &context, oid.str(), true);
+	return new WriteOp(m_op, &context, oid.str(), true, true);
       } else {
-	return new WriteOp(m_op, &context, oid.str(), false);
+	return new WriteOp(m_op, &context, oid.str(), false, true);
       }
     } else if (m_op >= m_ops) {
       return NULL;
@@ -105,7 +105,14 @@ private:
       oid = *(rand_choose(context.oid_not_in_use));
       cout << m_op << ": " << "write oid " << oid << " current snap is "
 	   << context.current_snap << std::endl;
-      return new WriteOp(m_op, &context, oid, false, m_stats);
+      return new WriteOp(m_op, &context, oid, false, false, m_stats);
+
+    case TEST_OP_WRITE_EXCL:
+      oid = *(rand_choose(context.oid_not_in_use));
+      cout << m_op << ": " << "write (excl) oid "
+	   << oid << " current snap is "
+	   << context.current_snap << std::endl;
+      return new WriteOp(m_op, &context, oid, false, true, m_stats);
 
     case TEST_OP_DELETE:
       oid = *(rand_choose(context.oid_not_in_use));
@@ -206,7 +213,13 @@ private:
       oid = *(rand_choose(context.oid_not_in_use));
       cout << "append oid " << oid << " current snap is "
 	   << context.current_snap << std::endl;
-      return new WriteOp(m_op, &context, oid, true, m_stats);
+      return new WriteOp(m_op, &context, oid, true, false, m_stats);
+
+    case TEST_OP_APPEND_EXCL:
+      oid = *(rand_choose(context.oid_not_in_use));
+      cout << "append oid (excl) " << oid << " current snap is "
+	   << context.current_snap << std::endl;
+      return new WriteOp(m_op, &context, oid, true, true, m_stats);
 
     default:
       cerr << m_op << ": Invalid op type " << type << std::endl;
@@ -244,6 +257,7 @@ int main(int argc, char **argv)
   } op_types[] = {
     { TEST_OP_READ, "read", true },
     { TEST_OP_WRITE, "write", false },
+    { TEST_OP_WRITE_EXCL, "write_excl", false },
     { TEST_OP_DELETE, "delete", true },
     { TEST_OP_SNAP_CREATE, "snap_create", true },
     { TEST_OP_SNAP_REMOVE, "snap_remove", true },
@@ -259,6 +273,7 @@ int main(int argc, char **argv)
     { TEST_OP_CACHE_TRY_FLUSH, "cache_try_flush", true },
     { TEST_OP_CACHE_EVICT, "cache_evict", true },
     { TEST_OP_APPEND, "append", true },
+    { TEST_OP_APPEND_EXCL, "append_excl", true },
     { TEST_OP_READ /* grr */, NULL },
   };
 
diff --git a/src/test/osd/osd-scrub-repair.sh b/src/test/osd/osd-scrub-repair.sh
index 783474f..d5432c2 100755
--- a/src/test/osd/osd-scrub-repair.sh
+++ b/src/test/osd/osd-scrub-repair.sh
@@ -65,6 +65,31 @@ function TEST_corrupt_and_repair_replicated() {
     teardown $dir || return 1
 }
 
+function corrupt_and_repair_two() {
+    local dir=$1
+    local poolname=$2
+    local first=$3
+    local second=$4
+
+    #
+    # 1) remove the corresponding file from the OSDs
+    #
+    objectstore_tool $dir $first SOMETHING remove || return 1
+    objectstore_tool $dir $second SOMETHING remove || return 1
+    #
+    # 2) repair the PG
+    #
+    local pg=$(get_pg $poolname SOMETHING)
+    repair $pg
+    #
+    # 3) The files must be back
+    #
+    objectstore_tool $dir $first SOMETHING list-attrs || return 1
+    objectstore_tool $dir $second SOMETHING list-attrs || return 1
+    rados --pool $poolname get SOMETHING $dir/COPY || return 1
+    diff $dir/ORIGINAL $dir/COPY || return 1
+}
+
 #
 # 1) add an object
 # 2) remove the corresponding file from a designated OSD
@@ -95,22 +120,12 @@ function corrupt_and_repair_one() {
     wait_for_clean || return 1
 }
 
-function TEST_corrupt_and_repair_erasure_coded() {
+function corrupt_and_repair_erasure_coded() {
     local dir=$1
-    local poolname=ecpool
-    local payload=ABCDEF
-
-    setup $dir || return 1
-    run_mon $dir a || return 1
-    run_osd $dir 0 || return 1
-    run_osd $dir 1 || return 1
-    run_osd $dir 2 || return 1
-    run_osd $dir 3 || return 1
-    wait_for_clean || return 1
+    local poolname=$2
+    local profile=$3
 
-    ceph osd erasure-code-profile set myprofile \
-        k=2 m=2 ruleset-failure-domain=osd || return 1
-    ceph osd pool create $poolname 1 1 erasure myprofile \
+    ceph osd pool create $poolname 1 1 erasure $profile \
         || return 1
 
     add_something $dir $poolname
@@ -127,10 +142,51 @@ function TEST_corrupt_and_repair_erasure_coded() {
     corrupt_and_repair_two $dir $poolname $not_primary_first $not_primary_second || \
                return 1
     corrupt_and_repair_two $dir $poolname $primary $not_primary_first || return 1
 
+}
+
+function TEST_corrupt_and_repair_jerasure() {
+    local dir=$1
+    local poolname=ecpool
+    local profile=myprofile
+
+    setup $dir || return 1
+    run_mon $dir a || return 1
+    for id in $(seq 0 3) ; do
+        run_osd $dir $id || return 1
+    done
+    wait_for_clean || return 1
+
+    ceph osd erasure-code-profile set $profile \
+        k=2 m=2 ruleset-failure-domain=osd || return 1
+
+    corrupt_and_repair_erasure_coded $dir $poolname $profile || return 1
+
     teardown $dir || return 1
 }
 
-function TEST_unreocvery_erasure_coded() {
+function TEST_corrupt_and_repair_lrc() {
+    local dir=$1
+    local poolname=ecpool
+    local profile=myprofile
+
+    setup $dir || return 1
+    run_mon $dir a || return 1
+    for id in $(seq 0 9) ; do
+        run_osd $dir $id || return 1
+    done
+    wait_for_clean || return 1
+
+    ceph osd erasure-code-profile set $profile \
+        pluing=lrc \
+        k=4 m=2 l=3 \
+        ruleset-failure-domain=osd || return 1
+
+    corrupt_and_repair_erasure_coded $dir $poolname $profile || return 1
+
+    teardown $dir || return 1
+}
+
+function TEST_unfound_erasure_coded() {
     local dir=$1
     local poolname=ecpool
     local payload=ABCDEF
@@ -176,31 +232,6 @@ function TEST_unreocvery_erasure_coded() {
     teardown $dir || return 1
 }
 
-function corrupt_and_repair_two() {
-    local dir=$1
-    local poolname=$2
-    local first=$3
-    local second=$4
-
-    #
-    # 1) remove the corresponding file from the OSDs
-    #
-    objectstore_tool $dir $first SOMETHING remove || return 1
-    objectstore_tool $dir $second SOMETHING remove || return 1
-    #
-    # 2) repair the PG
-    #
-    local pg=$(get_pg $poolname SOMETHING)
-    repair $pg
-    #
-    # 3) The files must be back
-    #
-    objectstore_tool $dir $first SOMETHING list-attrs || return 1
-    objectstore_tool $dir $second SOMETHING list-attrs || return 1
-    rados --pool $poolname get SOMETHING $dir/COPY || return 1
-    diff $dir/ORIGINAL $dir/COPY || return 1
-}
-
 main osd-scrub-repair "$@"
 
 # Local Variables:
diff --git a/src/test/perf_local.cc b/src/test/perf_local.cc
index bbe390b..f388f61 100644
--- a/src/test/perf_local.cc
+++ b/src/test/perf_local.cc
@@ -403,6 +403,7 @@ double div32()
 // probably pick worse values.
 double div64()
 {
+#if defined(__x86_64__) || defined(__amd64__)
   int count = 1000000;
   // NB: Expect an x86 processor exception is there's overflow.
   uint64_t start = Cycles::rdtsc();
@@ -419,6 +420,9 @@ double div64()
   }
   uint64_t stop = Cycles::rdtsc();
   return Cycles::to_seconds(stop - start)/count;
+#else
+  return -1;
+#endif
 }
 
 // Measure the cost of calling a non-inlined function.
@@ -596,6 +600,7 @@ double perf_cycles_to_nanoseconds()
 }
 
 
+#if defined(__x86_64__) || defined(__amd64__)
 /**
  * Prefetch the cache lines containing [object, object + numBytes) into the
  * processor's caches.
@@ -615,10 +620,12 @@ static inline void prefetch(const void *object, uint64_t \
num_bytes)  _mm_prefetch(p + i, _MM_HINT_T0);
 #endif
 }
+#endif
 
 // Measure the cost of the prefetch instruction.
 double perf_prefetch()
 {
+#if defined(__x86_64__) || defined(__amd64__)
   uint64_t total_ticks = 0;
   int count = 10;
   char buf[16 * 64];
@@ -647,6 +654,9 @@ double perf_prefetch()
     total_ticks += stop - start;
   }
   return Cycles::to_seconds(total_ticks) / count / 16;
+#else
+  return -1;
+#endif
 }
 
 /**
@@ -968,7 +978,9 @@ void run_test(TestInfo& info)
 {
   double secs = info.func();
   int width = printf("%-24s ", info.name);
-  if (secs < 1.0e-06) {
+  if (secs == -1) {
+    width += printf(" architecture nonsupport ");
+  } else if (secs < 1.0e-06) {
     width += printf("%8.2fns", 1e09*secs);
   } else if (secs < 1.0e-03) {
     width += printf("%8.2fus", 1e06*secs);
diff --git a/src/tools/crushtool.cc b/src/tools/crushtool.cc
index 5e53f0a..c7ed691 100644
--- a/src/tools/crushtool.cc
+++ b/src/tools/crushtool.cc
@@ -165,6 +165,7 @@ void usage()
   cout << "Options for the display/test stage\n";
   cout << "\n";
   cout << "   --tree                print map summary as a tree\n";
+  cout << "   --check [max_id]      check if any item is referencing an unknown name/type\n";
   cout << "   -i mapfn --show-location id\n";
   cout << "                         show location for given device id\n";
   cout << "   -i mapfn --test       test a range of inputs on the map\n";
@@ -226,6 +227,8 @@ int main(int argc, const char **argv)
   std::string infn, srcfn, outfn, add_name, remove_name, reweight_name;
   bool compile = false;
   bool decompile = false;
+  bool check = false;
+  int max_id = -1;
   bool test = false;
   bool display = false;
   bool tree = false;
@@ -311,6 +314,8 @@ int main(int argc, const char **argv)
     } else if (ceph_argparse_witharg(args, i, &val, "-c", "--compile", (char*)NULL)) {
       srcfn = val;
       compile = true;
+    } else if (ceph_argparse_witharg(args, i, &max_id, err, "--check", (char*)NULL)) {
+      check = true;
     } else if (ceph_argparse_flag(args, i, "-t", "--test", (char*)NULL)) {
       test = true;
     } else if (ceph_argparse_witharg(args, i, &full_location, err, "--show-location", (char*)NULL)) {
@@ -497,7 +502,7 @@ int main(int argc, const char **argv)
     }
   }
 
-  if (test && !display && !write_to_file) {
+  if (test && !check && !display && !write_to_file) {
     cerr << "WARNING: no output selected; use --output-csv or --show-X" << std::endl;
   }
 
@@ -505,7 +510,7 @@ int main(int argc, const char **argv)
     cerr << "cannot specify more than one of compile, decompile, and build" << std::endl;
     exit(EXIT_FAILURE);
   }
-  if (!compile && !decompile && !build && !test && !reweight && !adjust && !tree &&
+  if (!check && !compile && !decompile && !build && !test && !reweight && !adjust && !tree &&
       add_item < 0 && full_location < 0 &&
       remove_name.empty() && reweight_name.empty()) {
     cerr << "no action specified; -h for help" << std::endl;
@@ -823,6 +828,12 @@ int main(int argc, const char **argv)
     }
   }
 
+  if (check) {
+    if (!tester.check_name_maps(max_id)) {
+      exit(1);
+    }
+  }
+
   if (test) {
     if (tester.get_output_utilization_all() ||
 	tester.get_output_utilization())
diff --git a/src/tools/osdmaptool.cc b/src/tools/osdmaptool.cc
index 14331c3..b547b7a 100644
--- a/src/tools/osdmaptool.cc
+++ b/src/tools/osdmaptool.cc
@@ -55,6 +55,7 @@ int main(int argc, const char **argv)
   bool print = false;
   bool print_json = false;
   bool tree = false;
+  boost::scoped_ptr<Formatter> tree_formatter;
   bool createsimple = false;
   bool create_from_conf = false;
   int num_osd = 0;
@@ -84,8 +85,11 @@ int main(int argc, const char **argv)
       print = true;
     } else if (ceph_argparse_flag(args, i, "--dump-json", (char*)NULL)) {
       print_json = true;
-    } else if (ceph_argparse_flag(args, i, "--tree", (char*)NULL)) {
+    } else if (ceph_argparse_witharg(args, i, &val, err, "--tree", (char*)NULL)) {
       tree = true;
+      if (!val.empty() && val != "plain") {
+	tree_formatter.reset(Formatter::create(val, "", "json"));
+      }
     } else if (ceph_argparse_witharg(args, i, &num_osd, err, "--createsimple", (char*)NULL)) {
       if (!err.str().empty()) {
 	cerr << err.str() << std::endl;
@@ -465,9 +469,17 @@ int main(int argc, const char **argv)
     osdmap.print(cout);
   if (print_json)
     osdmap.dump_json(cout);
-  if (tree) 
-    osdmap.print_tree(&cout, NULL);
-
+  if (tree) {
+    if (tree_formatter) {
+      tree_formatter->open_object_section("tree");
+      osdmap.print_tree(tree_formatter.get(), NULL);
+      tree_formatter->close_section();
+      tree_formatter->flush(cout);
+      cout << std::endl;
+    } else {
+      osdmap.print_tree(NULL, &cout);
+    }
+  }
   if (modified) {
     bl.clear();
     osdmap.encode(bl, CEPH_FEATURES_SUPPORTED_DEFAULT | CEPH_FEATURE_RESERVED);


hooks/post-receive
-- 


[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic