[prev in list] [next in list] [prev in thread] [next in thread] 

List:       squid-cvs
Subject:    /bzr/squid3/trunk/ r12113: Bug 3441: Part 3: Replace corrupted v1 swap.state with new v2 format.
From:       Alex Rousskov <rousskov () measurement-factory ! com>
Date:       2012-04-10 4:26:14
Message-ID: 20120410042512.41904.qmail () squid-cache ! org
[Download RAW message or body]

--===============1764837571==
MIME-Version: 1.0
Content-Type: text/plain; charset="us-ascii"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline

------------------------------------------------------------
revno: 12113
fixes bug(s): http://bugs.squid-cache.org/show_bug.cgi?id=3441
committer: Alex Rousskov <rousskov@measurement-factory.com>
branch nick: trunk
timestamp: Mon 2012-04-09 22:26:14 -0600
message:
  Bug 3441: Part 3: Replace corrupted v1 swap.state with new v2 format.
  
  A fix for bug 3408 changed the offset at which we start writing dirty
  swap.state entries from StoreSwapLogHeader::record_size to StoreSwapLogHeader
  size.  However, the log-reading code still read the entries starting from the
  old offset (which is required to remain compatible with how a clean swap.state
  is written).
  
  Wrong starting offset essentially means that the vast majority of read
  swap.state entries were bogus. They could only match some real entry when 64*n
  is divisible by 12 and perhaps when their random data just happened to match a
  real entry. Part 2 of this bug fix (trunk r11995) started to pad the [dirty]
  swap.state header to start entry writing at StoreSwapLogHeader::record_size
  boundary.
  
  Changes specific to Part 3:
  
  Unfortunately, since old v1 logs could contain completely bogus entries as the
  result of being read (at some point) from the wrong offset, we should not load
  v1 logs any more (neither dirty nor clean because what looks clean now could
  be based on a previously dirty and, hence, corrupted log). This forced us to
  raise the swap.state format version from 1 to 2.
  
  After this change, if a v1 swap log is detected, Squid ignores it and does a
  from-directory rebuild as if no swap.state files were found.
  
  Since we had to change swap.state format version, we also made log entry size
  and composition the same across virtually all platforms; added checksums so
  that a similar bug would not go unnoticed for so long (and would not result in
  log corruption); and increased the size of time-related entries to avoid the
  "year 2038" problem.
  
  The swap log entries are still written to disk in host byte order.
  
  
  We now also zero the [dirty] swap.state header padding to prevent random and
  potentially sensitive garbage in logs.
  
  
  Cache index rebuild kinds are now reported using the following three labels:
  
  * Rebuild using a swap log created by Squid during clean shutdown: "clean log"
  * Rebuild using a swap log accumulated by a running Squid: "dirty log"
  * Rebuild using directory scan: "no log"
  
  The first kind used to be reported as CLEAN and the other two as DIRTY rebuild.
modified:
  src/StoreSwapLogData.cc
  src/StoreSwapLogData.h
  src/fs/ufs/store_dir_ufs.cc
  src/fs/ufs/ufscommon.cc

--===============1764837571==
MIME-Version: 1.0
Content-Type: text/plain; charset="us-ascii"; name="r12113.diff"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline

=== modified file 'src/StoreSwapLogData.cc'
--- a/src/StoreSwapLogData.cc	2012-01-31 01:57:05 +0000
+++ b/src/StoreSwapLogData.cc	2012-04-10 04:26:14 +0000
@@ -36,16 +36,53 @@
 #include "StoreSwapLogData.h"
 #include "swap_log_op.h"
 
-StoreSwapLogData::StoreSwapLogData(): op(0), swap_filen (0), timestamp (0), lastref \
                (0), expires (0), lastmod(0), swap_file_sz (0), refcount (0), flags \
                (0)
-{
-    memset (key, '\0', sizeof(key));
+// Based on Internet Checksum (RFC 1071) algorithm but takes three 32bit ints.
+// TODO: Consider Fletcher's checksum algorithm as a higher quality alternative
+void
+SwapChecksum24::set(uint32_t f1, uint32_t f2, uint32_t f3)
+{
+    uint64_t sum = f1;
+    sum += f2;
+    sum += f3;
+
+    while (const uint64_t higherBits = sum >> 24)
+       sum = (sum & 0xFFFFFF) + higherBits;
+
+    sum = ~sum;
+
+    raw[0] = static_cast<uint8_t>(sum);
+    raw[1] = static_cast<uint8_t>(sum >> 8);
+    raw[2] = static_cast<uint8_t>(sum >> 16);
+}
+
+/// Same as 3-argument SwapChecksum24::set() but for int32_t and uint64_t
+void
+SwapChecksum24::set(int32_t f1, uint64_t f2)
+{
+    // split the second 64bit word into two 32bit words
+    set(static_cast<uint32_t>(f1),
+        static_cast<uint32_t>(f2 >> 32),
+        static_cast<uint32_t>(f2 & 0xFFFFFFFF));
+}
+
+std::ostream &
+SwapChecksum24::print(std::ostream &os) const
+{
+    return os << raw[0] << '-' << raw[1] << '-' << raw[2];
+}
+
+StoreSwapLogData::StoreSwapLogData()
+{
+    memset(this, 0, sizeof(*this));
 }
 
 bool
 StoreSwapLogData::sane() const
 {
-    // TODO: These checks are rather weak. A corrupted swap.state may still
-    // cause havoc (e.g., cur_size may become astronomical). Add checksums?
+    SwapChecksum24 actualSum;
+    actualSum.set(swap_filen, swap_file_sz);
+    if (checksum != actualSum)
+        return false;
 
     const time_t minTime = -2; // -1 is common; expires sometimes uses -2
 
@@ -59,7 +96,33 @@
            swap_file_sz > 0; // because swap headers ought to consume space
 }
 
-StoreSwapLogHeader::StoreSwapLogHeader():op(SWAP_LOG_VERSION), version(1)
-{
-    record_size = sizeof(StoreSwapLogData);
+void
+StoreSwapLogData::finalize()
+{
+    checksum.set(swap_filen, swap_file_sz);
+}
+
+StoreSwapLogHeader::StoreSwapLogHeader(): op(SWAP_LOG_VERSION), version(2),
+    record_size(sizeof(StoreSwapLogData))
+{
+    checksum.set(version, record_size, 0);
+}
+
+bool
+StoreSwapLogHeader::sane() const
+{
+    SwapChecksum24 actualSum;
+    actualSum.set(version, record_size, 0);
+    if (checksum != actualSum)
+        return false;
+
+    return op == SWAP_LOG_VERSION && version >= 2 && record_size > 0;
+}
+
+size_t
+StoreSwapLogHeader::gapSize() const
+{
+    assert(record_size > 0);
+    assert(static_cast<size_t>(record_size) > sizeof(*this));
+    return static_cast<size_t>(record_size) - sizeof(*this);
 }

=== modified file 'src/StoreSwapLogData.h'
--- a/src/StoreSwapLogData.h	2012-01-30 17:13:52 +0000
+++ b/src/StoreSwapLogData.h	2012-04-10 04:26:14 +0000
@@ -61,20 +61,44 @@
 
 #include "squid-old.h"
 
-/*
- * Do we need to have the dirn in here? I don't think so, since we already
- * know the dirn ..
- */
+/// maintains a 24-bit checksum over integer fields
+class SwapChecksum24 {
+public:
+    SwapChecksum24() { raw[0] = raw[1] = raw[2] = 0; }
+
+	bool operator ==(const SwapChecksum24 &o) const {
+        return raw[0] == o.raw[0] && raw[1] == o.raw[1] && raw[2] == o.raw[2];
+    }
+
+	bool operator !=(const SwapChecksum24 &o) const {
+        return !(*this == o);
+    }
+
+    /// compute and store checksum based on three 32bit integers
+    void set(uint32_t f1, uint32_t f2, uint32_t f3);
+
+    /// compute and store checksum based on int32_t and uint64_t integers
+    void set(int32_t f1, uint64_t f2);
+
+    // printing for debugging
+    std::ostream &print(std::ostream &os) const;
+
+private:
+    uint8_t raw[3]; // designed to follow "op" members, in pading space
+};
+
+inline std::ostream &
+operator <<(std::ostream &os, const SwapChecksum24 &sum)
+{
+    return sum.print(os);
+}
+
 /**
  \ingroup FielFormatSwapStateAPI
- \note This information is current as of version 2.2.STABLE4
- *
- \li		Binary format on disk.
- \li		DO NOT randomly alter.
- \li		DO NOT add ANY virtual's.
  *
  \par
- * Defines the structure of a binary swap.state file entry.
+ * Defines the structure of a binary swap.state file entry for UFS stores.
+ * TODO: Move to fs/ufs (and remove from COSS).
  *
  \note StoreSwapLogData entries are written in native machine byte order
  *     They are not necessarily portable across architectures.
@@ -84,16 +108,28 @@
 
 public:
     MEMPROXY_CLASS(StoreSwapLogData);
+
+    /// type to use for storing time-related members; must be signed
+    typedef int64_t SwappedTime;
+
     StoreSwapLogData();
 
     /// consistency self-check: whether the data appears to make sense
     bool sane() const;
 
+    /// call this before storing the log entry
+    void finalize();
+
     /**
      * Either SWAP_LOG_ADD when an object is added to the disk storage,
      * or SWAP_LOG_DEL when an object is deleted.
      */
-    char op;
+    uint8_t op;
+
+    /**
+     * Fingerprint to weed out bogus/corrupted swap.state entries.
+     */
+    SwapChecksum24 checksum; // follows "op" because compiler will pad anyway
 
     /**
      * The 32-bit file number which maps to a pathname.
@@ -105,20 +141,20 @@
     sfileno swap_filen;
 
     /**
-     * A 32-bit Unix time value that represents the time when
+     * A Unix time value that represents the time when
      * the origin server generated this response. If the response
      * has a valid Date: header, this timestamp corresponds
      * to that time. Otherwise, it is set to the Squid process time
      * when the response is read (as soon as the end of headers are found).
      */
-    time_t timestamp;
+    SwappedTime timestamp;
 
     /**
      * The last time that a client requested this object.
      * Strictly speaking, this time is set whenever the StoreEntry
      * is locked (via storeLockObject()).
      */
-    time_t lastref;
+    SwappedTime lastref;
 
     /**
      * The value of the response's Expires: header, if any.
@@ -129,14 +165,14 @@
      * where Squid sets expires to -2. This happens for the
      * internal "netdb" object and for FTP URL responses.
      */
-    time_t expires;
+    SwappedTime expires;
 
     /**
      * The value of the response's Last-modified: header, if any.
      * This is set to -1 if there is no Last-modified: header,
      * or if it is unparseable.
      */
-    time_t lastmod;
+    SwappedTime lastmod;
 
     /**
      * This is the number of bytes that the object occupies on
@@ -167,13 +203,23 @@
 MEMPROXY_CLASS_INLINE(StoreSwapLogData);
 
 /// \ingroup FileFormatSwapStateAPI
+/// Swap log starts with this binary structure.
 class StoreSwapLogHeader
 {
 public:
+    // sets default values for this Squid version; loaded values may differ
     StoreSwapLogHeader();
-    char op;
-    int version;
-    int record_size;
+
+    /// consistency self-check: whether the data appears to make sense
+    bool sane() const;
+
+    /// number of bytes after the log header before the first log entry
+    size_t gapSize() const;
+
+    uint8_t op;
+    SwapChecksum24 checksum; // follows "op" because compiler will pad anyway
+    int32_t version;
+    int32_t record_size;
 };
 
 

=== modified file 'src/fs/ufs/store_dir_ufs.cc'
--- a/src/fs/ufs/store_dir_ufs.cc	2012-02-05 07:29:09 +0000
+++ b/src/fs/ufs/store_dir_ufs.cc	2012-04-10 04:26:14 +0000
@@ -804,9 +804,8 @@
         buf.init(header.record_size, header.record_size);
         buf.append(reinterpret_cast<const char*>(&header), sizeof(header));
         // Pad to keep in sync with UFSSwapDir::writeCleanStart().
-        // TODO: When MemBuf::spaceSize() is fixed not to subtract one,
-        // memset() space() with zeroes and use spaceSize() below.
-        buf.appended(static_cast<size_t>(header.record_size) - sizeof(header));
+        memset(buf.space(), 0, header.gapSize());
+        buf.appended(header.gapSize());
         file_write(swaplog_fd, -1, buf.content(), buf.contentSize(),
                    NULL, NULL, buf.freeFunc());
     }
@@ -895,6 +894,7 @@
     /*copy the header */
     memcpy(state->outbuf, &header, sizeof(StoreSwapLogHeader));
     // Leave a gap to keep in sync with UFSSwapDir::openTmpSwapLog().
+    memset(state->outbuf + sizeof(StoreSwapLogHeader), 0, header.gapSize());
     state->outbuf_offset += header.record_size;
 
     state->walker = repl->WalkInit(repl);
@@ -944,6 +944,7 @@
     s.refcount = e.refcount;
     s.flags = e.flags;
     memcpy(&s.key, e.key, SQUID_MD5_DIGEST_LENGTH);
+    s.finalize();
     memcpy(outbuf + outbuf_offset, &s, ss);
     outbuf_offset += ss;
     /* buffered write */
@@ -1052,6 +1053,7 @@
     s->refcount = e.refcount;
     s->flags = e.flags;
     memcpy(s->key, e.key, SQUID_MD5_DIGEST_LENGTH);
+    s->finalize();
     file_write(swaplog_fd,
                -1,
                s,

=== modified file 'src/fs/ufs/ufscommon.cc'
--- a/src/fs/ufs/ufscommon.cc	2012-01-30 17:13:52 +0000
+++ b/src/fs/ufs/ufscommon.cc	2012-04-10 04:26:14 +0000
@@ -95,6 +95,7 @@
     }
 };
 
+#if UNUSED_CODE
 /// Parse a swap header entry created on a system with 32-bit size_t, time_t and \
sfileno  /// this is typical of 32-bit systems without large file support and with \
old kernels  /// NP: SQUID_MD5_DIGEST_LENGTH is very risky still.
@@ -213,6 +214,20 @@
     }
     return true;
 }
+#endif /* UNUSED_CODE */
+
+/// swap.state v2 log parser
+class UFSSwapLogParser_v2: public UFSSwapLogParser
+{
+public:
+    UFSSwapLogParser_v2(FILE *fp): UFSSwapLogParser(fp) {
+        record_size = sizeof(StoreSwapLogData);
+    }
+    bool ReadRecord(StoreSwapLogData &swapData) {
+        assert(log);
+        return fread(&swapData, sizeof(StoreSwapLogData), 1, log) == 1;
+    }
+};
 
 
 UFSSwapLogParser *UFSSwapLogParser::GetUFSSwapLogParser(FILE *fp)
@@ -230,10 +245,18 @@
         return new UFSSwapLogParser_v1_32bs(fp); // Um. 32-bits except time_t, and \
can't determine that.  }
 
+    debugs(47, 2, "Swap file version: " << header.version);
+
     if (header.version == 1) {
         if (fseek(fp, header.record_size, SEEK_SET) != 0)
             return NULL;
 
+        debugs(47, DBG_IMPORTANT, "Rejecting swap file v1 to avoid cache " <<
+               "index corruption. Forcing a full cache index rebuild. " <<
+               "See Squid bug #3441.");
+        return NULL;
+
+#if UNUSED_CODE
         // baseline
         // 32-bit sfileno
         // native time_t (hopefully 64-bit)
@@ -272,11 +295,26 @@
         debugs(47, 1, "WARNING: The swap file has wrong format!... ");
         debugs(47, 1, "NOTE: Cannot safely downgrade caches to short (32-bit) \
timestamps.");  return NULL;
-    }
-
-    // XXX: version 2 of swapfile. This time use fixed-bit sizes for everything!!
-    // and preferrably write to disk in network-order bytes for the larger fields.
-
+#endif
+    }
+
+    if (header.version >= 2) {
+        if (!header.sane()) {
+            debugs(47, DBG_IMPORTANT, "ERROR: Corrupted v" << header.version <<
+                   " swap file header.");
+            return NULL;
+        }
+
+        if (fseek(fp, header.record_size, SEEK_SET) != 0)
+            return NULL;
+
+        if (header.version == 2)
+            return new UFSSwapLogParser_v2(fp);
+    }
+
+    // TODO: v3: write to disk in network-order bytes for the larger fields?
+
+    debugs(47, DBG_IMPORTANT, "Unknown swap file version: " << header.version);
     return NULL;
 }
 
@@ -327,7 +365,8 @@
     if (!clean)
         flags.need_to_validate = 1;
 
-    debugs(47, 1, "Rebuilding storage in " << sd->path << " (" << (clean ? "CLEAN" : \
"DIRTY") << ")"); +    debugs(47, DBG_IMPORTANT, "Rebuilding storage in " << sd->path \
<< " (" << +        (clean ? "clean log" : (LogParser ? "dirty log" : "no log")) << \
")");  }
 
 RebuildState::~RebuildState()
@@ -407,7 +446,8 @@
     fd = getNextFile(&filn, &size);
 
     if (fd == -2) {
-        debugs(47, 1, "Done scanning " << sd->path << " swaplog (" << n_read << " \
entries)"); +        debugs(47, DBG_IMPORTANT, "Done scanning " << sd->path << " dir \
(" << +               n_read << " entries)");
         _done = true;
         return;
     } else if (fd < 0) {
@@ -417,6 +457,8 @@
     assert(fd > -1);
     /* lets get file stats here */
 
+    n_read++;
+
     if (fstat(fd, &sb) < 0) {
         debugs(47, 1, "commonUfsDirRebuildFromDirectory: fstat(FD " << fd << "): " \
<< xstrerror());  file_close(fd);


--===============1764837571==--


[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic