[prev in list] [next in list] [prev in thread] [next in thread] 

List:       hadoop-commits
Subject:    svn commit: r501616 - in /lucene/hadoop/trunk: CHANGES.txt
From:       cutting () apache ! org
Date:       2007-01-30 22:39:54
Message-ID: 20070130223955.24D561A981A () eris ! apache ! org
[Download RAW message or body]

Author: cutting
Date: Tue Jan 30 14:39:53 2007
New Revision: 501616

URL: http://svn.apache.org/viewvc?view=rev&rev=501616
Log:
HADOOP-922.  Optimize small forward seeks in HDFS.  Contributed by Dhruba.

Modified:
    lucene/hadoop/trunk/CHANGES.txt
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSClient.java
    lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/TestSeekBug.java

Modified: lucene/hadoop/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?view=diff&rev=501616&r1=501615&r2=501616
 ==============================================================================
--- lucene/hadoop/trunk/CHANGES.txt (original)
+++ lucene/hadoop/trunk/CHANGES.txt Tue Jan 30 14:39:53 2007
@@ -98,6 +98,10 @@
 30. HADOOP-937.  Change the namenode to request re-registration of
     datanodes in more circumstances.  (Hairong Kuang via cutting)
 
+31. HADOOP-922.  Optimize small forward seeks in HDFS.  If data is
+    likely already in flight, skip ahead rather than re-opening the
+    block.  (Dhruba Borthakur via cutting)
+
 
 Release 0.10.1 - 2007-01-10
 

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSClient.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSClient.java?view=diff&rev=501616&r1=501615&r2=501616
 ==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSClient.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSClient.java Tue Jan 30 14:39:53 2007
@@ -44,6 +44,7 @@
 class DFSClient implements FSConstants {
     public static final Log LOG = LogFactory.getLog("org.apache.hadoop.fs.DFSClient");
     static int MAX_BLOCK_ACQUIRE_FAILURES = 3;
+    private static final int TCP_WINDOW_SIZE = 128 * 1024; // 128 KB
     private static final long DEFAULT_BLOCK_SIZE = 64 * 1024 * 1024;
     ClientProtocol namenode;
     String localName;
@@ -885,8 +886,25 @@
             if (targetPos > filelen) {
                 throw new IOException("Cannot seek after EOF");
             }
-            pos = targetPos;
-            blockEnd = -1;
+            boolean done = false;
+            if (pos <= targetPos && targetPos <= blockEnd) {
+                //
+                // If this seek is to a positive position in the current
+                // block, and this piece of data might already be lying in
+                // the TCP buffer, then just eat up the intervening data.
+                //
+                int diff = (int)(targetPos - pos);
+                if (diff <= TCP_WINDOW_SIZE) {
+                  blockStream.skipBytes(diff);
+                  pos += diff;
+                  assert(pos == targetPos);
+                  done = true;
+                }
+            }
+            if (!done) {
+                pos = targetPos;
+                blockEnd = -1;
+            }
         }
 
         /**

Modified: lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/TestSeekBug.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/TestSeekBug.java?view=diff&rev=501616&r1=501615&r2=501616
 ==============================================================================
--- lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/TestSeekBug.java (original)
+++ lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/TestSeekBug.java Tue Jan 30 14:39:53 2007
@@ -78,6 +78,37 @@
     // all done
     stm.close();
   }
+
+  /*
+   * Read some data, skip a few bytes and read more. HADOOP-922.
+   */
+  private void smallReadSeek(FileSystem fileSys, Path name) throws IOException {
+    FSInputStream stmRaw = fileSys.openRaw(name);
+    byte[] expected = new byte[ONEMB];
+    Random rand = new Random(seed);
+    rand.nextBytes(expected);
+    
+    // Issue a simple read first.
+	byte[] actual = new byte[128];
+    stmRaw.seek(100000);
+    stmRaw.read(actual, 0, actual.length);
+    checkAndEraseData(actual, 100000, expected, "First Small Read Test");
+
+    // now do a small seek of 4 bytes, within the same block.
+    int newpos1 = 100000 + 128 + 4;
+    stmRaw.seek(newpos1);
+    stmRaw.read(actual, 0, actual.length);
+    checkAndEraseData(actual, newpos1, expected, "Small Seek Bug 1");
+
+    // seek another 256 bytes this time
+    int newpos2 = newpos1 + 256;
+    stmRaw.seek(newpos2);
+    stmRaw.read(actual, 0, actual.length);
+    checkAndEraseData(actual, newpos2, expected, "Small Seek Bug 2");
+
+    // all done
+    stmRaw.close();
+  }
   
   private void cleanupFile(FileSystem fileSys, Path name) throws IOException {
     assertTrue(fileSys.exists(name));
@@ -96,6 +127,7 @@
       Path file1 = new Path("seektest.dat");
       writeFile(fileSys, file1);
       seekReadFile(fileSys, file1);
+      smallReadSeek(fileSys, file1);
       cleanupFile(fileSys, file1);
     } finally {
       fileSys.close();


[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic