[prev in list] [next in list] [prev in thread] [next in thread] 

List:       nutch-cvs
Subject:    [Nutch-cvs] svn commit: r265503 - in /lucene/nutch/trunk/src:
From:       jerome () apache ! org
Date:       2005-08-31 15:17:36
Message-ID: 20050831151741.16143.qmail () minotaur ! apache ! org
[Download RAW message or body]

Author: jerome
Date: Wed Aug 31 08:17:11 2005
New Revision: 265503

URL: http://svn.apache.org/viewcvs?rev=265503&view=rev
Log:
Merged 0.7 branch changes 240321:240453 into trunk

Modified:
    lucene/nutch/trunk/src/java/org/apache/nutch/clustering/OnlineClusterer.java
    lucene/nutch/trunk/src/java/org/apache/nutch/fs/NutchFileSystem.java
    lucene/nutch/trunk/src/java/org/apache/nutch/mapReduce/FileSplit.java
    lucene/nutch/trunk/src/java/org/apache/nutch/mapReduce/MapOutputFile.java
    lucene/nutch/trunk/src/java/org/apache/nutch/mapReduce/RecordReader.java
    lucene/nutch/trunk/src/java/org/apache/nutch/mapReduce/package.html
    lucene/nutch/trunk/src/java/org/apache/nutch/parse/Parse.java
    lucene/nutch/trunk/src/java/org/apache/nutch/protocol/Content.java
    lucene/nutch/trunk/src/java/org/apache/nutch/protocol/ProtocolException.java
    lucene/nutch/trunk/src/java/org/apache/nutch/protocol/ResourceGone.java
    lucene/nutch/trunk/src/java/org/apache/nutch/protocol/ResourceMoved.java
    lucene/nutch/trunk/src/java/org/apache/nutch/protocol/RetryLater.java
    lucene/nutch/trunk/src/java/org/apache/nutch/searcher/Hits.java
    lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java
    lucene/nutch/trunk/src/java/org/apache/nutch/util/Daemon.java
    lucene/nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/LanguageIdentifier.java
    lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/DOMBuilder.java
    lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/XMLCharacterRecognizer.java
    lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/DummySSLProtocolSocketFactory.java


Modified: lucene/nutch/trunk/src/java/org/apache/nutch/clustering/OnlineClusterer.java
                
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/clustering/OnlineClusterer.java?rev=265503&r1=265502&r2=265503&view=diff
 ==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/clustering/OnlineClusterer.java \
                (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/clustering/OnlineClusterer.java Wed \
Aug 31 08:17:11 2005 @@ -23,8 +23,8 @@
  * algorithms.
  *
  * <p>By the term <b>online</b> search results clustering we will understand
- * a clusterer that works on a set of {@link Hit}s retrieved for a user's query
- * and produces a set of {@link Clusters} that can be displayed to help
+ * a clusterer that works on a set of {@link HitDetails} retrieved for a user's
+ * query and produces a set of {@link HitsCluster} that can be displayed to help
  * the user gain insight in the topics found in the result.</p>
  *
  * <p>Other clustering options include predefined categories and off-line

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/fs/NutchFileSystem.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/fs/NutchFileSystem.java?rev=265503&r1=265502&r2=265503&view=diff
 ==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/fs/NutchFileSystem.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/fs/NutchFileSystem.java Wed Aug 31 \
08:17:11 2005 @@ -80,8 +80,8 @@
       return getNamed(NutchConf.get().get("fs.default.name", "local"));
     }
 
-    /** Returns a name for this filesystem, suitable to pass to {@link
-     * NutchFileSystem#getNamed(String).*/
+    /** Returns a name for this filesystem, suitable to pass to
+     * {@link NutchFileSystem#getNamed(String)}.*/
     public abstract String getName();
   
     /** Returns a named filesystem.  Names are either the string "local" or a

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/mapReduce/FileSplit.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/mapReduce/FileSplit.java?rev=265503&r1=265502&r2=265503&view=diff
 ==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/mapReduce/FileSplit.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/mapReduce/FileSplit.java Wed Aug 31 \
08:17:11 2005 @@ -25,9 +25,12 @@
 import org.apache.nutch.io.UTF8;
 import org.apache.nutch.fs.NutchFileSystem;
 
-/** A section of an input file.  Returned by {@link
- * InputFormat#getSplits(File[], int)} and passed to
- * InputFormat#getRecordReader(FileSplit). */
+/**
+ * A section of an input file.
+ * Returned by {@link InputFormat#getSplits(NutchFileSystem, JobConf, int)}
+ * and passed to
+ * {@link InputFormat#getRecordReader(NutchFileSystem, FileSplit, JobConf)}.
+ */
 public class FileSplit implements Writable {
   private File file;
   private long start;

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/mapReduce/MapOutputFile.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/mapReduce/MapOutputFile.java?rev=265503&r1=265502&r2=265503&view=diff
 ==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/mapReduce/MapOutputFile.java \
                (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/mapReduce/MapOutputFile.java Wed Aug \
31 08:17:11 2005 @@ -42,7 +42,7 @@
 
   /** Create a local reduce input file name.
    * @param mapTaskId a map task id
-   * @param partition a reduce partition
+   * @param reduceTaskId a reduce task id
    */
   public static File getInputFile(String mapTaskId, String reduceTaskId) {
     File taskDir = new File(LOCAL_DIR, reduceTaskId);

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/mapReduce/RecordReader.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/mapReduce/RecordReader.java?rev=265503&r1=265502&r2=265503&view=diff
 ==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/mapReduce/RecordReader.java \
                (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/mapReduce/RecordReader.java Wed Aug \
31 08:17:11 2005 @@ -22,7 +22,7 @@
 import org.apache.nutch.io.WritableComparable;
 import org.apache.nutch.io.Writable;
 
-/** Reads key/value pairs from an input file {@link InputFormat.Split}.
+/** Reads key/value pairs from an input file {@link FileSplit}.
  * Implemented by {@link InputFormat} implementations. */
 public interface RecordReader {
   /** Reads the next key/value pair.

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/mapReduce/package.html
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/mapReduce/package.html?rev=265503&r1=265502&r2=265503&view=diff
 ==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/mapReduce/package.html (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/mapReduce/package.html Wed Aug 31 \
08:17:11 2005 @@ -6,7 +6,7 @@
 
 <p>Applications implement {@link org.apache.nutch.mapReduce.Mapper} and
 {@link org.apache.nutch.mapReduce.Reducer} interfaces.  These are submitted
-as a {@link org.apache.nutch.mapReduce.MapReduceJob} and are applied to data
+as a MapReduceJob and are applied to data
 stored in a {@link org.apache.nutch.fs.NutchFileSystem}.</p>
 
 <p>See <a href="http://labs.google.com/papers/mapreduce.html">Google's

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/parse/Parse.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/parse/Parse.java?rev=265503&r1=265502&r2=265503&view=diff
 ==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/parse/Parse.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/parse/Parse.java Wed Aug 31 08:17:11 \
2005 @@ -17,7 +17,7 @@
 package org.apache.nutch.parse;
 
 /** The result of parsing a page's raw content.
- * @see Parser#getParse(FetcherOutput,Content)
+ * @see Parser#getParse(Content)
  */
 public interface Parse {
   

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/protocol/Content.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/protocol/Content.java?rev=265503&r1=265502&r2=265503&view=diff
 ==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/protocol/Content.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/protocol/Content.java Wed Aug 31 \
08:17:11 2005 @@ -114,7 +114,8 @@
   public void setContent(byte[] content) { this.content = content; }
 
   /** The media type of the retrieved content.
-   * @see http://www.iana.org/assignments/media-types/
+   * @see <a href="http://www.iana.org/assignments/media-types/">
+   *      http://www.iana.org/assignments/media-types/</a>
    */
   public String getContentType() { return contentType; }
   public void setContentType(String contentType) {

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/protocol/ProtocolException.java
                
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/protocol/ProtocolException.java?rev=265503&r1=265502&r2=265503&view=diff
 ==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/protocol/ProtocolException.java \
                (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/protocol/ProtocolException.java Wed \
Aug 31 08:17:11 2005 @@ -18,7 +18,6 @@
 
 import java.net.URL;
 
-/** Thrown by {@link Protocol#getContent(String)}.*/
 public class ProtocolException extends Exception {
 
   public ProtocolException() {

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/protocol/ResourceGone.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/protocol/ResourceGone.java?rev=265503&r1=265502&r2=265503&view=diff
 ==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/protocol/ResourceGone.java \
                (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/protocol/ResourceGone.java Wed Aug \
31 08:17:11 2005 @@ -19,7 +19,7 @@
 import java.io.IOException;
 import java.net.URL;
 
-/** Thrown by {@link Protocol#getContent(String)} when a {@link URL} is invalid.*/
+/** Thrown when a resource is invalid. */
 public class ResourceGone extends ProtocolException {
   private URL url;
 

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/protocol/ResourceMoved.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/protocol/ResourceMoved.java?rev=265503&r1=265502&r2=265503&view=diff
 ==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/protocol/ResourceMoved.java \
                (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/protocol/ResourceMoved.java Wed Aug \
31 08:17:11 2005 @@ -19,8 +19,7 @@
 import java.io.IOException;
 import java.net.URL;
 
-/** Thrown by {@link Protocol#getContent(String)} when a {@link URL} no longer
- * exists.*/
+/** Thrown when a resource no longer exists.*/
 public class ResourceMoved extends IOException {
   private URL oldUrl;
   private URL newUrl;

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/protocol/RetryLater.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/protocol/RetryLater.java?rev=265503&r1=265502&r2=265503&view=diff
 ==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/protocol/RetryLater.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/protocol/RetryLater.java Wed Aug 31 \
08:17:11 2005 @@ -19,8 +19,7 @@
 import java.io.IOException;
 import java.net.URL;
 
-/** Thrown by {@link Protocol#getContent(String)} when a {@link URL} should be
- * retried later.*/
+/** Thrown when a resource should be retried later.*/
 public class RetryLater extends ProtocolException {
   private URL url;
 

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/searcher/Hits.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/searcher/Hits.java?rev=265503&r1=265502&r2=265503&view=diff
 ==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/searcher/Hits.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/searcher/Hits.java Wed Aug 31 \
08:17:11 2005 @@ -44,14 +44,14 @@
   }
 
   /** Returns the total number of hits for this query.  This may be an estimate
-   * when (@link totalIsExact()} is false. */
+   * when {@link #totalIsExact()} is false. */
   public long getTotal() { return total; }
 
-  /** True if {@link getTotal()} gives the exact number of hits, or false if
+  /** True if {@link #getTotal()} gives the exact number of hits, or false if
    * it is only an estimate of the total number of hits. */
   public boolean totalIsExact() { return totalIsExact; }
 
-  /** Set {@link totalIsExact()}. */
+  /** Set {@link #totalIsExact()}. */
   public void setTotalIsExact(boolean isExact) { totalIsExact = isExact; }
 
   /** Returns the number of hits included in this current listing. */

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java?rev=265503&r1=265502&r2=265503&view=diff
 ==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java \
                (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java Wed Aug \
31 08:17:11 2005 @@ -179,7 +179,8 @@
    * @param withParseText if true, fix parse_text, otherwise ignore it
    * @param withParseData if true, fix parse_data, otherwise ignore it
   * @param dryrun if true, only show what would be done without performing any actions
-   * @return
+   * @return <code>true</code> if segment was fixed successfully, otherwise
+   *         return <code>false</code>.
    */
   public static boolean fixSegment(NutchFileSystem nfs, File dir, 
           boolean withContent, boolean withParseText, boolean withParseData,

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/util/Daemon.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/util/Daemon.java?rev=265503&r1=265502&r2=265503&view=diff
 ==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/util/Daemon.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/util/Daemon.java Wed Aug 31 08:17:11 \
2005 @@ -16,7 +16,7 @@
 
 package org.apache.nutch.util;
 
-/** A thread that has called {@link Thread#SetDaemon(boolean) } with true.*/
+/** A thread that has called {@link Thread#setDaemon(boolean) } with true.*/
 public class Daemon extends Thread {
 
   {

Modified: lucene/nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/LanguageIdentifier.java
                
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/LanguageIdentifier.java?rev=265503&r1=265502&r2=265503&view=diff
 ==============================================================================
--- lucene/nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/LanguageIdentifier.java \
                (original)
+++ lucene/nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/LanguageIdentifier.java \
Wed Aug 31 08:17:11 2005 @@ -20,6 +20,7 @@
 import java.io.InputStream;
 import java.io.IOException;
 import java.io.BufferedReader;
+import java.io.ByteArrayOutputStream;
 import java.io.FileInputStream;
 import java.io.InputStreamReader;
 import java.util.List;
@@ -48,9 +49,13 @@
 
 
 /**
+ * Identify the language of a content, based on statistical analysis.
+ *
+ * @see <a href="http://www.w3.org/WAI/ER/IG/ert/iso639.htm">ISO 639
+ *      Language Codes</a>
  * 
  * @author Sami Siren
- * @author Jerome Charron
+ * @author J&eacute;r&ocirc;me Charron
  */
 public class LanguageIdentifier {
   
@@ -59,8 +64,8 @@
   
   private final static float SCORE_THRESOLD = 0.00F;
 
-  public final static Logger LOG = LogFormatter.getLogger(LanguageIdentifier.class.getName());
-
+  private final static Logger LOG =
+          LogFormatter.getLogger(LanguageIdentifier.class.getName());
   
   private ArrayList languages = new ArrayList();
 
@@ -168,7 +173,8 @@
   }
 
   /**
-   * return handle to singleton instance
+   * Get a LanguageIdentifier instance.
+   * @return the LanguageIdentifier singleton instance.
    */
   public static LanguageIdentifier getInstance() {
     if (identifier == null) {
@@ -182,13 +188,25 @@
   }
 
   /**
-   * main method used for testing
-   * 
-   * @param args
+   * Main method used for command line process.
+   * <br/>Usage is:
+   * <pre>
+   * LanguageIdentifier [-identifyrows filename maxlines]
+   *                    [-identifyfile charset filename]
+   *                    [-identifyfileset charset files]
+   *                    [-identifytext text]
+   *                    [-identifyurl url]
+   * </pre>
+   * @param args arguments.
    */
   public static void main(String args[]) {
 
-    String usage = "Usage: LanguageIdentifier [-identifyrows filename maxlines] [-identifyfile filename] [-identifyfileset files] [-identifytext text] [-identifyurl url]";
+    String usage = "Usage: LanguageIdentifier "            +
+                      "[-identifyrows filename maxlines] " +
+                      "[-identifyfile charset filename] "  +
+                      "[-identifyfileset charset files] "  +
+                      "[-identifytext text] "              +
+                      "[-identifyurl url]";
     int command = 0;
 
     final int IDFILE = 1;
@@ -199,6 +217,7 @@
 
     Vector fileset = new Vector();
     String filename = "";
+    String charset = "";
     String url = "";
     String text = "";
     int max = 0;
@@ -211,6 +230,7 @@
     for (int i = 0; i < args.length; i++) { // parse command line
       if (args[i].equals("-identifyfile")) {
         command = IDFILE;
+        charset = args[++i];
         filename = args[++i];
       }
 
@@ -233,6 +253,7 @@
 
       if (args[i].equals("-identifyfileset")) {
         command = IDFILESET;
+        charset = args[++i];
         for (i++; i < args.length; i++) {
           File[] files = null;
           File f = new File(args[i]);
@@ -264,7 +285,7 @@
         case IDFILE:
           f = new File(filename);
           fis = new FileInputStream(f);
-          lang = idfr.identify(fis);
+          lang = idfr.identify(fis, charset);
           fis.close();
           break;
 
@@ -302,7 +323,7 @@
               filename = (String) i.next();
               f = new File(filename);
               fis = new FileInputStream(f);
-              lang = idfr.identify(fis);
+              lang = idfr.identify(fis, charset);
               fis.close();
             } catch (Exception e) {
               System.out.println(e);
@@ -349,22 +370,26 @@
   }
 
   /**
-   * Identify language based on submitted content
+   * Identify language of a content.
    * 
-   * @param text to analyze
-   * @return 2 letter ISO639 code of language (en, fi, sv...) , or null if
-   *         unknown
+   * @param content is the content to analyze.
+   * @return The 2 letter
+   *         <a href="http://www.w3.org/WAI/ER/IG/ert/iso639.htm">ISO 639
+   *         language code</a> (en, fi, sv, ...) of the language that best
+   *         matches the specified content.
    */
-  public String identify(String text) {
-    return identify(new StringBuffer(text));
+  public String identify(String content) {
+    return identify(new StringBuffer(content));
   }
 
   /**
-   * Identify language based on submitted content
+   * Identify language of a content.
    * 
-   * @param text to analyze
-   * @return 2 letter ISO639 code of language (en, fi, sv...) , or null if
-   *         unknown
+   * @param content is the content to analyze.
+   * @return The 2 letter
+   *         <a href="http://www.w3.org/WAI/ER/IG/ert/iso639.htm">ISO 639
+   *         language code</a> (en, fi, sv, ...) of the language that best
+   *         matches the specified content.
    */
   public String identify(StringBuffer content) {
 
@@ -405,26 +430,48 @@
   }
 
   /**
-   * Identify language from inputstream
-   * 
-   * @param is
-   * @return language code
-   * @throws IOException
+   * Identify language from input stream.
+   * This method uses the platform default encoding to read the input stream.
+   * For using a specific encoding, use the
+   * {@link #identify(InputStream, String)} method.
+   *
+   * @param is is the input stream to analyze.
+   * @return The 2 letter
+   *         <a href="http://www.w3.org/WAI/ER/IG/ert/iso639.htm">ISO 639
+   *         language code</a> (en, fi, sv, ...) of the language that best
+   *         matches the content of the specified input stream.
+   * @throws IOException if something wrong occurs on the input stream.
    */
   public String identify(InputStream is) throws IOException {
+    return identify(is, null);
+  }
+  
+  /**
+   * Identify language from input stream.
+   * 
+   * @param is is the input stream to analyze.
+   * @param charset is the charset to use to read the input stream.
+   * @return The 2 letter
+   *         <a href="http://www.w3.org/WAI/ER/IG/ert/iso639.htm">ISO 639
+   *         language code</a> (en, fi, sv, ...) of the language that best
+   *         matches the content of the specified input stream.
+   * @throws IOException if something wrong occurs on the input stream.
+   */
+  public String identify(InputStream is, String charset) throws IOException {
 
-    StringBuffer text = new StringBuffer();
+    ByteArrayOutputStream out = new ByteArrayOutputStream();
     byte[] buffer = new byte[2048];
     int len = 0;
 
     while (((len = is.read(buffer)) != -1) &&
-           ((analyzeLength == 0) || (text.length() < analyzeLength))) {
+           ((analyzeLength == 0) || (out.size() < analyzeLength))) {
       if (analyzeLength != 0) {
-          len = Math.min(len, analyzeLength - text.length());
+          len = Math.min(len, analyzeLength - out.size());
       }
-      text.append(new String(buffer, 0, len, "UTF-8"));
+      out.write(buffer, 0, len);
     }
-    return identify(text);
+    return identify((charset == null) ? out.toString()
+                                      : out.toString(charset));
   }
 
 }

Modified: lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/DOMBuilder.java
                
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/DOMBuilder.java?rev=265503&r1=265502&r2=265503&view=diff
 ==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/DOMBuilder.java \
                (original)
+++ lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/DOMBuilder.java \
Wed Aug 31 08:17:11 2005 @@ -42,7 +42,6 @@
  * This class takes SAX events (in addition to some extra events
  * that SAX doesn't handle yet) and adds the result to a document
  * or document fragment.
- * @xsl.usage general
  */
 public class DOMBuilder
         implements ContentHandler, LexicalHandler

Modified: lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/XMLCharacterRecognizer.java
                
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/XMLCharacterRecognizer.java?rev=265503&r1=265502&r2=265503&view=diff
 ==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/XMLCharacterRecognizer.java \
                (original)
+++ lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/XMLCharacterRecognizer.java \
Wed Aug 31 08:17:11 2005 @@ -27,7 +27,6 @@
 /**
  * Class used to verify whether the specified <var>ch</var> 
  * conforms to the XML 1.0 definition of whitespace. 
- * @xsl.usage internal
  */
 public class XMLCharacterRecognizer
 {
@@ -90,7 +89,7 @@
   /**
    * Tell if the string is whitespace.
    *
-   * @param buf StringBuffer to check as XML whitespace.
+   * @param s String to check as XML whitespace.
    * @return True if characters in buffer are XML whitespace, false otherwise
    */
   public static boolean isWhiteSpace(String s)

Modified: lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/DummySSLProtocolSocketFactory.java
                
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/DummySSLProtocolSocketFactory.java?rev=265503&r1=265502&r2=265503&view=diff
 ==============================================================================
--- lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/DummySSLProtocolSocketFactory.java \
                (original)
+++ lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/DummySSLProtocolSocketFactory.java \
Wed Aug 31 08:17:11 2005 @@ -58,7 +58,7 @@
   }
 
   /**
-   * @see SecureProtocolSocketFactory#createSocket(java.lang.String,int,java.net.InetAddress,int)
+   * @see org.apache.commons.httpclient.protocol.SecureProtocolSocketFactory#createSocket(String,int,InetAddress,int)
    */
   public Socket createSocket(String host, int port, InetAddress clientHost, int \
clientPort) throws IOException,  UnknownHostException {
@@ -79,8 +79,8 @@
    * 
    * @param host the host name/IP
    * @param port the port on the host
-   * @param clientHost the local host name/IP to bind the socket to
-   * @param clientPort the port on the local machine
+   * @param localAddress the local host name/IP to bind the socket to
+   * @param localPort the port on the local machine
    * @param params {@link HttpConnectionParams Http connection parameters}
    * 
    * @return Socket a new socket
@@ -104,14 +104,14 @@
   }
 
   /**
-   * @see SecureProtocolSocketFactory#createSocket(java.lang.String,int)
+   * @see org.apache.commons.httpclient.protocol.SecureProtocolSocketFactory#createSocket(String,int)
    */
   public Socket createSocket(String host, int port) throws IOException, \
UnknownHostException {  return getSSLContext().getSocketFactory().createSocket(host, \
port);  }
 
   /**
-   * @see SecureProtocolSocketFactory#createSocket(java.net.Socket,java.lang.String,int,boolean)
+   * @see org.apache.commons.httpclient.protocol.SecureProtocolSocketFactory#createSocket(Socket,String,int,boolean)
    */
   public Socket createSocket(Socket socket, String host, int port, boolean \
autoClose) throws IOException,  UnknownHostException {




-------------------------------------------------------
SF.Net email is Sponsored by the Better Software Conference & EXPO
September 19-22, 2005 * San Francisco, CA * Development Lifecycle Practices
Agile & Plan-Driven Development * Managing Projects & Teams * Testing & QA
Security * Process Improvement & Measurement * http://www.sqe.com/bsce5sf
_______________________________________________
Nutch-cvs mailing list
Nutch-cvs@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/nutch-cvs


[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic