[prev in list] [next in list] [prev in thread] [next in thread]
List: nutch-cvs
Subject: [Nutch-cvs] svn commit: r265503 - in /lucene/nutch/trunk/src:
From: jerome () apache ! org
Date: 2005-08-31 15:17:36
Message-ID: 20050831151741.16143.qmail () minotaur ! apache ! org
[Download RAW message or body]
Author: jerome
Date: Wed Aug 31 08:17:11 2005
New Revision: 265503
URL: http://svn.apache.org/viewcvs?rev=265503&view=rev
Log:
Merged 0.7 branch changes 240321:240453 into trunk
Modified:
lucene/nutch/trunk/src/java/org/apache/nutch/clustering/OnlineClusterer.java
lucene/nutch/trunk/src/java/org/apache/nutch/fs/NutchFileSystem.java
lucene/nutch/trunk/src/java/org/apache/nutch/mapReduce/FileSplit.java
lucene/nutch/trunk/src/java/org/apache/nutch/mapReduce/MapOutputFile.java
lucene/nutch/trunk/src/java/org/apache/nutch/mapReduce/RecordReader.java
lucene/nutch/trunk/src/java/org/apache/nutch/mapReduce/package.html
lucene/nutch/trunk/src/java/org/apache/nutch/parse/Parse.java
lucene/nutch/trunk/src/java/org/apache/nutch/protocol/Content.java
lucene/nutch/trunk/src/java/org/apache/nutch/protocol/ProtocolException.java
lucene/nutch/trunk/src/java/org/apache/nutch/protocol/ResourceGone.java
lucene/nutch/trunk/src/java/org/apache/nutch/protocol/ResourceMoved.java
lucene/nutch/trunk/src/java/org/apache/nutch/protocol/RetryLater.java
lucene/nutch/trunk/src/java/org/apache/nutch/searcher/Hits.java
lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java
lucene/nutch/trunk/src/java/org/apache/nutch/util/Daemon.java
lucene/nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/LanguageIdentifier.java
lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/DOMBuilder.java
lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/XMLCharacterRecognizer.java
lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/DummySSLProtocolSocketFactory.java
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/clustering/OnlineClusterer.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/clustering/OnlineClusterer.java?rev=265503&r1=265502&r2=265503&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/clustering/OnlineClusterer.java \
(original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/clustering/OnlineClusterer.java Wed \
Aug 31 08:17:11 2005 @@ -23,8 +23,8 @@
* algorithms.
*
* <p>By the term <b>online</b> search results clustering we will understand
- * a clusterer that works on a set of {@link Hit}s retrieved for a user's query
- * and produces a set of {@link Clusters} that can be displayed to help
+ * a clusterer that works on a set of {@link HitDetails} retrieved for a user's
+ * query and produces a set of {@link HitsCluster} that can be displayed to help
* the user gain insight in the topics found in the result.</p>
*
* <p>Other clustering options include predefined categories and off-line
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/fs/NutchFileSystem.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/fs/NutchFileSystem.java?rev=265503&r1=265502&r2=265503&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/fs/NutchFileSystem.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/fs/NutchFileSystem.java Wed Aug 31 \
08:17:11 2005 @@ -80,8 +80,8 @@
return getNamed(NutchConf.get().get("fs.default.name", "local"));
}
- /** Returns a name for this filesystem, suitable to pass to {@link
- * NutchFileSystem#getNamed(String).*/
+ /** Returns a name for this filesystem, suitable to pass to
+ * {@link NutchFileSystem#getNamed(String)}.*/
public abstract String getName();
/** Returns a named filesystem. Names are either the string "local" or a
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/mapReduce/FileSplit.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/mapReduce/FileSplit.java?rev=265503&r1=265502&r2=265503&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/mapReduce/FileSplit.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/mapReduce/FileSplit.java Wed Aug 31 \
08:17:11 2005 @@ -25,9 +25,12 @@
import org.apache.nutch.io.UTF8;
import org.apache.nutch.fs.NutchFileSystem;
-/** A section of an input file. Returned by {@link
- * InputFormat#getSplits(File[], int)} and passed to
- * InputFormat#getRecordReader(FileSplit). */
+/**
+ * A section of an input file.
+ * Returned by {@link InputFormat#getSplits(NutchFileSystem, JobConf, int)}
+ * and passed to
+ * {@link InputFormat#getRecordReader(NutchFileSystem, FileSplit, JobConf)}.
+ */
public class FileSplit implements Writable {
private File file;
private long start;
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/mapReduce/MapOutputFile.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/mapReduce/MapOutputFile.java?rev=265503&r1=265502&r2=265503&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/mapReduce/MapOutputFile.java \
(original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/mapReduce/MapOutputFile.java Wed Aug \
31 08:17:11 2005 @@ -42,7 +42,7 @@
/** Create a local reduce input file name.
* @param mapTaskId a map task id
- * @param partition a reduce partition
+ * @param reduceTaskId a reduce task id
*/
public static File getInputFile(String mapTaskId, String reduceTaskId) {
File taskDir = new File(LOCAL_DIR, reduceTaskId);
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/mapReduce/RecordReader.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/mapReduce/RecordReader.java?rev=265503&r1=265502&r2=265503&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/mapReduce/RecordReader.java \
(original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/mapReduce/RecordReader.java Wed Aug \
31 08:17:11 2005 @@ -22,7 +22,7 @@
import org.apache.nutch.io.WritableComparable;
import org.apache.nutch.io.Writable;
-/** Reads key/value pairs from an input file {@link InputFormat.Split}.
+/** Reads key/value pairs from an input file {@link FileSplit}.
* Implemented by {@link InputFormat} implementations. */
public interface RecordReader {
/** Reads the next key/value pair.
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/mapReduce/package.html
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/mapReduce/package.html?rev=265503&r1=265502&r2=265503&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/mapReduce/package.html (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/mapReduce/package.html Wed Aug 31 \
08:17:11 2005 @@ -6,7 +6,7 @@
<p>Applications implement {@link org.apache.nutch.mapReduce.Mapper} and
{@link org.apache.nutch.mapReduce.Reducer} interfaces. These are submitted
-as a {@link org.apache.nutch.mapReduce.MapReduceJob} and are applied to data
+as a MapReduceJob and are applied to data
stored in a {@link org.apache.nutch.fs.NutchFileSystem}.</p>
<p>See <a href="http://labs.google.com/papers/mapreduce.html">Google's
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/parse/Parse.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/parse/Parse.java?rev=265503&r1=265502&r2=265503&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/parse/Parse.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/parse/Parse.java Wed Aug 31 08:17:11 \
2005 @@ -17,7 +17,7 @@
package org.apache.nutch.parse;
/** The result of parsing a page's raw content.
- * @see Parser#getParse(FetcherOutput,Content)
+ * @see Parser#getParse(Content)
*/
public interface Parse {
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/protocol/Content.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/protocol/Content.java?rev=265503&r1=265502&r2=265503&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/protocol/Content.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/protocol/Content.java Wed Aug 31 \
08:17:11 2005 @@ -114,7 +114,8 @@
public void setContent(byte[] content) { this.content = content; }
/** The media type of the retrieved content.
- * @see http://www.iana.org/assignments/media-types/
+ * @see <a href="http://www.iana.org/assignments/media-types/">
+ * http://www.iana.org/assignments/media-types/</a>
*/
public String getContentType() { return contentType; }
public void setContentType(String contentType) {
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/protocol/ProtocolException.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/protocol/ProtocolException.java?rev=265503&r1=265502&r2=265503&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/protocol/ProtocolException.java \
(original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/protocol/ProtocolException.java Wed \
Aug 31 08:17:11 2005 @@ -18,7 +18,6 @@
import java.net.URL;
-/** Thrown by {@link Protocol#getContent(String)}.*/
public class ProtocolException extends Exception {
public ProtocolException() {
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/protocol/ResourceGone.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/protocol/ResourceGone.java?rev=265503&r1=265502&r2=265503&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/protocol/ResourceGone.java \
(original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/protocol/ResourceGone.java Wed Aug \
31 08:17:11 2005 @@ -19,7 +19,7 @@
import java.io.IOException;
import java.net.URL;
-/** Thrown by {@link Protocol#getContent(String)} when a {@link URL} is invalid.*/
+/** Thrown when a resource is invalid. */
public class ResourceGone extends ProtocolException {
private URL url;
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/protocol/ResourceMoved.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/protocol/ResourceMoved.java?rev=265503&r1=265502&r2=265503&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/protocol/ResourceMoved.java \
(original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/protocol/ResourceMoved.java Wed Aug \
31 08:17:11 2005 @@ -19,8 +19,7 @@
import java.io.IOException;
import java.net.URL;
-/** Thrown by {@link Protocol#getContent(String)} when a {@link URL} no longer
- * exists.*/
+/** Thrown when a resource no longer exists.*/
public class ResourceMoved extends IOException {
private URL oldUrl;
private URL newUrl;
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/protocol/RetryLater.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/protocol/RetryLater.java?rev=265503&r1=265502&r2=265503&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/protocol/RetryLater.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/protocol/RetryLater.java Wed Aug 31 \
08:17:11 2005 @@ -19,8 +19,7 @@
import java.io.IOException;
import java.net.URL;
-/** Thrown by {@link Protocol#getContent(String)} when a {@link URL} should be
- * retried later.*/
+/** Thrown when a resource should be retried later.*/
public class RetryLater extends ProtocolException {
private URL url;
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/searcher/Hits.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/searcher/Hits.java?rev=265503&r1=265502&r2=265503&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/searcher/Hits.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/searcher/Hits.java Wed Aug 31 \
08:17:11 2005 @@ -44,14 +44,14 @@
}
/** Returns the total number of hits for this query. This may be an estimate
- * when (@link totalIsExact()} is false. */
+ * when (@link #totalIsExact()} is false. */
public long getTotal() { return total; }
- /** True if {@link getTotal()} gives the exact number of hits, or false if
+ /** True if {@link #getTotal()} gives the exact number of hits, or false if
* it is only an estimate of the total number of hits. */
public boolean totalIsExact() { return totalIsExact; }
- /** Set {@link totalIsExact()}. */
+ /** Set {@link #totalIsExact()}. */
public void setTotalIsExact(boolean isExact) { totalIsExact = isExact; }
/** Returns the number of hits included in this current listing. */
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java?rev=265503&r1=265502&r2=265503&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java \
(original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java Wed Aug \
31 08:17:11 2005 @@ -179,7 +179,8 @@
* @param withParseText if true, fix parse_text, otherwise ignore it
* @param withParseData if true, fix parse_data, otherwise ignore it
* @param dryrun if true, only show what would be done without performing any \
actions
- * @return
+ * @return <code>true</code> if segment was fixed successfully, otherwise
+ * return <code>false</code>.
*/
public static boolean fixSegment(NutchFileSystem nfs, File dir,
boolean withContent, boolean withParseText, boolean withParseData,
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/util/Daemon.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/util/Daemon.java?rev=265503&r1=265502&r2=265503&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/util/Daemon.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/util/Daemon.java Wed Aug 31 08:17:11 \
2005 @@ -16,7 +16,7 @@
package org.apache.nutch.util;
-/** A thread that has called {@link Thread#SetDaemon(boolean) } with true.*/
+/** A thread that has called {@link Thread#setDaemon(boolean) } with true.*/
public class Daemon extends Thread {
{
Modified: lucene/nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/LanguageIdentifier.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/languageidentifier/sr \
c/java/org/apache/nutch/analysis/lang/LanguageIdentifier.java?rev=265503&r1=265502&r2=265503&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/LanguageIdentifier.java \
(original)
+++ lucene/nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/LanguageIdentifier.java \
Wed Aug 31 08:17:11 2005 @@ -20,6 +20,7 @@
import java.io.InputStream;
import java.io.IOException;
import java.io.BufferedReader;
+import java.io.ByteArrayOutputStream;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.List;
@@ -48,9 +49,13 @@
/**
+ * Identify the language of a content, based on statistical analysis.
+ *
+ * @see <a href="http://www.w3.org/WAI/ER/IG/ert/iso639.htm">ISO 639
+ * Language Codes</a>
*
* @author Sami Siren
- * @author Jerome Charron
+ * @author Jérôme Charron
*/
public class LanguageIdentifier {
@@ -59,8 +64,8 @@
private final static float SCORE_THRESOLD = 0.00F;
- public final static Logger LOG = \
LogFormatter.getLogger(LanguageIdentifier.class.getName());
-
+ private final static Logger LOG =
+ LogFormatter.getLogger(LanguageIdentifier.class.getName());
private ArrayList languages = new ArrayList();
@@ -168,7 +173,8 @@
}
/**
- * return handle to singleton instance
+ * Get a LanguageIdentifier instance.
+ * @return the LanguageIdentifier singleton instance.
*/
public static LanguageIdentifier getInstance() {
if (identifier == null) {
@@ -182,13 +188,25 @@
}
/**
- * main method used for testing
- *
- * @param args
+ * Main method used for command line process.
+ * <br/>Usage is:
+ * <pre>
+ * LanguageIdentifier [-identifyrows filename maxlines]
+ * [-identifyfile charset filename]
+ * [-identifyfileset charset files]
+ * [-identifytext text]
+ * [-identifyurl url]
+ * </pre>
+ * @param args arguments.
*/
public static void main(String args[]) {
- String usage = "Usage: LanguageIdentifier [-identifyrows filename maxlines] \
[-identifyfile filename] [-identifyfileset files] [-identifytext text] [-identifyurl \
url]";
+ String usage = "Usage: LanguageIdentifier " +
+ "[-identifyrows filename maxlines] " +
+ "[-identifyfile charset filename] " +
+ "[-identifyfileset charset files] " +
+ "[-identifytext text] " +
+ "[-identifyurl url]";
int command = 0;
final int IDFILE = 1;
@@ -199,6 +217,7 @@
Vector fileset = new Vector();
String filename = "";
+ String charset = "";
String url = "";
String text = "";
int max = 0;
@@ -211,6 +230,7 @@
for (int i = 0; i < args.length; i++) { // parse command line
if (args[i].equals("-identifyfile")) {
command = IDFILE;
+ charset = args[++i];
filename = args[++i];
}
@@ -233,6 +253,7 @@
if (args[i].equals("-identifyfileset")) {
command = IDFILESET;
+ charset = args[++i];
for (i++; i < args.length; i++) {
File[] files = null;
File f = new File(args[i]);
@@ -264,7 +285,7 @@
case IDFILE:
f = new File(filename);
fis = new FileInputStream(f);
- lang = idfr.identify(fis);
+ lang = idfr.identify(fis, charset);
fis.close();
break;
@@ -302,7 +323,7 @@
filename = (String) i.next();
f = new File(filename);
fis = new FileInputStream(f);
- lang = idfr.identify(fis);
+ lang = idfr.identify(fis, charset);
fis.close();
} catch (Exception e) {
System.out.println(e);
@@ -349,22 +370,26 @@
}
/**
- * Identify language based on submitted content
+ * Identify language of a content.
*
- * @param text to analyze
- * @return 2 letter ISO639 code of language (en, fi, sv...) , or null if
- * unknown
+ * @param content is the content to analyze.
+ * @return The 2 letter
+ * <a href="http://www.w3.org/WAI/ER/IG/ert/iso639.htm">ISO 639
+ * language code</a> (en, fi, sv, ...) of the language that best
+ * matches the specified content.
*/
- public String identify(String text) {
- return identify(new StringBuffer(text));
+ public String identify(String content) {
+ return identify(new StringBuffer(content));
}
/**
- * Identify language based on submitted content
+ * Identify language of a content.
*
- * @param text to analyze
- * @return 2 letter ISO639 code of language (en, fi, sv...) , or null if
- * unknown
+ * @param content is the content to analyze.
+ * @return The 2 letter
+ * <a href="http://www.w3.org/WAI/ER/IG/ert/iso639.htm">ISO 639
+ * language code</a> (en, fi, sv, ...) of the language that best
+ * matches the specified content.
*/
public String identify(StringBuffer content) {
@@ -405,26 +430,48 @@
}
/**
- * Identify language from inputstream
- *
- * @param is
- * @return language code
- * @throws IOException
+ * Identify language from input stream.
+ * This method uses the platform default encoding to read the input stream.
+ * For using a specific encoding, use the
+ * {@link #identify(InputStream, String)} method.
+ *
+ * @param is is the input stream to analyze.
+ * @return The 2 letter
+ * <a href="http://www.w3.org/WAI/ER/IG/ert/iso639.htm">ISO 639
+ * language code</a> (en, fi, sv, ...) of the language that best
+ * matches the content of the specified input stream.
+ * @throws IOException if something wrong occurs on the input stream.
*/
public String identify(InputStream is) throws IOException {
+ return identify(is, null);
+ }
+
+ /**
+ * Identify language from input stream.
+ *
+ * @param is is the input stream to analyze.
+ * @param charset is the charset to use to read the input stream.
+ * @return The 2 letter
+ * <a href="http://www.w3.org/WAI/ER/IG/ert/iso639.htm">ISO 639
+ * language code</a> (en, fi, sv, ...) of the language that best
+ * matches the content of the specified input stream.
+ * @throws IOException if something wrong occurs on the input stream.
+ */
+ public String identify(InputStream is, String charset) throws IOException {
- StringBuffer text = new StringBuffer();
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
byte[] buffer = new byte[2048];
int len = 0;
while (((len = is.read(buffer)) != -1) &&
- ((analyzeLength == 0) || (text.length() < analyzeLength))) {
+ ((analyzeLength == 0) || (out.size() < analyzeLength))) {
if (analyzeLength != 0) {
- len = Math.min(len, analyzeLength - text.length());
+ len = Math.min(len, analyzeLength - out.size());
}
- text.append(new String(buffer, 0, len, "UTF-8"));
+ out.write(buffer, 0, len);
}
- return identify(text);
+ return identify((charset == null) ? out.toString()
+ : out.toString(charset));
}
}
Modified: lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/DOMBuilder.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-html/src/java/o \
rg/apache/nutch/parse/html/DOMBuilder.java?rev=265503&r1=265502&r2=265503&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/DOMBuilder.java \
(original)
+++ lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/DOMBuilder.java \
Wed Aug 31 08:17:11 2005 @@ -42,7 +42,6 @@
* This class takes SAX events (in addition to some extra events
* that SAX doesn't handle yet) and adds the result to a document
* or document fragment.
- * @xsl.usage general
*/
public class DOMBuilder
implements ContentHandler, LexicalHandler
Modified: lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/XMLCharacterRecognizer.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-html/src/java/o \
rg/apache/nutch/parse/html/XMLCharacterRecognizer.java?rev=265503&r1=265502&r2=265503&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/XMLCharacterRecognizer.java \
(original)
+++ lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/XMLCharacterRecognizer.java \
Wed Aug 31 08:17:11 2005 @@ -27,7 +27,6 @@
/**
* Class used to verify whether the specified <var>ch</var>
* conforms to the XML 1.0 definition of whitespace.
- * @xsl.usage internal
*/
public class XMLCharacterRecognizer
{
@@ -90,7 +89,7 @@
/**
* Tell if the string is whitespace.
*
- * @param buf StringBuffer to check as XML whitespace.
+ * @param s String to check as XML whitespace.
* @return True if characters in buffer are XML whitespace, false otherwise
*/
public static boolean isWhiteSpace(String s)
Modified: lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/DummySSLProtocolSocketFactory.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/protocol-httpclient/s \
rc/java/org/apache/nutch/protocol/httpclient/DummySSLProtocolSocketFactory.java?rev=265503&r1=265502&r2=265503&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/DummySSLProtocolSocketFactory.java \
(original)
+++ lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/DummySSLProtocolSocketFactory.java \
Wed Aug 31 08:17:11 2005 @@ -58,7 +58,7 @@
}
/**
- * @see SecureProtocolSocketFactory#createSocket(java.lang.String,int,java.net.InetAddress,int)
+ * @see org.apache.commons.httpclient.protocol.SecureProtocolSocketFactory#createSocket(String,int,InetAddress,int)
*/
public Socket createSocket(String host, int port, InetAddress clientHost, int \
clientPort) throws IOException, UnknownHostException {
@@ -79,8 +79,8 @@
*
* @param host the host name/IP
* @param port the port on the host
- * @param clientHost the local host name/IP to bind the socket to
- * @param clientPort the port on the local machine
+ * @param localAddress the local host name/IP to bind the socket to
+ * @param localPort the port on the local machine
* @param params {@link HttpConnectionParams Http connection parameters}
*
* @return Socket a new socket
@@ -104,14 +104,14 @@
}
/**
- * @see SecureProtocolSocketFactory#createSocket(java.lang.String,int)
+ * @see org.apache.commons.httpclient.protocol.SecureProtocolSocketFactory#createSocket(String,int)
*/
public Socket createSocket(String host, int port) throws IOException, \
UnknownHostException { return getSSLContext().getSocketFactory().createSocket(host, \
port); }
/**
- * @see SecureProtocolSocketFactory#createSocket(java.net.Socket,java.lang.String,int,boolean)
+ * @see org.apache.commons.httpclient.protocol.SecureProtocolSocketFactory#createSocket(Socket,String,int,boolean)
*/
public Socket createSocket(Socket socket, String host, int port, boolean \
autoClose) throws IOException, UnknownHostException {
-------------------------------------------------------
SF.Net email is Sponsored by the Better Software Conference & EXPO
September 19-22, 2005 * San Francisco, CA * Development Lifecycle Practices
Agile & Plan-Driven Development * Managing Projects & Teams * Testing & QA
Security * Process Improvement & Measurement * http://www.sqe.com/bsce5sf
_______________________________________________
Nutch-cvs mailing list
Nutch-cvs@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/nutch-cvs
[prev in list] [next in list] [prev in thread] [next in thread]
Configure |
About |
News |
Add a list |
Sponsored by KoreLogic