[prev in list] [next in list] [prev in thread] [next in thread]
List: slide-dev
Subject: svn commit: r360272 -
From: dflorey () apache ! org
Date: 2005-12-31 11:47:57
Message-ID: 20051231114801.95566.qmail () minotaur ! apache ! org
[Download RAW message or body]
Author: dflorey
Date: Sat Dec 31 03:47:45 2005
New Revision: 360272
URL: http://svn.apache.org/viewcvs?rev=360272&view=rev
Log:
Refactored the PropertyExtractor interface to allow more sophisticated
property extraction.
Modified:
jakarta/slide/trunk/src/share/org/apache/slide/extractor/AbstractPropertyExtractor.java
jakarta/slide/trunk/src/share/org/apache/slide/extractor/ExtractorManager.java
jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSExcelExtractor.java
jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSPowerPointExtractor.java
jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSWordExtractor.java
jakarta/slide/trunk/src/share/org/apache/slide/extractor/OfficeExtractor.java
jakarta/slide/trunk/src/share/org/apache/slide/extractor/PDFExtractor.java
jakarta/slide/trunk/src/share/org/apache/slide/extractor/PropertyExtractor.java
jakarta/slide/trunk/src/share/org/apache/slide/extractor/PropertyExtractorTrigger.java
jakarta/slide/trunk/src/share/org/apache/slide/extractor/SimpleXmlExtractor.java
Modified: jakarta/slide/trunk/src/share/org/apache/slide/extractor/AbstractPropertyExtractor.java
URL: http://svn.apache.org/viewcvs/jakarta/slide/trunk/src/share/org/apache/slide/extractor/AbstractPropertyExtractor.java?rev=360272&r1=360271&r2=360272&view=diff
==============================================================================
--- jakarta/slide/trunk/src/share/org/apache/slide/extractor/AbstractPropertyExtractor.java \
(original)
+++ jakarta/slide/trunk/src/share/org/apache/slide/extractor/AbstractPropertyExtractor.java \
Sat Dec 31 03:47:45 2005 @@ -26,6 +26,9 @@
import java.io.InputStream;
import java.util.Map;
+import org.apache.slide.content.NodeRevisionDescriptor;
+import org.apache.slide.content.NodeRevisionDescriptors;
+
/**
* The AbstractPropertyExtractor class
*/
@@ -45,7 +48,7 @@
this.namespace = namespace;
}
- public abstract Map extract(InputStream content) throws ExtractorException;
+ public abstract Map extract(NodeRevisionDescriptors descriptors, \
NodeRevisionDescriptor descriptor, InputStream content) throws ExtractorException;
/* (non-Javadoc)
* @see org.apache.slide.extractor.Extractor#getContentType()
Modified: jakarta/slide/trunk/src/share/org/apache/slide/extractor/ExtractorManager.java
URL: http://svn.apache.org/viewcvs/jakarta/slide/trunk/src/share/org/apache/slide/extractor/ExtractorManager.java?rev=360272&r1=360271&r2=360272&view=diff
==============================================================================
--- jakarta/slide/trunk/src/share/org/apache/slide/extractor/ExtractorManager.java \
(original)
+++ jakarta/slide/trunk/src/share/org/apache/slide/extractor/ExtractorManager.java \
Sat Dec 31 03:47:45 2005 @@ -28,6 +28,7 @@
import java.util.Enumeration;
import java.util.Iterator;
import java.util.List;
+
import org.apache.slide.content.NodeRevisionDescriptor;
import org.apache.slide.content.NodeRevisionDescriptors;
import org.apache.slide.util.conf.Configurable;
Modified: jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSExcelExtractor.java
URL: http://svn.apache.org/viewcvs/jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSExcelExtractor.java?rev=360272&r1=360271&r2=360272&view=diff
==============================================================================
--- jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSExcelExtractor.java \
(original)
+++ jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSExcelExtractor.java \
Sat Dec 31 03:47:45 2005 @@ -29,6 +29,7 @@
import java.io.InputStream;
import java.io.Reader;
import java.util.Iterator;
+
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
Modified: jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSPowerPointExtractor.java
URL: http://svn.apache.org/viewcvs/jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSPowerPointExtractor.java?rev=360272&r1=360271&r2=360272&view=diff
==============================================================================
--- jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSPowerPointExtractor.java \
(original)
+++ jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSPowerPointExtractor.java \
Sat Dec 31 03:47:45 2005 @@ -29,6 +29,7 @@
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
+
import org.apache.poi.poifs.eventfilesystem.POIFSReader;
import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent;
import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener;
Modified: jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSWordExtractor.java
URL: http://svn.apache.org/viewcvs/jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSWordExtractor.java?rev=360272&r1=360271&r2=360272&view=diff
==============================================================================
--- jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSWordExtractor.java \
(original)
+++ jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSWordExtractor.java Sat \
Dec 31 03:47:45 2005 @@ -27,6 +27,7 @@
import java.io.InputStream;
import java.io.Reader;
import java.io.StringReader;
+
import org.textmining.text.extraction.WordExtractor;
/**
Modified: jakarta/slide/trunk/src/share/org/apache/slide/extractor/OfficeExtractor.java
URL: http://svn.apache.org/viewcvs/jakarta/slide/trunk/src/share/org/apache/slide/extractor/OfficeExtractor.java?rev=360272&r1=360271&r2=360272&view=diff
==============================================================================
--- jakarta/slide/trunk/src/share/org/apache/slide/extractor/OfficeExtractor.java \
(original)
+++ jakarta/slide/trunk/src/share/org/apache/slide/extractor/OfficeExtractor.java Sat \
Dec 31 03:47:45 2005 @@ -7,6 +7,7 @@
import java.util.Iterator;
import java.util.List;
import java.util.Map;
+
import org.apache.poi.hpsf.NoPropertySetStreamException;
import org.apache.poi.hpsf.Property;
import org.apache.poi.hpsf.PropertySet;
@@ -16,6 +17,8 @@
import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent;
import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener;
import org.apache.slide.common.PropertyName;
+import org.apache.slide.content.NodeRevisionDescriptor;
+import org.apache.slide.content.NodeRevisionDescriptors;
import org.apache.slide.util.conf.Configurable;
import org.apache.slide.util.conf.Configuration;
import org.apache.slide.util.conf.ConfigurationException;
@@ -100,7 +103,7 @@
super(uri, contentType, namespace);
}
- public Map extract(InputStream content) throws ExtractorException {
+ public Map extract(NodeRevisionDescriptors descriptors, NodeRevisionDescriptor \
descriptor, InputStream content) throws ExtractorException { \
OfficePropertiesListener listener = new OfficePropertiesListener(); try {
POIFSReader r = new POIFSReader();
Modified: jakarta/slide/trunk/src/share/org/apache/slide/extractor/PDFExtractor.java
URL: http://svn.apache.org/viewcvs/jakarta/slide/trunk/src/share/org/apache/slide/extractor/PDFExtractor.java?rev=360272&r1=360271&r2=360272&view=diff
==============================================================================
--- jakarta/slide/trunk/src/share/org/apache/slide/extractor/PDFExtractor.java \
(original)
+++ jakarta/slide/trunk/src/share/org/apache/slide/extractor/PDFExtractor.java Sat \
Dec 31 03:47:45 2005 @@ -28,6 +28,7 @@
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.Reader;
+
import org.pdfbox.pdfparser.PDFParser;
import org.pdfbox.pdmodel.PDDocument;
import org.pdfbox.util.PDFTextStripper;
Modified: jakarta/slide/trunk/src/share/org/apache/slide/extractor/PropertyExtractor.java
URL: http://svn.apache.org/viewcvs/jakarta/slide/trunk/src/share/org/apache/slide/extractor/PropertyExtractor.java?rev=360272&r1=360271&r2=360272&view=diff
==============================================================================
--- jakarta/slide/trunk/src/share/org/apache/slide/extractor/PropertyExtractor.java \
(original)
+++ jakarta/slide/trunk/src/share/org/apache/slide/extractor/PropertyExtractor.java \
Sat Dec 31 03:47:45 2005 @@ -26,6 +26,9 @@
import java.io.InputStream;
import java.util.Map;
+import org.apache.slide.content.NodeRevisionDescriptor;
+import org.apache.slide.content.NodeRevisionDescriptors;
+
/**
* The PropertyExtractor interface
*
@@ -36,5 +39,5 @@
* Gets extracted property value from the resource, for example "author"
* for a word doc, ...
*/
- public Map extract(InputStream content) throws ExtractorException;
+ public Map extract(NodeRevisionDescriptors descriptors, NodeRevisionDescriptor \
descriptor, InputStream content) throws ExtractorException; }
Modified: jakarta/slide/trunk/src/share/org/apache/slide/extractor/PropertyExtractorTrigger.java
URL: http://svn.apache.org/viewcvs/jakarta/slide/trunk/src/share/org/apache/slide/extractor/PropertyExtractorTrigger.java?rev=360272&r1=360271&r2=360272&view=diff
==============================================================================
--- jakarta/slide/trunk/src/share/org/apache/slide/extractor/PropertyExtractorTrigger.java \
(original)
+++ jakarta/slide/trunk/src/share/org/apache/slide/extractor/PropertyExtractorTrigger.java \
Sat Dec 31 03:47:45 2005 @@ -54,7 +54,7 @@
if ( content != null && descriptor != null ) {
List extractor = \
ExtractorManager.getInstance().getPropertyExtractors(namespaceName, descriptors, \
descriptor); for ( int i = 0, l = extractor.size(); i < l; i++ ) {
- Map extractedProperties = \
((PropertyExtractor)extractor.get(i)).extract(new \
ByteArrayInputStream(content.getContentBytes())); + Map \
extractedProperties = ((PropertyExtractor)extractor.get(i)).extract(descriptors, \
descriptor, new ByteArrayInputStream(content.getContentBytes()));
for ( Iterator j = extractedProperties.entrySet().iterator(); \
j.hasNext(); ) { Map.Entry entry = (Map.Entry) j.next();
final Object key = entry.getKey();
Modified: jakarta/slide/trunk/src/share/org/apache/slide/extractor/SimpleXmlExtractor.java
URL: http://svn.apache.org/viewcvs/jakarta/slide/trunk/src/share/org/apache/slide/extractor/SimpleXmlExtractor.java?rev=360272&r1=360271&r2=360272&view=diff
==============================================================================
--- jakarta/slide/trunk/src/share/org/apache/slide/extractor/SimpleXmlExtractor.java \
(original)
+++ jakarta/slide/trunk/src/share/org/apache/slide/extractor/SimpleXmlExtractor.java \
Sat Dec 31 03:47:45 2005 @@ -31,7 +31,10 @@
import java.util.Iterator;
import java.util.List;
import java.util.Map;
+
import org.apache.slide.common.PropertyName;
+import org.apache.slide.content.NodeRevisionDescriptor;
+import org.apache.slide.content.NodeRevisionDescriptors;
import org.apache.slide.util.conf.Configurable;
import org.apache.slide.util.conf.Configuration;
import org.apache.slide.util.conf.ConfigurationException;
@@ -93,7 +96,7 @@
super(uri, contentType, namespace);
}
- public Map extract(InputStream content) throws ExtractorException {
+ public Map extract(NodeRevisionDescriptors descriptors, NodeRevisionDescriptor \
descriptor, InputStream content) throws ExtractorException { Map properties = new \
HashMap(); try {
SAXBuilder saxBuilder = new SAXBuilder();
---------------------------------------------------------------------
To unsubscribe, e-mail: slide-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: slide-dev-help@jakarta.apache.org
[prev in list] [next in list] [prev in thread] [next in thread]
Configure |
About |
News |
Add a list |
Sponsored by KoreLogic