[prev in list] [next in list] [prev in thread] [next in thread] 

List:       slide-dev
Subject:    svn commit: r360272 -
From:       dflorey () apache ! org
Date:       2005-12-31 11:47:57
Message-ID: 20051231114801.95566.qmail () minotaur ! apache ! org
[Download RAW message or body]

Author: dflorey
Date: Sat Dec 31 03:47:45 2005
New Revision: 360272

URL: http://svn.apache.org/viewcvs?rev=360272&view=rev
Log:
Refactored the PropertyExtractor interface to allow more sophisticated
property extraction.

Modified:
    jakarta/slide/trunk/src/share/org/apache/slide/extractor/AbstractPropertyExtractor.java
  jakarta/slide/trunk/src/share/org/apache/slide/extractor/ExtractorManager.java
    jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSExcelExtractor.java
    jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSPowerPointExtractor.java
  jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSWordExtractor.java
    jakarta/slide/trunk/src/share/org/apache/slide/extractor/OfficeExtractor.java
    jakarta/slide/trunk/src/share/org/apache/slide/extractor/PDFExtractor.java
    jakarta/slide/trunk/src/share/org/apache/slide/extractor/PropertyExtractor.java
    jakarta/slide/trunk/src/share/org/apache/slide/extractor/PropertyExtractorTrigger.java
  jakarta/slide/trunk/src/share/org/apache/slide/extractor/SimpleXmlExtractor.java

Modified: jakarta/slide/trunk/src/share/org/apache/slide/extractor/AbstractPropertyExtractor.java
                
URL: http://svn.apache.org/viewcvs/jakarta/slide/trunk/src/share/org/apache/slide/extractor/AbstractPropertyExtractor.java?rev=360272&r1=360271&r2=360272&view=diff
 ==============================================================================
--- jakarta/slide/trunk/src/share/org/apache/slide/extractor/AbstractPropertyExtractor.java \
                (original)
+++ jakarta/slide/trunk/src/share/org/apache/slide/extractor/AbstractPropertyExtractor.java \
Sat Dec 31 03:47:45 2005 @@ -26,6 +26,9 @@
 import java.io.InputStream;
 import java.util.Map;
 
+import org.apache.slide.content.NodeRevisionDescriptor;
+import org.apache.slide.content.NodeRevisionDescriptors;
+
 /**
  * The AbstractPropertyExtractor class
  */
@@ -45,7 +48,7 @@
         this.namespace = namespace;
     }
 
-    public abstract Map extract(InputStream content) throws ExtractorException;
+    public abstract Map extract(NodeRevisionDescriptors descriptors, NodeRevisionDescriptor descriptor, InputStream content) throws ExtractorException;
 
     /* (non-Javadoc)
      * @see org.apache.slide.extractor.Extractor#getContentType()

Modified: jakarta/slide/trunk/src/share/org/apache/slide/extractor/ExtractorManager.java
                
URL: http://svn.apache.org/viewcvs/jakarta/slide/trunk/src/share/org/apache/slide/extractor/ExtractorManager.java?rev=360272&r1=360271&r2=360272&view=diff
 ==============================================================================
--- jakarta/slide/trunk/src/share/org/apache/slide/extractor/ExtractorManager.java \
                (original)
+++ jakarta/slide/trunk/src/share/org/apache/slide/extractor/ExtractorManager.java \
Sat Dec 31 03:47:45 2005 @@ -28,6 +28,7 @@
 import java.util.Enumeration;
 import java.util.Iterator;
 import java.util.List;
+
 import org.apache.slide.content.NodeRevisionDescriptor;
 import org.apache.slide.content.NodeRevisionDescriptors;
 import org.apache.slide.util.conf.Configurable;

Modified: jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSExcelExtractor.java
                
URL: http://svn.apache.org/viewcvs/jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSExcelExtractor.java?rev=360272&r1=360271&r2=360272&view=diff
 ==============================================================================
--- jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSExcelExtractor.java \
                (original)
+++ jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSExcelExtractor.java \
Sat Dec 31 03:47:45 2005 @@ -29,6 +29,7 @@
 import java.io.InputStream;
 import java.io.Reader;
 import java.util.Iterator;
+
 import org.apache.poi.hssf.usermodel.HSSFCell;
 import org.apache.poi.hssf.usermodel.HSSFRow;
 import org.apache.poi.hssf.usermodel.HSSFSheet;

Modified: jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSPowerPointExtractor.java
                
URL: http://svn.apache.org/viewcvs/jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSPowerPointExtractor.java?rev=360272&r1=360271&r2=360272&view=diff
 ==============================================================================
--- jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSPowerPointExtractor.java \
                (original)
+++ jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSPowerPointExtractor.java \
Sat Dec 31 03:47:45 2005 @@ -29,6 +29,7 @@
 import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.io.Reader;
+
 import org.apache.poi.poifs.eventfilesystem.POIFSReader;
 import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent;
 import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener;

Modified: jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSWordExtractor.java
                
URL: http://svn.apache.org/viewcvs/jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSWordExtractor.java?rev=360272&r1=360271&r2=360272&view=diff
 ==============================================================================
--- jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSWordExtractor.java \
                (original)
+++ jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSWordExtractor.java Sat \
Dec 31 03:47:45 2005 @@ -27,6 +27,7 @@
 import java.io.InputStream;
 import java.io.Reader;
 import java.io.StringReader;
+
 import org.textmining.text.extraction.WordExtractor;
 
 /**

Modified: jakarta/slide/trunk/src/share/org/apache/slide/extractor/OfficeExtractor.java
                
URL: http://svn.apache.org/viewcvs/jakarta/slide/trunk/src/share/org/apache/slide/extractor/OfficeExtractor.java?rev=360272&r1=360271&r2=360272&view=diff
 ==============================================================================
--- jakarta/slide/trunk/src/share/org/apache/slide/extractor/OfficeExtractor.java \
                (original)
+++ jakarta/slide/trunk/src/share/org/apache/slide/extractor/OfficeExtractor.java Sat \
Dec 31 03:47:45 2005 @@ -7,6 +7,7 @@
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
+
 import org.apache.poi.hpsf.NoPropertySetStreamException;
 import org.apache.poi.hpsf.Property;
 import org.apache.poi.hpsf.PropertySet;
@@ -16,6 +17,8 @@
 import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent;
 import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener;
 import org.apache.slide.common.PropertyName;
+import org.apache.slide.content.NodeRevisionDescriptor;
+import org.apache.slide.content.NodeRevisionDescriptors;
 import org.apache.slide.util.conf.Configurable;
 import org.apache.slide.util.conf.Configuration;
 import org.apache.slide.util.conf.ConfigurationException;
@@ -100,7 +103,7 @@
 		super(uri, contentType, namespace);
 	}
 
-	public Map extract(InputStream content) throws ExtractorException {
+	public Map extract(NodeRevisionDescriptors descriptors, NodeRevisionDescriptor descriptor, InputStream content) throws ExtractorException {
 		OfficePropertiesListener listener = new OfficePropertiesListener();
 		try {
 			POIFSReader r = new POIFSReader();

Modified: jakarta/slide/trunk/src/share/org/apache/slide/extractor/PDFExtractor.java
URL: http://svn.apache.org/viewcvs/jakarta/slide/trunk/src/share/org/apache/slide/extractor/PDFExtractor.java?rev=360272&r1=360271&r2=360272&view=diff
 ==============================================================================
--- jakarta/slide/trunk/src/share/org/apache/slide/extractor/PDFExtractor.java \
                (original)
+++ jakarta/slide/trunk/src/share/org/apache/slide/extractor/PDFExtractor.java Sat \
Dec 31 03:47:45 2005 @@ -28,6 +28,7 @@
 import java.io.FileInputStream;
 import java.io.InputStream;
 import java.io.Reader;
+
 import org.pdfbox.pdfparser.PDFParser;
 import org.pdfbox.pdmodel.PDDocument;
 import org.pdfbox.util.PDFTextStripper;

Modified: jakarta/slide/trunk/src/share/org/apache/slide/extractor/PropertyExtractor.java
                
URL: http://svn.apache.org/viewcvs/jakarta/slide/trunk/src/share/org/apache/slide/extractor/PropertyExtractor.java?rev=360272&r1=360271&r2=360272&view=diff
 ==============================================================================
--- jakarta/slide/trunk/src/share/org/apache/slide/extractor/PropertyExtractor.java \
                (original)
+++ jakarta/slide/trunk/src/share/org/apache/slide/extractor/PropertyExtractor.java \
Sat Dec 31 03:47:45 2005 @@ -26,6 +26,9 @@
 import java.io.InputStream;
 import java.util.Map;
 
+import org.apache.slide.content.NodeRevisionDescriptor;
+import org.apache.slide.content.NodeRevisionDescriptors;
+
 /**
  * The PropertyExtractor interface
  * 
@@ -36,5 +39,5 @@
      *  Gets extracted property value from the resource, for example "author"
      *  for a word doc, ...
      */
-    public Map extract(InputStream content) throws ExtractorException;
+    public Map extract(NodeRevisionDescriptors descriptors, NodeRevisionDescriptor descriptor, InputStream content) throws ExtractorException;
 }

Modified: jakarta/slide/trunk/src/share/org/apache/slide/extractor/PropertyExtractorTrigger.java
                
URL: http://svn.apache.org/viewcvs/jakarta/slide/trunk/src/share/org/apache/slide/extractor/PropertyExtractorTrigger.java?rev=360272&r1=360271&r2=360272&view=diff
 ==============================================================================
--- jakarta/slide/trunk/src/share/org/apache/slide/extractor/PropertyExtractorTrigger.java \
                (original)
+++ jakarta/slide/trunk/src/share/org/apache/slide/extractor/PropertyExtractorTrigger.java \
Sat Dec 31 03:47:45 2005 @@ -54,7 +54,7 @@
             if ( content != null && descriptor != null ) {
                 List extractor = ExtractorManager.getInstance().getPropertyExtractors(namespaceName, descriptors, descriptor);
                 for ( int i = 0, l = extractor.size(); i < l; i++ ) {
-                    Map extractedProperties = ((PropertyExtractor)extractor.get(i)).extract(new ByteArrayInputStream(content.getContentBytes()));
+                    Map extractedProperties = ((PropertyExtractor)extractor.get(i)).extract(descriptors, descriptor, new ByteArrayInputStream(content.getContentBytes()));
                     for ( Iterator j = extractedProperties.entrySet().iterator(); j.hasNext(); ) {
                         Map.Entry entry = (Map.Entry) j.next();
                         final Object key = entry.getKey();

Modified: jakarta/slide/trunk/src/share/org/apache/slide/extractor/SimpleXmlExtractor.java
                
URL: http://svn.apache.org/viewcvs/jakarta/slide/trunk/src/share/org/apache/slide/extractor/SimpleXmlExtractor.java?rev=360272&r1=360271&r2=360272&view=diff
 ==============================================================================
--- jakarta/slide/trunk/src/share/org/apache/slide/extractor/SimpleXmlExtractor.java \
                (original)
+++ jakarta/slide/trunk/src/share/org/apache/slide/extractor/SimpleXmlExtractor.java \
Sat Dec 31 03:47:45 2005 @@ -31,7 +31,10 @@
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
+
 import org.apache.slide.common.PropertyName;
+import org.apache.slide.content.NodeRevisionDescriptor;
+import org.apache.slide.content.NodeRevisionDescriptors;
 import org.apache.slide.util.conf.Configurable;
 import org.apache.slide.util.conf.Configuration;
 import org.apache.slide.util.conf.ConfigurationException;
@@ -93,7 +96,7 @@
         super(uri, contentType, namespace);
     }
 
-    public Map extract(InputStream content) throws ExtractorException {
+    public Map extract(NodeRevisionDescriptors descriptors, NodeRevisionDescriptor descriptor, InputStream content) throws ExtractorException {
         Map properties = new HashMap();
         try {
             SAXBuilder saxBuilder = new SAXBuilder();



---------------------------------------------------------------------
To unsubscribe, e-mail: slide-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: slide-dev-help@jakarta.apache.org


[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic