[prev in list] [next in list] [prev in thread] [next in thread] 

List:       poi-dev
Subject:    PowerPoint to Text
From:       "Koundinya \(Sudhakar Chavali\)" <sudhakar_koundinya () yahoo ! com>
Date:       2004-03-26 5:38:06
Message-ID: 20040326053806.77908.qmail () web41209 ! mail ! yahoo ! com
[Download RAW message or body]

Hi All,

We have done the initial groundwork for extracting PowerPoint to
text. We would like to thank the POI group. Though the base
work is rough, we are able to extract the text from PowerPoint.

Sorry for the bad programming, but we hope this will be helpful to
efficient developers who want to build a good program from this scratch work.


Here is the sample. Whenever there are modifications, we will
post the information.


import java.io.*;
import java.util.*;
import org.apache.poi.hpsf.*;
import org.apache.poi.poifs.eventfilesystem.*;
import org.apache.poi.util.HexDump;
import org.apache.poi.util.LittleEndian;

public class PPT2Text
{
		public static void main(String[] args)
			throws IOException
		{
			final String filename = args[0];
			POIFSReader r = new POIFSReader();

			/* Register a listener for *all* documents. */
			r.registerListener(new MyPOIFSReaderListener());
			r.read(new FileInputStream(filename));
		}



		static class MyPOIFSReaderListener implements
POIFSReaderListener
		{

			static int filename=1;

			public void processPOIFSReaderEvent(POIFSReaderEvent event) 
			{
				PropertySet ps = null;

				
				try
				{
					
					org.apache.poi.poifs.filesystem.DocumentInputStream
dis=null;

					System.out.println("\n\n");
					System.out.println(event.getPath()+event.getName());
					dis=event.getStream();
/*
					byte btoWrite[]= new byte[12];

					dis.read(btoWrite);

					System.out.println("Version
:"+LittleEndian.getUnsignedByte(btoWrite,0));
					System.out.println("Instance
:"+LittleEndian.getUShort(btoWrite,0));
					System.out.println("Type
:"+LittleEndian.getUShort(btoWrite,2));
					System.out.println("Len
:"+LittleEndian.getLong(btoWrite,4));

*/					

					FileOutputStream fos= new
FileOutputStream(""+filename+".txt");

					byte btoWrite[]= new byte[dis.available()];
					dis.read(btoWrite,0,dis.available());
					for(int i=0;i<btoWrite.length-20;i++)
					{
					//System.out.println("Version
:"+LittleEndian.getUnsignedByte(btoWrite,i+0));
					//System.out.println("Instance
:"+LittleEndian.getUShort(btoWrite,i+0));
					//System.out.println("Type
:"+LittleEndian.getUShort(btoWrite,i+2));
					//System.out.println("Len
:"+LittleEndian.getUInt(btoWrite,i+4));

					long type=LittleEndian.getUShort(btoWrite,i+2);
					long size=LittleEndian.getUInt(btoWrite,i+4);
						if (type==4008)
						{
							fos.write(btoWrite,i+4+1,(int)size+3);

						}

					}

					filename++;
					//System.out.println(event.getStream().toString());
					//ps = PropertySetFactory.create(event.getStream());
				}
				catch (Exception ex)
				{
					//System.out.println("No property set stream: \"" +
event.getPath() +
					//	event.getName() + "\"");
					System.out.println(ex);
					return;
				}
			}
		}


}






thanks,
Sudhakar

=====
"No one can earn a million dollars honestly."- William Jennings Bryan (1860-1925) 

"Make everything as simple as possible, but not simpler."- Albert Einstein (1879-1955)

"It is dangerous to be sincere unless you are also stupid."- George Bernard Shaw (1856-1950)

__________________________________
Do you Yahoo!?
Yahoo! Finance Tax Center - File online. File on time.
http://taxes.yahoo.com/filing.html

---------------------------------------------------------------------
To unsubscribe, e-mail: poi-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: poi-dev-help@jakarta.apache.org

[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic