[prev in list] [next in list] [prev in thread] [next in thread] 

List:       xerces-cvs
Subject:    svn commit: r581333 -
From:       mrglavas () apache ! org
Date:       2007-10-02 18:12:46
Message-ID: 20071002181247.20FFC1A9838 () eris ! apache ! org
[Download RAW message or body]

Author: mrglavas
Date: Tue Oct  2 11:12:46 2007
New Revision: 581333

URL: http://svn.apache.org/viewvc?rev=581333&view=rev
Log:
Adding native support for UTF-16.

Added:
    xerces/java/trunk/src/org/apache/xerces/impl/io/UTF16Reader.java   (with props)

Added: xerces/java/trunk/src/org/apache/xerces/impl/io/UTF16Reader.java
URL: http://svn.apache.org/viewvc/xerces/java/trunk/src/org/apache/xerces/impl/io/UTF16Reader.java?rev=581333&view=auto
 ==============================================================================
--- xerces/java/trunk/src/org/apache/xerces/impl/io/UTF16Reader.java (added)
+++ xerces/java/trunk/src/org/apache/xerces/impl/io/UTF16Reader.java Tue Oct  2 11:12:46 2007
@@ -0,0 +1,323 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.xerces.impl.io;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.Reader;
+import java.util.Locale;
+
+import org.apache.xerces.impl.msg.XMLMessageFormatter;
+import org.apache.xerces.util.MessageFormatter;
+
+/**
+ * <p>A UTF-16 reader. Can also be used for UCS-2 (i.e. ISO-10646-UCS-2).</p>
+ * 
+ * @xerces.internal
+ * 
+ * @author Michael Glavassevich, IBM
+ *
+ * @version $Id$
+ */
+public class UTF16Reader 
+    extends Reader {
+    
+    //
+    // Constants
+    //
+
+    /** Default byte buffer size (4096). */
+    public static final int DEFAULT_BUFFER_SIZE = 4096;
+    
+    //
+    // Data
+    //
+
+    /** Input stream. */
+    protected final InputStream fInputStream;
+
+    /** Byte buffer. */
+    protected final byte[] fBuffer;
+    
+    /** Endianness. */
+    protected final boolean fIsBigEndian;
+    
+    // message formatter; used to produce localized exception messages
+    private final MessageFormatter fFormatter;
+
+    // Locale to use for messages
+    private final Locale fLocale;
+    
+    //
+    // Constructors
+    //
+
+    /** 
+     * Constructs a UTF-16 reader from the specified input stream 
+     * using the default buffer size. Primarily for testing.
+     *
+     * @param inputStream The input stream.
+     * @param isBigEndian The byte order.
+     */
+    public UTF16Reader(InputStream inputStream, boolean isBigEndian) {
+        this(inputStream, DEFAULT_BUFFER_SIZE, isBigEndian, 
+                new XMLMessageFormatter(), Locale.getDefault());
+    } // <init>(InputStream, boolean)
+    
+    /** 
+     * Constructs a UTF-16 reader from the specified input stream 
+     * using the default buffer size and the given MessageFormatter.
+     *
+     * @param inputStream The input stream.
+     * @param isBigEndian The byte order.
+     */
+    public UTF16Reader(InputStream inputStream, boolean isBigEndian, 
+            MessageFormatter messageFormatter, Locale locale) {
+        this(inputStream, DEFAULT_BUFFER_SIZE, isBigEndian, messageFormatter, \
locale); +    } // <init>(InputStream, boolean, MessageFormatter, Locale)
+    
+    /** 
+     * Constructs a UTF-16 reader from the specified input stream 
+     * and buffer size and given MessageFormatter.
+     *
+     * @param inputStream       The input stream.
+     * @param size              The initial buffer size.
+     * @param isBigEndian       The byte order.
+     * @param messageFormatter  Given MessageFormatter
+     * @param locale            Locale to use for messages
+     */
+    public UTF16Reader(InputStream inputStream, int size, boolean isBigEndian, 
+            MessageFormatter messageFormatter, Locale locale) {
+        this(inputStream, new byte[size], isBigEndian, messageFormatter, locale);
+    } // <init>(InputStream, int, boolean, MessageFormatter, Locale)
+    
+    /** 
+     * Constructs a UTF-16 reader from the specified input stream, 
+     * buffer and MessageFormatter.
+     *
+     * @param inputStream       The input stream.
+     * @param buffer            The byte buffer.
+     * @param isBigEndian       The byte order.
+     * @param messageFormatter  Given MessageFormatter
+     * @param locale            Locale to use for messages
+     */
+    public UTF16Reader(InputStream inputStream, byte [] buffer, boolean isBigEndian, \
 +            MessageFormatter messageFormatter, Locale locale) {
+        fInputStream = inputStream;
+        fBuffer = buffer;
+        fIsBigEndian = isBigEndian;
+        fFormatter = messageFormatter;
+        fLocale = locale;
+    } // <init>(InputStream, byte[], boolean, MessageFormatter, Locale)
+    
+    //
+    // Reader methods
+    //
+    
+    /**
+     * Read a single character.  This method will block until a character is
+     * available, an I/O error occurs, or the end of the stream is reached.
+     *
+     * <p> Subclasses that intend to support efficient single-character input
+     * should override this method.
+     *
+     * @return     The character read, as an integer in the range 0 to 65535
+     *             (<tt>0x00-0xffff</tt>), or -1 if the end of the stream has
+     *             been reached
+     *
+     * @exception  IOException  If an I/O error occurs
+     */
+    public int read() throws IOException {
+        final int b0 = fInputStream.read();
+        if (b0 == -1) {
+            return -1;
+        }
+        final int b1 = fInputStream.read();
+        if (b1 == -1) {
+            expectedTwoBytes();
+        }
+        // UTF-16BE
+        if (fIsBigEndian) {
+            return (b0 << 8) | b1;
+        }
+        // UTF-16LE
+        return (b1 << 8) | b0;
+    } // read():int
+
+    /**
+     * Read characters into a portion of an array.  This method will block
+     * until some input is available, an I/O error occurs, or the end of the
+     * stream is reached.
+     *
+     * @param      ch     Destination buffer
+     * @param      offset Offset at which to start storing characters
+     * @param      length Maximum number of characters to read
+     *
+     * @return     The number of characters read, or -1 if the end of the
+     *             stream has been reached
+     *
+     * @exception  IOException  If an I/O error occurs
+     */
+    public int read(char ch[], int offset, int length) throws IOException {
+        int byteLength = length << 1;
+        if (byteLength > fBuffer.length) {
+            byteLength = fBuffer.length;
+        }
+        int byteCount = fInputStream.read(fBuffer, 0, byteLength);
+        if (byteCount == -1) {
+            return -1;
+        }
+        // If an odd number of bytes were read, we still need to read one more.
+        if ((byteCount & 1) != 0) {
+            int b = fInputStream.read();
+            if (b == -1) {
+                expectedTwoBytes();
+            }
+            fBuffer[byteCount++] = (byte) b;
+        }
+        final int charCount = byteCount >> 1;
+        if (fIsBigEndian) {
+            processBE(ch, offset, charCount);
+        }
+        else {
+            processLE(ch, offset, charCount);
+        }
+        return charCount;
+    } // read(char[],int,int)
+
+    /**
+     * Skip characters.  This method will block until some characters are
+     * available, an I/O error occurs, or the end of the stream is reached.
+     *
+     * @param  n  The number of characters to skip
+     *
+     * @return    The number of characters actually skipped
+     *
+     * @exception  IOException  If an I/O error occurs
+     */
+    public long skip(long n) throws IOException {
+        long bytesSkipped = fInputStream.skip(n << 1);
+        if ((bytesSkipped & 1) != 0) {
+            int b = fInputStream.read();
+            if (b == -1) {
+                expectedTwoBytes();
+            }
+            ++bytesSkipped;
+        }
+        return bytesSkipped >> 1;
+    } // skip(long):long
+
+    /**
+     * Tell whether this stream is ready to be read.
+     *
+     * @return True if the next read() is guaranteed not to block for input,
+     * false otherwise.  Note that returning false does not guarantee that the
+     * next read will block.
+     *
+     * @exception  IOException  If an I/O error occurs
+     */
+    public boolean ready() throws IOException {
+        return false;
+    } // ready()
+
+    /**
+     * Tell whether this stream supports the mark() operation.
+     */
+    public boolean markSupported() {
+        return false;
+    } // markSupported()
+
+    /**
+     * Mark the present position in the stream.  Subsequent calls to reset()
+     * will attempt to reposition the stream to this point.  Not all
+     * character-input streams support the mark() operation.
+     *
+     * @param  readAheadLimit  Limit on the number of characters that may be
+     *                         read while still preserving the mark.  After
+     *                         reading this many characters, attempting to
+     *                         reset the stream may fail.
+     *
+     * @exception  IOException  If the stream does not support mark(),
+     *                          or if some other I/O error occurs
+     */
+    public void mark(int readAheadLimit) throws IOException {
+        throw new IOException(fFormatter.formatMessage(fLocale, \
"OperationNotSupported", new Object[]{"mark()", "UTF-16"})); +    } // mark(int)
+
+    /**
+     * Reset the stream.  If the stream has been marked, then attempt to
+     * reposition it at the mark.  If the stream has not been marked, then
+     * attempt to reset it in some way appropriate to the particular stream,
+     * for example by repositioning it to its starting point.  Not all
+     * character-input streams support the reset() operation, and some support
+     * reset() without supporting mark().
+     *
+     * @exception  IOException  If the stream has not been marked,
+     *                          or if the mark has been invalidated,
+     *                          or if the stream does not support reset(),
+     *                          or if some other I/O error occurs
+     */
+    public void reset() throws IOException {
+    } // reset()
+
+    /**
+     * Close the stream.  Once a stream has been closed, further read(),
+     * ready(), mark(), or reset() invocations will throw an IOException.
+     * Closing a previously-closed stream, however, has no effect.
+     *
+     * @exception  IOException  If an I/O error occurs
+     */
+     public void close() throws IOException {
+         fInputStream.close();
+     } // close()
+     
+     //
+     // Private methods
+     //
+     
+     /** Decodes UTF-16BE **/
+     private void processBE(final char ch[], int offset, final int count) {
+         int curPos = 0;
+         for (int i = 0; i < count; ++i) {
+             final int b0 = fBuffer[curPos++] & 0xff;
+             final int b1 = fBuffer[curPos++] & 0xff;
+             ch[offset++] = (char) ((b0 << 8) | b1);
+         }
+     } // processBE(char[],int,int)
+     
+     /** Decodes UTF-16LE **/
+     private void processLE(final char ch[], int offset, final int count) {
+         int curPos = 0;
+         for (int i = 0; i < count; ++i) {
+             final int b0 = fBuffer[curPos++] & 0xff;
+             final int b1 = fBuffer[curPos++] & 0xff;
+             ch[offset++] = (char) ((b1 << 8) | b0);
+         }
+     } // processLE(char[],int,int)
+
+     /** Throws an exception for expected byte. */
+     private void expectedTwoBytes()
+         throws MalformedByteSequenceException {
+         throw new MalformedByteSequenceException(fFormatter,
+             fLocale,
+             XMLMessageFormatter.XML_DOMAIN,
+             "ExpectedByte",
+             new Object[] {"2", "2"});
+     } // expectedTwoBytes()
+
+} // class UTF16Reader

Propchange: xerces/java/trunk/src/org/apache/xerces/impl/io/UTF16Reader.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: xerces/java/trunk/src/org/apache/xerces/impl/io/UTF16Reader.java
------------------------------------------------------------------------------
    svn:keywords = Author Date Id Revision



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@xerces.apache.org
For additional commands, e-mail: commits-help@xerces.apache.org


[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic