'[patches] proposed patch to HEAD: Integration of CABExtract'

[prev in list] [next in list] [prev in thread] [next in thread] 

List:       xwt-patches
Subject:    [patches] proposed patch to HEAD: Integration of CABExtract
From:       andrew () xwt ! org
Date:       2003-09-29 19:57:06
[Download RAW message or body]

I know I'm not doing this right yet; I am not sure how to pass the font name yet, and I'd like to eliminate the file I/O and use the baseaddr and cabstream.length parameters that you already had in there, Adam.

need a little guidance here.  :-)

Regards,
Andrew

? .build_libmspack-20030726_mips-unknown-elf
? .compile
? .configure_binutils-2.13.2.1_i686-pc-linux-gnu
? .configure_binutils-2.13.2.1_mips-unknown-elf
? .configure_gcc-3.3_i686-pc-linux-gnu
? .configure_gcc-3.3_mips-unknown-elf
? .configure_jikes-1.18_
? .configure_jpeg-6b_i686-pc-linux-gnu
? .configure_libmspack-20030726_mips-unknown-elf
? .configure_newlib-1.11.0_mips-unknown-elf
? .download_binutils-2.13.2.1
? .download_freetype-2.1.4
? .download_gcc-3.3
? .download_jikes-1.18
? .download_jpeg-6b
? .download_libmspack-20030726
? .download_newlib-1.11.0
? .empty.c
? .install_binutils-2.13.2.1_i686-pc-linux-gnu
? .install_binutils-2.13.2.1_mips-unknown-elf
? .install_freetype-2.1.4_mips-unknown-elf
? .install_gcc-3.3_i686-pc-linux-gnu
? .install_gcc-3.3_mips-unknown-elf
? .install_jikes-1.18_
? .install_jpeg-6b_i686-pc-linux-gnu
? .install_libmspack-20030726_mips-unknown-elf
? .install_newlib-1.11.0_mips-unknown-elf
? .jikes+
? .vendor
? build
? x
? src/org/xwt/translators/fe.c
? upstream/binutils-2.13.2.1
? upstream/install
? upstream/jikes-1.18
? upstream/libmspack-20030726
? upstream/freetype-2.1.4/src
? upstream/gcc-3.3/build-i686-pc-linux-gnu
? upstream/gcc-3.3/build-i686-pc-mingw32
? upstream/gcc-3.3/build-mips-unknown-elf
? upstream/gcc-3.3/src
? upstream/jpeg-6b/build-i686-pc-linux-gnu
? upstream/jpeg-6b/build-i686-pc-mingw32
? upstream/jpeg-6b/src
? upstream/newlib-1.11.0/build-mips-unknown-elf
? upstream/newlib-1.11.0/src
Index: src/org/xwt/translators/MSPack.c
===================================================================
RCS file: /cvs/xwt/src/org/xwt/translators/MSPack.c,v
retrieving revision 1.1
diff -u -d -B -u -d -r1.1 MSPack.c
--- src/org/xwt/translators/MSPack.c	27 Sep 2003 23:43:30 -0000	1.1
+++ src/org/xwt/translators/MSPack.c	29 Sep 2003 19:50:12 -0000
@@ -9,8 +9,1593 @@
 
        FIXME
 
+ fontextract 0.1 - a program to extract a single ttf from the ms core fonts 
+
+ * (C) 2003 Andrew Kohlsmith <akohlsmith@mixdown.ca>
+ *
+ * really it's just a hacked up cabextract-0.6 
+ * (C) 2000-2002 Stuart Caie <kyzer@4u.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <ctype.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <string.h>
+#include <time.h>
+#include <utime.h>
+
+extern char *_user_info;
+
+typedef unsigned char  UBYTE; /* 8 bits exactly    */
+typedef unsigned short UWORD; /* 16 bits (or more) */
+typedef unsigned int   ULONG; /* 32 bits (or more) */
+typedef   signed int    LONG; /* 32 bits (or more) */
+
+/* number of bits in a ULONG; the bitstream macros (ENSURE_BITS, PEEK_BITS)
+ * use it to keep the unread bits at the most significant end of the bit
+ * buffer. CHAR_BIT normally comes from <limits.h>; 8 is only a fallback.
+ */
+#ifndef CHAR_BIT
+# define CHAR_BIT (8)
+#endif
+#define ULONG_BITS (sizeof(ULONG) * CHAR_BIT)
+
+/* endian-neutral reading of little-endian data
+ *
+ * EndGetI32(a) / EndGetI16(a) assemble a 32-bit / 16-bit value from the
+ * bytes a[0] (least significant) upwards.
+ *
+ * In EndGetI32 every byte is widened to ULONG before it is shifted: a UBYTE
+ * is otherwise promoted to (signed) int, and shifting a byte >= 0x80 left
+ * by 24 overflows int, which is undefined behaviour. The result is
+ * converted back to LONG so callers keep seeing a signed 32-bit value with
+ * the same bit pattern as before.
+ */
+#define EndGetI32(a)  ((LONG)(((ULONG)((a)[3]) << 24) | \
+                              ((ULONG)((a)[2]) << 16) | \
+                              ((ULONG)((a)[1]) <<  8) | \
+                              ((ULONG)((a)[0]))))
+#define EndGetI16(a)  ((((a)[1])<<8)|((a)[0]))
+
+/* maximum number of cabinets any one folder can be split across */
+#define CAB_SPLITMAX (10)
+
+struct cabinet {                       /* describes one cabinet file     */
+  struct cabinet *next;                /* for making a list of cabinets  */
+  char  *filename;                     /* input name of cabinet          */
+  FILE  *fh;                           /* open file handle or NULL       */
+  off_t filelen;                       /* length of cabinet file         */
+  off_t blocks_off;                    /* offset to data blocks in file  */
+  struct cabinet *prevcab, *nextcab;   /* multipart cabinet chains       */
+  char *prevname, *nextname;           /* and their filenames            */
+  char *previnfo, *nextinfo;           /* and their visible names        */
+  struct folder *folders;              /* first folder in this cabinet   */
+  struct file *files;                  /* first file in this cabinet     */
+  UBYTE block_resv;                    /* reserved space in datablocks   */
+  UBYTE flags;                         /* header flags                   */
+};
+
+struct folder {                        /* one folder of compressed data  */
+  struct folder *next;                 /* presumably next folder in list */
+  struct cabinet *cab[CAB_SPLITMAX];   /* cabinet(s) this folder spans   */
+  off_t offset[CAB_SPLITMAX];          /* offset to data blocks          */
+  UWORD comp_type;                     /* compression format/window size */
+  ULONG comp_size;                     /* compressed size of folder      */
+  UBYTE num_splits;                    /* number of split blocks + 1     */
+  UWORD num_blocks;                    /* total number of blocks         */
+  struct file *contfile;               /* the first split file           */
+};
+
+struct file {                          /* one file stored in a folder    */
+  struct file *next;                   /* next file in sequence          */
+  struct folder *folder;               /* folder that contains this file */
+  char *filename;                      /* output name of file            */
+  FILE *fh;                            /* open file handle or NULL       */
+  ULONG length;                        /* uncompressed length of file    */
+  ULONG offset;                        /* uncompressed offset in folder  */
+  UWORD index;                         /* magic index number of folder   */
+  UWORD time, date, attribs;           /* MS-DOS time/date/attributes    */
+};
+
+
+/* structure offsets: byte offsets of the fields inside the cabinet's
+ * on-disk structures. The prefix names the structure (cfhead, cfheadext,
+ * cffold, cffile, cfdata); each *_SIZEOF is presumably the size of that
+ * structure's fixed part. The fields are presumably fetched with
+ * EndGetI16/EndGetI32 - the code doing so is not in this part of the file.
+ */
+#define cfhead_Signature         (0x00)
+#define cfhead_CabinetSize       (0x08)
+#define cfhead_FileOffset        (0x10)
+#define cfhead_MinorVersion      (0x18)
+#define cfhead_MajorVersion      (0x19)
+#define cfhead_NumFolders        (0x1A)
+#define cfhead_NumFiles          (0x1C)
+#define cfhead_Flags             (0x1E)
+#define cfhead_SetID             (0x20)
+#define cfhead_CabinetIndex      (0x22)
+#define cfhead_SIZEOF            (0x24)
+#define cfheadext_HeaderReserved (0x00)
+#define cfheadext_FolderReserved (0x02)
+#define cfheadext_DataReserved   (0x03)
+#define cfheadext_SIZEOF         (0x04)
+#define cffold_DataOffset        (0x00)
+#define cffold_NumBlocks         (0x04)
+#define cffold_CompType          (0x06)
+#define cffold_SIZEOF            (0x08)
+#define cffile_UncompressedSize  (0x00)
+#define cffile_FolderOffset      (0x04)
+#define cffile_FolderIndex       (0x08)
+#define cffile_Date              (0x0A)
+#define cffile_Time              (0x0C)
+#define cffile_Attribs           (0x0E)
+#define cffile_SIZEOF            (0x10)
+#define cfdata_CheckSum          (0x00)
+#define cfdata_CompressedSize    (0x04)
+#define cfdata_UncompressedSize  (0x06)
+#define cfdata_SIZEOF            (0x08)
+
+/* flags and special values
+ *
+ * cffoldCOMPTYPE_*  - compression method; cffoldCOMPTYPE_MASK selects the
+ *                     low 4 bits that hold it
+ * cfhead*           - bits of the cabinet header flags
+ * cffileCONTINUED_* - special folder index values; judging by the names
+ *                     they mark files continued from/to another cabinet
+ *                     (TODO confirm against the code that reads them)
+ * cffile_A_*        - bits of the file attributes
+ */
+#define cffoldCOMPTYPE_MASK            (0x000f)
+#define cffoldCOMPTYPE_NONE            (0x0000)
+#define cffoldCOMPTYPE_MSZIP           (0x0001)
+#define cffoldCOMPTYPE_QUANTUM         (0x0002)
+#define cffoldCOMPTYPE_LZX             (0x0003)
+#define cfheadPREV_CABINET             (0x0001)
+#define cfheadNEXT_CABINET             (0x0002)
+#define cfheadRESERVE_PRESENT          (0x0004)
+#define cffileCONTINUED_FROM_PREV      (0xFFFD)
+#define cffileCONTINUED_TO_NEXT        (0xFFFE)
+#define cffileCONTINUED_PREV_AND_NEXT  (0xFFFF)
+#define cffile_A_RDONLY                (0x01)
+#define cffile_A_HIDDEN                (0x02)
+#define cffile_A_SYSTEM                (0x04)
+#define cffile_A_ARCH                  (0x20)
+#define cffile_A_EXEC                  (0x40)
+#define cffile_A_NAME_IS_UTF           (0x80)
+
+
+/*--------------------------------------------------------------------------*/
+/* our archiver information / state */
+
+/* LZX stuff */
+
+/* some constants defined by the LZX specification */
+#define LZX_MIN_MATCH                (2)
+#define LZX_MAX_MATCH                (257)
+#define LZX_NUM_CHARS                (256)
+#define LZX_BLOCKTYPE_INVALID        (0)   /* also blocktypes 4-7 invalid */
+#define LZX_BLOCKTYPE_VERBATIM       (1)
+#define LZX_BLOCKTYPE_ALIGNED        (2)
+#define LZX_BLOCKTYPE_UNCOMPRESSED   (3)
+#define LZX_PRETREE_NUM_ELEMENTS     (20)
+#define LZX_ALIGNED_NUM_ELEMENTS     (8)   /* aligned offset tree #elements */
+#define LZX_NUM_PRIMARY_LENGTHS      (7)   /* this one missing from spec! */
+#define LZX_NUM_SECONDARY_LENGTHS    (249) /* length tree #elements */
+
+/* LZX huffman defines: tweak tablebits as desired
+ *
+ * For each table, _MAXSYMBOLS is the number of symbols it can hold and
+ * _TABLEBITS is the longest code length that make_decode_table() resolves
+ * with one direct lookup (BUILD_TABLE passes the two as nsyms and nbits).
+ * LZX_DECLARE_TABLE(tbl), further down, sizes the tbl_table[] and
+ * tbl_len[] arrays from these two values.
+ */
+#define LZX_PRETREE_MAXSYMBOLS  (LZX_PRETREE_NUM_ELEMENTS)
+#define LZX_PRETREE_TABLEBITS   (6)
+#define LZX_MAINTREE_MAXSYMBOLS (LZX_NUM_CHARS + 50*8) /* 50 = most slots LZXinit picks, 8 symbols each */
+#define LZX_MAINTREE_TABLEBITS  (12)
+#define LZX_LENGTH_MAXSYMBOLS   (LZX_NUM_SECONDARY_LENGTHS+1)
+#define LZX_LENGTH_TABLEBITS    (12)
+#define LZX_ALIGNED_MAXSYMBOLS  (LZX_ALIGNED_NUM_ELEMENTS)
+#define LZX_ALIGNED_TABLEBITS   (7)
+
+#define LZX_LENTABLE_SAFETY (64) /* we allow length table decoding overruns */
+
+#define LZX_DECLARE_TABLE(tbl) \
+  UWORD tbl##_table[(1<<LZX_##tbl##_TABLEBITS) + (LZX_##tbl##_MAXSYMBOLS<<1)];\
+  UBYTE tbl##_len  [LZX_##tbl##_MAXSYMBOLS + LZX_LENTABLE_SAFETY]
+
+struct LZXstate {          /* LZX decoder state; set up by LZXinit()       */
+    UBYTE *window;         /* the actual decoding window              */
+    ULONG window_size;     /* window size (32Kb through 2Mb)          */
+    ULONG actual_size;     /* window size when it was first allocated */
+    ULONG window_posn;     /* current offset within the window        */
+    ULONG R0, R1, R2;      /* for the LRU offset system               */
+    UWORD main_elements;   /* number of main tree elements            */
+    int   header_read;     /* have we started decoding at all yet?    */
+    UWORD block_type;      /* type of this block                      */
+    ULONG block_length;    /* uncompressed length of this block       */
+    ULONG block_remaining; /* uncompressed bytes still left to decode */
+    ULONG frames_read;     /* the number of CFDATA blocks processed   */
+    LONG  intel_filesize;  /* magic header value used for transform   */
+    LONG  intel_curpos;    /* current offset in transform space       */
+    int   intel_started;   /* have we seen any translatable data yet? */
+
+    LZX_DECLARE_TABLE(PRETREE);   /* each line declares NAME_table[] (huffman */
+    LZX_DECLARE_TABLE(MAINTREE);  /* decode table) and NAME_len[] (the code   */
+    LZX_DECLARE_TABLE(LENGTH);    /* length of every symbol, plus the         */
+    LZX_DECLARE_TABLE(ALIGNED);   /* LZX_LENTABLE_SAFETY overrun margin)      */
+};
+
+
+/* generic stuff
+ *
+ * CAB(x), ZIP(x), QTM(x) and LZX(x) name field x of the global
+ * decomp_state, or of one of its per-method sub-states. Only the lzx
+ * member of the methods union is actually declared in this file, so ZIP()
+ * and QTM() cannot be used as things stand.
+ * DECR_* are the status codes returned by the decompression routines
+ * (DECR_OK on success).
+ */
+#define CAB(x) (decomp_state.x)
+#define ZIP(x) (decomp_state.methods.zip.x)
+#define QTM(x) (decomp_state.methods.qtm.x)
+#define LZX(x) (decomp_state.methods.lzx.x)
+#define DECR_OK           (0)
+#define DECR_DATAFORMAT   (1)
+#define DECR_ILLEGALDATA  (2)
+#define DECR_NOMEMORY     (3)
+#define DECR_CHECKSUM     (4)
+#define DECR_INPUT        (5)
+#define DECR_OUTPUT       (6)
+
+/* CAB data blocks are <= 32768 bytes in uncompressed form. Uncompressed
+ * blocks have zero growth. MSZIP guarantees that it won't grow above
+ * uncompressed size by more than 12 bytes. LZX guarantees it won't grow
+ * more than 6144 bytes.
+ */
+#define CAB_BLOCKMAX (32768)
+#define CAB_INPUTMAX (CAB_BLOCKMAX+6144)
+
+struct {                  /* the one global decompressor state; fields are */
+                          /* reached through the CAB()/LZX() macros        */
+  struct folder *current; /* current folder we're extracting from  */
+  ULONG offset;           /* uncompressed offset within folder     */
+  UBYTE *outpos;          /* (high level) start of data to use up  */
+  UWORD outlen;           /* (high level) amount of data to use up */
+  UWORD split;            /* at which split in current folder?     */
+  int (*decompress)(int, int); /* the chosen compression func      */
+  UBYTE inbuf[CAB_INPUTMAX+2]; /* +2 for lzx bitbuffer overflows!  */
+  UBYTE outbuf[CAB_BLOCKMAX];  /* one uncompressed block at most   */
+  union {
+//    struct ZIPstate zip;   (disabled: no ZIPstate is declared in this file)
+//    struct QTMstate qtm;   (disabled: no QTMstate is declared in this file)
+    struct LZXstate lzx;   /* the only method state compiled in */
+  } methods;
+} decomp_state;
+
+/* LZX decruncher */
+
+/* Microsoft's LZX document and their implementation of the
+ * com.ms.util.cab Java package do not concur.
+ *
+ * In the LZX document, there is a table showing the correlation between
+ * window size and the number of position slots. It states that the 1MB
+ * window = 40 slots and the 2MB window = 42 slots. In the implementation,
+ * 1MB = 42 slots, 2MB = 50 slots. The actual calculation is 'find the
+ * first slot whose position base is equal to or more than the required
+ * window size'. This would explain why other tables in the document refer
+ * to 50 slots rather than 42.
+ *
+ * The constant NUM_PRIMARY_LENGTHS used in the decompression pseudocode
+ * is not defined in the specification.
+ *
+ * The LZX document does not state the uncompressed block has an
+ * uncompressed length field. Where does this length field come from, so
+ * we can know how large the block is? The implementation has it as the 24
+ * bits following after the 3 blocktype bits, before the alignment
+ * padding.
+ *
+ * The LZX document states that aligned offset blocks have their aligned
+ * offset huffman tree AFTER the main and length trees. The implementation
+ * suggests that the aligned offset tree is BEFORE the main and length
+ * trees.
+ *
+ * The LZX document decoding algorithm states that, in an aligned offset
+ * block, if an extra_bits value is 1, 2 or 3, then that number of bits
+ * should be read and the result added to the match offset. This is
+ * correct for 1 and 2, but not 3, where just a huffman symbol (using the
+ * aligned tree) should be read.
+ *
+ * Regarding the E8 preprocessing, the LZX document states 'No translation
+ * may be performed on the last 6 bytes of the input block'. This is
+ * correct.  However, the pseudocode provided checks for the *E8 leader*
+ * up to the last 6 bytes. If the leader appears between -10 and -7 bytes
+ * from the end, this would cause the next four bytes to be modified, at
+ * least one of which would be in the last 6 bytes, which is not allowed
+ * according to the spec.
+ *
+ * The specification states that the huffman trees must always contain at
+ * least one element. However, many CAB files contain blocks where the
+ * length tree is completely empty (because there are no matches), and
+ * this is expected to succeed.
+ */
+
+
+/* LZX uses what it calls 'position slots' to represent match offsets.
+ * What this means is that a small 'position slot' number and a small
+ * offset from that slot are encoded instead of one large offset for
+ * every match.
+ * - lzx_position_base is an index to the position slot bases
+ * - extra_bits states how many bits of offset-from-base data is needed.
+ *
+ * Both tables have 51 entries (position slots 0 to 50) and are filled in
+ * at run time by LZXinit().
+ */
+static ULONG lzx_position_base[51];
+static UBYTE extra_bits[51];
+
+/* LZXinit(window) - prepare the global LZX decoder state for a new stream.
+ *
+ * window = log2 of the LZX window size; must be 15 (32Kb) to 21 (2Mb).
+ *
+ * (Re)allocates the decoding window when the existing one is too small,
+ * fills the static extra_bits / lzx_position_base tables and resets the
+ * decoder state held in LZX(...).
+ *
+ * Returns DECR_OK on success, DECR_DATAFORMAT for an unsupported window
+ * size, or DECR_NOMEMORY if the window cannot be allocated.
+ */
+int LZXinit(int window) {
+  ULONG wndsize;
+  int i, j, posn_slots;
+
+  /* LZX supports window sizes of 2^15 (32Kb) through 2^21 (2Mb).
+   * Check the range before shifting: 1 << window is undefined for a
+   * negative or over-wide shift count.
+   */
+  if (window < 15 || window > 21) return DECR_DATAFORMAT;
+  wndsize = (ULONG) 1 << window;
+
+  /* if a previously allocated window is big enough, keep it */
+  if (LZX(actual_size) < wndsize) {
+    free(LZX(window)); /* free(NULL) is harmless */
+    LZX(window) = NULL;
+  }
+  if (!LZX(window)) {
+    LZX(window) = malloc(wndsize);
+    if (!LZX(window)) {
+      LZX(actual_size) = 0; /* nothing is allocated any more */
+      return DECR_NOMEMORY;
+    }
+    LZX(actual_size) = wndsize;
+  }
+  LZX(window_size) = wndsize;
+
+  /* initialise static tables */
+  for (i=0, j=0; i <= 50; i += 2) {
+    extra_bits[i] = j; /* 0,0,0,0,1,1,2,2,3,3... */
+    /* the tables only have slots 0..50, so the pair of slot 50 (slot 51)
+     * must not be written */
+    if (i + 1 <= 50) extra_bits[i+1] = j;
+    if ((i != 0) && (j < 17)) j++; /* 0,0,1,2,3,4...15,16,17,17,17,17... */
+  }
+  for (i=0, j=0; i <= 50; i++) {
+    lzx_position_base[i] = j; /* 0,1,2,3,4,6,8,12,16,24,32,... */
+    j += 1 << extra_bits[i]; /* 1,1,1,1,2,2,4,4,8,8,16,16,32,32,... */
+  }
+
+  /* calculate required position slots: this is the first slot whose
+   * position base is equal to or more than the window size
+   */
+       if (window == 20) posn_slots = 42;
+  else if (window == 21) posn_slots = 50;
+  else posn_slots = window << 1;
+
+  LZX(R0)  =  LZX(R1)  = LZX(R2) = 1;
+  LZX(main_elements)   = LZX_NUM_CHARS + (posn_slots << 3);
+  LZX(header_read)     = 0;
+  LZX(frames_read)     = 0;
+  LZX(block_remaining) = 0;
+  LZX(block_type)      = LZX_BLOCKTYPE_INVALID;
+  LZX(intel_curpos)    = 0;
+  LZX(intel_started)   = 0;
+  LZX(window_posn)     = 0;
+
+  /* initialise tables to 0 (because deltas will be applied to them) */
+  for (i = 0; i < LZX_MAINTREE_MAXSYMBOLS; i++) LZX(MAINTREE_len)[i] = 0;
+  for (i = 0; i < LZX_LENGTH_MAXSYMBOLS; i++)   LZX(LENGTH_len)[i]   = 0;
+
+  return DECR_OK;
+}
+
+/* Bitstream reading macros (LZX / intel little-endian byte order)
+ *
+ * INIT_BITSTREAM    should be used first to set up the system
+ * READ_BITS(var,n)  takes N bits from the buffer and puts them in var
+ *
+ * ENSURE_BITS(n)    ensures there are at least N bits in the bit buffer.
+ *                   it can guarantee up to 17 bits (i.e. it can read in
+ *                   16 new bits when there is down to 1 bit in the buffer,
+ *                   and it can read 32 bits when there are 0 bits in the
+ *                   buffer).
+ * PEEK_BITS(n)      extracts (without removing) N bits from the bit buffer
+ * REMOVE_BITS(n)    removes N bits from the bit buffer
+ *
+ * These bit access routines work by using the area beyond the MSB and the
+ * LSB as a free source of zeroes. This avoids having to mask any bits.
+ * So we have to know the bit width of the bitbuffer variable.
+ */
+
+/* All of these macros operate on the caller's local variables
+ * bitbuf (ULONG), bitsleft (int) and inpos (UBYTE *).
+ */
+#define INIT_BITSTREAM do { bitsleft = 0; bitbuf = 0; } while (0)
+
+/* Quantum reads bytes in normal order; LZX is little-endian order.
+ *
+ * The 16-bit input word is widened to ULONG before it is shifted into
+ * place: left as a plain int, shifting it up to 16 places could overflow
+ * int, which is undefined behaviour.
+ * Note that ENSURE_BITS expands to a bare while statement, and that it
+ * does not check inpos against the end of the input buffer.
+ */
+#define ENSURE_BITS(n)                                                  \
+  while (bitsleft < (n)) {                                              \
+    bitbuf |= ((ULONG)((inpos[1]<<8)|inpos[0]))                         \
+              << (ULONG_BITS-16 - bitsleft);                            \
+    bitsleft += 16; inpos+=2;                                           \
+  }
+
+/* PEEK_BITS(0) would shift by the full width of bitbuf, which is not
+ * allowed; READ_BITS therefore treats n == 0 separately.
+ */
+#define PEEK_BITS(n)   (bitbuf >> (ULONG_BITS - (n)))
+#define REMOVE_BITS(n) ((bitbuf <<= (n)), (bitsleft -= (n)))
+
+#define READ_BITS(v,n) do {                                             \
+  if (n) {                                                              \
+    ENSURE_BITS(n);                                                     \
+    (v) = PEEK_BITS(n);                                                 \
+    REMOVE_BITS(n);                                                     \
+  }                                                                     \
+  else {                                                                \
+    (v) = 0;                                                            \
+  }                                                                     \
+} while (0)
+
+/* Huffman macros
+ *
+ * The four helpers below map a table name (PRETREE, MAINTREE, LENGTH or
+ * ALIGNED) to its compile-time parameters and to its two arrays inside
+ * the LZX state.
+ */
+
+#define TABLEBITS(tbl)   (LZX_##tbl##_TABLEBITS)
+#define MAXSYMBOLS(tbl)  (LZX_##tbl##_MAXSYMBOLS)
+#define SYMTABLE(tbl)    (LZX(tbl##_table))
+#define LENTABLE(tbl)    (LZX(tbl##_len))
+
+/* BUILD_TABLE(tablename) builds a huffman lookup table from code lengths.
+ * In reality, it just calls make_decode_table() with the appropriate
+ * values - they're all fixed by some #defines anyway, so there's no point
+ * writing each call out in full by hand.
+ *
+ * If make_decode_table() reports an error, the macro executes
+ * 'return DECR_ILLEGALDATA' in the function that uses it. It expands to a
+ * bare if statement, not to a do/while(0) block.
+ */
+#define BUILD_TABLE(tbl)						\
+  if (make_decode_table(						\
+    MAXSYMBOLS(tbl), TABLEBITS(tbl), LENTABLE(tbl), SYMTABLE(tbl)	\
+  )) { return DECR_ILLEGALDATA; }
+
+
+/* READ_HUFFSYM(tablename, var) decodes one huffman symbol from the
+ * bitstream using the stated table and puts it in var.
+ *
+ * The using function must have the locals i, j, hufftbl, bitbuf, bitsleft
+ * and inpos. The top TABLEBITS bits of the bit buffer are looked up
+ * directly; an entry that is not below MAXSYMBOLS is followed through the
+ * table one further input bit at a time. If the bits run out first, the
+ * macro executes 'return DECR_ILLEGALDATA' in the using function.
+ */
+#define READ_HUFFSYM(tbl,var) do {					\
+  ENSURE_BITS(16);							\
+  hufftbl = SYMTABLE(tbl);						\
+  if ((i = hufftbl[PEEK_BITS(TABLEBITS(tbl))]) >= MAXSYMBOLS(tbl)) {	\
+    j = 1 << (ULONG_BITS - TABLEBITS(tbl));				\
+    do {								\
+      j >>= 1; i <<= 1; i |= (bitbuf & j) ? 1 : 0;			\
+      if (!j) { return DECR_ILLEGALDATA; }	                        \
+    } while ((i = hufftbl[i]) >= MAXSYMBOLS(tbl));			\
+  }									\
+  j = LENTABLE(tbl)[(var) = i];						\
+  REMOVE_BITS(j);							\
+} while (0)
+
+
+/* READ_LENGTHS(tablename, first, last) reads in code lengths for symbols
+ * first up to (but not including) last in the given table. The code
+ * lengths are stored in their own special LZX way.
+ *
+ * The using function must have a local 'struct lzx_bits lb' besides
+ * bitbuf, bitsleft and inpos: the bit reader state is copied into lb,
+ * handed to lzx_read_lens() and copied back afterwards. If lzx_read_lens()
+ * fails, the macro executes 'return DECR_ILLEGALDATA' in the using
+ * function.
+ */
+#define READ_LENGTHS(tbl,first,last) do { \
+  lb.bb = bitbuf; lb.bl = bitsleft; lb.ip = inpos; \
+  if (lzx_read_lens(LENTABLE(tbl),(first),(last),&lb)) { \
+    return DECR_ILLEGALDATA; \
+  } \
+  bitbuf = lb.bb; bitsleft = lb.bl; inpos = lb.ip; \
+} while (0)
+
+
+/* make_decode_table(nsyms, nbits, length[], table[])
+ *
+ * This function was coded by David Tritscher. It builds a fast huffman
+ * decoding table out of just a canonical huffman code lengths table.
+ *
+ * nsyms  = total number of symbols in this huffman tree.
+ * nbits  = any symbols with a code length of nbits or less can be decoded
+ *          in one lookup of the table.
+ * length = A table to get code lengths from [0 to nsyms-1]
+ * table  = The table to fill up with decoded symbols and pointers.
+ *
+ * Layout of the result: the first (1 << nbits) entries are indexed
+ * directly by the next nbits bits of input. Codes longer than nbits are
+ * stored as a binary tree of entry pairs: a pointer entry holds a pair
+ * number p, the pair lives in table[2*p] and table[2*p+1] and the next
+ * input bit selects one of the two. Pair numbers start at (1 << nbits)/2,
+ * so the first pair sits directly behind the direct-lookup part. An entry
+ * of 0 in this tree means 'no pair allocated yet'.
+ *
+ * Only code lengths 1 to 16 are placed in the table; length 0 means the
+ * symbol is unused.
+ *
+ * Returns 0 for OK or 1 for error. A set of lengths that is entirely zero
+ * (an empty tree) counts as OK.
+ */
+
+int make_decode_table(ULONG nsyms, ULONG nbits, UBYTE *length, UWORD *table) {
+  register UWORD sym;
+  register ULONG leaf;
+  register UBYTE bit_num = 1; /* the code length handled in this pass */
+  ULONG fill;
+  ULONG pos         = 0; /* the current position in the decode table */
+  ULONG table_mask  = 1 << nbits; /* size of the direct-lookup part */
+  ULONG bit_mask    = table_mask >> 1; /* don't do 0 length codes */
+  ULONG next_symbol = bit_mask; /* base of allocation for long codes */
+
+  /* fill entries for codes short enough for a direct mapping */
+  while (bit_num <= nbits) {
+    for (sym = 0; sym < nsyms; sym++) {
+      if (length[sym] == bit_num) {
+        leaf = pos;
+
+        if((pos += bit_mask) > table_mask) return 1; /* table overrun */
+
+        /* fill all possible lookups of this symbol with the symbol itself */
+        fill = bit_mask;
+        while (fill-- > 0) table[leaf++] = sym;
+      }
+    }
+    bit_mask >>= 1; /* a code one bit longer covers half as many entries */
+    bit_num++;
+  }
+
+  /* if there are any codes longer than nbits */
+  if (pos != table_mask) {
+    /* clear the remainder of the table */
+    for (sym = pos; sym < table_mask; sym++) table[sym] = 0;
+
+    /* give ourselves room for codes to grow by up to 16 more bits */
+    pos <<= 16;
+    table_mask <<= 16;
+    bit_mask = 1 << 15;
+
+    while (bit_num <= 16) {
+      for (sym = 0; sym < nsyms; sym++) {
+        if (length[sym] == bit_num) {
+          leaf = pos >> 16; /* start from the direct-lookup entry */
+          for (fill = 0; fill < bit_num - nbits; fill++) {
+            /* if this path hasn't been taken yet, 'allocate' two entries */
+            if (table[leaf] == 0) {
+              table[(next_symbol << 1)] = 0;
+              table[(next_symbol << 1) + 1] = 0;
+              table[leaf] = next_symbol++;
+            }
+            /* follow the path and select either left or right for next bit */
+            leaf = table[leaf] << 1;
+            if ((pos >> (15-fill)) & 1) leaf++;
+          }
+          table[leaf] = sym;
+
+          if ((pos += bit_mask) > table_mask) return 1; /* table overflow */
+        }
+      }
+      bit_mask >>= 1;
+      bit_num++;
+    }
+  }
+
+  /* full table? */
+  if (pos == table_mask) return 0;
+
+  /* either erroneous table, or all elements are 0 - let's find out. */
+  for (sym = 0; sym < nsyms; sym++) if (length[sym]) return 1;
+  return 0;
+}
+
+struct lzx_bits {  /* bit reader state passed to and from lzx_read_lens() */
+  ULONG bb;        /* the bit buffer (the caller's bitbuf)            */
+  int bl;          /* bits left in the buffer (the caller's bitsleft) */
+  UBYTE *ip;       /* input position (the caller's inpos)             */
+};
+
+/* lzx_read_lens(lens, first, last, lb)
+ *
+ * Reads the code lengths of symbols first up to (but not including) last
+ * into lens[], using the bit reader state in *lb. The lengths are sent as
+ * a 20-symbol 'pre-tree' followed by pre-tree coded deltas against the
+ * values already in lens[]; pre-tree symbols 17, 18 and 19 stand for runs.
+ * A run may carry on past last - the length tables are declared with
+ * LZX_LENTABLE_SAFETY spare entries for that.
+ *
+ * On success the advanced bit reader state is stored back in *lb and 0 is
+ * returned. BUILD_TABLE and READ_HUFFSYM return DECR_ILLEGALDATA from this
+ * function on bad data; *lb is then left untouched.
+ */
+int lzx_read_lens(UBYTE *lens, ULONG first, ULONG last, struct lzx_bits *lb) {
+  /* i, j, hufftbl, bitbuf, bitsleft and inpos are referred to by name
+   * inside the READ_BITS / READ_HUFFSYM macros */
+  ULONG i, j;
+  UWORD *hufftbl;
+  register ULONG bitbuf = lb->bb;
+  register int bitsleft = lb->bl;
+  UBYTE *inpos = lb->ip;
+
+  ULONG sym, run;
+  int code;
+
+  /* the pre-tree arrives as twenty 4-bit code lengths */
+  sym = 0;
+  while (sym < 20) {
+    READ_BITS(run, 4);
+    LENTABLE(PRETREE)[sym] = run;
+    sym++;
+  }
+  BUILD_TABLE(PRETREE);
+
+  sym = first;
+  while (sym < last) {
+    READ_HUFFSYM(PRETREE, code);
+
+    switch (code) {
+    case 17: /* short run of zero lengths: 4 bits + 4 */
+      READ_BITS(run, 4);
+      for (run += 4; run > 0; run--) lens[sym++] = 0;
+      break;
+
+    case 18: /* long run of zero lengths: 5 bits + 20 */
+      READ_BITS(run, 5);
+      for (run += 20; run > 0; run--) lens[sym++] = 0;
+      break;
+
+    case 19: /* run of one repeated length: 1 bit + 4, then the delta */
+      READ_BITS(run, 1);
+      run += 4;
+      READ_HUFFSYM(PRETREE, code);
+      code = lens[sym] - code;
+      if (code < 0) code += 17;
+      for (; run > 0; run--) lens[sym++] = code;
+      break;
+
+    default: /* a single length, as a delta (mod 17) to the old one */
+      code = lens[sym] - code;
+      if (code < 0) code += 17;
+      lens[sym++] = code;
+      break;
+    }
+  }
+
+  /* hand the advanced bit reader state back to the caller */
+  lb->ip = inpos;
+  lb->bl = bitsleft;
+  lb->bb = bitbuf;
+  return 0;
+}
+
+int LZXdecompress(int inlen, int outlen) {
+  UBYTE *inpos  = CAB(inbuf);
+  UBYTE *endinp = inpos + inlen;
+  UBYTE *window = LZX(window);
+  UBYTE *runsrc, *rundest;
+  UWORD *hufftbl; /* used in READ_HUFFSYM macro as chosen decoding table */
+
+  ULONG window_posn = LZX(window_posn);
+  ULONG window_size = LZX(window_size);
+  ULONG R0 = LZX(R0);
+  ULONG R1 = LZX(R1);
+  ULONG R2 = LZX(R2);
+
+  register ULONG bitbuf;
+  register int bitsleft;
+  ULONG match_offset, i,j,k; /* ijk used in READ_HUFFSYM macro */
+  struct lzx_bits lb; /* used in READ_LENGTHS macro */
+
+  int togo = outlen, this_run, main_element, aligned_bits;
+  int match_length, copy_length, length_footer, extra, verbatim_bits;
+
+  INIT_BITSTREAM;
+
+  /* read header if necessary */
+  if (!LZX(header_read)) {
+    i = j = 0;
+    READ_BITS(k, 1); if (k) { READ_BITS(i,16); READ_BITS(j,16); }
+    LZX(intel_filesize) = (i << 16) | j; /* or 0 if not encoded */
+    LZX(header_read) = 1;
+  }
+
+  /* main decoding loop */
+  while (togo > 0) {
+    /* last block finished, new block expected */
+    if (LZX(block_remaining) == 0) {
+      if (LZX(block_type) == LZX_BLOCKTYPE_UNCOMPRESSED) {
+        if (LZX(block_length) & 1) inpos++; /* realign bitstream to word */
+        INIT_BITSTREAM;
+      }
+
+      READ_BITS(LZX(block_type), 3);
+      READ_BITS(i, 16);
+      READ_BITS(j, 8);
+      LZX(block_remaining) = LZX(block_length) = (i << 8) | j;
+
+      switch (LZX(block_type)) {
+      case LZX_BLOCKTYPE_ALIGNED:
+        for (i = 0; i < 8; i++) { READ_BITS(j, 3); LENTABLE(ALIGNED)[i] = j; }
+        BUILD_TABLE(ALIGNED);
+        /* rest of aligned header is same as verbatim */
+
+      case LZX_BLOCKTYPE_VERBATIM:
+        READ_LENGTHS(MAINTREE, 0, 256);
+        READ_LENGTHS(MAINTREE, 256, LZX(main_elements));
+        BUILD_TABLE(MAINTREE);
+        if (LENTABLE(MAINTREE)[0xE8] != 0) LZX(intel_started) = 1;
+
+        READ_LENGTHS(LENGTH, 0, LZX_NUM_SECONDARY_LENGTHS);
+        BUILD_TABLE(LENGTH);
+        break;
+
+      case LZX_BLOCKTYPE_UNCOMPRESSED:
+        LZX(intel_started) = 1; /* because we can't assume otherwise */
+        ENSURE_BITS(16); /* get up to 16 pad bits into the buffer */
+        if (bitsleft > 16) inpos -= 2; /* and align the bitstream! */
+        R0 = inpos[0]|(inpos[1]<<8)|(inpos[2]<<16)|(inpos[3]<<24);inpos+=4;
+        R1 = inpos[0]|(inpos[1]<<8)|(inpos[2]<<16)|(inpos[3]<<24);inpos+=4;
+        R2 = inpos[0]|(inpos[1]<<8)|(inpos[2]<<16)|(inpos[3]<<24);inpos+=4;
+        break;
+
+      default:
+        return DECR_ILLEGALDATA;
+      }
+    }
+
+    /* buffer exhaustion check */
+    if (inpos > endinp) {
+      /* it's possible to have a file where the next run is less than
+       * 16 bits in size. In this case, the READ_HUFFSYM() macro used
+       * in building the tables will exhaust the buffer, so we should
+       * allow for this, but not allow those accidentally read bits to
+       * be used (so we check that there are at least 16 bits
+       * remaining - in this boundary case they aren't really part of
+       * the compressed data)
+       */
+      if (inpos > (endinp+2) || bitsleft < 16) return DECR_ILLEGALDATA;
+    }
+
+    while ((this_run = LZX(block_remaining)) > 0 && togo > 0) {
+      if (this_run > togo) this_run = togo;
+      togo -= this_run;
+      LZX(block_remaining) -= this_run;
+
+      /* apply 2^x-1 mask */
+      window_posn &= window_size - 1;
+      /* runs can't straddle the window wraparound */
+      if ((window_posn + this_run) > window_size)
+        return DECR_DATAFORMAT;
+
+      switch (LZX(block_type)) {
+
+      case LZX_BLOCKTYPE_VERBATIM:
+        while (this_run > 0) {
+          READ_HUFFSYM(MAINTREE, main_element);
+
+          if (main_element < LZX_NUM_CHARS) {
+            /* literal: 0 to LZX_NUM_CHARS-1 */
+            window[window_posn++] = main_element;
+            this_run--;
+          }
+          else {
+            /* match: LZX_NUM_CHARS + ((slot<<3) | length_header (3 bits)) */
+            main_element -= LZX_NUM_CHARS;
+  
+            match_length = main_element & LZX_NUM_PRIMARY_LENGTHS;
+            if (match_length == LZX_NUM_PRIMARY_LENGTHS) {
+              READ_HUFFSYM(LENGTH, length_footer);
+              match_length += length_footer;
+            }
+            match_length += LZX_MIN_MATCH;
+  
+            match_offset = main_element >> 3;
+  
+            if (match_offset > 2) {
+              /* not repeated offset */
+              if (match_offset != 3) {
+                extra = extra_bits[match_offset];
+                READ_BITS(verbatim_bits, extra);
+                match_offset = lzx_position_base[match_offset] 
+                               - 2 + verbatim_bits;
+              }
+              else {
+                match_offset = 1;
+              }
+  
+              /* update repeated offset LRU queue */
+              R2 = R1; R1 = R0; R0 = match_offset;
+            }
+            else if (match_offset == 0) {
+              match_offset = R0;
+            }
+            else if (match_offset == 1) {
+              match_offset = R1;
+              R1 = R0; R0 = match_offset;
+            }
+            else /* match_offset == 2 */ {
+              match_offset = R2;
+              R2 = R0; R0 = match_offset;
+            }
+
+            rundest = window + window_posn;
+            this_run -= match_length;
+
+            /* copy any wrapped around source data */
+            if (window_posn >= match_offset) {
+	      /* no wrap */
+              runsrc = rundest - match_offset;
+            } else {
+              runsrc = rundest + (window_size - match_offset);
+              copy_length = match_offset - window_posn;
+              if (copy_length < match_length) {
+                match_length -= copy_length;
+                window_posn += copy_length;
+                while (copy_length-- > 0) *rundest++ = *runsrc++;
+                runsrc = window;
+              }
+            }
+            window_posn += match_length;
+
+            /* copy match data - no worries about destination wraps */
+            while (match_length-- > 0) *rundest++ = *runsrc++;
+          }
+        }
+        break;
+
+      case LZX_BLOCKTYPE_ALIGNED:
+        while (this_run > 0) {
+          READ_HUFFSYM(MAINTREE, main_element);
+  
+          if (main_element < LZX_NUM_CHARS) {
+            /* literal: 0 to LZX_NUM_CHARS-1 */
+            window[window_posn++] = main_element;
+            this_run--;
+          }
+          else {
+            /* match: LZX_NUM_CHARS + ((slot<<3) | length_header (3 bits)) */
+            main_element -= LZX_NUM_CHARS;
+  
+            match_length = main_element & LZX_NUM_PRIMARY_LENGTHS;
+            if (match_length == LZX_NUM_PRIMARY_LENGTHS) {
+              READ_HUFFSYM(LENGTH, length_footer);
+              match_length += length_footer;
+            }
+            match_length += LZX_MIN_MATCH;
+  
+            match_offset = main_element >> 3;
+  
+            if (match_offset > 2) {
+              /* not repeated offset */
+              extra = extra_bits[match_offset];
+              match_offset = lzx_position_base[match_offset] - 2;
+              if (extra > 3) {
+                /* verbatim and aligned bits */
+                extra -= 3;
+                READ_BITS(verbatim_bits, extra);
+                match_offset += (verbatim_bits << 3);
+                READ_HUFFSYM(ALIGNED, aligned_bits);
+                match_offset += aligned_bits;
+              }
+              else if (extra == 3) {
+                /* aligned bits only */
+                READ_HUFFSYM(ALIGNED, aligned_bits);
+                match_offset += aligned_bits;
+              }
+              else if (extra > 0) { /* extra==1, extra==2 */
+                /* verbatim bits only */
+                READ_BITS(verbatim_bits, extra);
+                match_offset += verbatim_bits;
+              }
+              else /* extra == 0 */ {
+                /* ??? */
+                match_offset = 1;
+              }
+  
+              /* update repeated offset LRU queue */
+              R2 = R1; R1 = R0; R0 = match_offset;
+            }
+            else if (match_offset == 0) {
+              match_offset = R0;
+            }
+            else if (match_offset == 1) {
+              match_offset = R1;
+              R1 = R0; R0 = match_offset;
+            }
+            else /* match_offset == 2 */ {
+              match_offset = R2;
+              R2 = R0; R0 = match_offset;
+            }
+
+            rundest = window + window_posn;
+            this_run -= match_length;
+
+            /* copy any wrapped around source data */
+            if (window_posn >= match_offset) {
+	      /* no wrap */
+              runsrc = rundest - match_offset;
+            } else {
+              runsrc = rundest + (window_size - match_offset);
+              copy_length = match_offset - window_posn;
+              if (copy_length < match_length) {
+                match_length -= copy_length;
+                window_posn += copy_length;
+                while (copy_length-- > 0) *rundest++ = *runsrc++;
+                runsrc = window;
+              }
+            }
+            window_posn += match_length;
+
+            /* copy match data - no worries about destination wraps */
+            while (match_length-- > 0) *rundest++ = *runsrc++;
+          }
+        }
+        break;
+
+      case LZX_BLOCKTYPE_UNCOMPRESSED:
+        if ((inpos + this_run) > endinp) return DECR_ILLEGALDATA;
+        memcpy(window + window_posn, inpos, (size_t) this_run);
+        inpos += this_run; window_posn += this_run;
+        break;
+
+      default:
+        return DECR_ILLEGALDATA; /* might as well */
+      }
+
+    }
+  }
+
+  if (togo != 0) return DECR_ILLEGALDATA;
+  memcpy(CAB(outbuf), window + ((!window_posn) ? window_size : window_posn) -
+    outlen, (size_t) outlen);
+
+  LZX(window_posn) = window_posn;
+  LZX(R0) = R0;
+  LZX(R1) = R1;
+  LZX(R2) = R2;
+
+  /* intel E8 decoding */
+  if ((LZX(frames_read)++ < 32768) && LZX(intel_filesize) != 0) {
+    if (outlen <= 6 || !LZX(intel_started)) {
+      LZX(intel_curpos) += outlen;
+    }
+    else {
+      UBYTE *data    = CAB(outbuf);
+      UBYTE *dataend = data + outlen - 10;
+      LONG curpos    = LZX(intel_curpos);
+      LONG filesize  = LZX(intel_filesize);
+      LONG abs_off, rel_off;
+
+      LZX(intel_curpos) = curpos + outlen;
+
+      while (data < dataend) {
+        if (*data++ != 0xE8) { curpos++; continue; }
+        abs_off = data[0] | (data[1]<<8) | (data[2]<<16) | (data[3]<<24);
+        if ((abs_off >= -curpos) && (abs_off < filesize)) {
+          rel_off = (abs_off >= 0) ? abs_off - curpos : abs_off + filesize;
+          data[0] = (UBYTE) rel_off;
+          data[1] = (UBYTE) (rel_off >> 8);
+          data[2] = (UBYTE) (rel_off >> 16);
+          data[3] = (UBYTE) (rel_off >> 24);
+        }
+        data += 4;
+        curpos += 5;
+      }
+    }
+  }
+  return DECR_OK;
+}
+
+
+
+
+/* all the file IO is abstracted into these routines:
+ * cabinet_(open|close|read|seek|skip|getoffset)
+ * file_(open|close|write)
+ */
+
+/* Opens the output file for a cabinet member and stores the stream in
+ * fi->fh. The output path is currently fixed to "fontfile.ttf" whatever
+ * fi->filename says, so a failure is reported against that path (the one
+ * fopen() was actually given) rather than against the member name.
+ *
+ * Returns 1 on success, 0 on failure.
+ */
+int file_open(struct file *fi) {
+  static const char out_path[] = "fontfile.ttf";
+
+  fi->fh = fopen(out_path, "wb");
+  if (!fi->fh) {
+    perror(out_path);
+    return 0;
+  }
+
+  return 1;
+}
+
+/* Closes the output stream of fi, if one is open, and clears fi->fh.
+ * fclose() flushes buffered output, so a failure here means the written
+ * file may be incomplete; it is reported with perror() because this
+ * function has no return value.
+ */
+void file_close(struct file *fi) {
+  if (fi->fh && fclose(fi->fh) != 0) {
+    perror(fi->filename);
+  }
+  fi->fh = NULL;
+}
+
+/* Writes length bytes from buf to the output stream of fi.
+ * Returns 1 when every byte was written; otherwise reports the problem
+ * with perror() and returns 0.
+ */
+int file_write(struct file *fi, UBYTE *buf, size_t length) {
+  size_t written = fwrite((void *)buf, 1, length, fi->fh);
+
+  if (written == length) return 1;
+
+  perror(fi->filename);
+  return 0;
+}
+
+
+/* Detaches the cabinet from its stream by clearing cab->fh.
+ * The stream is deliberately not closed, since it is stdin.
+ */
+void cabinet_close(struct cabinet *cab) {
+  cab->fh = NULL;
+}
+
+/* Moves the cabinet's read position to the absolute offset given.
+ * A failed seek is reported with perror() but not signalled to the caller.
+ */
+void cabinet_seek(struct cabinet *cab, off_t offset) {
+  int rc = fseek(cab->fh, offset, SEEK_SET);
+
+  if (rc < 0) perror(cab->filename);
+}
+
+/* Moves the cabinet's read position by distance bytes relative to where
+ * it currently is. A failed seek is reported with perror() but not
+ * signalled to the caller.
+ */
+void cabinet_skip(struct cabinet *cab, off_t distance) {
+  int rc = fseek(cab->fh, distance, SEEK_CUR);
+
+  if (rc < 0) perror(cab->filename);
+}
+
+/* Returns the cabinet's current read position as reported by ftell(). */
+off_t cabinet_getoffset(struct cabinet *cab) {
+  off_t position = ftell(cab->fh);
+
+  return position;
+}
+
+/* Reads length bytes from the cabinet's current position into buf.
+ *
+ * If fewer than length bytes remain before cab->filelen, a truncation
+ * warning is printed and only the remaining bytes are read; that short
+ * read still counts as success, so buf may be only partly filled.
+ *
+ * Returns 1 on success, 0 if the current position cannot be determined
+ * or the read itself fails.
+ */
+int cabinet_read(struct cabinet *cab, UBYTE *buf, size_t length) {
+  off_t pos = cabinet_getoffset(cab);
+  off_t remaining;
+  size_t avail;
+
+  /* ftell() failed: without this check the subtraction below would
+   * produce filelen + 1 and hide the error */
+  if (pos < 0) {
+    perror(cab->filename);
+    return 0;
+  }
+
+  /* a position at or beyond the end means "nothing left"; it must not
+   * wrap around to a huge size_t */
+  remaining = (off_t) cab->filelen - pos;
+  avail = (remaining > 0) ? (size_t) remaining : 0;
+
+  if (length > avail) {
+    fprintf(stderr, "%s: WARNING; cabinet is truncated\n", cab->filename);
+    length = avail;
+  }
+  if (fread((void *)buf, 1, length, cab->fh) != length) {
+    perror(cab->filename);
+    return 0;
+  }
+  return 1;
+}
+
+/* Prepares a cabinet for reading. The cabinet data always comes from
+ * stdin, which is already open, so this only measures the input (stored
+ * in cab->filelen), rewinds it and attaches it as cab->fh.
+ *
+ * Returns 1 on success. Returns 0, after reporting with perror(), when
+ * stdin cannot be measured or rewound (for instance when it is not
+ * seekable); cab->fh and cab->filelen are left untouched in that case.
+ */
+int cabinet_open(struct cabinet *cab) {
+  long end;
+
+  /* find the length of the input: seek to the end and note the position */
+  if (fseek(stdin, 0, SEEK_END) != 0 || (end = ftell(stdin)) < 0) {
+    perror(cab->filename);
+    return 0;
+  }
+
+  /* return to the start of the input */
+  if (fseek(stdin, 0, SEEK_SET) != 0) {
+    perror(cab->filename);
+    return 0;
+  }
+
+  cab->filelen = end;
+  cab->fh = stdin;
+  return 1;
+}
+
+/* Reads a NUL-terminated string of arbitrary length starting at the
+ * cabinet's current read position and returns it in a freshly allocated
+ * buffer, which the caller owns and must free(). On success the read
+ * position is left just after the terminator.
+ *
+ * The string is looked for in a window that starts at 256 bytes and
+ * grows by 256 bytes per attempt, never extending past cab->filelen.
+ *
+ * Returns NULL if memory runs out, a read fails, or the end of the
+ * cabinet is reached before a terminator is found.
+ */
+char *cabinet_read_string(struct cabinet *cab) {
+  off_t len=256, base = cabinet_getoffset(cab), maxlen = cab->filelen - base;
+  off_t i;
+  int ok = 0;
+  UBYTE *buf = NULL, *grown;
+
+  /* nothing left to read, so no string can start here */
+  if (maxlen <= 0) {
+    fprintf(stderr, "%s: WARNING; cabinet is truncated\n", cab->filename);
+    return NULL;
+  }
+
+  do {
+    if (len > maxlen) len = maxlen;
+
+    /* grow through a temporary so the old buffer is not lost (and can
+     * still be freed below) if realloc() fails */
+    grown = realloc(buf, (size_t) len);
+    if (!grown) {
+      fprintf(stderr, "out of memory!\n");
+      break;
+    }
+    buf = grown;
+
+    if (!cabinet_read(cab, buf, (size_t) len)) break;
+
+    /* search for a null terminator in what we've just read */
+    for (i=0; i < len; i++) {
+      if (!buf[i]) {ok=1; break;}
+    }
+
+    if (!ok) {
+      if (len == maxlen) {
+        fprintf(stderr, "%s: WARNING; cabinet is truncated\n", cab->filename);
+        break;
+      }
+      /* no terminator yet: rewind and retry with a larger window */
+      len += 256;
+      cabinet_seek(cab, base);
+    }
+  } while (!ok);
+
+  if (!ok) {
+    free(buf);
+    return NULL;
+  }
+
+  /* otherwise, set the stream to just after the string and return */
+  cabinet_seek(cab, base + ((off_t) strlen((char *) buf)) + 1);
+  return (char *) buf;
+}
+
+/* Parses the cabinet header (CFHEADER) at the current read position and
+ * the folder and file tables that follow it. Fills in cab->flags,
+ * cab->block_resv (when the reserve fields are present), the optional
+ * previous/next cabinet names, and the linked lists cab->folders and
+ * cab->files.
+ *
+ * Returns 1 on success. Returns 0 on a read error, a bad signature, an
+ * empty folder or file table, or when memory runs out; entries already
+ * linked into cab at that point are left in place.
+ */
+int cabinet_read_entries(struct cabinet *cab) {
+  int num_folders, num_files, header_resv, folder_resv = 0, i;
+  struct folder *fol, *linkfol = NULL;
+  struct file *file, *linkfile = NULL;
+  off_t base_offset;
+  UBYTE buf[64];
+
+  /* read in the CFHEADER */
+  base_offset = cabinet_getoffset(cab);
+  if (!cabinet_read(cab, buf, cfhead_SIZEOF)) {
+    return 0;
+  }
+
+  /* check basic MSCF signature */
+  if (EndGetI32(buf+cfhead_Signature) != 0x4643534d) {
+    fprintf(stderr, "%s: not a Microsoft cabinet file\n", cab->filename);
+    return 0;
+  }
+
+  /* get the number of folders */
+  num_folders = EndGetI16(buf+cfhead_NumFolders);
+  if (num_folders == 0) {
+    fprintf(stderr, "%s: no folders in cabinet\n", cab->filename);
+    return 0;
+  }
+
+  /* get the number of files */
+  num_files = EndGetI16(buf+cfhead_NumFiles);
+  if (num_files == 0) {
+    fprintf(stderr, "%s: no files in cabinet\n", cab->filename);
+    return 0;
+  }
+
+  /* just check the header revision */
+  if ((buf[cfhead_MajorVersion] > 1) ||
+      (buf[cfhead_MajorVersion] == 1 && buf[cfhead_MinorVersion] > 3))
+  {
+    fprintf(stderr, "%s: WARNING; cabinet format version > 1.3\n",
+            cab->filename);
+  }
+
+  /* read the reserved-sizes part of header, if present */
+  cab->flags = EndGetI16(buf+cfhead_Flags);
+  if (cab->flags & cfheadRESERVE_PRESENT) {
+    if (!cabinet_read(cab, buf, cfheadext_SIZEOF)) return 0;
+    header_resv     = EndGetI16(buf+cfheadext_HeaderReserved);
+    folder_resv     = buf[cfheadext_FolderReserved];
+    cab->block_resv = buf[cfheadext_DataReserved];
+
+    if (header_resv > 60000) {
+      fprintf(stderr, "%s: WARNING; header reserved space > 60000\n",
+              cab->filename);
+    }
+
+    /* skip the reserved header; cabinet_skip() reports a failed seek */
+    if (header_resv) cabinet_skip(cab, (off_t) header_resv);
+  }
+
+  /* names of the neighbouring cabinets; the info string may be absent */
+  if (cab->flags & cfheadPREV_CABINET) {
+    cab->prevname = cabinet_read_string(cab);
+    if (!cab->prevname) return 0;
+    cab->previnfo = cabinet_read_string(cab);
+  }
+
+  if (cab->flags & cfheadNEXT_CABINET) {
+    cab->nextname = cabinet_read_string(cab);
+    if (!cab->nextname) return 0;
+    cab->nextinfo = cabinet_read_string(cab);
+  }
+
+  /* read folders */
+  for (i = 0; i < num_folders; i++) {
+    if (!cabinet_read(cab, buf, cffold_SIZEOF)) return 0;
+    if (folder_resv) cabinet_skip(cab, folder_resv);
+
+    fol = (struct folder *) calloc(1, sizeof(struct folder));
+    if (!fol) { fprintf(stderr, "out of memory!\n"); return 0; }
+
+    /* the stored data offset is relative to the start of this cabinet */
+    fol->cab[0]     = cab;
+    fol->offset[0]  = base_offset + (off_t) EndGetI32(buf+cffold_DataOffset);
+    fol->num_blocks = EndGetI16(buf+cffold_NumBlocks);
+    fol->comp_type  = EndGetI16(buf+cffold_CompType);
+
+    if (!linkfol) cab->folders = fol; else linkfol->next = fol;
+    linkfol = fol;
+  }
+
+  /* read files */
+  for (i = 0; i < num_files; i++) {
+    if (!cabinet_read(cab, buf, cffile_SIZEOF)) return 0;
+    file = (struct file *) calloc(1, sizeof(struct file));
+    if (!file) { fprintf(stderr, "out of memory!\n"); return 0; }
+
+    file->length   = EndGetI32(buf+cffile_UncompressedSize);
+    file->offset   = EndGetI32(buf+cffile_FolderOffset);
+    file->index    = EndGetI16(buf+cffile_FolderIndex);
+    file->time     = EndGetI16(buf+cffile_Time);
+    file->date     = EndGetI16(buf+cffile_Date);
+    file->attribs  = EndGetI16(buf+cffile_Attribs);
+    file->filename = cabinet_read_string(cab);
+    if (!file->filename) {
+      /* this entry has not been linked into cab->files yet, so nobody
+       * else could ever release it */
+      free(file);
+      return 0;
+    }
+    if (!linkfile) cab->files = file; else linkfile->next = file;
+    linkfile = file;
+  }
+  return 1;
+}
+
+
+/* Runs through every file of every cabinet in the chain that starts at
+ * basecab (linked through ->nextcab), works out which folder each file
+ * belongs to, and merges folders that continue across a cabinet
+ * boundary. Duplicate file entries that appear in spanning cabinets are
+ * dropped from the result. There is memory leakage here because those
+ * entries are not freed. See the XAD CAB client for an implementation of
+ * this that correctly frees the discarded file entries.
+ *
+ * Returns the head of a list, linked through ->next, of the files that
+ * were given a folder; NULL if there are none.
+ */
+struct file *process_files(struct cabinet *basecab) {
+  struct cabinet *cab;
+  struct file *outfi = NULL, *linkfi = NULL, *nextfi, *fi, *cfi;
+  struct folder *fol, *firstfol, *lastfol = NULL, *predfol;
+  int i, mergeok;
+
+  for (cab = basecab; cab; cab = cab->nextcab) {
+    /* firstfol = first folder in this cabinet */
+    /* lastfol  = last folder in this cabinet */
+    /* predfol  = last folder in previous cabinet (or NULL if first cabinet) */
+    predfol = lastfol;
+    firstfol = cab->folders;
+    for (lastfol = firstfol; lastfol->next;) lastfol = lastfol->next;
+    mergeok = 1;
+
+    for (fi = cab->files; fi; fi = nextfi) {
+      i = fi->index;
+      /* remember the successor now: the ->next links are re-used below
+       * to build the output list */
+      nextfi = fi->next;
+
+      if (i < cffileCONTINUED_FROM_PREV) {
+        /* ordinary index: walk to the i'th folder of this cabinet */
+        for (fol = firstfol; fol && i--; ) fol = fol->next;
+        fi->folder = fol; /* NULL if an invalid folder index */
+      }
+      else {
+        /* folder merging */
+        if (i == cffileCONTINUED_TO_NEXT
+        ||  i == cffileCONTINUED_PREV_AND_NEXT) {
+          if (cab->nextcab && !lastfol->contfile) lastfol->contfile = fi;
+        }
+
+        if (i == cffileCONTINUED_FROM_PREV
+        ||  i == cffileCONTINUED_PREV_AND_NEXT) {
+          /* these files are to be continued in yet another
+           * cabinet, don't merge them in just yet */
+          if (i == cffileCONTINUED_PREV_AND_NEXT) mergeok = 0;
+
+          /* only merge once per cabinet */
+          if (predfol) {
+            if ((cfi = predfol->contfile)
+            && (cfi->offset == fi->offset)
+            && (cfi->length == fi->length)
+            && (strcmp(cfi->filename, fi->filename) == 0)
+            && (predfol->comp_type == firstfol->comp_type)) {
+              /* increase the number of splits */
+              if ((i = ++(predfol->num_splits)) > CAB_SPLITMAX) {
+                mergeok = 0;
+                fprintf(stderr, "%s: internal error, increase CAB_SPLITMAX\n",
+                  basecab->filename);
+              }
+              else {
+                /* copy information across from the merged folder */
+                predfol->offset[i] = firstfol->offset[0];
+                predfol->cab[i]    = firstfol->cab[0];
+                predfol->next      = firstfol->next;
+                predfol->contfile  = firstfol->contfile;
+
+                if (firstfol == lastfol) lastfol = predfol;
+                firstfol = predfol;
+                predfol = NULL; /* don't merge again within this cabinet */
+              }
+            }
+            else {
+              /* if the folders won't merge, don't add their files */
+              mergeok = 0;
+            }
+          }
+
+          if (mergeok) fi->folder = firstfol;
+        }
+      }
+
+      /* only files that ended up with a folder go into the result */
+      if (fi->folder) {
+        if (linkfi) linkfi->next = fi; else outfi = fi;
+        linkfi = fi;
+      }
+    } /* for (fi= .. */
+  } /* for (cab= ...*/
+
+  /* terminate the result: the last accepted file may still point at
+   * rejected entries (folder == NULL) that followed it in its cabinet */
+  if (linkfi) linkfi->next = NULL;
+
+  return outfi;
+}
+
+/* Allocates a cabinet structure labelled with name, opens the input,
+ * seeks to offset and parses the cabinet found there with
+ * cabinet_read_entries(). Returns the populated structure, or NULL when
+ * allocation, opening or parsing fails.
+ */
+struct cabinet *load_cab_offset(char *name, off_t offset) {
+  struct cabinet *cab = (struct cabinet *) calloc(1, sizeof(struct cabinet));
+  int parsed;
+
+  if (!cab) return NULL;
+  cab->filename = name;
+
+  if (!cabinet_open(cab)) {
+    free(cab);
+    return NULL;
+  }
+
+  cabinet_seek(cab, offset);
+  parsed = cabinet_read_entries(cab);
+  cabinet_close(cab);
+
+  if (!parsed) {
+    free(cab);
+    return NULL;
+  }
+  return cab;
+}
+
+/* Searches the input for embedded cabinets (this also succeeds on a
+ * plain cabinet file). Every candidate header that loads successfully is
+ * appended to a list chained through the cab->next member; the head of
+ * that list is returned, or NULL if no usable cabinet was found.
+ *
+ * The input is scanned in SEARCH_SIZE chunks with a small state machine
+ * whose state is the number of header bytes matched so far, so a header
+ * may straddle two chunks.
+ */
+#define SEARCH_SIZE (32*1024)
+UBYTE search_buf[SEARCH_SIZE];
+
+struct cabinet *find_cabs_in_file() {
+  struct cabinet *cab, *cab2, *firstcab = NULL, *linkcab = NULL;
+  UBYTE *pstart = &search_buf[0], *pend, *p;
+  ULONG offset, caboff, cablen = 0, foffset = 0, filelen;
+  size_t length;
+  int state = 0, found = 0, ok = 0;
+
+  char *name = "standard input";
+
+  /* open the file and search for cabinet headers */
+  if ((cab = (struct cabinet *) calloc(1, sizeof(struct cabinet)))) {
+    cab->filename = name;
+    if (cabinet_open(cab)) {
+      filelen = (ULONG) cab->filelen;
+      for (offset = 0; offset < filelen; offset += length) {
+        /* search length is either the full length of the search buffer,
+         * or the amount of data remaining to the end of the file,
+         * whichever is less.
+         */
+        length = filelen - offset;
+        if (length > SEARCH_SIZE) length = SEARCH_SIZE;
+
+        /* fill the search buffer with data from disk */
+        if (!cabinet_read(cab, search_buf, length)) break;
+
+        /* read through the entire buffer. */
+        p = pstart;
+        pend = &search_buf[length];
+        while (p < pend) {
+          switch (state) {
+          /* starting state */
+          case 0:
+            /* we spend most of our time in this while loop, looking for
+             * a leading 'M' of the 'MSCF' signature. The 'M' is consumed
+             * separately from the scan so that one sitting in the very
+             * last byte of the buffer still advances the state.
+             */
+            while (p < pend && *p != 0x4D) p++;
+            if (p < pend) { p++; state = 1; }
+            break;
+
+          /* verify that the next 3 bytes are 'S', 'C' and 'F' */
+          case 1: state = (*p++ == 0x53) ? 2 : 0; break;
+          case 2: state = (*p++ == 0x43) ? 3 : 0; break;
+          case 3: state = (*p++ == 0x46) ? 4 : 0; break;
+
+          /* we don't care about bytes 4-7 */
+          /* bytes 8-11 are the overall length of the cabinet */
+          case 8:  cablen  = *p++;               state++; break;
+          case 9:  cablen |= (ULONG) *p++ << 8;  state++; break;
+          case 10: cablen |= (ULONG) *p++ << 16; state++; break;
+          case 11: cablen |= (ULONG) *p++ << 24; state++; break;
+
+          /* we don't care about bytes 12-15 */
+          /* bytes 16-19 are the offset within the cabinet of the filedata */
+          case 16: foffset  = *p++;               state++; break;
+          case 17: foffset |= (ULONG) *p++ << 8;  state++; break;
+          case 18: foffset |= (ULONG) *p++ << 16; state++; break;
+          case 19: foffset |= (ULONG) *p++ << 24;
+            /* now we have received 20 bytes of potential cab header. */
+            /* work out the offset in the file of this potential cabinet */
+            caboff = offset + (ULONG) (p-pstart) - 20;
+
+            /* check that the files offset is less than the alleged length
+             * of the cabinet, and that the offset + the alleged length are
+             * 'roughly' within the end of overall file length
+             */
+            if ((foffset < cablen) &&
+                ((caboff + foffset) < (filelen + 32)) &&
+                ((caboff + cablen) < (filelen + 32)) )
+            {
+              /* found a potential result - try loading it */
+              found++;
+              cab2 = load_cab_offset(name, (off_t) caboff);
+              if (cab2) {
+                /* success */
+                ok++;
+
+                /* cause the search to restart after this cab's data. */
+                offset = caboff + cablen;
+                if (offset < filelen) cabinet_seek(cab, offset);
+                length = 0;
+                p = pend;
+
+                /* link the cab into the list */
+                if (linkcab == NULL) firstcab = cab2;
+                else linkcab->next = cab2;
+                linkcab = cab2;
+              }
+              else {
+                /* the failed load moved the read position of the one
+                 * stream both cabinets share (stdin); put it back to
+                 * just after the buffer currently being scanned */
+                cabinet_seek(cab, (off_t) (offset + length));
+              }
+            }
+            state = 0;
+            break;
+          default:
+            p++, state++; break;
+          }
+        }
+      }
+      cabinet_close(cab);
+    }
+    free(cab);
+  }
+
+  /* if there were cabinets that were found but are not ok, point this out */
+  if (found > ok) {
+    fprintf(stderr, "%s: WARNING; found %d bad cabinets\n", name, found-ok);
+  }
+
+  /* if no cabinets were found, let the user know */
+  if (!firstcab) {
+    fprintf(stderr, "%s: not a Microsoft cabinet file.\n", name);
+  }
+  return firstcab;
+}
+
+/* UTF translates two-byte unicode characters into 1, 2 or 3 bytes.
+ * %000000000xxxxxxx -> %0xxxxxxx
+ * %00000xxxxxyyyyyy -> %110xxxxx %10yyyyyy
+ * %xxxxyyyyyyzzzzzz -> %1110xxxx %10yyyyyy %10zzzzzz
+ *
+ * Therefore, the inverse is as follows:
+ * First char:
+ *  0x00 - 0x7F = one byte char
+ *  0x80 - 0xBF = invalid
+ *  0xC0 - 0xDF = 2 byte char (next char only 0x80-0xBF is valid)
+ *  0xE0 - 0xEF = 3 byte char (next 2 chars only 0x80-0xBF is valid)
+ *  0xF0 - 0xFF = invalid
+ */
+
+/* Folds bytes bytes of data into the running XOR checksum csum and
+ * returns the result.
+ *
+ * Whole groups of four bytes are combined with the first byte in the
+ * lowest position and XORed in one at a time. The one to three bytes
+ * left over are combined the other way round - the first leftover byte
+ * lands in the highest position used - and XORed in last.
+ */
+ULONG checksum(UBYTE *data, UWORD bytes, ULONG csum) {
+  int words = bytes >> 2;
+  int rest = bytes & 3;
+  ULONG tail = 0;
+
+  while (words-- > 0) {
+    csum ^= ((data[0]) | (data[1]<<8) | (data[2]<<16) | (data[3]<<24));
+    data += 4;
+  }
+
+  if (rest == 3) tail |= *data++ << 16;
+  if (rest >= 2) tail |= *data++ <<  8;
+  if (rest >= 1) tail |= *data;
+
+  return csum ^ tail;
+}
+
+/* Produces decoded bytes from the current folder until the request for
+ * fi is satisfied.
+ *
+ * savemode != 0: fi->length bytes are decoded and written to fi's
+ *                output stream with file_write().
+ * savemode == 0: fi->offset - CAB(offset) bytes are decoded and
+ *                discarded, to skip forward to the start of fi.
+ *
+ * Bytes still waiting in the output buffer from an earlier call are used
+ * first; after that, data blocks are read from the cabinet, checked
+ * against their stored checksum (when one is stored) and handed to
+ * CAB(decompress).
+ *
+ * Returns DECR_OK on success, DECR_OUTPUT if writing fails, DECR_INPUT
+ * on a read problem, an oversized block or a missing continuation
+ * cabinet, DECR_CHECKSUM on a checksum mismatch, or whatever error
+ * CAB(decompress) reports.
+ */
+int decompress(struct file *fi, int savemode) {
+  ULONG bytes = savemode ? fi->length : fi->offset - CAB(offset);
+  struct cabinet *cab = CAB(current)->cab[CAB(split)];
+  UBYTE buf[cfdata_SIZEOF], *data;
+  UWORD inlen, len, outlen, cando;
+  ULONG cksum;
+  LONG err;
+
+  while (bytes > 0) {
+    /* cando = the max number of bytes we can do */
+    cando = CAB(outlen);
+    if (cando > bytes) cando = bytes;
+
+    /* if cando != 0; a failed write ends the extraction instead of
+     * silently leaving a hole in the output */
+    if (cando && savemode && !file_write(fi, CAB(outpos), cando)) {
+      return DECR_OUTPUT;
+    }
+
+    CAB(outpos) += cando;
+    CAB(outlen) -= cando;
+    bytes -= cando; if (!bytes) break;
+
+    /* we only get here if we emptied the output buffer */
+
+    /* read data header + data */
+    inlen = outlen = 0;
+    while (outlen == 0) {
+      /* read the block header, skip the reserved part */
+      if (!cabinet_read(cab, buf, cfdata_SIZEOF)) return DECR_INPUT;
+      cabinet_skip(cab, cab->block_resv);
+
+      /* we shouldn't get blocks over CAB_INPUTMAX in size. Test before
+       * adding: the sum is stored back into a UWORD and could wrap
+       * around to a small value that slips past the limit */
+      data = CAB(inbuf) + inlen;
+      len = EndGetI16(buf+cfdata_CompressedSize);
+      if (len > CAB_INPUTMAX - inlen) return DECR_INPUT;
+      inlen += len;
+      if (!cabinet_read(cab, data, len)) return DECR_INPUT;
+
+      /* clear two bytes after read-in data */
+      data[len+1] = data[len+2] = 0;
+
+      /* perform checksum test on the block (if one is stored) */
+      cksum = EndGetI32(buf+cfdata_CheckSum);
+      if (cksum && cksum != checksum(buf+4, 4, checksum(data, len, 0))) {
+        /* checksum is wrong */
+        return DECR_CHECKSUM;
+      }
+
+      /* outlen=0 means this block was part of a split block */
+      outlen = EndGetI16(buf+cfdata_UncompressedSize);
+      if (outlen == 0) {
+        /* the rest of the block lives in the next part of this folder;
+         * parts are numbered 0..num_splits, so there must be one left */
+        if (CAB(split) >= CAB(current)->num_splits) return DECR_INPUT;
+
+        cabinet_close(cab);
+        cab = CAB(current)->cab[++CAB(split)];
+        if (!cabinet_open(cab)) return DECR_INPUT;
+        cabinet_seek(cab, CAB(current)->offset[CAB(split)]);
+      }
+    }
+
+    /* decompress block */
+    if ((err = CAB(decompress)(inlen, outlen))) {
+      return err;
+    }
+
+    CAB(outlen) = outlen;
+    CAB(outpos) = CAB(outbuf);
+  }
+
+  return DECR_OK;
+}
+
+
+/* Extracts one cabinet member to the output file.
+ *
+ * If fi lives in a different folder than the one currently being
+ * decoded, or lies before the current decode position, the decoder for
+ * fi's folder is (re)initialised and decoding restarts at the beginning
+ * of that folder. Data in front of fi is then decoded and discarded, and
+ * finally fi itself is decoded into the file opened by file_open().
+ *
+ * Only LZX folders are supported; every other compression type is
+ * rejected as DECR_DATAFORMAT. Errors are reported on stderr; nothing is
+ * returned to the caller.
+ */
+void extract_file(struct file *fi) {
+  struct folder *fol = fi->folder, *oldfol = CAB(current);
+  LONG err = DECR_OK;
+
+  /* is a change of folder needed? do we need to reset the current folder? */
+  if (fol != oldfol || fi->offset < CAB(offset)) {
+    UWORD comptype = fol->comp_type;
+    int ct1 = comptype & cffoldCOMPTYPE_MASK;
+    int ct2 = oldfol ? (oldfol->comp_type & cffoldCOMPTYPE_MASK) : 0;
+
+    /* if the archiver has changed, call the old archiver's free() function */
+    if (ct1 != ct2) {
+      switch (ct2) {
+      case cffoldCOMPTYPE_LZX:
+        if (LZX(window)) {
+          free(LZX(window));
+          LZX(window) = NULL;
+        }
+        break;
+      case cffoldCOMPTYPE_QUANTUM:
+        fprintf(stderr, "Fatal: Quantum compression scheme free() called, should never happen!\n");
+        exit(1);
+        break;
+      }
+    }
+
+    switch (ct1) {
+    case cffoldCOMPTYPE_LZX:
+      CAB(decompress) = LZXdecompress;
+      err = LZXinit((comptype >> 8) & 0x1f);
+      break;
+
+    case cffoldCOMPTYPE_NONE:
+    case cffoldCOMPTYPE_MSZIP:
+    case cffoldCOMPTYPE_QUANTUM:
+    default:
+      err = DECR_DATAFORMAT;
+    }
+    if (err) goto exit_handler;
+
+    /* initialisation OK, set current folder and reset offset */
+    if (oldfol) cabinet_close(oldfol->cab[CAB(split)]);
+    if (!cabinet_open(fol->cab[0])) {
+      /* the old folder's stream has been closed and LZXinit() has been
+       * called, so no folder can be continued from here; report the
+       * failure instead of leaving silently with err still clear */
+      CAB(current) = NULL;
+      err = DECR_INPUT;
+      goto exit_handler;
+    }
+    cabinet_seek(fol->cab[0], fol->offset[0]);
+    CAB(current) = fol;
+    CAB(offset) = 0;
+    CAB(outlen) = 0; /* discard existing block */
+    CAB(split)  = 0;
+  }
+
+  if (fi->offset > CAB(offset)) {
+    /* decode bytes and send them to /dev/null */
+    if ((err = decompress(fi, 0))) {
+      /* the decode position is unknown now; force a fresh start for the
+       * next file, exactly as a failed extraction does below */
+      CAB(current) = NULL;
+      goto exit_handler;
+    }
+    CAB(offset) = fi->offset;
+  }
+  if (!file_open(fi)) return;
+  err = decompress(fi, 1);
+  if (err) CAB(current) = NULL; else CAB(offset) += fi->length;
+  file_close(fi);
+
+exit_handler:
+  if (err) {
+    char *errmsg, *cabname;
+    switch (err) {
+    case DECR_NOMEMORY:
+      errmsg = "out of memory!\n"; break;
+    case DECR_ILLEGALDATA:
+      errmsg = "%s: illegal or corrupt data\n"; break;
+    case DECR_DATAFORMAT:
+      errmsg = "%s: unsupported data format\n"; break;
+    case DECR_CHECKSUM:
+      errmsg = "%s: checksum error\n"; break;
+    case DECR_INPUT:
+      errmsg = "%s: input error\n"; break;
+    case DECR_OUTPUT:
+      errmsg = "%s: output error\n"; break;
+    default:
+      errmsg = "%s: unknown error (BUG)\n";
+    }
+
+    /* name the cabinet part being read, or the folder's first cabinet
+     * when there is no current folder any more */
+    if (CAB(current)) {
+      cabname = CAB(current)->cab[CAB(split)]->filename;
+    }
+    else {
+      cabname = fi->folder->cab[0]->filename;
+    }
+
+    fprintf(stderr, errmsg, cabname);
+  }
+}
+
+/* Finds every cabinet in the input and extracts each member whose name
+ * equals filename.
+ *
+ * Returns 1 if at least one matching member was found and handed to
+ * extract_file(), 0 otherwise - including when the input contains no
+ * cabinet at all. Extraction errors are reported by extract_file() and
+ * do not change the result.
+ */
+int extract_file_from_cab(char *filename) {
+  int found=0;
+  struct cabinet *basecab, *cab, *cab1, *cab2;
+  struct file *filelist, *fi;
+
+  /* load the file requested; with no cabinet there is nothing to find,
+   * so report "not found" rather than the same value as a match */
+  basecab = find_cabs_in_file();
+  if (!basecab) return 0;
+
+  /* iterate over all cabinets found in that file */
+  for (cab = basecab; cab; cab=cab->next) {
+
+    /* link any spanning cabinets -- backwards. Nothing here loads the
+     * neighbouring cabinets, so stop at the first one that is missing
+     * instead of following a NULL pointer */
+    for (cab1 = cab;
+         (cab1->flags & cfheadPREV_CABINET) && cab1->prevcab;
+         cab1 = cab1->prevcab) {
+      cab1->prevcab->nextcab = cab1;
+    }
+
+    /* link any spanning cabinets -- forwards, with the same guard */
+    for (cab2 = cab;
+         (cab2->flags & cfheadNEXT_CABINET) && cab2->nextcab;
+         cab2 = cab2->nextcab) {
+      cab2->nextcab->prevcab = cab2;
+    }
+
+    /* cab1 is now the earliest cabinet of the set that is available */
+    filelist = process_files(cab1);
+    CAB(current) = NULL;
+
+    for (fi = filelist; fi; fi = fi->next) {
+      if (strcmp(fi->filename, filename) == 0) {
+        extract_file(fi);
+        found = 1;
+      }
+    }
+  }
+
+  return found;
+}
+
+
+/*
+vm.setUserInfo(0, baseAddr);
+vm.setUserInfo(1, cabstream.length);
+vm.setUserInfo(2, theFontName);
 */
 
 int main(int argc, char** argv) {
-  // FIXME: do some cool stuff here
+   /* Extract the cabinet member named by user-info slot 2 and use the
+    * result of extract_file_from_cab() as the exit status.
+    * NOTE(review): the matching vm.setUserInfo(2, ...) call on the Java
+    * side is still commented out in this patch, so slot 2 is presumably
+    * unset for now - confirm before relying on it. */
+   return extract_file_from_cab(_user_info[2]);
 }
+
Index: src/org/xwt/translators/MSPack.java
===================================================================
RCS file: /cvs/xwt/src/org/xwt/translators/MSPack.java,v
retrieving revision 1.1
diff -u -d -B -u -d -r1.1 MSPack.java
--- src/org/xwt/translators/MSPack.java	27 Sep 2003 23:43:30 -0000	1.1
+++ src/org/xwt/translators/MSPack.java	29 Sep 2003 19:50:12 -0000
@@ -29,6 +29,7 @@
             vm.copyout(cabstream, baseAddr, cabstream.length);
             vm.setUserInfo(0, baseAddr);
             vm.setUserInfo(1, cabstream.length);
+//            vm.setUserInfo(2, theFontName);
             
             vm.execute();
             // FIXME: do more stuff here


_______________________________________________
patches mailing list
patches@lists.xwt.org
http://lists.xwt.org/listinfo/patches
[prev in list] [next in list] [prev in thread] [next in thread]
Configure | About | News | Add a list | Sponsored by KoreLogic