[prev in list] [next in list] [prev in thread] [next in thread] 

List:       kde-commits
Subject:    l10n-support/scripts
From:       Chusslove Illich <caslav.ilic () gmx ! net>
Date:       2009-10-10 21:05:34
Message-ID: 1255208734.232164.21790.nullmailer () svn ! kde ! org
[Download RAW message or body]

SVN commit 1033709 by ilic:

New format of compiled pmaps (01). The code for the old format (00) is still included, but is no longer used.

 M  +99 -24    ts-pmap-compile.py  


--- trunk/l10n-support/scripts/ts-pmap-compile.py #1033708:1033709
@@ -55,7 +55,7 @@
                     if i >= slen: raise END_PROP_PARSE
 
                 if i + 1 >= slen:
-                    error("unexpected end of file %1" % fname)
+                    error("unexpected end of file %s" % fname)
 
                 if s[i] != '#':
                     # Separator characters for this entry.
@@ -144,60 +144,135 @@
 
     except END_PROP_PARSE:
         if state != s_nextEntry:
-            error("unexpected end of file in %1" % fname)
+            error("unexpected end of file in %s" % fname)
 
     return pmap
 
 
-def write_map_bin (fname, pmap):
+# Convert integer to 32-bit big-endian byte sequence.
+def int_bin_32 (val):
+    return struct.pack(">i", val)[-4:]
 
-    fh = open(fname, "wb")
 
-    # Output integer value, big-endian, 32-bit.
-    maxb = 4
-    maxval = 2**(maxb*8)
-    def write_int_bin (val):
-        if val >= maxval:
-            error("too large number, must be less then %d" % maxval)
-        fh.write(struct.pack(">i", val)[-maxb:])
+# Convert integer to 64-bit big-endian byte sequence.
+def int_bin_64 (val):
+    return struct.pack(">q", val)[-8:]
 
-    # Output string length in bytes, followed by UTF-8 byte sequence.
-    def write_str_bin (val):
-        val_enc = val.encode("utf-8")
-        write_int_bin(len(val_enc))
-        fh.write(val_enc)
 
+# Convert string to UTF-8 byte sequence,
+# preceded by its length in 32-bit big-endian.
+def str_bin_32 (val):
+    val_enc = val.encode("utf-8")
+    return int_bin_32(len(val_enc)) + val_enc
+
+
+# Binary map format 00.
+def write_map_bin_00 (fh, pmap):
+
     # Magic bytes.
     fh.write("TSPMAP00")
 
     # Number of entries.
-    write_int_bin(len(pmap))
+    fh.write(int_bin_32(len(pmap)))
 
     for ekeys, props in pmap:
         # Number of phrase keys and all phrase keys.
-        write_int_bin(len(ekeys))
+        fh.write(int_bin_32(len(ekeys)))
         for ekey in ekeys:
-            write_str_bin(ekey)
+            fh.write(str_bin_32(ekey))
 
         # Number of properties and all properties.
-        write_int_bin(len(props))
+        fh.write(int_bin_32(len(props)))
         for pkey, pval in props:
-            write_str_bin(pkey)
-            write_str_bin(pval)
+            fh.write(str_bin_32(pkey))
+            fh.write(str_bin_32(pval))
 
-    fh.close()
 
+# Binary map format 01.
+def write_map_bin_01 (fh, pmap):
 
+    offset0 = 0
+    binint32len = len(int_bin_32(0))
+    binint64len = len(int_bin_64(0))
+
+    # Magic bytes.
+    mbytestr = "TSPMAP01"
+    offset0 += len(mbytestr)
+
+    # Compute length of binary representation of all entry keys
+    # additionally equipped with offsets to corresponding property blobs.
+    offset0 += binint32len
+    offset0 += binint64len
+    binekeyslen = 0
+    for ekeys, d1 in pmap:
+        binekeyslen += sum([len(str_bin_32(x)) + binint64len for x in ekeys])
+    offset0 += binekeyslen
+
+    # Construct binary representations of all unique property keys.
+    offset0 += binint32len
+    offset0 += binint64len
+    allpkeys = set()
+    for d1, props in pmap:
+        allpkeys.update([x[0] for x in props])
+    binpkeys = "".join(map(str_bin_32, sorted(allpkeys)))
+    offset0 += len(binpkeys)
+
+    # Construct binary representations of properties for each entry.
+    # Compute byte offsets for each of these binary blobs, in the given order.
+    binprops = []
+    plength = 0
+    poffset = offset0 + binint32len
+    for d1, props in pmap:
+        cbinprops = "".join(sum([map(str_bin_32, x) for x in props], []))
+        cbinprops = "".join([int_bin_32(len(props)), int_bin_32(len(cbinprops)),
+                             cbinprops])
+        offset = poffset + plength
+        binprops.append([cbinprops, offset])
+        poffset = offset
+        plength = len(cbinprops)
+
+    # Construct binary representations of all entry keys with property offsets.
+    allekeys = []
+    binekeys = []
+    for (ekeys, d1), (d2, offset) in zip(pmap, binprops):
+        binoffset = int_bin_64(offset)
+        cbinekeys = "".join([str_bin_32(x) + binoffset for x in ekeys])
+        binekeys.append(cbinekeys)
+        allekeys.extend(ekeys)
+    binekeys = "".join(binekeys)
+    assert(binekeyslen == len(binekeys))
+
+    # Write everything out.
+    fh.write(mbytestr)
+    fh.write(int_bin_32(len(allekeys)))
+    fh.write(int_bin_64(len(binekeys)))
+    fh.write(binekeys)
+    fh.write(int_bin_32(len(allpkeys)))
+    fh.write(int_bin_64(len(binpkeys)))
+    fh.write(binpkeys)
+    fh.write(int_bin_32(len(pmap)))
+    for cbinprops, d1 in binprops:
+        fh.write(cbinprops)
+
+
 def main ():
 
     if len(sys.argv) != 3:
         error("usage: %s <input_file> <output_file>" % cmdname)
 
+    try:
+        import psyco
+        psyco.full()
+    except ImportError:
+        pass
+
     ifile = sys.argv[1]
     ofile = sys.argv[2]
 
     pmap = read_pmap(ifile)
-    write_map_bin(ofile, pmap)
+    ofh = open(ofile, "wb")
+    write_map_bin_01(ofh, pmap)
+    ofh.close()
 
 
 if __name__ == '__main__':
[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic