[prev in list] [next in list] [prev in thread] [next in thread]
List: kde-commits
Subject: l10n-support/scripts
From: Chusslove Illich <caslav.ilic () gmx ! net>
Date: 2009-10-10 21:05:34
Message-ID: 1255208734.232164.21790.nullmailer () svn ! kde ! org
[Download RAW message or body]
SVN commit 1033709 by ilic:
New format of compiled pmaps (01). The code for the old format (00) is still present, but it is no longer used.
M +99 -24 ts-pmap-compile.py
--- trunk/l10n-support/scripts/ts-pmap-compile.py #1033708:1033709
@@ -55,7 +55,7 @@
if i >= slen: raise END_PROP_PARSE
if i + 1 >= slen:
- error("unexpected end of file %1" % fname)
+ error("unexpected end of file %s" % fname)
if s[i] != '#':
# Separator characters for this entry.
@@ -144,60 +144,135 @@
except END_PROP_PARSE:
if state != s_nextEntry:
- error("unexpected end of file in %1" % fname)
+ error("unexpected end of file in %s" % fname)
return pmap
-def write_map_bin (fname, pmap):
+# Convert integer to 32-bit big-endian byte sequence.
+def int_bin_32 (val):
+ return struct.pack(">i", val)[-4:]
- fh = open(fname, "wb")
- # Output integer value, big-endian, 32-bit.
- maxb = 4
- maxval = 2**(maxb*8)
- def write_int_bin (val):
- if val >= maxval:
- error("too large number, must be less then %d" % maxval)
- fh.write(struct.pack(">i", val)[-maxb:])
+# Convert integer to 64-bit big-endian byte sequence.
+def int_bin_64 (val):
+ return struct.pack(">q", val)[-8:]
- # Output string length in bytes, followed by UTF-8 byte sequence.
- def write_str_bin (val):
- val_enc = val.encode("utf-8")
- write_int_bin(len(val_enc))
- fh.write(val_enc)
+# Convert string to UTF-8 byte sequence,
+# preceded by its length in 32-bit big-endian.
+def str_bin_32 (val):
+ val_enc = val.encode("utf-8")
+ return int_bin_32(len(val_enc)) + val_enc
+
+
+# Binary map format 00.
+def write_map_bin_00 (fh, pmap):
+
# Magic bytes.
fh.write("TSPMAP00")
# Number of entries.
- write_int_bin(len(pmap))
+ fh.write(int_bin_32(len(pmap)))
for ekeys, props in pmap:
# Number of phrase keys and all phrase keys.
- write_int_bin(len(ekeys))
+ fh.write(int_bin_32(len(ekeys)))
for ekey in ekeys:
- write_str_bin(ekey)
+ fh.write(str_bin_32(ekey))
# Number of properties and all properties.
- write_int_bin(len(props))
+ fh.write(int_bin_32(len(props)))
for pkey, pval in props:
- write_str_bin(pkey)
- write_str_bin(pval)
+ fh.write(str_bin_32(pkey))
+ fh.write(str_bin_32(pval))
- fh.close()
+# Binary map format 01.
+def write_map_bin_01 (fh, pmap):
+ offset0 = 0
+ binint32len = len(int_bin_32(0))
+ binint64len = len(int_bin_64(0))
+
+ # Magic bytes.
+ mbytestr = "TSPMAP01"
+ offset0 += len(mbytestr)
+
+ # Compute length of binary representation of all entry keys
+ # additionally equipped with offsets to corresponding property blobs.
+ offset0 += binint32len
+ offset0 += binint64len
+ binekeyslen = 0
+ for ekeys, d1 in pmap:
+ binekeyslen += sum([len(str_bin_32(x)) + binint64len for x in ekeys])
+ offset0 += binekeyslen
+
+ # Construct binary representations of all unique property keys.
+ offset0 += binint32len
+ offset0 += binint64len
+ allpkeys = set()
+ for d1, props in pmap:
+ allpkeys.update([x[0] for x in props])
+ binpkeys = "".join(map(str_bin_32, sorted(allpkeys)))
+ offset0 += len(binpkeys)
+
+ # Construct binary representations of properties for each entry.
+ # Compute byte offsets for each of these binary blobs, in the given order.
+ binprops = []
+ plength = 0
+ poffset = offset0 + binint32len
+ for d1, props in pmap:
+ cbinprops = "".join(sum([map(str_bin_32, x) for x in props], []))
+ cbinprops = "".join([int_bin_32(len(props)), int_bin_32(len(cbinprops)),
+ cbinprops])
+ offset = poffset + plength
+ binprops.append([cbinprops, offset])
+ poffset = offset
+ plength = len(cbinprops)
+
+ # Construct binary representations of all entry keys with property offsets.
+ allekeys = []
+ binekeys = []
+ for (ekeys, d1), (d2, offset) in zip(pmap, binprops):
+ binoffset = int_bin_64(offset)
+ cbinekeys = "".join([str_bin_32(x) + binoffset for x in ekeys])
+ binekeys.append(cbinekeys)
+ allekeys.extend(ekeys)
+ binekeys = "".join(binekeys)
+ assert(binekeyslen == len(binekeys))
+
+ # Write everything out.
+ fh.write(mbytestr)
+ fh.write(int_bin_32(len(allekeys)))
+ fh.write(int_bin_64(len(binekeys)))
+ fh.write(binekeys)
+ fh.write(int_bin_32(len(allpkeys)))
+ fh.write(int_bin_64(len(binpkeys)))
+ fh.write(binpkeys)
+ fh.write(int_bin_32(len(pmap)))
+ for cbinprops, d1 in binprops:
+ fh.write(cbinprops)
+
+
def main ():
if len(sys.argv) != 3:
error("usage: %s <input_file> <output_file>" % cmdname)
+ try:
+ import psyco
+ psyco.full()
+ except ImportError:
+ pass
+
ifile = sys.argv[1]
ofile = sys.argv[2]
pmap = read_pmap(ifile)
- write_map_bin(ofile, pmap)
+ ofh = open(ofile, "wb")
+ write_map_bin_01(ofh, pmap)
+ ofh.close()
if __name__ == '__main__':
[prev in list] [next in list] [prev in thread] [next in thread]
Configure |
About |
News |
Add a list |
Sponsored by KoreLogic