[prev in list] [next in list] [prev in thread] [next in thread]
List: kde-commits
Subject: l10n-support/pology/l10n/sr/hook
From: Chusslove Illich <caslav.ilic () gmx ! net>
Date: 2010-02-26 19:30:19
Message-ID: 1267212619.773464.27012.nullmailer () svn ! kde ! org
[Download RAW message or body]
SVN commit 1096516 by ilic:
Hybridization algorithm now uses diffing only on segments which cannot be hybridized with reflex ticks.
M +44 -67 wconv.py
--- trunk/l10n-support/pology/l10n/sr/hook/wconv.py #1096515:1096516
@@ -359,7 +359,8 @@
reflex_spec_dehyb.append((tick, refmap, reflen_min, reflen_max))
# Derive data for hybridization:
- # [(reflen, [(btrk, {ijkfrm: [(ekvfrm, tick)...]})...])...]
+ # [(reflen, [(btrk, [(ekvlen,
+ # {ijkfrm: [(ekvfrm, tick)...]})...])...])...]
for ijkfrm, ekvfrm in refmap.items():
reflen = len(ijkfrm)
if reflen not in reflex_spec_hyb:
@@ -373,16 +374,22 @@
btrk += 1
if btrk not in subspec:
subspec[btrk] = {}
- if ijkfrm not in subspec[btrk]:
- subspec[btrk][ijkfrm] = []
- subspec[btrk][ijkfrm].append((ekvfrm, tick))
+ ekvlen = len(ekvfrm)
+ if ekvlen not in subspec[btrk]:
+ subspec[btrk][ekvlen] = {}
+ if ijkfrm not in subspec[btrk][ekvlen]:
+ subspec[btrk][ekvlen][ijkfrm] = []
+ subspec[btrk][ekvlen][ijkfrm].append((ekvfrm, tick))
# Put hybridization data into list of pairs up to required depth.
# Sort such that on hybridization reflexes are tried by decreasing length
# and increasing backtrack.
tmplst = []
for reflen, subspec in reflex_spec_hyb.items():
- tmplst.append((reflen, list(sorted(subspec.items()))))
+ tmplst2 = []
+ for ekvlen, subspec2 in subspec.items():
+ tmplst2.append((ekvlen, list(sorted(subspec2.items()))))
+ tmplst.append((reflen, list(sorted(tmplst2))))
reflex_spec_hyb = list(reversed(sorted(tmplst)))
return reflex_spec_dehyb, reflex_spec_hyb
@@ -546,65 +553,35 @@
@rtype: string
"""
- # If character-level diff is done at once, weird segments may appear.
- # Instead, first diff on word-level, then on character-level.
- wdiff = word_diff(text1, text2)
- cdiff = []
- i = 0
- while i < len(wdiff):
- tag1, seg1 = wdiff[i]
- tag2, seg2 = wdiff[i + 1] if i + 1 < len(wdiff) else ("", "")
- if (tag1 == "-" and tag2 == "+") or (tag1 == "+" and tag2 == "-"):
- if tag1 == "+" and tag2 == "-": # reverse from expected order
- seg1, seg2 = seg2, seg1
- cdiff.extend(tdiff(seg1, seg2))
- i += 2
- else:
- cdiff.extend([(tag1, c) for c in seg1])
- i += 1
-
- lenc = len(cdiff)
- cdiff12 = cdiff
- cdiff21 = [({"+": "-", "-": "+", " ": " "}[t], s) for t, s in cdiff]
- i1 = 0; i1p = 0; i2 = 0; i2p = 0; ic = 0
+ len1 = len(text1); len2 = len(text2)
+ i1 = 0; i1p = 0; i2 = 0; i2p = 0
segs = []
while True:
- while ic < lenc and cdiff12[ic][0] == " ":
- ic += 1; i1 += 1; i2 += 1
- if ic == lenc:
+ while i1 < len1 and i2 < len2 and text1[i1] == text2[i2]:
+ i1 += 1; i2 += 1
+ if i1 == len1 and i2 == len2:
segs.append(text1[i1p:]) # same as text2[i2p:]
break
# Try to hybridize difference by jat-reflex ticks.
tick = None
- for cdiff, texte, texti, ie, ii, order12 in (
- (cdiff12, text1, text2, i1, i2, True),
- (cdiff21, text2, text1, i2, i1, False),
+ for texte, texti, ie, ii, order12 in (
+ (text1, text2, i1, i2, True),
+ (text2, text1, i2, i1, False),
):
for leni, subspecs in _reflex_spec_hyb:
- for btrk, refmap in subspecs:
- # Advance the diff to cover Ijekavian reflex and -+ span,
- # accumulating Ekavian reflex length along the way.
- icn = ic - btrk
- if icn < 0:
+ for btrk, subspecs2 in subspecs:
+ iec = ie - btrk
+ iic = ii - btrk
+ if iec < 0 or iic < 0:
continue
- lene = 0; cnti = leni
- while icn < lenc and (cnti > 0 or cdiff[icn][0] != " "):
- if cdiff[icn][0] != "-":
- cnti -= 1
- if cdiff[icn][0] != "+":
- lene += 1
- icn += 1
- if cnti != 0:
- continue
- # Check if collected segments correspond to a mapping rule.
- ieb = ie - btrk
- frme = texte[ie - btrk:ie - btrk + lene]
- iib = ii - btrk
- frmi = texti[iib:iib + leni]
- for cfrme, ctick in refmap.get(frmi, []):
- if cfrme == frme:
- tick = ctick
- break
+ for lene, refmap in subspecs2:
+ frme = texte[ie - btrk:ie - btrk + lene]
+ frmi = texti[ii - btrk:ii - btrk + leni]
+ for cfrme, ctick in refmap.get(frmi, []):
+ if cfrme == frme:
+ tick = ctick
+ break
+ if tick: break
if tick: break
if tick: break
if tick: break
@@ -614,21 +591,21 @@
segs.append(tick + frmi)
i1p = i1 - btrk + (lene if order12 else leni)
i2p = i2 - btrk + (leni if order12 else lene)
- ic = icn
else:
# Hybridization by difference marks not possible.
# Use alternatives directive, or pure Ijekavian.
- frm1 = ""; frm2 = ""
- segs.append(text1[i1p:i1])
- while ic < lenc and cdiff12[ic][0] != " ":
- tag, c = cdiff12[ic]
- if tag == "-":
- frm1 += c; i1 += 1
- else:
- frm2 += c; i2 += 1
- ic += 1
- i1p = i1
- i2p = i2
+ i1b = i1; i2b = i2
+ while i1b >= 0 and text1[i1b].isalpha(): # same as *2*
+ i1b -= 1; i2b -= 1
+ i1b += 1; i2b += 1
+ segs.append(text1[i1p:i1b])
+ wdiff = word_diff(text1[i1b:], text2[i2b:])
+ if wdiff[0][0] == "-":
+ frm1 = wdiff[0][1]; frm2 = wdiff[1][1]
+ else:
+ frm1 = wdiff[1][1]; frm2 = wdiff[0][1]
+ i1p = i1b + len(frm1)
+ i2p = i2b + len(frm2)
if ekord == 1:
segs.append(_dhyb_althead + _delimit([frm1, frm2], delims))
elif ekord == 2:
[prev in list] [next in list] [prev in thread] [next in thread]
Configure |
About |
News |
Add a list |
Sponsored by KoreLogic