--nextPart2920567.QRFuHvjJuH Content-Type: multipart/mixed; boundary="Boundary-01=_j5dPIyJiUV/Jqj7" Content-Transfer-Encoding: 7bit Content-Disposition: inline --Boundary-01=_j5dPIyJiUV/Jqj7 Content-Type: text/plain; charset="iso-8859-1" Content-Transfer-Encoding: quoted-printable Content-Disposition: inline [xposted to core-devel] Hei, You might remember me as the guy that, four years ago, used internal entiti= es=20 in kleopatra/index.docbook to have a handy shortcut for the menu entries: F5 ViewRedisplay"> =2E.. Use &view-redisplay; to foo bar. Up to now, these entities were squarely removed by the poxml parser, which= =20 made their contents inaccessible to translators. Which, in turn, made it=20 impossible for me to use these handy shortcuts. Since that is clearly an unacceptable state, and since I think that also=20 translators could save a bunch of work if these shortcuts were made availab= le=20 to them, I've sat down and after a few hours of staring at incomprehensible= =20 code, came up with the attached patch, which fixes this. In order to be minimally intrusive, I've only enabled entity extraction for= =20 entities starting with "i18n-". That way, the standard entities such as=20 kappname and language don't clobber the .po(t). It's trivial to extract all= =20 internal entities, though. The patch is forwards- and backwards compatible. Old po2xml will ignore the= =20 new data, and new po2xml will leave the entity definition in peace if there= 's=20 no translation for them. One thing I haven't tested is the effect on the other two programs of the po= xml=20 suite, split2po and swappo. I just don't know what they do... I'd like to ask for permission to apply this patch now, in the feature free= ze,=20 because I think it's a bugfix, and because poxml is a development tool that= 's=20 hardly user-visible, so the freeze shouldn't apply to it. 
Another reason is that I intend to make much, much use of this new facility= =20 when updating kleopatra/index.docbook for Kleopatra 2.0. I'll commit in ~24h unless someone objects. Please keep my @kdab address CC'ed. Thanks, Marc --Boundary-01=_j5dPIyJiUV/Jqj7 Content-Type: text/x-diff; charset="iso-8859-1"; name="poxml-internal-entities.diff" Content-Transfer-Encoding: quoted-printable Content-Disposition: attachment; filename="poxml-internal-entities.diff" Index: parser.h =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D =2D-- parser.h (revision 813020) +++ parser.h (working copy) @@ -64,6 +64,12 @@ void addAnchor(QString anchor) { anchors.insert(anchor, current); } void increasePara() { current++; } =20 + ParaCounter & operator+=3D( const ParaCounter & other ) { + current +=3D other.current; + anchors.unite( other.anchors ); + return *this; + } + QMap anchors; int current; }; @@ -73,6 +79,12 @@ public: MsgList() {} ParaCounter pc; + + MsgList & operator+=3D( const MsgList & other ) { + Q3ValueList::operator+=3D( other ); + pc +=3D other.pc; + return *this; + } }; =20 class StructureParser : public QXmlDefaultHandler Index: lauri.po =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D =2D-- lauri.po (revision 813020) +++ lauri.po (working copy) @@ -14,6 +14,12 @@ "Content-Transfer-Encoding: 8bit\n" "X-Generator: KBabel 0.9.2\n" =20 +#. Tag: !ENTITY +#: lauri.xml:3 +#, no-c-format +msgid "&Ctrl;QFileQuit\">" +msgstr "&Ctrl;QDateiBeenden\">" + #. 
Tag: title #: lauri.xml:16 #, no-c-format Index: lauri.xml =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D =2D-- lauri.xml (revision 813020) +++ lauri.xml (working copy) @@ -1,7 +1,7 @@ =2D Lauri" > + Lauri" > &Ctrl;QFileQuit"> ]> =20 Index: parser.cpp =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D =2D-- parser.cpp (revision 813020) +++ parser.cpp (working copy) @@ -11,6 +11,17 @@ =20 using namespace std; =20 +static int countRev( const QString & str, QChar ch, int idx ) { + if ( idx < 0 ) + idx +=3D str.length(); + if ( idx >=3D str.length() ) + idx =3D str.length(); + int count =3D 0; + for ( int i =3D 0 ; i <=3D idx ; ++i ) + count +=3D ( str[i] =3D=3D ch ); + return count; +} + static const char *singletags[] =3D {"beginpage","imagedata", "colspec", "= spanspec", "anchor", "xref", "area", "footnoteref", "void", "inlinegraphic", @@ -931,6 +942,38 @@ QString contents =3D QString::fromUtf8( ccontents ); StructureParser::cleanupTags(contents); =20 + MsgList english; + { + // find internal entities that start with "i18n-", and extract + // their replacement texts: + QRegExp rx( "=3D 0 ; ind= ex =3D rx.indexIn( contents, index ) ) { + const QString name =3D rx.cap( 1 ); + const QChar delim =3D rx.cap( 2 ).at( 0 ); + const int start =3D index; + index =3D contents.indexOf( delim, index + rx.matchedLength() = ); + index =3D contents.indexOf( '>', index ); + if ( !name.startsWith( "i18n-" ) ) + continue; + const QString entity =3D contents.mid( start, index - start + = 1 ); + MsgBlock block; + block.tag =3D "!ENTITY"; + BlockInfo bi; + bi.start_line =3D countRev( contents, '\n', index ) + 1; + bi.start_col =3D start - contents.lastIndexOf( '\n', start ) = =2D 
1; + bi.end_line =3D bi.start_line + entity.count( '\n' ); + bi.end_col =3D index - contents.lastIndexOf( '\n', index ) = + 1; +#ifdef POXML_DEBUG + qDebug( "ENTITY %s @ i:%d l:%d c:%d->l:%d c:%d", qPrintable( n= ame ), + index, bi.start_line, bi.start_col, bi.end_line, bi.en= d_col ); +#endif + block.lines.push_back( bi ); + block.msgid =3D entity; + english.push_back( block ); + } + } + + // Remove all entity definitions now: while (true) { int index =3D contents.find("