[prev in list] [next in list] [prev in thread] [next in thread]
List: pykde
Subject: [PyQt] wrong context handling in pylupdate with inner classes
From: Matteo Bertini <naufraghi () develer ! com>
Date: 2014-01-20 11:48:50
Message-ID: 1e7d8f8319fd54a283d9c4310d7e9afd () develer ! com
[Download RAW message or body]
Hi all, I found a small bug in pylupdate: when a class contains an inner class,
the generated .ts file uses the inner class as the context for the strings that follow it.
Minimal example:
==> wrong_ts_context.py <==
class OuterClass(QObject):
def foo(self):
self.tr("in context OuterClass")
# comment with strange indentation
self.tr("in context OuterClass after comment with strange
indentation")
class _InnerClass(object):
pass
self.tr("in context OuterClass after _InnerClass")
==> wrong_ts_context.ts <==
<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE TS><TS version="2.0">
<context>
<name>OuterClass</name>
<message>
<location filename="wrong_ts_context.py" line="3"/>
<source>in context OuterClass</source>
<translation type="unfinished"></translation>
</message>
<message>
<location filename="wrong_ts_context.py" line="5"/>
<source>in context OuterClass after comment with strange
indentation</source>
<translation type="unfinished"></translation>
</message>
</context>
<context>
<name>_InnerClass</name>
<message>
<location filename="wrong_ts_context.py" line="10"/>
<source>in context OuterClass after _InnerClass</source>
<translation type="unfinished"></translation>
</message>
</context>
</TS>
In big projects it is very hard to find the untranslated strings
because of this behaviour.
The PySide fork of pylupdate used to have the same bug:
https://github.com/PySide/Tools/commit/365f37c4c2fef6226baf406576bd9f0d32d6d304
However, that fix also treats comment lines as indentation, resulting in a
different error:
==> wrong_ts_context.ts <==
<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE TS><TS version="2.0">
<context>
<name></name>
<message>
<location filename="wrong_ts_context.py" line="5"/>
<source>in context OuterClass after comment with strange
indentation</source>
<translation type="unfinished"></translation>
</message>
<message>
<location filename="wrong_ts_context.py" line="10"/>
<source>in context OuterClass after _InnerClass</source>
<translation type="unfinished"></translation>
</message>
</context>
<context>
<name>OuterClass</name>
<message>
<location filename="wrong_ts_context.py" line="3"/>
<source>in context OuterClass</source>
<translation type="unfinished"></translation>
</message>
</context>
</TS>
I ported the PySide fix, adding a check for indentation in comments;
the resulting diffs against 4.10.3 (and 5.2, without the universal newline
backport) are attached.
==> wrong_ts_context.ts <==
<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE TS><TS version="2.0">
<context>
<name>OuterClass</name>
<message>
<location filename="wrong_ts_context.py" line="3"/>
<source>in context OuterClass</source>
<translation type="unfinished"></translation>
</message>
<message>
<location filename="wrong_ts_context.py" line="5"/>
<source>in context OuterClass after comment with strange
indentation</source>
<translation type="unfinished"></translation>
</message>
<message>
<location filename="wrong_ts_context.py" line="10"/>
<source>expected to be in context OuterClass but found in
_InnerClass</source>
<translation type="unfinished"></translation>
</message>
</context>
</TS>
Cheers,
Matteo Bertini
--
Matteo Bertini - naufraghi@develer.com
Develer S.r.l. - http://www.develer.com/
.hardware .software .innovation
Tel.: +39 055 3986627 - ext.: 211
["backport-universal-newline.diff" (text/x-c++)]
--- fetchtr.cpp 2013-08-21 07:51:08.000000000 +0200
+++ /dev/fd/63 2014-01-20 12:15:47.915990442 +0100
@@ -91,40 +91,74 @@
// the string to read from and current position in the string (otherwise)
static QString yyInStr;
static int yyInPos;
-static int buf;
+// - 'rawbuf' is used to hold bytes before universal newline translation.
+// - 'buf' is its higher-level counterpart, where every end-of-line appears as
+// a single '\n' character, regardless of the end-of-line style used in input
+// files.
+static int buf, rawbuf;
static int (*getChar)();
static int (*peekChar)();
static bool yyParsingUtf8;
-static int getCharFromFile()
+static int getTranslatedCharFromFile()
{
int c;
- if ( buf < 0 )
+ if ( rawbuf < 0 ) // Empty raw buffer?
c = getc( yyInFile );
else {
+ c = rawbuf;
+ rawbuf = -1; // Declare the raw buffer empty.
+ }
+
+ // Universal newline translation, similar to what Python does
+ if ( c == '\r' ) {
+ c = getc( yyInFile ); // Last byte of a \r\n sequence?
+ if ( c != '\n')
+ {
+ rawbuf = c; // No, put it in 'rawbuf' for later processing.
+ // Logical character that will be seen by higher-level functions
+ c = '\n';
+ }
+ // In all cases, c == '\n' here.
+ }
+
+ return c;
+}
+
+static int getCharFromFile()
+{
+ int c;
+
+ if ( buf < 0 ) { // Empty buffer?
+ c = getTranslatedCharFromFile();
+ } else {
c = buf;
- buf = -1;
+ buf = -1; // Declare the buffer empty.
}
- if ( c == '\n' )
- yyCurLineNo++;
+
+ if ( c == '\n' ) // This is after universal newline translation
+ yyCurLineNo++; // (i.e., a "logical" newline character).
+
return c;
}
static int peekCharFromFile()
{
- int c = getc( yyInFile );
- buf = c;
- return c;
+ // Read a character, possibly performing universal newline translation,
+ // and put it in 'buf' so that the next call to getCharFromFile() finds it
+ // already available.
+ buf = getCharFromFile();
+ return buf;
}
static void startTokenizer( const char *fileName, int (*getCharFunc)(),
int (*peekCharFunc)(), QTextCodec *codecForTr, QTextCodec *codecForSource )
{
yyInPos = 0;
- buf = -1;
+ buf = rawbuf = -1;
getChar = getCharFunc;
peekChar = peekCharFunc;
@@ -312,18 +346,18 @@
if ( yyCh == 'x' ) {
QByteArray hex = "0";
- yyCh = getChar();
- while ( isxdigit(yyCh) ) {
- hex += (char) yyCh;
yyCh = getChar();
- }
+ while ( isxdigit(yyCh) ) {
+ hex += (char) yyCh;
+ yyCh = getChar();
+ }
#if defined(_MSC_VER) && _MSC_VER >= 1400
- sscanf_s( hex, "%x", &n );
+ sscanf_s( hex, "%x", &n );
#else
- sscanf( hex, "%x", &n );
+ sscanf( hex, "%x", &n );
#endif
- if ( yyStringLen < sizeof(yyString) - 1 )
- yyString[yyStringLen++] = (char) n;
+ if ( yyStringLen < sizeof(yyString) - 1 )
+ yyString[yyStringLen++] = (char) n;
} else if ( yyCh >= '0' && yyCh < '8' ) {
QByteArray oct = "";
int n = 0;
@@ -340,6 +374,8 @@
#endif
if ( yyStringLen < sizeof(yyString) - 1 )
yyString[yyStringLen++] = (char) n;
+ } else if ( yyCh == '\n' ) {
+ yyCh = getChar();
} else {
const char *p = strchr( tab, yyCh );
if ( yyStringLen < sizeof(yyString) - 1 )
["fix-wrong-contex-with-inline-classes.diff" (text/x-c++)]
--- fetchtr.cpp.orig 2014-01-20 12:08:20.914186551 +0100
+++ fetchtr.cpp 2014-01-20 12:37:12.782142699 +0100
@@ -91,40 +91,111 @@
// the string to read from and current position in the string (otherwise)
static QString yyInStr;
static int yyInPos;
-static int buf;
+// - 'rawbuf' is used to hold bytes before universal newline translation.
+// - 'buf' is its higher-level counterpart, where every end-of-line appears as
+// a single '\n' character, regardless of the end-of-line style used in input
+// files.
+static int buf, rawbuf;
static int (*getChar)();
static int (*peekChar)();
static bool yyParsingUtf8;
-static int getCharFromFile()
+static int yySpacesPerIndent;
+static int yyContiguousSpaceCount;
+static bool yyUpdateIndentationLevel;
+
+// (Context, indentation level) pair.
+typedef QPair<QByteArray, int> ContextPair;
+// Stack of (Context, indentation level) pairs.
+typedef QStack<ContextPair> ContextStack;
+static ContextStack yyContextStack;
+
+static int yyContextPops;
+
+static int getTranslatedCharFromFile()
{
int c;
- if ( buf < 0 )
+ if ( rawbuf < 0 ) // Empty raw buffer?
c = getc( yyInFile );
else {
+ c = rawbuf;
+ rawbuf = -1; // Declare the raw buffer empty.
+ }
+
+ // Universal newline translation, similar to what Python does
+ if ( c == '\r' ) {
+ c = getc( yyInFile ); // Last byte of a \r\n sequence?
+ if ( c != '\n')
+ {
+ rawbuf = c; // No, put it in 'rawbuf' for later processing.
+ // Logical character that will be seen by higher-level functions
+ c = '\n';
+ }
+ // In all cases, c == '\n' here.
+ }
+
+ return c;
+}
+
+static int getCharFromFile()
+{
+ int c;
+
+ if ( buf < 0 ) { // Empty buffer?
+ c = getTranslatedCharFromFile();
+ } else {
c = buf;
- buf = -1;
+ buf = -1; // Declare the buffer empty.
+ }
+
+ if ( c == '\n' ) { // This is after universal newline translation
+ yyCurLineNo++; // (i.e., a "logical" newline character).
+ }
+ if (c == '\n' || c == '#' ) { // ignore intent in comments
+ yyUpdateIndentationLevel = true;
+ yyContiguousSpaceCount = 0;
+ } else if ( yyUpdateIndentationLevel && ( c == 32 || c == 9 ) ) { // space || tab
+ // FIXME: mixed indentation is unsupported
+ yyContiguousSpaceCount++;
+ } else {
+ // http://docs.python.org/2/reference/lexical_analysis.html#indentation
+ // Leading whitespace (spaces and tabs) at the beginning of a logical
+ // line is used to compute the indentation level of the line, which in
+ // turn is used to determine the grouping of statements.
+ if ( yySpacesPerIndent == 1 && yyContiguousSpaceCount > yySpacesPerIndent )
+ yySpacesPerIndent = yyContiguousSpaceCount;
+
+ if ( yyUpdateIndentationLevel && yyContextStack.count() > 1 ) {
+ ContextPair& current = yyContextStack.top();
+ if ( current.second == 0 && yyContiguousSpaceCount > 0 ) {
+ current.second = yyContiguousSpaceCount; // Pair.second := indentation level
+ yyContiguousSpaceCount = 0;
+ } else if ( yyContiguousSpaceCount < current.second ) {
+ yyContextPops = (current.second - yyContiguousSpaceCount) / yySpacesPerIndent;
+ }
+ }
+ yyUpdateIndentationLevel = false;
}
- if ( c == '\n' )
- yyCurLineNo++;
return c;
}
static int peekCharFromFile()
{
- int c = getc( yyInFile );
- buf = c;
- return c;
+ // Read a character, possibly performing universal newline translation,
+ // and put it in 'buf' so that the next call to getCharFromFile() finds it
+ // already available.
+ buf = getCharFromFile();
+ return buf;
}
static void startTokenizer( const char *fileName, int (*getCharFunc)(),
int (*peekCharFunc)(), QTextCodec *codecForTr, QTextCodec *codecForSource )
{
yyInPos = 0;
- buf = -1;
+ buf = rawbuf = -1;
getChar = getCharFunc;
peekChar = peekCharFunc;
@@ -141,6 +212,11 @@
yyCodecForSource = codecForSource;
yyParsingUtf8 = false;
+ yySpacesPerIndent = 1;
+ yyContiguousSpaceCount = 0;
+ yyUpdateIndentationLevel = false;
+ yyContextStack.clear();
+ yyContextPops = 0;
}
static int getToken()
@@ -312,18 +388,18 @@
if ( yyCh == 'x' ) {
QByteArray hex = "0";
- yyCh = getChar();
- while ( isxdigit(yyCh) ) {
- hex += (char) yyCh;
yyCh = getChar();
- }
+ while ( isxdigit(yyCh) ) {
+ hex += (char) yyCh;
+ yyCh = getChar();
+ }
#if defined(_MSC_VER) && _MSC_VER >= 1400
- sscanf_s( hex, "%x", &n );
+ sscanf_s( hex, "%x", &n );
#else
- sscanf( hex, "%x", &n );
+ sscanf( hex, "%x", &n );
#endif
- if ( yyStringLen < sizeof(yyString) - 1 )
- yyString[yyStringLen++] = (char) n;
+ if ( yyStringLen < sizeof(yyString) - 1 )
+ yyString[yyStringLen++] = (char) n;
} else if ( yyCh >= '0' && yyCh < '8' ) {
QByteArray oct = "";
int n = 0;
@@ -340,6 +416,8 @@
#endif
if ( yyStringLen < sizeof(yyString) - 1 )
yyString[yyStringLen++] = (char) n;
+ } else if ( yyCh == '\n' ) {
+ yyCh = getChar();
} else {
const char *p = strchr( tab, yyCh );
if ( yyStringLen < sizeof(yyString) - 1 )
@@ -583,20 +661,28 @@
static void parse( MetaTranslator *tor, const char *initialContext,
const char *defaultContext )
{
- QMap<QByteArray, QByteArray> qualifiedContexts;
QByteArray context;
QByteArray text;
QByteArray com;
- QByteArray functionContext = initialContext;
QByteArray prefix;
bool utf8 = false;
+ yyContextStack.push(ContextPair(initialContext, 0));
+
yyTok = getToken();
while ( yyTok != Tok_Eof ) {
+
+ if (yyContextPops > 0) {
+ for ( int i = 0; i < yyContextPops; i++)
+ yyContextStack.pop();
+ yyContextPops = 0;
+ }
+
switch ( yyTok ) {
case Tok_class:
yyTok = getToken();
- functionContext = yyIdent;
+ yyContextStack.push(ContextPair(yyIdent, 0));
+ yyContiguousSpaceCount = 0;
yyTok = getToken();
break;
case Tok_tr:
@@ -629,15 +715,12 @@
if (prefix.isNull())
context = defaultContext;
else if (qstrcmp(prefix, "self") == 0)
- context = functionContext;
+ context = yyContextStack.top().first;
else
context = prefix;
prefix = (const char *) 0;
- if (qualifiedContexts.contains(context))
- context = qualifiedContexts[context];
-
if (!text.isEmpty())
{
tor->insert(MetaTranslatorMessage(context, text, com,
_______________________________________________
PyQt mailing list PyQt@riverbankcomputing.com
http://www.riverbankcomputing.com/mailman/listinfo/pyqt
[prev in list] [next in list] [prev in thread] [next in thread]
Configure |
About |
News |
Add a list |
Sponsored by KoreLogic