[prev in list] [next in list] [prev in thread] [next in thread]
List: kde-commits
Subject: branches/kdepim/enterprise4/kdepimlibs
From: Thomas McGuire <mcguire () kde ! org>
Date: 2009-08-26 16:15:38
Message-ID: 1251303338.639955.30688.nullmailer () svn ! kde ! org
[Download RAW message or body]
SVN commit 1015928 by tmcguire:
Merged revisions 1015198 via svnmerge from
svn+ssh://tmcguire@svn.kde.org/home/kde/branches/KDE/4.3/kdepimlibs
........
r1015198 | mkoller | 2009-08-24 23:22:31 +0200 (Mon, 24 Aug 2009) | 9 lines
Backport r1015195 by mkoller from trunk to the 4.3 branch:
CCBUG: 202445
Make a much better attempt at discovering a URL, even if it is enclosed in
some sort of brackets/quotes, and even if it is interrupted by line breaks or white space.
RFC 3986 explains this in Appendix C.
........
_M . (directory)
M +42 -17 kpimutils/linklocator.cpp
M +68 -2 kpimutils/tests/testlinklocator.cpp
** branches/kdepim/enterprise4/kdepimlibs #property svnmerge-integrated
- /branches/KDE/4.3/kdepimlibs:1-986158,990023,990532,990575,990631,990684,991932,996755,997101,997490,998251,1000615,1007460,1008037,1008812,1009437,1011841,1013328-1013331,1013810
+ /branches/KDE/4.3/kdepimlibs:1-986158,990023,990532,990575,990631,990684,991932,996755,997101,997490,998251,1000615,1007460,1008037,1008812,1009437,1011841,1013328-1013331,1013810,1015198
--- branches/kdepim/enterprise4/kdepimlibs/kpimutils/linklocator.cpp #1015927:1015928
@@ -107,27 +107,52 @@
{
QString url;
if ( atUrl() ) {
- // for reference: rfc1738:
- // Thus, only alphanumerics, the special characters "$-_.+!*'(),", and
- // reserved characters used for their reserved purposes may be used
- // unencoded within a URL.
- // NOTE: this implementation is not RFC conforming
- int start = mPos;
- while ( mPos < (int)mText.length() &&
- mText[mPos] > ' ' && mText[mPos] != '"' &&
- QString( "<>[]" ).indexOf( mText[mPos] ) == -1 ) {
- ++mPos;
+ // NOTE: see http://tools.ietf.org/html/rfc3986#appendix-A and especially appendix-C
+ // Appendix-C mainly says, that when extracting URLs from plain text, line breaks shall
+ // be allowed and should be ignored when the URI is extracted.
+
+ // This implementation follows this recommendation and
+ // allows the URL to be enclosed within different kind of brackets/quotes
+ // If an URL is enclosed, whitespace characters are allowed and removed, otherwise
+ // the URL ends with the first whitespace
+ // Also, if the URL is enclosed in brackets, the URL itself is not allowed
+ // to contain the closing bracket, as this would be detected as the end of the URL
+
+ QChar beforeUrl, afterUrl;
+
+ // detect if the url has been surrounded by brackets or quotes
+ if ( mPos > 0 ) {
+ beforeUrl = mText[mPos - 1];
+
+ if ( beforeUrl == '(' )
+ afterUrl = ')';
+ else if ( beforeUrl == '[' )
+ afterUrl = ']';
+ else if ( beforeUrl == '<' )
+ afterUrl = '>';
+ else if ( beforeUrl == '>' ) // for e.g. <link>http://.....</link>
+ afterUrl = '<';
+ else if ( beforeUrl == '"' )
+ afterUrl = '"';
}
- // some URLs really end with: # / & - _
- const QString allowedSpecialChars = QString( "#/&-_" );
- while ( mPos > start && mText[mPos-1].isPunct() &&
- allowedSpecialChars.indexOf( mText[mPos-1] ) == -1 ) {
- --mPos;
+ url.reserve( maxUrlLen() ); // avoid allocs
+ int start = mPos;
+ while ( ( mPos < (int)mText.length() ) &&
+ ( mText[mPos].isPrint() || mText[mPos].isSpace() ) &&
+ ( ( afterUrl.isNull() && !mText[mPos].isSpace() ) ||
+ ( !afterUrl.isNull() && mText[mPos] != afterUrl ) )
+ ) {
+ if ( !mText[mPos].isSpace() ) { // skip whitespace
+ url.append( mText[mPos] );
+ if ( url.length() > maxUrlLen() )
+ break;
+ }
+
+ mPos++;
}
- url = mText.mid( start, mPos - start );
- if ( isEmptyUrl(url) || mPos - start > maxUrlLen() ) {
+ if ( isEmptyUrl(url) || ( url.length() > maxUrlLen() ) ) {
mPos = start;
url = "";
} else {
--- branches/kdepim/enterprise4/kdepimlibs/kpimutils/tests/testlinklocator.cpp #1015927:1015928
@@ -112,6 +112,7 @@
brackets << "(" << ")";
brackets << "<" << ">";
brackets << "[" << "]";
+ brackets << "\"" << "\"";
brackets << "<link>" << "</link>";
for (int i = 0; i < brackets.count(); i += 2)
@@ -139,19 +140,40 @@
urls << "user:pass@www.kde.org:1234/sub/path";
urls << "user:pass@www.kde.org:1234/sub/path?a=1";
urls << "user:pass@www.kde.org:1234/sub/path?a=1#anchor";
+ urls << "user:pass@www.kde.org:1234/sub/\npath \n /long/ path \t ?a=1#anchor";
urls << "user:pass@www.kde.org:1234/sub/path/special(123)?a=1#anchor";
urls << "user:pass@www.kde.org:1234/sub/path:with:colon/special(123)?a=1#anchor";
+ urls << "user:pass@www.kde.org:1234/sub/path:with:colon/special(123)?a=1#anchor[bla";
+ urls << "user:pass@www.kde.org:1234/sub/path:with:colon/special(123)?a=1#anchor[bla]";
+ urls << "user:pass@www.kde.org:1234/\nsub/path:with:colon/\nspecial(123)?\na=1#anchor[bla]";
+ urls << "user:pass@www.kde.org:1234/ \n sub/path:with:colon/ \n\t \t special(123)?\n\t \n\t a=1#anchor[bla]";
foreach (QString schema, schemas)
{
foreach (QString url, urls)
{
+ // by defintion: if the URL is enclosed in brackets, the URL itself is not allowed
+ // to contain the closing bracket, as this would be detected as the end of the URL
+ if ( ( left.length() == 1 ) && ( url.contains( right[0] ) ) )
+ continue;
+
+ // if the url contains a whitespace, it must be enclosed with brackets
+ if ( (url.contains('\n') || url.contains('\t') || url.contains(' ')) &&
+ left.isEmpty() )
+ continue;
+
QString test(left + schema + url + right);
LinkLocator ll(test, left.length());
QString gotUrl = ll.getUrl();
+ // we want to have the url without whitespace
+ url.remove(' ');
+ url.remove('\n');
+ url.remove('\t');
+
bool ok = ( gotUrl == (schema + url) );
//qDebug() << "check:" << (ok ? "OK" : "NOK") << test << "=>" << (schema + url);
+ if ( !ok ) qDebug() << "got:" << gotUrl;
QVERIFY2( ok, qPrintable(test) );
}
}
@@ -163,6 +185,10 @@
urlsWithoutSchema << ".kde.org:1234/sub/path?a=1#anchor";
urlsWithoutSchema << ".kde.org:1234/sub/path/special(123)?a=1#anchor";
urlsWithoutSchema << ".kde.org:1234/sub/path:with:colon/special(123)?a=1#anchor";
+ urlsWithoutSchema << ".kde.org:1234/sub/path:with:colon/special(123)?a=1#anchor[bla";
+ urlsWithoutSchema << ".kde.org:1234/sub/path:with:colon/special(123)?a=1#anchor[bla]";
+ urlsWithoutSchema << ".kde.org:1234/\nsub/path:with:colon/\nspecial(123)?\na=1#anchor[bla]";
+ urlsWithoutSchema << ".kde.org:1234/ \n sub/path:with:colon/ \n\t \t special(123)?\n\t \n\t a=1#anchor[bla]";
QStringList starts;
starts << "www" << "ftp" << "news:www";
@@ -171,16 +197,55 @@
{
foreach (QString url, urlsWithoutSchema)
{
+ // by defintion: if the URL is enclosed in brackets, the URL itself is not allowed
+ // to contain the closing bracket, as this would be detected as the end of the URL
+ if ( ( left.length() == 1 ) && ( url.contains( right[0] ) ) )
+ continue;
+
+ // if the url contains a whitespace, it must be enclosed with brackets
+ if ( (url.contains('\n') || url.contains('\t') || url.contains(' ')) &&
+ left.isEmpty() )
+ continue;
+
QString test(left + start + url + right);
LinkLocator ll(test, left.length());
QString gotUrl = ll.getUrl();
+ // we want to have the url without whitespace
+ url.remove(' ');
+ url.remove('\n');
+ url.remove('\t');
+
bool ok = ( gotUrl == (start + url) );
//qDebug() << "check:" << (ok ? "OK" : "NOK") << test << "=>" << (start + url);
- QVERIFY2( ok, qPrintable(test) );
+ if ( !ok ) qDebug() << "got:" << gotUrl;
+ QVERIFY2( ok, qPrintable(gotUrl) );
}
}
+ // test max url length
+ QString url = "http://www.kde.org/this/is/a_very_loooooong_url/test/test/test";
+ {
+ LinkLocator ll(url);
+ ll.setMaxUrlLen(10);
+ QVERIFY( ll.getUrl().isEmpty() ); // url too long
+ }
+ {
+ LinkLocator ll(url);
+ ll.setMaxUrlLen(url.length() - 1);
+ QVERIFY( ll.getUrl().isEmpty() ); // url too long
+ }
+ {
+ LinkLocator ll(url);
+ ll.setMaxUrlLen(url.length());
+ QVERIFY( ll.getUrl() == url );
+ }
+ {
+ LinkLocator ll(url);
+ ll.setMaxUrlLen(url.length() + 1);
+ QVERIFY( ll.getUrl() == url );
+ }
+
// mailto
{
QString addr = "mailto:test@kde.org";
@@ -191,7 +256,8 @@
bool ok = ( gotUrl == addr );
//qDebug() << "check:" << (ok ? "OK" : "NOK") << test << "=>" << addr;
- QVERIFY2( ok, qPrintable(test) );
+ if ( !ok ) qDebug() << "got:" << gotUrl;
+ QVERIFY2( ok, qPrintable(gotUrl) );
}
}
[prev in list] [next in list] [prev in thread] [next in thread]
Configure |
About |
News |
Add a list |
Sponsored by KoreLogic