[prev in list] [next in list] [prev in thread] [next in thread] 

List:       kde-commits
Subject:    branches/kdepim/enterprise4/kdepimlibs
From:       Thomas McGuire <mcguire () kde ! org>
Date:       2009-08-26 16:15:38
Message-ID: 1251303338.639955.30688.nullmailer () svn ! kde ! org
[Download RAW message or body]

SVN commit 1015928 by tmcguire:

Merged revisions 1015198 via svnmerge from 
svn+ssh://tmcguire@svn.kde.org/home/kde/branches/KDE/4.3/kdepimlibs

........
  r1015198 | mkoller | 2009-08-24 23:22:31 +0200 (Mon, 24 Aug 2009) | 9 lines
  
  Backport r1015195 by mkoller from trunk to the 4.3 branch:
  
  CCBUG: 202445
  
  Try much better to discover a URL, even if it is enclosed with
  some sort of brackets/quotes and even interrupted by line breaks or white space.
  RFC3986 explains this in appendix C
........


 _M            . (directory)  
 M  +42 -17    kpimutils/linklocator.cpp  
 M  +68 -2     kpimutils/tests/testlinklocator.cpp  


** branches/kdepim/enterprise4/kdepimlibs #property svnmerge-integrated
   - /branches/KDE/4.3/kdepimlibs:1-986158,990023,990532,990575,990631,990684,991932,996755,997101,997490,998251,1000615,1007460,1008037,1008812,1009437,1011841,1013328-1013331,1013810
  + /branches/KDE/4.3/kdepimlibs:1-986158,990023,990532,990575,990631,990684,991932,996755,997101,997490,998251,1000615,1007460,1008037,1008812,1009437,1011841,1013328-1013331,1013810,1015198
                
--- branches/kdepim/enterprise4/kdepimlibs/kpimutils/linklocator.cpp #1015927:1015928
@@ -107,27 +107,52 @@
 {
   QString url;
   if ( atUrl() ) {
-    // for reference: rfc1738:
-    // Thus, only alphanumerics, the special characters "$-_.+!*'(),", and
-    // reserved characters used for their reserved purposes may be used
-    // unencoded within a URL.
-    // NOTE: this implementation is not RFC conforming
-    int start = mPos;
-    while ( mPos < (int)mText.length() &&
-            mText[mPos] > ' ' && mText[mPos] != '"' &&
-            QString( "<>[]" ).indexOf( mText[mPos] ) == -1 ) {
-      ++mPos;
+    // NOTE: see http://tools.ietf.org/html/rfc3986#appendix-A and especially appendix-C
+    // Appendix-C mainly says, that when extracting URLs from plain text, line breaks shall
+    // be allowed and should be ignored when the URI is extracted.
+
+    // This implementation follows this recommendation and
+    // allows the URL to be enclosed within different kind of brackets/quotes
+    // If an URL is enclosed, whitespace characters are allowed and removed, otherwise
+    // the URL ends with the first whitespace
+    // Also, if the URL is enclosed in brackets, the URL itself is not allowed
+    // to contain the closing bracket, as this would be detected as the end of the URL
+
+    QChar beforeUrl, afterUrl;
+
+    // detect if the url has been surrounded by brackets or quotes
+    if ( mPos > 0 ) {
+      beforeUrl = mText[mPos - 1];
+
+      if ( beforeUrl == '(' )
+        afterUrl = ')';
+      else if ( beforeUrl == '[' )
+        afterUrl = ']';
+      else if ( beforeUrl == '<' )
+        afterUrl = '>';
+      else if ( beforeUrl == '>' )    // for e.g. <link>http://.....</link>
+        afterUrl = '<';
+      else if ( beforeUrl == '"' )
+        afterUrl = '"';
     }
 
-    // some URLs really end with:  # / & - _
-    const QString allowedSpecialChars = QString( "#/&-_" );
-    while ( mPos > start && mText[mPos-1].isPunct() &&
-            allowedSpecialChars.indexOf( mText[mPos-1] ) == -1 ) {
-      --mPos;
+    url.reserve( maxUrlLen() );  // avoid allocs
+    int start = mPos;
+    while ( ( mPos < (int)mText.length() ) &&
+            ( mText[mPos].isPrint() || mText[mPos].isSpace() ) &&
+            ( ( afterUrl.isNull() && !mText[mPos].isSpace() ) ||
+              ( !afterUrl.isNull() && mText[mPos] != afterUrl ) )
+          ) {
+      if ( !mText[mPos].isSpace() ) {   // skip whitespace
+        url.append( mText[mPos] );
+        if ( url.length() > maxUrlLen() )
+          break;
+      }
+
+      mPos++;
     }
 
-    url = mText.mid( start, mPos - start );
-    if ( isEmptyUrl(url) || mPos - start > maxUrlLen() ) {
+    if ( isEmptyUrl(url) || ( url.length() > maxUrlLen() ) ) {
       mPos = start;
       url = "";
     } else {
--- branches/kdepim/enterprise4/kdepimlibs/kpimutils/tests/testlinklocator.cpp #1015927:1015928
@@ -112,6 +112,7 @@
   brackets << "(" << ")";
   brackets << "<" << ">";
   brackets << "[" << "]";
+  brackets << "\"" << "\"";
   brackets << "<link>" << "</link>";
 
   for (int i = 0; i < brackets.count(); i += 2)
@@ -139,19 +140,40 @@
   urls << "user:pass@www.kde.org:1234/sub/path";
   urls << "user:pass@www.kde.org:1234/sub/path?a=1";
   urls << "user:pass@www.kde.org:1234/sub/path?a=1#anchor";
+  urls << "user:pass@www.kde.org:1234/sub/\npath  \n /long/  path \t  ?a=1#anchor";
   urls << "user:pass@www.kde.org:1234/sub/path/special(123)?a=1#anchor";
   urls << "user:pass@www.kde.org:1234/sub/path:with:colon/special(123)?a=1#anchor";
+  urls << "user:pass@www.kde.org:1234/sub/path:with:colon/special(123)?a=1#anchor[bla";
+  urls << "user:pass@www.kde.org:1234/sub/path:with:colon/special(123)?a=1#anchor[bla]";
+  urls << "user:pass@www.kde.org:1234/\nsub/path:with:colon/\nspecial(123)?\na=1#anchor[bla]";
+  urls << "user:pass@www.kde.org:1234/  \n  sub/path:with:colon/  \n\t   \t   special(123)?\n\t  \n\t   a=1#anchor[bla]";
 
   foreach (QString schema, schemas)
   {
     foreach (QString url, urls)
     {
+      // by defintion: if the URL is enclosed in brackets, the URL itself is not allowed
+      // to contain the closing bracket, as this would be detected as the end of the URL
+      if ( ( left.length() == 1 ) && ( url.contains( right[0] ) ) )
+        continue;
+
+      // if the url contains a whitespace, it must be enclosed with brackets
+      if ( (url.contains('\n') || url.contains('\t') || url.contains(' ')) &&
+           left.isEmpty() )
+        continue;
+
       QString test(left + schema + url + right);
       LinkLocator ll(test, left.length());
       QString gotUrl = ll.getUrl();
 
+      // we want to have the url without whitespace
+      url.remove(' ');
+      url.remove('\n');
+      url.remove('\t');
+
       bool ok = ( gotUrl == (schema + url) );
       //qDebug() << "check:" << (ok ? "OK" : "NOK") << test << "=>" << (schema + url);
+      if ( !ok ) qDebug() << "got:" << gotUrl;
       QVERIFY2( ok, qPrintable(test) );
     }
   }
@@ -163,6 +185,10 @@
   urlsWithoutSchema << ".kde.org:1234/sub/path?a=1#anchor";
   urlsWithoutSchema << ".kde.org:1234/sub/path/special(123)?a=1#anchor";
   urlsWithoutSchema << ".kde.org:1234/sub/path:with:colon/special(123)?a=1#anchor";
+  urlsWithoutSchema << ".kde.org:1234/sub/path:with:colon/special(123)?a=1#anchor[bla";
+  urlsWithoutSchema << ".kde.org:1234/sub/path:with:colon/special(123)?a=1#anchor[bla]";
+  urlsWithoutSchema << ".kde.org:1234/\nsub/path:with:colon/\nspecial(123)?\na=1#anchor[bla]";
+  urlsWithoutSchema << ".kde.org:1234/  \n  sub/path:with:colon/  \n\t   \t   special(123)?\n\t  \n\t   a=1#anchor[bla]";
 
   QStringList starts;
   starts << "www" << "ftp" << "news:www";
@@ -171,16 +197,55 @@
   {
     foreach (QString url, urlsWithoutSchema)
     {
+      // by defintion: if the URL is enclosed in brackets, the URL itself is not allowed
+      // to contain the closing bracket, as this would be detected as the end of the URL
+      if ( ( left.length() == 1 ) && ( url.contains( right[0] ) ) )
+        continue;
+
+      // if the url contains a whitespace, it must be enclosed with brackets
+      if ( (url.contains('\n') || url.contains('\t') || url.contains(' ')) &&
+           left.isEmpty() )
+        continue;
+
       QString test(left + start + url + right);
       LinkLocator ll(test, left.length());
       QString gotUrl = ll.getUrl();
 
+      // we want to have the url without whitespace
+      url.remove(' ');
+      url.remove('\n');
+      url.remove('\t');
+
       bool ok = ( gotUrl == (start + url) );
       //qDebug() << "check:" << (ok ? "OK" : "NOK") << test << "=>" << (start + url);
-      QVERIFY2( ok, qPrintable(test) );
+      if ( !ok ) qDebug() << "got:" << gotUrl;
+      QVERIFY2( ok, qPrintable(gotUrl) );
     }
   }
 
+  // test max url length
+  QString url = "http://www.kde.org/this/is/a_very_loooooong_url/test/test/test";
+  {
+    LinkLocator ll(url);
+    ll.setMaxUrlLen(10);
+    QVERIFY( ll.getUrl().isEmpty() );  // url too long
+  }
+  {
+    LinkLocator ll(url);
+    ll.setMaxUrlLen(url.length() - 1);
+    QVERIFY( ll.getUrl().isEmpty() );  // url too long
+  }
+  {
+    LinkLocator ll(url);
+    ll.setMaxUrlLen(url.length());
+    QVERIFY( ll.getUrl() == url );
+  }
+  {
+    LinkLocator ll(url);
+    ll.setMaxUrlLen(url.length() + 1);
+    QVERIFY( ll.getUrl() == url );
+  }
+
   // mailto
   {
     QString addr = "mailto:test@kde.org";
@@ -191,7 +256,8 @@
 
     bool ok = ( gotUrl == addr );
     //qDebug() << "check:" << (ok ? "OK" : "NOK") << test << "=>" << addr;
-    QVERIFY2( ok, qPrintable(test) );
+    if ( !ok ) qDebug() << "got:" << gotUrl;
+    QVERIFY2( ok, qPrintable(gotUrl) );
   }
 }
 


[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic