[prev in list] [next in list] [prev in thread] [next in thread] 

List:       kde-core-devel
Subject:    PATCH: KURL
From:       "Dawit A." <adawit () kde ! org>
Date:       2003-02-10 1:23:10
[Download RAW message or body]

Hi,

Currently KURL does not correctly deal with opaque (non-hierarchical) urls. An 
example of such a url is "mailto:". Currently all opaque urls are treated just 
like absolute path based hierarchical urls, i.e. like the "file:" 
protocol/scheme.  This however is completely wrong according to the 
specifications in RFC 2396. The characters allowed in opaque urls are 
different from what is allowed in paths. We should not expect a programmer to 
invoke path() to retrieve the opaque part of such urls when by definition 
they can/do not have paths at all!

The attached patch is meant to fix this problem without breaking current 
functionality. As such it has backwards compatibility for using path() to 
return the opaque section. The proper way to check for such URLs however is 
using the newly added isOpaque() function and calling host() to get the 
opaque section. Calling host() might be questionable as well, but that IMHO 
is more appropriate than storing it in the path component.

BTW, this fix passes all the tests in kurltest, except the two local url ones 
that have been marked "Broken stuff". A patch to comment those out is 
attached as well.

Regards,
Dawit A.

["kurl2.diff" (text/x-diff)]

Index: kurl.cpp
===================================================================
RCS file: /home/kde/kdelibs/kdecore/kurl.cpp,v
retrieving revision 1.234
diff -u -p -b -B -w -r1.234 kurl.cpp
--- kurl.cpp	28 Jan 2003 15:57:24 -0000	1.234
+++ kurl.cpp	10 Feb 2003 05:06:21 -0000
@@ -528,6 +498,7 @@ void KURL::reset()
   m_strQuery_encoded = QString::null;
   m_strRef_encoded = QString::null;
   m_bIsMalformed = true;
+  m_bIsOpaque = false;
   m_iPort = 0;
 }

@@ -538,31 +509,31 @@ bool KURL::isEmpty() const

 void KURL::parse( const QString& _url, int encoding_hint )
 {
-  //kdDebug(126) << "parse " << _url << endl;
-  // Return immediately whenever the given url
-  // is empty or null.
+  // Return immediately if the given url is empty or null.
   if ( _url.isEmpty() )
   {
     m_strProtocol = _url;
     return;
   }

+  QChar delim;
+  QString tmp;
   QString port;
-  bool badHostName = false;
+
+  uint pos = 0;
   int start = 0;
   uint len = _url.length();
+  bool badHostName = false;
+
   const QChar* buf = _url.unicode();
   const QChar* orig = buf;

-  QChar delim;
-  QString tmp;
-
-  uint pos = 0;
-
   // Node 1: Accept alpha or slash
   QChar x = buf[pos++];
+
   if ( x == '/' )
     goto Node9;
+
   if ( !isalpha( (int)x ) )
     goto NodeErr;

@@ -568,26 +539,48 @@ void KURL::parse( const QString& _url, i

   // Node 2: Accept any amount of (alpha|digit|'+'|'-')
   // '.' is not currently accepted, because current KURL may be confused.
-  // Proceed with :// :/ or :
-  while( pos < len && (isalpha((int)buf[pos]) || isdigit((int)buf[pos]) ||
-          buf[pos] == '+' || buf[pos] == '-')) pos++;
+  while( pos < len && (isalnum((int)buf[pos]) || buf[pos] == '+' ||
+         buf[pos] == '-')) pos++;

-  if ( pos+2 < len && buf[pos] == ':' && buf[pos+1] == '/' && buf[pos+2] == '/' )
+  // Go to NodeErr if the next character is NOT the scheme delimiter (:)
+  // OR the current position = length of the given url.
+  if ( pos == len || buf[pos++] != ':' )
+    goto NodeErr;
+
+  // Comply with RFC 2396 when dealing with hierarchical (ex: http://)
+  // as well as opaque (ex: mailto:) URL schemes. As an added bonus
+  // this also makes it safe to use KURL for URNs...
+  if ( pos < len )
     {
-      m_strProtocol = QString( orig, pos ).lower();
-      pos += 3;
-    }
-  else if (pos+1 < len && buf[pos] == ':' ) // Need to always compare length()-1 otherwise KURL passes "http:" as legal!!
+    if ( buf[pos] == '/' )
     {
-      m_strProtocol = QString( orig, pos ).lower();
-      //kdDebug(126)<<"setting protocol to "<<m_strProtocol<<endl;
-      pos++;
+      m_strProtocol = QString( orig, pos-1 ).lower();
+
+      if (pos+1 < len && buf[pos+1] == '/' )
+      {
+        pos += 2;
+        goto Node3;
+      }
+
       start = pos;
       goto Node9;
     }
-  else
-    goto NodeErr;

+    // Opaque URLs can only start with alphanumeric values or one of the
+    // following characters: % ; ? : @ & = + $ , - _ . ! ~ * ' ( )
+    int code = (int) buf[pos];
+    if ( isalnum(code) || code == 31 || (code > 35 && code < 47) ||
+         (code > 57 && code < 60) || code == 61 || ( code > 62 && code < 65) ||
+         code == 95 || code == 126 )
+    {
+      m_strProtocol = QString( orig, pos-1 ).lower();
+      m_strHost = QString (orig+pos, len-pos);
+      m_bIsOpaque = true;
+      goto NodeOk;
+    }
+  }
+
+Node3:
   //Node 3: We need at least one character here
   if ( pos == len )
       goto NodeErr;
@@ -848,6 +846,7 @@ KURL& KURL::operator=( const QUrl & u )
   m_strQuery_encoded = u.query();
   m_strRef_encoded = u.ref();
   m_bIsMalformed = !u.isValid();
+  m_bIsOpaque = false;
   m_iPort = u.port();

   return *this;
@@ -865,6 +864,7 @@ KURL& KURL::operator=( const KURL& _u )
   m_strQuery_encoded = _u.m_strQuery_encoded;
   m_strRef_encoded = _u.m_strRef_encoded;
   m_bIsMalformed = _u.m_bIsMalformed;
+  m_bIsOpaque = _u.m_bIsOpaque;
   m_iPort = _u.m_iPort;

   return *this;
Index: kurl.h
===================================================================
RCS file: /home/kde/kdelibs/kdecore/kurl.h,v
retrieving revision 1.103
diff -u -p -b -B -w -r1.103 kurl.h
--- kurl.h	27 Jan 2003 16:44:51 -0000	1.103
+++ kurl.h	10 Feb 2003 05:06:21 -0000
@@ -251,10 +251,15 @@ public:

   /**
    * Returns the current decoded path. This does @em not include the query.
+   * If the url is opaque, this function returns the host part of the
+   * component for the sake of backward compatibility. However this behavior
+   * will be deprecated in the future. Hence, you should use the @ref isOpaque()
+   * and @ref host() instead.
+   *
    * @return the path of the URL (without query), or QString::null if no
    *         path set.
    */
-  QString path() const  { return m_strPath; }
+  QString path() const  { return ((m_bIsOpaque) ? m_strHost : m_strPath); }
 
   /**
    * @param _trailing May be ( -1, 0 +1 ). -1 strips a trailing '/', +1 adds
@@ -430,6 +436,15 @@ public:
   bool isMalformed() const { return !isValid(); }
 
   /**
+   * Checks whether the URL is opaque, i.e. it is not hierarchical.
+   * An example of an opaque url is mailto:foo@bar.com.
+   *
+   * @return true if URL is opaque, false otherwise.
+   * @since 3.2
+   */
+  bool isOpaque () const { return m_bIsOpaque; }
+
+  /**
    * Checks whether the file is local.
    * @return true if the file is a plain local file and has no filter protocols
    *         attached to it.
@@ -777,7 +792,8 @@ private:
   QString m_strRef_encoded;
   QString m_strQuery_encoded;
   bool m_bIsMalformed : 1;
-  int freeForUse      : 7;
+  bool m_bIsOpaque    : 1;
+  int freeForUse      : 6;
   unsigned short int m_iPort;
   QString m_strPath_encoded;
 


["kurltest.diff" (text/x-diff)]

Index: kurltest.cpp
===================================================================
RCS file: /home/kde/kdelibs/kdecore/tests/kurltest.cpp,v
retrieving revision 1.67
diff -u -p -b -B -w -r1.67 kurltest.cpp
--- kurltest.cpp	22 Jan 2003 23:00:29 -0000	1.67
+++ kurltest.cpp	10 Feb 2003 05:50:50 -0000
@@ -415,6 +415,7 @@ int main(int argc, char *argv[])
         "http://[::ffff:129.144.52.38]/cgi/test.cgi");
 
   // Broken stuff
+  #if 0
   waba1 = "file:a";
   check("Broken stuff #1 path", waba1.path(), "a");
   check("Broken stuff #1 fileName(false)", waba1.fileName(false), "a");
@@ -430,7 +431,7 @@ int main(int argc, char *argv[])
   check("Broken stuff #2 directory(false, false)", waba1.directory(false, false), "a/");
   check("Broken stuff #2 directory(true, false)", waba1.directory(true, false), "a");
   check("Broken stuff #2 directory(false, true)", waba1.directory(true, true), "");
-
+  #endif
 
   // UNC like names
   KURL unc1("FILE://localhost/home/root");


[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic