[prev in list] [next in list] [prev in thread] [next in thread] 

List:       kde-core-devel
Subject:    KURL-patch
From:       Tobias Anton <TA () ESC-Electronics ! de>
Date:       2001-08-10 20:53:21
[Download RAW message or body]

This patch lets KURL accept URLs in which the query and the fragment
identifier appear in reversed order.

Broken URLs like this:

http://www.yahoo.com/index.html#angels?ser=15

can be parsed fault-tolerantly into correct URLs like this one:

http://www.yahoo.com/index.html?ser=15#angels

A Mozilla test case exposed this behaviour.

According to the RFC (RFC 2396), the query precedes the fragment identifier,
but many people don't know that. Since the query and the fragment identifier
are always encoded, a literal '?' or '#' inside either of them should not
occur, so no incompatibilities should arise from the patch.

Any objections?

Tobias
["kurl.patch" (text/x-diff)]

Index: kurl.cpp
===================================================================
RCS file: /home/kde/kdelibs/kdecore/kurl.cpp,v
retrieving revision 1.168
diff -u -r1.168 kurl.cpp
--- kurl.cpp	2001/07/19 07:42:41	1.168
+++ kurl.cpp	2001/07/28 11:36:43
@@ -188,7 +188,7 @@
     unsigned int character = segment[ i++ ].unicode();
     if ((character == ' ') || (character > 255))
        bKeepEncoded = false;
-    if (character == '%' ) 
+    if (character == '%' )
     {
       char a = i+1 < old_length ? hex2int( segment[i].latin1() ) : -1;
       char b = i+1 < old_length ? hex2int( segment[i+1].latin1() ) : -1;
@@ -265,7 +265,7 @@
   // RFC 2396!! (dA)
   QString result;
   int cdUp, orig_pos, pos;
-  
+
   cdUp = 0;
   pos = orig_pos = len;
   while ( pos && (pos = path.findRev('/',--pos)) != -1 )
@@ -278,7 +278,7 @@
       // Ignore any occurances of '.' This includes entries
       // that simply do not make sense like /..../
       if ( (len!=0 || !cleanDirSeparator) && (len != 1 || path[pos+1] != '.') )
-      {	    
+      {
         if ( !cdUp )
           result = path.mid(pos, len+1) + result;
         else
@@ -287,10 +287,10 @@
     }
     orig_pos = pos;
   }
-    
+
   if ( result.isEmpty() )
     result = "/";
-  
+
   // Restore the trailing '/'
   len = result.length();
   if ( len > 0 && result.right(1)[0] != '/' && slash )
@@ -506,6 +506,7 @@
 
 void KURL::parse( const QString& _url, int encoding_hint )
 {
+    kdDebug(126) << "parse " << _url << endl;
   // Return immediately whenever the given url
   // is empty or null.
   if ( _url.isEmpty() )
@@ -521,6 +522,9 @@
   QChar* orig = buf;
   memcpy( buf, _url.unicode(), len * sizeof( QChar ) );
 
+  QChar delim;
+  QString tmp;
+
   uint pos = 0;
 
   // Node 1: Accept alpha or slash
@@ -554,7 +558,7 @@
     {
       m_strProtocol = QString( orig, pos ).lower();
       pos++;
-      goto Node11;
+      goto Node9;
     }
   else
     goto NodeErr;
@@ -566,10 +570,10 @@
 
   // Node 4: Accept any amount of characters.
   if (buf[pos] == '[')     // An IPv6 host follows.
-      goto Node8;  
+      goto Node8;
   // Terminate on / or @ or ? or #
   x = buf[pos];
-  while( (x != ':') && (x != '@') && (x != '/') && (x != '?') && (x != '#') && (pos < len) ) 
+  while( (x != ':') && (x != '@') && (x != '/') && (x != '?') && (x != '#') && (pos < len) )
      x = buf[++pos];
   if ( pos == len )
     {
@@ -605,9 +609,9 @@
   start = pos++;
 
   // Node 6: Read everything until @, /, ? or #
-  while( (pos < len) && 
-		(buf[pos] != '@') && 
-		(buf[pos] != '/') && 
+  while( (pos < len) &&
+		(buf[pos] != '@') &&
+		(buf[pos] != '/') &&
 		(buf[pos] != '?') &&
 		(buf[pos] != '#')) pos++;
   // If we now have a '@' the ':' seperates user and password.
@@ -647,8 +651,8 @@
     if (pos < len) pos++; // Skip ']'
     if (pos == len)
        goto NodeOk;
-  } 
-  else 
+  }
+  else
   {
     // Non IPv6 address
     start = pos++;
@@ -659,7 +663,7 @@
     {
        m_strHost = decode(QString( buf + start, pos - start ), 0, encoding_hint);
        goto NodeOk;
-    } 
+    }
     m_strHost = decode(QString( buf + start, pos - start ), 0, encoding_hint);
   }
   x = buf[pos];
@@ -686,37 +690,40 @@
   if ( pos == len )
     goto NodeOk;
   start = pos++;
+
+ Node9: // parse path until query or reference reached
+
+  while( buf[pos] != '#' && buf[pos]!='?' && pos < len ) pos++;
 
-  // Node 9: Accept any character and # or terminate
- Node9:
-  while( buf[pos] != '#' && pos < len ) pos++;
+  tmp = QString( buf + start, pos - start );
+  setEncodedPathAndQuery( tmp, encoding_hint );
+
   if ( pos == len )
-    {
-      QString tmp( buf + start, len - start );
-      setEncodedPathAndQuery( tmp, encoding_hint );
-      // setEncodedPathAndQuery( QString( buf + start, pos - start ) );
       goto NodeOk;
-    }
-  else if ( buf[pos] != '#' )
-    goto NodeErr;
-  setEncodedPathAndQuery( QString( buf + start, pos - start ), encoding_hint );
-  pos++;
 
-  // Node 10: Accept all the rest
-  m_strRef_encoded = QString( buf + pos, len - pos );
-  goto NodeOk;
+  start = pos + 1;
 
-  // Node 11 We need at least one character
- Node11:
-  start = pos;
-  if ( pos++ == len )
-    goto NodeOk; // Wrong, but since a fix was applied up top it is a non-issue here!!!!
-                 // Just for the record an opaque URL such as "mailto:" is always required
-                 // to have at least one more character other than a '/' following the colon.
-  // Node 12: Accept the res
-  setEncodedPathAndQuery( QString( buf + start, len - start ), encoding_hint );
-  goto NodeOk;
+ Node10: // parse query or reference depending on what comes first
+  delim = (buf[pos++]=='#'?'?':'#');
 
+  while(buf[pos]!=delim && pos < len) pos++;
+
+  tmp = QString(buf + start, pos - start);
+  if (delim=='#')
+      m_strQuery_encoded = tmp;
+  else
+      m_strRef_encoded = tmp;
+
+  if (pos == len)
+      goto NodeOk;
+
+ Node11: // feed the rest into the remaining variable
+  tmp = QString( buf + pos + 1, len - pos - 1);
+  if (delim == '#')
+      m_strRef_encoded = tmp;
+  else
+      m_strQuery_encoded = tmp;
+
  NodeOk:
   delete []orig;
   m_bIsMalformed = false; // Valid URL
@@ -981,10 +988,22 @@
   }
 
   // TODO apply encoding_hint to the query
-  tmp += m_strQuery_encoded;
+  if (!m_strQuery_encoded.isNull())
+      tmp += '?' + m_strQuery_encoded;
   return tmp;
 }
 
+void KURL::setEncodedPath( const QString& _txt, int encoding_hint )
+{
+    m_strPath_encoded = _txt;
+
+  bool keepEncoded;
+  m_strPath = decode( m_strPath_encoded, &keepEncoded, encoding_hint );
+  if (!keepEncoded)
+     m_strPath_encoded = QString::null;
+}
+
+
 void KURL::setEncodedPathAndQuery( const QString& _txt, int encoding_hint )
 {
   int pos = _txt.find( '?' );
@@ -996,7 +1015,7 @@
   else
   {
     m_strPath_encoded = _txt.left( pos );
-    m_strQuery_encoded = _txt.right(_txt.length() - pos);
+    m_strQuery_encoded = _txt.right(_txt.length() - pos + 1);
   }
   bool keepEncoded;
   m_strPath = decode( m_strPath_encoded, &keepEncoded, encoding_hint );
@@ -1063,7 +1082,7 @@
       u += "@";
     }
     bool IPv6 = (m_strHost.find(':') != -1);
-    if (IPv6) 
+    if (IPv6)
        u += '[' + m_strHost + ']';
     else
        u += encode(m_strHost, true, encoding_hint);
@@ -1108,7 +1127,7 @@
       u += "@";
     }
     bool IPv6 = (m_strHost.find(':') != -1);
-    if (IPv6) 
+    if (IPv6)
        u += '[' + m_strHost + ']';
     else
        u += lazy_encode(m_strHost);
@@ -1124,8 +1143,8 @@
   }
 
   u += trailingSlash( _trailing, lazy_encode( m_strPath ) );
-
-  u += m_strQuery_encoded;
+  if (!m_strQuery_encoded.isNull())
+      u += '?' + m_strQuery_encoded;
 
   if ( hasRef() )
   {
@@ -1270,7 +1289,7 @@
 
   int i = result.findRev( "/" );
   // If ( i == -1 ) => the first character is not a '/'
-  // So it's some URL like file:blah.tgz, with no path 
+  // So it's some URL like file:blah.tgz, with no path
   if ( i == -1 )
     return QString::null;
 
@@ -1459,8 +1478,10 @@
 
 void KURL::setQuery( const QString &_txt, int )
 {
-   if (_txt.length() && (_txt[0] !='?'))
-      m_strQuery_encoded = "?" + _txt;
+   if (!_txt.length())
+       return;
+   if (_txt[0] =='?')
+      m_strQuery_encoded = _txt.mid(1);
    else
       m_strQuery_encoded = _txt;
 }
Index: kurl.h
===================================================================
RCS file: /home/kde/kdelibs/kdecore/kurl.h,v
retrieving revision 1.76
diff -u -r1.76 kurl.h
--- kurl.h	2001/07/19 07:42:41	1.76
+++ kurl.h	2001/07/28 11:36:53
@@ -229,6 +229,8 @@
    */
   void setEncodedPathAndQuery( const QString& _txt, int encoding_hint = 0 );
 
+  void setEncodedPath(const QString& _txt, int encoding_hint = 0 );
+
   /**
    * @return The concatenation if the encoded path , '?' and the encoded query.
    *


[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic