[prev in list] [next in list] [prev in thread] [next in thread] 

List:       squirrelmail-cvs
Subject:    [SM-CVS] SF.net SVN: squirrelmail:[13902]
From:       pdontthink () users ! sourceforge ! net
Date:       2010-02-04 20:13:23
Message-ID: E1Nd84p-00062x-GO () sfp-svn-6 ! v30 ! ch3 ! sourceforge ! com
[Download RAW message or body]

Revision: 13902
          http://squirrelmail.svn.sourceforge.net/squirrelmail/?rev=13902&view=rev
Author:   pdontthink
Date:     2010-02-04 20:13:23 +0000 (Thu, 04 Feb 2010)

Log Message:
-----------
Multibyte strings (notably subjects) are now handled correctly (#2824813, #2925731)

Modified Paths:
--------------
    branches/SM-1_4-STABLE/squirrelmail/doc/ChangeLog
    branches/SM-1_4-STABLE/squirrelmail/functions/mailbox_display.php
    branches/SM-1_4-STABLE/squirrelmail/functions/strings.php

Modified: branches/SM-1_4-STABLE/squirrelmail/doc/ChangeLog
===================================================================
--- branches/SM-1_4-STABLE/squirrelmail/doc/ChangeLog	2010-02-04 20:05:51 UTC (rev \
                13901)
+++ branches/SM-1_4-STABLE/squirrelmail/doc/ChangeLog	2010-02-04 20:13:23 UTC (rev \
13902) @@ -14,6 +14,8 @@
   - Make base URL autodetection more robust; fixes some lighttpd issues
     (probably #1741469).
   - Encoded From headers now properly quoted (#2830141).
+  - Multibyte strings (notably subjects) are now handled correctly (#2824813,
+    #2925731).
 
 Version 1.4.20 RC2 - 17 Aug 2009
 --------------------------------

Modified: branches/SM-1_4-STABLE/squirrelmail/functions/mailbox_display.php
===================================================================
--- branches/SM-1_4-STABLE/squirrelmail/functions/mailbox_display.php	2010-02-04 \
                20:05:51 UTC (rev 13901)
+++ branches/SM-1_4-STABLE/squirrelmail/functions/mailbox_display.php	2010-02-04 \
20:13:23 UTC (rev 13902) @@ -201,7 +201,7 @@
     $col = 0;
     $msg['SUBJECT'] = decodeHeader($msg['SUBJECT']);
 //    $subject = processSubject($msg['SUBJECT'], $indent_array[$msg['ID']]);
-    $subject = truncateWithEntities(str_replace(' ',' ',$msg['SUBJECT']), \
$truncate_subject); +    $subject = sm_truncate_string(str_replace(' ',' \
',$msg['SUBJECT']), $truncate_subject, '...', TRUE);  if (sizeof($index_order)) {
         foreach ($index_order as $index_order_part) {
             switch ($index_order_part) {
@@ -217,7 +217,7 @@
                 $from_xtra = 'title="' . $senderFrom . '"';
                 echo html_tag( 'td',
                     html_tag('label',
-                               $italic . $bold . $flag . $fontstr . \
truncateWithEntities($senderName, $truncate_sender) . +                               \
$italic . $bold . $flag . $fontstr . sm_truncate_string($senderName, \
                $truncate_sender, '...', TRUE) .
                                $fontstr_end . $flag_end . $bold_end . $italic_end,
                            '','','for="msg'.$msg['ID'].'"'),
                            'left',

Modified: branches/SM-1_4-STABLE/squirrelmail/functions/strings.php
===================================================================
--- branches/SM-1_4-STABLE/squirrelmail/functions/strings.php	2010-02-04 20:05:51 UTC \
                (rev 13901)
+++ branches/SM-1_4-STABLE/squirrelmail/functions/strings.php	2010-02-04 20:13:23 UTC \
(rev 13902) @@ -138,51 +138,85 @@
 }
 
 /**
- * Truncates a string and take care of html encoded characters
- *
- * @param string  $s string to truncate
- * @param int $iTrimAt Trim at nn characters
- * @return string  Trimmed string
- */
-function truncateWithEntities($s, $iTrimAt) {
-    global $languages, $squirrelmail_language;
+  * Truncates the given string so that it has at
+  * most $max_chars characters.  NOTE that a "character"
+  * may be a multibyte character, or (optionally), an
+  * HTML entity , so this function is different than
+  * using substr() or mb_substr().
+  * 
+  * NOTE that if $elipses is given and used, the returned
+  *      number of characters will be $max_chars PLUS the
+  *      length of $elipses
+  * 
+  * @param string  $string    The string to truncate
+  * @param int     $max_chars The maximum allowable characters
+  * @param string  $elipses   A string that will be added to
+  *                           the end of the truncated string
+  *                           (ONLY if it is truncated) (OPTIONAL;
+  *                           default not used)
+  * @param boolean $html_entities_as_chars Whether or not to keep
+  *                                        HTML entities together
+  *                                        (OPTIONAL; default ignore
+  *                                        HTML entities)
+  *
+  * @return string The truncated string
+  *
+  * @since 1.4.20 and 1.5.2 (replaced truncateWithEntities())
+  *
+  */
+function sm_truncate_string($string, $max_chars, $elipses='',
+                            $html_entities_as_chars=FALSE)
+{
 
-    $ent_strlen = strlen($s);
-    if (($iTrimAt <= 0) || ($ent_strlen <= $iTrimAt))
-        return $s;
+   // if the length of the string is less than
+   // the allowable number of characters, just
+   // return it as is (even if it contains any
+   // HTML entities, that would just make the
+   // actual length even smaller)
+   //
+   $actual_strlen = sq_strlen($string, 'auto');
+   if ($max_chars <= 0 || $actual_strlen <= $max_chars)
+      return $string;
 
 
-    if (isset($languages[$squirrelmail_language]['XTRA_CODE']) &&
-        function_exists($languages[$squirrelmail_language]['XTRA_CODE'])) {
-            return $languages[$squirrelmail_language]['XTRA_CODE']('strimwidth', $s, \
                $iTrimAt);
-    } else {
-        /*
-         * see if this is entities-encoded string
-         * If so, Iterate through the whole string, find out
-         * the real number of characters, and if more
-         * than $iTrimAt, substr with an updated trim value.
-         */
-        $trim_val = $iTrimAt;
-        $ent_offset = 0;
-        $ent_loc = 0;
-        while ( $ent_loc < $trim_val && (($ent_loc = strpos($s, '&', $ent_offset)) \
                !== false) &&
-                (($ent_loc_end = strpos($s, ';', $ent_loc+3)) !== false) ) {
-            $trim_val += ($ent_loc_end-$ent_loc);
-            $ent_offset  = $ent_loc_end+1;
-        }
+   // if needed, count the number of HTML entities in
+   // the string up to the maximum character limit,
+   // pushing that limit up for each entity found
+   //
+   $adjusted_max_chars = $max_chars;
+   if ($html_entities_as_chars)
+   {
 
-        if (($trim_val > $iTrimAt) && ($ent_strlen > $trim_val) && \
                (strpos($s,';',$trim_val) < ($trim_val + 6))) {
-            $i = strpos($s,';',$trim_val);
-            if ($i !== false) {
-                $trim_val = strpos($s,';',$trim_val)+1;
-            }
-        }
-        // only print '...' when we're actually dropping part of the subject
-        if ($ent_strlen <= $trim_val)
-            return $s;
-    }
+      $entity_pos = -1;
+      while (($entity_pos = sq_strpos($string, '&', $entity_pos + 1)) !== FALSE
+          && ($entity_end_pos = sq_strpos($string, ';', $entity_pos)) !== FALSE
+          && $entity_pos <= $adjusted_max_chars)
+      {
+         $adjusted_max_chars += $entity_end_pos - $entity_pos;
+      }
 
-    return substr_replace($s, '...', $trim_val);
+
+      // this isn't necessary because sq_substr() would figure this
+      // out anyway, but we can avoid a sq_substr() call and we
+      // know that we don't have to add an elipses (this is now
+      // an accurate comparison, since $adjusted_max_chars, like
+      // $actual_strlen, does not take into account HTML entities)
+      //
+      if ($actual_strlen <= $adjusted_max_chars)
+         return $string;
+
+   }
+
+
+   // get the truncated string
+   //
+   $truncated_string = sq_substr($string, 0, $adjusted_max_chars);
+
+
+   // return with added elipses
+   //
+   return $truncated_string . $elipses;
+
 }
 
 /**
@@ -768,35 +802,357 @@
  * @since 1.5.1 and 1.4.6
  * @return integer number of characters in string
  */
-function sq_strlen($str, $charset=null){
-    // default option
-    if (is_null($charset)) return strlen($str);
+function sq_strlen($string, $charset=NULL){
 
-    // lowercase charset name
-    $charset=strtolower($charset);
+   // NULL charset?  Just use strlen()
+   //
+   if (is_null($charset))
+      return strlen($string);
 
-    // use automatic charset detection, if function call asks for it
-    if ($charset=='auto') {
-        global $default_charset;
-        set_my_charset();
-        $charset=$default_charset;
-    }
 
-    // Use mbstring only with listed charsets
-    $aList_of_mb_charsets=array('utf-8','big5','gb2312','gb18030','euc-jp','euc-cn','euc-tw','euc-kr');
 +   // use current character set?
+   //
+   if ($charset == 'auto')
+   {
+//FIXME: is there any reason why this cannot be a global flag used by all string \
wrapper functions? +      static $auto_charset;
+      if (!isset($auto_charset))
+      {
+         global $default_charset, $squirrelmail_language;
+         set_my_charset();
+         $auto_charset = $default_charset;
+         if ($squirrelmail_language == 'ja_JP') $auto_charset = 'euc-jp';
+      }
+      $charset = $auto_charset;
+   }
 
-    // calculate string length according to charset
-    if (in_array($charset,$aList_of_mb_charsets) && \
                in_array($charset,sq_mb_list_encodings())) {
-        $real_length = mb_strlen($str,$charset);
-    } else {
-        // own strlen detection code is removed because missing strpos,
-        // strtoupper and substr implementations break string wrapping.
-        $real_length=strlen($str);
-    }
-    return $real_length;
+
+   // standardize character set name
+   //
+   $charset = strtolower($charset);
+
+
+/* ===== FIXME: this list is not used in 1.5.x, but if we need it, unless this \
differs between all our string function wrappers, we should store this info in the \
session +   // only use mbstring with the following character sets
+   //
+   $sq_strlen_mb_charsets = array(
+      'utf-8',
+      'big5',
+      'gb2312',
+      'gb18030',
+      'euc-jp',
+      'euc-cn',
+      'euc-tw',
+      'euc-kr'
+   );
+
+
+   // now we can use mb_strlen() if needed
+   //
+   if (in_array($charset, $sq_strlen_mb_charsets)
+    && in_array($charset, sq_mb_list_encodings()))
+===== */
+//FIXME: is there any reason why this cannot be a static global array used by all \
string wrapper functions? +   if (in_array($charset, sq_mb_list_encodings()))
+      return mb_strlen($string, $charset);
+
+
+   // else use normal strlen()
+   //
+   return strlen($string);
+
 }
 
 /**
+  * This is a replacement for PHP's strpos() that is
+  * multibyte-aware.
+  *
+  * @param string $haystack The string to search within
+  * @param string $needle   The substring to search for
+  * @param int    $offset   The offset from the beginning of $haystack
+  *                         from which to start searching
+  *                         (OPTIONAL; default none)
+  * @param string $charset  The charset of the given string.  A value of NULL
+  *                         here will force the use of PHP's standard strpos().
+  *                         (OPTIONAL; default is "auto", which indicates that
+  *                         the user's current charset should be used).
+  *
+  * @return mixed The integer offset of the next $needle in $haystack,
+  *               if found, or FALSE if not found
+  *
+  */
+function sq_strpos($haystack, $needle, $offset=0, $charset='auto')
+{
+
+   // NULL charset?  Just use strpos()
+   //
+   if (is_null($charset))
+      return strpos($haystack, $needle, $offset);
+
+
+   // use current character set?
+   //
+   if ($charset == 'auto')
+   {
+//FIXME: is there any reason why this cannot be a global flag used by all string \
wrapper functions? +      static $auto_charset;
+      if (!isset($auto_charset))
+      {
+         global $default_charset, $squirrelmail_language;
+         set_my_charset();
+         $auto_charset = $default_charset;
+         if ($squirrelmail_language == 'ja_JP') $auto_charset = 'euc-jp';
+      }
+      $charset = $auto_charset;
+   }
+
+
+   // standardize character set name
+   //
+   $charset = strtolower($charset);
+
+
+/* ===== FIXME: this list is not used in 1.5.x, but if we need it, unless this \
differs between all our string function wrappers, we should store this info in the \
session +   // only use mbstring with the following character sets
+   //
+   $sq_strpos_mb_charsets = array(
+      'utf-8',
+      'big5',
+      'gb2312',
+      'gb18030',
+      'euc-jp',
+      'euc-cn',
+      'euc-tw',
+      'euc-kr'
+   );
+
+
+   // now we can use mb_strpos() if needed
+   //
+   if (in_array($charset, $sq_strpos_mb_charsets)
+    && in_array($charset, sq_mb_list_encodings()))
+===== */
+//FIXME: is there any reason why this cannot be a static global array used by all \
string wrapper functions? +   if (in_array($charset, sq_mb_list_encodings()))
+      return mb_strpos($haystack, $needle, $offset, $charset);
+
+
+   // else use normal strpos()
+   //
+   return strpos($haystack, $needle, $offset);
+
+}
+
+/**
+  * This is a replacement for PHP's substr() that is
+  * multibyte-aware.
+  *
+  * @param string $string  The string to operate upon
+  * @param int    $start   The offset at which to begin substring extraction
+  * @param int    $length  The number of characters after $start to return
+  *                        NOTE that if you need to specify a charset but
+  *                        want to achieve normal substr() behavior where
+  *                        $length is not specified, use NULL (OPTIONAL;
+  *                        default from $start to end of string)
+  * @param string $charset The charset of the given string.  A value of NULL
+  *                        here will force the use of PHP's standard substr().
+  *                        (OPTIONAL; default is "auto", which indicates that
+  *                        the user's current charset should be used).
+  *
+  * @return string The desired substring
+  *
+  * Of course, you can use more advanced (e.g., negative) values
+  * for $start and $length as needed - see the PHP manual for more
+  * information:  http://www.php.net/manual/function.substr.php
+  *
+  */
+function sq_substr($string, $start, $length=NULL, $charset='auto')
+{
+
+   // if $length is NULL, use the full string length...
+   // we have to do this to mimick the use of substr()
+   // where $length is not given
+   //
+   if (is_null($length))
+      $length = sq_strlen($length, $charset);
+
+   
+   // NULL charset?  Just use substr()
+   //
+   if (is_null($charset))
+      return substr($string, $start, $length);
+
+
+   // use current character set?
+   //
+   if ($charset == 'auto')
+   {
+//FIXME: is there any reason why this cannot be a global flag used by all string \
wrapper functions? +      static $auto_charset;
+      if (!isset($auto_charset))
+      {
+         global $default_charset, $squirrelmail_language;
+         set_my_charset();
+         $auto_charset = $default_charset;
+         if ($squirrelmail_language == 'ja_JP') $auto_charset = 'euc-jp';
+      }
+      $charset = $auto_charset;
+   }
+
+
+   // standardize character set name
+   //
+   $charset = strtolower($charset);
+
+
+/* ===== FIXME: this list is not used in 1.5.x, but if we need it, unless this \
differs between all our string function wrappers, we should store this info in the \
session +   // only use mbstring with the following character sets
+   //
+   $sq_substr_mb_charsets = array(
+      'utf-8',
+      'big5',
+      'gb2312',
+      'gb18030',
+      'euc-jp',
+      'euc-cn',
+      'euc-tw',
+      'euc-kr'
+   );
+
+
+   // now we can use mb_substr() if needed
+   //
+   if (in_array($charset, $sq_substr_mb_charsets)
+    && in_array($charset, sq_mb_list_encodings()))
+===== */
+//FIXME: is there any reason why this cannot be a global array used by all string \
wrapper functions? +   if (in_array($charset, sq_mb_list_encodings()))
+      return mb_substr($string, $start, $length, $charset);
+
+
+   // else use normal substr()
+   //
+   return substr($string, $start, $length);
+
+}
+
+/**
+  * This is a replacement for PHP's substr_replace() that is
+  * multibyte-aware.
+  *
+  * @param string $string      The string to operate upon
+  * @param string $replacement The string to be inserted
+  * @param int    $start       The offset at which to begin substring replacement
+  * @param int    $length      The number of characters after $start to remove
+  *                            NOTE that if you need to specify a charset but
+  *                            want to achieve normal substr_replace() behavior
+  *                            where $length is not specified, use NULL (OPTIONAL;
+  *                            default from $start to end of string)
+  * @param string $charset     The charset of the given string.  A value of NULL
+  *                            here will force the use of PHP's standard substr().
+  *                            (OPTIONAL; default is "auto", which indicates that
+  *                            the user's current charset should be used).
+  *
+  * @return string The manipulated string
+  *
+  * Of course, you can use more advanced (e.g., negative) values
+  * for $start and $length as needed - see the PHP manual for more
+  * information:  http://www.php.net/manual/function.substr-replace.php
+  *
+  */
+function sq_substr_replace($string, $replacement, $start, $length=NULL,
+                           $charset='auto')
+{
+
+   // NULL charset?  Just use substr_replace()
+   //
+   if (is_null($charset))
+      return is_null($length) ? substr_replace($string, $replacement, $start)
+                              : substr_replace($string, $replacement, $start, \
$length); +
+
+   // use current character set?
+   //
+   if ($charset == 'auto')
+   {
+//FIXME: is there any reason why this cannot be a global flag used by all string \
wrapper functions? +      static $auto_charset;
+      if (!isset($auto_charset))
+      {
+         global $default_charset, $squirrelmail_language;
+         set_my_charset();
+         $auto_charset = $default_charset;
+         if ($squirrelmail_language == 'ja_JP') $auto_charset = 'euc-jp';
+      }
+      $charset = $auto_charset;
+   }
+
+
+   // standardize character set name
+   //
+   $charset = strtolower($charset);
+
+
+/* ===== FIXME: this list is not used in 1.5.x, but if we need it, unless this \
differs between all our string function wrappers, we should store this info in the \
session +   // only use mbstring with the following character sets
+   //
+   $sq_substr_replace_mb_charsets = array(
+      'utf-8',
+      'big5',
+      'gb2312',
+      'gb18030',
+      'euc-jp',
+      'euc-cn',
+      'euc-tw',
+      'euc-kr'
+   );
+
+
+   // now we can use our own implementation using
+   // mb_substr() and mb_strlen() if needed
+   //
+   if (in_array($charset, $sq_substr_replace_mb_charsets)
+    && in_array($charset, sq_mb_list_encodings()))
+===== */
+//FIXME: is there any reason why this cannot be a global array used by all string \
wrapper functions? +   if (in_array($charset, sq_mb_list_encodings()))
+   {
+
+      $string_length = mb_strlen($string, $charset);
+
+      if ($start < 0)
+         $start = max(0, $string_length + $start);
+
+      else if ($start > $string_length)
+         $start = $string_length;
+
+      if ($length < 0)
+         $length = max(0, $string_length - $start + $length);
+
+      else if (is_null($length) || $length > $string_length)
+         $length = $string_length;
+
+      if ($start + $length > $string_length)
+         $length = $string_length - $start;
+
+      return mb_substr($string, 0, $start, $charset)
+           . $replacement
+           . mb_substr($string,
+                       $start + $length,
+                       $string_length, // FIXME: I can't see why this is needed:  - \
$start - $length, +                       $charset);
+
+   }
+
+
+   // else use normal substr_replace()
+   //
+   return is_null($length) ? substr_replace($string, $replacement, $start)
+                           : substr_replace($string, $replacement, $start, $length);
+
+}
+
+/**
  * Replacement of mb_list_encodings function
  *
  * This function provides replacement for function that is available only


This was sent by the SourceForge.net collaborative development platform, the world's \
largest Open Source development site.

------------------------------------------------------------------------------
The Planet: dedicated and managed hosting, cloud storage, colocation
Stay online with enterprise data centers and the best network in the business
Choose flexible plans and management services without long-term contracts
Personal 24x7 support from experience hosting pros just a phone call away.
http://p.sf.net/sfu/theplanet-com
-----
squirrelmail-cvs mailing list
List address: squirrelmail-cvs@lists.sourceforge.net
List info (subscribe/unsubscribe/change options): \
                https://lists.sourceforge.net/lists/listinfo/squirrelmail-cvs
Repository: http://squirrelmail.org/svn


[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic