'[Mono-patches] r67173 - in trunk/mono/eglib: . src test'

[prev in list] [next in list] [prev in thread] [next in thread] 

List:       mono-patches
Subject:    [Mono-patches] r67173 - in trunk/mono/eglib: . src test
From:       "Atsushi Enomoto (ginga () kit ! hi-ho ! ne ! jp)"
Date:       2006-10-31 4:01:04
Message-ID: 20061031040104.7023C9472C () mono-cvs ! ximian ! com
[Download RAW message or body]

Author: atsushi
Date: 2006-10-30 23:01:04 -0500 (Mon, 30 Oct 2006)
New Revision: 67173

Modified:
   trunk/mono/eglib/ChangeLog
   trunk/mono/eglib/src/gutf8.c
   trunk/mono/eglib/test/utf8.c
Log:
2006-10-30  Atsushi Enomoto  <atsushi@ximian.com>

        * src/gutf8.c : several fixes:
          - fixed incorrect mb_size clear and mb_remain computation.
          - initialize items_written to 0 for error case.
          - in utf8_to_utf16_len differentiate error messages
            completely.
          - in g_utf8_to_utf16 use guchar instead of gchar.
        * test/utf8.c : added test case string in test_utf8_seq() to
          both test_utf8_to_utf16() and test_utf16_to_utf8().



Modified: trunk/mono/eglib/ChangeLog
===================================================================

--- trunk/mono/eglib/ChangeLog	2006-10-31 04:00:03 UTC (rev 67172)
+++ trunk/mono/eglib/ChangeLog	2006-10-31 04:01:04 UTC (rev 67173)
@@ -1,3 +1,13 @@
+2006-10-30  Atsushi Enomoto  <atsushi@ximian.com>
+
+	* src/gutf8.c : several fixes:
+	  - fixed incorrect mb_size clear and mb_remain computation.
+	  - initialize items_written to 0 for error case.
+	  - in utf8_to_utf16_len differentiate error messages completely.
+	  - in g_utf8_to_utf16 use guchar instead of gchar.
+	* test/utf8.c : added test case string in test_utf8_seq() to
+	  both test_utf8_to_utf16() and test_utf16_to_utf8().
+
 2006-10-21  Miguel de Icaza  <miguel@novell.com>
 
 	* src/gunicode.c (g_filename_from_utf8): Use g_strlcpy here. 

Modified: trunk/mono/eglib/src/gutf8.c
===================================================================
--- trunk/mono/eglib/src/gutf8.c	2006-10-31 04:00:03 UTC (rev 67172)
+++ trunk/mono/eglib/src/gutf8.c	2006-10-31 04:01:04 UTC (rev 67173)
@@ -29,7 +29,7 @@
 	   the conversion core below simply resets erroreous bits */
 	glong utf16_len;
 	gunichar2 *ret;
-	gchar ch, mb_size, mb_remain;
+	guchar ch, mb_size, mb_remain;
 	guint32 codepoint;
 	glong in_pos, out_pos;
 
@@ -42,6 +42,8 @@
 	if (error)
 		*error = NULL;
 
+	if (items_written)
+		*items_written = 0;
 	utf16_len = utf8_to_utf16_len (str, len, items_read, error);
 	if (error)
 		if (*error)
@@ -54,35 +56,37 @@
 	for (in_pos = 0; len < 0 ? str [in_pos] : in_pos < len; in_pos++) {
 		ch = (guchar) str [in_pos];
 		if (mb_size == 0) {
-			if (0 < ch)
+			if (ch < 0x80)
 				ret [out_pos++] = ch;
 			else if ((ch & 0xE0) == 0xC0) {
 				codepoint = ch & 0x1F;
-				mb_remain = mb_size = 2;
+				mb_size = 2;
 			} else if ((ch & 0xF0) == 0xE0) {
 				codepoint = ch & 0x0F;
-				mb_remain = mb_size = 3;
+				mb_size = 3;
 			} else if ((ch & 0xF8) == 0xF0) {
 				codepoint = ch & 7;
-				mb_remain = mb_size = 4;
+				mb_size = 4;
 			} else if ((ch & 0xFC) == 0xF8) {
 				codepoint = ch & 3;
-				mb_remain = mb_size = 5;
+				mb_size = 5;
 			} else if ((ch & 0xFE) == 0xFC) {
 				codepoint = ch & 3;
-				mb_remain = mb_size = 6;
+				mb_size = 6;
 			} else {
 				/* invalid utf-8 sequence */
 				codepoint = 0;
 				mb_remain = mb_size = 0;
 			}
+			if (mb_size > 1)
+				mb_remain = mb_size - 1;
 		} else {
 			if ((ch & 0xC0) == 0x80) {
 				codepoint = (codepoint << 6) | (ch & 0x3F);
 				if (--mb_remain == 0) {
 					/* multi byte character is fully consumed now. */
 					if (codepoint < 0x10000) {
-						ret [out_pos++] = codepoint;
+						ret [out_pos++] = codepoint % 0x10000;
 					} else if (codepoint < 0x110000) {
 						/* surrogate pair */
 						codepoint -= 0x10000;
@@ -91,8 +95,9 @@
 					} else {
 						/* invalid utf-8 sequence (excess) */
 						codepoint = 0;
-						mb_remain = mb_size = 0;
+						mb_remain = 0;
 					}
+					mb_size = 0;
 				}
 			} else {
 				/* invalid utf-8 sequence */
@@ -130,23 +135,23 @@
 				ret++;
 			else if ((ch & 0xE0) == 0xC0) {
 				codepoint = ch & 0x1F;
-				mb_remain = mb_size = 2;
+				mb_size = 2;
 			} else if ((ch & 0xF0) == 0xE0) {
 				codepoint = ch & 0x0F;
-				mb_remain = mb_size = 3;
+				mb_size = 3;
 			} else if ((ch & 0xF8) == 0xF0) {
 				codepoint = ch & 7;
-				mb_remain = mb_size = 4;
+				mb_size = 4;
 			} else if ((ch & 0xFC) == 0xF8) {
 				codepoint = ch & 3;
-				mb_remain = mb_size = 5;
+				mb_size = 5;
 			} else if ((ch & 0xFE) == 0xFC) {
 				codepoint = ch & 3;
-				mb_remain = mb_size = 6;
+				mb_size = 6;
 			} else {
 				/* invalid utf-8 sequence */
 				if (error) {
-					g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE, "invalid utf-8 sequence at %d", in_pos);
+					g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE, "invalid utf-8 sequence at %d (illegal first byte)", in_pos);
 					if (items_read)
 						*items_read = in_pos;
 					return -1;
@@ -155,6 +160,8 @@
 					mb_remain = mb_size = 0;
 				}
 			}
+			if (mb_size > 1)
+				mb_remain = mb_size - 1;
 		} else {
 			if ((ch & 0xC0) == 0x80) {
 				codepoint = (codepoint << 6) | (ch & 0x3F);
@@ -187,7 +194,7 @@
 								return -1;
 							} else {
 								codepoint = 0;
-								mb_remain = mb_size = 0;
+								mb_remain = 0;
 								overlong = FALSE;
 							}
 						}
@@ -205,14 +212,15 @@
 							return -1;
 						} else {
 							codepoint = 0;
-							mb_remain = mb_size = 0;
+							mb_remain = 0;
 						}
 					}
+					mb_size = 0;
 				}
 			} else {
 				/* invalid utf-8 sequence */
 				if (error) {
-					g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE, "invalid utf-8 sequence at %d", in_pos);
+					g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE, "invalid utf-8 sequence at %d (illegal following bytes)", in_pos);
 					if (items_read)
 						*items_read = in_pos;
 					return -1;
@@ -246,6 +254,8 @@
 	out_pos = 0;
 	surrogate = FALSE;
 
+	if (items_written)
+		*items_written = 0;
 	utf8_len = utf16_to_utf8_len (str, len, items_read, error);
 	if (error)
 		if (*error)

Modified: trunk/mono/eglib/test/utf8.c
===================================================================
--- trunk/mono/eglib/test/utf8.c	2006-10-31 04:00:03 UTC (rev 67172)
+++ trunk/mono/eglib/test/utf8.c	2006-10-31 04:01:04 UTC (rev 67173)
@@ -82,11 +82,10 @@
 RESULT
 test_utf16_to_utf8 ()
 {
-	const gchar *src0 = "", *src1 = "ABCDE";
-	gunichar2 str0 [1], str1 [6];
+	const gchar *src0 = "", *src1 = "ABCDE", *src2 = "\xE5\xB9\xB4\x27";
+	gunichar2 str0 [] = {0}, str1 [6], str2 [] = {0x5E74, 39, 0};
 	RESULT result;
 
-	str0 [0] = 0;
 	gchar_to_gunichar2 (str1, src1);
 
 	/* empty string */
@@ -97,6 +96,9 @@
 	result = compare_utf16_to_utf8 (src1, str1, 5, 5);
 	if (result != OK)
 		return result;
+	result = compare_utf16_to_utf8 (src2, str2, 2, 4);
+	if (result != OK)
+		return result;
 
 	return OK;
 }
@@ -174,28 +176,35 @@
 RESULT
 test_utf8_seq ()
 {
-	const gchar *src = "\345\271\264\47";
+	const gchar *src = "\xE5\xB9\xB4\x27";
 	glong in_read, out_read;
 	//gunichar2 expected [6];
 	GError *error = NULL;
+	gunichar2 *dst;
 
 	printf ("got: %s\n", src);
-	g_utf8_to_utf16 (src, strlen (src), &in_read, &out_read, &error);
+	dst = g_utf8_to_utf16 (src, strlen (src), &in_read, &out_read, &error);
 	if (error != NULL){
 		return error->message;
 	}
-	
+
+	if (in_read != 4) {
+		return FAILED ("in_read is expected to be 4 but was %d\n", in_read);
+	}
+	if (out_read != 2) {
+		return FAILED ("out_read is expected to be 2 but was %d\n", out_read);
+	}
+
 	return OK;
 }
 
 RESULT
 test_utf8_to_utf16 ()
 {
-	const gchar *src0 = "", *src1 = "ABCDE";
-	gunichar2 str0 [1], str1 [6];
+	const gchar *src0 = "", *src1 = "ABCDE", *src2 = "\xE5\xB9\xB4\x27";
+	gunichar2 str0 [] = {0}, str1 [6], str2 [] = {0x5E74, 39, 0};
 	RESULT result;
 
-	str0 [0] = 0;
 	gchar_to_gunichar2 (str1, src1);
 
 	/* empty string */
@@ -206,6 +215,9 @@
 	result = compare_utf8_to_utf16 (str1, src1, 5, 5);
 	if (result != OK)
 		return result;
+	result = compare_utf8_to_utf16 (str2, src2, 4, 2);
+	if (result != OK)
+		return result;
 
 	return OK;
 }

_______________________________________________
Mono-patches maillist  -  Mono-patches@lists.ximian.com
http://lists.ximian.com/mailman/listinfo/mono-patches


[prev in list] [next in list] [prev in thread] [next in thread]