[prev in list] [next in list] [prev in thread] [next in thread]
List: mono-patches
Subject: [Mono-patches] r67173 - in trunk/mono/eglib: . src test
From: "Atsushi Enomoto (ginga () kit ! hi-ho ! ne ! jp)"
Date: 2006-10-31 4:01:04
Message-ID: 20061031040104.7023C9472C () mono-cvs ! ximian ! com
[Download RAW message or body]
Author: atsushi
Date: 2006-10-30 23:01:04 -0500 (Mon, 30 Oct 2006)
New Revision: 67173
Modified:
trunk/mono/eglib/ChangeLog
trunk/mono/eglib/src/gutf8.c
trunk/mono/eglib/test/utf8.c
Log:
2006-10-30 Atsushi Enomoto <atsushi@ximian.com>
* src/gutf8.c : several fixes:
- fixed incorrect mb_size clear and mb_remain computation.
- initialize items_written to 0 for error case.
- in utf8_to_utf16_len differentiate error messages
completely.
- in g_utf8_to_utf16 use guchar instead of gchar.
* test/utf8.c : added test case string in test_utf8_seq() to
both test_utf8_to_utf16() and test_utf16_to_utf8().
Modified: trunk/mono/eglib/ChangeLog
===================================================================
--- trunk/mono/eglib/ChangeLog 2006-10-31 04:00:03 UTC (rev 67172)
+++ trunk/mono/eglib/ChangeLog 2006-10-31 04:01:04 UTC (rev 67173)
@@ -1,3 +1,13 @@
+2006-10-30 Atsushi Enomoto <atsushi@ximian.com>
+
+ * src/gutf8.c : several fixes:
+ - fixed incorrect mb_size clear and mb_remain computation.
+ - initialize items_written to 0 for error case.
+ - in utf8_to_utf16_len differentiate error messages completely.
+ - in g_utf8_to_utf16 use guchar instead of gchar.
+ * test/utf8.c : added test case string in test_utf8_seq() to
+ both test_utf8_to_utf16() and test_utf16_to_utf8().
+
2006-10-21 Miguel de Icaza <miguel@novell.com>
* src/gunicode.c (g_filename_from_utf8): Use g_strlcpy here.
Modified: trunk/mono/eglib/src/gutf8.c
===================================================================
--- trunk/mono/eglib/src/gutf8.c 2006-10-31 04:00:03 UTC (rev 67172)
+++ trunk/mono/eglib/src/gutf8.c 2006-10-31 04:01:04 UTC (rev 67173)
@@ -29,7 +29,7 @@
the conversion core below simply resets erroreous bits */
glong utf16_len;
gunichar2 *ret;
- gchar ch, mb_size, mb_remain;
+ guchar ch, mb_size, mb_remain;
guint32 codepoint;
glong in_pos, out_pos;
@@ -42,6 +42,8 @@
if (error)
*error = NULL;
+ if (items_written)
+ *items_written = 0;
utf16_len = utf8_to_utf16_len (str, len, items_read, error);
if (error)
if (*error)
@@ -54,35 +56,37 @@
for (in_pos = 0; len < 0 ? str [in_pos] : in_pos < len; in_pos++) {
ch = (guchar) str [in_pos];
if (mb_size == 0) {
- if (0 < ch)
+ if (ch < 0x80)
ret [out_pos++] = ch;
else if ((ch & 0xE0) == 0xC0) {
codepoint = ch & 0x1F;
- mb_remain = mb_size = 2;
+ mb_size = 2;
} else if ((ch & 0xF0) == 0xE0) {
codepoint = ch & 0x0F;
- mb_remain = mb_size = 3;
+ mb_size = 3;
} else if ((ch & 0xF8) == 0xF0) {
codepoint = ch & 7;
- mb_remain = mb_size = 4;
+ mb_size = 4;
} else if ((ch & 0xFC) == 0xF8) {
codepoint = ch & 3;
- mb_remain = mb_size = 5;
+ mb_size = 5;
} else if ((ch & 0xFE) == 0xFC) {
codepoint = ch & 3;
- mb_remain = mb_size = 6;
+ mb_size = 6;
} else {
/* invalid utf-8 sequence */
codepoint = 0;
mb_remain = mb_size = 0;
}
+ if (mb_size > 1)
+ mb_remain = mb_size - 1;
} else {
if ((ch & 0xC0) == 0x80) {
codepoint = (codepoint << 6) | (ch & 0x3F);
if (--mb_remain == 0) {
/* multi byte character is fully consumed now. */
if (codepoint < 0x10000) {
- ret [out_pos++] = codepoint;
+ ret [out_pos++] = codepoint % 0x10000;
} else if (codepoint < 0x110000) {
/* surrogate pair */
codepoint -= 0x10000;
@@ -91,8 +95,9 @@
} else {
/* invalid utf-8 sequence (excess) */
codepoint = 0;
- mb_remain = mb_size = 0;
+ mb_remain = 0;
}
+ mb_size = 0;
}
} else {
/* invalid utf-8 sequence */
@@ -130,23 +135,23 @@
ret++;
else if ((ch & 0xE0) == 0xC0) {
codepoint = ch & 0x1F;
- mb_remain = mb_size = 2;
+ mb_size = 2;
} else if ((ch & 0xF0) == 0xE0) {
codepoint = ch & 0x0F;
- mb_remain = mb_size = 3;
+ mb_size = 3;
} else if ((ch & 0xF8) == 0xF0) {
codepoint = ch & 7;
- mb_remain = mb_size = 4;
+ mb_size = 4;
} else if ((ch & 0xFC) == 0xF8) {
codepoint = ch & 3;
- mb_remain = mb_size = 5;
+ mb_size = 5;
} else if ((ch & 0xFE) == 0xFC) {
codepoint = ch & 3;
- mb_remain = mb_size = 6;
+ mb_size = 6;
} else {
/* invalid utf-8 sequence */
if (error) {
- g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE, "invalid utf-8 sequence at %d", in_pos);
+ g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE, "invalid utf-8 sequence at %d (illegal first byte)", in_pos);
if (items_read)
*items_read = in_pos;
return -1;
@@ -155,6 +160,8 @@
mb_remain = mb_size = 0;
}
}
+ if (mb_size > 1)
+ mb_remain = mb_size - 1;
} else {
if ((ch & 0xC0) == 0x80) {
codepoint = (codepoint << 6) | (ch & 0x3F);
@@ -187,7 +194,7 @@
return -1;
} else {
codepoint = 0;
- mb_remain = mb_size = 0;
+ mb_remain = 0;
overlong = FALSE;
}
}
@@ -205,14 +212,15 @@
return -1;
} else {
codepoint = 0;
- mb_remain = mb_size = 0;
+ mb_remain = 0;
}
}
+ mb_size = 0;
}
} else {
/* invalid utf-8 sequence */
if (error) {
- g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE, "invalid utf-8 sequence at %d", in_pos);
+ g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE, "invalid utf-8 sequence at %d (illegal following bytes)", in_pos);
if (items_read)
*items_read = in_pos;
return -1;
@@ -246,6 +254,8 @@
out_pos = 0;
surrogate = FALSE;
+ if (items_written)
+ *items_written = 0;
utf8_len = utf16_to_utf8_len (str, len, items_read, error);
if (error)
if (*error)
Modified: trunk/mono/eglib/test/utf8.c
===================================================================
--- trunk/mono/eglib/test/utf8.c 2006-10-31 04:00:03 UTC (rev 67172)
+++ trunk/mono/eglib/test/utf8.c 2006-10-31 04:01:04 UTC (rev 67173)
@@ -82,11 +82,10 @@
RESULT
test_utf16_to_utf8 ()
{
- const gchar *src0 = "", *src1 = "ABCDE";
- gunichar2 str0 [1], str1 [6];
+ const gchar *src0 = "", *src1 = "ABCDE", *src2 = "\xE5\xB9\xB4\x27";
+ gunichar2 str0 [] = {0}, str1 [6], str2 [] = {0x5E74, 39, 0};
RESULT result;
- str0 [0] = 0;
gchar_to_gunichar2 (str1, src1);
/* empty string */
@@ -97,6 +96,9 @@
result = compare_utf16_to_utf8 (src1, str1, 5, 5);
if (result != OK)
return result;
+ result = compare_utf16_to_utf8 (src2, str2, 2, 4);
+ if (result != OK)
+ return result;
return OK;
}
@@ -174,28 +176,35 @@
RESULT
test_utf8_seq ()
{
- const gchar *src = "\345\271\264\47";
+ const gchar *src = "\xE5\xB9\xB4\x27";
glong in_read, out_read;
//gunichar2 expected [6];
GError *error = NULL;
+ gunichar2 *dst;
printf ("got: %s\n", src);
- g_utf8_to_utf16 (src, strlen (src), &in_read, &out_read, &error);
+ dst = g_utf8_to_utf16 (src, strlen (src), &in_read, &out_read, &error);
if (error != NULL){
return error->message;
}
-
+
+ if (in_read != 4) {
+ return FAILED ("in_read is expected to be 4 but was %d\n", in_read);
+ }
+ if (out_read != 2) {
+ return FAILED ("out_read is expected to be 2 but was %d\n", out_read);
+ }
+
return OK;
}
RESULT
test_utf8_to_utf16 ()
{
- const gchar *src0 = "", *src1 = "ABCDE";
- gunichar2 str0 [1], str1 [6];
+ const gchar *src0 = "", *src1 = "ABCDE", *src2 = "\xE5\xB9\xB4\x27";
+ gunichar2 str0 [] = {0}, str1 [6], str2 [] = {0x5E74, 39, 0};
RESULT result;
- str0 [0] = 0;
gchar_to_gunichar2 (str1, src1);
/* empty string */
@@ -206,6 +215,9 @@
result = compare_utf8_to_utf16 (str1, src1, 5, 5);
if (result != OK)
return result;
+ result = compare_utf8_to_utf16 (str2, src2, 4, 2);
+ if (result != OK)
+ return result;
return OK;
}
_______________________________________________
Mono-patches maillist - Mono-patches@lists.ximian.com
http://lists.ximian.com/mailman/listinfo/mono-patches
[prev in list] [next in list] [prev in thread] [next in thread]
Configure |
About |
News |
Add a list |
Sponsored by KoreLogic