'[Mono-patches] r45251 -'

[prev in list] [next in list] [prev in thread] [next in thread] 

List:       mono-patches
Subject:    [Mono-patches] r45251 -
From:       "Atsushi Enomoto (ginga () kit ! hi-ho ! ne ! jp)" <atsushi () mono-cvs ! ximian ! com>
Date:       2005-05-31 11:06:13
Message-ID: 20050531110613.5E53394765 () mono-cvs ! ximian ! com
[Download RAW message or body]

Author: atsushi
Date: 2005-05-31 07:06:13 -0400 (Tue, 31 May 2005)
New Revision: 45251

Modified:
   branches/atsushi/mcs/class/corlib/Mono.Globalization.Unicode/ChangeLog
   branches/atsushi/mcs/class/corlib/Mono.Globalization.Unicode/Collation-notes.txt
   branches/atsushi/mcs/class/corlib/Mono.Globalization.Unicode/create-mscompat-collation-table.cs
 Log:
2005-05-31  Atsushi Enomoto  <atsushi@ximian.com>

	* Collation-notes.txt : wrong comment cleanup and spelling fixes.
	* create-mscompat-collation-table.cs : added diacritic support for
	  Latin letters (as long as covered in primary weight).



Modified: branches/atsushi/mcs/class/corlib/Mono.Globalization.Unicode/ChangeLog
===================================================================

--- branches/atsushi/mcs/class/corlib/Mono.Globalization.Unicode/ChangeLog	2005-05-31 10:59:45 UTC (rev 45250)
+++ branches/atsushi/mcs/class/corlib/Mono.Globalization.Unicode/ChangeLog	2005-05-31 11:06:13 UTC (rev 45251)
@@ -1,5 +1,11 @@
 2005-05-31  Atsushi Enomoto  <atsushi@ximian.com>
 
+	* Collation-notes.txt : wrong comment cleanup and spelling fixes.
+	* create-mscompat-collation-table.cs : added diacritic support for
+	  Latin letters (as long as covered in primary weight).
+
+2005-05-31  Atsushi Enomoto  <atsushi@ximian.com>
+
 	* Makefile : minor fixes. Added warning lines to generated sources.
 
 2005-05-31  Atsushi Enomoto  <atsushi@ximian.com>

Modified: branches/atsushi/mcs/class/corlib/Mono.Globalization.Unicode/Collation-notes.txt
 ===================================================================
--- branches/atsushi/mcs/class/corlib/Mono.Globalization.Unicode/Collation-notes.txt	2005-05-31 10:59:45 UTC (rev 45250)
+++ branches/atsushi/mcs/class/corlib/Mono.Globalization.Unicode/Collation-notes.txt	2005-05-31 11:06:13 UTC (rev 45251)
@@ -213,15 +213,6 @@
 
 **** level 2
 
-	<del>
-	For Japanese voice marks, it just sums the count up.
-
-	There also seems special rule for Thai (E01-E4F) e.g. E47 works like
-	Japanese voice marks.
-
-	For other letters, there will be a table.
-	</del>
-
 	It looks like all level 2 keys are just accumulated, however without
 	considering overflow. It sometimes makes sense (e.g. diaeresis and
 	acute) but it causes many conflicts (e.g. "A\u0308\u0301" and "\u1EA6"
@@ -251,11 +242,9 @@
 		- 4 Alef/Bet/Gimel/Dalet (2135-2138)
 		- 8 Final form in presentation form B in FE82-FEF2
 		- 18 Medial form in presentation form B in FE8C-FEF4
-
-		Grep "ISOLATED", "FINAL" or "MEDIAL" against UnicodeData.txt
-		(and filter by codepoints).
-		or alternatively, see DerivedDecompositionType.txt.
-
+		     Grep "ISOLATED", "FINAL" or "MEDIAL" on UnicodeData.txt
+		     (and filter by codepoints).
+		     or alternatively, see DerivedDecompositionType.txt.
 		- 22 6A9 (TODO: what is it?)
 		- 28 6AA (TODO: what is it?)
 
@@ -423,7 +412,7 @@
 		-10: dot above
 		-11: middle dot
 		-12: circumflex
-		-13: diaresis
+		-13: diaeresis
 		-14: caron
 		     Note that 1C4-1C6 are covered but they are also expanded.
 		-15: breve (cyrillic are also covered? at least 4C1/4C2 are.)
@@ -439,19 +428,19 @@
 		     (i.e. they not one-to-one mapping. Neither that every
 		     "stroke" are mapped to 1E, nor not every 1E are mapped to
 		     "stroke".)
-		-1F: diaresis and acute | with circumflex and grave | l slash
+		-1F: diaeresis and acute | with circumflex and grave | l slash
 			beware "symbol slash"
-		-20: diaresis and grave | 19B,19F
+		-20: diaeresis and grave | 19B,19F
 		-21: breve and acute | D8,F8
 		-22: caron and dot above | breve and grave
 		-23: macron and acute
 		-24: macron and grave
-		-25: diaresis and caron | dot above and macron | tilde and acute
+		-25: diaeresis and caron | dot above and macron | tilde and acute
 		-26: ring above and acute
-		-28: diaresis and macron | cedilla and acute |
-		     macron and diaresis
+		-28: diaeresis and macron | cedilla and acute |
+		     macron and diaeresis
 		-29: circumflex and tilde
-		-2A: tilde and diaresis
+		-2A: tilde and diaeresis
 		-2B: stroke and acute
 		-2C: breve and tilde
 		-2F: cedilla and breve
@@ -466,7 +455,7 @@
 		-55: line below | circumflex and hook above
 		-57: palatal hook (actually only 1AB)
 		-58: dot below except for cp{1EA0,1EA1}
-		-59: "retroflex" (without "WITH") | diaresis below | 1EA0,1EA1
+		-59: "retroflex" (without "WITH") | diaeresis below | 1EA0,1EA1
 		-5A: ring below | 1E76,1E77
 		-60: circumflex below except for cp{1E76,1E77} | horn and acute
 		-61: breve below | horn and grave

Modified: branches/atsushi/mcs/class/corlib/Mono.Globalization.Unicode/create-mscompat-collation-table.cs
 ===================================================================
--- branches/atsushi/mcs/class/corlib/Mono.Globalization.Unicode/create-mscompat-collation-table.cs	2005-05-31 10:59:45 UTC (rev 45250)
+++ branches/atsushi/mcs/class/corlib/Mono.Globalization.Unicode/create-mscompat-collation-table.cs	2005-05-31 11:06:13 UTC (rev 45251)
@@ -84,16 +84,77 @@
 
 		bool [] isSmallCapital = new bool [char.MaxValue + 1];
 		bool [] isUppercase = new bool [char.MaxValue + 1];
+
 		byte [] decompType = new byte [char.MaxValue + 1];
 		int [] decompIndex = new int [char.MaxValue + 1];
 		int [] decompLength = new int [char.MaxValue + 1];
 		int [] decompValues;
 		decimal [] decimalValue = new decimal [char.MaxValue + 1];
 
+		byte [] diacritical = new byte [char.MaxValue + 1];
+
+		string [] diacritics = new string [] {
+			" ACUTE;", " GRAVE;", " DOT ABOVE;", " MIDDLE DOT;",
+			" CIRCUMFLEX;", " DIAERESIS;", " CARON;", " BREVE;",
+			" DIALYTIKA AND TONOS;", " MACRON;", " TILDE;", " RING ABOVE;",
+			" OGONEK;", " CEDILLA;",
+			" DOUBLE ACUTE;", " ACUTE AND DOT ABOVE;",
+			" STROKE;", " CIRCUMFLEX AND ACUTE;",
+			" DIAERESIS AND ACUTE;", "WITH CIRCUMFLEX AND GRAVE;", " L SLASH;",
+			" DIAERESIS AND GRAVE;",
+			" BREVE AND ACUTE;",
+			" CARON AND DOT ABOVE;", " BREVE AND GRAVE;",
+			" MACRON AND ACUTE;",
+			" MACRON AND GRAVE;",
+			" DIAERESIS AND CARON", " DOT ABOVE AND MACRON", " TILDE AND ACUTE",
+			" RING ABOVE AND ACUTE",
+			" DIAERESIS AND MACRON", " CEDILLA AND ACUTE", " MACRON AND DIAERESIS",
+			" CIRCUMFLEX AND TILDE",
+			" TILDE AND DIAERESIS",
+			" STROKE AND ACUTE",
+			" BREVE AND TILDE",
+			" CEDILLA AND BREVE",
+			" OGONEK AND MACRON",
+			" HOOK;", "LEFT HOOK;", " WITH HOOK ABOVE;",
+			" DOUBLE GRAVE;",
+			" INVERTED BREVE",
+			" PRECEDED BY APOSTROPHE",
+			" HORN;",
+			" LINE BELOW;", " CIRCUMFLEX AND HOOK ABOVE",
+			" PALATAL HOOK",
+			" DOT BELOW;",
+			" RETROFLEX;", "DIAERESIS BELOW",
+			" RING BELOW",
+			" CIRCUMFLEX BELOW", "HORN AND ACUTE",
+			" BREVE BELOW;", " HORN AND GRAVE",
+			" TILDE BELOW",
+			" DOT BELOW AND DOT ABOVE",
+			" RIGHT HALF RING", " HORN AND TILDE",
+			" CIRCUMFLEX AND DOT BELOW",
+			" BREVE AND DOT BELOW",
+			" DOT BELOW AND MACRON",
+			" HORN AND HOOK ABOVE",
+			" HORN AND DOT",
+			};
+		byte [] diacriticWeights = new byte [] {
+			0xE, 0xF, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
+			0x17, 0x19, 0x1A, 0x1B, 0x1C,
+			0x1D, 0x1D, 0x1E, 0x1E, 0x1F, 0x1F, 0x1F,
+			0x20, 0x21, 0x22, 0x22, 0x23, 0x24,
+			0x25, 0x25, 0x25, 0x26, 0x28, 0x28, 0x28,
+			0x29, 0x2A, 0x2B, 0x2C, 0x2F, 0x30,
+			0x43, 0x43, 0x43, 0x44, 0x46, 0x48,
+			0x52, 0x55, 0x55, 0x57, 0x58, 0x59, 0x59, 0x5A,
+			0x60, 0x60, 0x61, 0x61, 0x63, 0x68, 
+			0x69, 0x69, 0x6A, 0x6D, 0x6E,
+			0x95, 0xAA
+			};
+
 		char [] orderedCyrillic;
 		char [] orderedGurmukhi;
 		char [] orderedGujarati;
 		char [] orderedGeorgian;
+
 		static readonly char [] orderedTamilConsonants = new char [] {
 			// based on traditional Tamil consonants, except for
 			// Grantha (where Microsoft breaks traditionalism).
@@ -154,6 +215,8 @@
 			Result.WriteLine ("static int [] level2 = new int [] {");
 			for (int i = 0; i < map.Length; i++) {
 				int value = map [i].Level2;
+				if (map [i].Category == 0xE)
+					value |= diacritical [i];
 				if (value == 0)
 					Result.Write ("0,");
 				else
@@ -250,6 +313,10 @@
 			if (s.IndexOf ("SMALL CAPITAL") > 0)
 				isSmallCapital [cp] = true;
 
+			for (int d = 0; d < diacritics.Length; d++)
+				if (s.IndexOf (diacritics [d]) > 0)
+					diacritical [cp] |= diacriticWeights [d];
+
 			// normalizationType
 			string decomp = values [4];
 			idx = decomp.IndexOf ('<');
@@ -951,10 +1018,10 @@
 
 	internal struct CharMapEntry
 	{
-		public readonly byte Category;
-		public readonly byte Level1;
-		public readonly byte Level2; // It is always single byte.
-		public readonly bool Defined;
+		public byte Category;
+		public byte Level1;
+		public byte Level2; // It is always single byte.
+		public bool Defined;
 
 		public CharMapEntry (byte category, byte level1, byte level2)
 		{

_______________________________________________
Mono-patches maillist  -  Mono-patches@lists.ximian.com
http://lists.ximian.com/mailman/listinfo/mono-patches


[prev in list] [next in list] [prev in thread] [next in thread]