[prev in list] [next in list] [prev in thread] [next in thread] 

List:       mono-patches
Subject:    [Mono-patches] [mono/monodevelop] 698fdcfb: [TextEditor] Added support for simplified chinese encodi
From:       Mike_Krüger_(mkrueger () xamarin ! com) <mono-patches () lists ! ximian ! c
Date:       2013-10-31 4:47:10
Message-ID: 000001420cd62526-08291718-396e-4165-bef9-ea39dcd5fb13-000000 () email ! amazonses ! com
[Download RAW message or body]


   Branch: refs/heads/master
     Home: https://github.com/mono/monodevelop
  Compare: https://github.com/mono/monodevelop/compare/614c78b40b8e...698fdcfb5ee8

   Commit: 698fdcfb5ee87c74817fdd0f21413f8cccbb9368
   Author: Mike Krüger <mkrueger@xamarin.com> (mkrueger)
     Date: 2013-10-31 04:46:04 GMT
      URL: https://github.com/mono/monodevelop/commit/698fdcfb5ee87c74817fdd0f21413f8cccbb9368

[TextEditor] Added support for Simplified Chinese (GB18030) encoding detection.

Changed paths:
  M main/src/core/Mono.Texteditor/Mono.TextEditor.Utils/TextFileUtility.cs
  M main/src/core/MonoDevelop.TextEditor.Tests/Mono.TextEditor.Tests/TextFileReaderTests.cs

Modified: main/src/core/Mono.Texteditor/Mono.TextEditor.Utils/TextFileUtility.cs
===================================================================
@@ -61,6 +61,7 @@ static TextFileUtility ()
 			// Encoding verifiers
 			var verifierList = new List<Verifier> () {
 				new Utf8Verifier (),
+				new GB18030CodePageVerifier (),
 				new WindowsCodePageVerifier (),
 				new UnicodeVerifier (),
 				new BigEndianUnicodeVerifier (),
@@ -757,6 +758,114 @@ protected override void Init ()
 				}
 			}
 		}
+
+		/// <summary>
+		/// Verifier that detects GB18030 (simplified Chinese, code page 54936) encoded text.
+		/// The text only counts as valid once at least one complete multi-byte sequence
+		/// has been seen, so pure single-byte input is not claimed by this verifier.
+		/// </summary>
+		class GB18030CodePageVerifier : Verifier
+		{
+			// States; each value is also the index of its row in the state table.
+			const byte Valid = 1;    // character boundary, a multi-byte sequence has been seen
+			const byte Second = 2;   // lead byte read, expecting the second byte
+			const byte Third = 3;    // expecting the third byte of a four-byte sequence
+			const byte Fourth = 4;   // expecting the fourth byte of a four-byte sequence
+			const byte NotValid = 5; // character boundary, only single bytes seen so far
+
+			// Number of rows in the state table; row 0 is the shared errorTable.
+			const byte LAST = 6;
+			static byte[][] table;
+			static Encoding EncodingWindows;
+
+			// Start in NotValid so a multi-byte sequence is required before the text is accepted.
+			public override byte InitalState { get { return NotValid; } }
+
+			public override Encoding Encoding { get { return EncodingWindows; } }
+
+			public override byte[][] StateTable { get { return table; } }
+
+			/// <summary>
+			/// Accepts only the Valid state: the input ended on a character boundary
+			/// after at least one multi-byte sequence.
+			/// </summary>
+			public override bool IsEncodingValid (byte state)
+			{
+				return state == Valid;
+			}
+
+			// Code page number of GB18030, as passed to Encoding.GetEncoding.
+			int WindowsCodePage {
+				get {
+					return 54936;
+				}
+			}
+
+			/// <summary>
+			/// True when the runtime can provide the GB18030 encoding.
+			/// </summary>
+			public override bool IsSupported {
+				get {
+					try {
+						return Encoding.GetEncoding (WindowsCodePage) != null;
+					} catch (Exception) {
+						// Deliberately broad: any failure to get the encoding means "not supported".
+						return false;
+					}
+				}
+			}
+
+			/// <summary>
+			/// Resolves the encoding and builds the state table: table [state] [input byte]
+			/// yields the next state.
+			/// </summary>
+			protected override void Init ()
+			{
+				EncodingWindows = Encoding.GetEncoding (WindowsCodePage);
+				table = new byte[LAST][];
+				table [0] = errorTable;
+				for (int i = 1; i < LAST; i++)
+					table [i] = new byte[(int)byte.MaxValue + 1];
+
+				// Character boundary: 0x00-0x80 are single bytes, 0x81-0xFE are lead bytes.
+				for (int i = 0x00; i <= 0x80; i++)
+					table [Valid] [i] = Valid;
+				for (int i = 0x81; i <= 0xFE; i++)
+					table [Valid] [i] = Second;
+				table [Valid] [0xFF] = Error;
+
+				// Same transitions, but single bytes stay in NotValid:
+				// need to encounter a multi byte sequence first.
+				for (int i = 0x00; i <= 0x80; i++)
+					table [NotValid] [i] = NotValid;
+				for (int i = 0x81; i <= 0xFE; i++)
+					table [NotValid] [i] = Second;
+				table [NotValid] [0xFF] = Error;
+
+				// Second byte: 0x40-0x7E and 0x80-0xFE complete a two-byte character,
+				// 0x30-0x39 continue a four-byte sequence, everything else is an error.
+				for (int i = 0x00; i <= 0xFF; i++)
+					table [Second] [i] = Error;
+				for (int i = 0x40; i <= 0xFE; i++)
+					table [Second] [i] = Valid;
+				// GB18030 excludes 0x7F (DEL) from the two-byte trail range.
+				table [Second] [0x7F] = Error;
+				for (int i = 0x30; i <= 0x39; i++)
+					table [Second] [i] = Third;
+
+				// Third byte of a four-byte sequence: 0x81-0xFE.
+				for (int i = 0x00; i <= 0xFF; i++)
+					table [Third] [i] = Error;
+				for (int i = 0x81; i <= 0xFE; i++)
+					table [Third] [i] = Fourth;
+
+				// Fourth byte: 0x30-0x39 completes the four-byte character.
+				for (int i = 0x00; i <= 0xFF; i++)
+					table [Fourth] [i] = Error;
+				for (int i = 0x30; i <= 0x39; i++)
+					table [Fourth] [i] = Valid;
+			}
+		}
 		#endregion
 	}
 }

Modified: main/src/core/MonoDevelop.TextEditor.Tests/Mono.TextEditor.Tests/TextFileReaderTests.cs
===================================================================
@@ -135,6 +135,14 @@ public void TestBug4564 ()
 			byte[] input = new byte[] { (byte)'a',(byte)'a', 0xEF, 0xBB, 0xBF };
 			Assert.AreEqual ("aa\uFEFF", TextFileUtility.GetText (input));
 		}
+
+		[Test]
+		public void TestGB18030 ()
+		{
+			const string expected = "南北西东"; // encoded with code page 54936, then decoded via GetText
+			var encoded = Encoding.GetEncoding (54936).GetBytes (expected);
+			Assert.AreEqual (expected, TextFileUtility.GetText (encoded));
+		}
 	}
 }
 
_______________________________________________
Mono-patches maillist  -  Mono-patches@lists.ximian.com
http://lists.ximian.com/mailman/listinfo/mono-patches

[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic