[prev in list] [next in list] [prev in thread] [next in thread]
List: mysql-internals
Subject: Re: czech case sensitive collate for win1250 encoding
From: "Pavel Stehule" <pavel.stehule () hotmail ! com>
Date: 2005-12-28 12:49:28
Message-ID: BAY20-F676DE0705FA237BB16C86F9360 () phx ! gbl
[Download RAW message or body]
I was wrong, you are right. This patch adds a case-insensitive collation.
>Dear Pavel,
>
>Thank you very much for your efforts in making MySQL
>better and useful for more people!
>
>I'm sorry, can you clarify please, why did you add case
>sensitive collation? MySQL currently has cp1250_czech_cs,
>which is case sensitive. Is it wrong? Why do we need another
>case sensitive collation?
>
>MySQL is missing a case *insensitive* Czech collation for cp1250.
>I guess you meant to add a case insensitive collation, right?
>
>Unfortunately your patch doesn't appear to do that.
>You added the "sort_order_win1250ch_ci" array, but
>in fact it is not even used by ORDER BY or GROUP BY or DISTINCT,
>or in indexes. It is only used in LIKE code in sql/item_cmpfunc.cc.
>
>
>What adding a case insensitive collation would need is:
>- a new NEXT_CMP_VALUE_CI for case insensitive comparisons,
> which should do only one pass, unlike two passes in NEXT_CMP_VALUE.
>
>- new case insensitive counterparts using the new NEXT_CMP_VALUE_CI
> for all these four functions:
>
> my_strnncoll_win1250ch
> my_strnncollsp_win1250ch
> my_strnxfrm_win1250ch
> my_like_range_win1250ch
>
>
>
>Maybe I'm wrong and am missing something.
>
>Can you please try the following queries with your new collation:
>
>CREATE TABLE t1
>(a varchar(64) character set cp1250 collate cp1250_czech_ci);
>INSERT INTO t1 VALUES ('H'),('h'),('I'),('i'),('CH'),('ch');
>SELECT DISTINCT a FROM t1;
>SELECT a, COUNT(*) FROM t1 GROUP BY a;
>SELECT a FROM t1 ORDER BY a, BINARY a;
>
>Thanks!
>
>
>Pavel Stehule wrote:
>>Hello
>>
>>My colleague and I prepared a patch which
>>a) adds a case sensitive collation for the win1250 encoding
>>b) cleans up some mistakes in names,
>>
>>Please, can you add the patch to the distribution?
>>Thank You
>>
>>Pavel Stehule
>>
>>_________________________________________________________________
>>Najdete si svou lasku a nove pratele na Match.com. http://www.msn.cz/
>>
>>
>>------------------------------------------------------------------------
>>
>>diff -c -r mysql-5.0.16/config/ac-macros/character_sets.m4
>>mysql-5.0.16patch/config/ac-macros/character_sets.m4
>>*** mysql-5.0.16/config/ac-macros/character_sets.m4 2005-11-15
>>01:12:45.000000000 +0100
>>--- mysql-5.0.16patch/config/ac-macros/character_sets.m4 2005-12-27
>>14:45:10.000000000 +0100
>>***************
>>*** 225,231 ****
>> ;;
>> cp1250)
>> default_charset_default_collation="cp1250_general_ci"
>>! default_charset_collations="cp1250_general_ci cp1250_czech_cs
>>cp1250_bin"
>> ;;
>> cp1251)
>> default_charset_default_collation="cp1251_general_ci"
>>--- 225,231 ----
>> ;;
>> cp1250)
>> default_charset_default_collation="cp1250_general_ci"
>>! default_charset_collations="cp1250_general_ci cp1250_czech_cs
>>cp1250_czech_ci cp1250_bin"
>> ;;
>> cp1251)
>> default_charset_default_collation="cp1251_general_ci"
>>diff -c -r mysql-5.0.16/configure mysql-5.0.16patch/configure
>>*** mysql-5.0.16/configure 2005-11-15 01:14:33.000000000 +0100
>>--- mysql-5.0.16patch/configure 2005-12-27 14:46:31.000000000 +0100
>>***************
>>*** 2610,2616 ****
>> ;;
>> cp1250)
>> default_charset_default_collation="cp1250_general_ci"
>>! default_charset_collations="cp1250_general_ci cp1250_czech_cs
>>cp1250_bin"
>> ;;
>> cp1251)
>> default_charset_default_collation="cp1251_general_ci"
>>--- 2610,2616 ----
>> ;;
>> cp1250)
>> default_charset_default_collation="cp1250_general_ci"
>>! default_charset_collations="cp1250_general_ci cp1250_czech_cs
>>cp1250_czech_ci cp1250_bin"
>> ;;
>> cp1251)
>> default_charset_default_collation="cp1251_general_ci"
>>diff -c -r mysql-5.0.16/include/m_ctype.h
>>mysql-5.0.16patch/include/m_ctype.h
>>*** mysql-5.0.16/include/m_ctype.h 2005-11-15 01:12:34.000000000 +0100
>>--- mysql-5.0.16patch/include/m_ctype.h 2005-12-27 14:45:54.000000000
>>+0100
>>***************
>>*** 268,273 ****
>>--- 268,274 ----
>> extern CHARSET_INFO my_charset_ujis_bin;
>> extern CHARSET_INFO my_charset_utf8_general_ci;
>> extern CHARSET_INFO my_charset_utf8_bin;
>>+ extern CHARSET_INFO my_charset_cp1250_czech_cs;
>> extern CHARSET_INFO my_charset_cp1250_czech_ci;
>> /* declarations for simple charsets */
>>diff -c -r mysql-5.0.16/mysys/charset-def.c
>>mysql-5.0.16patch/mysys/charset-def.c
>>*** mysql-5.0.16/mysys/charset-def.c 2005-11-15 01:12:46.000000000 +0100
>>--- mysql-5.0.16patch/mysys/charset-def.c 2005-12-27 14:44:32.000000000
>>+0100
>>***************
>>*** 88,93 ****
>>--- 88,94 ----
>> #ifdef HAVE_CHARSET_cp1250
>> add_compiled_collation(&my_charset_cp1250_czech_ci);
>>+ add_compiled_collation(&my_charset_cp1250_czech_cs);
>> #endif
>> #ifdef HAVE_CHARSET_cp932
>>diff -c -r mysql-5.0.16/scripts/mysqlbug
>>mysql-5.0.16patch/scripts/mysqlbug
>>*** mysql-5.0.16/scripts/mysqlbug 2005-11-15 01:22:17.000000000 +0100
>>--- mysql-5.0.16patch/scripts/mysqlbug 2005-12-27 15:18:23.000000000 +0100
>>***************
>>*** 8,15 ****
>> COMPILATION_COMMENT="Source distribution"
>> BUGmysql="mysql@lists.mysql.com"
>> # This is set by configure
>>! COMP_ENV_INFO="CC='ccache gcc' CFLAGS='' CXX='ccache gcc'
>>CXXFLAGS='-felide-constructors -fno-exceptions -fno-rtti' LDFLAGS=''
>>ASFLAGS=''"
>>! CONFIGURE_LINE="./configure '--with-embedded-server'
>>'--with-berkeley-db' '--with-innodb' '--enable-thread-safe-client'
>>'--with-extra-charsets=complex' '--with-ndbcluster' 'CC=ccache gcc'
>>'CXXFLAGS=-felide-constructors -fno-exceptions -fno-rtti' 'CXX=ccache
>>gcc'"
>> LIBC_INFO=""
>> for pat in /lib/libc.* /lib/libc-* /usr/lib/libc.* /usr/lib/libc-*
>>--- 8,15 ----
>> COMPILATION_COMMENT="Source distribution"
>> BUGmysql="mysql@lists.mysql.com"
>> # This is set by configure
>>! COMP_ENV_INFO="CC='gcc' CFLAGS='' CXX='g++' CXXFLAGS='' LDFLAGS=''
>>ASFLAGS=''"
>>! CONFIGURE_LINE="./configure '--prefix=/var/lib' '--exec-prefix=/usr'
>>'--datadir=/var/lib/mysql' '--sysconfdir=/etc'
>>'--localstatedir=/var/lib/mysql'
>>'--with-unix-socket-path=/var/lib/mysql/mysql.sock'
>>'--with-mysqld-user=mysql' '--enable-local-infile' '--with-charset=cp1250'
>>'--with-extra-charsets=complex' '--with-mysql=/usr/local/mysql'"
>> LIBC_INFO=""
>> for pat in /lib/libc.* /lib/libc-* /usr/lib/libc.* /usr/lib/libc-*
>>***************
>>*** 231,238 ****
>> >Class: $CLASS_C
>> >Release: mysql-${VERSION} ($COMPILATION_COMMENT)
>> `test -n "$MYSQL_SERVER" && echo ">Server: $MYSQL_SERVER"`
>>! >C compiler: gcc (GCC) 3.3.3 20040412 (Red Hat Linux 3.3.3-7)
>>! >C++ compiler: gcc (GCC) 3.3.3 20040412 (Red Hat Linux 3.3.3-7)
>> >Environment:
>> $ENVIRONMENT_C
>> `test -n "$SYSTEM" && echo "System: $SYSTEM"`
>>--- 231,238 ----
>> >Class: $CLASS_C
>> >Release: mysql-${VERSION} ($COMPILATION_COMMENT)
>> `test -n "$MYSQL_SERVER" && echo ">Server: $MYSQL_SERVER"`
>>! >C compiler: gcc (GCC) 4.0.2 20051125 (Red Hat 4.0.2-8)
>>! >C++ compiler: g++ (GCC) 4.0.2 20051125 (Red Hat 4.0.2-8)
>> >Environment:
>> $ENVIRONMENT_C
>> `test -n "$SYSTEM" && echo "System: $SYSTEM"`
>>diff -c -r mysql-5.0.16/sql/share/charsets/cp1250.xml
>>mysql-5.0.16patch/sql/share/charsets/cp1250.xml
>>*** mysql-5.0.16/sql/share/charsets/cp1250.xml 2005-11-15
>>01:23:45.000000000 +0100
>>--- mysql-5.0.16patch/sql/share/charsets/cp1250.xml 2005-12-27
>>14:34:08.000000000 +0100
>>***************
>>*** 154,159 ****
>>--- 154,162 ----
>> </collation>
>> <collation name="cp1250_czech_ci"/>
>>+ <collation name="cp1250_czech_cs"/>
>>+ + <collation name="cp1250_bin" flag="binary"/>
>> diff -c -r mysql-5.0.16/sql/share/charsets/Index.xml
>>mysql-5.0.16patch/sql/share/charsets/Index.xml
>>*** mysql-5.0.16/sql/share/charsets/Index.xml 2005-11-15
>>01:23:47.000000000 +0100
>>--- mysql-5.0.16patch/sql/share/charsets/Index.xml 2005-12-27
>>14:33:05.000000000 +0100
>>***************
>>*** 1,6 ****
>> <?xml version='1.0' encoding="utf-8"?>
>> ! <charsets max-id="98">
>> <copyright>
>> Copyright (C) 2003 MySQL AB
>>--- 1,6 ----
>> <?xml version='1.0' encoding="utf-8"?>
>> ! <charsets max-id="99">
>> <copyright>
>> Copyright (C) 2003 MySQL AB
>>***************
>>*** 373,378 ****
>>--- 373,381 ----
>> <collation name="cp1250_czech_cs" id="34" order="Czech">
>> <flag>compiled</flag>
>> </collation>
>>+ <collation name="cp1250_czech_ci" id="99" order="Czech">
>>+ <flag>compiled</flag>
>>+ </collation>
>> <collation name="cp1250_bin" id="66" order="Binary" flag="binary"/>
>> </charset>
>> diff -c -r mysql-5.0.16/strings/ctype-win1250ch.c
>>mysql-5.0.16patch/strings/ctype-win1250ch.c
>>*** mysql-5.0.16/strings/ctype-win1250ch.c 2005-11-15 01:12:46.000000000
>>+0100
>>--- mysql-5.0.16patch/strings/ctype-win1250ch.c 2005-12-27
>>15:32:37.000000000 +0100
>>***************
>>*** 261,267 ****
>> ! static uchar NEAR sort_order_win1250ch[] = {
>> 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
>> 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
>> 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
>>--- 261,267 ----
>> ! static uchar NEAR sort_order_win1250ch_cs[] = {
>> 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
>> 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
>> 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
>>***************
>>*** 280,285 ****
>>--- 280,305 ----
>> 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253,
>>254, 255
>> };
>> + static uchar NEAR sort_order_win1250ch_ci[] = {
>>+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
>>+ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
>>+ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
>>+ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
>>+ 64, 65, 71, 72, 76, 78, 83, 84, 85, 86, 90, 91, 92, 96, 97,100,
>>+ 105,106,107,110,114,117,122,123,124,125,127,131,132,133,134,135,
>>+ 136, 65, 71, 72, 76, 78, 83, 84, 85, 86, 90, 91, 92, 96, 97,100,
>>+ 105,106,107,110,114,117,122,123,124,125,127,137,138,139,140, 0,
>>+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
>>+ 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,255,
>>+ 66,255, 93,255, 94,111,255,255,255,112,113,115,128,255,129,130,
>>+ 255, 66,255, 93,255, 94,111,255,255,112,113,115,128,255,129,130,
>>+ 108, 67, 68, 69, 70, 95, 73, 75, 74, 79, 81, 82, 80, 89, 87, 77,
>>+ 255, 98, 99,101,102,103,104,255,109,119,118,120,121,126,116,255,
>>+ 108, 67, 68, 69, 70, 95, 73, 75, 74, 79, 81, 82, 80, 89, 88, 77,
>>+ 255, 98, 99,101,102,103,104,255,109,119,118,120,121,126,116,255,
>>+ };
>>+ + static uchar NEAR _sort_order_win1250ch1[] = {
>> 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, 0x81,
>> 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, 0x81,
>>***************
>>*** 662,667 ****
>>--- 682,702 ----
>> }
>> + static MY_COLLATION_HANDLER my_collation_czech_cs_handler =
>>+ {
>>+ NULL, /* init */
>>+ my_strnncoll_win1250ch,
>>+ my_strnncollsp_win1250ch,
>>+ my_strnxfrm_win1250ch,
>>+ my_strnxfrmlen_simple,
>>+ my_like_range_win1250ch,
>>+ my_wildcmp_8bit,
>>+ my_strcasecmp_8bit,
>>+ my_instr_simple,
>>+ my_hash_sort_simple,
>>+ my_propagate_simple
>>+ };
>>+ static MY_COLLATION_HANDLER my_collation_czech_ci_handler =
>> {
>> NULL, /* init */
>>***************
>>*** 678,684 ****
>> };
>> ! CHARSET_INFO my_charset_cp1250_czech_ci =
>> {
>> 34,0,0, /* number */
>> MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_CSSORT, /* state */
>>--- 713,719 ----
>> };
>> ! CHARSET_INFO my_charset_cp1250_czech_cs =
>> {
>> 34,0,0, /* number */
>> MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_CSSORT, /* state */
>>***************
>>*** 689,695 ****
>> ctype_win1250ch,
>> to_lower_win1250ch,
>> to_upper_win1250ch,
>>! sort_order_win1250ch,
>> NULL, /* contractions */
>> NULL, /* sort_order_big*/
>> tab_cp1250_uni, /* tab_to_uni */
>>--- 724,730 ----
>> ctype_win1250ch,
>> to_lower_win1250ch,
>> to_upper_win1250ch,
>>! sort_order_win1250ch_cs,
>> NULL, /* contractions */
>> NULL, /* sort_order_big*/
>> tab_cp1250_uni, /* tab_to_uni */
>>***************
>>*** 707,715 ****
>> ' ', /* pad char */
>> 0, /* escape_with_backslash_is_dangerous
>>*/
>> &my_charset_8bit_handler,
>>! &my_collation_czech_ci_handler
>> };
>> #endif /* REAL_MYSQL */
>> --- 742,781 ----
>> ' ', /* pad char */
>> 0, /* escape_with_backslash_is_dangerous
>>*/
>> &my_charset_8bit_handler,
>>! &my_collation_czech_cs_handler
>> };
>> + CHARSET_INFO my_charset_cp1250_czech_ci =
>>+ {
>>+ 99,0,0, /* number */
>>+ MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_CSSORT, /* state */
>>+ "cp1250", /* cs name */
>>+ "cp1250_czech_ci", /* name */
>>+ "", /* comment */
>>+ NULL, /* tailoring */
>>+ ctype_win1250ch,
>>+ to_lower_win1250ch,
>>+ to_upper_win1250ch,
>>+ sort_order_win1250ch_ci,
>>+ NULL, /* contractions */
>>+ NULL, /* sort_order_big*/
>>+ tab_cp1250_uni, /* tab_to_uni */
>>+ idx_uni_cp1250, /* tab_from_uni */
>>+ my_unicase_default, /* caseinfo */
>>+ NULL, /* state_map */
>>+ NULL, /* ident_map */
>>+ 2, /* strxfrm_multiply */
>>+ 1, /* caseup_multiply */
>>+ 1, /* casedn_multiply */
>>+ 1, /* mbminlen */
>>+ 1, /* mbmaxlen */
>>+ 0, /* min_sort_char */
>>+ 0, /* max_sort_char */
>>+ ' ', /* pad char */
>>+ 0, /* escape_with_backslash_is_dangerous */
>>+ &my_charset_8bit_handler,
>>+ &my_collation_czech_ci_handler
>>+ };
>> #endif /* REAL_MYSQL */
>>
>>
_________________________________________________________________
Emotikony a pozadi programu MSN Messenger ozivi vasi konverzaci.
http://messenger.msn.cz/
--
MySQL Internals Mailing List
For list archives: http://lists.mysql.com/internals
To unsubscribe: http://lists.mysql.com/internals?unsub=mysql-internals@progressive-comp.com
[prev in list] [next in list] [prev in thread] [next in thread]
Configure |
About |
News |
Add a list |
Sponsored by KoreLogic