[prev in list] [next in list] [prev in thread] [next in thread] 

List:       icu-bugrfe
Subject:    [icu-bug] incoming/2696
From:       jtcsv () jtcsv ! com
Date:       2003-02-13 0:08:28
[Download RAW message or body]

schererm moved PR#2696 from incoming to collation	URL: http://www.jtcsv.com/cgi-bin/icu-bugs?findid=2696

====> ORIGINAL MESSAGE FOLLOWS <====

From: kentk@cs.chalmers.se
Date: Thu Feb  6 09:12:18 2003
Subject: collation rules for pl (Polish, (maybe also Kashubian?))

Full_Name: Kent Karlsson
Version: 
OS: all
PROJECT: ICU4C,ICU4J and ICU4JNI
JAVA:   
Submission from: (NULL) (129.16.214.213)


//// Polish (pl, pl_*)
//// Maybe also Kashubian. Ô?

//// Intended alphabet order ("[x]" marks a separate letter; "!" marks a
//// point the submitter flagged for attention):
////// a[a-ogonek] b c [c-acute][c-caron] d e[e-ogonek] f g h [ch!] iy! j k l
////// [l-stroke] m n [n-acute][n-caron]
////// o [o-acute][o-caron] p q r s [s-acute][s-caron] t u v w x z
////// [z-acute][z-caron] [z-dotabove]

///variants of y?


	// The rules below are written with base letter + combining mark sequences.
	"[normalization on]"


	" & AE" // order ae-ligature as a variant of AE
	"  << \u00E6" // LATIN SMALL LETTER AE
	"   <<< \u00C6" // LATIN CAPITAL LETTER AE


	" & OE" // order oe-ligature as a variant of OE
	"  << \u0153" // LATIN SMALL LIGATURE OE
	"   <<< \u0152" // LATIN CAPITAL LIGATURE OE


	// A-ogonek is ordered as a variant of A.
	// The rule itself is disabled (every line is prefixed with "////").

	////" & A"
	////"  << a\u0328" // LATIN SMALL LETTER A with COMBINING OGONEK
	////"   <<< \uFF41\u0328" // FULLWIDTH LATIN SMALL LETTER A with COMBINING OGONEK
	////"   <<< A\u0328"  // LATIN CAPITAL LETTER A with COMBINING OGONEK
	////"   <<< \uFF21\u0328" // FULLWIDTH LATIN CAPITAL LETTER A with COMBINING OGONEK


	// C-acute, C-caron (c-tilde?)
	// C-acute is a separate letter after C (primary difference);
	// C-caron is a variant of C-acute (secondary difference).

	" & C"
	" < c\u0301" // LATIN SMALL LETTER C with COMBINING ACUTE ACCENT
	"   <<< \uFF43\u0301" // FULLWIDTH LATIN SMALL LETTER C with COMBINING ACUTE ACCENT
	"   <<< C\u0301"  // LATIN CAPITAL LETTER C with COMBINING ACUTE ACCENT
	"   <<< \uFF23\u0301" // FULLWIDTH LATIN CAPITAL LETTER C with COMBINING ACUTE ACCENT
	"  << c\u030C" // LATIN SMALL LETTER C with COMBINING CARON
	"   <<< \uFF43\u030C" // FULLWIDTH LATIN SMALL LETTER C with COMBINING CARON
	"   <<< C\u030C"  // LATIN CAPITAL LETTER C with COMBINING CARON
	"   <<< \uFF23\u030C" // FULLWIDTH LATIN CAPITAL LETTER C with COMBINING CARON


	// E-ogonek is ordered as a variant of E.
	// The rule itself is disabled (every line is prefixed with "////").
	// Case order follows the other groups: small, fullwidth small,
	// capital, fullwidth capital.

	////" & E"
	////"  << e\u0328" // LATIN SMALL LETTER E with COMBINING OGONEK
	////"   <<< \uFF45\u0328" // FULLWIDTH LATIN SMALL LETTER E with COMBINING OGONEK
	////"   <<< E\u0328"  // LATIN CAPITAL LETTER E with COMBINING OGONEK
	////"   <<< \uFF25\u0328" // FULLWIDTH LATIN CAPITAL LETTER E with COMBINING OGONEK


	// CH (cH is a miscapitalisation) is ordered as a separate letter after H.
	// Only the ch, Ch and CH spellings (and their fullwidth forms) are
	// tailored; cH is deliberately left out.

	" & H" // order CH after H
	"  < ch"
	"   <<< \uFF43\uFF48" // FULLWIDTH LATIN SMALL LETTER C with FULLWIDTH LATIN SMALL LETTER H
	"   <<< Ch"
	"   <<< \uFF23\uFF48" // FULLWIDTH LATIN CAPITAL LETTER C with FULLWIDTH LATIN SMALL LETTER H
	"   <<< CH"
	"   <<< \uFF23\uFF28" // FULLWIDTH LATIN CAPITAL LETTER C with FULLWIDTH LATIN CAPITAL LETTER H


	// Y is ordered as a variant of I (secondary difference), so I and Y
	// are not distinguished at the primary level.
	// NOTE(review): the header asks "variants of y?" - accented forms of Y
	// are not tailored here; confirm whether they should be.

	" & I" // order Y as a variant of I
	"  << y"
	"   <<< \uFF59" // FULLWIDTH LATIN SMALL LETTER Y
	"   <<< Y"
	"   <<< \uFF39" // FULLWIDTH LATIN CAPITAL LETTER Y


	// L-stroke (also the ones made with short stroke overlay)
	// L-stroke is a separate letter after L; the L + COMBINING SHORT STROKE
	// OVERLAY spellings are secondary variants of it.
	// Case order follows the other groups: small, fullwidth small,
	// capital, fullwidth capital. Every overlay variant must carry \u0335,
	// otherwise the plain fullwidth letter itself would be re-ordered.

	" & L"
	" < \u0142" // LATIN SMALL LETTER L WITH STROKE
	"   <<< \u0141"  // LATIN CAPITAL LETTER L WITH STROKE
	"  << l\u0335" // LATIN SMALL LETTER L with COMBINING SHORT STROKE OVERLAY
	"   <<< \uFF4C\u0335" // FULLWIDTH LATIN SMALL LETTER L with COMBINING SHORT STROKE OVERLAY
	"   <<< L\u0335"  // LATIN CAPITAL LETTER L with COMBINING SHORT STROKE OVERLAY
	"   <<< \uFF2C\u0335" // FULLWIDTH LATIN CAPITAL LETTER L with COMBINING SHORT STROKE OVERLAY


	//// "'t" is short for "het" (the) in Dutch (and Afrikaans?) is ordered as "t", and
	//// "'n" is short for "ein" (one, you) in Afrikaans is ordered as "n"
	//// Only the "'n" case is tailored below: U+0149 is made identical to
	//// the sequence U+2019 + n.

	"& \u2019n" // note that \u2019 should be ignored at levels 1-3
	"    = \u0149" // LATIN SMALL LETTER N PRECEDED BY APOSTROPHE


	// N-acute, N-caron, (n-comma, n-tilde, n-cedilla?; nj ny??)
	// N-acute is a separate letter after N (primary difference);
	// N-caron is a variant of N-acute (secondary difference).
	// Case order follows the C, S and Z groups: small, fullwidth small,
	// capital, fullwidth capital.

	" & N"
	" < n\u0301" // LATIN SMALL LETTER N with COMBINING ACUTE ACCENT
	"   <<< \uFF4E\u0301" // FULLWIDTH LATIN SMALL LETTER N with COMBINING ACUTE ACCENT
	"   <<< N\u0301"  // LATIN CAPITAL LETTER N with COMBINING ACUTE ACCENT
	"   <<< \uFF2E\u0301" // FULLWIDTH LATIN CAPITAL LETTER N with COMBINING ACUTE ACCENT
	"  << n\u030C" // LATIN SMALL LETTER N with COMBINING CARON
	"   <<< \uFF4E\u030C" // FULLWIDTH LATIN SMALL LETTER N with COMBINING CARON
	"   <<< N\u030C"  // LATIN CAPITAL LETTER N with COMBINING CARON
	"   <<< \uFF2E\u030C" // FULLWIDTH LATIN CAPITAL LETTER N with COMBINING CARON


	// O-acute, O-caron (and O-circumflex)
	// O-acute is a separate letter after O (primary difference);
	// O-caron and O-circumflex are variants of O-acute (secondary difference).
	// Case order follows the C, S and Z groups: small, fullwidth small,
	// capital, fullwidth capital. The fullwidth code points must be written
	// with the \u escape; a bare "FF4F" would be four literal letters.

	" & O"
	" < o\u0301" // LATIN SMALL LETTER O with COMBINING ACUTE ACCENT
	"   <<< \uFF4F\u0301" // FULLWIDTH LATIN SMALL LETTER O with COMBINING ACUTE ACCENT
	"   <<< O\u0301"  // LATIN CAPITAL LETTER O with COMBINING ACUTE ACCENT
	"   <<< \uFF2F\u0301" // FULLWIDTH LATIN CAPITAL LETTER O with COMBINING ACUTE ACCENT
	"  << o\u030C" // LATIN SMALL LETTER O with COMBINING CARON
	"   <<< \uFF4F\u030C" // FULLWIDTH LATIN SMALL LETTER O with COMBINING CARON
	"   <<< O\u030C"  // LATIN CAPITAL LETTER O with COMBINING CARON
	"   <<< \uFF2F\u030C" // FULLWIDTH LATIN CAPITAL LETTER O with COMBINING CARON
	"  << o\u0302" // LATIN SMALL LETTER O with COMBINING CIRCUMFLEX ACCENT
	"   <<< \uFF4F\u0302" // FULLWIDTH LATIN SMALL LETTER O with COMBINING CIRCUMFLEX ACCENT
	"   <<< O\u0302" // LATIN CAPITAL LETTER O with COMBINING CIRCUMFLEX ACCENT
	"   <<< \uFF2F\u0302" // FULLWIDTH LATIN CAPITAL LETTER O with COMBINING CIRCUMFLEX ACCENT
	////// Ô (Kashubian)?


	// S-acute, S-caron
	// S-acute is a separate letter after S (primary difference);
	// S-caron is a variant of S-acute (secondary difference).

	" & S"
	" < s\u0301" // LATIN SMALL LETTER S with COMBINING ACUTE ACCENT
	"   <<< \uFF53\u0301" // FULLWIDTH LATIN SMALL LETTER S with COMBINING ACUTE ACCENT
	"   <<< S\u0301"  // LATIN CAPITAL LETTER S with COMBINING ACUTE ACCENT
	"   <<< \uFF33\u0301" // FULLWIDTH LATIN CAPITAL LETTER S with COMBINING ACUTE ACCENT
	"  << s\u030C" // LATIN SMALL LETTER S with COMBINING CARON
	"   <<< \uFF53\u030C" // FULLWIDTH LATIN SMALL LETTER S with COMBINING CARON
	"   <<< S\u030C"  // LATIN CAPITAL LETTER S with COMBINING CARON
	"   <<< \uFF33\u030C" // FULLWIDTH LATIN CAPITAL LETTER S with COMBINING CARON


	// Z-acute, Z-caron; Z-dotabove
	// Z-acute is a separate letter after Z, with Z-caron as its secondary
	// variant; Z-dotabove is a further separate letter after those.

	" & Z" // order Z-acute after Z
	" < z\u0301" // LATIN SMALL LETTER Z with COMBINING ACUTE ACCENT
	"   <<< \uFF5A\u0301" // FULLWIDTH LATIN SMALL LETTER Z with COMBINING ACUTE ACCENT
	"   <<< Z\u0301"  // LATIN CAPITAL LETTER Z with COMBINING ACUTE ACCENT
	"   <<< \uFF3A\u0301" // FULLWIDTH LATIN CAPITAL LETTER Z with COMBINING ACUTE ACCENT
	"  << z\u030C" // LATIN SMALL LETTER Z with COMBINING CARON
	"   <<< \uFF5A\u030C" // FULLWIDTH LATIN SMALL LETTER Z with COMBINING CARON
	"   <<< Z\u030C"  // LATIN CAPITAL LETTER Z with COMBINING CARON
	"   <<< \uFF3A\u030C" // FULLWIDTH LATIN CAPITAL LETTER Z with COMBINING CARON

	" < z\u0307" // and then Z-dotabove
	"   <<< \uFF5A\u0307" // FULLWIDTH LATIN SMALL LETTER Z with COMBINING DOT ABOVE
	"   <<< Z\u0307"  // LATIN CAPITAL LETTER Z with COMBINING DOT ABOVE
	"   <<< \uFF3A\u0307" // FULLWIDTH LATIN CAPITAL LETTER Z with COMBINING DOT ABOVE


	// dz-caron: each precomposed digraph character is ordered as a tertiary
	// variant of the matching d + z + COMBINING CARON spelling.
	" & dz\u030C <<< \u01C6" // LATIN SMALL LETTER DZ WITH CARON
	" & Dz\u030C <<< \u01C5" // LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
	" & DZ\u030C <<< \u01C4" // LATIN CAPITAL LETTER DZ WITH CARON

_______________________________________________
icu-bugrfe mailing list
icu-bugrfe@oss.software.ibm.com
http://oss.software.ibm.com/developerworks/oss/mailman/listinfo/icu-bugrfe
[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic