[prev in list] [next in list] [prev in thread] [next in thread] 

List:       kde-commits
Subject:    koffice/kexi/kexiutils
From:       Jaroslaw Staniek <js () iidea ! pl>
Date:       2007-01-03 19:31:47
Message-ID: 1167852707.905491.23424.nullmailer () svn ! kde ! org
[Download RAW message or body]

SVN commit 619580 by staniek:

Kexi
- use transliteration table generated by a shell script 
  to generate identifiers out of unicode characters;
  plus some adjustments made by hand



 M  +12 -35    CMakeLists.txt  
 AM            generate_transliteration_table.sh  
 M  +25 -175   identifier.cpp  
 AM            transliteration_table.cpp.bz2  
 A             transliteration_table.cpp.patch  
 A             transliteration_table.readme  
 AM            update_transliteration_table_patch.sh  


--- trunk/koffice/kexi/kexiutils/CMakeLists.txt #619579:619580
@@ -1,12 +1,10 @@
+include_directories( 
+	${CMAKE_SOURCE_DIR}/kexi 
+	${KDE4_INCLUDES}
+)
 
-
-include_directories( ${CMAKE_SOURCE_DIR}/kexi ${KDE4_INCLUDES}  )
-
-
-########### next target ###############
-
 set(kexiutils_LIB_SRCS identifier.cpp validator.cpp utils.cpp debuggui.cpp 
-	styleproxy.cpp longlongvalidator.cpp)
+	styleproxy.cpp longlongvalidator.cpp ${CMAKE_CURRENT_BINARY_DIR}/transliteration_table.cpp)
 
 kde4_automoc(${kexiutils_LIB_SRCS})
 
@@ -15,37 +13,16 @@
 target_link_libraries(kexiutils  
 	${KDE4_KDECORE_LIBS} ${KDE4_KDEUI_LIBS} ${KDE4_KIO_LIBS} )
 
+add_custom_command(
+	OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/transliteration_table.cpp
+	COMMAND bzip2 ARGS -dc -k ${CMAKE_CURRENT_SOURCE_DIR}/transliteration_table.cpp.bz2 > ${CMAKE_CURRENT_BINARY_DIR}/transliteration_table.cpp
+	DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/transliteration_table.cpp.bz2
+)
+
 set_target_properties(kexiutils PROPERTIES VERSION ${GENERIC_KOFFICE_LIB_VERSION} SOVERSION ${GENERIC_KOFFICE_LIB_SOVERSION} )
+
 install(TARGETS kexiutils  DESTINATION ${LIB_INSTALL_DIR})
 
-
-########### install files ###############
-
 install( FILES  tristate.h identifier.h validator.h utils.h kexiutils_export.h
 	longlongvalidator.h DESTINATION ${INCLUDE_INSTALL_DIR}/kexiutils)
 
-
-
-
-
-#original Makefile.am contents follow:
-
-#include $(top_srcdir)/kexi/Makefile.global
-#
-#lib_LTLIBRARIES = libkexiutils.la
-#libkexiutils_la_SOURCES = identifier.cpp validator.cpp utils.cpp
-#
-#libkexiutilsincludedir=$(includedir)/kexiutils
-#libkexiutilsinclude_HEADERS = tristate.h identifier.h validator.h utils.h kexiutils_export.h
-#
-#noinst_HEADERS = utils_p.h
-#
-#libkexiutils_la_LDFLAGS = \
-#  $(KDE_RPATH) $(all_libraries) \
-#  $(VER_INFO) -Wnounresolved -no-undefined
-#
-#libkexiutils_la_LIBADD = $(LIB_QT) $(LIB_KDEUI) $(LIB_KIO) 
-#
-#INCLUDES = -I$(top_srcdir)/kexi $(all_includes)
-#
-#METASOURCES = AUTO
** trunk/koffice/kexi/kexiutils/generate_transliteration_table.sh #property svn:executable
   + *
--- trunk/koffice/kexi/kexiutils/identifier.cpp #619579:619580
@@ -19,10 +19,7 @@
 */
 
 #include "identifier.h"
-#include <kstaticdeleter.h>
-#include <q3dict.h>
-//Added by qt3to4:
-#include <Q3CString>
+#include "transliteration_table.h"
 
 using namespace KexiUtils;
 
@@ -47,191 +44,44 @@
 	return fn;
 }
 
-// These are in pairs - first the non-latin character in UTF-8,
-// the second is the latin character(s) to appear in identifiers.
-static const char* string2Identifier_table[] = {
-/* 1. Polish characters */
-"Ą", "A",  "Ć", "C",  "Ę", "E",
-"Ł", "L",  "Ń", "N",  "Ó", "O",
-"Ś", "S",  "Ź", "Z",  "Ż", "Z",
-"ą", "a",  "ć", "c",  "ę", "e",
-"ł", "l",  "ń", "n",  "ó", "o",
-"ś", "s",  "ź", "z",  "ż", "z",
-
-/* 2. The mappings of the german "umlauts" to their 2-letter equivalents:
-  (Michael Drüing <michael at drueing.de>)
-
- Note that ß->ss is AFAIK not always correct transliteration, for example
- "Maße" and "Masse" is different, the first meaning "measurements" (as
- plural of "Maß" meaning "measurement"), the second meaning "(physical)
- mass". They're also pronounced dirrefently, the first one is longer, the
- second one short. */
-/** @todo the above three only appear at the beginning of a word. if the word is in
- all caps - like in a caption - then the 2-letter equivalents should also be
- in all caps */
-"Ä", "Ae",
-"Ö", "Oe",
-"Ü", "Ue",
-"ä", "ae",
-"ö", "oe",
-"ü", "ue",
-"ß", "ss",
- 
-/* 3. The part of Serbian Cyrillic which is shared with other Cyrillics but 
- that doesn't mean I am sure that eg. Russians or Bulgarians would do the 
- same. (Chusslove Illich <caslav.ilic at gmx.net>) */
-"а", "a",
-"б", "b",
-"в", "v",
-"г", "g",
-"д", "d",
-"е", "e",
-"ж", "z",
-"з", "z",
-"и", "i",
-"к", "k",
-"л", "l",
-"м", "m",
-"н", "n",
-"о", "o",
-"п", "p",
-"р", "r",
-"с", "s",
-"т", "t",
-"у", "u",
-"ф", "f",
-"х", "h",
-"ц", "c",
-"ч", "c",
-"ш", "s",
-"А", "A",
-"Б", "B",
-"В", "V",
-"Г", "G",
-"Д", "D",
-"Е", "E",
-"Ж", "Z",
-"З", "Z",
-"И", "I",
-"К", "K",
-"Л", "L",
-"М", "M",
-"Н", "N",
-"О", "O",
-"П", "P",
-"Р", "R",
-"С", "S",
-"Т", "T",
-"У", "U",
-"Ф", "F",
-"Х", "H",
-"Ц", "C",
-"Ч", "C",
-"Ш", "S",
-// 3.1. The Serbian-specific Cyrillic characters:
-"ђ", "dj",
-"ј", "j",
-"љ", "lj",
-"њ", "nj",
-"ћ", "c",
-"џ", "dz",
-"Ђ", "Dj",
-"Ј", "J",
-"Љ", "Lj",
-"Њ", "Nj",
-"Ћ", "C",
-"Џ", "Dz",
-// 3.2. The non-ASCII Serbian Latin characters:
-"đ", "dj",
-"ž", "z",
-"ć", "c",
-"č", "c",
-"š", "s",
-"Đ", "Dj",
-"Ž", "Z",
-"Ć", "C",
-"Č", "C",
-"Š", "S",
-// 4. Czech characters (cs_CZ, Michal Svec)
- "Á", "A",
- "Č", "C",
- "Ď", "D",
- "É", "E",
- "Ě", "E",
- "Í", "I",
- "Ň", "N",
- "Ó", "O",
- "Ř", "R",
- "Š", "S",
- "Ť", "T",
- "Ú", "U",
- "Ů", "U",
- "Ý", "Y",
- "Ž", "Z",
- "á", "a",
- "č", "c",
- "ď", "d",
- "é", "e",
- "ě", "e",
- "í", "i",
- "ň", "n",
- "ó", "o",
- "ř", "r",
- "š", "s",
- "ť", "t",
- "ú", "u",
- "ů", "u",
- "ý", "y",
- "ž", "z",
-// END.
-0
-};
-
-//! used for O(1) character transformations in char2Identifier()
-static KStaticDeleter< Q3Dict<Q3CString> > string2Identifier_deleter;
-static Q3Dict<Q3CString>* string2Identifier_dict = 0;
-
 inline QString char2Identifier(const QChar& c)
 {
-	if ((c>='a' && c<='z') || (c>='A' && c<='Z') || (c>='0' && c<='9') || c=='_')
-		return QString(c);
-	else {
-		if (!string2Identifier_dict) {
-			//build dictionary for later use
-			string2Identifier_deleter.setObject( string2Identifier_dict, new Q3Dict<Q3CString>(1009) );
-			string2Identifier_dict->setAutoDelete(true);
-			for (const char **p = string2Identifier_table; *p; p+=2) {
-				string2Identifier_dict->replace( /* replace, not insert because there may be duplicates */
-					QString::fromUtf8(*p), new Q3CString(*(p+1)) );
-			}
-		}
-		const Q3CString *fixedChar = string2Identifier_dict->find(c);
-		if (fixedChar)
-			return *fixedChar;
-	}
-	return QString(QChar('_'));
+	if (c.unicode() >= TRANSLITERATION_TABLE_SIZE)
+		return QString(QChar('_'));
+	const char *const s = transliteration_table[c.unicode()];
+	return s ? QString::fromLatin1(s) : QString(QChar('_'));
 }
 
 QString KexiUtils::string2Identifier(const QString &s)
 {
-	QString r;
-	QString id( s.simplified() );
+	if (s.isEmpty())
+		return QString::null;
+	QString r, id = s.simplified();
 	if (id.isEmpty())
-		return id;
+		return QString::null;
 	r.reserve(id.length());
-//		return "_";
 	id.replace(' ',"_");
 	QChar c = id[0];
-
+	QString add;
+	bool wasUnderscore = false;
+	
 	if (c>='0' && c<='9') {
 		r+='_';
 		r+=c;
-	} else
-		r+=char2Identifier(c);
-
+	} else {
+		add = char2Identifier(c);
+		r+=add;
+		wasUnderscore = add == "_";
+	}
+	
 	const uint idLength = id.length();
-	for (uint i=1; i<idLength; i++)
-		r+=char2Identifier(id.at(i));
+	for (uint i=1; i<idLength; i++) {
+		add = char2Identifier(id.at(i));
+		if (wasUnderscore && add == "_")
+			continue;
+		wasUnderscore = add == "_";
+		r+=add;
+	}
 	return r;
 }
 
** trunk/koffice/kexi/kexiutils/transliteration_table.cpp.bz2 #property svn:mime-type
   + application/octet-stream
** trunk/koffice/kexi/kexiutils/update_transliteration_table_patch.sh #property svn:executable
   + *


[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic