[prev in list] [next in list] [prev in thread] [next in thread] 

List:       kde-commits
Subject:    branches/KDE/3.5/kdepim/indexlib
From:       Luís Pedro Coelho <luis () luispedro ! org>
Date:       2005-10-12 11:00:44
Message-ID: 1129114844.789786.21134.nullmailer () svn ! kde ! org
[Download RAW message or body]

SVN commit 469868 by luis_pedro:

Correctly tokenize strings with numbers


 M  +35 -0     tokenizer-test.tcc  
 M  +10 -10    tokenizer.cpp  


--- branches/KDE/3.5/kdepim/indexlib/tokenizer-test.tcc #469867:469868
@@ -25,9 +25,44 @@
 	}
 }
 
+void with_newlines() { // Test: "one\ntwo\nthree" must yield exactly the tokens ONE, TWO and THREE.
+	std::auto_ptr<tokenizer> tokenizer = get_tokenizer( "latin-1:european" );
+	assert(tokenizer.get());
+	std::vector<std::string> tokens = tokenizer->string_to_words( "one\ntwo\nthree" );
+	std::vector<std::string> expected;
+	expected.push_back( "ONE" );
+	expected.push_back( "TWO" );
+	expected.push_back( "THREE" );
+	std::sort( tokens.begin(), tokens.end() ); // both sides are sorted, so the order tokens are returned in is not checked
+	std::sort( expected.begin(), expected.end() );
+	BOOST_CHECK_EQUAL( expected.size(), tokens.size() );
+	for ( std::vector<std::string>::size_type i = 0; i < expected.size() && i < tokens.size(); ++i ) { // unsigned index matches size(); bounded by both sizes so at() stays in range
+		BOOST_CHECK_EQUAL( expected.at( i ), tokens.at( i ) );
+	}
+}
+
+void with_numbers() { // Test: digit-only words ("012", "123") must be returned as tokens next to ONE and FOUR.
+	std::auto_ptr<tokenizer> tokenizer = get_tokenizer( "latin-1:european" );
+	assert(tokenizer.get());
+	std::vector<std::string> tokens = tokenizer->string_to_words( "one 012 123 four" );
+	std::vector<std::string> expected;
+	expected.push_back( "ONE" );
+	expected.push_back( "012" );
+	expected.push_back( "123" );
+	expected.push_back( "FOUR" );
+	std::sort( tokens.begin(), tokens.end() ); // both sides are sorted, so the order tokens are returned in is not checked
+	std::sort( expected.begin(), expected.end() );
+	BOOST_CHECK_EQUAL( expected.size(), tokens.size() );
+	for ( std::vector<std::string>::size_type i = 0; i < expected.size() && i < tokens.size(); ++i ) { // unsigned index matches size(); bounded by both sizes so at() stays in range
+		BOOST_CHECK_EQUAL( expected.at( i ), tokens.at( i ) );
+	}
+}
+
 test_suite* get_suite() {
 	test_suite* test = BOOST_TEST_SUITE( "Tokenizer tests" );
 	test->add( BOOST_TEST_CASE( &simple ) );
+	test->add( BOOST_TEST_CASE( &with_newlines ) ); // input whose words are separated by '\n'
+	test->add( BOOST_TEST_CASE( &with_numbers ) ); // input containing digit-only words
 	return test;
 }
 
--- branches/KDE/3.5/kdepim/indexlib/tokenizer.cpp #469867:469868
@@ -74,16 +74,16 @@
 				stop, // - [ 45 ]
 				stop, // . [ 46 ]
 				stop, // / [ 47 ]
-				0, // 0 [ 48 ]
-				1, // 1 [ 49 ]
-				2, // 2 [ 50 ]
-				3, // 3 [ 51 ]
-				4, // 4 [ 52 ]
-				5, // 5 [ 53 ]
-				6, // 6 [ 54 ]
-				7, // 7 [ 55 ]
-				8, // 8 [ 56 ]
-				9, // 9 [ 57 ]
+				'0', // 0 [ 48 ]
+				'1', // 1 [ 49 ]
+				'2', // 2 [ 50 ]
+				'3', // 3 [ 51 ]
+				'4', // 4 [ 52 ]
+				'5', // 5 [ 53 ]
+				'6', // 6 [ 54 ]
+				'7', // 7 [ 55 ]
+				'8', // 8 [ 56 ]
+				'9', // 9 [ 57 ]
 				stop, // : [ 58 ]
 				stop, // ; [ 59 ]
 				stop, // < [ 60 ]
[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic