[prev in list] [next in list] [prev in thread] [next in thread]
List: kde-commits
Subject: branches/KDE/3.5/kdepim/indexlib
From: Luís Pedro Coelho <luis () luispedro ! org>
Date: 2005-10-12 11:00:44
Message-ID: 1129114844.789786.21134.nullmailer () svn ! kde ! org
[Download RAW message or body]
SVN commit 469868 by luis_pedro:
Correctly tokenize strings with numbers
M +35 -0 tokenizer-test.tcc
M +10 -10 tokenizer.cpp
--- branches/KDE/3.5/kdepim/indexlib/tokenizer-test.tcc #469867:469868
@@ -25,9 +25,44 @@
}
}
+void with_newlines() {
+ std::auto_ptr<tokenizer> tokenizer = get_tokenizer( "latin-1:european" );
+ assert(tokenizer.get());
+ std::vector<std::string> tokens = tokenizer->string_to_words( "one\ntwo\nthree" );
+ std::vector<std::string> expected;
+ expected.push_back( "ONE" );
+ expected.push_back( "TWO" );
+ expected.push_back( "THREE" );
+ std::sort( tokens.begin(), tokens.end() );
+ std::sort( expected.begin(), expected.end() );
+ BOOST_CHECK_EQUAL( expected.size(), tokens.size() );
+ for ( int i = 0; i < expected.size() && i < tokens.size(); ++i ) {
+ BOOST_CHECK_EQUAL( expected.at( i ), tokens.at( i ) );
+ }
+}
+
+void with_numbers() {
+ std::auto_ptr<tokenizer> tokenizer = get_tokenizer( "latin-1:european" );
+ assert(tokenizer.get());
+ std::vector<std::string> tokens = tokenizer->string_to_words( "one 012 123 four" );
+ std::vector<std::string> expected;
+ expected.push_back( "ONE" );
+ expected.push_back( "012" );
+ expected.push_back( "123" );
+ expected.push_back( "FOUR" );
+ std::sort( tokens.begin(), tokens.end() );
+ std::sort( expected.begin(), expected.end() );
+ BOOST_CHECK_EQUAL( expected.size(), tokens.size() );
+ for ( int i = 0; i < expected.size() && i < tokens.size(); ++i ) {
+ BOOST_CHECK_EQUAL( expected.at( i ), tokens.at( i ) );
+ }
+}
+
test_suite* get_suite() {
test_suite* test = BOOST_TEST_SUITE( "Tokenizer tests" );
test->add( BOOST_TEST_CASE( &simple ) );
+ test->add( BOOST_TEST_CASE( &with_newlines ) );
+ test->add( BOOST_TEST_CASE( &with_numbers ) );
return test;
}
--- branches/KDE/3.5/kdepim/indexlib/tokenizer.cpp #469867:469868
@@ -74,16 +74,16 @@
stop, // - [ 45 ]
stop, // . [ 46 ]
stop, // / [ 47 ]
- 0, // 0 [ 48 ]
- 1, // 1 [ 49 ]
- 2, // 2 [ 50 ]
- 3, // 3 [ 51 ]
- 4, // 4 [ 52 ]
- 5, // 5 [ 53 ]
- 6, // 6 [ 54 ]
- 7, // 7 [ 55 ]
- 8, // 8 [ 56 ]
- 9, // 9 [ 57 ]
+ '0', // 0 [ 48 ]
+ '1', // 1 [ 49 ]
+ '2', // 2 [ 50 ]
+ '3', // 3 [ 51 ]
+ '4', // 4 [ 52 ]
+ '5', // 5 [ 53 ]
+ '6', // 6 [ 54 ]
+ '7', // 7 [ 55 ]
+ '8', // 8 [ 56 ]
+ '9', // 9 [ 57 ]
stop, // : [ 58 ]
stop, // ; [ 59 ]
stop, // < [ 60 ]
[prev in list] [next in list] [prev in thread] [next in thread]
Configure |
About |
News |
Add a list |
Sponsored by KoreLogic