// // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh) // // Distributed under the Boost Software License, Version 1.0. (See // accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) // #include #include #include #include #include #include #include "test_locale.hpp" #include "test_locale_tools.hpp" #ifndef BOOST_LOCALE_NO_POSIX_BACKEND # ifdef __APPLE__ # include # endif # include #endif #if !defined(BOOST_LOCALE_WITH_ICU) && !defined(BOOST_LOCALE_WITH_ICONV) && (defined(BOOST_WINDOWS) || defined(__CYGWIN__)) #ifndef NOMINMAX # define NOMINMAX #endif #include #endif bool test_iso; bool test_iso_8859_8 = true; bool test_utf; bool test_sjis; std::string he_il_8bit; std::string en_us_8bit; std::string ja_jp_shiftjis; template std::basic_string read_file(std::basic_istream &in) { std::basic_string res; Char c; while(in.get(c)) res+=c; return res; } template void test_ok(std::string file,std::locale const &l,std::basic_string cmp=std::basic_string()) { if(cmp.empty()) cmp=to(file); std::ofstream test("testi.txt"); test << file; test.close(); typedef std::basic_fstream stream_type; stream_type f1("testi.txt",stream_type::in); f1.imbue(l); TEST(read_file(f1) == cmp); f1.close(); stream_type f2("testo.txt",stream_type::out); f2.imbue(l); f2 << cmp; f2.close(); std::ifstream testo("testo.txt"); TEST(read_file(testo) == file); } template void test_rfail(std::string file,std::locale const &l,int pos) { std::ofstream test("testi.txt"); test << file; test.close(); typedef std::basic_fstream stream_type; stream_type f1("testi.txt",stream_type::in); f1.imbue(l); Char c; for(int i=0;i void test_wfail(std::string file,std::locale const &l,int pos) { typedef std::basic_fstream stream_type; stream_type f1("testo.txt",stream_type::out); f1.imbue(l); std::basic_string out=to(file); int i; for(i=0;i void test_for_char() { boost::locale::generator g; if(test_utf) { std::cout << " UTF-8" << std::endl; test_ok("grüße\nn i",g("en_US.UTF-8")); test_rfail("abc\xFF\xFF",g("en_US.UTF-8"),3); std::cout << " Testing codepoints above 0xFFFF" << std::endl; std::cout << " Single U+2008A" << std::endl; test_ok("\xf0\xa0\x82\x8a",g("en_US.UTF-8")); // U+2008A std::cout << " Single U+2008A withing text" << std::endl; test_ok("abc\"\xf0\xa0\x82\x8a\"",g("en_US.UTF-8")); // U+2008A std::string one = "\xf0\xa0\x82\x8a"; std::string res; for(unsigned i=0;i<1000;i++) res+=one; std::cout << " U+2008A x 1000" << std::endl; test_ok(res.c_str(),g("en_US.UTF-8")); // U+2008A } else { std::cout << " UTF-8 Not supported " << std::endl; } if(test_iso) { if(test_iso_8859_8) { std::cout << " ISO8859-8" << std::endl; test_ok("hello \xf9\xec\xe5\xed",g(he_il_8bit),to("hello שלום")); } std::cout << " ISO8859-1" << std::endl; test_ok(to("grüße\nn i"),g(en_us_8bit),to("grüße\nn i")); test_wfail("grüßen שלום",g(en_us_8bit),7); } if(test_sjis) { std::cout << " Shift-JIS" << std::endl; test_ok("\x93\xfa\x96\x7b",g(ja_jp_shiftjis), boost::locale::conv::to_utf("\xe6\x97\xa5\xe6\x9c\xac","UTF-8")); // Japan } } void test_wide_io() { std::cout << " wchar_t" << std::endl; test_for_char(); #if defined BOOST_LOCALE_ENABLE_CHAR16_T && !defined(BOOST_NO_CHAR16_T_CODECVT) std::cout << " char16_t" << std::endl; test_for_char(); #endif #if defined BOOST_LOCALE_ENABLE_CHAR32_T && !defined(BOOST_NO_CHAR32_T_CODECVT) std::cout << " char32_t" << std::endl; test_for_char(); #endif } template void test_pos(std::string source,std::basic_string target,std::string encoding) { using namespace boost::locale::conv; boost::locale::generator g; std::locale l= encoding == "ISO8859-8" ? g("he_IL."+encoding) : g("en_US."+encoding); TEST(to_utf(source,encoding)==target); TEST(to_utf(source.c_str(),encoding)==target); TEST(to_utf(source.c_str(),source.c_str()+source.size(),encoding)==target); TEST(to_utf(source,l)==target); TEST(to_utf(source.c_str(),l)==target); TEST(to_utf(source.c_str(),source.c_str()+source.size(),l)==target); TEST(from_utf(target,encoding)==source); TEST(from_utf(target.c_str(),encoding)==source); TEST(from_utf(target.c_str(),target.c_str()+target.size(),encoding)==source); TEST(from_utf(target,l)==source); TEST(from_utf(target.c_str(),l)==source); TEST(from_utf(target.c_str(),target.c_str()+target.size(),l)==source); } #define TESTF(X) TEST_THROWS(X,boost::locale::conv::conversion_error) template void test_to_neg(std::string source,std::basic_string target,std::string encoding) { using namespace boost::locale::conv; boost::locale::generator g; std::locale l=g("en_US."+encoding); TEST(to_utf(source,encoding)==target); TEST(to_utf(source.c_str(),encoding)==target); TEST(to_utf(source.c_str(),source.c_str()+source.size(),encoding)==target); TEST(to_utf(source,l)==target); TEST(to_utf(source.c_str(),l)==target); TEST(to_utf(source.c_str(),source.c_str()+source.size(),l)==target); TESTF(to_utf(source,encoding,stop)); TESTF(to_utf(source.c_str(),encoding,stop)); TESTF(to_utf(source.c_str(),source.c_str()+source.size(),encoding,stop)); TESTF(to_utf(source,l,stop)); TESTF(to_utf(source.c_str(),l,stop)); TESTF(to_utf(source.c_str(),source.c_str()+source.size(),l,stop)); } template void test_from_neg(std::basic_string source,std::string target,std::string encoding) { using namespace boost::locale::conv; boost::locale::generator g; std::locale l=g("en_US."+encoding); TEST(from_utf(source,encoding)==target); TEST(from_utf(source.c_str(),encoding)==target); TEST(from_utf(source.c_str(),source.c_str()+source.size(),encoding)==target); TEST(from_utf(source,l)==target); TEST(from_utf(source.c_str(),l)==target); TEST(from_utf(source.c_str(),source.c_str()+source.size(),l)==target); TESTF(from_utf(source,encoding,stop)); TESTF(from_utf(source.c_str(),encoding,stop)); TESTF(from_utf(source.c_str(),source.c_str()+source.size(),encoding,stop)); TESTF(from_utf(source,l,stop)); TESTF(from_utf(source.c_str(),l,stop)); TESTF(from_utf(source.c_str(),source.c_str()+source.size(),l,stop)); } template std::basic_string utf(char const *s) { return to(s); } template<> std::basic_string utf(char const *s) { return s; } template void test_with_0() { std::string a("abc\0\0 yz\0",3+2+3+1); TEST(boost::locale::conv::from_utf(boost::locale::conv::to_utf(a,"UTF-8"),"UTF-8") == a); TEST(boost::locale::conv::from_utf(boost::locale::conv::to_utf(a,"ISO8859-1"),"ISO8859-1") == a); } template struct utfutf; template<> struct utfutf { static char const *ok() {return "grüßen";} static char const *bad() { return "gr\xFF" "üßen"; } // split into 2 to make SunCC happy }; template<> struct utfutf { static wchar_t const *ok(){ return L"\x67\x72\xfc\xdf\x65\x6e"; } static wchar_t const *bad() { static wchar_t buf[256] = L"\x67\x72\xFF\xfc\xFE\xFD\xdf\x65\x6e"; buf[2]=0xDC01; // second surrogate must not be buf[4]=0xD801; // First buf[5]=0xD801; // Must be surrogate trail return buf; } }; template<> struct utfutf { static wchar_t const *ok(){ return L"\x67\x72\xfc\xdf\x65\x6e"; } static wchar_t const *bad() { static wchar_t buf[256] = L"\x67\x72\xFF\xfc\xdf\x65\x6e"; buf[2]=static_cast(0x1000000); // > 10FFFF return buf; } }; template void test_combinations() { using boost::locale::conv::utf_to_utf; typedef utfutf out; typedef utfutf in; TEST( (utf_to_utf(in::ok())==out::ok()) ); TESTF( (utf_to_utf(in::bad(),boost::locale::conv::stop)) ); TEST( (utf_to_utf(in::bad())==out::ok()) ); } void test_all_combinations() { std::cout << "Testing utf_to_utf" << std::endl; std::cout <<" char<-char"<(); std::cout <<" char<-wchar"<(); std::cout <<" wchar<-char"<(); std::cout <<" wchar<-wchar"<(); } template void test_to() { test_pos(to("grüßen"),utf("grüßen"),"ISO8859-1"); if(test_iso_8859_8) test_pos("\xf9\xec\xe5\xed",utf("שלום"),"ISO8859-8"); test_pos("grüßen",utf("grüßen"),"UTF-8"); test_pos("abc\"\xf0\xa0\x82\x8a\"",utf("abc\"\xf0\xa0\x82\x8a\""),"UTF-8"); test_to_neg("g\xFFrüßen",utf("grüßen"),"UTF-8"); test_from_neg(utf("hello שלום"),"hello ","ISO8859-1"); test_with_0(); } void test_skip(char const *enc,char const *utf,char const *name,char const *opt=0) { if(opt!=0) { if(boost::locale::conv::to_utf(enc,name) == opt) { test_skip(enc,opt,name); return; } } TEST(boost::locale::conv::to_utf(enc,name) == utf); TEST(boost::locale::conv::to_utf(enc,name) == boost::locale::conv::utf_to_utf(utf)); #ifdef BOOST_LOCALE_ENABLE_CHAR16_T TEST(boost::locale::conv::to_utf(enc,name) == boost::locale::conv::utf_to_utf(utf)); #endif #ifdef BOOST_LOCALE_ENABLE_CHAR32_T TEST(boost::locale::conv::to_utf(enc,name) == boost::locale::conv::utf_to_utf(utf)); #endif } void test_simple_conversions() { namespace blc=boost::locale::conv; std::cout << "- Testing correct invalid bytes skipping" << std::endl; try { std::cout << "-- ISO-8859-8" << std::endl; test_skip("test \xE0\xE1\xFB-","test \xd7\x90\xd7\x91-","ISO-8859-8"); test_skip("\xFB","","ISO-8859-8"); test_skip("test \xE0\xE1\xFB","test \xd7\x90\xd7\x91","ISO-8859-8"); test_skip("\xFB-","-","ISO-8859-8"); } catch(blc::invalid_charset_error const &) { std::cout <<"--- not supported" << std::endl; } try { std::cout << "-- cp932" << std::endl; test_skip("test\xE0\xA0 \x83\xF8-","test\xe7\x87\xbf -","cp932","test\xe7\x87\xbf "); test_skip("\x83\xF8","","cp932"); test_skip("test\xE0\xA0 \x83\xF8","test\xe7\x87\xbf ","cp932"); test_skip("\x83\xF8-","-","cp932",""); } catch(blc::invalid_charset_error const &) { std::cout <<"--- not supported" << std::endl; } } int main() { try { std::vector def; #ifdef BOOST_LOCALE_WITH_ICU def.push_back("icu"); #endif #ifndef BOOST_LOCALE_NO_STD_BACKEND def.push_back("std"); #endif #ifndef BOOST_LOCALE_NO_WINAPI_BACKEND def.push_back("winapi"); #endif #ifndef BOOST_LOCALE_NO_POSIX_BACKEND def.push_back("posix"); #endif #if !defined(BOOST_LOCALE_WITH_ICU) && !defined(BOOST_LOCALE_WITH_ICONV) && (defined(BOOST_WINDOWS) || defined(__CYGWIN__)) test_iso_8859_8 = IsValidCodePage(28598)!=0; #endif test_simple_conversions(); for(int type = 0; type < int(def.size()); type ++ ) { boost::locale::localization_backend_manager tmp_backend = boost::locale::localization_backend_manager::global(); tmp_backend.select(def[type]); boost::locale::localization_backend_manager::global(tmp_backend); std::string bname = def[type]; if(bname=="std") { en_us_8bit = get_std_name("en_US.ISO8859-1"); he_il_8bit = get_std_name("he_IL.ISO8859-8"); ja_jp_shiftjis = get_std_name("ja_JP.SJIS"); if(!ja_jp_shiftjis.empty() && !test_std_supports_SJIS_codecvt(ja_jp_shiftjis)) { std::cout << "Warning: detected unproper support of " << ja_jp_shiftjis << " locale, disableling it" << std::endl; ja_jp_shiftjis = ""; } } else { en_us_8bit = "en_US.ISO8859-1"; he_il_8bit = "he_IL.ISO8859-8"; ja_jp_shiftjis = "ja_JP.SJIS"; } std::cout << "Testing for backend " << def[type] << std::endl; test_iso = true; if(bname=="std" && (he_il_8bit.empty() || en_us_8bit.empty())) { std::cout << "no iso locales availible, passing" << std::endl; test_iso = false; } test_sjis = true; if(bname=="std" && ja_jp_shiftjis.empty()) { test_sjis = false; } if(bname=="winapi") { test_iso = false; test_sjis = false; } test_utf = true; #ifndef BOOST_LOCALE_NO_POSIX_BACKEND if(bname=="posix") { { locale_t l = newlocale(LC_ALL_MASK,he_il_8bit.c_str(),0); if(!l) test_iso = false; else freelocale(l); } { locale_t l = newlocale(LC_ALL_MASK,en_us_8bit.c_str(),0); if(!l) test_iso = false; else freelocale(l); } { locale_t l = newlocale(LC_ALL_MASK,"en_US.UTF-8",0); if(!l) test_utf = false; else freelocale(l); } #ifdef BOOST_LOCALE_WITH_ICONV { locale_t l = newlocale(LC_ALL_MASK,ja_jp_shiftjis.c_str(),0); if(!l) test_sjis = false; else freelocale(l); } #else test_sjis = false; #endif } #endif if(def[type]=="std" && (get_std_name("en_US.UTF-8").empty() || get_std_name("he_IL.UTF-8").empty())) { test_utf = false; } std::cout << "Testing wide I/O" << std::endl; test_wide_io(); std::cout << "Testing charset to/from UTF conversion functions" << std::endl; std::cout << " char" << std::endl; test_to(); std::cout << " wchar_t" << std::endl; test_to(); #ifdef BOOST_LOCALE_ENABLE_CHAR16_T if(bname == "icu" || bname == "std") { std::cout << " char16_t" << std::endl; test_to(); } #endif #ifdef BOOST_LOCALE_ENABLE_CHAR32_T if(bname == "icu" || bname == "std") { std::cout << " char32_t" << std::endl; test_to(); } #endif test_all_combinations(); } } catch(std::exception const &e) { std::cerr << "Failed " << e.what() << std::endl; return EXIT_FAILURE; } FINALIZE(); } // vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 // boostinspect:noascii