/////////1/////////2/////////3/////////4/////////5/////////6/////////7/////////8 // test_utf8_codecvt.cpp // (C) Copyright 2002-4 Robert Ramey - http://www.rrsd.com . // Use, modification and distribution is subject to the Boost Software // License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include #include #include #include #include #include #include #include // size_t #include #if defined(BOOST_NO_STDC_NAMESPACE) namespace std{ using ::size_t; } // namespace std #endif #include #ifdef BOOST_NO_STDC_NAMESPACE namespace std{ using ::wcslen; } #endif #include "../test/test_tools.hpp" #include #include #include #include template struct test_data { static unsigned char utf8_encoding[]; static wchar_t wchar_encoding[]; }; template<> unsigned char test_data<2>::utf8_encoding[] = { 0x01, 0x7f, 0xc2, 0x80, 0xdf, 0xbf, 0xe0, 0xa0, 0x80, 0xe7, 0xbf, 0xbf }; template<> wchar_t test_data<2>::wchar_encoding[] = { 0x0001, 0x007f, 0x0080, 0x07ff, 0x0800, 0x7fff }; template<> unsigned char test_data<4>::utf8_encoding[] = { 0x01, 0x7f, 0xc2, 0x80, 0xdf, 0xbf, 0xe0, 0xa0, 0x80, 0xef, 0xbf, 0xbf, 0xf0, 0x90, 0x80, 0x80, 0xf4, 0x8f, 0xbf, 0xbf, 0xf7, 0xbf, 0xbf, 0xbf, 0xf8, 0x88, 0x80, 0x80, 0x80, 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, 0xfc, 0x84, 0x80, 0x80, 0x80, 0x80, 0xfd, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf }; template<> wchar_t test_data<4>::wchar_encoding[] = { 0x00000001, 0x0000007f, 0x00000080, 0x000007ff, 0x00000800, 0x0000ffff, 0x00010000, 0x0010ffff, 0x001fffff, 0x00200000, 0x03ffffff, 0x04000000, 0x7fffffff }; int test_main(int /* argc */, char * /* argv */[]) { std::locale old_loc; std::locale * utf8_locale = boost::archive::add_facet( old_loc, new boost::archive::detail::utf8_codecvt_facet ); typedef char utf8_t; typedef test_data td; // Send our test UTF-8 data to file { std::ofstream ofs; ofs.open("test.dat", std::ios::binary); std::copy( td::utf8_encoding, #if ! defined(__BORLANDC__) // borland 5.60 complains about this td::utf8_encoding + sizeof(td::utf8_encoding) / sizeof(unsigned char), #else // so use this instead td::utf8_encoding + 12, #endif boost::archive::iterators::ostream_iterator(ofs) ); } // Read the test data back in, converting to UCS-4 on the way in std::vector from_file; { std::wifstream ifs; ifs.imbue(*utf8_locale); ifs.open("test.dat"); wchar_t item = 0; // note can't use normal vector from iterator constructor because // dinkumware doesn't have it. for(;;){ item = ifs.get(); if(item == WEOF) break; //ifs >> item; //if(ifs.eof()) // break; from_file.push_back(item); } } // compare the data read back in with the orginal #if ! defined(__BORLANDC__) // borland 5.60 complains about this BOOST_CHECK(from_file.size() == sizeof(td::wchar_encoding)/sizeof(wchar_t)); #else // so use this instead BOOST_CHECK(from_file.size() == 6); #endif BOOST_CHECK(std::equal(from_file.begin(), from_file.end(), td::wchar_encoding)); // Send the UCS4_data back out, converting to UTF-8 { std::wofstream ofs; ofs.imbue(*utf8_locale); ofs.open("test2.dat"); std::copy( from_file.begin(), from_file.end(), boost::archive::iterators::ostream_iterator(ofs) ); } // Make sure that both files are the same { typedef boost::archive::iterators::istream_iterator is_iter; is_iter end_iter; std::ifstream ifs1("test.dat"); is_iter it1(ifs1); std::vector data1; std::copy(it1, end_iter, std::back_inserter(data1)); std::ifstream ifs2("test2.dat"); is_iter it2(ifs2); std::vector data2; std::copy(it2, end_iter, std::back_inserter(data2)); BOOST_CHECK(data1 == data2); } // some libraries have trouble that only shows up with longer strings wchar_t * test3_data = L"\ \ \ \ \ 1\ 96953204\ 177129195\ 1\ 5627\ 23010\ 7419\

16212

\ 4086\ 2749\ -33\ 124\ 28\ 32225\ 17543\ 0.84431422\ 1.0170664757130923\ tjbx\ cuwjentqpkejp\
\
\ "; // Send the UCS4_data back out, converting to UTF-8 std::size_t l = std::wcslen(test3_data); { std::wofstream ofs; ofs.imbue(*utf8_locale); ofs.open("test3.dat"); std::copy( test3_data, test3_data + l, boost::archive::iterators::ostream_iterator(ofs) ); } // Make sure that both files are the same { std::wifstream ifs; ifs.imbue(*utf8_locale); ifs.open("test3.dat"); BOOST_CHECK( std::equal( test3_data, test3_data + l, boost::archive::iterators::istream_iterator(ifs) ) ); } delete utf8_locale; return EXIT_SUCCESS; }