// unicode_iterator_test.cpp (18 KB)
  /*
   *
   * Copyright (c) 2004
   * John Maddock
   *
   * Use, modification and distribution are subject to the
   * Boost Software License, Version 1.0. (See accompanying file
   * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
   *
   */
  /*
   *   LOCATION:    see http://www.boost.org for most recent version.
   *   FILE         unicode_iterator_test.cpp
   *   VERSION      see <boost/version.hpp>
   *   DESCRIPTION: Simple test suite for Unicode interconversions.
   */
  #include <boost/regex/pending/unicode_iterator.hpp>
  #include <boost/detail/lightweight_main.hpp>
  #include "../test_macros.hpp"

  #include <algorithm>
  #include <cstddef>
  #include <cstring>
  #include <iomanip>
  #include <iostream>
  #include <iterator>
  #include <stdexcept>
  #include <vector>
  // If the build selects neither encoding explicitly, enable both so that the
  // UTF-8 and the UTF-16 conversion paths are exercised by default.
  #if !defined(TEST_UTF8) && !defined(TEST_UTF16)
  #  define TEST_UTF8
  #  define TEST_UTF16
  #endif
  // Walks the half-open range [a, b) and XOR-folds every dereferenced value
  // into a single result, which is returned (0 for an empty range).
  //
  // The tests use this both as a cheap checksum of a decoded sequence and as a
  // way to force a complete traversal of a converting iterator.
  //
  // The value type is obtained through std::iterator_traits so that raw
  // pointers are accepted as well as class-type iterators.
  template <class I>
  typename std::iterator_traits<I>::value_type iterate_over(I a, I b)
  {
     typedef typename std::iterator_traits<I>::value_type value_type;
     value_type v = 0;
     for(; a != b; ++a)
        v ^= *a;
     return v;
  }
  42. void spot_checks()
  43. {
  44. // test specific values ripped straight out of the Unicode standard
  45. // to verify that our encoding is the same as theirs, as well as
  46. // self-consistent:
  47. ::boost::uint32_t spot16[] = { 0x10302u, };
  48. typedef boost::u32_to_u16_iterator<const ::boost::uint32_t*> u32to16type;
  49. u32to16type it(spot16);
  50. BOOST_CHECK_EQUAL(*it++, 0xD800u);
  51. BOOST_CHECK_EQUAL(*it++, 0xDF02u);
  52. BOOST_CHECK_EQUAL(*--it, 0xDF02u);
  53. BOOST_CHECK_EQUAL(*--it, 0xD800u);
  54. ::boost::uint32_t spot8[] = { 0x004Du, 0x0430u, 0x4E8Cu, 0x10302u, };
  55. typedef boost::u32_to_u8_iterator<const ::boost::uint32_t*> u32to8type;
  56. u32to8type it8(spot8);
  57. BOOST_CHECK_EQUAL(*it8++, 0x4Du);
  58. BOOST_CHECK_EQUAL(*it8++, 0xD0u);
  59. BOOST_CHECK_EQUAL(*it8++, 0xB0u);
  60. BOOST_CHECK_EQUAL(*it8++, 0xE4u);
  61. BOOST_CHECK_EQUAL(*it8++, 0xBAu);
  62. BOOST_CHECK_EQUAL(*it8++, 0x8Cu);
  63. BOOST_CHECK_EQUAL(*it8++, 0xF0u);
  64. BOOST_CHECK_EQUAL(*it8++, 0x90u);
  65. BOOST_CHECK_EQUAL(*it8++, 0x8Cu);
  66. BOOST_CHECK_EQUAL(*it8++, 0x82u);
  67. BOOST_CHECK_EQUAL(*--it8, 0x82u);
  68. BOOST_CHECK_EQUAL(*--it8, 0x8Cu);
  69. BOOST_CHECK_EQUAL(*--it8, 0x90u);
  70. BOOST_CHECK_EQUAL(*--it8, 0xF0u);
  71. BOOST_CHECK_EQUAL(*--it8, 0x8Cu);
  72. BOOST_CHECK_EQUAL(*--it8, 0xBAu);
  73. BOOST_CHECK_EQUAL(*--it8, 0xE4u);
  74. BOOST_CHECK_EQUAL(*--it8, 0xB0u);
  75. BOOST_CHECK_EQUAL(*--it8, 0xD0u);
  76. BOOST_CHECK_EQUAL(*--it8, 0x4Du);
  77. //
  78. // Test some bad sequences and verify that our iterators will catch them:
  79. //
  80. boost::uint8_t bad_seq[10] = { 0x4Du, 0xD0u, 0xB0u, 0xE4u, 0xBAu, 0x8Cu, 0xF0u, 0x90u, 0x8Cu, 0x82u };
  81. BOOST_CHECK_EQUAL(
  82. iterate_over(
  83. boost::u8_to_u32_iterator<const boost::uint8_t*>(bad_seq, bad_seq, bad_seq + 10),
  84. boost::u8_to_u32_iterator<const boost::uint8_t*>(bad_seq+10, bad_seq, bad_seq + 10)),
  85. 0x000149f3u);
  86. BOOST_CHECK_THROW(boost::u8_to_u32_iterator<const boost::uint8_t*>(bad_seq, bad_seq, bad_seq + 9), std::out_of_range);
  87. BOOST_CHECK_THROW(boost::u8_to_u32_iterator<const boost::uint8_t*>(bad_seq, bad_seq, bad_seq + 8), std::out_of_range);
  88. BOOST_CHECK_THROW(boost::u8_to_u32_iterator<const boost::uint8_t*>(bad_seq, bad_seq, bad_seq + 7), std::out_of_range);
  89. BOOST_CHECK_THROW(boost::u8_to_u32_iterator<const boost::uint8_t*>(bad_seq + 2, bad_seq, bad_seq + 10), std::out_of_range);
  90. BOOST_CHECK_THROW(boost::u8_to_u32_iterator<const boost::uint8_t*>(bad_seq + 2, bad_seq + 2, bad_seq + 10), std::out_of_range);
  91. boost::uint16_t bad_seq2[6] = { 0xD800, 0xDF02, 0xD800, 0xDF02, 0xD800, 0xDF02 };
  92. BOOST_CHECK_EQUAL(
  93. iterate_over(
  94. boost::u16_to_u32_iterator<const boost::uint16_t*>(bad_seq2, bad_seq2, bad_seq2 + 6),
  95. boost::u16_to_u32_iterator<const boost::uint16_t*>(bad_seq2+6, bad_seq2, bad_seq2 + 6)),
  96. 66306u);
  97. BOOST_CHECK_THROW(boost::u16_to_u32_iterator<const boost::uint16_t*>(bad_seq2, bad_seq2, bad_seq2 + 5), std::out_of_range);
  98. BOOST_CHECK_THROW(boost::u16_to_u32_iterator<const boost::uint16_t*>(bad_seq2 + 1, bad_seq2 + 1, bad_seq2 + 6), std::out_of_range);
  99. BOOST_CHECK_THROW(boost::u16_to_u32_iterator<const boost::uint16_t*>(bad_seq2 + 1, bad_seq2, bad_seq2 + 6), std::out_of_range);
  100. boost::uint8_t bad_seq3[5] = { '.', '*', 0xe4, '.', '*' };
  101. BOOST_CHECK_THROW(iterate_over(boost::u8_to_u32_iterator<const boost::uint8_t*>(bad_seq3, bad_seq3, bad_seq3 + 5), boost::u8_to_u32_iterator<const boost::uint8_t*>(bad_seq3 + 5, bad_seq3, bad_seq3 + 5)), std::out_of_range);
  102. boost::uint8_t bad_seq4[5] = { '.', '*', 0xf6, '.', '*' };
  103. BOOST_CHECK_THROW(iterate_over(boost::u8_to_u32_iterator<const boost::uint8_t*>(bad_seq4, bad_seq4, bad_seq4 + 5), boost::u8_to_u32_iterator<const boost::uint8_t*>(bad_seq4 + 5, bad_seq4, bad_seq4 + 5)), std::out_of_range);
  104. // Invalid sequences containing surrogate pairs:
  105. const char* invalid_pseq = "\xed\xa0\x80"; // single lowest lead surrogate U+D800
  106. BOOST_CHECK_THROW(iterate_over(boost::u8_to_u32_iterator<const char*>(invalid_pseq, invalid_pseq, invalid_pseq + std::strlen(invalid_pseq)), boost::u8_to_u32_iterator<const char*>(invalid_pseq + std::strlen(invalid_pseq), invalid_pseq, invalid_pseq + std::strlen(invalid_pseq))), std::out_of_range);
  107. invalid_pseq = "\xed\xb0\x80"; // single lowest trail surrogate U+DC00
  108. BOOST_CHECK_THROW(iterate_over(boost::u8_to_u32_iterator<const char*>(invalid_pseq, invalid_pseq, invalid_pseq + std::strlen(invalid_pseq)), boost::u8_to_u32_iterator<const char*>(invalid_pseq + std::strlen(invalid_pseq), invalid_pseq, invalid_pseq + std::strlen(invalid_pseq))), std::out_of_range);
  109. invalid_pseq = "\xed\xb0\x80"; // single lowest trail surrogate U+DC00
  110. BOOST_CHECK_THROW(iterate_over(boost::u8_to_u32_iterator<const char*>(invalid_pseq, invalid_pseq, invalid_pseq + std::strlen(invalid_pseq)), boost::u8_to_u32_iterator<const char*>(invalid_pseq + std::strlen(invalid_pseq), invalid_pseq, invalid_pseq + std::strlen(invalid_pseq))), std::out_of_range);
  111. invalid_pseq = "\xed\xbf\xbf"; // single highest trail surrogate U+DFFF
  112. BOOST_CHECK_THROW(iterate_over(boost::u8_to_u32_iterator<const char*>(invalid_pseq, invalid_pseq, invalid_pseq + std::strlen(invalid_pseq)), boost::u8_to_u32_iterator<const char*>(invalid_pseq + std::strlen(invalid_pseq), invalid_pseq, invalid_pseq + std::strlen(invalid_pseq))), std::out_of_range);
  113. // overlong encodings (created by left-padding with zero bits)
  114. invalid_pseq = "\xc0\x80"; // illegal 2-byte encoding of 1-byte character U+0000
  115. BOOST_CHECK_THROW(iterate_over(boost::u8_to_u32_iterator<const char*>(invalid_pseq, invalid_pseq, invalid_pseq + std::strlen(invalid_pseq)), boost::u8_to_u32_iterator<const char*>(invalid_pseq + std::strlen(invalid_pseq), invalid_pseq, invalid_pseq + std::strlen(invalid_pseq))), std::out_of_range);
  116. invalid_pseq = "\xe0\x80\x80"; // illegal 3-byte encoding of 1-byte character U+0000
  117. BOOST_CHECK_THROW(iterate_over(boost::u8_to_u32_iterator<const char*>(invalid_pseq, invalid_pseq, invalid_pseq + std::strlen(invalid_pseq)), boost::u8_to_u32_iterator<const char*>(invalid_pseq + std::strlen(invalid_pseq), invalid_pseq, invalid_pseq + std::strlen(invalid_pseq))), std::out_of_range);
  118. invalid_pseq = "\xf0\x80\x80\x80"; // illegal 4-byte encoding of 1-byte character U+0000
  119. BOOST_CHECK_THROW(iterate_over(boost::u8_to_u32_iterator<const char*>(invalid_pseq, invalid_pseq, invalid_pseq + std::strlen(invalid_pseq)), boost::u8_to_u32_iterator<const char*>(invalid_pseq + std::strlen(invalid_pseq), invalid_pseq, invalid_pseq + std::strlen(invalid_pseq))), std::out_of_range);
  120. invalid_pseq = "\xc1\xbf"; // illegal 2-byte encoding of 1-byte character U+007F
  121. BOOST_CHECK_THROW(iterate_over(boost::u8_to_u32_iterator<const char*>(invalid_pseq, invalid_pseq, invalid_pseq + std::strlen(invalid_pseq)), boost::u8_to_u32_iterator<const char*>(invalid_pseq + std::strlen(invalid_pseq), invalid_pseq, invalid_pseq + std::strlen(invalid_pseq))), std::out_of_range);
  122. invalid_pseq = "\xe0\x81\xbf"; // illegal 3-byte encoding of 1-byte character U+007F
  123. BOOST_CHECK_THROW(iterate_over(boost::u8_to_u32_iterator<const char*>(invalid_pseq, invalid_pseq, invalid_pseq + std::strlen(invalid_pseq)), boost::u8_to_u32_iterator<const char*>(invalid_pseq + std::strlen(invalid_pseq), invalid_pseq, invalid_pseq + std::strlen(invalid_pseq))), std::out_of_range);
  124. invalid_pseq = "\xf0\x80\x81\xbf"; // illegal 4-byte encoding of 1-byte character U+007F
  125. BOOST_CHECK_THROW(iterate_over(boost::u8_to_u32_iterator<const char*>(invalid_pseq, invalid_pseq, invalid_pseq + std::strlen(invalid_pseq)), boost::u8_to_u32_iterator<const char*>(invalid_pseq + std::strlen(invalid_pseq), invalid_pseq, invalid_pseq + std::strlen(invalid_pseq))), std::out_of_range);
  126. invalid_pseq = "\xe0\x82\x80"; // illegal 3-byte encoding of 2-byte character U+0080
  127. BOOST_CHECK_THROW(iterate_over(boost::u8_to_u32_iterator<const char*>(invalid_pseq, invalid_pseq, invalid_pseq + std::strlen(invalid_pseq)), boost::u8_to_u32_iterator<const char*>(invalid_pseq + std::strlen(invalid_pseq), invalid_pseq, invalid_pseq + std::strlen(invalid_pseq))), std::out_of_range);
  128. invalid_pseq = "\xf0\x80\x82\x80"; // illegal 4-byte encoding of 2-byte character U+0080
  129. BOOST_CHECK_THROW(iterate_over(boost::u8_to_u32_iterator<const char*>(invalid_pseq, invalid_pseq, invalid_pseq + std::strlen(invalid_pseq)), boost::u8_to_u32_iterator<const char*>(invalid_pseq + std::strlen(invalid_pseq), invalid_pseq, invalid_pseq + std::strlen(invalid_pseq))), std::out_of_range);
  130. invalid_pseq = "\xe0\x9f\xbf"; // illegal 3-byte encoding of 2-byte character U+07FF
  131. BOOST_CHECK_THROW(iterate_over(boost::u8_to_u32_iterator<const char*>(invalid_pseq, invalid_pseq, invalid_pseq + std::strlen(invalid_pseq)), boost::u8_to_u32_iterator<const char*>(invalid_pseq + std::strlen(invalid_pseq), invalid_pseq, invalid_pseq + std::strlen(invalid_pseq))), std::out_of_range);
  132. invalid_pseq = "\xf0\x80\x9f\xbf"; // illegal 4-byte encoding of 2-byte character U+07FF
  133. BOOST_CHECK_THROW(iterate_over(boost::u8_to_u32_iterator<const char*>(invalid_pseq, invalid_pseq, invalid_pseq + std::strlen(invalid_pseq)), boost::u8_to_u32_iterator<const char*>(invalid_pseq + std::strlen(invalid_pseq), invalid_pseq, invalid_pseq + std::strlen(invalid_pseq))), std::out_of_range);
  134. invalid_pseq = "\xf0\x80\xa0\x80"; // illegal 4-byte encoding of 3-byte character U+0800
  135. BOOST_CHECK_THROW(iterate_over(boost::u8_to_u32_iterator<const char*>(invalid_pseq, invalid_pseq, invalid_pseq + std::strlen(invalid_pseq)), boost::u8_to_u32_iterator<const char*>(invalid_pseq + std::strlen(invalid_pseq), invalid_pseq, invalid_pseq + std::strlen(invalid_pseq))), std::out_of_range);
  136. invalid_pseq = "\xf0\x8f\xbf\xbf"; // illegal 4-byte encoding of 3-byte character U+FFFF
  137. BOOST_CHECK_THROW(iterate_over(boost::u8_to_u32_iterator<const char*>(invalid_pseq, invalid_pseq, invalid_pseq + std::strlen(invalid_pseq)), boost::u8_to_u32_iterator<const char*>(invalid_pseq + std::strlen(invalid_pseq), invalid_pseq, invalid_pseq + std::strlen(invalid_pseq))), std::out_of_range);
  138. }
  139. void test(const std::vector< ::boost::uint32_t>& v)
  140. {
  141. typedef std::vector< ::boost::uint32_t> vector32_type;
  142. #ifdef TEST_UTF16
  143. typedef std::vector< ::boost::uint16_t> vector16_type;
  144. #endif
  145. typedef std::vector< ::boost::uint8_t> vector8_type;
  146. #ifdef TEST_UTF16
  147. typedef boost::u32_to_u16_iterator<vector32_type::const_iterator, ::boost::uint16_t> u32to16type;
  148. typedef boost::u16_to_u32_iterator<vector16_type::const_iterator, ::boost::uint32_t> u16to32type;
  149. #if !defined(BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION) && !defined(BOOST_NO_STD_ITERATOR) && !defined(_RWSTD_NO_CLASS_PARTIAL_SPEC)
  150. typedef std::reverse_iterator<u32to16type> ru32to16type;
  151. typedef std::reverse_iterator<u16to32type> ru16to32type;
  152. #endif
  153. #endif // TEST_UTF16
  154. #ifdef TEST_UTF8
  155. typedef boost::u32_to_u8_iterator<vector32_type::const_iterator, ::boost::uint8_t> u32to8type;
  156. typedef boost::u8_to_u32_iterator<vector8_type::const_iterator, ::boost::uint32_t> u8to32type;
  157. #if !defined(BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION) && !defined(BOOST_NO_STD_ITERATOR) && !defined(_RWSTD_NO_CLASS_PARTIAL_SPEC)
  158. typedef std::reverse_iterator<u32to8type> ru32to8type;
  159. typedef std::reverse_iterator<u8to32type> ru8to32type;
  160. #endif
  161. #endif // TEST_UTF8
  162. vector8_type v8;
  163. #ifdef TEST_UTF16
  164. vector16_type v16;
  165. #endif
  166. vector32_type v32;
  167. vector32_type::const_iterator i, j, k;
  168. #ifdef TEST_UTF16
  169. //
  170. // begin by testing forward iteration, of 32-16 bit interconversions:
  171. //
  172. #if !defined(BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS)
  173. v16.assign(u32to16type(v.begin()), u32to16type(v.end()));
  174. #else
  175. v16.clear();
  176. std::copy(u32to16type(v.begin()), u32to16type(v.end()), std::back_inserter(v16));
  177. #endif
  178. #ifndef BOOST_NO_STD_DISTANCE
  179. BOOST_CHECK_EQUAL((std::size_t)std::distance(u32to16type(v.begin()), u32to16type(v.end())), v16.size());
  180. #endif
  181. #if !defined(BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS)
  182. v32.assign(u16to32type(v16.begin(), v16.begin(), v16.end()), u16to32type(v16.end(), v16.begin(), v16.end()));
  183. #else
  184. v32.clear();
  185. std::copy(u16to32type(v16.begin(), v16.begin(), v16.end()), u16to32type(v16.end(), v16.begin(), v16.end()), std::back_inserter(v32));
  186. #endif
  187. #ifndef BOOST_NO_STD_DISTANCE
  188. BOOST_CHECK_EQUAL((std::size_t)std::distance(u16to32type(v16.begin(), v16.begin(), v16.end()), u16to32type(v16.end(), v16.begin(), v16.end())), v32.size());
  189. #endif
  190. BOOST_CHECK_EQUAL(v.size(), v32.size());
  191. i = v.begin();
  192. j = i;
  193. std::advance(j, (std::min)(v.size(), v32.size()));
  194. k = v32.begin();
  195. BOOST_CHECK_EQUAL_COLLECTIONS(v.begin(), v.end(), v32.begin(), v32.end());
  196. //
  197. // test backward iteration, of 32-16 bit interconversions:
  198. //
  199. #if !defined(BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION) && !defined(BOOST_NO_STD_ITERATOR) && !defined(_RWSTD_NO_CLASS_PARTIAL_SPEC)
  200. v16.assign(ru32to16type(u32to16type(v.end())), ru32to16type(u32to16type(v.begin())));
  201. #ifndef BOOST_NO_STD_DISTANCE
  202. BOOST_CHECK_EQUAL((std::size_t)std::distance(ru32to16type(u32to16type(v.end())), ru32to16type(u32to16type(v.begin()))), v16.size());
  203. #endif
  204. std::reverse(v16.begin(), v16.end());
  205. v32.assign(ru16to32type(u16to32type(v16.end(), v16.begin(), v16.end())), ru16to32type(u16to32type(v16.begin(), v16.begin(), v16.end())));
  206. #ifndef BOOST_NO_STD_DISTANCE
  207. BOOST_CHECK_EQUAL((std::size_t)std::distance(ru16to32type(u16to32type(v16.end(), v16.begin(), v16.end())), ru16to32type(u16to32type(v16.begin(), v16.begin(), v16.end()))), v32.size());
  208. #endif
  209. BOOST_CHECK_EQUAL(v.size(), v32.size());
  210. std::reverse(v32.begin(), v32.end());
  211. i = v.begin();
  212. j = i;
  213. std::advance(j, (std::min)(v.size(), v32.size()));
  214. k = v32.begin();
  215. BOOST_CHECK_EQUAL_COLLECTIONS(v.begin(), v.end(), v32.begin(), v32.end());
  216. #endif
  217. #endif // TEST_UTF16
  218. #ifdef TEST_UTF8
  219. //
  220. // Test forward iteration, of 32-8 bit interconversions:
  221. //
  222. #if !defined(BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS)
  223. v8.assign(u32to8type(v.begin()), u32to8type(v.end()));
  224. #else
  225. v8.clear();
  226. std::copy(u32to8type(v.begin()), u32to8type(v.end()), std::back_inserter(v8));
  227. #endif
  228. #ifndef BOOST_NO_STD_DISTANCE
  229. BOOST_CHECK_EQUAL((std::size_t)std::distance(u32to8type(v.begin()), u32to8type(v.end())), v8.size());
  230. #endif
  231. #if !defined(BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS)
  232. v32.assign(u8to32type(v8.begin(), v8.begin(), v8.end()), u8to32type(v8.end(), v8.begin(), v8.end()));
  233. #else
  234. v32.clear();
  235. std::copy(u8to32type(v8.begin(), v8.begin(), v8.end()), u8to32type(v8.end(), v8.begin(), v8.end()), std::back_inserter(v32));
  236. #endif
  237. #ifndef BOOST_NO_STD_DISTANCE
  238. BOOST_CHECK_EQUAL((std::size_t)std::distance(u8to32type(v8.begin(), v8.begin(), v8.end()), u8to32type(v8.end(), v8.begin(), v8.end())), v32.size());
  239. #endif
  240. BOOST_CHECK_EQUAL(v.size(), v32.size());
  241. i = v.begin();
  242. j = i;
  243. std::advance(j, (std::min)(v.size(), v32.size()));
  244. k = v32.begin();
  245. BOOST_CHECK_EQUAL_COLLECTIONS(v.begin(), v.end(), v32.begin(), v32.end());
  246. //
  247. // test backward iteration, of 32-8 bit interconversions:
  248. //
  249. #if !defined(BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION) && !defined(BOOST_NO_STD_ITERATOR) && !defined(_RWSTD_NO_CLASS_PARTIAL_SPEC)
  250. v8.assign(ru32to8type(u32to8type(v.end())), ru32to8type(u32to8type(v.begin())));
  251. #ifndef BOOST_NO_STD_DISTANCE
  252. BOOST_CHECK_EQUAL((std::size_t)std::distance(ru32to8type(u32to8type(v.end())), ru32to8type(u32to8type(v.begin()))), v8.size());
  253. #endif
  254. std::reverse(v8.begin(), v8.end());
  255. v32.assign(ru8to32type(u8to32type(v8.end(), v8.begin(), v8.end())), ru8to32type(u8to32type(v8.begin(), v8.begin(), v8.end())));
  256. #ifndef BOOST_NO_STD_DISTANCE
  257. BOOST_CHECK_EQUAL((std::size_t)std::distance(ru8to32type(u8to32type(v8.end(), v8.begin(), v8.end())), ru8to32type(u8to32type(v8.begin(), v8.begin(), v8.end()))), v32.size());
  258. #endif
  259. BOOST_CHECK_EQUAL(v.size(), v32.size());
  260. std::reverse(v32.begin(), v32.end());
  261. i = v.begin();
  262. j = i;
  263. std::advance(j, (std::min)(v.size(), v32.size()));
  264. k = v32.begin();
  265. BOOST_CHECK_EQUAL_COLLECTIONS(v.begin(), v.end(), v32.begin(), v32.end());
  266. #endif
  267. #endif // TEST_UTF8
  268. //
  269. // Test checked construction of UTF-8/16 iterators at each location in the sequences:
  270. //
  271. #ifdef TEST_UTF8
  272. for(u8to32type v8p(v8.begin(), v8.begin(), v8.end()), v8e(v8.end(), v8.begin(), v8.end()); v8p != v8e; ++v8p)
  273. {
  274. u8to32type pos(v8p.base(), v8p.base(), v8.end());
  275. BOOST_CHECK(pos == v8p);
  276. BOOST_CHECK(*pos == *v8p);
  277. }
  278. #endif
  279. #ifdef TEST_UTF16
  280. for(u16to32type v16p(v16.begin(), v16.begin(), v16.end()), v16e(v16.end(), v16.begin(), v16.end()); v16p != v16e; ++v16p)
  281. {
  282. u16to32type pos(v16p.base(), v16p.base(), v16.end());
  283. BOOST_CHECK(pos == v16p);
  284. BOOST_CHECK(*pos == *v16p);
  285. }
  286. #endif
  287. }
  288. int cpp_main( int, char* [] )
  289. {
  290. // test specific value points from the standard:
  291. spot_checks();
  292. // now test a bunch of values for self-consistency and round-tripping:
  293. std::vector< ::boost::uint32_t> v;
  294. for(unsigned i = 0; i < 0xD800; ++i)
  295. v.push_back(i);
  296. for(unsigned i = 0xDFFF + 1; i < 0x10FFFF; ++i)
  297. v.push_back(i);
  298. test(v);
  299. return 0;
  300. }