test_icu.cpp 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3
/*
 *
 * Copyright (c) 2004
 * John Maddock
 *
 * Use, modification and distribution are subject to the
 * Boost Software License, Version 1.0. (See accompanying file
 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 *
 */
/*
 *   LOCATION:    see http://www.boost.org for most recent version.
 *   FILE         test_icu.cpp
 *   VERSION      see <boost/version.hpp>
 *   DESCRIPTION: Test code for Unicode regexes with ICU support.
 */
//
// We can only build this if we have ICU support:
//
  20. #include <boost/regex/config.hpp>
  21. #if defined(BOOST_HAS_ICU) && !defined(BOOST_NO_STD_WSTRING)
  22. #include <boost/regex/icu.hpp>
  23. #include "test.hpp"
  24. namespace unnecessary_fix{
  25. //
  26. // Some outrageously broken std lib's don't have a conforming
  27. // back_insert_iterator, which means we can't use the std version
  28. // as an argument to regex_replace, sigh... use our own:
  29. //
  30. template <class Seq>
  31. class back_insert_iterator
  32. {
  33. private:
  34. Seq* container;
  35. public:
  36. typedef const typename Seq::value_type value_type;
  37. typedef Seq container_type;
  38. typedef void difference_type;
  39. typedef void pointer;
  40. typedef void reference;
  41. typedef std::output_iterator_tag iterator_category;
  42. explicit back_insert_iterator(Seq& x) : container(&x) {}
  43. back_insert_iterator& operator=(const value_type& val)
  44. {
  45. container->push_back(val);
  46. return *this;
  47. }
  48. back_insert_iterator& operator*() { return *this; }
  49. back_insert_iterator& operator++() { return *this; }
  50. back_insert_iterator operator++(int) { return *this; }
  51. };
  52. template <class Seq>
  53. inline back_insert_iterator<Seq> back_inserter(Seq& x)
  54. {
  55. return back_insert_iterator<Seq>(x);
  56. }
  57. }
  58. //
  59. // compare two match_results struct's for equality,
  60. // converting the iterator as needed:
  61. //
  62. template <class MR1, class MR2>
  63. void compare_result(const MR1& w1, const MR2& w2, boost::mpl::int_<2> const*)
  64. {
  65. typedef typename MR2::value_type MR2_value_type;
  66. typedef typename MR2_value_type::const_iterator MR2_iterator_type;
  67. typedef boost::u16_to_u32_iterator<MR2_iterator_type> iterator_type;
  68. //typedef typename MR1::size_type size_type;
  69. if(w1.size() != w2.size())
  70. {
  71. BOOST_REGEX_TEST_ERROR("Size mismatch in match_results class", UChar32);
  72. }
  73. for(int i = 0; i < (int)w1.size(); ++i)
  74. {
  75. if(w1[i].matched)
  76. {
  77. if(w2[i].matched == 0)
  78. {
  79. BOOST_REGEX_TEST_ERROR("Matched mismatch in match_results class", UChar32);
  80. }
  81. if((w1.position(i) != boost::BOOST_REGEX_DETAIL_NS::distance(iterator_type(w2.prefix().first), iterator_type(w2[i].first))) || (w1.length(i) != boost::BOOST_REGEX_DETAIL_NS::distance(iterator_type(w2[i].first), iterator_type(w2[i].second))))
  82. {
  83. BOOST_REGEX_TEST_ERROR("Iterator mismatch in match_results class", UChar32);
  84. }
  85. }
  86. else if(w2[i].matched)
  87. {
  88. BOOST_REGEX_TEST_ERROR("Matched mismatch in match_results class", UChar32);
  89. }
  90. }
  91. //
  92. // We don't have a way to access a list of named sub-expressions since we only store
  93. // hashes, but "abc" and "N" are common names used in our tests, so check those:
  94. //
  95. if (w1["abc"].matched)
  96. {
  97. if (w2["abc"].matched == 0)
  98. {
  99. BOOST_REGEX_TEST_ERROR("Matched mismatch in match_results class", UChar32);
  100. }
  101. if ((w1.position("abc") != boost::BOOST_REGEX_DETAIL_NS::distance(iterator_type(w2.prefix().first), iterator_type(w2["abc"].first))) || (w1.length("abc") != boost::BOOST_REGEX_DETAIL_NS::distance(iterator_type(w2["abc"].first), iterator_type(w2["abc"].second))))
  102. {
  103. BOOST_REGEX_TEST_ERROR("Iterator mismatch in match_results class", UChar32);
  104. }
  105. }
  106. else if (w2["abc"].matched)
  107. {
  108. BOOST_REGEX_TEST_ERROR("Matched mismatch in match_results class", UChar32);
  109. }
  110. if (w1["N"].matched)
  111. {
  112. if (w2["N"].matched == 0)
  113. {
  114. BOOST_REGEX_TEST_ERROR("Matched mismatch in match_results class", UChar32);
  115. }
  116. if ((w1.position("N") != boost::BOOST_REGEX_DETAIL_NS::distance(iterator_type(w2.prefix().first), iterator_type(w2["N"].first))) || (w1.length("N") != boost::BOOST_REGEX_DETAIL_NS::distance(iterator_type(w2["N"].first), iterator_type(w2["N"].second))))
  117. {
  118. BOOST_REGEX_TEST_ERROR("Iterator mismatch in match_results class", UChar32);
  119. }
  120. }
  121. else if (w2["N"].matched)
  122. {
  123. BOOST_REGEX_TEST_ERROR("Matched mismatch in match_results class", UChar32);
  124. }
  125. }
  126. template <class MR1, class MR2>
  127. void compare_result(const MR1& w1, const MR2& w2, boost::mpl::int_<1> const*)
  128. {
  129. typedef typename MR2::value_type MR2_value_type;
  130. typedef typename MR2_value_type::const_iterator MR2_iterator_type;
  131. typedef boost::u8_to_u32_iterator<MR2_iterator_type> iterator_type;
  132. //typedef typename MR1::size_type size_type;
  133. if(w1.size() != w2.size())
  134. {
  135. BOOST_REGEX_TEST_ERROR("Size mismatch in match_results class", UChar32);
  136. }
  137. for(int i = 0; i < (int)w1.size(); ++i)
  138. {
  139. if(w1[i].matched)
  140. {
  141. if(w2[i].matched == 0)
  142. {
  143. BOOST_REGEX_TEST_ERROR("Matched mismatch in match_results class", UChar32);
  144. }
  145. if((w1.position(i) != boost::BOOST_REGEX_DETAIL_NS::distance(iterator_type(w2.prefix().first), iterator_type(w2[i].first))) || (w1.length(i) != boost::BOOST_REGEX_DETAIL_NS::distance(iterator_type(w2[i].first), iterator_type(w2[i].second))))
  146. {
  147. BOOST_REGEX_TEST_ERROR("Iterator mismatch in match_results class", UChar32);
  148. }
  149. }
  150. else if(w2[i].matched)
  151. {
  152. BOOST_REGEX_TEST_ERROR("Matched mismatch in match_results class", UChar32);
  153. }
  154. }
  155. }
  156. void test_icu_grep(const boost::u32regex& r, const std::vector< ::UChar32>& search_text)
  157. {
  158. typedef std::vector< ::UChar32>::const_iterator const_iterator;
  159. typedef boost::u32regex_iterator<const_iterator> test_iterator;
  160. boost::regex_constants::match_flag_type opts = test_info<wchar_t>::match_options();
  161. const int* answer_table = test_info<wchar_t>::answer_table();
  162. test_iterator start(search_text.begin(), search_text.end(), r, opts), end;
  163. test_iterator copy(start);
  164. const_iterator last_end = search_text.begin();
  165. while(start != end)
  166. {
  167. if(start != copy)
  168. {
  169. BOOST_REGEX_TEST_ERROR("Failed iterator != comparison.", wchar_t);
  170. }
  171. if(!(start == copy))
  172. {
  173. BOOST_REGEX_TEST_ERROR("Failed iterator == comparison.", wchar_t);
  174. }
  175. test_result(*start, search_text.begin(), answer_table);
  176. // test $` and $' :
  177. if(start->prefix().first != last_end)
  178. {
  179. BOOST_REGEX_TEST_ERROR("Incorrect position for start of $`", wchar_t);
  180. }
  181. if(start->prefix().second != (*start)[0].first)
  182. {
  183. BOOST_REGEX_TEST_ERROR("Incorrect position for end of $`", wchar_t);
  184. }
  185. if(start->prefix().matched != (start->prefix().first != start->prefix().second))
  186. {
  187. BOOST_REGEX_TEST_ERROR("Incorrect position for matched member of $`", wchar_t);
  188. }
  189. if(start->suffix().first != (*start)[0].second)
  190. {
  191. BOOST_REGEX_TEST_ERROR("Incorrect position for start of $'", wchar_t);
  192. }
  193. if(start->suffix().second != search_text.end())
  194. {
  195. BOOST_REGEX_TEST_ERROR("Incorrect position for end of $'", wchar_t);
  196. }
  197. if(start->suffix().matched != (start->suffix().first != start->suffix().second))
  198. {
  199. BOOST_REGEX_TEST_ERROR("Incorrect position for matched member of $'", wchar_t);
  200. }
  201. last_end = (*start)[0].second;
  202. ++start;
  203. ++copy;
  204. // move on the answer table to next set of answers;
  205. if(*answer_table != -2)
  206. while(*answer_table++ != -2){}
  207. }
  208. if(answer_table[0] >= 0)
  209. {
  210. // we should have had a match but didn't:
  211. BOOST_REGEX_TEST_ERROR("Expected match was not found.", wchar_t);
  212. }
  213. }
  214. void test_icu(const wchar_t&, const test_regex_search_tag& )
  215. {
  216. boost::u32regex r;
  217. if(*test_locale::c_str())
  218. {
  219. U_NAMESPACE_QUALIFIER Locale l(test_locale::c_str());
  220. if(l.isBogus())
  221. return;
  222. r.imbue(l);
  223. }
  224. std::vector< ::UChar32> expression;
  225. #ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS
  226. expression.assign(test_info<wchar_t>::expression().begin(), test_info<wchar_t>::expression().end());
  227. #else
  228. std::copy(test_info<wchar_t>::expression().begin(), test_info<wchar_t>::expression().end(), std::back_inserter(expression));
  229. #endif
  230. boost::regex_constants::syntax_option_type syntax_options = test_info<UChar32>::syntax_options();
  231. #ifndef BOOST_NO_EXCEPTIONS
  232. try
  233. #endif
  234. {
  235. #if !defined(BOOST_NO_MEMBER_TEMPLATES) && !defined(__IBMCPP__)
  236. r.assign(expression.begin(), expression.end(), syntax_options);
  237. #else
  238. if(expression.size())
  239. r.assign(&*expression.begin(), expression.size(), syntax_options);
  240. else
  241. r.assign(static_cast<UChar32 const*>(0), expression.size(), syntax_options);
  242. #endif
  243. if(r.status())
  244. {
  245. BOOST_REGEX_TEST_ERROR("Expression did not compile when it should have done, error code = " << r.status(), UChar32);
  246. }
  247. std::vector< ::UChar32> search_text;
  248. #ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS
  249. search_text.assign(test_info<wchar_t>::search_text().begin(), test_info<wchar_t>::search_text().end());
  250. #else
  251. std::copy(test_info<wchar_t>::search_text().begin(), test_info<wchar_t>::search_text().end(), std::back_inserter(search_text));
  252. #endif
  253. boost::regex_constants::match_flag_type opts = test_info<wchar_t>::match_options();
  254. const int* answer_table = test_info<wchar_t>::answer_table();
  255. boost::match_results<std::vector< ::UChar32>::const_iterator> what;
  256. if(boost::u32regex_search(
  257. const_cast<std::vector< ::UChar32>const&>(search_text).begin(),
  258. const_cast<std::vector< ::UChar32>const&>(search_text).end(),
  259. what,
  260. r,
  261. opts))
  262. {
  263. test_result(what, const_cast<std::vector< ::UChar32>const&>(search_text).begin(), answer_table);
  264. }
  265. else if(answer_table[0] >= 0)
  266. {
  267. // we should have had a match but didn't:
  268. BOOST_REGEX_TEST_ERROR("Expected match was not found.", UChar32);
  269. }
  270. if(0 == *test_locale::c_str())
  271. {
  272. //
  273. // Now try UTF-16 construction:
  274. //
  275. typedef boost::u32_to_u16_iterator<std::vector<UChar32>::const_iterator> u16_conv;
  276. std::vector<UChar> expression16, text16;
  277. boost::match_results<std::vector<UChar>::const_iterator> what16;
  278. boost::match_results<const UChar*> what16c;
  279. #ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS
  280. expression16.assign(u16_conv(expression.begin()), u16_conv(expression.end()));
  281. text16.assign(u16_conv(search_text.begin()), u16_conv(search_text.end()));
  282. #else
  283. expression16.clear();
  284. std::copy(u16_conv(expression.begin()), u16_conv(expression.end()), std::back_inserter(expression16));
  285. text16.clear();
  286. std::copy(u16_conv(search_text.begin()), u16_conv(search_text.end()), std::back_inserter(text16));
  287. #endif
  288. r = boost::make_u32regex(expression16.begin(), expression16.end(), syntax_options);
  289. if(boost::u32regex_search(const_cast<const std::vector<UChar>&>(text16).begin(), const_cast<const std::vector<UChar>&>(text16).end(), what16, r, opts))
  290. {
  291. compare_result(what, what16, static_cast<boost::mpl::int_<2> const*>(0));
  292. }
  293. else if(answer_table[0] >= 0)
  294. {
  295. // we should have had a match but didn't:
  296. BOOST_REGEX_TEST_ERROR("Expected match was not found.", UChar32);
  297. }
  298. if(std::find(expression16.begin(), expression16.end(), 0) == expression16.end())
  299. {
  300. expression16.push_back(0);
  301. r = boost::make_u32regex(&*expression16.begin(), syntax_options);
  302. if(std::find(text16.begin(), text16.end(), 0) == text16.end())
  303. {
  304. text16.push_back(0);
  305. if(boost::u32regex_search((const UChar*)&*text16.begin(), what16c, r, opts))
  306. {
  307. compare_result(what, what16c, static_cast<boost::mpl::int_<2> const*>(0));
  308. }
  309. else if(answer_table[0] >= 0)
  310. {
  311. // we should have had a match but didn't:
  312. BOOST_REGEX_TEST_ERROR("Expected match was not found.", UChar32);
  313. }
  314. }
  315. }
  316. //
  317. // Now try UTF-8 construction:
  318. //
  319. typedef boost::u32_to_u8_iterator<std::vector<UChar32>::const_iterator, unsigned char> u8_conv;
  320. std::vector<unsigned char> expression8, text8;
  321. boost::match_results<std::vector<unsigned char>::const_iterator> what8;
  322. boost::match_results<const unsigned char*> what8c;
  323. #ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS
  324. expression8.assign(u8_conv(expression.begin()), u8_conv(expression.end()));
  325. text8.assign(u8_conv(search_text.begin()), u8_conv(search_text.end()));
  326. #else
  327. expression8.clear();
  328. std::copy(u8_conv(expression.begin()), u8_conv(expression.end()), std::back_inserter(expression8));
  329. text8.clear();
  330. std::copy(u8_conv(search_text.begin()), u8_conv(search_text.end()), std::back_inserter(text8));
  331. #endif
  332. r = boost::make_u32regex(expression8.begin(), expression8.end(), syntax_options);
  333. if(boost::u32regex_search(const_cast<const std::vector<unsigned char>&>(text8).begin(), const_cast<const std::vector<unsigned char>&>(text8).end(), what8, r, opts))
  334. {
  335. compare_result(what, what8, static_cast<boost::mpl::int_<1> const*>(0));
  336. }
  337. else if(answer_table[0] >= 0)
  338. {
  339. // we should have had a match but didn't:
  340. BOOST_REGEX_TEST_ERROR("Expected match was not found.", UChar32);
  341. }
  342. if(std::find(expression8.begin(), expression8.end(), 0) == expression8.end())
  343. {
  344. expression8.push_back(0);
  345. r = boost::make_u32regex(&*expression8.begin(), syntax_options);
  346. if(std::find(text8.begin(), text8.end(), 0) == text8.end())
  347. {
  348. text8.push_back(0);
  349. if(boost::u32regex_search((const unsigned char*)&*text8.begin(), what8c, r, opts))
  350. {
  351. compare_result(what, what8c, static_cast<boost::mpl::int_<1> const*>(0));
  352. }
  353. else if(answer_table[0] >= 0)
  354. {
  355. // we should have had a match but didn't:
  356. BOOST_REGEX_TEST_ERROR("Expected match was not found.", UChar32);
  357. }
  358. }
  359. }
  360. }
  361. //
  362. // finally try a grep:
  363. //
  364. test_icu_grep(r, search_text);
  365. }
  366. #ifndef BOOST_NO_EXCEPTIONS
  367. catch(const boost::bad_expression& e)
  368. {
  369. BOOST_REGEX_TEST_ERROR("Expression did not compile when it should have done: " << e.what(), UChar32);
  370. }
  371. catch(const std::runtime_error& e)
  372. {
  373. BOOST_REGEX_TEST_ERROR("Received an unexpected std::runtime_error: " << e.what(), UChar32);
  374. }
  375. catch(const std::exception& e)
  376. {
  377. BOOST_REGEX_TEST_ERROR("Received an unexpected std::exception: " << e.what(), UChar32);
  378. }
  379. catch(...)
  380. {
  381. BOOST_REGEX_TEST_ERROR("Received an unexpected exception of unknown type", UChar32);
  382. }
  383. #endif
  384. }
  385. void test_icu(const wchar_t&, const test_invalid_regex_tag&)
  386. {
  387. //typedef boost::u16_to_u32_iterator<std::wstring::const_iterator, ::UChar32> conv_iterator;
  388. std::vector< ::UChar32> expression;
  389. #ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS
  390. expression.assign(test_info<wchar_t>::expression().begin(), test_info<wchar_t>::expression().end());
  391. #else
  392. std::copy(test_info<wchar_t>::expression().begin(), test_info<wchar_t>::expression().end(), std::back_inserter(expression));
  393. #endif
  394. boost::regex_constants::syntax_option_type syntax_options = test_info<wchar_t>::syntax_options();
  395. boost::u32regex r;
  396. if(*test_locale::c_str())
  397. {
  398. U_NAMESPACE_QUALIFIER Locale l(test_locale::c_str());
  399. if(l.isBogus())
  400. return;
  401. r.imbue(l);
  402. }
  403. //
  404. // try it with exceptions disabled first:
  405. //
  406. #ifndef BOOST_NO_EXCEPTIONS
  407. try
  408. #endif
  409. {
  410. #if !defined(BOOST_NO_MEMBER_TEMPLATES) && !defined(__IBMCPP__)
  411. if(0 == r.assign(expression.begin(), expression.end(), syntax_options | boost::regex_constants::no_except).status())
  412. #else
  413. if(expression.size())
  414. r.assign(&*expression.begin(), expression.size(), syntax_options | boost::regex_constants::no_except);
  415. else
  416. r.assign(static_cast<UChar32 const*>(0), static_cast<boost::u32regex::size_type>(0), syntax_options | boost::regex_constants::no_except);
  417. if(0 == r.status())
  418. #endif
  419. {
  420. BOOST_REGEX_TEST_ERROR("Expression compiled when it should not have done so.", wchar_t);
  421. }
  422. }
  423. #ifndef BOOST_NO_EXCEPTIONS
  424. catch(...)
  425. {
  426. BOOST_REGEX_TEST_ERROR("Unexpected exception thrown.", wchar_t);
  427. }
  428. #endif
  429. //
  430. // now try again with exceptions:
  431. //
  432. bool have_catch = false;
  433. #ifndef BOOST_NO_EXCEPTIONS
  434. try
  435. #endif
  436. {
  437. #if !defined(BOOST_NO_MEMBER_TEMPLATES) && !defined(__IBMCPP__)
  438. r.assign(expression.begin(), expression.end(), syntax_options);
  439. #else
  440. if(expression.size())
  441. r.assign(&*expression.begin(), expression.size(), syntax_options);
  442. else
  443. r.assign(static_cast<UChar32 const*>(0), static_cast<boost::u32regex::size_type>(0), syntax_options);
  444. #endif
  445. #ifdef BOOST_NO_EXCEPTIONS
  446. if(r.status())
  447. have_catch = true;
  448. #endif
  449. }
  450. #ifndef BOOST_NO_EXCEPTIONS
  451. catch(const boost::bad_expression&)
  452. {
  453. have_catch = true;
  454. }
  455. catch(const std::runtime_error& e)
  456. {
  457. have_catch = true;
  458. BOOST_REGEX_TEST_ERROR("Expected a bad_expression exception, but a std::runtime_error instead: " << e.what(), wchar_t);
  459. }
  460. catch(const std::exception& e)
  461. {
  462. have_catch = true;
  463. BOOST_REGEX_TEST_ERROR("Expected a bad_expression exception, but a std::exception instead: " << e.what(), wchar_t);
  464. }
  465. catch(...)
  466. {
  467. have_catch = true;
  468. BOOST_REGEX_TEST_ERROR("Expected a bad_expression exception, but got an exception of unknown type instead", wchar_t);
  469. }
  470. #endif
  471. if(!have_catch)
  472. {
  473. // oops expected exception was not thrown:
  474. BOOST_REGEX_TEST_ERROR("Expected an exception, but didn't find one.", wchar_t);
  475. }
  476. if(0 == *test_locale::c_str())
  477. {
  478. //
  479. // Now try UTF-16 construction:
  480. //
  481. typedef boost::u32_to_u16_iterator<std::vector<UChar32>::const_iterator> u16_conv;
  482. std::vector<UChar> expression16;
  483. #ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS
  484. expression16.assign(u16_conv(expression.begin()), u16_conv(expression.end()));
  485. #else
  486. std::copy(u16_conv(expression.begin()), u16_conv(expression.end()), std::back_inserter(expression16));
  487. #endif
  488. if(0 == boost::make_u32regex(expression16.begin(), expression16.end(), syntax_options | boost::regex_constants::no_except).status())
  489. {
  490. BOOST_REGEX_TEST_ERROR("Expression compiled when it should not have done so.", wchar_t);
  491. }
  492. if(std::find(expression16.begin(), expression16.end(), 0) == expression16.end())
  493. {
  494. expression16.push_back(0);
  495. if(0 == boost::make_u32regex(&*expression16.begin(), syntax_options | boost::regex_constants::no_except).status())
  496. {
  497. BOOST_REGEX_TEST_ERROR("Expression compiled when it should not have done so.", wchar_t);
  498. }
  499. }
  500. //
  501. // Now try UTF-8 construction:
  502. //
  503. typedef boost::u32_to_u8_iterator<std::vector<UChar32>::const_iterator> u8_conv;
  504. std::vector<unsigned char> expression8;
  505. #ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS
  506. expression8.assign(u8_conv(expression.begin()), u8_conv(expression.end()));
  507. #else
  508. std::copy(u8_conv(expression.begin()), u8_conv(expression.end()), std::back_inserter(expression8));
  509. #endif
  510. if(0 == boost::make_u32regex(expression8.begin(), expression8.end(), syntax_options | boost::regex_constants::no_except).status())
  511. {
  512. BOOST_REGEX_TEST_ERROR("Expression compiled when it should not have done so.", wchar_t);
  513. }
  514. if(std::find(expression8.begin(), expression8.end(), 0) == expression8.end())
  515. {
  516. expression8.push_back(0);
  517. if(0 == boost::make_u32regex(&*expression8.begin(), syntax_options | boost::regex_constants::no_except).status())
  518. {
  519. BOOST_REGEX_TEST_ERROR("Expression compiled when it should not have done so.", wchar_t);
  520. }
  521. }
  522. }
  523. }
  524. void test_icu(const wchar_t&, const test_regex_replace_tag&)
  525. {
  526. std::vector< ::UChar32> expression;
  527. #ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS
  528. expression.assign(test_info<wchar_t>::expression().begin(), test_info<wchar_t>::expression().end());
  529. #else
  530. std::copy(test_info<wchar_t>::expression().begin(), test_info<wchar_t>::expression().end(), std::back_inserter(expression));
  531. #endif
  532. boost::regex_constants::syntax_option_type syntax_options = test_info<UChar32>::syntax_options();
  533. boost::u32regex r;
  534. #ifndef BOOST_NO_EXCEPTIONS
  535. try
  536. #endif
  537. {
  538. #if !defined(BOOST_NO_MEMBER_TEMPLATES) && !defined(__IBMCPP__)
  539. r.assign(expression.begin(), expression.end(), syntax_options);
  540. #else
  541. if(expression.size())
  542. r.assign(&*expression.begin(), expression.size(), syntax_options);
  543. else
  544. r.assign(static_cast<UChar32 const*>(0), static_cast<boost::u32regex::size_type>(0), syntax_options);
  545. #endif
  546. if(r.status())
  547. {
  548. BOOST_REGEX_TEST_ERROR("Expression did not compile when it should have done, error code = " << r.status(), UChar32);
  549. }
  550. typedef std::vector<UChar32> string_type;
  551. string_type search_text;
  552. boost::regex_constants::match_flag_type opts = test_info<UChar32>::match_options();
  553. string_type format_string;
  554. string_type result_string;
  555. #ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS
  556. search_text.assign(test_info<UChar32>::search_text().begin(), test_info<UChar32>::search_text().end());
  557. format_string.assign(test_info<UChar32>::format_string().begin(), test_info<UChar32>::format_string().end());
  558. format_string.push_back(0);
  559. result_string.assign(test_info<UChar32>::result_string().begin(), test_info<UChar32>::result_string().end());
  560. #else
  561. std::copy(test_info<UChar32>::search_text().begin(), test_info<UChar32>::search_text().end(), std::back_inserter(search_text));
  562. std::copy(test_info<UChar32>::format_string().begin(), test_info<UChar32>::format_string().end(), std::back_inserter(format_string));
  563. format_string.push_back(0);
  564. std::copy(test_info<UChar32>::result_string().begin(), test_info<UChar32>::result_string().end(), std::back_inserter(result_string));
  565. #endif
  566. string_type result;
  567. boost::u32regex_replace(unnecessary_fix::back_inserter(result), search_text.begin(), search_text.end(), r, &*format_string.begin(), opts);
  568. if(result != result_string)
  569. {
  570. BOOST_REGEX_TEST_ERROR("regex_replace generated an incorrect string result", UChar32);
  571. }
  572. //
  573. // Mixed mode character encoding:
  574. //
  575. if(0 == *test_locale::c_str())
  576. {
  577. //
  578. // Now try UTF-16 construction:
  579. //
  580. typedef boost::u32_to_u16_iterator<std::vector<UChar32>::const_iterator> u16_conv;
  581. std::vector<UChar> expression16, text16, format16, result16, found16;
  582. #ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS
  583. expression16.assign(u16_conv(expression.begin()), u16_conv(expression.end()));
  584. text16.assign(u16_conv(search_text.begin()), u16_conv(search_text.end()));
  585. format16.assign(u16_conv(format_string.begin()), u16_conv(format_string.end()));
  586. result16.assign(u16_conv(result_string.begin()), u16_conv(result_string.end()));
  587. #else
  588. std::copy(u16_conv(expression.begin()), u16_conv(expression.end()), std::back_inserter(expression16));
  589. std::copy(u16_conv(search_text.begin()), u16_conv(search_text.end()), std::back_inserter(text16));
  590. std::copy(u16_conv(format_string.begin()), u16_conv(format_string.end()), std::back_inserter(format16));
  591. std::copy(u16_conv(result_string.begin()), u16_conv(result_string.end()), std::back_inserter(result16));
  592. #endif
  593. r = boost::make_u32regex(expression16.begin(), expression16.end(), syntax_options);
  594. boost::u32regex_replace(unnecessary_fix::back_inserter(found16), text16.begin(), text16.end(), r, &*format16.begin(), opts);
  595. if(result16 != found16)
  596. {
  597. BOOST_REGEX_TEST_ERROR("u32regex_replace with UTF-16 string returned incorrect result", UChar32);
  598. }
  599. //
  600. // Now with UnicodeString:
  601. //
  602. U_NAMESPACE_QUALIFIER UnicodeString expression16u, text16u, format16u, result16u, found16u;
  603. if(expression16.size())
  604. expression16u.setTo(&*expression16.begin(), expression16.size());
  605. if(text16.size())
  606. text16u.setTo(&*text16.begin(), text16.size());
  607. format16u.setTo(&*format16.begin(), format16.size()-1);
  608. if(result16.size())
  609. result16u.setTo(&*result16.begin(), result16.size());
  610. r = boost::make_u32regex(expression16.begin(), expression16.end(), syntax_options);
  611. found16u = boost::u32regex_replace(text16u, r, format16u, opts);
  612. if(result16u != found16u)
  613. {
  614. BOOST_REGEX_TEST_ERROR("u32regex_replace with UTF-16 string returned incorrect result", UChar32);
  615. }
  616. //
  617. // Now try UTF-8 construction:
  618. //
  619. typedef boost::u32_to_u8_iterator<std::vector<UChar32>::const_iterator, unsigned char> u8_conv;
  620. std::vector<char> expression8, text8, format8, result8, found8;
  621. #ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS
  622. expression8.assign(u8_conv(expression.begin()), u8_conv(expression.end()));
  623. text8.assign(u8_conv(search_text.begin()), u8_conv(search_text.end()));
  624. format8.assign(u8_conv(format_string.begin()), u8_conv(format_string.end()));
  625. result8.assign(u8_conv(result_string.begin()), u8_conv(result_string.end()));
  626. #else
  627. std::copy(u8_conv(expression.begin()), u8_conv(expression.end()), std::back_inserter(expression8));
  628. std::copy(u8_conv(search_text.begin()), u8_conv(search_text.end()), std::back_inserter(text8));
  629. std::copy(u8_conv(format_string.begin()), u8_conv(format_string.end()), std::back_inserter(format8));
  630. std::copy(u8_conv(result_string.begin()), u8_conv(result_string.end()), std::back_inserter(result8));
  631. #endif
  632. r = boost::make_u32regex(expression8.begin(), expression8.end(), syntax_options);
  633. boost::u32regex_replace(unnecessary_fix::back_inserter(found8), text8.begin(), text8.end(), r, &*format8.begin(), opts);
  634. if(result8 != found8)
  635. {
  636. BOOST_REGEX_TEST_ERROR("u32regex_replace with UTF-8 string returned incorrect result", UChar32);
  637. }
  638. //
  639. // Now with std::string and UTF-8:
  640. //
  641. std::string expression8s, text8s, format8s, result8s, found8s;
  642. if(expression8.size())
  643. expression8s.assign(&*expression8.begin(), expression8.size());
  644. if(text8.size())
  645. text8s.assign(&*text8.begin(), text8.size());
  646. format8s.assign(&*format8.begin(), format8.size()-1);
  647. if(result8.size())
  648. result8s.assign(&*result8.begin(), result8.size());
  649. r = boost::make_u32regex(expression8.begin(), expression8.end(), syntax_options);
  650. found8s = boost::u32regex_replace(text8s, r, format8s, opts);
  651. if(result8s != found8s)
  652. {
  653. BOOST_REGEX_TEST_ERROR("u32regex_replace with UTF-8 string returned incorrect result", UChar32);
  654. }
  655. }
  656. }
  657. #ifndef BOOST_NO_EXCEPTIONS
  658. catch(const boost::bad_expression& e)
  659. {
  660. BOOST_REGEX_TEST_ERROR("Expression did not compile when it should have done: " << e.what(), UChar32);
  661. }
  662. catch(const std::runtime_error& e)
  663. {
  664. BOOST_REGEX_TEST_ERROR("Received an unexpected std::runtime_error: " << e.what(), UChar32);
  665. }
  666. catch(const std::exception& e)
  667. {
  668. BOOST_REGEX_TEST_ERROR("Received an unexpected std::exception: " << e.what(), UChar32);
  669. }
  670. catch(...)
  671. {
  672. BOOST_REGEX_TEST_ERROR("Received an unexpected exception of unknown type", UChar32);
  673. }
  674. #endif
  675. }
  676. #else
  677. #include "test.hpp"
  678. void test_icu(const wchar_t&, const test_regex_search_tag&){}
  679. void test_icu(const wchar_t&, const test_invalid_regex_tag&){}
  680. void test_icu(const wchar_t&, const test_regex_replace_tag&){}
  681. #endif