icu_example.cpp 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188
  1. /*
  2. *
  3. * Copyright (c) 2004
  4. * John Maddock
  5. *
  6. * Use, modification and distribution are subject to the
  7. * Boost Software License, Version 1.0. (See accompanying file
  8. * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  9. *
  10. */
  11. /*
  12. * LOCATION: see http://www.boost.org for most recent version.
  13. * FILE icu_example.cpp
  14. * VERSION see <boost/version.hpp>
  15. * DESCRIPTION: examples of using Boost.Regex with ICU Unicode string types.
  16. */
  17. #include <boost/regex/config.hpp>
  18. #ifdef BOOST_HAS_ICU
  19. #include <boost/regex/icu.hpp>
  20. #include <iostream>
  21. #include <assert.h>
  22. //
  23. // Find out if *password* meets our password requirements,
  24. // as defined by the regular expression *requirements*.
  25. //
  26. bool is_valid_password(const U_NAMESPACE_QUALIFIER UnicodeString& password, const U_NAMESPACE_QUALIFIER UnicodeString& requirements)
  27. {
  28. return boost::u32regex_match(password, boost::make_u32regex(requirements));
  29. }
  30. //
  31. // Extract filename part of a path from a UTF-8 encoded std::string and return the result
  32. // as another std::string:
  33. //
  34. std::string get_filename(const std::string& path)
  35. {
  36. boost::u32regex r = boost::make_u32regex("(?:\\A|.*\\\\)([^\\\\]+)");
  37. boost::smatch what;
  38. if(boost::u32regex_match(path, what, r))
  39. {
  40. // extract $1 as a std::string:
  41. return what.str(1);
  42. }
  43. else
  44. {
  45. throw std::runtime_error("Invalid pathname");
  46. }
  47. }
  48. U_NAMESPACE_QUALIFIER UnicodeString extract_greek(const U_NAMESPACE_QUALIFIER UnicodeString& text)
  49. {
  50. // searches through some UTF-16 encoded text for a block encoded in Greek,
  51. // this expression is imperfect, but the best we can do for now - searching
  52. // for specific scripts is actually pretty hard to do right.
  53. boost::u32regex r = boost::make_u32regex(L"[\\x{370}-\\x{3FF}](?:[^[:L*:]]|[\\x{370}-\\x{3FF}])*");
  54. boost::u16match what;
  55. if(boost::u32regex_search(text, what, r))
  56. {
  57. // extract $0 as a UnicodeString:
  58. return U_NAMESPACE_QUALIFIER UnicodeString(what[0].first, what.length(0));
  59. }
  60. else
  61. {
  62. throw std::runtime_error("No Greek found!");
  63. }
  64. }
  65. void enumerate_currencies(const std::string& text)
  66. {
  67. // enumerate and print all the currency symbols, along
  68. // with any associated numeric values:
  69. const char* re =
  70. "([[:Sc:]][[:Cf:][:Cc:][:Z*:]]*)?"
  71. "([[:Nd:]]+(?:[[:Po:]][[:Nd:]]+)?)?"
  72. "(?(1)"
  73. "|(?(2)"
  74. "[[:Cf:][:Cc:][:Z*:]]*"
  75. ")"
  76. "[[:Sc:]]"
  77. ")";
  78. boost::u32regex r = boost::make_u32regex(re);
  79. boost::u32regex_iterator<std::string::const_iterator> i(boost::make_u32regex_iterator(text, r)), j;
  80. while(i != j)
  81. {
  82. std::cout << (*i)[0] << std::endl;
  83. ++i;
  84. }
  85. }
  86. void enumerate_currencies2(const std::string& text)
  87. {
  88. // enumerate and print all the currency symbols, along
  89. // with any associated numeric values:
  90. const char* re =
  91. "([[:Sc:]][[:Cf:][:Cc:][:Z*:]]*)?"
  92. "([[:Nd:]]+(?:[[:Po:]][[:Nd:]]+)?)?"
  93. "(?(1)"
  94. "|(?(2)"
  95. "[[:Cf:][:Cc:][:Z*:]]*"
  96. ")"
  97. "[[:Sc:]]"
  98. ")";
  99. boost::u32regex r = boost::make_u32regex(re);
  100. boost::u32regex_token_iterator<std::string::const_iterator>
  101. i(boost::make_u32regex_token_iterator(text, r, 1)), j;
  102. while(i != j)
  103. {
  104. std::cout << *i << std::endl;
  105. ++i;
  106. }
  107. }
  108. //
  109. // Take a credit card number as a string of digits,
  110. // and reformat it as a human readable string with "-"
  111. // separating each group of four digit;,
  112. // note that we're mixing a UTF-32 regex, with a UTF-16
  113. // string and a UTF-8 format specifier, and it still all
  114. // just works:
  115. //
  116. const boost::u32regex e = boost::make_u32regex("\\A(\\d{3,4})[- ]?(\\d{4})[- ]?(\\d{4})[- ]?(\\d{4})\\z");
  117. const char* human_format = "$1-$2-$3-$4";
  118. U_NAMESPACE_QUALIFIER UnicodeString human_readable_card_number(const U_NAMESPACE_QUALIFIER UnicodeString& s)
  119. {
  120. return boost::u32regex_replace(s, e, human_format);
  121. }
  122. int main()
  123. {
  124. // password checks using u32regex_match:
  125. U_NAMESPACE_QUALIFIER UnicodeString pwd = "abcDEF---";
  126. U_NAMESPACE_QUALIFIER UnicodeString pwd_check = "(?=.*[[:lower:]])(?=.*[[:upper:]])(?=.*[[:punct:]]).{6,}";
  127. bool b = is_valid_password(pwd, pwd_check);
  128. assert(b);
  129. pwd = "abcD-";
  130. b = is_valid_password(pwd, pwd_check);
  131. assert(!b);
  132. // filename extraction with u32regex_match:
  133. std::string file = "abc.hpp";
  134. file = get_filename(file);
  135. assert(file == "abc.hpp");
  136. file = "c:\\a\\b\\c\\d.h";
  137. file = get_filename(file);
  138. assert(file == "d.h");
  139. // Greek text extraction with u32regex_search:
  140. const UChar t[] = {
  141. 'S', 'o', 'm', 'e', ' ', 'w', 'h', 'e', 'r', 'e', ' ', 'i', 'n', 0x0391, 0x039D, 0x0395, 0x0398, 0x0391, 0
  142. };
  143. const UChar g[] = {
  144. 0x0391, 0x039D, 0x0395, 0x0398, 0x0391, 0
  145. };
  146. U_NAMESPACE_QUALIFIER UnicodeString text = t;
  147. U_NAMESPACE_QUALIFIER UnicodeString greek = extract_greek(text);
  148. assert(greek == g);
  149. // extract currency symbols with associated value, use iterator interface:
  150. std::string text2 = " $100.23 or \xC2\xA3""198.12 "; // \xC2\xA3 is the pound sign encoded in UTF-8
  151. enumerate_currencies(text2);
  152. enumerate_currencies2(text2);
  153. U_NAMESPACE_QUALIFIER UnicodeString credit_card_number = "1234567887654321";
  154. credit_card_number = human_readable_card_number(credit_card_number);
  155. assert(credit_card_number == "1234-5678-8765-4321");
  156. return 0;
  157. }
  158. #else
  159. #include <iostream>
  //
  // Fallback entry point for builds where BOOST_HAS_ICU is not defined:
  // there is nothing to demonstrate, so just report that and exit.
  //
  int main()
  {
     static const char note[] = "<NOTE>ICU support not enabled, feature unavailable</NOTE>";
     std::cout << note;
     return 0;
  }
  165. #endif