mfc_strings.qbk 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295
  1. [/
  2. Copyright 2006-2007 John Maddock.
  3. Distributed under the Boost Software License, Version 1.0.
  4. (See accompanying file LICENSE_1_0.txt or copy at
  5. http://www.boost.org/LICENSE_1_0.txt).
  6. ]
  7. [section:mfc_strings Using Boost Regex With MFC Strings]
  8. [section:mfc_intro Introduction to Boost.Regex and MFC Strings]
  9. The header `<boost/regex/mfc.hpp>` provides Boost.Regex support for MFC string
  10. types: note that this support requires Visual Studio .NET (Visual C++ 7) or
  11. later, where all of the MFC and ATL string types are based around the
  12. CSimpleStringT class template.
  13. In the following documentation, whenever you see
  14. CSimpleStringT<charT>, then you can substitute any of the following
  15. MFC/ATL types (all of which inherit from CSimpleStringT):
  16. CString
  17. CStringA
  18. CStringW
  19. CAtlString
  20. CAtlStringA
  21. CAtlStringW
  22. CStringT<charT,traits>
  23. CFixedStringT<charT,N>
  24. CSimpleStringT<charT>
  25. [endsect]
  26. [section:mfc_regex_types Regex Types Used With MFC Strings]
  27. The following typedefs are provided for the convenience of those working with
  28. TCHAR's:
  29. typedef basic_regex<TCHAR> tregex;
  30. typedef match_results<TCHAR const*> tmatch;
  31. typedef regex_iterator<TCHAR const*> tregex_iterator;
  32. typedef regex_token_iterator<TCHAR const*> tregex_token_iterator;
  33. If you are working with explicitly narrow or wide characters rather than
  34. TCHAR, then use the regular Boost.Regex types `regex` and `wregex` instead.
  35. [endsect]
  36. [section:mfc_regex_create Regular Expression Creation From an MFC String]
  37. The following helper function is available to assist in the creation of a
  38. regular expression from an MFC/ATL string type:
  39. template <class charT>
  40. basic_regex<charT>
  41. make_regex(const ATL::CSimpleStringT<charT>& s,
  42. ::boost::regex_constants::syntax_option_type f = boost::regex_constants::normal);
  43. [*Effects]: returns `basic_regex<charT>(s.GetString(), s.GetString() + s.GetLength(), f);`
  44. [endsect]
  45. [section:mfc_algo Overloaded Algorithms For MFC String Types]
  46. For each regular expression algorithm that's overloaded for a `std::basic_string`
  47. argument, there is also one overloaded for the MFC/ATL string types. These
  48. algorithm signatures all look a lot more complex than they actually are,
  49. but for completeness here they are anyway:
  50. [h4 regex_match]
  51. There are two overloads: the first reports what matched in a `match_results`
  52. structure, while the second does not.
  53. All the usual caveats for [regex_match] apply: in particular, the algorithm
  54. will only report a successful match if all of the input text matches the
  55. expression. If this isn't what you want, then use [regex_search] instead.
  56. template <class charT, class T, class A>
  57. bool regex_match(
  58. const ATL::CSimpleStringT<charT>& s,
  59. match_results<const charT*, A>& what,
  60. const basic_regex<charT, T>& e,
  61. boost::regex_constants::match_flag_type f = boost::regex_constants::match_default);
  62. [*Effects]: returns `::boost::regex_match(s.GetString(), s.GetString() + s.GetLength(), what, e, f);`
  63. [*Example:]
  64. //
  65. // Extract filename part of a path from a CString and return the result
  66. // as another CString:
  67. //
  68. CString get_filename(const CString& path)
  69. {
  70. boost::tregex r(__T("(?:\\A|.*\\\\)([^\\\\]+)"));
  71. boost::tmatch what;
  72. if(boost::regex_match(path, what, r))
  73. {
  74. // extract $1 as a CString:
  75. return CString(what[1].first, what.length(1));
  76. }
  77. else
  78. {
  79. throw std::runtime_error("Invalid pathname");
  80. }
  81. }
  82. [h4 regex_match (second overload)]
  83. template <class charT, class T>
  84. bool regex_match(
  85. const ATL::CSimpleStringT<charT>& s,
  86. const basic_regex<charT, T>& e,
  87. boost::regex_constants::match_flag_type f = boost::regex_constants::match_default)
  88. [*Effects]: returns `::boost::regex_match(s.GetString(), s.GetString() + s.GetLength(), e, f);`
  89. [*Example:]
  90. //
  91. // Find out if *password* meets our password requirements,
  92. // as defined by the regular expression *requirements*.
  93. //
  94. bool is_valid_password(const CString& password, const CString& requirements)
  95. {
  96. return boost::regex_match(password, boost::make_regex(requirements));
  97. }
  98. [h4 regex_search]
  99. There are two additional overloads for [regex_search]: the first reports what
  100. matched, the second does not:
  101. template <class charT, class A, class T>
  102. bool regex_search(const ATL::CSimpleStringT<charT>& s,
  103. match_results<const charT*, A>& what,
  104. const basic_regex<charT, T>& e,
  105. boost::regex_constants::match_flag_type f = boost::regex_constants::match_default)
  106. [*Effects]: returns `::boost::regex_search(s.GetString(), s.GetString() + s.GetLength(), what, e, f);`
  107. [*Example]: Postcode extraction from an address string.
  108. CString extract_postcode(const CString& address)
  109. {
  110. // searches through address for a UK postcode and returns the result,
  111. // the expression used is by Phil A. on www.regexlib.com:
  112. boost::tregex r(__T("^(([A-Z]{1,2}[0-9]{1,2})|([A-Z]{1,2}[0-9][A-Z]))\\s?([0-9][A-Z]{2})$"));
  113. boost::tmatch what;
  114. if(boost::regex_search(address, what, r))
  115. {
  116. // extract $0 as a CString:
  117. return CString(what[0].first, what.length());
  118. }
  119. else
  120. {
  121. throw std::runtime_error("No postcode found");
  122. }
  123. }
  124. [h4 regex_search (second overload)]
  125. template <class charT, class T>
  126. inline bool regex_search(const ATL::CSimpleStringT<charT>& s,
  127. const basic_regex<charT, T>& e,
  128. boost::regex_constants::match_flag_type f = boost::regex_constants::match_default)
  129. [*Effects]: returns `::boost::regex_search(s.GetString(), s.GetString() + s.GetLength(), e, f);`
  130. [h4 regex_replace]
  131. There are two additional overloads for [regex_replace]: the first sends output
  132. to an output iterator, while the second creates a new string:
  133. template <class OutputIterator, class BidirectionalIterator, class traits, class
  134. charT>
  135. OutputIterator regex_replace(OutputIterator out,
  136. BidirectionalIterator first,
  137. BidirectionalIterator last,
  138. const basic_regex<charT, traits>& e,
  139. const ATL::CSimpleStringT<charT>& fmt,
  140. match_flag_type flags = match_default)
  141. [*Effects]: returns `::boost::regex_replace(out, first, last, e, fmt.GetString(), flags);`
  142. template <class traits, class charT>
  143. ATL::CSimpleStringT<charT> regex_replace(const ATL::CSimpleStringT<charT>& s,
  144. const basic_regex<charT, traits>& e,
  145. const ATL::CSimpleStringT<charT>& fmt,
  146. match_flag_type flags = match_default)
  147. [*Effects]: returns a new string created using [regex_replace], and the same
  148. memory manager as string /s/.
  149. [*Example]:
  150. //
  151. // Take a credit card number as a string of digits,
  152. // and reformat it as a human readable string with "-"
  153. // separating each group of four digits:
  154. //
  155. const boost::tregex e(__T("\\A(\\d{3,4})[- ]?(\\d{4})[- ]?(\\d{4})[- ]?(\\d{4})\\z"));
  156. const CString human_format = __T("$1-$2-$3-$4");
  157. CString human_readable_card_number(const CString& s)
  158. {
  159. return boost::regex_replace(s, e, human_format);
  160. }
  161. [endsect]
  162. [section:mfc_iter Iterating Over the Matches Within An MFC String]
  163. The following helper functions are provided to ease the conversion from an
  164. MFC/ATL string to a [regex_iterator] or [regex_token_iterator]:
  165. [h4 regex_iterator creation helper]
  166. template <class charT>
  167. regex_iterator<charT const*>
  168. make_regex_iterator(
  169. const ATL::CSimpleStringT<charT>& s,
  170. const basic_regex<charT>& e,
  171. ::boost::regex_constants::match_flag_type f = boost::regex_constants::match_default);
  172. [*Effects]: returns `regex_iterator(s.GetString(), s.GetString() + s.GetLength(), e, f);`
  173. [*Example]:
  174. void enumerate_links(const CString& html)
  175. {
  176. // enumerate and print all the links in some HTML text,
  177. // the expression used is by Andrew Lee on www.regexlib.com:
  178. boost::tregex r(
  179. __T("href=[\"\']((http:\\/\\/|\\.\\/|\\/)?\\w+"
  180. "(\\.\\w+)*(\\/\\w+(\\.\\w+)?)*"
  181. "(\\/|\\?\\w*=\\w*(&\\w*=\\w*)*)?)[\"\']"));
  182. boost::tregex_iterator i(boost::make_regex_iterator(html, r)), j;
  183. while(i != j)
  184. {
  185. std::cout << (*i)[1] << std::endl;
  186. ++i;
  187. }
  188. }
  189. [h4 regex_token_iterator creation helpers]
  190. template <class charT>
  191. regex_token_iterator<charT const*>
  192. make_regex_token_iterator(
  193. const ATL::CSimpleStringT<charT>& s,
  194. const basic_regex<charT>& e,
  195. int sub = 0,
  196. ::boost::regex_constants::match_flag_type f = boost::regex_constants::match_default);
  197. [*Effects]: returns `regex_token_iterator(s.GetString(), s.GetString() + s.GetLength(), e, sub, f);`
  198. template <class charT>
  199. regex_token_iterator<charT const*>
  200. make_regex_token_iterator(
  201. const ATL::CSimpleStringT<charT>& s,
  202. const basic_regex<charT>& e,
  203. const std::vector<int>& subs,
  204. ::boost::regex_constants::match_flag_type f = boost::regex_constants::match_default);
  205. [*Effects]: returns `regex_token_iterator(s.GetString(), s.GetString() + s.GetLength(), e, subs, f);`
  206. template <class charT, std::size_t N>
  207. regex_token_iterator<charT const*>
  208. make_regex_token_iterator(
  209. const ATL::CSimpleStringT<charT>& s,
  210. const basic_regex<charT>& e,
  211. const int (& subs)[N],
  212. ::boost::regex_constants::match_flag_type f = boost::regex_constants::match_default);
  213. [*Effects]: returns `regex_token_iterator(s.GetString(), s.GetString() + s.GetLength(), e, subs, f);`
  214. [*Example]:
  215. void enumerate_links2(const CString& html)
  216. {
  217. // enumerate and print all the links in some HTML text,
  218. // the expression used is by Andrew Lee on www.regexlib.com:
  219. boost::tregex r(
  220. __T("href=[\"\']((http:\\/\\/|\\.\\/|\\/)?\\w+"
  221. "(\\.\\w+)*(\\/\\w+(\\.\\w+)?)*"
  222. "(\\/|\\?\\w*=\\w*(&\\w*=\\w*)*)?)[\"\']"));
  223. boost::tregex_token_iterator i(boost::make_regex_token_iterator(html, r, 1)), j;
  224. while(i != j)
  225. {
  226. std::cout << *i << std::endl;
  227. ++i;
  228. }
  229. }
  230. [endsect]
  231. [endsect]