regex_iterator.qbk 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307
  1. [/
  2. Copyright 2006-2007 John Maddock.
  3. Distributed under the Boost Software License, Version 1.0.
  4. (See accompanying file LICENSE_1_0.txt or copy at
  5. http://www.boost.org/LICENSE_1_0.txt).
  6. ]
  7. [section:regex_iterator regex_iterator]
  8. The iterator type [regex_iterator] will enumerate all of the regular expression
  9. matches found in some sequence: dereferencing a [regex_iterator] yields a
  10. reference to a [match_results] object.
  11. template <class BidirectionalIterator,
  12. class charT = typename iterator_traits<BidirectionalIterator>::value_type,
  13. class traits = regex_traits<charT> >
  14. class regex_iterator
  15. {
  16. public:
  17. typedef basic_regex<charT, traits> regex_type;
  18. typedef match_results<BidirectionalIterator> value_type;
  19. typedef typename iterator_traits<BidirectionalIterator>::difference_type difference_type;
  20. typedef const value_type* pointer;
  21. typedef const value_type& reference;
  22. typedef std::forward_iterator_tag iterator_category;
  23. ``[link boost_regex.regex_iterator.construct1 regex_iterator]``();
  24. ``[link boost_regex.regex_iterator.construct2 regex_iterator]``(BidirectionalIterator a, BidirectionalIterator b,
  25. const regex_type& re,
  26. match_flag_type m = match_default);
  27. ``[link boost_regex.regex_iterator.construct3 regex_iterator]``(const regex_iterator&);
  28. regex_iterator& ``[link boost_regex.regex_iterator.assign operator=]``(const regex_iterator&);
  29. bool ``[link boost_regex.regex_iterator.op_eq operator==]``(const regex_iterator&)const;
  30. bool ``[link boost_regex.regex_iterator.op_ne operator!=]``(const regex_iterator&)const;
  31. const value_type& ``[link boost_regex.regex_iterator.op_deref operator*]``()const;
  32. const value_type* ``[link boost_regex.regex_iterator.op_arrow operator->]``()const;
  33. regex_iterator& ``[link boost_regex.regex_iterator.op_inc operator++]``();
  34. regex_iterator ``[link boost_regex.regex_iterator.op_inc2 operator++]``(int);
  35. };
  36. typedef regex_iterator<const char*> cregex_iterator;
  37. typedef regex_iterator<std::string::const_iterator> sregex_iterator;
  38. #ifndef BOOST_NO_WREGEX
  39. typedef regex_iterator<const wchar_t*> wcregex_iterator;
  40. typedef regex_iterator<std::wstring::const_iterator> wsregex_iterator;
  41. #endif
  42. template <class charT, class traits> regex_iterator<const charT*, charT, traits>
  43. ``[link boost_regex.regex_iterator.make make_regex_iterator]``(const charT* p, const basic_regex<charT, traits>& e,
  44. regex_constants::match_flag_type m = regex_constants::match_default);
  45. template <class charT, class traits, class ST, class SA>
  46. regex_iterator<typename std::basic_string<charT, ST, SA>::const_iterator, charT, traits>
  47. ``[link boost_regex.regex_iterator.make make_regex_iterator]``(const std::basic_string<charT, ST, SA>& p,
  48. const basic_regex<charT, traits>& e,
  49. regex_constants::match_flag_type m = regex_constants::match_default);
  50. [h4 Description]
  51. A [regex_iterator] is constructed from a pair of iterators, and enumerates
  52. all occurrences of a regular expression within that iterator range.
  53. [#boost_regex.regex_iterator.construct1]
  54. regex_iterator();
  55. [*Effects]: constructs an end of sequence [regex_iterator].
  56. [#boost_regex.regex_iterator.construct2]
  57. regex_iterator(BidirectionalIterator a, BidirectionalIterator b,
  58. const regex_type& re,
  59. match_flag_type m = match_default);
  60. [*Effects]: constructs a [regex_iterator] that will enumerate all occurrences of
  61. the expression /re/, within the sequence \[a,b), and found using [match_flag_type] /m/.
  62. The object /re/ must exist for the lifetime of the [regex_iterator].
  63. [*Throws]: `std::runtime_error` if the complexity of matching the expression
  64. against an N character string begins to exceed O(N[super 2]), or if the program
  65. runs out of stack space while matching the expression (if Boost.Regex is
  66. configured in recursive mode), or if the matcher exhausts its permitted
  67. memory allocation (if Boost.Regex is configured in non-recursive mode).
  68. [#boost_regex.regex_iterator.construct3]
  69. regex_iterator(const regex_iterator& that);
  70. [*Effects]: constructs a copy of `that`.
  71. [*Postconditions]: `*this == that`.
  72. [#boost_regex.regex_iterator.assign]
  73. regex_iterator& operator=(const regex_iterator& that);
  74. [*Effects]: sets `*this` equal to `that`.
  75. [*Postconditions]: `*this == that`.
  76. [#boost_regex.regex_iterator.op_eq]
  77. bool operator==(const regex_iterator& that)const;
  78. [*Effects]: returns true if `*this` is equal to `that`.
  79. [#boost_regex.regex_iterator.op_ne]
  80. bool operator!=(const regex_iterator& that)const;
  81. [*Effects]: returns `!(*this == that)`.
  82. [#boost_regex.regex_iterator.op_deref]
  83. const value_type& operator*()const;
  84. [*Effects]: dereferencing a [regex_iterator] object yields a const reference
  85. to a [match_results] object, whose members are set as follows:
  86. [table
  87. [[Element][Value]]
  88. [[`(*it).size()`][`1 + re.mark_count()`]]
  89. [[`(*it).empty()`][`false`]]
  90. [[`(*it).prefix().first`][The end of the last match found, or the start
  91. of the underlying sequence if this is the first match enumerated]]
  92. [[`(*it).prefix().second`][The same as the start of the match found:
  93. `(*it)[0].first`]]
  94. [[`(*it).prefix().matched`][True if the prefix did not match an empty string:
  95. `(*it).prefix().first != (*it).prefix().second`]]
  96. [[`(*it).suffix().first`][The same as the end of the match found:
  97. `(*it)[0].second`]]
  98. [[`(*it).suffix().second`][The end of the underlying sequence.]]
  99. [[`(*it).suffix().matched`][True if the suffix did not match an empty string:
  100. `(*it).suffix().first != (*it).suffix().second`]]
  101. [[`(*it)[0].first`][The start of the sequence of characters that matched the regular expression]]
  102. [[`(*it)[0].second`][The end of the sequence of characters that matched the regular expression]]
  103. [[`(*it)[0].matched`][true if a full match was found, and false if it was a partial match (found as a result of the match_partial flag being set).]]
  104. [[`(*it)[n].first`][For all integers `n < (*it).size()`, the start of the sequence
  105. that matched sub-expression /n/. Alternatively, if sub-expression /n/
  106. did not participate in the match, then the end of the underlying sequence.]]
  107. [[`(*it)[n].second`][For all integers `n < (*it).size()`, the end of the sequence
  108. that matched sub-expression /n/. Alternatively, if sub-expression /n/ did
  109. not participate in the match, then the end of the underlying sequence.]]
  110. [[`(*it)[n].matched`][For all integers `n < (*it).size()`, true if sub-expression /n/
  111. participated in the match, false otherwise.]]
  112. [[`(*it).position(n)`][For all integers `n < (*it).size()`, the distance from
  113. the start of the underlying sequence to the start of sub-expression match /n/.]]
  114. ]
  115. [#boost_regex.regex_iterator.op_arrow]
  116. const value_type* operator->()const;
  117. [*Effects]: returns `&(**this)`.
  118. [#boost_regex.regex_iterator.op_inc]
  119. regex_iterator& operator++();
  120. [*Effects]: moves the iterator to the next match in the underlying sequence, or
  121. the end of sequence iterator if none is found. When the last match found
  122. matched a zero length string, then the [regex_iterator] will find the next match as
  123. follows: if there exists a non-zero length match that starts at the same
  124. location as the last one, then returns it, otherwise starts looking for the
  125. next (possibly zero length) match from one position to the right of the last match.
  126. [*Throws]: `std::runtime_error` if the complexity of matching the expression
  127. against an N character string begins to exceed O(N[super 2]), or if the
  128. program runs out of stack space while matching the expression (if Boost.Regex is
  129. configured in recursive mode), or if the matcher exhausts its permitted
  130. memory allocation (if Boost.Regex is configured in non-recursive mode).
  131. [*Returns]: `*this`.
  132. [#boost_regex.regex_iterator.op_inc2]
  133. regex_iterator operator++(int);
  134. [*Effects]: constructs a copy `result` of `*this`, then calls `++(*this)`.
  135. [*Returns]: `result`.
  136. [#boost_regex.regex_iterator.make]
  137. template <class charT, class traits>
  138. regex_iterator<const charT*, charT, traits>
  139. make_regex_iterator(const charT* p, const basic_regex<charT, traits>& e,
  140. regex_constants::match_flag_type m = regex_constants::match_default);
  141. template <class charT, class traits, class ST, class SA>
  142. regex_iterator<typename std::basic_string<charT, ST, SA>::const_iterator, charT, traits>
  143. make_regex_iterator(const std::basic_string<charT, ST, SA>& p,
  144. const basic_regex<charT, traits>& e,
  145. regex_constants::match_flag_type m = regex_constants::match_default);
  146. [*Effects]: returns an iterator that enumerates all occurrences of expression /e/
  147. in text /p/ using [match_flag_type] /m/.
  148. [h4 Examples]
  149. The following example takes a C++ source file and builds up an index of class
  150. names, together with the location of each class in the file.
  #include <algorithm>
  #include <fstream>
  #include <iostream>
  #include <map>
  #include <string>
  #include <boost/regex.hpp>
  156. using namespace std;
  157. // purpose:
  158. // takes the contents of a file in the form of a string
  159. // and searches for all the C++ class definitions, storing
  160. // their locations in a map of strings/int's
  161. typedef std::map<std::string, std::string::difference_type, std::less<std::string> > map_type;
  162. const char* re =
  163. // possibly leading whitespace:
  164. "^[[:space:]]*"
  165. // possible template declaration:
  166. "(template[[:space:]]*<[^;:{]+>[[:space:]]*)?"
  167. // class or struct:
  168. "(class|struct)[[:space:]]*"
  169. // leading declspec macros etc:
  170. "("
  171. "\\<\\w+\\>"
  172. "("
  173. "[[:blank:]]*\\([^)]*\\)"
  174. ")?"
  175. "[[:space:]]*"
  176. ")*"
  177. // the class name
  178. "(\\<\\w*\\>)[[:space:]]*"
  179. // template specialisation parameters
  180. "(<[^;:{]+>)?[[:space:]]*"
  181. // terminate in { or :
  182. "(\\{|:[^;\\{()]*\\{)";
  183. boost::regex expression(re);
  184. map_type class_index;
  185. bool regex_callback(const boost::match_results<std::string::const_iterator>& what)
  186. {
  187. // what[0] contains the whole string
  188. // what[5] contains the class name.
  189. // what[6] contains the template specialisation if any.
  190. // add class name and position to map:
  191. class_index[what[5].str() + what[6].str()] = what.position(5);
  192. return true;
  193. }
  // Replaces the contents of `s` with every character that can still be
  // read from `is`.
  //
  // s:  destination string; its previous contents are discarded.
  // is: source stream; characters are extracted until get() fails.
  void load_file(std::string& s, std::istream& is)
  {
     s.erase();
     // in_avail() is only an estimate and may be zero or negative (for
     // example -1 when nothing is available). A negative value converted to
     // size_type would ask reserve() for an enormous buffer and make it
     // throw std::length_error, so only use the hint when it is positive.
     const std::streamsize avail = is.rdbuf()->in_avail();
     if(avail > 0)
        s.reserve(static_cast<std::string::size_type>(avail));
     char c;
     while(is.get(c))
     {
        // std::string grows its own capacity as required.
        s.push_back(c);
     }
  }
  206. int main(int argc, const char** argv)
  207. {
  208. std::string text;
  209. for(int i = 1; i < argc; ++i)
  210. {
  211. cout << "Processing file " << argv[i] << endl;
  212. std::ifstream fs(argv[i]);
  213. load_file(text, fs);
  214. // construct our iterators:
  215. boost::sregex_iterator m1(text.begin(), text.end(), expression);
  216. boost::sregex_iterator m2;
  217. std::for_each(m1, m2, &regex_callback);
  218. // copy results:
  219. cout << class_index.size() << " matches found" << endl;
  220. map_type::iterator c, d;
  221. c = class_index.begin();
  222. d = class_index.end();
  223. while(c != d)
  224. {
  225. cout << "class \"" << (*c).first << "\" found at index: " << (*c).second << endl;
  226. ++c;
  227. }
  228. class_index.erase(class_index.begin(), class_index.end());
  229. }
  230. return 0;
  231. }
  232. [endsect]