regex_replace.qbk 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264
  1. [/
  2. Copyright 2006-2007 John Maddock.
  3. Distributed under the Boost Software License, Version 1.0.
  4. (See accompanying file LICENSE_1_0.txt or copy at
  5. http://www.boost.org/LICENSE_1_0.txt).
  6. ]
  7. [section:regex_replace regex_replace]
  8. #include <boost/regex.hpp>
  9. The algorithm [regex_replace] searches through a string finding all the
  10. matches to the regular expression: for each match it then calls
  11. [match_results_format] to format the string and sends the result to the
  12. output iterator. Sections of text that do not match are copied to the
  13. output unchanged only if the /flags/ parameter does not have the
  14. flag `format_no_copy` set. If the flag `format_first_only` is set then
  15. only the first occurrence is replaced rather than all occurrences.
  16. template <class OutputIterator, class BidirectionalIterator, class traits, class charT, class Formatter>
  17. OutputIterator regex_replace(OutputIterator out,
  18. BidirectionalIterator first,
  19. BidirectionalIterator last,
  20. const basic_regex<charT, traits>& e,
  21. Formatter fmt,
  22. match_flag_type flags = match_default);
  23. template <class traits, class charT, class Formatter>
  24. basic_string<charT> regex_replace(const basic_string<charT>& s,
  25. const basic_regex<charT, traits>& e,
  26. Formatter fmt,
  27. match_flag_type flags = match_default);
  28. [h4 Description]
  29. template <class OutputIterator, class BidirectionalIterator, class traits, class charT, class Formatter>
  30. OutputIterator regex_replace(OutputIterator out,
  31. BidirectionalIterator first,
  32. BidirectionalIterator last,
  33. const basic_regex<charT, traits>& e,
  34. Formatter fmt,
  35. match_flag_type flags = match_default);
  36. Enumerates all the occurrences of expression /e/ in the sequence \[first, last),
  37. replacing each occurrence with the string that results by merging the
  38. match found with the format string /fmt/, and copies the resulting string to /out/.
  39. In the case that /fmt/ is a unary, binary or ternary function object, then the
  40. character sequence generated by that object is copied unchanged to the output when performing
  41. a substitution.
  42. If the flag `format_no_copy` is set in /flags/ then unmatched sections of
  43. text are not copied to output.
  44. If the flag `format_first_only` is set in /flags/ then only the first
  45. occurrence of /e/ is replaced.
  46. The manner in which the format string /fmt/ is interpreted, along with the
  47. rules used for finding matches, are determined by the flags set in /flags/:
  48. see [match_flag_type].
  49. [*Requires]
  50. The type `Formatter` must be either a pointer to a null-terminated string
  51. of type `char_type[]`, or be a container of `char_type`'s (for example
  52. `std::basic_string<char_type>`) or be a unary, binary or ternary functor
  53. that computes the replacement string from a function call: either
  54. `fmt(what)` which must return a container of `char_type`'s to be used as the
  55. replacement text, or either `fmt(what, out)` or `fmt(what, out, flags)`, both of
  56. which write the replacement text to `*out`, and then return the new
  57. OutputIterator position. In each case `what` is the [match_results] object
  58. that represents the match found. Note that if the formatter is a functor, then it is
  59. ['passed by value]: users that want to pass function objects with internal state
  60. might want to use [@../../../../doc/html/ref.html Boost.Ref] to wrap the object so
  61. that it's passed by reference.
  62. [*Effects]: Constructs a [regex_iterator] object:
  63. regex_iterator<BidirectionalIterator, charT, traits>
  64. i(first, last, e, flags),
  65. and uses /i/ to enumerate through all of the matches /m/ of type
  66. [match_results] `<BidirectionalIterator>` that occur within the sequence
  67. \[first, last).
  68. If no such matches are found and
  69. !(flags & format_no_copy)
  70. then calls
  71. std::copy(first, last, out).
  72. Otherwise, for each match found, if
  73. !(flags & format_no_copy)
  74. calls
  75. std::copy(m.prefix().first, m.prefix().last, out),
  76. and then calls
  77. m.format(out, fmt, flags).
  78. Finally if
  79. !(flags & format_no_copy)
  80. calls
  81. std::copy(last_m.suffix().first, last_m.suffix().last, out)
  82. where /last_m/ is a copy of the last match found.
  83. If `flags & format_first_only` is non-zero then only the first match found
  84. is replaced.
  85. [*Throws]: `std::runtime_error` if the complexity of matching the expression
  86. against an N character string begins to exceed O(N[super 2]), or if the
  87. program runs out of stack space while matching the expression (if Boost.Regex is
  88. configured in recursive mode), or if the matcher exhausts its permitted
  89. memory allocation (if Boost.Regex is configured in non-recursive mode).
  90. [*Returns]: out.
  91. template <class traits, class charT, class Formatter>
  92. basic_string<charT> regex_replace(const basic_string<charT>& s,
  93. const basic_regex<charT, traits>& e,
  94. Formatter fmt,
  95. match_flag_type flags = match_default);
  96. [*Requires]
  97. The type `Formatter` must be either a pointer to a null-terminated string
  98. of type `char_type[]`, or be a container of `char_type`'s (for example
  99. `std::basic_string<char_type>`) or be a unary, binary or ternary functor
  100. that computes the replacement string from a function call: either
  101. `fmt(what)` which must return a container of `char_type`'s to be used as the
  102. replacement text, or either `fmt(what, out)` or `fmt(what, out, flags)`, both of
  103. which write the replacement text to `*out`, and then return the new
  104. OutputIterator position. In each case `what` is the [match_results] object
  105. that represents the match found.
  106. [*Effects]: Constructs an object `basic_string<charT> result`, calls
  107. `regex_replace(back_inserter(result), s.begin(), s.end(), e, fmt, flags)`,
  108. and then returns `result`.
  109. [h4 Examples]
  110. The following example takes C/C++ source code as input, and outputs
  111. syntax highlighted HTML code.
  112. #include <fstream>
  113. #include <sstream>
  114. #include <string>
  115. #include <iterator>
  116. #include <boost/regex.hpp>
  117. #include <fstream>
  118. #include <iostream>
  119. // purpose:
  120. // takes the contents of a file and transforms it into
  121. // syntax highlighted code in HTML format
  122. boost::regex e1, e2; // assigned in main(): e1 from expression_text, e2 from pre_expression
  123. extern const char* expression_text; // pattern assigned to e1
  124. extern const char* format_string; // format used with e1 in the second regex_replace call
  125. extern const char* pre_expression; // pattern assigned to e2
  126. extern const char* pre_format; // format used with e2 in the first regex_replace call
  127. extern const char* header_text; // written to each output file before the code
  128. extern const char* footer_text; // written to each output file after the code
  // Reads everything remaining in `is` into `s`, replacing any previous
  // contents of `s`.
  //
  // s  - receives every character read from the stream.
  // is - stream to drain; characters are read until get() fails.
  void load_file(std::string& s, std::istream& is)
  {
     s.erase();
     // in_avail() is only a hint: it may be 0, or -1 when the buffer reports
     // that no characters are available. Converting a negative value to
     // size_type would make reserve() throw std::length_error, so only
     // pre-allocate when the hint is positive.
     std::streambuf* buf = is.rdbuf();
     if(buf)
     {
        const std::streamsize avail = buf->in_avail();
        if(avail > 0)
           s.reserve(static_cast<std::string::size_type>(avail));
     }
     // The string grows geometrically by itself, so no manual capacity
     // management is needed while appending.
     char c;
     while(is.get(c))
        s.push_back(c);
  }
  141. int main(int argc, const char** argv)
  142. {
  143. try{
  144. e1.assign(expression_text);
  145. e2.assign(pre_expression);
  146. for(int i = 1; i < argc; ++i)
  147. {
  148. std::cout << "Processing file " << argv[i] << std::endl;
  149. std::ifstream fs(argv[i]);
  150. std::string in;
  151. load_file(in, fs);
  152. std::string out_name(std::string(argv[i]) + std::string(".htm"));
  153. std::ofstream os(out_name.c_str());
  154. os << header_text;
  155. // strip '<' and '>' first by outputting to a
  156. // temporary string stream
  157. std::ostringstream t(std::ios::out | std::ios::binary);
  158. std::ostream_iterator<char, char> oi(t);
  159. boost::regex_replace(oi, in.begin(), in.end(),
  160. e2, pre_format, boost::match_default | boost::format_all);
  161. // then output to final output stream
  162. // adding syntax highlighting:
  163. std::string s(t.str());
  164. std::ostream_iterator<char, char> out(os);
  165. boost::regex_replace(out, s.begin(), s.end(),
  166. e1, format_string, boost::match_default | boost::format_all);
  167. os << footer_text;
  168. }
  169. }
  170. catch(...)
  171. { return -1; }
  172. return 0;
  173. }
  // Pattern for the first pass: sub-expressions 1, 2 and 3 capture '<', '>'
  // and '&' respectively; a carriage return is matched without a capture.
  // (No `extern` here: this is the definition, matching the other strings.)
  const char* pre_expression = "(<)|(>)|(&)|\\r";
  // Format for the first pass: emits the HTML entity for whichever of
  // sub-expressions 1-3 matched, and nothing for a carriage return.
  const char* pre_format = "(?1&lt;)(?2&gt;)(?3&amp;)";
  176. const char* expression_text = // pattern assigned to e1: one alternative per highlighted token class
  177. // preprocessor directives (a '#' line, including backslash-continued lines): sub-expression 1
  178. "(^[[:blank:]]*#(?:[^\\\\\\n]|\\\\[^\\n[:punct:][:word:]]*[\\n[:punct:][:word:]])*)|"
  179. // comments (from // to end of line, or a /* ... */ pair matched non-greedily): sub-expression 2
  180. "(//[^\\n]*|/\\*.*?\\*/)|"
  181. // numeric literals (hex, integer or floating point, with optional suffix): sub-expression 3
  182. "\\<([+-]?(?:(?:0x[[:xdigit:]]+)|(?:(?:[[:digit:]]*\\.)?[[:digit:]]+"
  183. "(?:[eE][+-]?[[:digit:]]+)?))u?(?:(?:int(?:8|16|32|64))|L)?)\\>|"
  184. // character and string literals (allowing backslash escapes): sub-expression 4
  185. "('(?:[^\\\\']|\\\\.)*'|\"(?:[^\\\\\"]|\\\\.)*\")|"
  186. // keywords, matched as whole words: sub-expression 5
  187. "\\<(__asm|__cdecl|__declspec|__export|__far16|__fastcall|__fortran|__import"
  188. "|__pascal|__rtti|__stdcall|_asm|_cdecl|__except|_export|_far16|_fastcall"
  189. "|__finally|_fortran|_import|_pascal|_stdcall|__thread|__try|asm|auto|bool"
  190. "|break|case|catch|cdecl|char|class|const|const_cast|continue|default|delete"
  191. "|do|double|dynamic_cast|else|enum|explicit|extern|false|float|for|friend|goto"
  192. "|if|inline|int|long|mutable|namespace|new|operator|pascal|private|protected"
  193. "|public|register|reinterpret_cast|return|short|signed|sizeof|static|static_cast"
  194. "|struct|switch|template|this|throw|true|try|typedef|typeid|typename|union|unsigned"
  195. "|using|virtual|void|volatile|wchar_t|while)\\>"
  196. ;
  197. const char* format_string = "(?1<font color=\"#008040\">$&</font>)"
  198. "(?2<I><font color=\"#000080\">$&</font></I>)"
  199. "(?3<font color=\"#0000A0\">$&</font>)"
  200. "(?4<font color=\"#0000FF\">$&</font>)"
  201. "(?5<B>$&</B>)"; // each (?N...) group is emitted only when sub-expression N of expression_text matched; $& is the whole match
  202. const char* header_text = // written to each output file before the highlighted code
  203. "<HTML>\n<HEAD>\n"
  204. "<TITLE>Auto-generated html formatted source</TITLE>\n"
  205. "<META HTTP-EQUIV=\"Content-Type\" CONTENT=\"text/html; charset=windows-1252\">\n"
  206. "</HEAD>\n"
  207. "<BODY LINK=\"#0000ff\" VLINK=\"#800080\" BGCOLOR=\"#ffffff\">\n"
  208. "<P> </P>\n<PRE>";
  209. const char* footer_text = "</PRE>\n</BODY>\n\n"; // written after the highlighted code; NOTE(review): no closing </HTML> tag is emitted
  210. [endsect]