performance.cpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259
  1. ///////////////////////////////////////////////////////////////
  2. // Copyright 2015 John Maddock. Distributed under the Boost
  3. // Software License, Version 1.0. (See accompanying file
  4. // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  5. //
  6. #include "performance.hpp"
  7. #include <list>
  8. #include <fstream>
  9. #include <iostream>
  10. #include <iomanip>
  11. #include <boost/chrono.hpp>
  12. #include <boost/detail/lightweight_main.hpp>
  13. #include <boost/regex.hpp>
  14. #include <boost/filesystem.hpp>
  15. void load_file(std::string& text, const char* file)
  16. {
  17. std::deque<char> temp_copy;
  18. std::ifstream is(file);
  19. if(!is.good())
  20. {
  21. std::string msg("Unable to open file: \"");
  22. msg.append(file);
  23. msg.append("\"");
  24. throw std::runtime_error(msg);
  25. }
  26. is.seekg(0, std::ios_base::end);
  27. std::istream::pos_type pos = is.tellg();
  28. is.seekg(0, std::ios_base::beg);
  29. text.erase();
  30. text.reserve(pos);
  31. std::istreambuf_iterator<char> it(is);
  32. std::copy(it, std::istreambuf_iterator<char>(), std::back_inserter(text));
  33. }
  34. typedef std::list<boost::shared_ptr<abstract_regex> > list_type;
  35. list_type& engines()
  36. {
  37. static list_type l;
  38. return l;
  39. }
  40. void abstract_regex::register_instance(boost::shared_ptr<abstract_regex> item)
  41. {
  42. engines().push_back(item);
  43. }
  44. template <class Clock>
  45. struct stopwatch
  46. {
  47. typedef typename Clock::duration duration;
  48. stopwatch()
  49. {
  50. m_start = Clock::now();
  51. }
  52. duration elapsed()
  53. {
  54. return Clock::now() - m_start;
  55. }
  56. void reset()
  57. {
  58. m_start = Clock::now();
  59. }
  60. private:
  61. typename Clock::time_point m_start;
  62. };
  63. unsigned sum = 0;
  64. unsigned last_value_returned = 0;
  65. template <class Func>
  66. double exec_timed_test(Func f)
  67. {
  68. double t = 0;
  69. unsigned repeats = 1;
  70. do {
  71. stopwatch<boost::chrono::high_resolution_clock> w;
  72. for(unsigned count = 0; count < repeats; ++count)
  73. {
  74. last_value_returned = f();
  75. sum += last_value_returned;
  76. }
  77. t = boost::chrono::duration_cast<boost::chrono::duration<double>>(w.elapsed()).count();
  78. if(t < 0.5)
  79. repeats *= 2;
  80. } while(t < 0.5);
  81. return t / repeats;
  82. }
  83. std::string format_expression_as_quickbook(std::string s)
  84. {
  85. static const boost::regex e("[`/_*=$^@#&%\\\\]");
  86. static const boost::regex open_b("\\[");
  87. static const boost::regex close_b("\\]");
  88. s = regex_replace(s, e, "\\\\$0");
  89. s = regex_replace(s, open_b, "\\\\u005B");
  90. s = regex_replace(s, close_b, "\\\\u005D");
  91. if(s.size() > 200)
  92. {
  93. s.erase(200);
  94. s += " ...";
  95. }
  96. return "[^" + s + "]";
  97. }
  98. void test_match(const char* expression, const char* text, bool isperl = false)
  99. {
  100. std::string table = "Testing simple " + (isperl ? std::string("Perl") : std::string("leftmost-longest")) + " matches (platform = " + platform_name() + ", compiler = " + compiler_name() + ")";
  101. std::string row = format_expression_as_quickbook(expression);
  102. row += "[br]";
  103. row += format_expression_as_quickbook(text);
  104. for(list_type::const_iterator i = engines().begin(); i != engines().end(); ++i)
  105. {
  106. std::string heading = (*i)->name();
  107. if((*i)->set_expression(expression, isperl))
  108. {
  109. double time = exec_timed_test([&]() { return (*i)->match_test(text) ? 1 : 0; });
  110. report_execution_time(time, table, row, heading);
  111. }
  112. }
  113. }
  114. void test_search(const char* expression, const char* text, bool isperl = false, const char* filename = 0)
  115. {
  116. std::string table = "Testing " + (isperl ? std::string("Perl") : std::string("leftmost-longest")) + " searches (platform = " + platform_name() + ", compiler = " + compiler_name() + ")";
  117. std::string row = format_expression_as_quickbook(expression);
  118. row += "[br]";
  119. if(filename)
  120. {
  121. row += "In file: ";
  122. row += filename;
  123. }
  124. else
  125. {
  126. row += format_expression_as_quickbook(text);
  127. }
  128. for(list_type::const_iterator i = engines().begin(); i != engines().end(); ++i)
  129. {
  130. std::string heading = (*i)->name();
  131. if((*i)->set_expression(expression, isperl))
  132. {
  133. double time = exec_timed_test([&]() { return (*i)->find_all(text); });
  134. report_execution_time(time, table, row, heading);
  135. std::cout << "Search with library: " << heading << " found " << last_value_returned << " occurances.\n";
  136. }
  137. }
  138. }
  139. int cpp_main(int argc, char* argv[])
  140. {
  141. boost::filesystem::path here(__FILE__);
  142. here = here.parent_path().parent_path().parent_path().parent_path();
  143. boost::filesystem::path cpp_file = here / "boost";
  144. cpp_file /= "crc.hpp";
  145. // start with a simple test, this is basically a measure of the minimal overhead
  146. // involved in calling a regex matcher:
  147. test_match("abc", "abc");
  148. // these are from the regex docs:
  149. test_match("^([0-9]+)(\\-| |$)(.*)$", "100- this is a line of ftp response which contains a message string");
  150. test_match("([[:digit:]]{4}[- ]){3}[[:digit:]]{3,4}", "1234-5678-1234-456");
  151. // these are from http://www.regxlib.com/
  152. test_match("^([a-zA-Z0-9_\\-\\.]+)@((\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.)|(([a-zA-Z0-9\\-]+\\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\\]?)$", "john@johnmaddock.co.uk");
  153. test_match("^([a-zA-Z0-9_\\-\\.]+)@((\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.)|(([a-zA-Z0-9\\-]+\\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\\]?)$", "foo12@foo.edu");
  154. test_match("^([a-zA-Z0-9_\\-\\.]+)@((\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.)|(([a-zA-Z0-9\\-]+\\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\\]?)$", "bob.smith@foo.tv");
  155. test_match("^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$", "EH10 2QQ");
  156. test_match("^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$", "G1 1AA");
  157. test_match("^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$", "SW1 1ZZ");
  158. test_match("^[[:digit:]]{1,2}/[[:digit:]]{1,2}/[[:digit:]]{4}$", "4/1/2001");
  159. test_match("^[[:digit:]]{1,2}/[[:digit:]]{1,2}/[[:digit:]]{4}$", "12/12/2001");
  160. test_match("^[-+]?[[:digit:]]*\\.?[[:digit:]]*$", "123");
  161. test_match("^[-+]?[[:digit:]]*\\.?[[:digit:]]*$", "+3.14159");
  162. test_match("^[-+]?[[:digit:]]*\\.?[[:digit:]]*$", "-3.14159");
  163. // start with a simple test, this is basically a measure of the minimal overhead
  164. // involved in calling a regex matcher:
  165. test_match("abc", "abc", true);
  166. // these are from the regex docs:
  167. test_match("^([0-9]+)(\\-| |$)(.*)$", "100- this is a line of ftp response which contains a message string", true);
  168. test_match("([[:digit:]]{4}[- ]){3}[[:digit:]]{3,4}", "1234-5678-1234-456", true);
  169. // these are from http://www.regxlib.com/
  170. test_match("^([a-zA-Z0-9_\\-\\.]+)@((\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.)|(([a-zA-Z0-9\\-]+\\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\\]?)$", "john@johnmaddock.co.uk", true);
  171. test_match("^([a-zA-Z0-9_\\-\\.]+)@((\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.)|(([a-zA-Z0-9\\-]+\\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\\]?)$", "foo12@foo.edu", true);
  172. test_match("^([a-zA-Z0-9_\\-\\.]+)@((\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.)|(([a-zA-Z0-9\\-]+\\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\\]?)$", "bob.smith@foo.tv", true);
  173. test_match("^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$", "EH10 2QQ", true);
  174. test_match("^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$", "G1 1AA", true);
  175. test_match("^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$", "SW1 1ZZ", true);
  176. test_match("^[[:digit:]]{1,2}/[[:digit:]]{1,2}/[[:digit:]]{4}$", "4/1/2001", true);
  177. test_match("^[[:digit:]]{1,2}/[[:digit:]]{1,2}/[[:digit:]]{4}$", "12/12/2001", true);
  178. test_match("^[-+]?[[:digit:]]*\\.?[[:digit:]]*$", "123", true);
  179. test_match("^[-+]?[[:digit:]]*\\.?[[:digit:]]*$", "+3.14159", true);
  180. test_match("^[-+]?[[:digit:]]*\\.?[[:digit:]]*$", "-3.14159", true);
  181. std::string file_contents;
  182. const char* highlight_expression = // preprocessor directives: index 1
  183. "(^[ \\t]*#(?:(?>[^\\\\\\n]+)|\\\\(?>\\s*\\n|.))*)|";
  184. // comment: index 2
  185. "(//[^\\n]*|/\\*.*?\\*/)|"
  186. // literals: index 3
  187. "\\<([+-]?(?:(?:0x[[:xdigit:]]+)|(?:(?:[[:digit:]]*\\.)?[[:digit:]]+(?:[eE][+-]?[[:digit:]]+)?))u?(?:(?:int(?:8|16|32|64))|L)?)\\>|"
  188. // string literals: index 4
  189. "('(?:[^\\\\']|\\\\.)*'|\"(?:[^\\\\\"]|\\\\.)*\")|"
  190. // keywords: index 5
  191. "\\<(__asm|__cdecl|__declspec|__export|__far16|__fastcall|__fortran|__import"
  192. "|__pascal|__rtti|__stdcall|_asm|_cdecl|__except|_export|_far16|_fastcall"
  193. "|__finally|_fortran|_import|_pascal|_stdcall|__thread|__try|asm|auto|bool"
  194. "|break|case|catch|cdecl|char|class|const|const_cast|continue|default|delete"
  195. "|do|double|dynamic_cast|else|enum|explicit|extern|false|float|for|friend|goto"
  196. "|if|inline|int|long|mutable|namespace|new|operator|pascal|private|protected"
  197. "|public|register|reinterpret_cast|return|short|signed|sizeof|static|static_cast"
  198. "|struct|switch|template|this|throw|true|try|typedef|typeid|typename|union|unsigned"
  199. "|using|virtual|void|volatile|wchar_t|while)\\>"
  200. ;
  201. const char* class_expression = "(template[[:space:]]*<[^;:{]+>[[:space:]]*)?"
  202. "(class|struct)[[:space:]]*(\\w+([ \t]*\\([^)]*\\))?"
  203. "[[:space:]]*)*(\\w*)[[:space:]]*(<[^;:{]+>[[:space:]]*)?"
  204. "(\\{|:[^;\\{()]*\\{)";
  205. const char* call_expression = "\\w+\\s*(\\([^()]++(?:(?1)[^()]++)*+[^)]*\\))";
  206. const char* include_expression = "^[ \t]*#[ \t]*include[ \t]+(\"[^\"]+\"|<[^>]+>)";
  207. const char* boost_include_expression = "^[ \t]*#[ \t]*include[ \t]+(\"boost/[^\"]+\"|<boost/[^>]+>)";
  208. const char* brace_expression = "\\{[^{}]++((?0)[^{}]++)*+[^}]*+\\}";
  209. const char* function_with_body_expression = "(\\w+)\\s*(\\([^()]++(?:(?2)[^()]++)*+[^)]*\\))\\s*(\\{[^{}]++((?3)[^{}]++)*+[^}]*+\\})";
  210. load_file(file_contents, "../../../libs/libraries.htm");
  211. test_search("Beman|John|Dave", file_contents.c_str(), false, "../../../libs/libraries.htm");
  212. test_search("Beman|John|Dave", file_contents.c_str(), true, "../../../libs/libraries.htm");
  213. test_search("(?i)<p>.*?</p>", file_contents.c_str(), true, "../../../libs/libraries.htm");
  214. test_search("<a[^>]+href=(\"[^\"]*\"|[^[:space:]]+)[^>]*>", file_contents.c_str(), false, "../../../libs/libraries.htm");
  215. test_search("(?i)<a[^>]+href=(\"[^\"]*\"|[^[:space:]]+)[^>]*>", file_contents.c_str(), true, "../../../libs/libraries.htm");
  216. test_search("(?i)<h[12345678][^>]*>.*?</h[12345678]>", file_contents.c_str(), true, "../../../libs/libraries.htm");
  217. test_search("<img[^>]+src=(\"[^\"]*\"|[^[:space:]]+)[^>]*>", file_contents.c_str(), false, "../../../libs/libraries.htm");
  218. test_search("(?i)<img[^>]+src=(\"[^\"]*\"|[^[:space:]]+)[^>]*>", file_contents.c_str(), true, "../../../libs/libraries.htm");
  219. test_search("(?i)<font[^>]+face=(\"[^\"]*\"|[^[:space:]]+)[^>]*>.*?</font>", file_contents.c_str(), true, "../../../libs/libraries.htm");
  220. load_file(file_contents, "../../../boost/multiprecision/number.hpp");
  221. test_search(function_with_body_expression, file_contents.c_str(), true, "boost/multiprecision/number.hpp");
  222. test_search(brace_expression, file_contents.c_str(), true, "boost/multiprecision/number.hpp");
  223. test_search(call_expression, file_contents.c_str(), true, "boost/multiprecision/number.hpp");
  224. test_search(highlight_expression, file_contents.c_str(), true, "boost/multiprecision/number.hpp");
  225. test_search(class_expression, file_contents.c_str(), true, "boost/multiprecision/number.hpp");
  226. test_search(include_expression, file_contents.c_str(), true, "boost/multiprecision/number.hpp");
  227. test_search(boost_include_expression, file_contents.c_str(), true, "boost/multiprecision/number.hpp");
  228. return 0;
  229. }