  // partial_regex_grep.cpp (2.6 KB)
  /*
   *
   * Copyright (c) 1998-2002
   * John Maddock
   *
   * Use, modification and distribution are subject to the
   * Boost Software License, Version 1.0. (See accompanying file
   * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
   *
   */
  /*
   * LOCATION: see http://www.boost.org for most recent version.
   * FILE partial_regex_grep.cpp
   * VERSION see <boost/version.hpp>
   * DESCRIPTION: Search example using partial matches.
   */
  #include <boost/regex.hpp>
  #include <cstddef>
  #include <cstring>
  #include <fstream>
  #include <iostream>
  #include <sstream>
  #include <string>
  #include <vector>
  #ifdef BOOST_NO_STDC_NAMESPACE
  namespace std{ using ::memmove; }
  #endif
  26. // match some kind of html tag:
  27. boost::regex e("<[^>]*>");
  28. // count how many:
  29. unsigned int tags = 0;
  30. // saved position of partial match:
  31. const char* next_pos = 0;
  32. bool grep_callback(const boost::match_results<const char*>& m)
  33. {
  34. if(m[0].matched == false)
  35. {
  36. // save position and return:
  37. next_pos = m[0].first;
  38. }
  39. else
  40. ++tags;
  41. return true;
  42. }
  43. void search(std::istream& is)
  44. {
  45. char buf[4096];
  46. next_pos = buf + sizeof(buf);
  47. bool have_more = true;
  48. while(have_more)
  49. {
  50. // how much do we copy forward from last try:
  51. std::ptrdiff_t leftover = (buf + sizeof(buf)) - next_pos;
  52. // and how much is left to fill:
  53. std::ptrdiff_t size = next_pos - buf;
  54. // copy forward whatever we have left:
  55. std::memmove(buf, next_pos, leftover);
  56. // fill the rest from the stream:
  57. is.read(buf + leftover, size);
  58. std::streamsize read = is.gcount();
  59. // check to see if we've run out of text:
  60. have_more = read == size;
  61. // reset next_pos:
  62. next_pos = buf + sizeof(buf);
  63. // and then grep:
  64. boost::regex_grep<bool(*)(const boost::cmatch&), const char*>(grep_callback,
  65. static_cast<const char*>(buf),
  66. static_cast<const char*>(buf + read + leftover),
  67. e,
  68. boost::match_default | boost::match_partial);
  69. }
  70. }
  71. int main(int argc, char* argv[])
  72. {
  73. if(argc > 1)
  74. {
  75. for(int i = 1; i < argc; ++i)
  76. {
  77. std::ifstream fs(argv[i]);
  78. if(fs.bad()) continue;
  79. search(fs);
  80. fs.close();
  81. }
  82. }
  83. else
  84. {
  85. std::string one("<META NAME=\"keywords\" CONTENT=\"regex++, regular expressions, regular expression library, C++\">");
  86. std::string what;
  87. while(what.size() < 10000)
  88. {
  89. what.append(one);
  90. what.append(13, ' ');
  91. }
  92. std::stringstream ss;
  93. ss.str(what);
  94. search(ss);
  95. }
  96. std::cout << "total tag count was " << tags << std::endl;
  97. return 0;
  98. }