partial_regex_iterate.cpp 2.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118
  /*
   *
   * Copyright (c) 1998-2007
   * John Maddock
   *
   * Use, modification and distribution are subject to the
   * Boost Software License, Version 1.0. (See accompanying file
   * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
   *
   */
  /*
   * LOCATION: see http://www.boost.org for most recent version.
   * FILE partial_regex_iterate.cpp
   * VERSION see <boost/version.hpp>
   * DESCRIPTION: Search example using partial matches.
   */
  17. #include <boost/regex.hpp>
  18. #include <iostream>
  19. #include <fstream>
  20. #include <sstream>
  21. #include <string>
  22. #include <cstring>
  23. #ifdef BOOST_NO_STDC_NAMESPACE
  24. namespace std{ using ::memmove; }
  25. #endif
  26. // match some kind of html tag:
  27. boost::regex e("<[^>]*>");
  28. // count how many:
  29. unsigned int tags = 0;
  30. void search(std::istream& is)
  31. {
  32. // buffer we'll be searching in:
  33. char buf[4096];
  34. // saved position of end of partial match:
  35. const char* next_pos = buf + sizeof(buf);
  36. // flag to indicate whether there is more input to come:
  37. bool have_more = true;
  38. while(have_more)
  39. {
  40. // how much do we copy forward from last try:
  41. std::ptrdiff_t leftover = (buf + sizeof(buf)) - next_pos;
  42. // and how much is left to fill:
  43. std::ptrdiff_t size = next_pos - buf;
  44. // copy forward whatever we have left:
  45. std::memmove(buf, next_pos, leftover);
  46. // fill the rest from the stream:
  47. is.read(buf + leftover, size);
  48. std::streamsize read = is.gcount();
  49. // check to see if we've run out of text:
  50. have_more = read == size;
  51. // reset next_pos:
  52. next_pos = buf + sizeof(buf);
  53. // and then iterate:
  54. boost::cregex_iterator a(
  55. buf,
  56. buf + read + leftover,
  57. e,
  58. boost::match_default | boost::match_partial);
  59. boost::cregex_iterator b;
  60. while(a != b)
  61. {
  62. if((*a)[0].matched == false)
  63. {
  64. // Partial match, save position and break:
  65. next_pos = (*a)[0].first;
  66. break;
  67. }
  68. else
  69. {
  70. // full match:
  71. ++tags;
  72. }
  73. // move to next match:
  74. ++a;
  75. }
  76. }
  77. }
  78. int main(int argc, char* argv[])
  79. {
  80. if(argc > 1)
  81. {
  82. for(int i = 1; i < argc; ++i)
  83. {
  84. std::ifstream fs(argv[i]);
  85. if(fs.bad()) continue;
  86. search(fs);
  87. fs.close();
  88. }
  89. }
  90. else
  91. {
  92. std::string one("<META NAME=\"keywords\" CONTENT=\"regex++, regular expressions, regular expression library, C++\">");
  93. std::string what;
  94. while(what.size() < 10000)
  95. {
  96. what.append(one);
  97. what.append(13, ' ');
  98. }
  99. std::stringstream ss;
  100. ss.str(what);
  101. search(ss);
  102. }
  103. std::cout << "total tag count was " << tags << std::endl;
  104. return 0;
  105. }