regex_token_iterator_eg_2.cpp 2.2 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889
  1. /*
  2. *
  3. * Copyright (c) 2003
  4. * John Maddock
  5. *
  6. * Use, modification and distribution are subject to the
  7. * Boost Software License, Version 1.0. (See accompanying file
  8. * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  9. *
  10. */
  11. /*
  12. * LOCATION: see http://www.boost.org for most recent version.
  13. * FILE regex_token_iterator_example_2.cpp
  14. * VERSION see <boost/version.hpp>
  15. * DESCRIPTION: regex_token_iterator example: print the URLs linked from each input file.
  16. */
  17. #include <boost/regex.hpp>
  18. #include <fstream>
  19. #include <iostream>
  20. #include <iterator>
  21. boost::regex e("<\\s*A\\s+[^>]*href\\s*=\\s*\"([^\"]*)\"",
  22. boost::regex::normal | boost::regbase::icase);
  /*
   * Reads every remaining character of `is` into `s`, discarding whatever
   * `s` held before.
   *
   *   s  - destination string; always cleared first.
   *   is - source stream; read with get() until extraction fails, so the
   *        stream is left in a failed state on return.
   *
   * If `is` is already in a failed state on entry (for example a file
   * stream that could not be opened), `s` is left empty and nothing is read.
   */
  void load_file(std::string& s, std::istream& is)
  {
     s.erase();
     if(!is) return;
     //
     // Try to size the buffer up front.  in_avail() is only a hint: it may be
     // zero, and it may be -1 when the buffer reports that no characters are
     // available.  Converting -1 to size_type would request an impossible
     // capacity and make reserve() throw std::length_error, so the hint is
     // used only when it is positive and representable.
     //
     const std::streamsize hint = is.rdbuf()->in_avail();
     if(hint > 0 && static_cast<std::string::size_type>(hint) <= s.max_size())
        s.reserve(static_cast<std::string::size_type>(hint));
     char c;
     while(is.get(c))
     {
        // push_back grows the buffer with amortized constant cost, so no
        // manual growth policy is needed when the hint was unusable.
        s.push_back(c);
     }
  }
  41. int main(int argc, char** argv)
  42. {
  43. std::string s;
  44. int i;
  45. for(i = 1; i < argc; ++i)
  46. {
  47. std::cout << "Findings URL's in " << argv[i] << ":" << std::endl;
  48. s.erase();
  49. std::ifstream is(argv[i]);
  50. load_file(s, is);
  51. is.close();
  52. boost::sregex_token_iterator i(s.begin(), s.end(), e, 1);
  53. boost::sregex_token_iterator j;
  54. while(i != j)
  55. {
  56. std::cout << *i++ << std::endl;
  57. }
  58. }
  59. //
  60. // alternative method:
  61. // test the array-literal constructor, and split out the whole
  62. // match as well as $1....
  63. //
  64. for(i = 1; i < argc; ++i)
  65. {
  66. std::cout << "Findings URL's in " << argv[i] << ":" << std::endl;
  67. s.erase();
  68. std::ifstream is(argv[i]);
  69. load_file(s, is);
  70. is.close();
  71. const int subs[] = {1, 0,};
  72. boost::sregex_token_iterator i(s.begin(), s.end(), e, subs);
  73. boost::sregex_token_iterator j;
  74. while(i != j)
  75. {
  76. std::cout << *i++ << std::endl;
  77. }
  78. }
  79. return 0;
  80. }