regex_split_example_2.cpp 2.1 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586
  1. /*
  2. *
  3. * Copyright (c) 1998-2002
  4. * John Maddock
  5. *
  6. * Use, modification and distribution are subject to the
  7. * Boost Software License, Version 1.0. (See accompanying file
  8. * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  9. *
  10. */
  11. /*
  12. * LOCATION: see http://www.boost.org for most recent version.
  13. * FILE regex_split_example_2.cpp
  14. * VERSION see <boost/version.hpp>
  15. * DESCRIPTION: regex_split example: spit out linked URL's.
  16. */
  17. #include <boost/regex.hpp>
  18. #include <list>
  19. #include <fstream>
  20. #include <iostream>
  21. #include <iterator>
  22. boost::regex e("<\\s*A\\s+[^>]*href\\s*=\\s*\"([^\"]*)\"",
  23. boost::regex::normal | boost::regbase::icase);
  //
  // load_file: replace the contents of s with every character that can
  // still be read from is.
  //
  // s  - destination string; any previous contents are discarded.
  // is - source stream; it is read one character at a time until get()
  //      fails, so on return the stream is normally in a failed/eof state.
  //      If the stream is already bad() on entry nothing is read and s is
  //      left empty.
  //
  void load_file(std::string& s, std::istream& is)
  {
     s.erase();
     if (is.bad())
        return;
     //
     // Try to size the buffer up front from the stream's own estimate.
     // in_avail() is only a hint: it may be zero, and it is -1 when the
     // buffer knows nothing more can be read (e.g. a file that failed to
     // open).  Casting -1 to an unsigned size would request an impossible
     // capacity and make reserve() throw, so only positive, representable
     // hints are used.
     //
     const std::streamsize hint = is.rdbuf()->in_avail();
     if (hint > 0)
     {
        const std::string::size_type wanted =
           static_cast<std::string::size_type>(hint);
        if (wanted <= s.max_size())
           s.reserve(wanted);
     }
     //
     // std::string already grows its storage geometrically, so no manual
     // capacity management is needed when the hint was missing or too small.
     //
     char c;
     while (is.get(c))
        s.push_back(c);
  }
  42. int main(int argc, char** argv)
  43. {
  44. std::string s;
  45. std::list<std::string> l;
  46. int i;
  47. for(i = 1; i < argc; ++i)
  48. {
  49. std::cout << "Findings URL's in " << argv[i] << ":" << std::endl;
  50. s.erase();
  51. std::ifstream is(argv[i]);
  52. load_file(s, is);
  53. is.close();
  54. boost::regex_split(std::back_inserter(l), s, e);
  55. while(l.size())
  56. {
  57. s = *(l.begin());
  58. l.pop_front();
  59. std::cout << s << std::endl;
  60. }
  61. }
  62. //
  63. // alternative method:
  64. // split one match at a time and output direct to
  65. // cout via ostream_iterator<std::string>....
  66. //
  67. for(i = 1; i < argc; ++i)
  68. {
  69. std::cout << "Findings URL's in " << argv[i] << ":" << std::endl;
  70. s.erase();
  71. std::ifstream is(argv[i]);
  72. load_file(s, is);
  73. is.close();
  74. while(boost::regex_split(std::ostream_iterator<std::string>(std::cout), s, e, boost::match_default, 1)) std::cout << std::endl;
  75. }
  76. return 0;
  77. }