// Boost.Bimap
//
// Copyright (c) 2006-2007 Matias Capeletto
//
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)

/*****************************************************************************
Boost.MultiIndex
*****************************************************************************/
#include <boost/config.hpp>

//[ code_mi_to_b_path_mi_hashed_indices

#include <functional>
#include <iomanip>
#include <iostream>
#include <string>

#include <boost/tokenizer.hpp>

#include <boost/multi_index_container.hpp>
#include <boost/multi_index/key_extractors.hpp>
#include <boost/multi_index/ordered_index.hpp>
#include <boost/multi_index/hashed_index.hpp>
#include <boost/lambda/lambda.hpp>

using namespace boost::multi_index;
namespace bl = boost::lambda;
// word_counter keeps the occurrences of words inserted. A hashed
// index allows for fast checking of preexisting entries.
// One record of the word-frequency table: a word and the number of times
// it has been counted so far.
struct word_counter_entry
{
    std::string  word;         // the token itself
    unsigned int occurrences;  // how many times `word` has been counted

    // Builds an entry for `word_` with a zero count.
    //
    // Intentionally NOT `explicit`: main() calls `wc.insert(*it)` with a
    // std::string token and relies on the implicit conversion to
    // word_counter_entry.
    //
    // Takes the argument by const reference so the string is copied only
    // once (into the member) instead of once per by-value parameter and
    // once more into the member.
    word_counter_entry( const std::string& word_ )
        : word(word_), occurrences(0) {}
};
- typedef multi_index_container
- <
- word_counter_entry,
- indexed_by
- <
- ordered_non_unique
- <
- BOOST_MULTI_INDEX_MEMBER(
- word_counter_entry,unsigned int,occurrences),
- std::greater<unsigned int>
- >,
- hashed_unique
- <
- BOOST_MULTI_INDEX_MEMBER(word_counter_entry,std::string,word)
- >
- >
- > word_counter;
- typedef boost::tokenizer<boost::char_separator<char> > text_tokenizer;
- int main()
- {
- std::string text=
- "En un lugar de la Mancha, de cuyo nombre no quiero acordarme... "
- "...snip..."
- "...no se salga un punto de la verdad.";
- // feed the text into the container
- word_counter wc;
- text_tokenizer tok(text,boost::char_separator<char>(" \t\n.,;:!?'\"-"));
- unsigned int total_occurrences = 0;
- for( text_tokenizer::iterator it = tok.begin(), it_end = tok.end();
- it != it_end ; ++it )
- {
- ++total_occurrences;
- word_counter::iterator wit = wc.insert(*it).first;
- wc.modify_key( wit, ++ bl::_1 );
- }
- // list words by frequency of appearance
- std::cout << std::fixed << std::setprecision(2);
- for( word_counter::iterator wit = wc.begin(), wit_end=wc.end();
- wit != wit_end; ++wit )
- {
- std::cout << std::setw(11) << wit->word << ": "
- << std::setw(5)
- << 100.0 * wit->occurrences / total_occurrences << "%"
- << std::endl;
- }
- return 0;
- }
//]