symbols.hpp 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284
  1. ///////////////////////////////////////////////////////////////////////////////
  2. /// \file symbols.hpp
  3. /// Contains the Ternary Search Trie implementation.
  4. /// Based on the following papers:
  5. /// J. Bentley and R. Sedgewick. (1998) Ternary search trees. Dr. Dobbs Journal
  6. /// G. Badr and B.J. Oommen. (2005) Self-Adjusting of Ternary Search Tries Using
  7. /// Conditional Rotations and Randomized Heuristics
  8. //
  9. // Copyright 2007 David Jenkins.
  10. // Copyright 2007 Eric Niebler.
  11. //
  12. // Distributed under the Boost Software License, Version 1.0. (See
  13. // accompanying file LICENSE_1_0.txt or copy at
  14. // http://www.boost.org/LICENSE_1_0.txt)
  15. #ifndef BOOST_XPRESSIVE_DETAIL_SYMBOLS_HPP_DRJ_06_11_2007
  16. #define BOOST_XPRESSIVE_DETAIL_SYMBOLS_HPP_DRJ_06_11_2007
  17. // MS compatible compilers support #pragma once
  18. #if defined(_MSC_VER)
  19. # pragma once
  20. #endif
  21. #include <boost/noncopyable.hpp>
  22. #include <boost/range/begin.hpp>
  23. #include <boost/range/end.hpp>
  24. #include <boost/range/value_type.hpp>
  25. #include <boost/range/const_iterator.hpp>
  26. #include <boost/shared_ptr.hpp>
  27. namespace boost { namespace xpressive { namespace detail
  28. {
  29. ///////////////////////////////////////////////////////////////////////////////
  30. // symbols (using a ternary search trie)
  31. //
  32. template<typename Map>
  33. struct symbols
  34. {
  35. typedef typename range_value<Map>::type::first_type key_type;
  36. typedef typename range_value<Map>::type::second_type value_type;
  37. typedef typename range_value<key_type>::type char_type;
  38. typedef typename range_const_iterator<Map>::type iterator;
  39. typedef typename range_const_iterator<key_type>::type key_iterator;
  40. typedef value_type const *result_type;
  41. // copies of this symbol table share the TST
  42. template<typename Trans>
  43. void load(Map const &map, Trans trans)
  44. {
  45. iterator begin = boost::begin(map);
  46. iterator end = boost::end(map);
  47. node* root_p = this->root.get();
  48. for(; begin != end; ++begin)
  49. {
  50. key_iterator kbegin = boost::begin(begin->first);
  51. key_iterator kend = boost::end(begin->first);
  52. root_p = this->insert(root_p, kbegin, kend, &begin->second, trans);
  53. }
  54. this->root.reset(root_p);
  55. }
  56. template<typename BidiIter, typename Trans>
  57. result_type operator ()(BidiIter &begin, BidiIter end, Trans trans) const
  58. {
  59. return this->search(begin, end, trans, this->root.get());
  60. }
  61. template<typename Sink>
  62. void peek(Sink const &sink) const
  63. {
  64. this->peek_(this->root.get(), sink);
  65. }
  66. private:
  67. ///////////////////////////////////////////////////////////////////////////////
  68. // struct node : a node in the TST.
  69. // The "eq" field stores the result pointer when ch is zero.
  70. //
  71. struct node
  72. : boost::noncopyable
  73. {
  74. node(char_type c)
  75. : ch(c)
  76. , lo(0)
  77. , eq(0)
  78. , hi(0)
  79. #ifdef BOOST_DISABLE_THREADS
  80. , tau(0)
  81. #endif
  82. {}
  83. ~node()
  84. {
  85. delete lo;
  86. if (ch)
  87. delete eq;
  88. delete hi;
  89. }
  90. void swap(node& that)
  91. {
  92. std::swap(ch, that.ch);
  93. std::swap(lo, that.lo);
  94. std::swap(eq, that.eq);
  95. std::swap(hi, that.hi);
  96. #ifdef BOOST_DISABLE_THREADS
  97. std::swap(tau, that.tau);
  98. #endif
  99. }
  100. char_type ch;
  101. node* lo;
  102. union
  103. {
  104. node* eq;
  105. result_type result;
  106. };
  107. node* hi;
  108. #ifdef BOOST_DISABLE_THREADS
  109. long tau;
  110. #endif
  111. };
  112. ///////////////////////////////////////////////////////////////////////////////
  113. // insert : insert a string into the TST
  114. //
  115. template<typename Trans>
  116. node* insert(node* p, key_iterator &begin, key_iterator end, result_type r, Trans trans) const
  117. {
  118. char_type c1 = 0;
  119. if(begin != end)
  120. {
  121. c1 = trans(*begin);
  122. }
  123. if(!p)
  124. {
  125. p = new node(c1);
  126. }
  127. if(c1 < p->ch)
  128. {
  129. p->lo = this->insert(p->lo, begin, end, r, trans);
  130. }
  131. else if(c1 == p->ch)
  132. {
  133. if(0 == c1)
  134. {
  135. p->result = r;
  136. }
  137. else
  138. {
  139. p->eq = this->insert(p->eq, ++begin, end, r, trans);
  140. }
  141. }
  142. else
  143. {
  144. p->hi = this->insert(p->hi, begin, end, r, trans);
  145. }
  146. return p;
  147. }
  148. #ifdef BOOST_DISABLE_THREADS
  149. ///////////////////////////////////////////////////////////////////////////////
  150. // conditional rotation : the goal is to minimize the overall
  151. // weighted path length of each binary search tree
  152. //
  153. bool cond_rotation(bool left, node* const i, node* const j) const
  154. {
  155. // don't rotate top node in binary search tree
  156. if (i == j)
  157. return false;
  158. // calculate psi (the rotation condition)
  159. node* const k = (left ? i->hi : i->lo);
  160. long psi = 2*i->tau - j->tau - (k ? k->tau : 0);
  161. if (psi <= 0)
  162. return false;
  163. // recalculate the tau values
  164. j->tau += -i->tau + (k ? k->tau : 0);
  165. i->tau += j->tau - (k ? k->tau : 0);
  166. // fixup links and swap nodes
  167. if (left)
  168. {
  169. j->lo = k;
  170. i->hi = i;
  171. }
  172. else
  173. {
  174. j->hi = k;
  175. i->lo = i;
  176. }
  177. (*i).swap(*j);
  178. return true;
  179. }
  180. #endif
  181. ///////////////////////////////////////////////////////////////////////////////
  182. // search : find a string in the TST
  183. //
  184. template<typename BidiIter, typename Trans>
  185. result_type search(BidiIter &begin, BidiIter end, Trans trans, node* p) const
  186. {
  187. result_type r = 0;
  188. #ifdef BOOST_DISABLE_THREADS
  189. node* p2 = p;
  190. bool left = false;
  191. #endif
  192. char_type c1 = (begin != end ? trans(*begin) : 0);
  193. while(p)
  194. {
  195. #ifdef BOOST_DISABLE_THREADS
  196. ++p->tau;
  197. #endif
  198. if(c1 == p->ch)
  199. {
  200. // conditional rotation test
  201. #ifdef BOOST_DISABLE_THREADS
  202. if (this->cond_rotation(left, p, p2))
  203. p = p2;
  204. #endif
  205. if (0 == p->ch)
  206. {
  207. // it's a match!
  208. r = p->result;
  209. }
  210. if(begin == end)
  211. break;
  212. ++begin;
  213. p = p->eq;
  214. // search for the longest match first
  215. r = search(begin,end,trans,p);
  216. if (0 == r)
  217. {
  218. // search for a match ending here
  219. r = search(end,end,trans,p);
  220. if (0 == r)
  221. {
  222. --begin;
  223. }
  224. }
  225. break;
  226. }
  227. else if(c1 < p->ch)
  228. {
  229. #ifdef BOOST_DISABLE_THREADS
  230. left = true;
  231. p2 = p;
  232. #endif
  233. p = p->lo;
  234. }
  235. else // (c1 > p->ch)
  236. {
  237. #ifdef BOOST_DISABLE_THREADS
  238. left = false;
  239. p2 = p;
  240. #endif
  241. p = p->hi;
  242. }
  243. }
  244. return r;
  245. }
  246. template<typename Sink>
  247. void peek_(node const *const &p, Sink const &sink) const
  248. {
  249. if(p)
  250. {
  251. sink(p->ch);
  252. this->peek_(p->lo, sink);
  253. this->peek_(p->hi, sink);
  254. }
  255. }
  256. boost::shared_ptr<node> root;
  257. };
  258. }}} // namespace boost::xpressive::detail
  259. #endif