regex_traits_defaults.hpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381
  1. /*
  2. *
  3. * Copyright (c) 2004
  4. * John Maddock
  5. *
  6. * Use, modification and distribution are subject to the
  7. * Boost Software License, Version 1.0. (See accompanying file
  8. * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  9. *
  10. */
  11. /*
  12. * LOCATION: see http://www.boost.org for most recent version.
  13. * FILE regex_traits_defaults.hpp
  14. * VERSION see <boost/version.hpp>
  15. * DESCRIPTION: Declares APIs for access to regex_traits default properties.
  16. */
  17. #ifndef BOOST_REGEX_TRAITS_DEFAULTS_HPP_INCLUDED
  18. #define BOOST_REGEX_TRAITS_DEFAULTS_HPP_INCLUDED
  19. #ifdef BOOST_MSVC
  20. #pragma warning(push)
  21. #pragma warning(disable: 4103)
  22. #endif
  23. #ifdef BOOST_HAS_ABI_HEADERS
  24. # include BOOST_ABI_PREFIX
  25. #endif
  26. #ifdef BOOST_MSVC
  27. #pragma warning(pop)
  28. #endif
  29. #include <boost/regex/config.hpp>
  30. #ifndef BOOST_REGEX_SYNTAX_TYPE_HPP
  31. #include <boost/regex/v4/syntax_type.hpp>
  32. #endif
  33. #ifndef BOOST_REGEX_ERROR_TYPE_HPP
  34. #include <boost/regex/v4/error_type.hpp>
  35. #endif
  36. #include <boost/type_traits/make_unsigned.hpp>
  37. #include <boost/utility/enable_if.hpp>
  38. #ifdef BOOST_NO_STDC_NAMESPACE
  39. namespace std{
  40. using ::strlen;
  41. }
  42. #endif
  43. namespace boost{ namespace BOOST_REGEX_DETAIL_NS{
  44. //
  45. // helpers to suppress warnings:
  46. //
  47. template <class charT>
  48. inline bool is_extended(charT c)
  49. {
  50. typedef typename make_unsigned<charT>::type unsigned_type;
  51. return (sizeof(charT) > 1) && (static_cast<unsigned_type>(c) >= 256u);
  52. }
  53. inline bool is_extended(char)
  54. { return false; }
  55. BOOST_REGEX_DECL const char* BOOST_REGEX_CALL get_default_syntax(regex_constants::syntax_type n);
  56. BOOST_REGEX_DECL const char* BOOST_REGEX_CALL get_default_error_string(regex_constants::error_type n);
  57. BOOST_REGEX_DECL regex_constants::syntax_type BOOST_REGEX_CALL get_default_syntax_type(char c);
  58. BOOST_REGEX_DECL regex_constants::escape_syntax_type BOOST_REGEX_CALL get_default_escape_syntax_type(char c);
  59. // is charT c a combining character?
  60. BOOST_REGEX_DECL bool BOOST_REGEX_CALL is_combining_implementation(uint_least16_t s);
  61. template <class charT>
  62. inline bool is_combining(charT c)
  63. {
  64. return (c <= static_cast<charT>(0)) ? false : ((c >= static_cast<charT>((std::numeric_limits<uint_least16_t>::max)())) ? false : is_combining_implementation(static_cast<unsigned short>(c)));
  65. }
  66. template <>
  67. inline bool is_combining<char>(char)
  68. {
  69. return false;
  70. }
  71. template <>
  72. inline bool is_combining<signed char>(signed char)
  73. {
  74. return false;
  75. }
  76. template <>
  77. inline bool is_combining<unsigned char>(unsigned char)
  78. {
  79. return false;
  80. }
  81. #if !defined(__hpux) && !defined(__WINSCW__) // can't use WCHAR_MAX/MIN in pp-directives
  82. #ifdef _MSC_VER
  83. template<>
  84. inline bool is_combining<wchar_t>(wchar_t c)
  85. {
  86. return is_combining_implementation(static_cast<unsigned short>(c));
  87. }
  88. #elif !defined(__DECCXX) && !defined(__osf__) && !defined(__OSF__) && defined(WCHAR_MIN) && (WCHAR_MIN == 0) && !defined(BOOST_NO_INTRINSIC_WCHAR_T)
  89. #if defined(WCHAR_MAX) && (WCHAR_MAX <= USHRT_MAX)
  90. template<>
  91. inline bool is_combining<wchar_t>(wchar_t c)
  92. {
  93. return is_combining_implementation(static_cast<unsigned short>(c));
  94. }
  95. #else
  96. template<>
  97. inline bool is_combining<wchar_t>(wchar_t c)
  98. {
  99. return (c >= (std::numeric_limits<uint_least16_t>::max)()) ? false : is_combining_implementation(static_cast<unsigned short>(c));
  100. }
  101. #endif
  102. #endif
  103. #endif
  104. //
  105. // is a charT c a line separator?
  106. //
  107. template <class charT>
  108. inline bool is_separator(charT c)
  109. {
  110. return BOOST_REGEX_MAKE_BOOL(
  111. (c == static_cast<charT>('\n'))
  112. || (c == static_cast<charT>('\r'))
  113. || (c == static_cast<charT>('\f'))
  114. || (static_cast<boost::uint16_t>(c) == 0x2028u)
  115. || (static_cast<boost::uint16_t>(c) == 0x2029u)
  116. || (static_cast<boost::uint16_t>(c) == 0x85u));
  117. }
  118. template <>
  119. inline bool is_separator<char>(char c)
  120. {
  121. return BOOST_REGEX_MAKE_BOOL((c == '\n') || (c == '\r') || (c == '\f'));
  122. }
  123. //
  124. // get a default collating element:
  125. //
  126. BOOST_REGEX_DECL std::string BOOST_REGEX_CALL lookup_default_collate_name(const std::string& name);
  127. //
  128. // get the state_id of a character classification, the individual
  129. // traits classes then transform that state_id into a bitmask:
  130. //
  131. template <class charT>
  132. struct character_pointer_range
  133. {
  134. const charT* p1;
  135. const charT* p2;
  136. bool operator < (const character_pointer_range& r)const
  137. {
  138. return std::lexicographical_compare(p1, p2, r.p1, r.p2);
  139. }
  140. bool operator == (const character_pointer_range& r)const
  141. {
  142. // Not only do we check that the ranges are of equal size before
  143. // calling std::equal, but there is no other algorithm available:
  144. // not even a non-standard MS one. So forward to unchecked_equal
  145. // in the MS case.
  146. return ((p2 - p1) == (r.p2 - r.p1)) && BOOST_REGEX_DETAIL_NS::equal(p1, p2, r.p1);
  147. }
  148. };
  149. template <class charT>
  150. int get_default_class_id(const charT* p1, const charT* p2)
  151. {
  152. static const charT data[73] = {
  153. 'a', 'l', 'n', 'u', 'm',
  154. 'a', 'l', 'p', 'h', 'a',
  155. 'b', 'l', 'a', 'n', 'k',
  156. 'c', 'n', 't', 'r', 'l',
  157. 'd', 'i', 'g', 'i', 't',
  158. 'g', 'r', 'a', 'p', 'h',
  159. 'l', 'o', 'w', 'e', 'r',
  160. 'p', 'r', 'i', 'n', 't',
  161. 'p', 'u', 'n', 'c', 't',
  162. 's', 'p', 'a', 'c', 'e',
  163. 'u', 'n', 'i', 'c', 'o', 'd', 'e',
  164. 'u', 'p', 'p', 'e', 'r',
  165. 'v',
  166. 'w', 'o', 'r', 'd',
  167. 'x', 'd', 'i', 'g', 'i', 't',
  168. };
  169. static const character_pointer_range<charT> ranges[21] =
  170. {
  171. {data+0, data+5,}, // alnum
  172. {data+5, data+10,}, // alpha
  173. {data+10, data+15,}, // blank
  174. {data+15, data+20,}, // cntrl
  175. {data+20, data+21,}, // d
  176. {data+20, data+25,}, // digit
  177. {data+25, data+30,}, // graph
  178. {data+29, data+30,}, // h
  179. {data+30, data+31,}, // l
  180. {data+30, data+35,}, // lower
  181. {data+35, data+40,}, // print
  182. {data+40, data+45,}, // punct
  183. {data+45, data+46,}, // s
  184. {data+45, data+50,}, // space
  185. {data+57, data+58,}, // u
  186. {data+50, data+57,}, // unicode
  187. {data+57, data+62,}, // upper
  188. {data+62, data+63,}, // v
  189. {data+63, data+64,}, // w
  190. {data+63, data+67,}, // word
  191. {data+67, data+73,}, // xdigit
  192. };
  193. static const character_pointer_range<charT>* ranges_begin = ranges;
  194. static const character_pointer_range<charT>* ranges_end = ranges + (sizeof(ranges)/sizeof(ranges[0]));
  195. character_pointer_range<charT> t = { p1, p2, };
  196. const character_pointer_range<charT>* p = std::lower_bound(ranges_begin, ranges_end, t);
  197. if((p != ranges_end) && (t == *p))
  198. return static_cast<int>(p - ranges);
  199. return -1;
  200. }
  201. //
  202. // helper functions:
  203. //
  204. template <class charT>
  205. std::ptrdiff_t global_length(const charT* p)
  206. {
  207. std::ptrdiff_t n = 0;
  208. while(*p)
  209. {
  210. ++p;
  211. ++n;
  212. }
  213. return n;
  214. }
  215. template<>
  216. inline std::ptrdiff_t global_length<char>(const char* p)
  217. {
  218. return (std::strlen)(p);
  219. }
  220. #ifndef BOOST_NO_WREGEX
  221. template<>
  222. inline std::ptrdiff_t global_length<wchar_t>(const wchar_t* p)
  223. {
  224. return (std::ptrdiff_t)(std::wcslen)(p);
  225. }
  226. #endif
  227. template <class charT>
  228. inline charT BOOST_REGEX_CALL global_lower(charT c)
  229. {
  230. return c;
  231. }
  232. template <class charT>
  233. inline charT BOOST_REGEX_CALL global_upper(charT c)
  234. {
  235. return c;
  236. }
  237. BOOST_REGEX_DECL char BOOST_REGEX_CALL do_global_lower(char c);
  238. BOOST_REGEX_DECL char BOOST_REGEX_CALL do_global_upper(char c);
  239. #ifndef BOOST_NO_WREGEX
  240. BOOST_REGEX_DECL wchar_t BOOST_REGEX_CALL do_global_lower(wchar_t c);
  241. BOOST_REGEX_DECL wchar_t BOOST_REGEX_CALL do_global_upper(wchar_t c);
  242. #endif
  243. #ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T
  244. BOOST_REGEX_DECL unsigned short BOOST_REGEX_CALL do_global_lower(unsigned short c);
  245. BOOST_REGEX_DECL unsigned short BOOST_REGEX_CALL do_global_upper(unsigned short c);
  246. #endif
  247. //
  248. // This sucks: declare template specialisations of global_lower/global_upper
  249. // that just forward to the non-template implementation functions. We do
  250. // this because there is one compiler (Compaq Tru64 C++) that doesn't seem
  251. // to differentiate between templates and non-template overloads....
  252. // what's more, the primary template, plus all overloads have to be
  253. // defined in the same translation unit (if one is inline they all must be)
  254. // otherwise the "local template instantiation" compiler option can pick
  255. // the wrong instantiation when linking:
  256. //
  257. template<> inline char BOOST_REGEX_CALL global_lower<char>(char c){ return do_global_lower(c); }
  258. template<> inline char BOOST_REGEX_CALL global_upper<char>(char c){ return do_global_upper(c); }
  259. #ifndef BOOST_NO_WREGEX
  260. template<> inline wchar_t BOOST_REGEX_CALL global_lower<wchar_t>(wchar_t c){ return do_global_lower(c); }
  261. template<> inline wchar_t BOOST_REGEX_CALL global_upper<wchar_t>(wchar_t c){ return do_global_upper(c); }
  262. #endif
  263. #ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T
  264. template<> inline unsigned short BOOST_REGEX_CALL global_lower<unsigned short>(unsigned short c){ return do_global_lower(c); }
  265. template<> inline unsigned short BOOST_REGEX_CALL global_upper<unsigned short>(unsigned short c){ return do_global_upper(c); }
  266. #endif
  267. template <class charT>
  268. int global_value(charT c)
  269. {
  270. static const charT zero = '0';
  271. static const charT nine = '9';
  272. static const charT a = 'a';
  273. static const charT f = 'f';
  274. static const charT A = 'A';
  275. static const charT F = 'F';
  276. if(c > f) return -1;
  277. if(c >= a) return 10 + (c - a);
  278. if(c > F) return -1;
  279. if(c >= A) return 10 + (c - A);
  280. if(c > nine) return -1;
  281. if(c >= zero) return c - zero;
  282. return -1;
  283. }
  284. template <class charT, class traits>
  285. boost::intmax_t global_toi(const charT*& p1, const charT* p2, int radix, const traits& t)
  286. {
  287. (void)t; // warning suppression
  288. boost::intmax_t limit = (std::numeric_limits<boost::intmax_t>::max)() / radix;
  289. boost::intmax_t next_value = t.value(*p1, radix);
  290. if((p1 == p2) || (next_value < 0) || (next_value >= radix))
  291. return -1;
  292. boost::intmax_t result = 0;
  293. while(p1 != p2)
  294. {
  295. next_value = t.value(*p1, radix);
  296. if((next_value < 0) || (next_value >= radix))
  297. break;
  298. result *= radix;
  299. result += next_value;
  300. ++p1;
  301. if (result > limit)
  302. return -1;
  303. }
  304. return result;
  305. }
  306. template <class charT>
  307. inline typename boost::enable_if_c<(sizeof(charT) > 1), const charT*>::type get_escape_R_string()
  308. {
  309. #ifdef BOOST_MSVC
  310. # pragma warning(push)
  311. # pragma warning(disable:4309 4245)
  312. #endif
  313. static const charT e1[] = { '(', '?', '-', 'x', ':', '(', '?', '>', '\x0D', '\x0A', '?',
  314. '|', '[', '\x0A', '\x0B', '\x0C', static_cast<charT>(0x85), static_cast<charT>(0x2028),
  315. static_cast<charT>(0x2029), ']', ')', ')', '\0' };
  316. static const charT e2[] = { '(', '?', '-', 'x', ':', '(', '?', '>', '\x0D', '\x0A', '?',
  317. '|', '[', '\x0A', '\x0B', '\x0C', static_cast<charT>(0x85), ']', ')', ')', '\0' };
  318. charT c = static_cast<charT>(0x2029u);
  319. bool b = (static_cast<unsigned>(c) == 0x2029u);
  320. return (b ? e1 : e2);
  321. #ifdef BOOST_MSVC
  322. # pragma warning(pop)
  323. #endif
  324. }
  325. template <class charT>
  326. inline typename boost::disable_if_c<(sizeof(charT) > 1), const charT*>::type get_escape_R_string()
  327. {
  328. #ifdef BOOST_MSVC
  329. # pragma warning(push)
  330. # pragma warning(disable:4309)
  331. #endif
  332. static const charT e2[] = { '(', '?', '-', 'x', ':', '(', '?', '>', '\x0D', '\x0A', '?',
  333. '|', '[', '\x0A', '\x0B', '\x0C', '\x85', ']', ')', ')', '\0' };
  334. return e2;
  335. #ifdef BOOST_MSVC
  336. # pragma warning(pop)
  337. #endif
  338. }
  339. } // BOOST_REGEX_DETAIL_NS
  340. } // boost
  341. #ifdef BOOST_MSVC
  342. #pragma warning(push)
  343. #pragma warning(disable: 4103)
  344. #endif
  345. #ifdef BOOST_HAS_ABI_HEADERS
  346. # include BOOST_ABI_SUFFIX
  347. #endif
  348. #ifdef BOOST_MSVC
  349. #pragma warning(pop)
  350. #endif
  351. #endif