primary_transform.hpp 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146
  1. /*
  2. *
  3. * Copyright (c) 1998-2002
  4. * John Maddock
  5. *
  6. * Use, modification and distribution are subject to the
  7. * Boost Software License, Version 1.0. (See accompanying file
  8. * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  9. *
  10. */
  11. /*
  12. * LOCATION: see http://www.boost.org for most recent version.
  13. * FILE: primary_transform.hpp
  14. * VERSION: see <boost/version.hpp>
  15. * DESCRIPTION: Heuristically determines the sort string format in use
  16. * by the current locale.
  17. */
  18. #ifndef BOOST_REGEX_PRIMARY_TRANSFORM
  19. #define BOOST_REGEX_PRIMARY_TRANSFORM
  20. #ifdef BOOST_MSVC
  21. #pragma warning(push)
  22. #pragma warning(disable: 4103)
  23. #endif
  24. #ifdef BOOST_HAS_ABI_HEADERS
  25. # include BOOST_ABI_PREFIX
  26. #endif
  27. #ifdef BOOST_MSVC
  28. #pragma warning(pop)
  29. #endif
  30. namespace boost{
  31. namespace BOOST_REGEX_DETAIL_NS{
  32. enum{
  33. sort_C,
  34. sort_fixed,
  35. sort_delim,
  36. sort_unknown
  37. };
  38. template <class S, class charT>
  39. unsigned count_chars(const S& s, charT c)
  40. {
  41. //
  42. // Count how many occurances of character c occur
  43. // in string s: if c is a delimeter between collation
  44. // fields, then this should be the same value for all
  45. // sort keys:
  46. //
  47. unsigned int count = 0;
  48. for(unsigned pos = 0; pos < s.size(); ++pos)
  49. {
  50. if(s[pos] == c) ++count;
  51. }
  52. return count;
  53. }
  54. template <class traits, class charT>
  55. unsigned find_sort_syntax(const traits* pt, charT* delim)
  56. {
  57. //
  58. // compare 'a' with 'A' to see how similar they are,
  59. // should really use a-accute but we can't portably do that,
  60. //
  61. typedef typename traits::string_type string_type;
  62. typedef typename traits::char_type char_type;
  63. // Suppress incorrect warning for MSVC
  64. (void)pt;
  65. char_type a[2] = {'a', '\0', };
  66. string_type sa(pt->transform(a, a+1));
  67. if(sa == a)
  68. {
  69. *delim = 0;
  70. return sort_C;
  71. }
  72. char_type A[2] = { 'A', '\0', };
  73. string_type sA(pt->transform(A, A+1));
  74. char_type c[2] = { ';', '\0', };
  75. string_type sc(pt->transform(c, c+1));
  76. int pos = 0;
  77. while((pos <= static_cast<int>(sa.size())) && (pos <= static_cast<int>(sA.size())) && (sa[pos] == sA[pos])) ++pos;
  78. --pos;
  79. if(pos < 0)
  80. {
  81. *delim = 0;
  82. return sort_unknown;
  83. }
  84. //
  85. // at this point sa[pos] is either the end of a fixed width field
  86. // or the character that acts as a delimiter:
  87. //
  88. charT maybe_delim = sa[pos];
  89. if((pos != 0) && (count_chars(sa, maybe_delim) == count_chars(sA, maybe_delim)) && (count_chars(sa, maybe_delim) == count_chars(sc, maybe_delim)))
  90. {
  91. *delim = maybe_delim;
  92. return sort_delim;
  93. }
  94. //
  95. // OK doen't look like a delimiter, try for fixed width field:
  96. //
  97. if((sa.size() == sA.size()) && (sa.size() == sc.size()))
  98. {
  99. // note assumes that the fixed width field is less than
  100. // (numeric_limits<charT>::max)(), should be true for all types
  101. // I can't imagine 127 character fields...
  102. *delim = static_cast<charT>(++pos);
  103. return sort_fixed;
  104. }
  105. //
  106. // don't know what it is:
  107. //
  108. *delim = 0;
  109. return sort_unknown;
  110. }
  111. } // namespace BOOST_REGEX_DETAIL_NS
  112. } // namespace boost
  113. #ifdef BOOST_MSVC
  114. #pragma warning(push)
  115. #pragma warning(disable: 4103)
  116. #endif
  117. #ifdef BOOST_HAS_ABI_HEADERS
  118. # include BOOST_ABI_SUFFIX
  119. #endif
  120. #ifdef BOOST_MSVC
  121. #pragma warning(pop)
  122. #endif
  123. #endif