////////////////////////////////////////////////////////////////////////////// /// \file c_regex_traits.hpp /// Contains the definition of the c_regex_traits\<\> template, which is a /// wrapper for the C locale functions that can be used to customize the /// behavior of static and dynamic regexes. // // Copyright 2008 Eric Niebler. Distributed under the Boost // Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) #ifndef BOOST_XPRESSIVE_TRAITS_C_REGEX_TRAITS_HPP_EAN_10_04_2005 #define BOOST_XPRESSIVE_TRAITS_C_REGEX_TRAITS_HPP_EAN_10_04_2005 // MS compatible compilers support #pragma once #if defined(_MSC_VER) # pragma once #endif #include #include #include #include namespace boost { namespace xpressive { namespace detail { /////////////////////////////////////////////////////////////////////////////// // empty_locale struct empty_locale { }; /////////////////////////////////////////////////////////////////////////////// // c_regex_traits_base template struct c_regex_traits_base { protected: template void imbue(Traits const &tr) { } }; template struct c_regex_traits_base { protected: template static void imbue(Traits const &) { } }; #ifndef BOOST_XPRESSIVE_NO_WREGEX template struct c_regex_traits_base { protected: template static void imbue(Traits const &) { } }; #endif template Char c_tolower(Char); template Char c_toupper(Char); template<> inline char c_tolower(char ch) { using namespace std; return static_cast(tolower(static_cast(ch))); } template<> inline char c_toupper(char ch) { using namespace std; return static_cast(toupper(static_cast(ch))); } #ifndef BOOST_XPRESSIVE_NO_WREGEX template<> inline wchar_t c_tolower(wchar_t ch) { using namespace std; return towlower(ch); } template<> inline wchar_t c_toupper(wchar_t ch) { using namespace std; return towupper(ch); } #endif } // namespace detail /////////////////////////////////////////////////////////////////////////////// // regex_traits_version_1_tag // struct regex_traits_version_1_tag; /////////////////////////////////////////////////////////////////////////////// // c_regex_traits // /// \brief Encapsaulates the standard C locale functions for use by the /// \c basic_regex\<\> class template. template struct c_regex_traits : detail::c_regex_traits_base { typedef Char char_type; typedef std::basic_string string_type; typedef detail::empty_locale locale_type; typedef typename detail::char_class_impl::char_class_type char_class_type; typedef regex_traits_version_2_tag version_tag; typedef detail::c_regex_traits_base base_type; /// Initialize a c_regex_traits object to use the global C locale. /// c_regex_traits(locale_type const &loc = locale_type()) : base_type() { this->imbue(loc); } /// Checks two c_regex_traits objects for equality /// /// \return true. bool operator ==(c_regex_traits const &) const { return true; } /// Checks two c_regex_traits objects for inequality /// /// \return false. bool operator !=(c_regex_traits const &) const { return false; } /// Convert a char to a Char /// /// \param ch The source character. /// \return ch if Char is char, std::btowc(ch) if Char is wchar_t. static char_type widen(char ch); /// Returns a hash value for a Char in the range [0, UCHAR_MAX] /// /// \param ch The source character. /// \return a value between 0 and UCHAR_MAX, inclusive. static unsigned char hash(char_type ch) { return static_cast(std::char_traits::to_int_type(ch)); } /// No-op /// /// \param ch The source character. /// \return ch static char_type translate(char_type ch) { return ch; } /// Converts a character to lower-case using the current global C locale. /// /// \param ch The source character. /// \return std::tolower(ch) if Char is char, std::towlower(ch) if Char is wchar_t. static char_type translate_nocase(char_type ch) { return detail::c_tolower(ch); } /// Converts a character to lower-case using the current global C locale. /// /// \param ch The source character. /// \return std::tolower(ch) if Char is char, std::towlower(ch) if Char is wchar_t. static char_type tolower(char_type ch) { return detail::c_tolower(ch); } /// Converts a character to upper-case using the current global C locale. /// /// \param ch The source character. /// \return std::toupper(ch) if Char is char, std::towupper(ch) if Char is wchar_t. static char_type toupper(char_type ch) { return detail::c_toupper(ch); } /// Returns a \c string_type containing all the characters that compare equal /// disregrarding case to the one passed in. This function can only be called /// if has_fold_case\ \>::value is \c true. /// /// \param ch The source character. /// \return \c string_type containing all chars which are equal to \c ch when disregarding /// case //typedef array fold_case_type; string_type fold_case(char_type ch) const { BOOST_MPL_ASSERT((is_same)); char_type ntcs[] = { detail::c_tolower(ch) , detail::c_toupper(ch) , 0 }; if(ntcs[1] == ntcs[0]) ntcs[1] = 0; return string_type(ntcs); } /// Checks to see if a character is within a character range. /// /// \param first The bottom of the range, inclusive. /// \param last The top of the range, inclusive. /// \param ch The source character. /// \return first <= ch && ch <= last. static bool in_range(char_type first, char_type last, char_type ch) { return first <= ch && ch <= last; } /// Checks to see if a character is within a character range, irregardless of case. /// /// \param first The bottom of the range, inclusive. /// \param last The top of the range, inclusive. /// \param ch The source character. /// \return in_range(first, last, ch) || in_range(first, last, tolower(ch)) || in_range(first, /// last, toupper(ch)) /// \attention The default implementation doesn't do proper Unicode /// case folding, but this is the best we can do with the standard /// C locale functions. static bool in_range_nocase(char_type first, char_type last, char_type ch) { return c_regex_traits::in_range(first, last, ch) || c_regex_traits::in_range(first, last, detail::c_tolower(ch)) || c_regex_traits::in_range(first, last, detail::c_toupper(ch)); } /// Returns a sort key for the character sequence designated by the iterator range [F1, F2) /// such that if the character sequence [G1, G2) sorts before the character sequence [H1, H2) /// then v.transform(G1, G2) < v.transform(H1, H2). /// /// \attention Not currently used template static string_type transform(FwdIter begin, FwdIter end) { BOOST_ASSERT(false); // BUGBUG implement me } /// Returns a sort key for the character sequence designated by the iterator range [F1, F2) /// such that if the character sequence [G1, G2) sorts before the character sequence [H1, H2) /// when character case is not considered then /// v.transform_primary(G1, G2) < v.transform_primary(H1, H2). /// /// \attention Not currently used template static string_type transform_primary(FwdIter begin, FwdIter end) { BOOST_ASSERT(false); // BUGBUG implement me } /// Returns a sequence of characters that represents the collating element /// consisting of the character sequence designated by the iterator range [F1, F2). /// Returns an empty string if the character sequence is not a valid collating element. /// /// \attention Not currently used template static string_type lookup_collatename(FwdIter begin, FwdIter end) { BOOST_ASSERT(false); // BUGBUG implement me } /// For the character class name represented by the specified character sequence, /// return the corresponding bitmask representation. /// /// \param begin A forward iterator to the start of the character sequence representing /// the name of the character class. /// \param end The end of the character sequence. /// \param icase Specifies whether the returned bitmask should represent the case-insensitive /// version of the character class. /// \return A bitmask representing the character class. template static char_class_type lookup_classname(FwdIter begin, FwdIter end, bool icase) { return detail::char_class_impl::lookup_classname(begin, end, icase); } /// Tests a character against a character class bitmask. /// /// \param ch The character to test. /// \param mask The character class bitmask against which to test. /// \pre mask is a bitmask returned by lookup_classname, or is several such masks bit-or'ed /// together. /// \return true if the character is a member of any of the specified character classes, false /// otherwise. static bool isctype(char_type ch, char_class_type mask) { return detail::char_class_impl::isctype(ch, mask); } /// Convert a digit character into the integer it represents. /// /// \param ch The digit character. /// \param radix The radix to use for the conversion. /// \pre radix is one of 8, 10, or 16. /// \return -1 if ch is not a digit character, the integer value of the character otherwise. If /// char_type is char, std::strtol is used for the conversion. If char_type is wchar_t, /// std::wcstol is used. static int value(char_type ch, int radix); /// No-op /// locale_type imbue(locale_type loc) { this->base_type::imbue(*this); return loc; } /// No-op /// static locale_type getloc() { locale_type loc; return loc; } }; /////////////////////////////////////////////////////////////////////////////// // c_regex_traits<>::widen specializations /// INTERNAL ONLY template<> inline char c_regex_traits::widen(char ch) { return ch; } #ifndef BOOST_XPRESSIVE_NO_WREGEX /// INTERNAL ONLY template<> inline wchar_t c_regex_traits::widen(char ch) { using namespace std; return btowc(ch); } #endif /////////////////////////////////////////////////////////////////////////////// // c_regex_traits<>::hash specializations /// INTERNAL ONLY template<> inline unsigned char c_regex_traits::hash(char ch) { return static_cast(ch); } #ifndef BOOST_XPRESSIVE_NO_WREGEX /// INTERNAL ONLY template<> inline unsigned char c_regex_traits::hash(wchar_t ch) { return static_cast(ch); } #endif /////////////////////////////////////////////////////////////////////////////// // c_regex_traits<>::value specializations /// INTERNAL ONLY template<> inline int c_regex_traits::value(char ch, int radix) { using namespace std; BOOST_ASSERT(8 == radix || 10 == radix || 16 == radix); char begin[2] = { ch, '\0' }, *end = 0; int val = strtol(begin, &end, radix); return begin == end ? -1 : val; } #ifndef BOOST_XPRESSIVE_NO_WREGEX /// INTERNAL ONLY template<> inline int c_regex_traits::value(wchar_t ch, int radix) { using namespace std; BOOST_ASSERT(8 == radix || 10 == radix || 16 == radix); wchar_t begin[2] = { ch, L'\0' }, *end = 0; int val = wcstol(begin, &end, radix); return begin == end ? -1 : val; } #endif // Narrow C traits has fold_case() member function. template<> struct has_fold_case > : mpl::true_ { }; }} #endif