conversion.hpp 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380
  1. //
  2. // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
  3. //
  4. // Distributed under the Boost Software License, Version 1.0. (See
  5. // accompanying file LICENSE_1_0.txt or copy at
  6. // http://www.boost.org/LICENSE_1_0.txt)
  7. //
  8. #ifndef BOOST_LOCALE_CONVERTER_HPP_INCLUDED
  9. #define BOOST_LOCALE_CONVERTER_HPP_INCLUDED
  10. #include <boost/locale/config.hpp>
  11. #ifdef BOOST_MSVC
  12. # pragma warning(push)
  13. # pragma warning(disable : 4275 4251 4231 4660)
  14. #endif
  15. #include <locale>
  16. namespace boost {
  17. namespace locale {
  18. ///
  19. /// \defgroup convert Text Conversions
  20. ///
  21. /// This module provides various functions for string manipulation like Unicode normalization, case conversion etc.
  22. /// @{
  23. ///
  ///
  /// \brief Holder of the operation selector used by the converter facets, which derive from this class.
  ///
  class converter_base {
  public:
      ///
      /// Selector passed to a converter facet to choose the text operation it performs
      ///
      typedef enum {
          normalization = 0,  ///< Unicode normalization of the text
          upper_case    = 1,  ///< Conversion of the text to upper case
          lower_case    = 2,  ///< Conversion of the text to lower case
          case_folding  = 3,  ///< Case folding of the text
          title_case    = 4   ///< Conversion of the text to title case
      } conversion_type;
  };
  /// Declaration of the converter facet class template, parameterized by character type.
  template<typename CharType> class converter;
  42. #ifdef BOOST_LOCALE_DOXYGEN
  43. ///
  44. /// \brief The facet that implements text manipulation
  45. ///
  46. /// It is used to performs text conversion operations defined by \ref conversion_type. It is specialized
  47. /// for four types of characters \c char, \c wchar_t, \c char16_t, \c char32_t
  48. ///
  49. template<typename Char>
  50. class BOOST_LOCALE_DECL converter: public converter_base, public std::locale::facet {
  51. public:
  52. /// Locale identification
  53. static std::locale::id id;
  54. /// Standard constructor
  55. converter(size_t refs = 0) : std::locale::facet(refs)
  56. {
  57. }
  58. ///
  59. /// Convert text in range [\a begin, \a end) according to conversion method \a how. Parameter
  60. /// \a flags is used for specification of normalization method like nfd, nfc etc.
  61. ///
  62. virtual std::basic_string<Char> convert(conversion_type how,Char const *begin,Char const *end,int flags = 0) const = 0;
  63. #if defined (__SUNPRO_CC) && defined (_RWSTD_VER)
  64. std::locale::id& __get_id (void) const { return id; }
  65. #endif
  66. };
  67. #else
  68. template<>
  69. class BOOST_LOCALE_DECL converter<char> : public converter_base, public std::locale::facet {
  70. public:
  71. static std::locale::id id;
  72. converter(size_t refs = 0) : std::locale::facet(refs)
  73. {
  74. }
  75. virtual std::string convert(conversion_type how,char const *begin,char const *end,int flags = 0) const = 0;
  76. #if defined (__SUNPRO_CC) && defined (_RWSTD_VER)
  77. std::locale::id& __get_id (void) const { return id; }
  78. #endif
  79. };
  80. template<>
  81. class BOOST_LOCALE_DECL converter<wchar_t> : public converter_base, public std::locale::facet {
  82. public:
  83. static std::locale::id id;
  84. converter(size_t refs = 0) : std::locale::facet(refs)
  85. {
  86. }
  87. virtual std::wstring convert(conversion_type how,wchar_t const *begin,wchar_t const *end,int flags = 0) const = 0;
  88. #if defined (__SUNPRO_CC) && defined (_RWSTD_VER)
  89. std::locale::id& __get_id (void) const { return id; }
  90. #endif
  91. };
  92. #ifdef BOOST_LOCALE_ENABLE_CHAR16_T
  93. template<>
  94. class BOOST_LOCALE_DECL converter<char16_t> : public converter_base, public std::locale::facet {
  95. public:
  96. static std::locale::id id;
  97. converter(size_t refs = 0) : std::locale::facet(refs)
  98. {
  99. }
  100. virtual std::u16string convert(conversion_type how,char16_t const *begin,char16_t const *end,int flags = 0) const = 0;
  101. #if defined (__SUNPRO_CC) && defined (_RWSTD_VER)
  102. std::locale::id& __get_id (void) const { return id; }
  103. #endif
  104. };
  105. #endif
  106. #ifdef BOOST_LOCALE_ENABLE_CHAR32_T
  107. template<>
  108. class BOOST_LOCALE_DECL converter<char32_t> : public converter_base, public std::locale::facet {
  109. public:
  110. static std::locale::id id;
  111. converter(size_t refs = 0) : std::locale::facet(refs)
  112. {
  113. }
  114. virtual std::u32string convert(conversion_type how,char32_t const *begin,char32_t const *end,int flags = 0) const = 0;
  115. #if defined (__SUNPRO_CC) && defined (_RWSTD_VER)
  116. std::locale::id& __get_id (void) const { return id; }
  117. #endif
  118. };
  119. #endif
  120. #endif
  ///
  /// The type that defines the <a href="http://unicode.org/reports/tr15/#Norm_Forms">normalization form</a>
  ///
  typedef enum {
      norm_nfd,   ///< Canonical decomposition
      norm_nfc,   ///< Canonical decomposition followed by canonical composition
      norm_nfkd,  ///< Compatibility decomposition
      norm_nfkc,  ///< Compatibility decomposition followed by canonical composition.
      // No comma after the last enumerator: a trailing comma is only accepted from C++11 on.
      norm_default = norm_nfc ///< Default normalization - canonical decomposition followed by canonical composition
  } norm_type;
  131. ///
  132. /// Normalize Unicode string \a str according to \ref norm_type "normalization form" \a n
  133. ///
  134. /// Note: This function receives only Unicode strings, i.e.: UTF-8, UTF-16 or UTF-32. It does not take
  135. /// in account the locale encoding, because Unicode decomposition and composition are meaningless outside
  136. /// of a Unicode character set.
  137. ///
  138. /// \note throws std::bad_cast if loc does not have \ref converter facet installed
  139. ///
  140. template<typename CharType>
  141. std::basic_string<CharType> normalize(std::basic_string<CharType> const &str,norm_type n=norm_default,std::locale const &loc=std::locale())
  142. {
  143. return std::use_facet<converter<CharType> >(loc).convert(converter_base::normalization,str.data(),str.data() + str.size(),n);
  144. }
  145. ///
  146. /// Normalize NUL terminated Unicode string \a str according to \ref norm_type "normalization form" \a n
  147. ///
  148. /// Note: This function receives only Unicode strings, i.e.: UTF-8, UTF-16 or UTF-32. It does not take
  149. /// in account the locale encoding, because Unicode decomposition and composition are meaningless outside
  150. /// of a Unicode character set.
  151. ///
  152. /// \note throws std::bad_cast if loc does not have \ref converter facet installed
  153. ///
  154. template<typename CharType>
  155. std::basic_string<CharType> normalize(CharType const *str,norm_type n=norm_default,std::locale const &loc=std::locale())
  156. {
  157. CharType const *end=str;
  158. while(*end)
  159. end++;
  160. return std::use_facet<converter<CharType> >(loc).convert(converter_base::normalization,str,end,n);
  161. }
  162. ///
  163. /// Normalize Unicode string in range [begin,end) according to \ref norm_type "normalization form" \a n
  164. ///
  165. /// Note: This function receives only Unicode strings, i.e.: UTF-8, UTF-16 or UTF-32. It does not take
  166. /// in account the locale encoding, because Unicode decomposition and composition are meaningless outside
  167. /// of a Unicode character set.
  168. ///
  169. /// \note throws std::bad_cast if loc does not have \ref converter facet installed
  170. ///
  171. template<typename CharType>
  172. std::basic_string<CharType> normalize( CharType const *begin,
  173. CharType const *end,
  174. norm_type n=norm_default,
  175. std::locale const &loc=std::locale())
  176. {
  177. return std::use_facet<converter<CharType> >(loc).convert(converter_base::normalization,begin,end,n);
  178. }
  179. ///////////////////////////////////////////////////
  180. ///
  181. /// Convert a string \a str to upper case according to locale \a loc
  182. ///
  183. /// \note throws std::bad_cast if loc does not have \ref converter facet installed
  184. ///
  185. template<typename CharType>
  186. std::basic_string<CharType> to_upper(std::basic_string<CharType> const &str,std::locale const &loc=std::locale())
  187. {
  188. return std::use_facet<converter<CharType> >(loc).convert(converter_base::upper_case,str.data(),str.data()+str.size());
  189. }
  190. ///
  191. /// Convert a NUL terminated string \a str to upper case according to locale \a loc
  192. ///
  193. /// \note throws std::bad_cast if loc does not have \ref converter facet installed
  194. ///
  195. template<typename CharType>
  196. std::basic_string<CharType> to_upper(CharType const *str,std::locale const &loc=std::locale())
  197. {
  198. CharType const *end=str;
  199. while(*end)
  200. end++;
  201. return std::use_facet<converter<CharType> >(loc).convert(converter_base::upper_case,str,end);
  202. }
  203. ///
  204. /// Convert a string in range [begin,end) to upper case according to locale \a loc
  205. ///
  206. /// \note throws std::bad_cast if loc does not have \ref converter facet installed
  207. ///
  208. template<typename CharType>
  209. std::basic_string<CharType> to_upper(CharType const *begin,CharType const *end,std::locale const &loc=std::locale())
  210. {
  211. return std::use_facet<converter<CharType> >(loc).convert(converter_base::upper_case,begin,end);
  212. }
  213. ///////////////////////////////////////////////////
  214. ///
  215. /// Convert a string \a str to lower case according to locale \a loc
  216. ///
  217. /// \note throws std::bad_cast if loc does not have \ref converter facet installed
  218. ///
  219. template<typename CharType>
  220. std::basic_string<CharType> to_lower(std::basic_string<CharType> const &str,std::locale const &loc=std::locale())
  221. {
  222. return std::use_facet<converter<CharType> >(loc).convert(converter_base::lower_case,str.data(),str.data()+str.size());
  223. }
  224. ///
  225. /// Convert a NUL terminated string \a str to lower case according to locale \a loc
  226. ///
  227. /// \note throws std::bad_cast if loc does not have \ref converter facet installed
  228. ///
  229. template<typename CharType>
  230. std::basic_string<CharType> to_lower(CharType const *str,std::locale const &loc=std::locale())
  231. {
  232. CharType const *end=str;
  233. while(*end)
  234. end++;
  235. return std::use_facet<converter<CharType> >(loc).convert(converter_base::lower_case,str,end);
  236. }
  237. ///
  238. /// Convert a string in range [begin,end) to lower case according to locale \a loc
  239. ///
  240. /// \note throws std::bad_cast if loc does not have \ref converter facet installed
  241. ///
  242. template<typename CharType>
  243. std::basic_string<CharType> to_lower(CharType const *begin,CharType const *end,std::locale const &loc=std::locale())
  244. {
  245. return std::use_facet<converter<CharType> >(loc).convert(converter_base::lower_case,begin,end);
  246. }
  247. ///////////////////////////////////////////////////
  248. ///
  249. /// Convert a string \a str to title case according to locale \a loc
  250. ///
  251. /// \note throws std::bad_cast if loc does not have \ref converter facet installed
  252. ///
  253. template<typename CharType>
  254. std::basic_string<CharType> to_title(std::basic_string<CharType> const &str,std::locale const &loc=std::locale())
  255. {
  256. return std::use_facet<converter<CharType> >(loc).convert(converter_base::title_case,str.data(),str.data()+str.size());
  257. }
  258. ///
  259. /// Convert a NUL terminated string \a str to title case according to locale \a loc
  260. ///
  261. /// \note throws std::bad_cast if loc does not have \ref converter facet installed
  262. ///
  263. template<typename CharType>
  264. std::basic_string<CharType> to_title(CharType const *str,std::locale const &loc=std::locale())
  265. {
  266. CharType const *end=str;
  267. while(*end)
  268. end++;
  269. return std::use_facet<converter<CharType> >(loc).convert(converter_base::title_case,str,end);
  270. }
  271. ///
  272. /// Convert a string in range [begin,end) to title case according to locale \a loc
  273. ///
  274. /// \note throws std::bad_cast if loc does not have \ref converter facet installed
  275. ///
  276. template<typename CharType>
  277. std::basic_string<CharType> to_title(CharType const *begin,CharType const *end,std::locale const &loc=std::locale())
  278. {
  279. return std::use_facet<converter<CharType> >(loc).convert(converter_base::title_case,begin,end);
  280. }
  281. ///////////////////////////////////////////////////
  282. ///
  283. /// Fold case of a string \a str according to locale \a loc
  284. ///
  285. /// \note throws std::bad_cast if loc does not have \ref converter facet installed
  286. ///
  287. template<typename CharType>
  288. std::basic_string<CharType> fold_case(std::basic_string<CharType> const &str,std::locale const &loc=std::locale())
  289. {
  290. return std::use_facet<converter<CharType> >(loc).convert(converter_base::case_folding,str.data(),str.data()+str.size());
  291. }
  292. ///
  293. /// Fold case of a NUL terminated string \a str according to locale \a loc
  294. ///
  295. /// \note throws std::bad_cast if loc does not have \ref converter facet installed
  296. ///
  297. template<typename CharType>
  298. std::basic_string<CharType> fold_case(CharType const *str,std::locale const &loc=std::locale())
  299. {
  300. CharType const *end=str;
  301. while(*end)
  302. end++;
  303. return std::use_facet<converter<CharType> >(loc).convert(converter_base::case_folding,str,end);
  304. }
  305. ///
  306. /// Fold case of a string in range [begin,end) according to locale \a loc
  307. ///
  308. /// \note throws std::bad_cast if loc does not have \ref converter facet installed
  309. ///
  310. template<typename CharType>
  311. std::basic_string<CharType> fold_case(CharType const *begin,CharType const *end,std::locale const &loc=std::locale())
  312. {
  313. return std::use_facet<converter<CharType> >(loc).convert(converter_base::case_folding,begin,end);
  314. }
  315. ///
  316. ///@}
  317. ///
  318. } // locale
  319. } // boost
  320. #ifdef BOOST_MSVC
  321. #pragma warning(pop)
  322. #endif
  323. #endif
  324. ///
  325. /// \example conversions.cpp
  326. ///
  327. /// Example of using various text conversion functions.
  328. ///
  329. /// \example wconversions.cpp
  330. ///
  331. /// Example of using various text conversion functions with wide strings.
  332. ///
  333. // vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4