tz_db_base.hpp 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398
  1. #ifndef DATE_TIME_TZ_DB_BASE_HPP__
  2. #define DATE_TIME_TZ_DB_BASE_HPP__
  3. /* Copyright (c) 2003-2005 CrystalClear Software, Inc.
  4. * Subject to the Boost Software License, Version 1.0.
  5. * (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
  6. * Author: Jeff Garland, Bart Garst
  7. * $Date$
  8. */
  9. #include <map>
  10. #include <vector>
  11. #include <string>
  12. #include <sstream>
  13. #include <fstream>
  14. #include <stdexcept>
  15. #include <boost/tokenizer.hpp>
  16. #include <boost/shared_ptr.hpp>
  17. #include <boost/throw_exception.hpp>
  18. #include <boost/date_time/compiler_config.hpp>
  19. #include <boost/date_time/time_zone_names.hpp>
  20. #include <boost/date_time/time_zone_base.hpp>
  21. #include <boost/date_time/time_parsing.hpp>
  22. #include <boost/algorithm/string.hpp>
  23. namespace boost {
  24. namespace date_time {
  25. //! Exception thrown when tz database cannot locate requested data file
  26. class data_not_accessible : public std::logic_error
  27. {
  28. public:
  29. data_not_accessible() :
  30. std::logic_error(std::string("Unable to locate or access the required datafile."))
  31. {}
  32. data_not_accessible(const std::string& filespec) :
  33. std::logic_error(std::string("Unable to locate or access the required datafile. Filespec: " + filespec))
  34. {}
  35. };
  36. //! Exception thrown when tz database locates incorrect field structure in data file
  37. class bad_field_count : public std::out_of_range
  38. {
  39. public:
  40. bad_field_count(const std::string& s) :
  41. std::out_of_range(s)
  42. {}
  43. };
  44. //! Creates a database of time_zones from csv datafile
  45. /*! The csv file containing the zone_specs used by the
  46. * tz_db_base is intended to be customized by the
  47. * library user. When customizing this file (or creating your own) the
  48. * file must follow a specific format.
  49. *
  50. * The first line is expected to contain column headings and is therefore
  51. * not processed by the tz_db_base.
  52. *
  53. * Each record (line) must have eleven fields. Some of those fields can
  54. * be empty. Every field (even empty ones) must be enclosed in
  55. * double-quotes.
  56. * Ex:
  57. * @code
  58. * "America/Phoenix" <- string enclosed in quotes
  59. * "" <- empty field
  60. * @endcode
  61. *
  62. * Some fields represent a length of time. The format of these fields
  63. * must be:
  64. * @code
  65. * "{+|-}hh:mm[:ss]" <- length-of-time format
  66. * @endcode
  67. * Where the plus or minus is mandatory and the seconds are optional.
  68. *
  69. * Since some time zones do not use daylight savings it is not always
  70. * necessary for every field in a zone_spec to contain a value. All
  71. * zone_specs must have at least ID and GMT offset. Zones that use
  72. * daylight savings must have all fields filled except:
  73. * STD ABBR, STD NAME, DST NAME. You should take note
  74. * that DST ABBR is mandatory for zones that use daylight savings
  75. * (see field descriptions for further details).
  76. *
  77. * ******* Fields and their description/details *********
  78. *
  79. * ID:
  80. * Contains the identifying string for the zone_spec. Any string will
  81. * do as long as it's unique. No two IDs can be the same.
  82. *
  83. * STD ABBR:
  84. * STD NAME:
  85. * DST ABBR:
  86. * DST NAME:
  87. * These four are all the names and abbreviations used by the time
  88. * zone being described. While any string will do in these fields,
  89. * care should be taken. These fields hold the strings that will be
  90. * used in the output of many of the local_time classes.
  91. * Ex:
  92. * @code
  93. * time_zone nyc = tz_db.time_zone_from_region("America/New_York");
  94. * local_time ny_time(date(2004, Aug, 30), IS_DST, nyc);
  95. * cout << ny_time.to_long_string() << endl;
  96. * // 2004-Aug-30 00:00:00 Eastern Daylight Time
  97. * cout << ny_time.to_short_string() << endl;
  98. * // 2004-Aug-30 00:00:00 EDT
  99. * @endcode
  100. *
  101. * NOTE: The exact format/function names may vary - see local_time
  102. * documentation for further details.
  103. *
  104. * GMT offset:
  105. * This is the number of hours added to utc to get the local time
  106. * before any daylight savings adjustments are made. Some examples
  107. * are: America/New_York offset -5 hours, & Africa/Cairo offset +2 hours.
  108. * The format must follow the length-of-time format described above.
  109. *
  110. * DST adjustment:
  111. * The amount of time added to gmt_offset when daylight savings is in
  112. * effect. The format must follow the length-of-time format described
  113. * above.
  114. *
  115. * DST Start Date rule:
  116. * This is a specially formatted string that describes the day of year
  117. * in which the transition takes place. It holds three fields of its own,
  118. * separated by semicolons.
  119. * The first field indicates the "nth" weekday of the month. The possible
  120. * values are: 1 (first), 2 (second), 3 (third), 4 (fourth), 5 (fifth),
  121. * and -1 (last).
  122. * The second field indicates the day-of-week from 0-6 (Sun=0).
  123. * The third field indicates the month from 1-12 (Jan=1).
  124. *
  125. * Examples are: "-1;5;9"="Last Friday of September",
  126. * "2;1;3"="Second Monday of March"
  127. *
  128. * Start time:
  129. * Start time is the number of hours past midnight, on the day of the
  130. * start transition, the transition takes place. More simply put, the
  131. * time of day the transition is made (in 24 hours format). The format
  132. * must follow the length-of-time format described above with the
  133. * exception that it must always be positive.
  134. *
  135. * DST End date rule:
  136. * See DST Start date rule. The difference here is this is the day
  137. * daylight savings ends (transition to STD).
  138. *
  139. * End time:
  140. * Same as Start time.
  141. */
  142. template<class time_zone_type, class rule_type>
  143. class tz_db_base {
  144. public:
  145. /* Having CharT as a template parameter created problems
  146. * with posix_time::duration_from_string. Templatizing
  147. * duration_from_string was not possible at this time, however,
  148. * it should be possible in the future (when poor compilers get
  149. * fixed or stop being used).
  150. * Since this class was designed to use CharT as a parameter it
  151. * is simply typedef'd here to ease converting in back to a
  152. * parameter the future */
  153. typedef char char_type;
  154. typedef typename time_zone_type::base_type time_zone_base_type;
  155. typedef typename time_zone_type::time_duration_type time_duration_type;
  156. typedef time_zone_names_base<char_type> time_zone_names;
  157. typedef boost::date_time::dst_adjustment_offsets<time_duration_type> dst_adjustment_offsets;
  158. typedef std::basic_string<char_type> string_type;
  159. //! Constructs an empty database
  160. tz_db_base() {}
  161. //! Process csv data file, may throw exceptions
  162. /*! May throw bad_field_count exceptions */
  163. void load_from_stream(std::istream &in)
  164. {
  165. std::string buff;
  166. while( std::getline(in, buff)) {
  167. boost::trim_right(buff);
  168. parse_string(buff);
  169. }
  170. }
  171. //! Process csv data file, may throw exceptions
  172. /*! May throw data_not_accessible, or bad_field_count exceptions */
  173. void load_from_file(const std::string& pathspec)
  174. {
  175. std::string buff;
  176. std::ifstream ifs(pathspec.c_str());
  177. if(!ifs){
  178. boost::throw_exception(data_not_accessible(pathspec));
  179. }
  180. std::getline(ifs, buff); // first line is column headings
  181. this->load_from_stream(ifs);
  182. }
  183. //! returns true if record successfully added to map
  184. /*! Takes a region name in the form of "America/Phoenix", and a
  185. * time_zone object for that region. The id string must be a unique
  186. * name that does not already exist in the database. */
  187. bool add_record(const string_type& region,
  188. boost::shared_ptr<time_zone_base_type> tz)
  189. {
  190. typename map_type::value_type p(region, tz);
  191. return (m_zone_map.insert(p)).second;
  192. }
  193. //! Returns a time_zone object built from the specs for the given region
  194. /*! Returns a time_zone object built from the specs for the given
  195. * region. If region does not exist a local_time::record_not_found
  196. * exception will be thrown */
  197. boost::shared_ptr<time_zone_base_type>
  198. time_zone_from_region(const string_type& region) const
  199. {
  200. // get the record
  201. typename map_type::const_iterator record = m_zone_map.find(region);
  202. if(record == m_zone_map.end()){
  203. return boost::shared_ptr<time_zone_base_type>(); //null pointer
  204. }
  205. return record->second;
  206. }
  207. //! Returns a vector of strings holding the time zone regions in the database
  208. std::vector<std::string> region_list() const
  209. {
  210. typedef std::vector<std::string> vector_type;
  211. vector_type regions;
  212. typename map_type::const_iterator itr = m_zone_map.begin();
  213. while(itr != m_zone_map.end()) {
  214. regions.push_back(itr->first);
  215. ++itr;
  216. }
  217. return regions;
  218. }
  219. private:
  220. typedef std::map<string_type, boost::shared_ptr<time_zone_base_type> > map_type;
  221. map_type m_zone_map;
  222. // start and end rule are of the same type
  223. typedef typename rule_type::start_rule::week_num week_num;
  224. /* TODO: mechanisms need to be put in place to handle different
  225. * types of rule specs. parse_rules() only handles nth_kday
  226. * rule types. */
  227. //! parses rule specs for transition day rules
  228. rule_type* parse_rules(const string_type& sr, const string_type& er) const
  229. {
  230. using namespace gregorian;
  231. // start and end rule are of the same type,
  232. // both are included here for readability
  233. typedef typename rule_type::start_rule start_rule;
  234. typedef typename rule_type::end_rule end_rule;
  235. // these are: [start|end] nth, day, month
  236. int s_nth = 0, s_d = 0, s_m = 0;
  237. int e_nth = 0, e_d = 0, e_m = 0;
  238. split_rule_spec(s_nth, s_d, s_m, sr);
  239. split_rule_spec(e_nth, e_d, e_m, er);
  240. typename start_rule::week_num s_wn, e_wn;
  241. s_wn = get_week_num(s_nth);
  242. e_wn = get_week_num(e_nth);
  243. return new rule_type(start_rule(s_wn,
  244. static_cast<unsigned short>(s_d),
  245. static_cast<unsigned short>(s_m)),
  246. end_rule(e_wn,
  247. static_cast<unsigned short>(e_d),
  248. static_cast<unsigned short>(e_m)));
  249. }
  250. //! helper function for parse_rules()
  251. week_num get_week_num(int nth) const
  252. {
  253. typedef typename rule_type::start_rule start_rule;
  254. switch(nth){
  255. case 1:
  256. return start_rule::first;
  257. case 2:
  258. return start_rule::second;
  259. case 3:
  260. return start_rule::third;
  261. case 4:
  262. return start_rule::fourth;
  263. case 5:
  264. case -1:
  265. return start_rule::fifth;
  266. default:
  267. // shouldn't get here - add error handling later
  268. break;
  269. }
  270. return start_rule::fifth; // silence warnings
  271. }
  272. //! splits the [start|end]_date_rule string into 3 ints
  273. void split_rule_spec(int& nth, int& d, int& m, string_type rule) const
  274. {
  275. typedef boost::char_separator<char_type, std::char_traits<char_type> > char_separator_type;
  276. typedef boost::tokenizer<char_separator_type,
  277. std::basic_string<char_type>::const_iterator,
  278. std::basic_string<char_type> > tokenizer;
  279. typedef boost::tokenizer<char_separator_type,
  280. std::basic_string<char_type>::const_iterator,
  281. std::basic_string<char_type> >::iterator tokenizer_iterator;
  282. const char_type sep_char[] = { ';', '\0'};
  283. char_separator_type sep(sep_char);
  284. tokenizer tokens(rule, sep); // 3 fields
  285. if ( std::distance ( tokens.begin(), tokens.end ()) != 3 ) {
  286. std::ostringstream msg;
  287. msg << "Expecting 3 fields, got "
  288. << std::distance ( tokens.begin(), tokens.end ())
  289. << " fields in line: " << rule;
  290. boost::throw_exception(bad_field_count(msg.str()));
  291. }
  292. tokenizer_iterator tok_iter = tokens.begin();
  293. nth = std::atoi(tok_iter->c_str()); ++tok_iter;
  294. d = std::atoi(tok_iter->c_str()); ++tok_iter;
  295. m = std::atoi(tok_iter->c_str());
  296. }
  297. //! Take a line from the csv, turn it into a time_zone_type.
  298. /*! Take a line from the csv, turn it into a time_zone_type,
  299. * and add it to the map. Zone_specs in csv file are expected to
  300. * have eleven fields that describe the time zone. Returns true if
  301. * zone_spec successfully added to database */
  302. bool parse_string(string_type& s)
  303. {
  304. std::vector<string_type> result;
  305. typedef boost::token_iterator_generator<boost::escaped_list_separator<char_type>, string_type::const_iterator, string_type >::type token_iter_type;
  306. token_iter_type i = boost::make_token_iterator<string_type>(s.begin(), s.end(),boost::escaped_list_separator<char_type>());
  307. token_iter_type end;
  308. while (i != end) {
  309. result.push_back(*i);
  310. i++;
  311. }
  312. enum db_fields { ID, STDABBR, STDNAME, DSTABBR, DSTNAME, GMTOFFSET,
  313. DSTADJUST, START_DATE_RULE, START_TIME, END_DATE_RULE,
  314. END_TIME, FIELD_COUNT };
  315. //take a shot at fixing gcc 4.x error
  316. const unsigned int expected_fields = static_cast<unsigned int>(FIELD_COUNT);
  317. if (result.size() != expected_fields) {
  318. std::ostringstream msg;
  319. msg << "Expecting " << FIELD_COUNT << " fields, got "
  320. << result.size() << " fields in line: " << s;
  321. boost::throw_exception(bad_field_count(msg.str()));
  322. BOOST_DATE_TIME_UNREACHABLE_EXPRESSION(return false); // should never reach
  323. }
  324. // initializations
  325. bool has_dst = true;
  326. if(result[DSTABBR] == std::string()){
  327. has_dst = false;
  328. }
  329. // start building components of a time_zone
  330. time_zone_names names(result[STDNAME], result[STDABBR],
  331. result[DSTNAME], result[DSTABBR]);
  332. time_duration_type utc_offset =
  333. str_from_delimited_time_duration<time_duration_type,char_type>(result[GMTOFFSET]);
  334. dst_adjustment_offsets adjust(time_duration_type(0,0,0),
  335. time_duration_type(0,0,0),
  336. time_duration_type(0,0,0));
  337. boost::shared_ptr<rule_type> rules;
  338. if(has_dst){
  339. adjust = dst_adjustment_offsets(
  340. str_from_delimited_time_duration<time_duration_type,char_type>(result[DSTADJUST]),
  341. str_from_delimited_time_duration<time_duration_type,char_type>(result[START_TIME]),
  342. str_from_delimited_time_duration<time_duration_type,char_type>(result[END_TIME])
  343. );
  344. rules =
  345. boost::shared_ptr<rule_type>(parse_rules(result[START_DATE_RULE],
  346. result[END_DATE_RULE]));
  347. }
  348. string_type id(result[ID]);
  349. boost::shared_ptr<time_zone_base_type> zone(new time_zone_type(names, utc_offset, adjust, rules));
  350. return (add_record(id, zone));
  351. }
  352. };
  353. } } // namespace
  354. #endif // DATE_TIME_TZ_DB_BASE_HPP__