#!/usr/bin/python
"""Utility to generate files to benchmark"""
# Copyright Abel Sinkovics (abel@sinkovics.hu) 2016.
# Distributed under the Boost Software License, Version 1.0.
# (See accompanying file LICENSE_1_0.txt or copy at
# http://www.boost.org/LICENSE_1_0.txt)
import argparse
import json
import os
import random
import re
import string

import Cheetah.Template

import chars
  15. def regex_to_error_msg(regex):
  16. """Format a human-readable error message from a regex"""
  17. return re.sub('([^\\\\])[()]', '\\1', regex) \
  18. .replace('[ \t]*$', '') \
  19. .replace('^', '') \
  20. .replace('$', '') \
  21. .replace('[ \t]*', ' ') \
  22. .replace('[ \t]+', ' ') \
  23. .replace('[0-9]+', 'X') \
  24. \
  25. .replace('\\[', '[') \
  26. .replace('\\]', ']') \
  27. .replace('\\(', '(') \
  28. .replace('\\)', ')') \
  29. .replace('\\.', '.')
  30. def mkdir_p(path):
  31. """mkdir -p path"""
  32. try:
  33. os.makedirs(path)
  34. except OSError:
  35. pass
  36. def in_comment(regex):
  37. """Builds a regex matching "regex" in a comment"""
  38. return '^[ \t]*//[ \t]*' + regex + '[ \t]*$'
  39. def random_chars(number):
  40. """Generate random characters"""
  41. char_map = {
  42. k: v for k, v in chars.CHARS.iteritems()
  43. if not format_character(k).startswith('\\x')
  44. }
  45. char_num = sum(char_map.values())
  46. return (
  47. format_character(nth_char(char_map, random.randint(0, char_num - 1)))
  48. for _ in xrange(0, number)
  49. )
  50. def random_string(length):
  51. """Generate a random string or character list depending on the mode"""
  52. return \
  53. 'BOOST_METAPARSE_STRING("{0}")'.format(''.join(random_chars(length)))
  54. class Mode(object):
  55. """Represents a generation mode"""
  56. def __init__(self, name):
  57. self.name = name
  58. if name == 'BOOST_METAPARSE_STRING':
  59. self.identifier = 'bmp'
  60. elif name == 'manual':
  61. self.identifier = 'man'
  62. else:
  63. raise Exception('Invalid mode: {0}'.format(name))
  64. def description(self):
  65. """The description of the mode"""
  66. if self.identifier == 'bmp':
  67. return 'Using BOOST_METAPARSE_STRING'
  68. elif self.identifier == 'man':
  69. return 'Generating strings manually'
  70. def convert_from(self, base):
  71. """Convert a BOOST_METAPARSE_STRING mode document into one with
  72. this mode"""
  73. if self.identifier == 'bmp':
  74. return base
  75. elif self.identifier == 'man':
  76. result = []
  77. prefix = 'BOOST_METAPARSE_STRING("'
  78. while True:
  79. bmp_at = base.find(prefix)
  80. if bmp_at == -1:
  81. return ''.join(result) + base
  82. else:
  83. result.append(
  84. base[0:bmp_at] + '::boost::metaparse::string<'
  85. )
  86. new_base = ''
  87. was_backslash = False
  88. comma = ''
  89. for i in xrange(bmp_at + len(prefix), len(base)):
  90. if was_backslash:
  91. result.append(
  92. '{0}\'\\{1}\''.format(comma, base[i])
  93. )
  94. was_backslash = False
  95. comma = ','
  96. elif base[i] == '"':
  97. new_base = base[i+2:]
  98. break
  99. elif base[i] == '\\':
  100. was_backslash = True
  101. else:
  102. result.append('{0}\'{1}\''.format(comma, base[i]))
  103. comma = ','
  104. base = new_base
  105. result.append('>')
  106. class Template(object):
  107. """Represents a loaded template"""
  108. def __init__(self, name, content):
  109. self.name = name
  110. self.content = content
  111. def instantiate(self, value_of_n):
  112. """Instantiates the template"""
  113. template = Cheetah.Template.Template(
  114. self.content,
  115. searchList={'n': value_of_n}
  116. )
  117. template.random_string = random_string
  118. return str(template)
  119. def range(self):
  120. """Returns the range for N"""
  121. match = self._match(in_comment(
  122. 'n[ \t]+in[ \t]*\\[([0-9]+)\\.\\.([0-9]+)\\),[ \t]+'
  123. 'step[ \t]+([0-9]+)'
  124. ))
  125. return range(
  126. int(match.group(1)),
  127. int(match.group(2)),
  128. int(match.group(3))
  129. )
  130. def property(self, name):
  131. """Parses and returns a property"""
  132. return self._get_line(in_comment(name + ':[ \t]*(.*)'))
  133. def modes(self):
  134. """Returns the list of generation modes"""
  135. return [Mode(s.strip()) for s in self.property('modes').split(',')]
  136. def _match(self, regex):
  137. """Find the first line matching regex and return the match object"""
  138. cregex = re.compile(regex)
  139. for line in self.content.splitlines():
  140. match = cregex.match(line)
  141. if match:
  142. return match
  143. raise Exception('No "{0}" line in {1}.cpp'.format(
  144. regex_to_error_msg(regex),
  145. self.name
  146. ))
  147. def _get_line(self, regex):
  148. """Get a line based on a regex"""
  149. return self._match(regex).group(1)
  150. def load_file(path):
  151. """Returns the content of the file"""
  152. with open(path, 'rb') as in_file:
  153. return in_file.read()
  154. def templates_in(path):
  155. """Enumerate the templates found in path"""
  156. ext = '.cpp'
  157. return (
  158. Template(f[0:-len(ext)], load_file(os.path.join(path, f)))
  159. for f in os.listdir(path) if f.endswith(ext)
  160. )
  161. def nth_char(char_map, index):
  162. """Returns the nth character of a character->occurrence map"""
  163. for char in char_map:
  164. if index < char_map[char]:
  165. return char
  166. index = index - char_map[char]
  167. return None
  168. def format_character(char):
  169. """Returns the C-formatting of the character"""
  170. if \
  171. char in string.ascii_letters \
  172. or char in string.digits \
  173. or char in [
  174. '_', '.', ':', ';', ' ', '!', '?', '+', '-', '/', '=', '<',
  175. '>', '$', '(', ')', '@', '~', '`', '|', '#', '[', ']', '{',
  176. '}', '&', '*', '^', '%']:
  177. return char
  178. elif char in ['"', '\'', '\\']:
  179. return '\\{0}'.format(char)
  180. elif char == '\n':
  181. return '\\n'
  182. elif char == '\r':
  183. return '\\r'
  184. elif char == '\t':
  185. return '\\t'
  186. else:
  187. return '\\x{:02x}'.format(ord(char))
  188. def write_file(filename, content):
  189. """Create the file with the given content"""
  190. print 'Generating {0}'.format(filename)
  191. with open(filename, 'wb') as out_f:
  192. out_f.write(content)
  193. def out_filename(template, n_val, mode):
  194. """Determine the output filename"""
  195. return '{0}_{1}_{2}.cpp'.format(template.name, n_val, mode.identifier)
  196. def main():
  197. """The main function of the script"""
  198. desc = 'Generate files to benchmark'
  199. parser = argparse.ArgumentParser(description=desc)
  200. parser.add_argument(
  201. '--src',
  202. dest='src_dir',
  203. default='src',
  204. help='The directory containing the templates'
  205. )
  206. parser.add_argument(
  207. '--out',
  208. dest='out_dir',
  209. default='generated',
  210. help='The output directory'
  211. )
  212. parser.add_argument(
  213. '--seed',
  214. dest='seed',
  215. default='13',
  216. help='The random seed (to ensure consistent regeneration)'
  217. )
  218. args = parser.parse_args()
  219. random.seed(int(args.seed))
  220. mkdir_p(args.out_dir)
  221. for template in templates_in(args.src_dir):
  222. modes = template.modes()
  223. n_range = template.range()
  224. for n_value in n_range:
  225. base = template.instantiate(n_value)
  226. for mode in modes:
  227. write_file(
  228. os.path.join(
  229. args.out_dir,
  230. out_filename(template, n_value, mode)
  231. ),
  232. mode.convert_from(base)
  233. )
  234. write_file(
  235. os.path.join(args.out_dir, '{0}.json'.format(template.name)),
  236. json.dumps({
  237. 'files': {
  238. n: {
  239. m.identifier: out_filename(template, n, m)
  240. for m in modes
  241. } for n in n_range
  242. },
  243. 'name': template.name,
  244. 'x_axis_label': template.property('x_axis_label'),
  245. 'desc': template.property('desc'),
  246. 'modes': {m.identifier: m.description() for m in modes}
  247. })
  248. )
  249. if __name__ == '__main__':
  250. main()