char_stat.py 1.6 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859
  1. #!/usr/bin/python
  2. """Utility to generate character statistics about a number of source files"""
  3. # Copyright Abel Sinkovics (abel@sinkovics.hu) 2016.
  4. # Distributed under the Boost Software License, Version 1.0.
  5. # (See accompanying file LICENSE_1_0.txt or copy at
  6. # http://www.boost.org/LICENSE_1_0.txt)
  7. import argparse
  8. import os
  9. def count_characters(root, out):
  10. """Count the occurrances of the different characters in the files"""
  11. if os.path.isfile(root):
  12. with open(root, 'rb') as in_f:
  13. for line in in_f:
  14. for char in line:
  15. if char not in out:
  16. out[char] = 0
  17. out[char] = out[char] + 1
  18. elif os.path.isdir(root):
  19. for filename in os.listdir(root):
  20. count_characters(os.path.join(root, filename), out)
  21. def generate_statistics(root):
  22. """Generate the statistics from all files in root (recursively)"""
  23. out = dict()
  24. count_characters(root, out)
  25. return out
  26. def main():
  27. """The main function of the script"""
  28. desc = 'Generate character statistics from a source tree'
  29. parser = argparse.ArgumentParser(description=desc)
  30. parser.add_argument(
  31. '--src',
  32. dest='src',
  33. required=True,
  34. help='The root of the source tree'
  35. )
  36. parser.add_argument(
  37. '--out',
  38. dest='out',
  39. default='chars.py',
  40. help='The output filename'
  41. )
  42. args = parser.parse_args()
  43. stats = generate_statistics(args.src)
  44. with open(args.out, 'wb') as out_f:
  45. out_f.write('CHARS={0}\n'.format(stats))
  46. if __name__ == '__main__':
  47. main()