gaussian.cpp 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315
  1. // Copyright Jim Bosch 2010-2012.
  2. // Distributed under the Boost Software License, Version 1.0.
  3. // (See accompanying file LICENSE_1_0.txt or copy at
  4. // http://www.boost.org/LICENSE_1_0.txt)
  5. #include <boost/python/numpy.hpp>
  6. #include <cmath>
  7. #include <memory>
  8. #ifndef M_PI
  9. #include <boost/math/constants/constants.hpp>
  10. const double M_PI = boost::math::constants::pi<double>();
  11. #endif
  12. namespace bp = boost::python;
  13. namespace bn = boost::python::numpy;
  14. /**
  15. * A 2x2 matrix class, purely for demonstration purposes.
  16. *
  17. * Instead of wrapping this class with Boost.Python, we'll convert it to/from numpy.ndarray.
  18. */
  19. class matrix2 {
  20. public:
  21. double & operator()(int i, int j) {
  22. return _data[i*2 + j];
  23. }
  24. double const & operator()(int i, int j) const {
  25. return _data[i*2 + j];
  26. }
  27. double const * data() const { return _data; }
  28. private:
  29. double _data[4];
  30. };
  31. /**
  32. * A 2-element vector class, purely for demonstration purposes.
  33. *
  34. * Instead of wrapping this class with Boost.Python, we'll convert it to/from numpy.ndarray.
  35. */
  36. class vector2 {
  37. public:
  38. double & operator[](int i) {
  39. return _data[i];
  40. }
  41. double const & operator[](int i) const {
  42. return _data[i];
  43. }
  44. double const * data() const { return _data; }
  45. vector2 operator+(vector2 const & other) const {
  46. vector2 r;
  47. r[0] = _data[0] + other[0];
  48. r[1] = _data[1] + other[1];
  49. return r;
  50. }
  51. vector2 operator-(vector2 const & other) const {
  52. vector2 r;
  53. r[0] = _data[0] - other[0];
  54. r[1] = _data[1] - other[1];
  55. return r;
  56. }
  57. private:
  58. double _data[2];
  59. };
  60. /**
  61. * Matrix-vector multiplication.
  62. */
  63. vector2 operator*(matrix2 const & m, vector2 const & v) {
  64. vector2 r;
  65. r[0] = m(0, 0) * v[0] + m(0, 1) * v[1];
  66. r[1] = m(1, 0) * v[0] + m(1, 1) * v[1];
  67. return r;
  68. }
  69. /**
  70. * Vector inner product.
  71. */
  72. double dot(vector2 const & v1, vector2 const & v2) {
  73. return v1[0] * v2[0] + v1[1] * v2[1];
  74. }
  75. /**
  76. * This class represents a simple 2-d Gaussian (Normal) distribution, defined by a
  77. * mean vector 'mu' and a covariance matrix 'sigma'.
  78. */
  79. class bivariate_gaussian {
  80. public:
  81. vector2 const & get_mu() const { return _mu; }
  82. matrix2 const & get_sigma() const { return _sigma; }
  83. /**
  84. * Evaluate the density of the distribution at a point defined by a two-element vector.
  85. */
  86. double operator()(vector2 const & p) const {
  87. vector2 u = _cholesky * (p - _mu);
  88. return 0.5 * _cholesky(0, 0) * _cholesky(1, 1) * std::exp(-0.5 * dot(u, u)) / M_PI;
  89. }
  90. /**
  91. * Evaluate the density of the distribution at an (x, y) point.
  92. */
  93. double operator()(double x, double y) const {
  94. vector2 p;
  95. p[0] = x;
  96. p[1] = y;
  97. return operator()(p);
  98. }
  99. /**
  100. * Construct from a mean vector and covariance matrix.
  101. */
  102. bivariate_gaussian(vector2 const & mu, matrix2 const & sigma)
  103. : _mu(mu), _sigma(sigma), _cholesky(compute_inverse_cholesky(sigma))
  104. {}
  105. private:
  106. /**
  107. * This evaluates the inverse of the Cholesky factorization of a 2x2 matrix;
  108. * it's just a shortcut in evaluating the density.
  109. */
  110. static matrix2 compute_inverse_cholesky(matrix2 const & m) {
  111. matrix2 l;
  112. // First do cholesky factorization: l l^t = m
  113. l(0, 0) = std::sqrt(m(0, 0));
  114. l(0, 1) = m(0, 1) / l(0, 0);
  115. l(1, 1) = std::sqrt(m(1, 1) - l(0,1) * l(0,1));
  116. // Now do forward-substitution (in-place) to invert:
  117. l(0, 0) = 1.0 / l(0, 0);
  118. l(1, 0) = l(0, 1) = -l(0, 1) / l(1, 1);
  119. l(1, 1) = 1.0 / l(1, 1);
  120. return l;
  121. }
  122. vector2 _mu;
  123. matrix2 _sigma;
  124. matrix2 _cholesky;
  125. };
  126. /*
  127. * We have a two options for wrapping get_mu and get_sigma into NumPy-returning Python methods:
  128. * - we could deep-copy the data, making totally new NumPy arrays;
  129. * - we could make NumPy arrays that point into the existing memory.
  130. * The latter is often preferable, especially if the arrays are large, but it's dangerous unless
  131. * the reference counting is correct: the returned NumPy array needs to hold a reference that
  132. * keeps the memory it points to from being deallocated as long as it is alive. This is what the
  133. * "owner" argument to from_data does - the NumPy array holds a reference to the owner, keeping it
  134. * from being destroyed.
  135. *
  136. * Note that this mechanism isn't completely safe for data members that can have their internal
  137. * storage reallocated. A std::vector, for instance, can be invalidated when it is resized,
  138. * so holding a Python reference to a C++ class that holds a std::vector may not be a guarantee
  139. * that the memory in the std::vector will remain valid.
  140. */
  141. /**
  142. * These two functions are custom wrappers for get_mu and get_sigma, providing the shallow-copy
  143. * conversion with reference counting described above.
  144. *
  145. * It's also worth noting that these return NumPy arrays that cannot be modified in Python;
  146. * the const overloads of vector::data() and matrix::data() return const references,
  147. * and passing a const pointer to from_data causes NumPy's 'writeable' flag to be set to false.
  148. */
  149. static bn::ndarray py_get_mu(bp::object const & self) {
  150. vector2 const & mu = bp::extract<bivariate_gaussian const &>(self)().get_mu();
  151. return bn::from_data(
  152. mu.data(),
  153. bn::dtype::get_builtin<double>(),
  154. bp::make_tuple(2),
  155. bp::make_tuple(sizeof(double)),
  156. self
  157. );
  158. }
  159. static bn::ndarray py_get_sigma(bp::object const & self) {
  160. matrix2 const & sigma = bp::extract<bivariate_gaussian const &>(self)().get_sigma();
  161. return bn::from_data(
  162. sigma.data(),
  163. bn::dtype::get_builtin<double>(),
  164. bp::make_tuple(2, 2),
  165. bp::make_tuple(2 * sizeof(double), sizeof(double)),
  166. self
  167. );
  168. }
  169. /**
  170. * To allow the constructor to work, we need to define some from-Python converters from NumPy arrays
  171. * to the matrix/vector types. The rvalue-from-python functionality is not well-documented in Boost.Python
  172. * itself; you can learn more from boost/python/converter/rvalue_from_python_data.hpp.
  173. */
  174. /**
  175. * We start with two functions that just copy a NumPy array into matrix/vector objects. These will be used
  176. * in the templated converted below. The first just uses the operator[] overloads provided by
  177. * bp::object.
  178. */
  179. static void copy_ndarray_to_mv2(bn::ndarray const & array, vector2 & vec) {
  180. vec[0] = bp::extract<double>(array[0]);
  181. vec[1] = bp::extract<double>(array[1]);
  182. }
  183. /**
  184. * Here, we'll take the alternate approach of using the strides to access the array's memory directly.
  185. * This can be much faster for large arrays.
  186. */
  187. static void copy_ndarray_to_mv2(bn::ndarray const & array, matrix2 & mat) {
  188. // Unfortunately, get_strides() can't be inlined, so it's best to call it once up-front.
  189. Py_intptr_t const * strides = array.get_strides();
  190. for (int i = 0; i < 2; ++i) {
  191. for (int j = 0; j < 2; ++j) {
  192. mat(i, j) = *reinterpret_cast<double const *>(array.get_data() + i * strides[0] + j * strides[1]);
  193. }
  194. }
  195. }
  196. /**
  197. * Here's the actual converter. Because we've separated the differences into the above functions,
  198. * we can write a single template class that works for both matrix2 and vector2.
  199. */
  200. template <typename T, int N>
  201. struct mv2_from_python {
  202. /**
  203. * Register the converter.
  204. */
  205. mv2_from_python() {
  206. bp::converter::registry::push_back(
  207. &convertible,
  208. &construct,
  209. bp::type_id< T >()
  210. );
  211. }
  212. /**
  213. * Test to see if we can convert this to the desired type; if not return zero.
  214. * If we can convert, returned pointer can be used by construct().
  215. */
  216. static void * convertible(PyObject * p) {
  217. try {
  218. bp::object obj(bp::handle<>(bp::borrowed(p)));
  219. std::auto_ptr<bn::ndarray> array(
  220. new bn::ndarray(
  221. bn::from_object(obj, bn::dtype::get_builtin<double>(), N, N, bn::ndarray::V_CONTIGUOUS)
  222. )
  223. );
  224. if (array->shape(0) != 2) return 0;
  225. if (N == 2 && array->shape(1) != 2) return 0;
  226. return array.release();
  227. } catch (bp::error_already_set & err) {
  228. bp::handle_exception();
  229. return 0;
  230. }
  231. }
  232. /**
  233. * Finish the conversion by initializing the C++ object into memory prepared by Boost.Python.
  234. */
  235. static void construct(PyObject * obj, bp::converter::rvalue_from_python_stage1_data * data) {
  236. // Extract the array we passed out of the convertible() member function.
  237. std::auto_ptr<bn::ndarray> array(reinterpret_cast<bn::ndarray*>(data->convertible));
  238. // Find the memory block Boost.Python has prepared for the result.
  239. typedef bp::converter::rvalue_from_python_storage<T> storage_t;
  240. storage_t * storage = reinterpret_cast<storage_t*>(data);
  241. // Use placement new to initialize the result.
  242. T * m_or_v = new (storage->storage.bytes) T();
  243. // Fill the result with the values from the NumPy array.
  244. copy_ndarray_to_mv2(*array, *m_or_v);
  245. // Finish up.
  246. data->convertible = storage->storage.bytes;
  247. }
  248. };
  249. BOOST_PYTHON_MODULE(gaussian) {
  250. bn::initialize();
  251. // Register the from-python converters
  252. mv2_from_python< vector2, 1 >();
  253. mv2_from_python< matrix2, 2 >();
  254. typedef double (bivariate_gaussian::*call_vector)(vector2 const &) const;
  255. bp::class_<bivariate_gaussian>("bivariate_gaussian", bp::init<bivariate_gaussian const &>())
  256. // Declare the constructor (wouldn't work without the from-python converters).
  257. .def(bp::init< vector2 const &, matrix2 const & >())
  258. // Use our custom reference-counting getters
  259. .add_property("mu", &py_get_mu)
  260. .add_property("sigma", &py_get_sigma)
  261. // First overload accepts a two-element array argument
  262. .def("__call__", (call_vector)&bivariate_gaussian::operator())
  263. // This overload works like a binary NumPy universal function: you can pass
  264. // in scalars or arrays, and the C++ function will automatically be called
  265. // on each element of an array argument.
  266. .def("__call__", bn::binary_ufunc<bivariate_gaussian,double,double,double>::make())
  267. ;
  268. }