// Copyright Nick Thompson, 2019
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt
// or copy at http://www.boost.org/LICENSE_1_0.txt)
#ifndef BOOST_MATH_QUADRATURE_DETAIL_OOURA_FOURIER_INTEGRALS_DETAIL_HPP
#define BOOST_MATH_QUADRATURE_DETAIL_OOURA_FOURIER_INTEGRALS_DETAIL_HPP
#include <utility> // for std::pair.
#include <mutex>
#include <atomic>
#include <vector>
#include <iostream>
#include <boost/math/special_functions/expm1.hpp>
#include <boost/math/special_functions/sin_pi.hpp>
#include <boost/math/special_functions/cos_pi.hpp>
#include <boost/math/constants/constants.hpp>

namespace boost { namespace math { namespace quadrature { namespace detail {

// Ooura and Mori, A robust double exponential formula for Fourier-type integrals,
// eta is the argument to the exponential in equation 3.3:
template<class Real>
std::pair<Real, Real> ooura_eta(Real x, Real alpha) {
    using std::expm1;
    using std::exp;
    using std::abs;
    Real expx = exp(x);
    Real eta_prime = 2 + alpha/expx + expx/4;
    Real eta;
    // This is the fast branch:
    if (abs(x) > 0.125) {
        eta = 2*x - alpha*(1/expx - 1) + (expx - 1)/4;
    }
    else {// this is the slow branch using expm1 for small x:
        eta = 2*x - alpha*expm1(-x) + expm1(x)/4;
    }
    return {eta, eta_prime};
}

// Ooura and Mori, A robust double exponential formula for Fourier-type integrals,
// equation 3.6:
template<class Real>
Real calculate_ooura_alpha(Real h) {
    using boost::math::constants::pi;
    using std::log1p;
    using std::sqrt;
    Real x = sqrt(16 + 4*log1p(pi<Real>()/h)/h);
    return 1/x;
}
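
// Sketch of the substitution implemented below, for orientation: with mesh
// size h, the positive-n sine nodes take the form t_n = pi*phi(nh)/h, where
// phi(x) = x/(1 - exp(-eta(x))) and eta is computed by ooura_eta above.
// As n -> +infty, phi(nh) -> nh, so t_n approaches n*pi, the zeros of sin;
// as n -> -infty, t_n decays to zero double exponentially. This clustering
// of the nodes toward the zeros of the oscillator is what lets the truncated
// sums below converge so quickly.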
template<class Real>
std::pair<Real, Real> ooura_sin_node_and_weight(long n, Real h, Real alpha)
{
    using std::expm1;
    using std::exp;
    using std::abs;
    using boost::math::constants::pi;
    using std::isnan;

    if (n == 0) {
        // Equation 44 of https://arxiv.org/pdf/0911.4796.pdf
        // Fourier Transform of the Stretched Exponential Function: Analytic Error Bounds,
        // Double Exponential Transform, and Open-Source Implementation,
        // Joachim Wuttke.
        // The C library libkww provides functions to compute the Kohlrausch-Williams-Watts function,
        // the Laplace-Fourier transform of the stretched (or compressed) exponential function exp(-t^beta)
        // for exponent beta between 0.1 and 1.9 with sixteen decimal digits accuracy.
        Real eta_prime_0 = Real(2) + alpha + Real(1)/Real(4);
        Real node = pi<Real>()/(eta_prime_0*h);
        Real weight = pi<Real>()*boost::math::sin_pi(1/(eta_prime_0*h));
        Real eta_dbl_prime = -alpha + Real(1)/Real(4);
        Real phi_prime_0 = (1 - eta_dbl_prime/(eta_prime_0*eta_prime_0))/2;
        weight *= phi_prime_0;
        return {node, weight};
    }
    Real x = n*h;
    auto p = ooura_eta(x, alpha);
    auto eta = p.first;
    auto eta_prime = p.second;

    Real expm1_meta = expm1(-eta);
    Real exp_meta = exp(-eta);
    Real node = -n*pi<Real>()/expm1_meta;

    // I have verified that this is not a significant source of inaccuracy in the weight computation:
    Real phi_prime = -(expm1_meta + x*exp_meta*eta_prime)/(expm1_meta*expm1_meta);

    // The main source of inaccuracy is the computation of sin_pi.
    // But I've agonized over this, and I think it's as good as it can get:
    Real s = pi<Real>();
    Real arg;
    if (eta > 1) {
        arg = n/( 1/exp_meta - 1 );
        s *= boost::math::sin_pi(arg);
        if (n&1) {
            s *= -1;
        }
    }
    else if (eta < -1) {
        arg = n/(1-exp_meta);
        s *= boost::math::sin_pi(arg);
    }
    else {
        arg = -n*exp_meta/expm1_meta;
        s *= boost::math::sin_pi(arg);
        if (n&1) {
            s *= -1;
        }
    }

    Real weight = s*phi_prime;
    return {node, weight};
}

#ifdef BOOST_MATH_INSTRUMENT_OOURA
template<class Real>
void print_ooura_estimate(size_t i, Real I0, Real I1, Real omega) {
    using std::abs;
    std::cout << std::defaultfloat
              << std::setprecision(std::numeric_limits<Real>::digits10)
              << std::fixed;
    std::cout << "h = " << Real(1)/Real(1<<i)
              << ", I_h = " << I0/omega
              << ", absolute error estimate = " << abs(I0-I1) << "\n";
}
#endif

template<class Real>
std::pair<Real, Real> ooura_cos_node_and_weight(long n, Real h, Real alpha)
{
    using std::expm1;
    using std::exp;
    using std::abs;
    using boost::math::constants::pi;

    Real x = h*(n-Real(1)/Real(2));
    auto p = ooura_eta(x, alpha);
    auto eta = p.first;
    auto eta_prime = p.second;
    Real expm1_meta = expm1(-eta);
    Real exp_meta = exp(-eta);
    Real node = pi<Real>()*(Real(1)/Real(2)-n)/expm1_meta;

    Real phi_prime = -(expm1_meta + x*exp_meta*eta_prime)/(expm1_meta*expm1_meta);

    // Ooura and Mori, A robust double exponential formula for Fourier-type integrals,
    // equation 4.6:
    Real s = pi<Real>();
    Real arg;
    if (eta < -1) {
        arg = -(n-Real(1)/Real(2))/expm1_meta;
        s *= boost::math::cos_pi(arg);
    }
    else {
        arg = -(n-Real(1)/Real(2))*exp_meta/expm1_meta;
        s *= boost::math::sin_pi(arg);
        if (n&1) {
            s *= -1;
        }
    }

    Real weight = s*phi_prime;
    return {node, weight};
}

template<class Real>
class ooura_fourier_sin_detail {
public:
    ooura_fourier_sin_detail(const Real relative_error_goal, size_t levels) {
#ifdef BOOST_MATH_INSTRUMENT_OOURA
        std::cout << "ooura_fourier_sin with relative error goal " << relative_error_goal
                  << " & " << levels << " levels." << std::endl;
#endif // BOOST_MATH_INSTRUMENT_OOURA
        if (relative_error_goal < std::numeric_limits<Real>::epsilon() * 2) {
            throw std::domain_error("The relative error goal cannot be smaller than the unit roundoff.");
        }
        using std::abs;
        requested_levels_ = levels;
        starting_level_ = 0;
        rel_err_goal_ = relative_error_goal;
        big_nodes_.reserve(levels);
        bweights_.reserve(levels);
        little_nodes_.reserve(levels);
        lweights_.reserve(levels);

        // Compute the nodes and weights in a wider type where one is available,
        // then round them down to the working precision:
        for (size_t i = 0; i < levels; ++i) {
            if (std::is_same<Real, float>::value) {
                add_level<double>(i);
            }
            else if (std::is_same<Real, double>::value) {
                add_level<long double>(i);
            }
            else {
                add_level<Real>(i);
            }
        }
    }

    std::vector<std::vector<Real>> const & big_nodes() const {
        return big_nodes_;
    }

    std::vector<std::vector<Real>> const & weights_for_big_nodes() const {
        return bweights_;
    }

    std::vector<std::vector<Real>> const & little_nodes() const {
        return little_nodes_;
    }

    std::vector<std::vector<Real>> const & weights_for_little_nodes() const {
        return lweights_;
    }
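
    // A minimal usage sketch for this class (hypothetical driver code; the
    // public wrappers ooura_fourier_sin/ooura_fourier_cos in the non-detail
    // header are what callers should normally use):
    //
    //   ooura_fourier_sin_detail<double> integrator(1e-9, 8);
    //   auto f = [](double t) { return std::exp(-t); };
    //   auto p = integrator.integrate(f, 3.0);
    //   // p.first ~ integral of exp(-t)*sin(3t) over (0, infty), which is
    //   // omega/(1 + omega^2) = 3/10; p.second is the relative error estimate.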
    template<class F>
    std::pair<Real, Real> integrate(F const & f, Real omega) {
        using std::abs;
        using std::max;
        using std::isnan;
        using boost::math::constants::pi;

        if (omega == 0) {
            return {Real(0), Real(0)};
        }
        if (omega < 0) {
            auto p = this->integrate(f, -omega);
            return {-p.first, p.second};
        }

        Real I1 = std::numeric_limits<Real>::quiet_NaN();
        Real relative_error_estimate = std::numeric_limits<Real>::quiet_NaN();
        // As we compute integrals, we learn about their structure.
        // Assuming we compute f(t)sin(wt) for many different omega, this gives some
        // a posteriori ability to choose a refinement level that is roughly appropriate.
        size_t i = starting_level_;
        do {
            Real I0 = estimate_integral(f, omega, i);
#ifdef BOOST_MATH_INSTRUMENT_OOURA
            print_ooura_estimate(i, I0, I1, omega);
#endif
            Real absolute_error_estimate = abs(I0-I1);
            Real scale = (max)(abs(I0), abs(I1));
            if (!isnan(I1) && absolute_error_estimate <= rel_err_goal_*scale) {
                starting_level_ = (max)(long(i) - 1, long(0));
                return {I0/omega, absolute_error_estimate/scale};
            }
            I1 = I0;
        } while(++i < big_nodes_.size());

        // We've used up all our precomputed levels.
        // Now we need to add more.
        // It might seem reasonable to just keep adding levels indefinitely, if that's what the user wants.
        // But in fact the nodes and weights just merge into each other and the error gets worse after a certain number.
        // This value for max_additional_levels was chosen by observation of a slowly converging oscillatory integral:
        // f(x) := cos(7cos(x))sin(x)/x
        size_t max_additional_levels = 4;
        while (big_nodes_.size() < requested_levels_ + max_additional_levels) {
            size_t ii = big_nodes_.size();
            if (std::is_same<Real, float>::value) {
                add_level<double>(ii);
            }
            else if (std::is_same<Real, double>::value) {
                add_level<long double>(ii);
            }
            else {
                add_level<Real>(ii);
            }
            Real I0 = estimate_integral(f, omega, ii);
            Real absolute_error_estimate = abs(I0-I1);
            Real scale = (max)(abs(I0), abs(I1));
#ifdef BOOST_MATH_INSTRUMENT_OOURA
            print_ooura_estimate(ii, I0, I1, omega);
#endif
            if (absolute_error_estimate <= rel_err_goal_*scale) {
                starting_level_ = (max)(long(ii) - 1, long(0));
                return {I0/omega, absolute_error_estimate/scale};
            }
            I1 = I0;
            ++ii;
        }

        starting_level_ = static_cast<long>(big_nodes_.size() - 2);
        return {I1/omega, relative_error_estimate};
    }
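
    // A note on the convergence test above: each level halves h, so the loop
    // compares the level-i estimate I_h against the level-(i-1) estimate
    // I_{2h}, and stops once |I_h - I_{2h}| <= rel_err_goal_*max(|I_h|, |I_{2h}|).
    // The returned value is I_h/omega because estimate_integral evaluates f at
    // node/omega; the second member of the pair is the relative error estimate.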
private:

    template<class PreciseReal>
    void add_level(size_t i) {
        using std::abs;
        size_t current_num_levels = big_nodes_.size();
        Real unit_roundoff = std::numeric_limits<Real>::epsilon()/2;
        // h0 = 1. Then all further levels have h_i = 1/2^i.
        // Since the nodes don't nest, we could conceivably divide h by (say) 1.5, or 3.
        // It's not clear how much benefit (or loss) would be obtained from this.
        PreciseReal h = PreciseReal(1)/PreciseReal(1<<i);

        std::vector<Real> bnode_row;
        std::vector<Real> bweight_row;
        // This is a pretty good estimate for how many elements will be placed in the vector:
        bnode_row.reserve((static_cast<size_t>(1)<<i)*sizeof(Real));
        bweight_row.reserve((static_cast<size_t>(1)<<i)*sizeof(Real));

        std::vector<Real> lnode_row;
        std::vector<Real> lweight_row;
        lnode_row.reserve((static_cast<size_t>(1)<<i)*sizeof(Real));
        lweight_row.reserve((static_cast<size_t>(1)<<i)*sizeof(Real));

        Real max_weight = 1;
        auto alpha = calculate_ooura_alpha(h);
        long n = 0;
        Real w;
        do {
            auto precise_nw = ooura_sin_node_and_weight(n, h, alpha);
            Real node = static_cast<Real>(precise_nw.first);
            Real weight = static_cast<Real>(precise_nw.second);
            w = weight;
            if (bnode_row.size() == bnode_row.capacity()) {
                bnode_row.reserve(2*bnode_row.size());
                bweight_row.reserve(2*bnode_row.size());
            }

            bnode_row.push_back(node);
            bweight_row.push_back(weight);
            if (abs(weight) > max_weight) {
                max_weight = abs(weight);
            }
            ++n;
            // f(t)->0 as t->infty, which is why the weights are computed up to the unit roundoff.
        } while(abs(w) > unit_roundoff*max_weight);

        // This class tends to consume a lot of memory; shrink the vectors back down to size:
        bnode_row.shrink_to_fit();
        bweight_row.shrink_to_fit();

        // Why are we splitting the nodes into regimes where t_n >> 1 and t_n << 1?
        // It creates the opportunity to sensibly truncate the quadrature sum to significant terms.
        n = -1;
        do {
            auto precise_nw = ooura_sin_node_and_weight(n, h, alpha);
            Real node = static_cast<Real>(precise_nw.first);
            if (node <= 0) {
                break;
            }
            Real weight = static_cast<Real>(precise_nw.second);
            w = weight;
            using std::isnan;
            if (isnan(node)) {
                // This occurs at n = -11 in quad precision:
                break;
            }
            if (lnode_row.size() > 0) {
                if (lnode_row[lnode_row.size()-1] == node) {
                    // The nodes have fused into each other:
                    break;
                }
            }
            if (lnode_row.size() == lnode_row.capacity()) {
                lnode_row.reserve(2*lnode_row.size());
                lweight_row.reserve(2*lnode_row.size());
            }

            lnode_row.push_back(node);
            lweight_row.push_back(weight);
            if (abs(weight) > max_weight) {
                max_weight = abs(weight);
            }
            --n;
            // f(t)->infty is possible as t->0, hence compute up to the min.
        } while(abs(w) > (std::numeric_limits<Real>::min)()*max_weight);

        lnode_row.shrink_to_fit();
        lweight_row.shrink_to_fit();

        // std::scoped_lock once C++17 is more common?
        std::lock_guard<std::mutex> lock(node_weight_mutex_);
        // Another thread might have already finished this calculation and appended it to the nodes/weights:
        if (current_num_levels == big_nodes_.size()) {
            big_nodes_.push_back(bnode_row);
            bweights_.push_back(bweight_row);

            little_nodes_.push_back(lnode_row);
            lweights_.push_back(lweight_row);
        }
    }

    template<class F>
    Real estimate_integral(F const & f, Real omega, size_t i) {
        // Because so few function evaluations are required to get high accuracy on the integrals in the tests,
        // Kahan summation doesn't really help.
        //auto cond = boost::math::tools::summation_condition_number<Real, true>(0);
        Real I0 = 0;
        auto const & b_nodes = big_nodes_[i];
        auto const & b_weights = bweights_[i];
        // Will benchmark if this is helpful:
        Real inv_omega = 1/omega;
        for (size_t j = 0; j < b_nodes.size(); ++j) {
            I0 += f(b_nodes[j]*inv_omega)*b_weights[j];
        }

        auto const & l_nodes = little_nodes_[i];
        auto const & l_weights = lweights_[i];
        // If f decays rapidly as |t|->infty, not all of these calls are necessary.
        for (size_t j = 0; j < l_nodes.size(); ++j) {
            I0 += f(l_nodes[j]*inv_omega)*l_weights[j];
        }
        return I0;
    }

    std::mutex node_weight_mutex_;
    // Nodes for n >= 0, giving t_n = pi*phi(nh)/h. Generally t_n >> 1.
    std::vector<std::vector<Real>> big_nodes_;
    // The term bweights_ will indicate that these are weights corresponding
    // to the big nodes:
    std::vector<std::vector<Real>> bweights_;

    // Nodes for n < 0: Generally t_n << 1, and an invariant is that t_n > 0.
    std::vector<std::vector<Real>> little_nodes_;
    std::vector<std::vector<Real>> lweights_;

    Real rel_err_goal_;
    std::atomic<long> starting_level_;
    size_t requested_levels_;
};
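
// The cosine integrator below mirrors the sine machinery. Two differences
// are worth noting: the cosine nodes come from the half-integer offset
// x = (n - 1/2)h, so the large nodes approach the zeros of cos(t) rather
// than sin(t); and a node at t = 0 is acceptable, since a cosine integrand
// need not vanish at the origin (see the comments in add_level below).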
template<class Real>
class ooura_fourier_cos_detail {
public:
    ooura_fourier_cos_detail(const Real relative_error_goal, size_t levels) {
#ifdef BOOST_MATH_INSTRUMENT_OOURA
        std::cout << "ooura_fourier_cos with relative error goal " << relative_error_goal
                  << " & " << levels << " levels." << std::endl;
        std::cout << "epsilon for type = " << std::numeric_limits<Real>::epsilon() << std::endl;
#endif // BOOST_MATH_INSTRUMENT_OOURA
        if (relative_error_goal < std::numeric_limits<Real>::epsilon() * 2) {
            throw std::domain_error("The relative error goal cannot be smaller than the unit roundoff!");
        }
        using std::abs;
        requested_levels_ = levels;
        starting_level_ = 0;
        rel_err_goal_ = relative_error_goal;
        big_nodes_.reserve(levels);
        bweights_.reserve(levels);
        little_nodes_.reserve(levels);
        lweights_.reserve(levels);

        for (size_t i = 0; i < levels; ++i) {
            if (std::is_same<Real, float>::value) {
                add_level<double>(i);
            }
            else if (std::is_same<Real, double>::value) {
                add_level<long double>(i);
            }
            else {
                add_level<Real>(i);
            }
        }
    }

    template<class F>
    std::pair<Real, Real> integrate(F const & f, Real omega) {
        using std::abs;
        using std::max;
        using std::isnan;
        using boost::math::constants::pi;

        if (omega == 0) {
            throw std::domain_error("At omega = 0, the integral is not oscillatory. The user must choose an appropriate method for this case.\n");
        }

        if (omega < 0) {
            return this->integrate(f, -omega);
        }

        Real I1 = std::numeric_limits<Real>::quiet_NaN();
        Real absolute_error_estimate = std::numeric_limits<Real>::quiet_NaN();
        Real scale = std::numeric_limits<Real>::quiet_NaN();
        size_t i = starting_level_;
        do {
            Real I0 = estimate_integral(f, omega, i);
#ifdef BOOST_MATH_INSTRUMENT_OOURA
            print_ooura_estimate(i, I0, I1, omega);
#endif
            absolute_error_estimate = abs(I0-I1);
            scale = (max)(abs(I0), abs(I1));
            if (!isnan(I1) && absolute_error_estimate <= rel_err_goal_*scale) {
                starting_level_ = (max)(long(i) - 1, long(0));
                return {I0/omega, absolute_error_estimate/scale};
            }
            I1 = I0;
        } while(++i < big_nodes_.size());

        size_t max_additional_levels = 4;
        while (big_nodes_.size() < requested_levels_ + max_additional_levels) {
            size_t ii = big_nodes_.size();
            if (std::is_same<Real, float>::value) {
                add_level<double>(ii);
            }
            else if (std::is_same<Real, double>::value) {
                add_level<long double>(ii);
            }
            else {
                add_level<Real>(ii);
            }
            Real I0 = estimate_integral(f, omega, ii);
#ifdef BOOST_MATH_INSTRUMENT_OOURA
            print_ooura_estimate(ii, I0, I1, omega);
#endif
            absolute_error_estimate = abs(I0-I1);
            scale = (max)(abs(I0), abs(I1));
            if (absolute_error_estimate <= rel_err_goal_*scale) {
                starting_level_ = (max)(long(ii) - 1, long(0));
                return {I0/omega, absolute_error_estimate/scale};
            }
            I1 = I0;
            ++ii;
        }

        starting_level_ = static_cast<long>(big_nodes_.size() - 2);
        return {I1/omega, absolute_error_estimate/scale};
    }
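
    // Usage sketch (hypothetical, mirroring the sine case):
    //
    //   ooura_fourier_cos_detail<double> integrator(1e-9, 8);
    //   auto f = [](double t) { return std::exp(-t); };
    //   auto p = integrator.integrate(f, 3.0);
    //   // p.first ~ integral of exp(-t)*cos(3t) over (0, infty), which is
    //   // 1/(1 + omega^2) = 1/10.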
private:

    template<class PreciseReal>
    void add_level(size_t i) {
        using std::abs;
        size_t current_num_levels = big_nodes_.size();
        Real unit_roundoff = std::numeric_limits<Real>::epsilon()/2;
        PreciseReal h = PreciseReal(1)/PreciseReal(1<<i);

        std::vector<Real> bnode_row;
        std::vector<Real> bweight_row;
        bnode_row.reserve((static_cast<size_t>(1)<<i)*sizeof(Real));
        bweight_row.reserve((static_cast<size_t>(1)<<i)*sizeof(Real));

        std::vector<Real> lnode_row;
        std::vector<Real> lweight_row;
        lnode_row.reserve((static_cast<size_t>(1)<<i)*sizeof(Real));
        lweight_row.reserve((static_cast<size_t>(1)<<i)*sizeof(Real));

        Real max_weight = 1;
        auto alpha = calculate_ooura_alpha(h);
        long n = 0;
        Real w;
        do {
            auto precise_nw = ooura_cos_node_and_weight(n, h, alpha);
            Real node = static_cast<Real>(precise_nw.first);
            Real weight = static_cast<Real>(precise_nw.second);
            w = weight;
            if (bnode_row.size() == bnode_row.capacity()) {
                bnode_row.reserve(2*bnode_row.size());
                bweight_row.reserve(2*bnode_row.size());
            }

            bnode_row.push_back(node);
            bweight_row.push_back(weight);
            if (abs(weight) > max_weight) {
                max_weight = abs(weight);
            }
            ++n;
            // f(t)->0 as t->infty, which is why the weights are computed up to the unit roundoff.
        } while(abs(w) > unit_roundoff*max_weight);

        bnode_row.shrink_to_fit();
        bweight_row.shrink_to_fit();

        n = -1;
        do {
            auto precise_nw = ooura_cos_node_and_weight(n, h, alpha);
            Real node = static_cast<Real>(precise_nw.first);
            // The function cannot be singular at zero,
            // so zero is not an unreasonable node,
            // unlike in the case of the Fourier sine.
            // Hence only break if the node is negative.
            if (node < 0) {
                break;
            }
            Real weight = static_cast<Real>(precise_nw.second);
            w = weight;
            if (lnode_row.size() > 0) {
                if (lnode_row.back() == node) {
                    // The nodes have fused into each other:
                    break;
                }
            }
            if (lnode_row.size() == lnode_row.capacity()) {
                lnode_row.reserve(2*lnode_row.size());
                lweight_row.reserve(2*lnode_row.size());
            }
            lnode_row.push_back(node);
            lweight_row.push_back(weight);
            if (abs(weight) > max_weight) {
                max_weight = abs(weight);
            }
            --n;
        } while(abs(w) > (std::numeric_limits<Real>::min)()*max_weight);

        lnode_row.shrink_to_fit();
        lweight_row.shrink_to_fit();

        std::lock_guard<std::mutex> lock(node_weight_mutex_);
        // Another thread might have already finished this calculation and appended it to the nodes/weights:
        if (current_num_levels == big_nodes_.size()) {
            big_nodes_.push_back(bnode_row);
            bweights_.push_back(bweight_row);
            little_nodes_.push_back(lnode_row);
            lweights_.push_back(lweight_row);
        }
    }

    template<class F>
    Real estimate_integral(F const & f, Real omega, size_t i) {
        Real I0 = 0;
        auto const & b_nodes = big_nodes_[i];
        auto const & b_weights = bweights_[i];
        Real inv_omega = 1/omega;
        for (size_t j = 0; j < b_nodes.size(); ++j) {
            I0 += f(b_nodes[j]*inv_omega)*b_weights[j];
        }

        auto const & l_nodes = little_nodes_[i];
        auto const & l_weights = lweights_[i];
        for (size_t j = 0; j < l_nodes.size(); ++j) {
            I0 += f(l_nodes[j]*inv_omega)*l_weights[j];
        }
        return I0;
    }

    std::mutex node_weight_mutex_;
    std::vector<std::vector<Real>> big_nodes_;
    std::vector<std::vector<Real>> bweights_;
    std::vector<std::vector<Real>> little_nodes_;
    std::vector<std::vector<Real>> lweights_;
    Real rel_err_goal_;
    std::atomic<long> starting_level_;
    size_t requested_levels_;
};

}}}}
#endif