123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195 |
- // Copyright David Abrahams, Matthias Troyer, Michael Gauckler 2005.
- // Distributed under the Boost Software License, Version 1.0.
- // (See accompanying file LICENSE_1_0.txt or copy at
- // http://www.boost.org/LICENSE_1_0.txt)
- #include <boost/parameter/name.hpp>
- #include <boost/config/workaround.hpp>
- #include <boost/timer.hpp>
- #include <iostream>
- namespace test {
- //
- // This test measures the abstraction overhead of using the named
- // parameter interface. Some actual test results have been recorded
- // in timings.txt in this source file's directory, or
- // http://www.boost.org/libs/parameter/test/timings.txt.
- //
- // Caveats:
- //
- // 1. This test penalizes the named parameter library slightly, by
- // passing two arguments through the named interface, while
- // only passing one through the plain C++ interface.
- //
- // 2. This test does not measure the case where an ArgumentPack is
- // so large that it doesn't fit in the L1 cache.
- //
- // 3. Although we've tried to make this test as general as possible,
- // we are targeting it at a specific application. Where that
- // affects design decisions, we've noted it below in ***...***.
- //
- // 4. The first time you run this program, the time may not be
- // representative because of disk and memory cache effects, so
- // always run it multiple times and ignore the first
- // measurement. This approach will also allow you to estimate
- // the statistical error of your test by observing the
- // variation in the valid times.
- //
- // 5. Try to run this program on a machine that's otherwise idle,
- // or other processes and even device hardware interrupts may
- // interfere by causing caches to be flushed.
// Accumulator function object with a plain C++ interface.
//
// Keeps a running total of every weight handed to operator().  T must be
// value-initializable and support operator+=.
template <typename T>
struct plain_weight_running_total
{
    // Start the running total at a value-initialized T (zero for the
    // arithmetic types).  The member initializer sum() does this on every
    // conforming compiler, so the conditional spelling sum(T()) that was
    // once selected for MSVC older than version 1300 is no longer needed.
    plain_weight_running_total() : sum()
    {
    }

    // Add the weight w to the running total.
    void operator()(T w)
    {
        this->sum += w;
    }

    // Total of all weights accumulated so far.
    T sum;
};
- BOOST_PARAMETER_NAME(weight)
- BOOST_PARAMETER_NAME(value)
- // Accumulator function object with named parameter interface
- template <typename T>
- struct named_param_weight_running_total
- {
- named_param_weight_running_total()
- #if BOOST_WORKAROUND(BOOST_MSVC, < 1300)
- : sum(T())
- #else
- : sum()
- #endif
- {
- }
- template <typename ArgumentPack>
- void operator()(ArgumentPack const& variates)
- {
- this->sum += variates[test::_weight];
- }
- T sum;
- };
// Sink that keeps the measured work observable.  The accumulated totals
// are added to this variable and the executable's return code is computed
// from it, so a smart compiler's dead code elimination cannot optimize
// away anything we're testing.
double live_code = 0.0;
- // Call objects of the given Accumulator type repeatedly
- // with x an argument.
- template <typename Accumulator, typename Arg>
- void hammer(Arg const& x, long const repeats)
- {
- // Strategy: because the sum in an accumulator after each call
- // depends on the previous value of the sum, the CPU's pipeline
- // might be stalled while waiting for the previous addition to
- // complete. Therefore, we allocate an array of accumulators,
- // and update them in sequence, so that there's no dependency
- // between adjacent addition operations.
- //
- // Additionally, if there were only one accumulator, the compiler or
- // CPU might decide to update the value in a register rather than
- // writing it back to memory. We want each operation to at least
- // update the L1 cache. *** Note: This concern is specific to the
- // particular application at which we're targeting the test. ***
- // This has to be at least as large as the number of simultaneous
- // accumulations that can be executing in the compiler pipeline. A
- // safe number here is larger than the machine's maximum pipeline
- // depth. If you want to test the L2 or L3 cache, or main memory,
- // you can increase the size of this array. 1024 is an upper limit
- // on the pipeline depth of current vector machines.
- std::size_t const number_of_accumulators = 1024;
- Accumulator a[number_of_accumulators];
- for (long iteration = 0; iteration < repeats; ++iteration)
- {
- for (Accumulator* ap = a; ap < a + number_of_accumulators; ++ap)
- {
- (*ap)(x);
- }
- }
- // Accumulate all the partial sums to avoid dead code elimination.
- for (Accumulator* ap = a; ap < a + number_of_accumulators; ++ap)
- {
- test::live_code += ap->sum;
- }
- }
- // Measure the time required to hammer accumulators of the given
- // type with the argument x.
- template <typename Accumulator, typename T>
- double measure(T const& x, long const repeats)
- {
- // Hammer accumulators a couple of times to ensure the instruction
- // cache is full of our test code, and that we don't measure the cost
- // of a page fault for accessing the data page containing the memory
- // where the accumulators will be allocated.
- test::hammer<Accumulator>(x, repeats);
- test::hammer<Accumulator>(x, repeats);
- // Now start a timer.
- boost::timer time;
- test::hammer<Accumulator>(x, repeats); // This time, we'll measure.
- return time.elapsed();
- }
- }
- int main()
- {
- // First decide how many repetitions to measure.
- long repeats = 100;
- double measured = 0;
- while (measured < 1.0 && repeats <= 10000000)
- {
- repeats *= 10;
- boost::timer time;
- test::hammer<test::plain_weight_running_total<double> >(.1, repeats);
- test::hammer<test::named_param_weight_running_total<double> >(
- (test::_weight = .1, test::_value = .2), repeats
- );
- measured = time.elapsed();
- }
-
- std::cout
- << "plain time: "
- << test::measure<test::plain_weight_running_total<double> >(
- .1, repeats
- )
- << std::endl;
- std::cout
- << "named parameter time: "
- << test::measure<test::named_param_weight_running_total<double> >(
- (test::_weight = .1, test::_value = .2), repeats
- )
- << std::endl;
- // This is ultimately responsible for preventing all the test code
- // from being optimized away. Change this to return 0 and you
- // unplug the whole test's life support system.
- return test::live_code < 0.;
- }
|