Added Template line profiler. Fixed documentation.
This commit is contained in:
parent
33ea40a02f
commit
6a13eb0c44
2 changed files with 204 additions and 8 deletions
6
Makefile
6
Makefile
|
|
@ -3,7 +3,7 @@ C = clang
|
||||||
COMMON_FLAGS = -Wall -MMD
|
COMMON_FLAGS = -Wall -MMD
|
||||||
C_FLAGS = $(COMMON_FLAGS)
|
C_FLAGS = $(COMMON_FLAGS)
|
||||||
CC = clang++
|
CC = clang++
|
||||||
CC_FLAGS = $(COMMON_FLAGS) -std=c++17
|
CC_FLAGS = $(COMMON_FLAGS) -std=c++17 # -funsafe-math-optimizations
|
||||||
LD_FLAGS =
|
LD_FLAGS =
|
||||||
INCLUDES =
|
INCLUDES =
|
||||||
|
|
||||||
|
|
@ -18,11 +18,11 @@ OBJECTS = $(SOURCES:.cpp=.o)
|
||||||
$(EXEC): $(COBJECTS) $(OBJECTS)
|
$(EXEC): $(COBJECTS) $(OBJECTS)
|
||||||
$(CC) $(COBJECTS) $(OBJECTS) -o $(EXEC) $(LD_FLAGS)
|
$(CC) $(COBJECTS) $(OBJECTS) -o $(EXEC) $(LD_FLAGS)
|
||||||
|
|
||||||
# To obtain object files
|
# To obtain C++ object files
|
||||||
%.o: %.cpp
|
%.o: %.cpp
|
||||||
$(CC) $(INCLUDES) $(CC_FLAGS) -o $@ -c $<
|
$(CC) $(INCLUDES) $(CC_FLAGS) -o $@ -c $<
|
||||||
|
|
||||||
# To obtain object files
|
# To obtain C object files
|
||||||
%.o: %.c
|
%.o: %.c
|
||||||
$(C) $(INCLUDES) $(C_FLAGS) -o $@ -c $<
|
$(C) $(INCLUDES) $(C_FLAGS) -o $@ -c $<
|
||||||
|
|
||||||
|
|
|
||||||
206
utils.hpp
206
utils.hpp
|
|
@ -28,7 +28,7 @@ std::ostream& operator<<(std::ostream& os, const C<T,Args...>& objs)
|
||||||
return os;
|
return os;
|
||||||
}//*/
|
}//*/
|
||||||
|
|
||||||
/// define UTILS_NO_CHRONO to deactivate chronometer
|
// Define UTILS_NO_CHRONO to deactivate the chronometer.
|
||||||
#ifndef UTILS_NO_CHRONO
|
#ifndef UTILS_NO_CHRONO
|
||||||
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
|
@ -36,15 +36,18 @@ std::ostream& operator<<(std::ostream& os, const C<T,Args...>& objs)
|
||||||
#include <ratio>
|
#include <ratio>
|
||||||
#include <ctime>
|
#include <ctime>
|
||||||
|
|
||||||
/// Convenience macro to create a Chronometer object that will measure the execution time of its scope.
|
#define TIMER(str) Chronometer __chrono((str)); ///< Convenience macro to create a Chronometer object that will measure the execution time of its scope.
|
||||||
#define TIMER(str) Chronometer __chrono((str))
|
|
||||||
|
|
||||||
/// Convenience macro to create a Chronometer object that will measure the execution time of its scope, with a for-loop enveloping the code to time.
|
/// Convenience macro to create a Chronometer object that will measure the execution time of its scope, with a for-loop enveloping the code to time.
|
||||||
///
|
///
|
||||||
/// Usage : TIMER_FOR("My timer", N, /* The code will be executed N times ! */)
|
/// Usage : `TIMER_FOR("My timer", N, /* The code will be executed N times ! */)`
|
||||||
#define TIMER_FOR(str, N, code) {Chronometer __chrono((str)); for(size_t i_TIMER_FOR = 0 ; i_TIMER_FOR < N ; i_TIMER_FOR++) {code} }
|
#define TIMER_FOR(str, N, code) {Chronometer __chrono((str)); for(size_t i_TIMER_FOR = 0 ; i_TIMER_FOR < N ; i_TIMER_FOR++) {code} }
|
||||||
|
|
||||||
/// \class Chronometer class. Used to measure the execution time of its scope.
|
/// \brief Chronometer class. Used to measure the execution time of its scope. Uses C++11 <chrono>.
|
||||||
|
///
|
||||||
|
/// The chronometer can be disabled by defining `UTILS_NO_CHRONO` before including the header. All the chronometer macros will be disabled safely and can be left in the code.
|
||||||
|
///
|
||||||
|
/// It is recommended to use the macros to use the Chronometer class.
|
||||||
///
|
///
|
||||||
/// Usage :
|
/// Usage :
|
||||||
/// \code{.cpp}
|
/// \code{.cpp}
|
||||||
|
|
@ -72,6 +75,13 @@ std::ostream& operator<<(std::ostream& os, const C<T,Args...>& objs)
|
||||||
/// } // no message is displayed because displayAtDestruction_ is set to false
|
/// } // no message is displayed because displayAtDestruction_ is set to false
|
||||||
/// \endcode
|
/// \endcode
|
||||||
///
|
///
|
||||||
|
/// A loop-macro for averaging time measurements is provided for convenience :
|
||||||
|
/// \code{.cpp}
|
||||||
|
/// TIMER_FOR("My timer", N, /* Code with varying execution time that will be executed N times. */)
|
||||||
|
/// \endcode
|
||||||
|
///
|
||||||
|
/// It allows easy averaging of a code that would have a highly varying execution time.
|
||||||
|
///
|
||||||
template<int T=1>
|
template<int T=1>
|
||||||
struct Chronometer
|
struct Chronometer
|
||||||
{
|
{
|
||||||
|
|
@ -122,4 +132,190 @@ struct Chronometer
|
||||||
#define TIMER_FOR(str, N, code) { for(size_t i_TIMER_FOR = 0 ; i_TIMER_FOR < N ; i_TIMER_FOR++) {code} }
|
#define TIMER_FOR(str, N, code) { for(size_t i_TIMER_FOR = 0 ; i_TIMER_FOR < N ; i_TIMER_FOR++) {code} }
|
||||||
#endif// chronometer
|
#endif// chronometer
|
||||||
|
|
||||||
|
// Define UTILS_NO_PROFILER to deactivate the profiler.
|
||||||
|
#ifndef UTILS_NO_PROFILER
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <iomanip>
|
||||||
|
#include <cmath>
|
||||||
|
#include <vector>
|
||||||
|
#include <algorithm>
|
||||||
|
#include <chrono>
|
||||||
|
#include <ctime>
|
||||||
|
|
||||||
|
/// Maximum number of lines to store. If the file you want to profile has more lines than this value, change the value of this define before including the header.
|
||||||
|
#ifndef UTILS_PROFILER_NMAX
|
||||||
|
#define UTILS_PROFILER_NMAX 1000
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define P_LINE_INIT() Profiler _line_profiler(__FILE__); ///< Initializes the profiler object.
|
||||||
|
#define P_LINE_BEGIN() _line_profiler.Reset(); ///< Resets the timer of the profiler object.
|
||||||
|
#define P_LINE() _line_profiler.ProfileLine(__LINE__); ///< Measures the time elapsed since the last timer reset.
|
||||||
|
#define P_LINE_DISPLAY() _line_profiler.DisplayData(); ///< Prints the profiler results to the console.
|
||||||
|
|
||||||
|
/// \brief Profiler class. This class profiles a code line by line. It is intrusive : you need to modify your code in order to profile it.
|
||||||
|
///
|
||||||
|
/// Note that the presence of the profiler prevents the compiler from optimizing parts of the code, resulting in a much slower execution.
|
||||||
|
/// The profiler also adds overhead. In the example below, the profiled code is about 40 times slower when the profiler is active.
|
||||||
|
///
|
||||||
|
/// The profiler stores the measured execution time of each line in a static array stored on the stack. The size of this array is `UTILS_PROFILER_NMAX`, with a default value of 1000.
|
||||||
|
/// If the file you want to profile has more lines than this value, change the value of this define before including the header.
|
||||||
|
///
|
||||||
|
/// In order to use the profiler, it is recommended to use the provided macros : `P_LINE_INIT(), P_LINE_BEGIN(), P_LINE(), and P_LINE_DISPLAY()`.
|
||||||
|
///
|
||||||
|
/// The profiler can be disabled by defining `UTILS_NO_PROFILER` before including the header. The profiler macros can be left in the code without impacting its performance.
|
||||||
|
///
|
||||||
|
/// Here is an example of use :
|
||||||
|
/// \code{.cpp}
|
||||||
|
/// P_LINE_INIT() // Initializes the profiler object
|
||||||
|
/// {TIMER("Some code"); P_LINE()// Uses a regular timer to time the whole execution.
|
||||||
|
/// constexpr int N = 1000000; P_LINE()// Every P_LINE() call measures the time elapsed since the last reset.
|
||||||
|
/// float *a1 = new float[N]; P_LINE()// Every P_LINE() call also automatically resets the timer after measuring the time.
|
||||||
|
/// float *a2 = new float[N]; P_LINE()
|
||||||
|
///
|
||||||
|
/// for (size_t i = 0; i < N; i++) { P_LINE()// Placing the P_LINE() like so allows for the measurement of the condition checking and increment.
|
||||||
|
/// a1[i] = i; P_LINE()
|
||||||
|
/// a2[i] = N-i; P_LINE()
|
||||||
|
/// }
|
||||||
|
///
|
||||||
|
/// for (size_t i = 0; i < N; i++) { P_LINE()
|
||||||
|
/// float v = std::sqrt(a1[i]*a1[i] + a2[i]*a2[i]); P_LINE()
|
||||||
|
/// a1[i] = v; P_LINE()
|
||||||
|
/// }
|
||||||
|
///
|
||||||
|
/// /* Code that does not matter for profiling */
|
||||||
|
///
|
||||||
|
/// P_LINE_BEGIN() // Resets the timer of the profiler.
|
||||||
|
/// delete[] a1; P_LINE()// Measures only the time of this line (instead of everything from the last P_LINE() call).
|
||||||
|
/// delete[] a2; P_LINE()
|
||||||
|
/// }
|
||||||
|
///
|
||||||
|
/// // display results of the profiler
|
||||||
|
/// {TIMER("P_LINE_DISPLAY()");
|
||||||
|
/// P_LINE_DISPLAY()
|
||||||
|
/// }
|
||||||
|
/// \endcode
|
||||||
|
///
|
||||||
|
/// Here is a possible output of the profiler :
|
||||||
|
/// \code
|
||||||
|
/// Some code : 0.199525752 s (+/- 1e-09 s)
|
||||||
|
/// Profiler results :
|
||||||
|
/// Total time of profiled lines = 102050953 ns (0.1020509530000000 s)
|
||||||
|
/// Profiler results sorted by ascending line number (times in ns) :
|
||||||
|
/// main.cpp:15 134 ( 0.0001 %)
|
||||||
|
/// main.cpp:16 30 ( 0.0000 %)
|
||||||
|
/// main.cpp:17 4129 ( 0.0040 %)
|
||||||
|
/// main.cpp:18 2063 ( 0.0020 %)
|
||||||
|
/// main.cpp:20 15958766 ( 15.6380 %)
|
||||||
|
/// main.cpp:21 17706218 ( 17.3504 %)
|
||||||
|
/// main.cpp:22 17500810 ( 17.1491 %)
|
||||||
|
/// main.cpp:25 15480408 ( 15.1693 %)
|
||||||
|
/// main.cpp:26 19006015 ( 18.6240 %)
|
||||||
|
/// main.cpp:27 16052417 ( 15.7298 %)
|
||||||
|
/// main.cpp:30 223831 ( 0.2193 %)
|
||||||
|
/// main.cpp:31 116132 ( 0.1138 %)
|
||||||
|
///
|
||||||
|
/// Profiler results sorted by desending line time (times in ns) :
|
||||||
|
/// main.cpp:26 19006015 ( 18.6240 %)
|
||||||
|
/// main.cpp:21 17706218 ( 17.3504 %)
|
||||||
|
/// main.cpp:22 17500810 ( 17.1491 %)
|
||||||
|
/// main.cpp:27 16052417 ( 15.7298 %)
|
||||||
|
/// main.cpp:20 15958766 ( 15.6380 %)
|
||||||
|
/// main.cpp:25 15480408 ( 15.1693 %)
|
||||||
|
/// main.cpp:30 223831 ( 0.2193 %)
|
||||||
|
/// main.cpp:31 116132 ( 0.1138 %)
|
||||||
|
/// main.cpp:17 4129 ( 0.0040 %)
|
||||||
|
/// main.cpp:18 2063 ( 0.0020 %)
|
||||||
|
/// main.cpp:15 134 ( 0.0001 %)
|
||||||
|
/// main.cpp:16 30 ( 0.0000 %)
|
||||||
|
/// P_LINE_DISPLAY() : 8.2959e-05 s (+/- 1e-09 s)
|
||||||
|
/// \endcode
|
||||||
|
|
||||||
|
template<int T=1>
|
||||||
|
class Profiler
|
||||||
|
{
|
||||||
|
protected:
|
||||||
|
std::string file_name; ///< File name to be displayed in the report.
|
||||||
|
int line_times[UTILS_PROFILER_NMAX]; ///< Line times in nanoseconds. The values are stored like so : line_times[__LINE__] = time_for_the_line.
|
||||||
|
std::chrono::high_resolution_clock::time_point t0; ///< Time point storing the time when Reset() is called.
|
||||||
|
|
||||||
|
public:
|
||||||
|
Profiler(const std::string & file_name_ = "")
|
||||||
|
: file_name(file_name_)
|
||||||
|
{
|
||||||
|
// initialize the line times to 0
|
||||||
|
for (size_t i = 0; i < UTILS_PROFILER_NMAX; i++) {
|
||||||
|
line_times[i] = 0;
|
||||||
|
}
|
||||||
|
Reset();
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Adds the time elapsed since the last reset to the corresponding line number. Usually called with __LINE__ as an argument.
|
||||||
|
void ProfileLine(int line_number)
|
||||||
|
{
|
||||||
|
line_times[line_number] += GetTime(); // increment the time of the line (when in loops)
|
||||||
|
Reset();
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Displays detailed tables of the measured execution time of all the profiled lines.
|
||||||
|
void DisplayData()
|
||||||
|
{
|
||||||
|
// compute sum of times
|
||||||
|
int sumOfTimes = 0;
|
||||||
|
for (size_t i = 0; i < UTILS_PROFILER_NMAX; i++) {
|
||||||
|
sumOfTimes += line_times[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
// display the list sorted by line number
|
||||||
|
auto currentStreamPrecision = std::cout.precision();
|
||||||
|
std::cout << std::fixed;
|
||||||
|
std::cout << "Profiler results :\n";
|
||||||
|
std::cout << "Total time of profiled lines = " << sumOfTimes << " ns (" << double(sumOfTimes)/1e9 << " s)\n";
|
||||||
|
std::cout << "Profiler results sorted by ascending line number (times in ns) :\n";
|
||||||
|
for (size_t i = 0; i < UTILS_PROFILER_NMAX; i++)
|
||||||
|
if(line_times[i] != 0)
|
||||||
|
std::cout << file_name << ':' << i << std::setw(20) << line_times[i] << " (" << std::setw(8) << std::setprecision(4) << ((double(line_times[i])/sumOfTimes)*100.) << " %)" << std::endl;
|
||||||
|
|
||||||
|
// Display the list sorted by descending line time
|
||||||
|
{
|
||||||
|
// convert the map to a vector of pairs...
|
||||||
|
std::vector<std::pair<int,int>> line_times_vec;
|
||||||
|
for (size_t i = 0; i < UTILS_PROFILER_NMAX; i++)
|
||||||
|
if(line_times[i] != 0)
|
||||||
|
line_times_vec.push_back(std::make_pair(i, line_times[i]));
|
||||||
|
|
||||||
|
// sort the vector of pairs
|
||||||
|
std::sort(line_times_vec.begin(), line_times_vec.end(), [](const std::pair<int,int> & a, const std::pair<int,int> & b) {
|
||||||
|
return a.second > b.second;// Descending order using the second entry of the pair
|
||||||
|
});
|
||||||
|
|
||||||
|
std::cout << "\nProfiler results sorted by desending line time (times in ns) :\n";
|
||||||
|
for (auto it = line_times_vec.begin() ; it != line_times_vec.end() ; ++it) {
|
||||||
|
std::cout << file_name << ':' << it->first << std::setw(20) << it->second << " (" << std::setw(8) << std::setprecision(4) << ((double(it->second)/sumOfTimes)*100.) << " %)" << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::cout.precision(currentStreamPrecision);// restore previous stream precision
|
||||||
|
std::cout << std::defaultfloat;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the time elapsed since the last reset of the timer. The time is given in nanoseconds.
|
||||||
|
int GetTime() const
|
||||||
|
{
|
||||||
|
std::chrono::high_resolution_clock::time_point t1 = std::chrono::high_resolution_clock::now();
|
||||||
|
std::chrono::nanoseconds time_span = std::chrono::duration_cast<std::chrono::nanoseconds>(t1 - t0);
|
||||||
|
return time_span.count();
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Resets the timer of the profiler.
|
||||||
|
void Reset() { t0 = std::chrono::high_resolution_clock::now(); }
|
||||||
|
};
|
||||||
|
|
||||||
|
#else // UTILS_NO_PROFILER defined
|
||||||
|
#define P_LINE_INIT()
|
||||||
|
#define P_LINE_BEGIN()
|
||||||
|
#define P_LINE()
|
||||||
|
#define P_LINE_DISPLAY()
|
||||||
|
#endif // UTILS_NO_PROFILER
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue