|
|
#pragma once |
|
|
#include "gpu_types.h" |
|
|
#include "gpu_libs.h" |
|
|
#include <memory> |
|
|
#include <vector> |
|
|
|
|
|
typedef void (*TimerCompletionCallback)(float elapsed_time, size_t calc_ops, float *time_ptr, float *gflops_ptr, |
|
|
void *user_data); |
|
|
|
|
|
class KernelTimer { |
|
|
private: |
|
|
size_t calc_ops; |
|
|
HOST_TYPE(Event_t) start, stop; |
|
|
float *time_ptr; |
|
|
float *gflops_ptr; |
|
|
void *user_data; |
|
|
TimerCompletionCallback callback; |
|
|
bool callback_executed; |
|
|
|
|
|
public: |
|
|
KernelTimer(size_t calc_ops, float *time, float *gflops); |
|
|
|
|
|
void start_timer(hipStream_t stream = 0); |
|
|
void stop_timer(hipStream_t stream = 0); |
|
|
void set_callback(TimerCompletionCallback cb, void *data = nullptr); |
|
|
|
|
|
|
|
|
void synchronize(); |
|
|
|
|
|
|
|
|
HOST_TYPE(Event_t) get_start_event() const { return start; } |
|
|
HOST_TYPE(Event_t) get_stop_event() const { return stop; } |
|
|
size_t get_calc_ops() const { return calc_ops; } |
|
|
float *get_time_ptr() const { return time_ptr; } |
|
|
float *get_gflops_ptr() const { return gflops_ptr; } |
|
|
void execute_callback(float elapsed_time); |
|
|
void set_callback_executed(bool executed) { callback_executed = executed; } |
|
|
bool is_callback_executed() const { return callback_executed; } |
|
|
|
|
|
~KernelTimer(); |
|
|
}; |
|
|
|
|
|
class KernelTimerScoped { |
|
|
private: |
|
|
std::shared_ptr<KernelTimer> timer; |
|
|
hipStream_t stream; |
|
|
|
|
|
public: |
|
|
KernelTimerScoped(std::vector<std::shared_ptr<KernelTimer>> &timers, size_t calc_ops, float *time, float *gflops, |
|
|
hipStream_t stream = 0) |
|
|
: timer(std::make_shared<KernelTimer>(calc_ops, time, gflops)), stream(stream) { |
|
|
timers.push_back(timer); |
|
|
timer->start_timer(stream); |
|
|
} |
|
|
|
|
|
~KernelTimerScoped() { timer->stop_timer(stream); } |
|
|
}; |
|
|
|