Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ set(headers
src/sceneStructs.h
src/preview.h
src/utilities.h
src/common.h
)

set(sources
Expand All @@ -84,6 +85,7 @@ set(sources
src/scene.cpp
src/preview.cpp
src/utilities.cpp
src/common.cu
)

set(imgui
Expand Down
40 changes: 40 additions & 0 deletions src/common.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#include "common.h"


namespace StreamCompaction {
namespace Common {

/**
 * Builds the keep/discard flags used by stream compaction.
 *
 * For every i in [0, n) the flag is 1 when idata[i] is non-zero and 0
 * otherwise. The same flag is stored to both bools1[i] and bools2[i].
 * Threads whose global index is n or larger do nothing.
 *
 * @param n       Number of elements to process.
 * @param bools1  First output flag array (written at indices [0, n)).
 * @param bools2  Second output flag array, receives the same values as bools1.
 * @param idata   Input array (read at indices [0, n)).
 */
__global__ void kernMapToBoolean(int n, int* bools1, int* bools2, const int* idata) {
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    if (tid < n) {
        const int keep = (idata[tid] != 0) ? 1 : 0;
        bools1[tid] = keep;
        bools2[tid] = keep;
    }
}

/**
 * Scatter step of stream compaction.
 *
 * For every i in [0, n) whose flag bools[i] is non-zero, idata[i] is copied
 * to odata[indices[i]]. Elements with a zero flag are skipped, and threads
 * whose global index is n or larger do nothing.
 *
 * @param n        Number of input elements.
 * @param odata    Output array, written at the positions given by indices.
 * @param idata    Input array.
 * @param bools    Per-element keep flags.
 * @param indices  Per-element destination index into odata.
 */
__global__ void kernScatter(int n, int* odata,
        const int* idata, const int* bools, const int* indices) {
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;

    // The range test comes first so bools is never read out of bounds.
    if (tid >= n || !bools[tid]) {
        return;
    }

    odata[indices[tid]] = idata[tid];
}
}
}
132 changes: 132 additions & 0 deletions src/common.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
#pragma once

#include <cuda.h>
#include <cuda_runtime.h>

#include <cstdio>
#include <cstring>
#include <cmath>
#include <algorithm>
#include <chrono>
#include <stdexcept>

/**
 * Base name of the current source file, i.e. __FILE__ with its directory
 * part removed.
 *
 * Both '/' and '\\' are treated as directory separators so that the result is
 * also a bare file name when the compiler expands __FILE__ with Windows-style
 * paths. '/' is searched first; a '\\' is only considered when the path has
 * no '/' at all.
 */
#define FILENAME (strrchr(__FILE__, '/') ? strrchr(__FILE__, '/') + 1 : \
                  (strrchr(__FILE__, '\\') ? strrchr(__FILE__, '\\') + 1 : __FILE__))

/** Calls checkCUDAErrorFn with the current file name and line number filled in. */
#define checkCUDAError(msg) checkCUDAErrorFn(msg, FILENAME, __LINE__)

/**
 * Check for CUDA errors; print and exit if there was a problem.
 *
 * Only declared here; the definition lives elsewhere in the project.
 *
 * @param msg   Caller-supplied message describing the operation being checked.
 * @param file  Source file name to report, or NULL when unknown.
 * @param line  Source line to report, or -1 when unknown.
 */
void checkCUDAErrorFn(const char *msg, const char *file = NULL, int line = -1);

/**
 * Integer base-2 logarithm, rounded down.
 *
 * @param x  Value to take the logarithm of.
 * @return   floor(log2(x)) for x >= 1. For x <= 0, where the logarithm is
 *           undefined, 0 is returned.
 */
inline int ilog2(int x) {
    // Right-shifting a negative value typically never reaches zero, which
    // would make the loop below spin forever; reject such input up front.
    if (x <= 0) {
        return 0;
    }

    int lg = 0;
    while (x >>= 1) {
        ++lg;
    }
    return lg;
}

/**
 * Integer base-2 logarithm, rounded up.
 *
 * @param x  Value to take the logarithm of.
 * @return   ceil(log2(x)) for x >= 1, i.e. the smallest k with (1 << k) >= x.
 *           For x <= 0, where the logarithm is undefined, 0 is returned.
 */
inline int ilog2ceil(int x) {
    // x <= 1 is answered directly so that ilog2 is only ever called with a
    // strictly positive argument (x - 1 >= 1).
    return x <= 1 ? 0 : ilog2(x - 1) + 1;
}

namespace StreamCompaction {
namespace Common {
/**
 * Maps idata to 0/1 keep flags for stream compaction: the flag for element i
 * is 1 when idata[i] is non-zero and 0 otherwise. The same flag is written to
 * both bools1[i] and bools2[i].
 *
 * The parameter list matches the definition in common.cu.
 */
__global__ void kernMapToBoolean(int n, int *bools1, int *bools2, const int *idata);

/**
 * Scatter step of stream compaction: for each index i in [0, n) with a
 * non-zero bools[i], copies idata[i] to odata[indices[i]].
 */
__global__ void kernScatter(int n, int *odata,
const int *idata, const int *bools, const int *indices);

/**
 * Stopwatch for timing CPU sections (std::chrono) and GPU sections
 * (CUDA events).
 *
 * Each of the two timers is used as a start/end pair. Starting a timer that
 * is already running, or ending one that is not running, throws
 * std::runtime_error. The duration of the most recently completed pair is
 * kept in milliseconds and can be read back through the getters.
 *
 * Instances can be neither copied nor moved.
 *
 * Adapted from WindyDarian(https://github.com/WindyDarian)
 */
class PerformanceTimer
{
public:
    /** Creates the two CUDA events used by the GPU timer. */
    PerformanceTimer()
    {
        cudaEventCreate(&event_start);
        cudaEventCreate(&event_end);
    }

    /** Destroys the CUDA events created by the constructor. */
    ~PerformanceTimer()
    {
        cudaEventDestroy(event_start);
        cudaEventDestroy(event_end);
    }

    // Copying and moving are disabled.
    PerformanceTimer(const PerformanceTimer&) = delete;
    PerformanceTimer(PerformanceTimer&&) = delete;
    PerformanceTimer& operator=(const PerformanceTimer&) = delete;
    PerformanceTimer& operator=(PerformanceTimer&&) = delete;

    /** Starts the CPU timer; throws std::runtime_error if it is already running. */
    void startCpuTimer()
    {
        if (cpu_timer_started)
        {
            throw std::runtime_error("CPU timer already started");
        }

        cpu_timer_started = true;
        time_start_cpu = clock_type::now();
    }

    /**
     * Stops the CPU timer and stores the elapsed milliseconds.
     * Throws std::runtime_error if the timer was not running.
     */
    void endCpuTimer()
    {
        // The clock is sampled before the state check.
        time_end_cpu = clock_type::now();

        if (!cpu_timer_started)
        {
            throw std::runtime_error("CPU timer not started");
        }

        const std::chrono::duration<double, std::milli> elapsed_ms = time_end_cpu - time_start_cpu;
        prev_elapsed_time_cpu_milliseconds = static_cast<float>(elapsed_ms.count());
        cpu_timer_started = false;
    }

    /** Records the GPU start event; throws std::runtime_error if already running. */
    void startGpuTimer()
    {
        if (gpu_timer_started)
        {
            throw std::runtime_error("GPU timer already started");
        }

        gpu_timer_started = true;
        cudaEventRecord(event_start);
    }

    /**
     * Records the GPU end event, waits for it, and stores the elapsed
     * milliseconds between the start and end events.
     * Throws std::runtime_error if the timer was not running.
     */
    void endGpuTimer()
    {
        // The end event is recorded and synchronized before the state check.
        cudaEventRecord(event_end);
        cudaEventSynchronize(event_end);

        if (!gpu_timer_started)
        {
            throw std::runtime_error("GPU timer not started");
        }

        cudaEventElapsedTime(&prev_elapsed_time_gpu_milliseconds, event_start, event_end);
        gpu_timer_started = false;
    }

    /** Milliseconds measured by the last completed CPU start/end pair (0 if none). */
    float getCpuElapsedTimeForPreviousOperation()
    {
        return prev_elapsed_time_cpu_milliseconds;
    }

    /** Milliseconds measured by the last completed GPU start/end pair (0 if none). */
    float getGpuElapsedTimeForPreviousOperation()
    {
        return prev_elapsed_time_gpu_milliseconds;
    }

private:
    using clock_type = std::chrono::high_resolution_clock;
    using time_point_t = clock_type::time_point;

    // GPU timer state
    cudaEvent_t event_start = nullptr;
    cudaEvent_t event_end = nullptr;
    bool gpu_timer_started = false;
    float prev_elapsed_time_gpu_milliseconds = 0.f;

    // CPU timer state
    time_point_t time_start_cpu;
    time_point_t time_end_cpu;
    bool cpu_timer_started = false;
    float prev_elapsed_time_cpu_milliseconds = 0.f;
};
}
}
91 changes: 86 additions & 5 deletions src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#include "../imgui/imgui.h"
#include "../imgui/imgui_impl_glfw.h"
#include "../imgui/imgui_impl_opengl3.h"

#include "common.h"
static std::string startTimeString;

// For camera controls
Expand All @@ -29,6 +29,7 @@ float ui_colorWeight = 0.45f;
float ui_normalWeight = 0.35f;
float ui_positionWeight = 0.2f;
bool ui_saveAndExit = false;
bool imageDenoised = false;

static bool camchanged = true;
static float dtheta = 0, dphi = 0;
Expand All @@ -45,6 +46,44 @@ int iteration;
int width;
int height;

// Accumulated CPU-timer milliseconds spent in pathtrace() calls; reset to 0
// on the first iteration and printed once iteration reaches ui_iterations.
static float timePT;
// CPU-timer milliseconds measured around DenoiseImage(); reset to 0 right
// before each measured denoise call.
static float timeAT;
// NOTE(review): only read (never assigned) in the code visible here; it gates
// whether the CPU timer is started before denoising — confirm where it is set.
static bool hasPrinted;
using StreamCompaction::Common::PerformanceTimer;
// Set to 1 to compile in the `#if TIMER` timing/printing sections, 0 to drop them.
#define TIMER 1

/**
 * Returns the single PerformanceTimer shared by this translation unit.
 * The timer is a function-local static, so it is constructed on the first
 * call and the same object is returned on every later call.
 */
PerformanceTimer& timer()
{
    static PerformanceTimer instance;
    return instance;
}

/**
 * Fills `kernel` with a normalized 2D Gaussian filter (sigma = 1).
 *
 * Exactly filter_size * filter_size weights are written, ordered with the
 * first axis as the outer loop, and afterwards scaled so that they sum to 1.
 * Tap offsets are measured from the centre of the window: for odd sizes they
 * are the integers -filter_size/2 .. filter_size/2; for even sizes the centre
 * falls between taps, giving half-integer offsets.
 *
 * @param filter_size  Width and height of the filter window, in taps.
 *                     Nothing is written when it is <= 0.
 * @param kernel       Output buffer with room for filter_size * filter_size
 *                     floats. Nothing is written when it is null.
 */
void FilterCreation(int filter_size, float *kernel)
{
    if (filter_size <= 0 || kernel == nullptr) {
        return;
    }

    // standard deviation is fixed at 1.0
    const float sigma = 1.0f;
    const float s = 2.0f * sigma * sigma;

    // Offset of tap 0 from the window centre. Looping over exactly
    // filter_size taps per axis keeps every write inside the buffer,
    // including for even sizes.
    const float center = (filter_size - 1) * 0.5f;

    // sum is for normalization
    float sum = 0.0f;
    int itr = 0;
    for (int i = 0; i < filter_size; ++i) {
        const float x = i - center;
        for (int j = 0; j < filter_size; ++j) {
            const float y = j - center;
            const float r = x * x + y * y;
            kernel[itr] = (glm::exp(-(r) / s)) / (PI * s);
            sum += kernel[itr];
            ++itr;
        }
    }

    // normalising the kernel so the weights sum to 1
    const int count = filter_size * filter_size;
    for (int k = 0; k < count; ++k) {
        kernel[k] /= sum;
    }
}


//-------------------------------
//-------------MAIN--------------
//-------------------------------
Expand All @@ -56,7 +95,6 @@ int main(int argc, char** argv) {
printf("Usage: %s SCENEFILE.txt\n", argv[0]);
return 1;
}

const char *sceneFile = argv[1];

// Load scene file
Expand Down Expand Up @@ -150,8 +188,11 @@ void runCuda() {
// No data is moved (Win & Linux). When mapped to CUDA, OpenGL should not use this buffer

if (iteration == 0) {
float *gKernel = new float[5 * 5];
FilterCreation(5, gKernel);
pathtraceFree();
pathtraceInit(scene);
pathtraceInit(scene, gKernel);
imageDenoised = false;
}

uchar4 *pbo_dptr = NULL;
Expand All @@ -160,14 +201,54 @@ void runCuda() {
if (iteration < ui_iterations) {
iteration++;

#if TIMER
// Start Timer
if (iteration == 1)
{
timePT = 0.f;
}
timer().startCpuTimer();
#endif // TIMER

// execute the kernel
int frame = 0;
pathtrace(frame, iteration);
pathtrace(frame, iteration);

#if TIMER
timer().endCpuTimer();
timePT += timer().getCpuElapsedTimeForPreviousOperation();
if (iteration == ui_iterations) {
std::cout << "Path-trace time for " << iteration << " iterations: " << timePT << "ms" << std::endl;
}
#endif // TIMER
}

if (ui_showGbuffer) {
showGBuffer(pbo_dptr);
} else {
}
else if (ui_denoise && iteration == ui_iterations)
{
if (!imageDenoised)
{
#if TIMER
// Start Timer
timeAT = 0.f;
if (!hasPrinted) {
timer().startCpuTimer();
}
#endif // TIMER
imageDenoised = DenoiseImage(renderState->camera.resolution.x, renderState->camera.resolution.y, iteration, ui_filterSize,
ui_colorWeight, ui_normalWeight, ui_positionWeight);

#if TIMER
timer().endCpuTimer();
timeAT += timer().getCpuElapsedTimeForPreviousOperation();
std::cout << "Denoise time for " << iteration << " iterations: " << timeAT << "ms\n\n" << std::endl;
#endif // TIMER
}
showDenoise(pbo_dptr, iteration);
}
else {
showImage(pbo_dptr, iteration);
}

Expand Down
Loading