diff --git a/CMakeLists.txt b/CMakeLists.txt index 162568b..ca9eb84 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -73,6 +73,7 @@ set(headers src/sceneStructs.h src/preview.h src/utilities.h +src/common.h ) set(sources @@ -84,6 +85,7 @@ set(sources src/scene.cpp src/preview.cpp src/utilities.cpp +src/common.cu ) set(imgui diff --git a/src/common.cu b/src/common.cu new file mode 100644 index 0000000..7a83529 --- /dev/null +++ b/src/common.cu @@ -0,0 +1,40 @@ +#include "common.h" + + +namespace StreamCompaction { + namespace Common { + + /** + * Maps an array to an array of 0s and 1s for stream compaction. Elements + * which map to 0 will be removed, and elements which map to 1 will be kept. + */ + __global__ void kernMapToBoolean(int n, int* bools1, int* bools2, const int* idata) { + // TODO + int index = threadIdx.x + (blockIdx.x * blockDim.x); + if (index >= n) { + return; + } + + int result = idata[index] != 0; + bools1[index] = result; + bools2[index] = result; + } + + /** + * Performs scatter on an array. That is, for each element in idata, + * if bools[idx] == 1, it copies idata[idx] to odata[indices[idx]]. + */ + __global__ void kernScatter(int n, int* odata, + const int* idata, const int* bools, const int* indices) { + // TODO + int index = threadIdx.x + (blockIdx.x * blockDim.x); + if (index >= n) { + return; + } + + if (bools[index]) { + odata[indices[index]] = idata[index]; + } + } + } +} \ No newline at end of file diff --git a/src/common.h b/src/common.h new file mode 100644 index 0000000..d2c1fed --- /dev/null +++ b/src/common.h @@ -0,0 +1,132 @@ +#pragma once + +#include +#include + +#include +#include +#include +#include +#include +#include + +#define FILENAME (strrchr(__FILE__, '/') ? strrchr(__FILE__, '/') + 1 : __FILE__) +#define checkCUDAError(msg) checkCUDAErrorFn(msg, FILENAME, __LINE__) + +/** + * Check for CUDA errors; print and exit if there was a problem. + */ +void checkCUDAErrorFn(const char *msg, const char *file = NULL, int line = -1); + +inline int ilog2(int x) { + int lg = 0; + while (x >>= 1) { + ++lg; + } + return lg; +} + +inline int ilog2ceil(int x) { + return x == 1 ? 0 : ilog2(x - 1) + 1; +} + +namespace StreamCompaction { + namespace Common { + __global__ void kernMapToBoolean(int n, int *bools, const int *idata); + + __global__ void kernScatter(int n, int *odata, + const int *idata, const int *bools, const int *indices); + + /** + * This class is used for timing the performance + * Uncopyable and unmovable + * + * Adapted from WindyDarian(https://github.com/WindyDarian) + */ + class PerformanceTimer + { + public: + PerformanceTimer() + { + cudaEventCreate(&event_start); + cudaEventCreate(&event_end); + } + + ~PerformanceTimer() + { + cudaEventDestroy(event_start); + cudaEventDestroy(event_end); + } + + void startCpuTimer() + { + if (cpu_timer_started) { throw std::runtime_error("CPU timer already started"); } + cpu_timer_started = true; + + time_start_cpu = std::chrono::high_resolution_clock::now(); + } + + void endCpuTimer() + { + time_end_cpu = std::chrono::high_resolution_clock::now(); + + if (!cpu_timer_started) { throw std::runtime_error("CPU timer not started"); } + + std::chrono::duration duro = time_end_cpu - time_start_cpu; + prev_elapsed_time_cpu_milliseconds = + static_cast(duro.count()); + + cpu_timer_started = false; + } + + void startGpuTimer() + { + if (gpu_timer_started) { throw std::runtime_error("GPU timer already started"); } + gpu_timer_started = true; + + cudaEventRecord(event_start); + } + + void endGpuTimer() + { + cudaEventRecord(event_end); + cudaEventSynchronize(event_end); + + if (!gpu_timer_started) { throw std::runtime_error("GPU timer not started"); } + + cudaEventElapsedTime(&prev_elapsed_time_gpu_milliseconds, event_start, event_end); + gpu_timer_started = false; + } + + float getCpuElapsedTimeForPreviousOperation() //noexcept //(damn I need VS 2015 + { + return prev_elapsed_time_cpu_milliseconds; + } + + float getGpuElapsedTimeForPreviousOperation() //noexcept + { + return prev_elapsed_time_gpu_milliseconds; + } + + // remove copy and move functions + PerformanceTimer(const PerformanceTimer&) = delete; + PerformanceTimer(PerformanceTimer&&) = delete; + PerformanceTimer& operator=(const PerformanceTimer&) = delete; + PerformanceTimer& operator=(PerformanceTimer&&) = delete; + + private: + cudaEvent_t event_start = nullptr; + cudaEvent_t event_end = nullptr; + + using time_point_t = std::chrono::high_resolution_clock::time_point; + time_point_t time_start_cpu; + time_point_t time_end_cpu; + + bool cpu_timer_started = false; + bool gpu_timer_started = false; + + float prev_elapsed_time_cpu_milliseconds = 0.f; + float prev_elapsed_time_gpu_milliseconds = 0.f; + }; + } +} diff --git a/src/main.cpp b/src/main.cpp index 4092ae4..ce6124c 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -5,7 +5,7 @@ #include "../imgui/imgui.h" #include "../imgui/imgui_impl_glfw.h" #include "../imgui/imgui_impl_opengl3.h" - +#include "common.h" static std::string startTimeString; // For camera controls @@ -29,6 +29,7 @@ float ui_colorWeight = 0.45f; float ui_normalWeight = 0.35f; float ui_positionWeight = 0.2f; bool ui_saveAndExit = false; +bool imageDenoised = false; static bool camchanged = true; static float dtheta = 0, dphi = 0; @@ -45,6 +46,44 @@ int iteration; int width; int height; +static float timePT; +static float timeAT; +static bool hasPrinted; +using StreamCompaction::Common::PerformanceTimer; +#define TIMER 1 + +PerformanceTimer& timer() +{ + static PerformanceTimer timer; + return timer; +} + +void FilterCreation(int filter_size, float *kernel) +{ + // initialising standard deviation to 1.0 + float sigma = 1.0; + float r, s = 2.0 * sigma * sigma; + // sum is for normalization + float sum = 0.0; + int itr = 0; + // generating filter_sizexfilter_size kernel + for (int x = -filter_size/2; x <= filter_size/2; x++) { + for (int y = -filter_size/2; y <= filter_size/2; y++) { + r = x * x + y * y ; + kernel[itr] = (glm::exp(-(r) / s)) / (PI * s); + sum += kernel[itr]; + itr++; + } + } + + // normalising the Kernel + for (int i = 0; i < filter_size * filter_size; ++i) + { + kernel[i] /= sum; + } +} + + //------------------------------- //-------------MAIN-------------- //------------------------------- @@ -56,7 +95,6 @@ int main(int argc, char** argv) { printf("Usage: %s SCENEFILE.txt\n", argv[0]); return 1; } - const char *sceneFile = argv[1]; // Load scene file @@ -150,8 +188,11 @@ void runCuda() { // No data is moved (Win & Linux). When mapped to CUDA, OpenGL should not use this buffer if (iteration == 0) { + float *gKernel = new float[5 * 5]; + FilterCreation(5, gKernel); pathtraceFree(); - pathtraceInit(scene); + pathtraceInit(scene, gKernel); + imageDenoised = false; } uchar4 *pbo_dptr = NULL; @@ -160,14 +201,54 @@ void runCuda() { if (iteration < ui_iterations) { iteration++; +#if TIMER + // Start Timer + if (iteration == 1) + { + timePT = 0.f; + } + timer().startCpuTimer(); +#endif // TIMER + // execute the kernel int frame = 0; - pathtrace(frame, iteration); + pathtrace(frame, iteration); + +#if TIMER + timer().endCpuTimer(); + timePT += timer().getCpuElapsedTimeForPreviousOperation(); + if (iteration == ui_iterations) { + std::cout << "Path-trace time for " << iteration << " iterations: " << timePT << "ms" << std::endl; + } +#endif // TIMER } if (ui_showGbuffer) { showGBuffer(pbo_dptr); - } else { + } + else if (ui_denoise && iteration == ui_iterations) + { + if (!imageDenoised) + { +#if TIMER + // Start Timer + timeAT = 0.f; + if (!hasPrinted) { + timer().startCpuTimer(); + } +#endif // TIMER + imageDenoised = DenoiseImage(renderState->camera.resolution.x, renderState->camera.resolution.y, iteration, ui_filterSize, + ui_colorWeight, ui_normalWeight, ui_positionWeight); + +#if TIMER + timer().endCpuTimer(); + timeAT += timer().getCpuElapsedTimeForPreviousOperation(); + std::cout << "Denoise time for " << iteration << " iterations: " << timeAT << "ms\n\n" << std::endl; +#endif // TIMER + } + showDenoise(pbo_dptr, iteration); + } + else { showImage(pbo_dptr, iteration); } diff --git a/src/pathtrace.cu b/src/pathtrace.cu index 23e5f90..c295723 100644 --- a/src/pathtrace.cu +++ b/src/pathtrace.cu @@ -16,118 +16,214 @@ #define ERRORCHECK 1 + +//Flags for Denoising +#define SimpleGaussianDenoise 0 +#define ATrous1 1 +#define ATrous2 0 + + + #define FILENAME (strrchr(__FILE__, '/') ? strrchr(__FILE__, '/') + 1 : __FILE__) #define checkCUDAError(msg) checkCUDAErrorFn(msg, FILENAME, __LINE__) -void checkCUDAErrorFn(const char *msg, const char *file, int line) { +void checkCUDAErrorFn(const char* msg, const char* file, int line) { #if ERRORCHECK - cudaDeviceSynchronize(); - cudaError_t err = cudaGetLastError(); - if (cudaSuccess == err) { - return; - } - - fprintf(stderr, "CUDA error"); - if (file) { - fprintf(stderr, " (%s:%d)", file, line); - } - fprintf(stderr, ": %s: %s\n", msg, cudaGetErrorString(err)); + cudaDeviceSynchronize(); + cudaError_t err = cudaGetLastError(); + if (cudaSuccess == err) { + return; + } + + fprintf(stderr, "CUDA error"); + if (file) { + fprintf(stderr, " (%s:%d)", file, line); + } + fprintf(stderr, ": %s: %s\n", msg, cudaGetErrorString(err)); # ifdef _WIN32 - getchar(); + getchar(); # endif - exit(EXIT_FAILURE); + exit(EXIT_FAILURE); #endif } + +float gaussianKernel[25] = { 0.003765, 0.015019, 0.023792, 0.015019, 0.003765, +0.015019, 0.059912, 0.094907, 0.059912, 0.015019, +0.023792, 0.094907, 0.150342, 0.094907, 0.023792, +0.015019, 0.059912, 0.094907, 0.059912, 0.015019, +0.003765, 0.015019, 0.023792, 0.015019, 0.003765, }; + + +//glm::vec2 offsetKernel[25]; + __host__ __device__ thrust::default_random_engine makeSeededRandomEngine(int iter, int index, int depth) { - int h = utilhash((1 << 31) | (depth << 22) | iter) ^ utilhash(index); - return thrust::default_random_engine(h); + int h = utilhash((1 << 31) | (depth << 22) | iter) ^ utilhash(index); + return thrust::default_random_engine(h); } //Kernel that writes the image to the OpenGL PBO directly. __global__ void sendImageToPBO(uchar4* pbo, glm::ivec2 resolution, - int iter, glm::vec3* image) { - int x = (blockIdx.x * blockDim.x) + threadIdx.x; - int y = (blockIdx.y * blockDim.y) + threadIdx.y; - - if (x < resolution.x && y < resolution.y) { - int index = x + (y * resolution.x); - glm::vec3 pix = image[index]; - - glm::ivec3 color; - color.x = glm::clamp((int) (pix.x / iter * 255.0), 0, 255); - color.y = glm::clamp((int) (pix.y / iter * 255.0), 0, 255); - color.z = glm::clamp((int) (pix.z / iter * 255.0), 0, 255); - - // Each thread writes one pixel location in the texture (textel) - pbo[index].w = 0; - pbo[index].x = color.x; - pbo[index].y = color.y; - pbo[index].z = color.z; - } + int iter, glm::vec3* image) { + int x = (blockIdx.x * blockDim.x) + threadIdx.x; + int y = (blockIdx.y * blockDim.y) + threadIdx.y; + + if (x < resolution.x && y < resolution.y) { + int index = x + (y * resolution.x); + glm::vec3 pix = image[index]; + + glm::ivec3 color; + color.x = glm::clamp((int)((pix.x / iter) * 255.0), 0, 255); + color.y = glm::clamp((int)((pix.y / iter) * 255.0), 0, 255); + color.z = glm::clamp((int)((pix.z / iter) * 255.0), 0, 255); + + // Each thread writes one pixel location in the texture (textel) + pbo[index].w = 0; + pbo[index].x = color.x; + pbo[index].y = color.y; + pbo[index].z = color.z; + } } __global__ void gbufferToPBO(uchar4* pbo, glm::ivec2 resolution, GBufferPixel* gBuffer) { - int x = (blockIdx.x * blockDim.x) + threadIdx.x; - int y = (blockIdx.y * blockDim.y) + threadIdx.y; - - if (x < resolution.x && y < resolution.y) { - int index = x + (y * resolution.x); - float timeToIntersect = gBuffer[index].t * 256.0; - - pbo[index].w = 0; - pbo[index].x = timeToIntersect; - pbo[index].y = timeToIntersect; - pbo[index].z = timeToIntersect; - } + int x = (blockIdx.x * blockDim.x) + threadIdx.x; + int y = (blockIdx.y * blockDim.y) + threadIdx.y; + + if (x < resolution.x && y < resolution.y) { + int index = x + (y * resolution.x); + float timeToIntersect = gBuffer[index].t * 256.0f; + + pbo[index].w = 0; + pbo[index].x = timeToIntersect; + pbo[index].y = timeToIntersect; + pbo[index].z = timeToIntersect; + } +} + +__global__ void gbufferToPBO_Normals(uchar4* pbo, glm::ivec2 resolution, GBufferPixel* gBuffer) { + int x = (blockIdx.x * blockDim.x) + threadIdx.x; + int y = (blockIdx.y * blockDim.y) + threadIdx.y; + + if (x < resolution.x && y < resolution.y) { + int index = x + (y * resolution.x); + + glm::vec3 normal = glm::abs(gBuffer[index].normal); + glm::ivec3 color; + color.x = glm::clamp((int)(normal.x * 255.0), 0, 255); + color.y = glm::clamp((int)(normal.y * 255.0), 0, 255); + color.z = glm::clamp((int)(normal.z * 255.0), 0, 255); + + pbo[index].w = 0; + pbo[index].x = color.x; + pbo[index].y = color.y; + pbo[index].z = color.z; + } } -static Scene * hst_scene = NULL; -static glm::vec3 * dev_image = NULL; -static Geom * dev_geoms = NULL; -static Material * dev_materials = NULL; -static PathSegment * dev_paths = NULL; -static ShadeableIntersection * dev_intersections = NULL; +__global__ void gbufferToPBO_Position(uchar4* pbo, glm::ivec2 resolution, GBufferPixel* gBuffer) { + int x = (blockIdx.x * blockDim.x) + threadIdx.x; + int y = (blockIdx.y * blockDim.y) + threadIdx.y; + + if (x < resolution.x && y < resolution.y) { + int index = x + (y * resolution.x); + + glm::vec3 position = glm::abs(gBuffer[index].position) ; + glm::ivec3 color; + color.x = glm::clamp((int)(position.x * 20.0), 0, 255); + color.y = glm::clamp((int)(position.y * 20.0), 0, 255); + color.z = glm::clamp((int)(position.z * 20.0), 0, 255); + + pbo[index].w = 0; + pbo[index].x = color.x; + pbo[index].y = color.y; + pbo[index].z = color.z; + } +} + + +static Scene* hst_scene = NULL; +static glm::vec3* dev_image = NULL; +static Geom* dev_geoms = NULL; +static Material* dev_materials = NULL; +static PathSegment* dev_paths = NULL; +static ShadeableIntersection* dev_intersections = NULL; static GBufferPixel* dev_gBuffer = NULL; // TODO: static variables for device memory, any extra info you need, etc // ... +static float* dev_gausKernel = NULL; +static glm::vec2* dev_offsetKernel = NULL; +static glm::vec3* dev_TrousImage = NULL; +static glm::vec3* dev_pingPongImage = NULL; + +//static glm::vec3* dev_IntermediaryImage = NULL; + +void generateOffsetKern(int filterSize, vector &offsetKernel) +{ + int index = 0; + filterSize = filterSize % 2 == 0 ? filterSize - 1 : filterSize; + for (int y = -filterSize/2; y <= filterSize/2; y++) + { + for (int x = -filterSize/2; x <= filterSize/2; x++) + { + offsetKernel.push_back(glm::vec2(x, y)); + index++; + } + } +} + +void pathtraceInit(Scene* scene,float *gausKernel) { + hst_scene = scene; + const Camera& cam = hst_scene->state.camera; + const int pixelcount = cam.resolution.x * cam.resolution.y; + + cudaMalloc(&dev_image, pixelcount * sizeof(glm::vec3)); + cudaMemset(dev_image, 0, pixelcount * sizeof(glm::vec3)); -void pathtraceInit(Scene *scene) { - hst_scene = scene; - const Camera &cam = hst_scene->state.camera; - const int pixelcount = cam.resolution.x * cam.resolution.y; + cudaMalloc(&dev_paths, pixelcount * sizeof(PathSegment)); + + cudaMalloc(&dev_geoms, scene->geoms.size() * sizeof(Geom)); + cudaMemcpy(dev_geoms, scene->geoms.data(), scene->geoms.size() * sizeof(Geom), cudaMemcpyHostToDevice); + + cudaMalloc(&dev_materials, scene->materials.size() * sizeof(Material)); + cudaMemcpy(dev_materials, scene->materials.data(), scene->materials.size() * sizeof(Material), cudaMemcpyHostToDevice); + + cudaMalloc(&dev_intersections, pixelcount * sizeof(ShadeableIntersection)); + cudaMemset(dev_intersections, 0, pixelcount * sizeof(ShadeableIntersection)); - cudaMalloc(&dev_image, pixelcount * sizeof(glm::vec3)); - cudaMemset(dev_image, 0, pixelcount * sizeof(glm::vec3)); + cudaMalloc(&dev_gBuffer, pixelcount * sizeof(GBufferPixel)); - cudaMalloc(&dev_paths, pixelcount * sizeof(PathSegment)); + // TODO: initialize any extra device memeory you need - cudaMalloc(&dev_geoms, scene->geoms.size() * sizeof(Geom)); - cudaMemcpy(dev_geoms, scene->geoms.data(), scene->geoms.size() * sizeof(Geom), cudaMemcpyHostToDevice); + cudaMalloc(&dev_gausKernel, 25 * sizeof(float)); + cudaMemcpy(dev_gausKernel, gaussianKernel, 25 * sizeof(float), cudaMemcpyHostToDevice); - cudaMalloc(&dev_materials, scene->materials.size() * sizeof(Material)); - cudaMemcpy(dev_materials, scene->materials.data(), scene->materials.size() * sizeof(Material), cudaMemcpyHostToDevice); + vector< glm::vec2> offKern; - cudaMalloc(&dev_intersections, pixelcount * sizeof(ShadeableIntersection)); - cudaMemset(dev_intersections, 0, pixelcount * sizeof(ShadeableIntersection)); + generateOffsetKern(5, offKern); + cudaMalloc(&dev_offsetKernel, 25 * sizeof(glm::vec2)); + cudaMemcpy(dev_offsetKernel, offKern.data(), 25 * sizeof(glm::vec2), cudaMemcpyHostToDevice); - cudaMalloc(&dev_gBuffer, pixelcount * sizeof(GBufferPixel)); + cudaMalloc(&dev_TrousImage, pixelcount * sizeof(glm::vec3)); + cudaMalloc(&dev_pingPongImage, pixelcount * sizeof(glm::vec3)); - // TODO: initialize any extra device memeory you need - checkCUDAError("pathtraceInit"); + checkCUDAError("pathtraceInit"); } void pathtraceFree() { - cudaFree(dev_image); // no-op if dev_image is null - cudaFree(dev_paths); - cudaFree(dev_geoms); - cudaFree(dev_materials); - cudaFree(dev_intersections); - cudaFree(dev_gBuffer); - // TODO: clean up any extra device memory you created - - checkCUDAError("pathtraceFree"); + cudaFree(dev_image); // no-op if dev_image is null + cudaFree(dev_paths); + cudaFree(dev_geoms); + cudaFree(dev_materials); + cudaFree(dev_intersections); + cudaFree(dev_gBuffer); + // TODO: clean up any extra device memory you created + + cudaFree(dev_gausKernel); + cudaFree(dev_offsetKernel); + cudaFree(dev_TrousImage); + cudaFree(dev_pingPongImage); + checkCUDAError("pathtraceFree"); } /** @@ -145,289 +241,585 @@ __global__ void generateRayFromCamera(Camera cam, int iter, int traceDepth, Path if (x < cam.resolution.x && y < cam.resolution.y) { int index = x + (y * cam.resolution.x); - PathSegment & segment = pathSegments[index]; + PathSegment& segment = pathSegments[index]; segment.ray.origin = cam.position; - segment.color = glm::vec3(1.0f, 1.0f, 1.0f); + segment.color = glm::vec3(1.0f, 1.0f, 1.0f); segment.ray.direction = glm::normalize(cam.view - cam.right * cam.pixelLength.x * ((float)x - (float)cam.resolution.x * 0.5f) - cam.up * cam.pixelLength.y * ((float)y - (float)cam.resolution.y * 0.5f) - ); - + ); segment.pixelIndex = index; segment.remainingBounces = traceDepth; } } -__global__ void computeIntersections( - int depth - , int num_paths - , PathSegment * pathSegments - , Geom * geoms - , int geoms_size - , ShadeableIntersection * intersections + + __global__ void GenerateGaussianBlur( + int pixelCount, int stepWidth, + float* dev_gausKernel, glm::vec2 *dev_offsetKernel, + glm::vec3* dev_colorImage, glm::vec3 *dev_TrousImage, + int resolutionX, int resolutionY ) -{ - int path_index = blockIdx.x * blockDim.x + threadIdx.x; + { - if (path_index < num_paths) + int index = blockIdx.x * blockDim.x + threadIdx.x; + + if (index < pixelCount) + { + glm::vec3 currColor =glm::vec3(0.0f); + //glm::vec3 currColor = dev_colorImage[index]; + for (int i = 0; i < 25 ; i++) + { + int index2D_y = index / resolutionX; + int index2D_x = (int)(index % resolutionX); + + int offsetX = dev_offsetKernel[i].x; + int offsetY = dev_offsetKernel[i].y; + + int finalValue_X = index2D_x + offsetX * stepWidth; + int finalValue_Y = index2D_y + offsetY * stepWidth; + + if (finalValue_X >= 0 && finalValue_X <= (resolutionX - 1) && finalValue_Y >= 0 && finalValue_Y <= (resolutionY - 1)) + { + float gausValue = dev_gausKernel[i]; + int offsetColorIdx = finalValue_Y * resolutionX + finalValue_X; + if (offsetColorIdx >= 0 && offsetColorIdx < pixelCount) + { + glm::vec3 newColor = dev_colorImage[offsetColorIdx]; + currColor += newColor * dev_gausKernel[i]; + } + } + } + dev_TrousImage[index] = currColor; + } + + } + + /// + /// This A Trous Kernel performs conversion from 1D index to 2D. + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + __global__ void GenerateAtrousImage( + int pixelCount, int stepWidth, + float* dev_gausKernel, glm::vec2* dev_offsetKernel, + glm::vec3* dev_colorImage, glm::vec3* dev_TrousImage, + GBufferPixel* gbuf, int resolutionX, int resolutionY, float ui_colorWeight, + float ui_normalWeight, float ui_positionWeight + ) { - PathSegment pathSegment = pathSegments[path_index]; - float t; - glm::vec3 intersect_point; - glm::vec3 normal; - float t_min = FLT_MAX; - int hit_geom_index = -1; - bool outside = true; + int index = blockIdx.x * blockDim.x + threadIdx.x; - glm::vec3 tmp_intersect; - glm::vec3 tmp_normal; + if (index < pixelCount) + { + glm::vec3 sum = glm::vec3(0.0f); + glm::vec3 cval = dev_colorImage[index]; + glm::vec3 nval = gbuf[index].normal; + glm::vec3 pval = gbuf[index].position; + + float cphi = ui_colorWeight * ui_colorWeight; + float nphi = ui_normalWeight * ui_normalWeight; + float pphi = ui_positionWeight * ui_positionWeight; + + float cum_w = 0.0f; + for (int i = 0; i < 25; i++) + { + // Calculate Offseted Index + int index2D_y = index / resolutionX ; + int index2D_x = (int)(index % resolutionX); + + int offsetX = dev_offsetKernel[i].x; + int offsetY = dev_offsetKernel[i].y; + + int finalValue_X = index2D_x + offsetX * stepWidth; // Final Offset Values + int finalValue_Y = index2D_y + offsetY * stepWidth; // Final Offset Values + if (finalValue_X >= 0 && finalValue_X <= (resolutionX - 1) && finalValue_Y >= 0 && finalValue_Y <= (resolutionY - 1)) + { + int offsetColorIdx = finalValue_Y * resolutionX + finalValue_X; + if (offsetColorIdx >= 0 && offsetColorIdx < pixelCount) + { + glm::vec3 ctmp = dev_colorImage[offsetColorIdx]; + glm::vec3 t = cval - ctmp; + float dist2 = glm::dot(t, t); + float c_w = glm::min(glm::exp(-(dist2) / cphi), 1.0f); + + glm::vec3 ntmp = gbuf[offsetColorIdx].normal; + t = nval - ntmp; + dist2 = glm::max(glm::dot(t, t)/ (stepWidth * stepWidth), 0.0f); + float n_w = glm::min(glm::exp(-(dist2) / cphi), 1.0f); + + glm::vec3 ptmp = gbuf[offsetColorIdx].position; + t = pval - ptmp; + dist2 = glm::dot(t, t); + float p_w = glm::min(glm::exp(-(dist2) / cphi), 1.0f); + float weight = c_w * n_w * p_w; + + sum += ctmp * weight * dev_gausKernel[i]; + cum_w += weight * dev_gausKernel[i]; + + } + } + } + if (cum_w == 0.f) + { + dev_TrousImage[index] = cval; + return; + } + + dev_TrousImage[index] = sum / cum_w; + + } - // naive parse through global geoms + } + + /// + /// This A Trous Kernel already starts out with 2D indexes + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + __global__ void GenerateAtrousImageGeneral( + int pixelCount, int stepWidth, + float* dev_gausKernel, glm::vec2* dev_offsetKernel, + glm::vec3* dev_colorImage, glm::vec3* dev_TrousImage, + GBufferPixel* gbuf, int resolutionX, int resolutionY, float ui_colorWeight, + float ui_normalWeight, float ui_positionWeight + ) + { + + int x = (blockIdx.x * blockDim.x) + threadIdx.x; + int y = (blockIdx.y * blockDim.y) + threadIdx.y; + + if (x < resolutionX && y < resolutionY) + { + int index = x + (y * resolutionX); + glm::vec3 sum = glm::vec3(0.0f); + glm::vec3 cval = dev_colorImage[index]; + glm::vec3 nval = gbuf[index].normal; + glm::vec3 pval = gbuf[index].position; + + float cphi = ui_colorWeight ; + float nphi = ui_normalWeight ; + float pphi = ui_positionWeight ; + + float cum_w = 0.0f; + for (int i = 0; i < 5 * 5; i++) + { + // Calculate Offseted Index + + int offsetX = dev_offsetKernel[i].x; + int offsetY = dev_offsetKernel[i].y; + + int finalValue_X = x + offsetX * stepWidth; + int finalValue_Y = y + offsetY * stepWidth; + if (finalValue_X >= 0 && finalValue_X <= (resolutionX - 1) && finalValue_Y >= 0 && finalValue_Y <= (resolutionY - 1)) + { + int offsetColorIdx = finalValue_Y * resolutionX + finalValue_X; + if (offsetColorIdx >= 0 && offsetColorIdx < pixelCount) + { + glm::vec3 ctmp = dev_colorImage[offsetColorIdx]; + glm::vec3 t = cval - ctmp; + float dist2 = glm::length(t) * glm::length(t); + float newVal = glm::exp(-(dist2) / cphi); + float c_w = glm::min(newVal, 1.0f); + + glm::vec3 ntmp = gbuf[offsetColorIdx].normal; + t = nval - ntmp; + dist2 = glm::max( (glm::length(t) * glm::length(t)) / (stepWidth * stepWidth), 0.f); + newVal = glm::exp(-1 * (dist2) / nphi); + float n_w = glm::min(newVal, 1.0f); + + glm::vec3 ptmp = gbuf[offsetColorIdx].position; + t = pval - ptmp; + dist2 = glm::length(t) * glm::length(t); + newVal = glm::exp(-1 * (dist2) / pphi); + float p_w = glm::min(newVal, 1.0f); + + + float weight = c_w * n_w * p_w; + sum += ctmp * weight * dev_gausKernel[i]; + cum_w += weight * dev_gausKernel[i]; + + } + } + + } + + dev_TrousImage[index] = sum / cum_w; + } + + } + + __global__ void computeIntersections( + int depth + , int num_paths + , PathSegment * pathSegments + , Geom * geoms + , int geoms_size + , ShadeableIntersection * intersections + ) + { + int path_index = blockIdx.x * blockDim.x + threadIdx.x; - for (int i = 0; i < geoms_size; i++) + if (path_index < num_paths) { - Geom & geom = geoms[i]; + PathSegment pathSegment = pathSegments[path_index]; + + float t; + glm::vec3 intersect_point; + glm::vec3 normal; + float t_min = FLT_MAX; + int hit_geom_index = -1; + bool outside = true; + + glm::vec3 tmp_intersect; + glm::vec3 tmp_normal; + + // naive parse through global geoms - if (geom.type == CUBE) + for (int i = 0; i < geoms_size; i++) { - t = boxIntersectionTest(geom, pathSegment.ray, tmp_intersect, tmp_normal, outside); + Geom& geom = geoms[i]; + + if (geom.type == CUBE) + { + t = boxIntersectionTest(geom, pathSegment.ray, tmp_intersect, tmp_normal, outside); + } + else if (geom.type == SPHERE) + { + t = sphereIntersectionTest(geom, pathSegment.ray, tmp_intersect, tmp_normal, outside); + } + + // Compute the minimum t from the intersection tests to determine what + // scene geometry object was hit first. + if (t > 0.0f && t_min > t) + { + t_min = t; + hit_geom_index = i; + intersect_point = tmp_intersect; + normal = tmp_normal; + } } - else if (geom.type == SPHERE) + + if (hit_geom_index == -1) { - t = sphereIntersectionTest(geom, pathSegment.ray, tmp_intersect, tmp_normal, outside); + intersections[path_index].t = -1.0f; } - - // Compute the minimum t from the intersection tests to determine what - // scene geometry object was hit first. - if (t > 0.0f && t_min > t) + else { - t_min = t; - hit_geom_index = i; - intersect_point = tmp_intersect; - normal = tmp_normal; + //The ray hits something + intersections[path_index].t = t_min; + intersections[path_index].materialId = geoms[hit_geom_index].materialid; + intersections[path_index].surfaceNormal = normal; } } + } - if (hit_geom_index == -1) + __global__ void shadeSimpleMaterials( + int iter + , int num_paths + , ShadeableIntersection * shadeableIntersections + , PathSegment * pathSegments + , Material * materials + ) + { + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < num_paths) { - intersections[path_index].t = -1.0f; + ShadeableIntersection intersection = shadeableIntersections[idx]; + PathSegment segment = pathSegments[idx]; + if (segment.remainingBounces == 0) { + return; + } + + if (intersection.t > 0.0f) { // if the intersection exists... + segment.remainingBounces--; + // Set up the RNG + thrust::default_random_engine rng = makeSeededRandomEngine(iter, idx, segment.remainingBounces); + + Material material = materials[intersection.materialId]; + glm::vec3 materialColor = material.color; + + // If the material indicates that the object was a light, "light" the ray + if (material.emittance > 0.0f) { + segment.color *= (materialColor * material.emittance); + segment.remainingBounces = 0; + } + else { + segment.color *= materialColor; + glm::vec3 intersectPos = intersection.t * segment.ray.direction + segment.ray.origin; + scatterRay(segment, intersectPos, intersection.surfaceNormal, material, rng); + } + // If there was no intersection, color the ray black. + // Lots of renderers use 4 channel color, RGBA, where A = alpha, often + // used for opacity, in which case they can indicate "no opacity". + // This can be useful for post-processing and image compositing. + } + else { + segment.color = glm::vec3(0.0f); + segment.remainingBounces = 0; + } + + pathSegments[idx] = segment; } - else + } + + __global__ void generateGBuffer( + int num_paths, + ShadeableIntersection * shadeableIntersections, + PathSegment * pathSegments, + GBufferPixel * gBuffer) { + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < num_paths) { - //The ray hits something - intersections[path_index].t = t_min; - intersections[path_index].materialId = geoms[hit_geom_index].materialid; - intersections[path_index].surfaceNormal = normal; + int pixelPosition = pathSegments[idx].pixelIndex; + gBuffer[idx].t = shadeableIntersections[idx].t; + gBuffer[idx].normal = shadeableIntersections[idx].surfaceNormal; + gBuffer[idx].position = getPointOnRay(pathSegments[idx].ray, shadeableIntersections[idx].t); } } -} -__global__ void shadeSimpleMaterials ( - int iter - , int num_paths - , ShadeableIntersection * shadeableIntersections - , PathSegment * pathSegments - , Material * materials - ) -{ - int idx = blockIdx.x * blockDim.x + threadIdx.x; - if (idx < num_paths) - { - ShadeableIntersection intersection = shadeableIntersections[idx]; - PathSegment segment = pathSegments[idx]; - if (segment.remainingBounces == 0) { - return; - } - - if (intersection.t > 0.0f) { // if the intersection exists... - segment.remainingBounces--; - // Set up the RNG - thrust::default_random_engine rng = makeSeededRandomEngine(iter, idx, segment.remainingBounces); - - Material material = materials[intersection.materialId]; - glm::vec3 materialColor = material.color; - - // If the material indicates that the object was a light, "light" the ray - if (material.emittance > 0.0f) { - segment.color *= (materialColor * material.emittance); - segment.remainingBounces = 0; - } - else { - segment.color *= materialColor; - glm::vec3 intersectPos = intersection.t * segment.ray.direction + segment.ray.origin; - scatterRay(segment, intersectPos, intersection.surfaceNormal, material, rng); - } - // If there was no intersection, color the ray black. - // Lots of renderers use 4 channel color, RGBA, where A = alpha, often - // used for opacity, in which case they can indicate "no opacity". - // This can be useful for post-processing and image compositing. - } else { - segment.color = glm::vec3(0.0f); - segment.remainingBounces = 0; - } - - pathSegments[idx] = segment; - } -} + // Add the current iteration's output to the overall image + __global__ void finalGather(int nPaths, glm::vec3 * image, PathSegment * iterationPaths) + { + int index = (blockIdx.x * blockDim.x) + threadIdx.x; -__global__ void generateGBuffer ( - int num_paths, - ShadeableIntersection* shadeableIntersections, - PathSegment* pathSegments, - GBufferPixel* gBuffer) { - int idx = blockIdx.x * blockDim.x + threadIdx.x; - if (idx < num_paths) - { - gBuffer[idx].t = shadeableIntersections[idx].t; - } -} + if (index < nPaths) + { + PathSegment iterationPath = iterationPaths[index]; + image[iterationPath.pixelIndex] += iterationPath.color; + } + } -// Add the current iteration's output to the overall image -__global__ void finalGather(int nPaths, glm::vec3 * image, PathSegment * iterationPaths) -{ - int index = (blockIdx.x * blockDim.x) + threadIdx.x; + /** + * Wrapper for the __global__ call that sets up the kernel calls and does a ton + * of memory management + */ + void pathtrace(int frame, int iter) { + const int traceDepth = hst_scene->state.traceDepth; + const Camera& cam = hst_scene->state.camera; + const int pixelcount = cam.resolution.x * cam.resolution.y; + + // 2D block for generating ray from camera + const dim3 blockSize2d(8, 8); + const dim3 blocksPerGrid2d( + (cam.resolution.x + blockSize2d.x - 1) / blockSize2d.x, + (cam.resolution.y + blockSize2d.y - 1) / blockSize2d.y); + + // 1D block for path tracing + const int blockSize1d = 128; + + /////////////////////////////////////////////////////////////////////////// + + // Pathtracing Recap: + // * Initialize array of path rays (using rays that come out of the camera) + // * You can pass the Camera object to that kernel. + // * Each path ray must carry at minimum a (ray, color) pair, + // * where color starts as the multiplicative identity, white = (1, 1, 1). + // * This has already been done for you. + // * NEW: For the first depth, generate geometry buffers (gbuffers) + // * For each depth: + // * Compute an intersection in the scene for each path ray. + // A very naive version of this has been implemented for you, but feel + // free to add more primitives and/or a better algorithm. + // Currently, intersection distance is recorded as a parametric distance, + // t, or a "distance along the ray." t = -1.0 indicates no intersection. + // * Color is attenuated (multiplied) by reflections off of any object + // * Stream compact away all of the terminated paths. + // You may use either your implementation or `thrust::remove_if` or its + // cousins. + // * Note that you can't really use a 2D kernel launch any more - switch + // to 1D. + // * Shade the rays that intersected something or didn't bottom out. + // That is, color the ray by performing a color computation according + // to the shader, then generate a new ray to continue the ray path. + // We recommend just updating the ray's PathSegment in place. + // Note that this step may come before or after stream compaction, + // since some shaders you write may also cause a path to terminate. + // * Finally: + // * if not denoising, add this iteration's results to the image + // * TODO: if denoising, run kernels that take both the raw pathtraced result and the gbuffer, and put the result in the "pbo" from opengl + + generateRayFromCamera << > > (cam, iter, traceDepth, dev_paths); + checkCUDAError("generate camera ray"); + + int depth = 0; + PathSegment* dev_path_end = dev_paths + pixelcount; + int num_paths = dev_path_end - dev_paths; + + // --- PathSegment Tracing Stage --- + // Shoot ray into scene, bounce between objects, push shading chunks + + // Empty gbuffer + cudaMemset(dev_gBuffer, 0, pixelcount * sizeof(GBufferPixel)); + + // clean shading chunks + cudaMemset(dev_intersections, 0, pixelcount * sizeof(ShadeableIntersection)); + + bool iterationComplete = false; + while (!iterationComplete) { + + // tracing + dim3 numblocksPathSegmentTracing = (num_paths + blockSize1d - 1) / blockSize1d; + computeIntersections << > > ( + depth + , num_paths + , dev_paths + , dev_geoms + , hst_scene->geoms.size() + , dev_intersections + ); + checkCUDAError("trace one bounce"); + cudaDeviceSynchronize(); + + if (depth == 0) { + generateGBuffer << > > (num_paths, dev_intersections, dev_paths, dev_gBuffer); + } + + depth++; + shadeSimpleMaterials << > > ( + iter, + num_paths, + dev_intersections, + dev_paths, + dev_materials + ); + iterationComplete = depth == traceDepth; + } + // Assemble this iteration and apply it to the image + dim3 numBlocksPixels = (pixelcount + blockSize1d - 1) / blockSize1d; + finalGather << > > (num_paths, dev_image, dev_paths); + /////////////////////////////////////////////////////////////////////////// + + // CHECKITOUT: use dev_image as reference if you want to implement saving denoised images. + // Otherwise, screenshots are also acceptable. + // Retrieve image from GPU + cudaMemcpy(hst_scene->state.image.data(), dev_image, + pixelcount * sizeof(glm::vec3), cudaMemcpyDeviceToHost); + + checkCUDAError("pathtrace"); + } + + // CHECKITOUT: this kernel "post-processes" the gbuffer/gbuffers into something that you can visualize for debugging. + void showGBuffer(uchar4 * pbo) { + const Camera& cam = hst_scene->state.camera; + const dim3 blockSize2d(8, 8); + const dim3 blocksPerGrid2d( + (cam.resolution.x + blockSize2d.x - 1) / blockSize2d.x, + (cam.resolution.y + blockSize2d.y - 1) / blockSize2d.y); + + + // CHECKITOUT: process the gbuffer results and send them to OpenGL buffer for visualization + //gbufferToPBO<<>>(pbo, cam.resolution, dev_gBuffer); + //gbufferToPBO_Normals<<>>(pbo, cam.resolution, dev_gBuffer); + gbufferToPBO_Position <<>>(pbo, cam.resolution, dev_gBuffer); + } - if (index < nPaths) + __global__ void GeneratePingPongImage(int pixelCount, glm::vec3* devImage, glm::vec3* pingPongImage, int iter) { - PathSegment iterationPath = iterationPaths[index]; - image[iterationPath.pixelIndex] += iterationPath.color; + int index = (blockIdx.x * blockDim.x) + threadIdx.x; + if (index < pixelCount) + { + pingPongImage[index] = devImage[index]; + } } -} -/** - * Wrapper for the __global__ call that sets up the kernel calls and does a ton - * of memory management - */ -void pathtrace(int frame, int iter) { - const int traceDepth = hst_scene->state.traceDepth; - const Camera &cam = hst_scene->state.camera; - const int pixelcount = cam.resolution.x * cam.resolution.y; - - // 2D block for generating ray from camera - const dim3 blockSize2d(8, 8); - const dim3 blocksPerGrid2d( - (cam.resolution.x + blockSize2d.x - 1) / blockSize2d.x, - (cam.resolution.y + blockSize2d.y - 1) / blockSize2d.y); - - // 1D block for path tracing - const int blockSize1d = 128; - - /////////////////////////////////////////////////////////////////////////// - - // Pathtracing Recap: - // * Initialize array of path rays (using rays that come out of the camera) - // * You can pass the Camera object to that kernel. - // * Each path ray must carry at minimum a (ray, color) pair, - // * where color starts as the multiplicative identity, white = (1, 1, 1). - // * This has already been done for you. - // * NEW: For the first depth, generate geometry buffers (gbuffers) - // * For each depth: - // * Compute an intersection in the scene for each path ray. - // A very naive version of this has been implemented for you, but feel - // free to add more primitives and/or a better algorithm. - // Currently, intersection distance is recorded as a parametric distance, - // t, or a "distance along the ray." t = -1.0 indicates no intersection. - // * Color is attenuated (multiplied) by reflections off of any object - // * Stream compact away all of the terminated paths. - // You may use either your implementation or `thrust::remove_if` or its - // cousins. - // * Note that you can't really use a 2D kernel launch any more - switch - // to 1D. - // * Shade the rays that intersected something or didn't bottom out. - // That is, color the ray by performing a color computation according - // to the shader, then generate a new ray to continue the ray path. - // We recommend just updating the ray's PathSegment in place. - // Note that this step may come before or after stream compaction, - // since some shaders you write may also cause a path to terminate. - // * Finally: - // * if not denoising, add this iteration's results to the image - // * TODO: if denoising, run kernels that take both the raw pathtraced result and the gbuffer, and put the result in the "pbo" from opengl - - generateRayFromCamera <<>>(cam, iter, traceDepth, dev_paths); - checkCUDAError("generate camera ray"); - - int depth = 0; - PathSegment* dev_path_end = dev_paths + pixelcount; - int num_paths = dev_path_end - dev_paths; - - // --- PathSegment Tracing Stage --- - // Shoot ray into scene, bounce between objects, push shading chunks - - // Empty gbuffer - cudaMemset(dev_gBuffer, 0, pixelcount * sizeof(GBufferPixel)); - - // clean shading chunks - cudaMemset(dev_intersections, 0, pixelcount * sizeof(ShadeableIntersection)); - bool iterationComplete = false; - while (!iterationComplete) { - - // tracing - dim3 numblocksPathSegmentTracing = (num_paths + blockSize1d - 1) / blockSize1d; - computeIntersections <<>> ( - depth - , num_paths - , dev_paths - , dev_geoms - , hst_scene->geoms.size() - , dev_intersections - ); - checkCUDAError("trace one bounce"); - cudaDeviceSynchronize(); + bool DenoiseImage(int resolutionX, int resolutionY, int iteration, int filterSize, float colWeight, float norWeight, float posWeight) + { + int pixelCount = resolutionX * resolutionY; + const int blockSize1d = 128; + dim3 numblocksPathSegmentTracing = (pixelCount + blockSize1d - 1) / blockSize1d; - if (depth == 0) { - generateGBuffer<<>>(num_paths, dev_intersections, dev_paths, dev_gBuffer); - } + - depth++; + const dim3 blockSize2d(8, 8); + const dim3 blocksPerGrid2d( + (resolutionX + blockSize2d.x - 1) / blockSize2d.x, + (resolutionY + blockSize2d.y - 1) / blockSize2d.y); - shadeSimpleMaterials<<>> ( - iter, - num_paths, - dev_intersections, - dev_paths, - dev_materials - ); - iterationComplete = depth == traceDepth; - } + int stepWidth = 1; + //int blur_iterations = ceil(glm::log2( (filterSize/25.f) * (filterSize / 25.f)) ); // This wavelet scalling is the correct option though + int blur_iterations = ceil(glm::log2( (filterSize* filterSize) / 25.f) ); // This wavelet scalling worked best for me + blur_iterations = blur_iterations <= 0 ? 1 : blur_iterations; + int colorWeight = colWeight; - // Assemble this iteration and apply it to the image - dim3 numBlocksPixels = (pixelcount + blockSize1d - 1) / blockSize1d; - finalGather<<>>(num_paths, dev_image, dev_paths); + GeneratePingPongImage << < numblocksPathSegmentTracing, blockSize1d >> > (pixelCount, dev_image, dev_pingPongImage, iteration); + for (int i = 1; i < blur_iterations; i ++) + { - /////////////////////////////////////////////////////////////////////////// +#if SimpleGaussianDenoise - // CHECKITOUT: use dev_image as reference if you want to implement saving denoised images. - // Otherwise, screenshots are also acceptable. - // Retrieve image from GPU - cudaMemcpy(hst_scene->state.image.data(), dev_image, - pixelcount * sizeof(glm::vec3), cudaMemcpyDeviceToHost); - checkCUDAError("pathtrace"); -} + GenerateGaussianBlur <<< numblocksPathSegmentTracing, blockSize1d >> > (pixelCount, stepWidth, dev_gausKernel, dev_offsetKernel, + dev_pingPongImage, dev_TrousImage, resolutionX, resolutionY); -// CHECKITOUT: this kernel "post-processes" the gbuffer/gbuffers into something that you can visualize for debugging. -void showGBuffer(uchar4* pbo) { - const Camera &cam = hst_scene->state.camera; - const dim3 blockSize2d(8, 8); - const dim3 blocksPerGrid2d( - (cam.resolution.x + blockSize2d.x - 1) / blockSize2d.x, - (cam.resolution.y + blockSize2d.y - 1) / blockSize2d.y); +#endif - // CHECKITOUT: process the gbuffer results and send them to OpenGL buffer for visualization - gbufferToPBO<<>>(pbo, cam.resolution, dev_gBuffer); -} +#if ATrous1 + // 1D block for path tracing + GenerateAtrousImage << > > (pixelCount, stepWidth, dev_gausKernel, dev_offsetKernel, + dev_pingPongImage, dev_TrousImage, dev_gBuffer, resolutionX, resolutionY, colorWeight, norWeight, posWeight); +#endif -void showImage(uchar4* pbo, int iter) { -const Camera &cam = hst_scene->state.camera; - const dim3 blockSize2d(8, 8); - const dim3 blocksPerGrid2d( - (cam.resolution.x + blockSize2d.x - 1) / blockSize2d.x, - (cam.resolution.y + blockSize2d.y - 1) / blockSize2d.y); - // Send results to OpenGL buffer for rendering - sendImageToPBO<<>>(pbo, cam.resolution, iter, dev_image); -} +#if ATrous2 + GenerateAtrousImage2 << > > (pixelCount, i,dev_gausKernel, dev_offsetKernel, + dev_pingPongImage, dev_TrousImage, dev_gBuffer, resolutionX, resolutionY, colWeight, norWeight, posWeight); +#endif + + stepWidth *= 2; + colorWeight /= 2; + swap(dev_pingPongImage, dev_TrousImage); + } + swap(dev_pingPongImage, dev_TrousImage); // Getting the data back to dev_TrousImage + cudaDeviceSynchronize(); + return true; + } + + void showImage(uchar4 * pbo, int iter) { + const Camera& cam = hst_scene->state.camera; + const dim3 blockSize2d(8, 8); + const dim3 blocksPerGrid2d( + (cam.resolution.x + blockSize2d.x - 1) / blockSize2d.x, + (cam.resolution.y + blockSize2d.y - 1) / blockSize2d.y); + + // Send results to OpenGL buffer for rendering + sendImageToPBO << > > (pbo, cam.resolution, iter, dev_image); + } + + + void showDenoise(uchar4* pbo, int iter) { + const Camera& cam = hst_scene->state.camera; + const dim3 blockSize2d(8, 8); + const dim3 blocksPerGrid2d( + (cam.resolution.x + blockSize2d.x - 1) / blockSize2d.x, + (cam.resolution.y + blockSize2d.y - 1) / blockSize2d.y); + + // Send results to OpenGL buffer for rendering + sendImageToPBO << > > (pbo, cam.resolution, iter, dev_TrousImage); + } diff --git a/src/pathtrace.h b/src/pathtrace.h index 9e12f44..a2791df 100644 --- a/src/pathtrace.h +++ b/src/pathtrace.h @@ -3,8 +3,11 @@ #include #include "scene.h" -void pathtraceInit(Scene *scene); +void pathtraceInit(Scene *scene, float* gausKernel); void pathtraceFree(); void pathtrace(int frame, int iteration); void showGBuffer(uchar4 *pbo); void showImage(uchar4 *pbo, int iter); +void showDenoise(uchar4 *pbo, int iter); + +bool DenoiseImage(int resolutionX, int resolutionY, int iteration, int filterSize, float colWeight, float norWeight, float posWeight); \ No newline at end of file diff --git a/src/preview.cpp b/src/preview.cpp index 3ca2718..994f657 100644 --- a/src/preview.cpp +++ b/src/preview.cpp @@ -214,7 +214,7 @@ void drawGui(int windowWidth, int windowHeight) { ImGui::Checkbox("Denoise", &ui_denoise); - ImGui::SliderInt("Filter Size", &ui_filterSize, 0, 100); + ImGui::SliderInt("Filter Size", &ui_filterSize, 0, 300); ImGui::SliderFloat("Color Weight", &ui_colorWeight, 0.0f, 10.0f); ImGui::SliderFloat("Normal Weight", &ui_normalWeight, 0.0f, 10.0f); ImGui::SliderFloat("Position Weight", &ui_positionWeight, 0.0f, 10.0f); diff --git a/src/sceneStructs.h b/src/sceneStructs.h index da7e558..131f211 100644 --- a/src/sceneStructs.h +++ b/src/sceneStructs.h @@ -79,4 +79,6 @@ struct ShadeableIntersection { // What information might be helpful for guiding a denoising filter? struct GBufferPixel { float t; + glm::vec3 normal; + glm::vec3 position; };