CIS565-Fall-2021 · PacosLelouch · Jan 16, 2021 · Sep 10, 2021 · Sep 10, 2021 · Sep 18, 2021
diff --git a/.gitignore b/.gitignore
@@ -558,3 +558,4 @@ xcuserdata
 *.xccheckout
 *.moved-aside
 *.xcuserstate
+!scenes/*/*.obj

diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -62,6 +62,9 @@ set(GLM_ROOT_DIR "external")
 find_package(GLM REQUIRED)
 include_directories(${GLM_INCLUDE_DIRS})
 
+add_subdirectory(profile_log)
+include_directories(.)
+
 set(headers
     src/main.h
     src/image.h
@@ -73,6 +76,20 @@ set(headers
     src/sceneStructs.h
     src/preview.h
     src/utilities.h
+    src/denoise.h
+
+    src/scenestruct/texture.h
+    src/scenestruct/material.h
+    src/scenestruct/geometry.h
+
+    src/scenestruct/material.inl
+    src/scenestruct/materialPhong.inl
+    src/scenestruct/materialPerfectDielectric.inl
+    src/scenestruct/materialMicrofacetGGX.inl
+
+    src/scenestruct/geometry.inl
+
+    src/thirdparty/tiny_obj_loader.h
     )
 
 set(sources
@@ -84,6 +101,11 @@ set(sources
     src/scene.cpp
     src/preview.cpp
     src/utilities.cpp
+    src/denoise.cu
+
+    src/scene.cu
+
+    src/thirdparty/tiny_obj_loader.cc
     )
 
 set(imgui
@@ -110,10 +132,8 @@ source_group(Headers FILES ${headers})
 source_group(Sources FILES ${sources})
 source_group(imgui FILES ${imgui})
 
-#add_subdirectory(stream_compaction)  # TODO: uncomment if using your stream compaction
-
 cuda_add_executable(${CMAKE_PROJECT_NAME} ${sources} ${headers} ${imgui})
 target_link_libraries(${CMAKE_PROJECT_NAME}
     ${LIBRARIES}
-    #stream_compaction  # TODO: uncomment if using your stream compaction
+    profile_log
     )
diff --git a/README.md b/README.md
@@ -3,11 +3,169 @@ CUDA Denoiser For CUDA Path Tracer
 
 **University of Pennsylvania, CIS 565: GPU Programming and Architecture, Project 4**
 
-* (TODO) YOUR NAME HERE
-* Tested on: (TODO) Windows 22, i7-2222 @ 2.22GHz 22GB, GTX 222 222MB (Moore 2222 Lab)
+* Xuntong Liang
+  * [LinkedIn](https://www.linkedin.com/in/xuntong-liang-406429181/), [GitHub](https://github.com/PacosLelouch), [twitter](https://twitter.com/XTL90234545).
+* Tested on: Windows 10, i7-10750H @ 2.60GHz 16GB, RTX 2070 Super with Max-Q 8192MB
 
-### (TODO: Your README)
 
-*DO NOT* leave the README to the last minute! It is a crucial part of the
-project, and we will not be able to grade you without a good README.
+
+- This project is an extension of my [CUDA Path Tracer](https://github.com/PacosLelouch/Project3-CUDA-Path-Tracer).
+
+
+
+![Overall381](img/readme/cornell_garage_kit_ADVPIPE_depth12_denoise1_filterSize4_PP01--CACHE1st--BVH.2021-10-21_13-01-14z.381samp.png)
+
+
+
+## Features
+
+
+
+### Overall
+
+- Implemented the A-trous wavelet filter.
+- Implemented the edge avoiding A-trous wavelet filter.
+- Implemented temporal sampling.
+- Implemented shared memory version.
+
+
+
+### A-Trous Wavelet Filter
+
+The A-trous wavelet filter is an approximation of a Gaussian filter. It produces filtered images by repeatedly convolving with a generating kernel at different strides. This process only considers the final color, so it may eliminate many high-frequency features of an image.
+
+
+
+### Edge Avoiding A-Trous Wavelet Filter
+
+The edge-avoiding A-trous wavelet filter takes advantage of the bilateral Gaussian filter. The kernel weights are computed with several edge-stopping functions, which take into account more features, such as the surface normal and the position, instead of the final color only. For this, we need to generate a G-buffer in each frame.
+
+Here are some results from two scenes, "Ceiling Light" and "Micro Facet".
+
+
+
+| Ceiling Light 20 Iterations                                  | Ceiling Light 20 Iterations with Denoising                   | Ceiling Light 200 Iterations                                 |
+| ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ |
+| ![20i](img/denoising_time/cornell_ceiling_light_ADVPIPE_depth8--CACHE1st--BVH.2021-10-21_10-13-44z.20samp.png) | ![20id](img/denoising_time/cornell_ceiling_light_ADVPIPE_depth8_denoise1_filterSize4--CACHE1st--BVH.2021-10-21_09-50-19z.20samp.png) | ![200i](img/denoising_time/cornell_ceiling_light_ADVPIPE_depth8--CACHE1st--BVH.2021-10-21_10-30-34z.200samp.png) |
+
+| Micro Facet 50 Iterations                                    | Micro Facet 50 Iterations with Denoising                     | Micro Facet 1500 Iterations                                  |
+| ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ |
+| ![50i](img/denoising_time2/cornellMF_ADVPIPE_depth8--CACHE1st--BVH.2021-10-21_11-57-34z.50samp.png) | ![50id](img/denoising_time2/cornellMF_ADVPIPE_depth8_denoise1_filterSize4--CACHE1st--BVH.2021-10-21_12-00-25z.50samp.png) | ![1500i](img/denoising_time2/cornellMF_ADVPIPE_depth8--CACHE1st--BVH.2021-10-21_12-17-41z.1500samp.png) |
+
+
+
+These images show the effect of the filter. As a result of using the filter, we can get a less noisy image with clear edges within a few iterations, which is much faster than waiting for a lot of iterations without denoising to get a less noisy image. 
+
+
+
+### Temporal Filter
+
+If the camera, the lights, or the objects move, we can also take advantage of the spatial or temporal continuity of a sequence of images, which means that we can use historical data for denoising (temporal accumulation) as long as we can find the corresponding filtered pixel of the target noisy pixel in history (reprojection). The temporal filter process can be divided into two parts: reprojection and temporal accumulation.
+
+
+
+| Ceiling Light without Temporal        | Ceiling Light with Temporal       |
+| ------------------------------------- | --------------------------------- |
+| ![CLnoT](img/readme/CLnoTemporal.gif) | ![CLT](img/readme/CLTemporal.gif) |
+
+| Micro Facet without Temporal          | Micro Facet with Temporal         |
+| ------------------------------------- | --------------------------------- |
+| ![MFnoT](img/readme/MFnoTemporal.gif) | ![MFT](img/readme/MFTemporal.gif) |
+
+
+
+I have not implemented the complete version of SVGF and do not separate direct illumination and indirect illumination, so it cannot reach the performance that SVGF does. But the figures above show that with the temporal filter, we can keep as much filtered data as possible while moving the camera.
+
+They also show that one of the disadvantages of the temporal filter is the lagging of the reflected pixels. In the scene "Micro Facet", even though the image changes more smoothly with the temporal filter, the reflected pixels appear to lag.
+
+
+
+### Shared Memory Optimization
+
+For each pixel, the filter process reads several neighboring pixels to compute a final value, so many pixels are read several times in each block. This process is likely to benefit from shared memory.
+
+
+
+## Performance Analysis
+
+
+
+### Filter Size and Resolution
+
+I did the performance analysis with the two scenes and different filter sizes.
+
+Notice that `the actual filter size = 2 * the parameter of filter size + 1`.
+
+
+
+![PA_FilterSize](img/readme/DenoisingTimeWithDifferentFilterSize.png)
+
+
+
+As the filter size increases, the duration of each frame becomes longer. This is expected, because a larger filter size requires more wavelet filter sweeps. It is also expected that larger resolutions take more time.
+
+So how does the filter size influence the image? In a diffuse scene, increasing the filter size may have little effect ("Ceiling Light"), while in a scene with more specular objects, or with smaller lights, it still has a noticeable effect ("Micro Facet"). Here are some results with filter sizes greater than or equal to 4.
+
+
+
+| Ceiling Light Filter Size 4                                  | Ceiling Light Filter Size 8                                  | Ceiling Light Filter Size 16                                 |
+| ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ |
+| ![CLFS4](img/denoising_time/cornell_ceiling_light_ADVPIPE_depth8_denoise1_filterSize4--CACHE1st--BVH.2021-10-21_09-50-19z.20samp.png) | ![CLFS8](img/denoising_time/cornell_ceiling_light_ADVPIPE_depth8_denoise1_filterSize8--CACHE1st--BVH.2021-10-21_11-13-07z.20samp.png) | ![CLFS16](img/denoising_time/cornell_ceiling_light_ADVPIPE_depth8_denoise1_filterSize16--CACHE1st--BVH.2021-10-21_11-13-19z.20samp.png) |
+
+
+
+| Micro Facet Filter Size 4                                    | Micro Facet Filter Size 8                                    | Micro Facet Filter Size 16                                   |
+| ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ |
+| ![CLFS4](img/denoising_time2/cornellMF_ADVPIPE_depth8_denoise1_filterSize4--CACHE1st--BVH.2021-10-21_12-00-25z.50samp.png) | ![CLFS8](img/denoising_time2/cornellMF_ADVPIPE_depth8_denoise1_filterSize8--CACHE1st--BVH.2021-10-21_12-00-39z.50samp.png) | ![CLFS16](img/denoising_time2/cornellMF_ADVPIPE_depth8_denoise1_filterSize16--CACHE1st--BVH.2021-10-21_12-01-00z.50samp.png) |
+
+
+
+We can see that as the filter size grows, the light becomes blurrier in the scene "Micro Facet".
+
+
+
+### Material Type
+
+We can also infer that the filter makes glossy materials look more diffuse, or overblurred, as is also shown in the overall image. Also, I believe that the denoising effect on diffuse objects is better, so I think the filter could also take material properties such as roughness and specularity into account.
+
+
+
+### Shared Memory
+
+I find that my shared memory optimization is only faster for the first two sweeps of the wavelet filter, and is slower for the third sweep. This disadvantage may come from the large size of the G-buffer pixel. It also prevents me from making the block size or the filter size larger; otherwise the denoising kernel launches fail with invalid arguments. According to my test results, the maximum filter size is 4 with the default 8x8 block size. Since this block size is already the minimum, the filter size cannot be made larger unless I optimize the G-buffer.
+
+
+
+![DenoisingTimeWithAndWithoutSharedMemory](img/readme/DenoisingTimeWithAndWithoutSharedMemory.png)
+
+
+
+### Temporal Filter
+
+The aim of the temporal filter is to keep as much historical data for denoising as possible in interactive rendering applications, which means that we should get an acceptable render result in real time. If it costs too much, it is still a problem for real-time applications.
+
+In the previous section, we saw that when the camera moves around, the duration of rendering is still acceptable. Next I provide an analysis of the temporal filter, compared with no denoising and with applying the spatial filter only.
+
+
+
+![DenoisingTimeWithAndWithoutTemporalFilter](img/readme/DenoisingTimeWithAndWithoutTemporalFilter.png)
+
+
+
+This figure shows that the temporal filter is much faster than the spatial filter, so it adds little overhead to the denoising.
+
+
+
+## Changes
+
+### CMakeList Changes
+
+I added a new project `profile_log` for logging the profile (like CUDA Path Tracer).
+
+
+
+## Reference
+
+1. [Edge-Avoiding A-Trous Wavelet Transform for fast Global Illumination Filtering](https://jo.dreggn.org/home/2010_atrous.pdf)
+2. [Spatiotemporal Variance-Guided Filtering](https://research.nvidia.com/publication/2017-07_Spatiotemporal-Variance-Guided-Filtering%3A)
 
diff --git a/cmake/CUDAComputesList.cmake b/cmake/CUDAComputesList.cmake
@@ -60,6 +60,8 @@ IF(    CUDA_COMPUTE_20
     OR CUDA_COMPUTE_70
     OR CUDA_COMPUTE_72
     OR CUDA_COMPUTE_75
+    OR CUDA_COMPUTE_80
+    OR CUDA_COMPUTE_86
     )
     SET(FALLBACK OFF)
 ELSE()
@@ -70,8 +72,8 @@ LIST(LENGTH COMPUTES_DETECTED_LIST COMPUTES_LEN)
 IF(${COMPUTES_LEN} EQUAL 0 AND ${FALLBACK})
     MESSAGE(STATUS "You can use -DCOMPUTES_DETECTED_LIST=\"AB;XY\" (semicolon separated list of CUDA Compute versions to enable the specified computes")
     MESSAGE(STATUS "Individual compute versions flags are also available under CMake Advance options")
-    LIST(APPEND COMPUTES_DETECTED_LIST "30" "50" "60" "70")
-    MESSAGE(STATUS "No computes detected. Fall back to 30, 50, 60 70")
+    LIST(APPEND COMPUTES_DETECTED_LIST "30" "50" "60" "70" "80")
+    MESSAGE(STATUS "No computes detected. Fall back to 30, 50, 60, 70, 80")
 ENDIF()
 
 LIST(LENGTH COMPUTES_DETECTED_LIST COMPUTES_LEN)
@@ -90,7 +92,7 @@ MACRO(SET_COMPUTE VERSION)
 ENDMACRO(SET_COMPUTE)
 
 # Iterate over compute versions. Create variables and enable computes if needed
-FOREACH(VER 20 30 32 35 37 50 52 53 60 61 62 70 72 75)
+FOREACH(VER 20 30 32 35 37 50 52 53 60 61 62 70 72 75 80 86)
     OPTION(CUDA_COMPUTE_${VER} "CUDA Compute Capability ${VER}" OFF)
     MARK_AS_ADVANCED(CUDA_COMPUTE_${VER})
     IF(${CUDA_COMPUTE_${VER}})

diff --git a/cmake/FindGLFW.cmake b/cmake/FindGLFW.cmake
@@ -20,66 +20,66 @@
 include(FindPackageHandleStandardArgs)
 
 if (WIN32)
-	# Find include files
-	find_path(
-		GLFW_INCLUDE_DIR
-		NAMES GLFW/glfw3.h
-		PATHS
-		$ENV{PROGRAMFILES}/include
-		${GLFW_ROOT_DIR}/include
-		DOC "The directory where GLFW/glfw.h resides")
+  # Find include files
+  find_path(
+    GLFW_INCLUDE_DIR
+    NAMES GLFW/glfw3.h
+    PATHS
+    $ENV{PROGRAMFILES}/include
+    ${GLFW_ROOT_DIR}/include
+    DOC "The directory where GLFW/glfw.h resides")
 
-	# Use glfw3.lib for static library
-	if (GLFW_USE_STATIC_LIBS)
-		set(GLFW_LIBRARY_NAME glfw3)
-	else()
-		set(GLFW_LIBRARY_NAME glfw3dll)
-	endif()
+  # Use glfw3.lib for static library
+  if (GLFW_USE_STATIC_LIBS)
+    set(GLFW_LIBRARY_NAME glfw3)
+  else()
+    set(GLFW_LIBRARY_NAME glfw3dll)
+  endif()
 
-	# Find library files
-	find_library(
-		GLFW_LIBRARY
-		NAMES ${GLFW_LIBRARY_NAME}
-		PATHS
-		$ENV{PROGRAMFILES}/lib
-		${GLFW_ROOT_DIR}/lib)
+  # Find library files
+  find_library(
+    GLFW_LIBRARY
+    NAMES ${GLFW_LIBRARY_NAME}
+    PATHS
+    $ENV{PROGRAMFILES}/lib
+    ${GLFW_ROOT_DIR}/lib)
 
-	unset(GLFW_LIBRARY_NAME)
+  unset(GLFW_LIBRARY_NAME)
 else()
-	# Find include files
-	find_path(
-		GLFW_INCLUDE_DIR
-		NAMES GLFW/glfw.h
-		PATHS
-		/usr/include
-		/usr/local/include
-		/sw/include
-		/opt/local/include
-		DOC "The directory where GL/glfw.h resides")
+  # Find include files
+  find_path(
+    GLFW_INCLUDE_DIR
+    NAMES GLFW/glfw.h
+    PATHS
+    /usr/include
+    /usr/local/include
+    /sw/include
+    /opt/local/include
+    DOC "The directory where GL/glfw.h resides")
 
-	# Find library files
-	# Try to use static libraries
-	find_library(
-		GLFW_LIBRARY
-		NAMES glfw3
-		PATHS
-		/usr/lib64
-		/usr/lib
-		/usr/local/lib64
-		/usr/local/lib
-		/sw/lib
-		/opt/local/lib
-		${GLFW_ROOT_DIR}/lib
-		DOC "The GLFW library")
+  # Find library files
+  # Try to use static libraries
+  find_library(
+    GLFW_LIBRARY
+    NAMES glfw3
+    PATHS
+    /usr/lib64
+    /usr/lib
+    /usr/local/lib64
+    /usr/local/lib
+    /sw/lib
+    /opt/local/lib
+    ${GLFW_ROOT_DIR}/lib
+    DOC "The GLFW library")
 endif()
 
 # Handle REQUIRD argument, define *_FOUND variable
 find_package_handle_standard_args(GLFW DEFAULT_MSG GLFW_INCLUDE_DIR GLFW_LIBRARY)
 
 # Define GLFW_LIBRARIES and GLFW_INCLUDE_DIRS
 if (GLFW_FOUND)
-	set(GLFW_LIBRARIES ${OPENGL_LIBRARIES} ${GLFW_LIBRARY})
-	set(GLFW_INCLUDE_DIRS ${GLFW_INCLUDE_DIR})
+  set(GLFW_LIBRARIES ${OPENGL_LIBRARIES} ${GLFW_LIBRARY})
+  set(GLFW_INCLUDE_DIRS ${GLFW_INCLUDE_DIR})
 endif()
 
 # Hide some variables