Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
8421472
Remove unused freeglut copyrights
shehzan10 Jan 16, 2021
a79ca55
Update CUDA Computes List
shehzan10 Sep 10, 2021
78e028a
Retab
shehzan10 Sep 10, 2021
c03780d
Update instructions for Fall 2021
shehzan10 Sep 18, 2021
80ad206
Add vulkan option
shehzan10 Sep 18, 2021
cc3da39
Add sample readmes for inspiration
shehzan10 Sep 19, 2021
784f5ca
Merge pull request #1 from CIS565-Fall-2021/update-instructions-2021
shehzan10 Sep 21, 2021
6d7e696
Added Imgui Integration Files
codeplay9800 Sep 19, 2022
8462d52
Updated Instruction.md
codeplay9800 Sep 19, 2022
c44fccb
Update Instruction.md
codeplay9800 Sep 19, 2022
7165f5c
Merge branch 'main' into Integrate_Imgui
codeplay9800 Sep 19, 2022
26e8bf3
Imgui Integrated
codeplay9800 Sep 20, 2022
3dac24e
Added GUIDataContainer Class
codeplay9800 Sep 20, 2022
e666e6a
Removed ImGUI Cmake
codeplay9800 Sep 21, 2022
5000086
Update INSTRUCTION.md
shehzan10 Sep 21, 2022
98f098c
Fix a bug that causes MouseOverImGuiWindow() to not work
dw218192 Sep 22, 2022
16d6638
Merge pull request #1 from dw218192/patch-1
codeplay9800 Sep 22, 2022
6c3b5a5
Update README.md
eyadNabeel Sep 23, 2022
0887259
Core Pathtracer Implemented
eyadNabeel Oct 5, 2022
83baac0
refraction -- schlick approximation
eyadNabeel Oct 6, 2022
cc815d8
Added antialiasing and thin lens ray generation model
eyadNabeel Oct 7, 2022
08e16a2
Direct Light Implementation
eyadNabeel Oct 7, 2022
18e45a6
Motion Blur
eyadNabeel Oct 8, 2022
7541453
readme
eyadNabeel Oct 11, 2022
6e83942
readme
eyadNabeel Oct 11, 2022
0ab8270
added imgui elements
eyadNabeel Oct 21, 2022
2cce266
implemented gBuffer and added Atrous blur
eyadNabeel Oct 22, 2022
c1f1558
implemented Atrois Denoiser
eyadNabeel Oct 23, 2022
46ef0bc
Performance testing and readme update
eyadNabeel Oct 23, 2022
fa929db
Update README.md
eyadNabeel Oct 23, 2022
526626d
Update README.md
eyadNabeel Oct 23, 2022
1c2273c
Update README.md
eyadNabeel Oct 23, 2022
19d6c92
Update README.md
eyadNabeel Oct 23, 2022
19a8e59
Update README.md
eyadNabeel Oct 23, 2022
8479f66
Update README.md
eyadNabeel Oct 23, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,17 @@ set(headers
src/sceneStructs.h
src/preview.h
src/utilities.h
src/ImGui/imconfig.h

src/ImGui/imgui.h
src/ImGui/imconfig.h
src/ImGui/imgui_impl_glfw.h
src/ImGui/imgui_impl_opengl3.h
src/ImGui/imgui_impl_opengl3_loader.h
src/ImGui/imgui_internal.h
src/ImGui/imstb_rectpack.h
src/ImGui/imstb_textedit.h
src/ImGui/imstb_truetype.h
)

set(sources
Expand All @@ -84,6 +95,14 @@ set(sources
src/scene.cpp
src/preview.cpp
src/utilities.cpp

src/ImGui/imgui.cpp
src/ImGui/imgui_demo.cpp
src/ImGui/imgui_draw.cpp
src/ImGui/imgui_impl_glfw.cpp
src/ImGui/imgui_impl_opengl3.cpp
src/ImGui/imgui_tables.cpp
src/ImGui/imgui_widgets.cpp
)

list(SORT headers)
Expand All @@ -92,6 +111,7 @@ list(SORT sources)
source_group(Headers FILES ${headers})
source_group(Sources FILES ${sources})

#add_subdirectory(src/ImGui)
#add_subdirectory(stream_compaction) # TODO: uncomment if using your stream compaction

cuda_add_executable(${CMAKE_PROJECT_NAME} ${sources} ${headers})
Expand Down
183 changes: 88 additions & 95 deletions INSTRUCTION.md

Large diffs are not rendered by default.

117 changes: 112 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,118 @@ CUDA Path Tracer

**University of Pennsylvania, CIS 565: GPU Programming and Architecture, Project 3**

* (TODO) YOUR NAME HERE
* Tested on: (TODO) Windows 22, i7-2222 @ 2.22GHz 22GB, GTX 222 222MB (Moore 2222 Lab)
* Eyad Almoamen
* [LinkedIn](https://www.linkedin.com/in/eyadalmoamen/), [personal website](https://eyadnabeel.com)
* Tested on: Windows 11, i7-10750H CPU @ 2.60GHz 2.59 GHz 16GB, RTX 2070 Super Max-Q Design 8GB (Personal Computer)

### (TODO: Your README)
Introduction
================
I've built a GPU-accelerated Monte Carlo path tracer using CUDA and C++. Parallelization happens on a ray-by-ray basis: terminated rays are eliminated via stream compaction, and rays are sorted by material type in order to avoid warp divergence. The path tracer takes in a scene description .txt file and outputs a rendered image.

Features implemented include:

* [Specular Reflective Material](#specular-reflective-material)
* [Refractive Material](#refractive-material)
* [Thin Lens Model DOF](#thin-lens-model-dof)
* [Motion Blur](#motion-blur)
* [Stochastic Antialiasing](#stochastic-antialiasing)
* [Direct Lighting](#direct-lighting)
* [Denoising](#denoising)

## Specular Reflective Material
The specular reflective material reflects light either perfectly (incident angle == exitant angle) or diffusely. The rate of each is set manually, and the two percentages sum to 100% (for example, if the material is 63% specular, it must be 37% diffuse):

<img align="center" src="img/cornell.2022-10-11_03-01-03z.11379samp.png" width=50% height=50%>

## Refractive Material
The specular refractive material either reflects light or transmits it according to [Snell's Law](https://en.wikipedia.org/wiki/Snell%27s_law). The rate of each is based on the material type and index of refraction. This is usually calculated with the [Fresnel Equations](https://en.wikipedia.org/wiki/Fresnel_equations); however, here I use the [Schlick approximation](https://en.wikipedia.org/wiki/Schlick%27s_approximation) to calculate the rates, as it's more computationally efficient with a very low error rate:

<img align="center" src="img/cornell.2022-10-11_02-20-06z.5201samp.png" width=50% height=50%>

<img align="center" src="img/cornell.2022-10-11_00-50-38z.5598samp.png" width=50% height=50%>

## Thin Lens Model DOF
I utilized the [Thin Lens Model](https://pbr-book.org/3ed-2018/Camera_Models/Projective_Camera_Models#TheThinLensModelandDepthofField) in order to replace the pinhole camera we have with a more realistic virtual lens which allows me to introduce depth of field effects and bokeh:

| Focal Distance | 0 | 3 | 8.5 | 20.5 |
| :------- | :-------: | :-------: | :-------: | :-------: |
| Iterations | 7759 | 5082 | 5142 | 5009 |
| Scene | <img src="img/cornell.2022-10-11_02-43-13z.7759samp.png"> | <img src="img/cornell.2022-10-11_01-23-17z.5082samp.png"> | <img src="img/cornell.2022-10-10_23-09-12z.5142samp.png"> | <img src="img/cornell.2022-10-11_01-07-49z.5009samp.png"> |

## Motion Blur
I added a velocity component to the geometry struct and that allows me to render the image in such a way that it seems the object is moving in the direction of the velocity:

## Stochastic Antialiasing
I added support for stochastic antialiasing by jittering the ray produced from the camera randomly within the range of a pixel length:

| Antialiasing | Without | With |
| :------- | :-------: | :-------: |
| Scene | <img src="img/cornell.2022-10-11_03-38-02z.1000samp.png"> | <img src="img/cornell.2022-10-11_03-40-14z.1000samp.png"> |
| Scene | <img src="img/cornell.2022-10-11_03-54-58z.1000samp.png"> | <img src="img/cornell.2022-10-11_03-53-19z.1000samp.png"> |

## Direct Lighting
To optimize the result and speed up the convergence of the image, I had the pathtracer trace its last ray to a light source in the scene, guaranteeing that we get light contribution. To demonstrate, I've rendered the same scene up to 1000 iterations with and without direct lighting:

| Direct Lighting | Without | With |
| :------- | :-------: | :-------: |
| Scene | <img src="img/"> | <img src="img/"> |

## Denoising
In order to be able to get an acceptable render faster, I've implemented the [Edge Avoiding À-Trous Wavelet Transform](https://jo.dreggn.org/home/2010_atrous.pdf) denoising function. The basic idea is to apply blur to the image while preserving edges between different objects and materials to create the impression of a converged image. Features such as geometry position, surface normal, and material color are used to detect edges between objects and from there apply a blur kernel to the image with varying filter sizes:

| Render | Distance | Position | Normal | Material Color |
| :------- | :-------: | :-------: | :------- | :-------: |
| <img src="img/cornelldenoise1000.png"> | <img src="img/distbuffer_cornell.png"> | <img src="img/posbuffer_cornell.png"> | <img src="img/norbuffer_cornell.png"> | <img src="img/colbuffer_cornell.png"> |
| <img src="img/3matdenoise1000.png"> | <img src="img/distbuffer_3mat.png"> | <img src="img/posbuffer_3mat.png"> | <img src="img/norbuffer_3mat.png"> | <img src="img/colbuffer_3mat.png"> |
| <img src="img/thinlensdenoise1000.png"> | <img src="img/distbuffer_thinlens.png"> | <img src="img/posbuffer_thinlens.png"> | <img src="img/norbuffer_thinlens.png"> | <img src="img/colbuffer_thinlens.png"> |

In general I've found that with filter size 2 or 3, I can get feasible results within 1000 iterations where it usually takes 5000:

| Scene | <img src="img/cornell1000.png"> | <img src="img/cornell5000.png"> | <img src="img/cornelldenoise1000.png"> |
| :------- | :-------: | :-------: | :------- |
| Denoised | No | No | Yes |
| Iterations | 1000 | 5000 | 1000 |

| Scene | <img src="img/3mat1000.png"> | <img src="img/3mat5000.png"> | <img src="img/3matdenoise1000.png"> |
| :------- | :-------: | :-------: | :------- |
| Denoised | No | No | Yes |
| Iterations | 1000 | 5000 | 1000 |

| Scene | <img src="img/thinlens1000.png"> | <img src="img/thinlens5000.png"> | <img src="img/thinlensdenoise1000.png"> |
| :------- | :-------: | :-------: | :------- |
| Denoised | No | No | Yes |
| Iterations | 1000 | 5000 | 1000 |

Here I've rendered the same scene with different filter sizes to illustrate the effect. All renders ran for 1000 iterations:

| Filter Size | 1 | 2 | 3 | 4 | 8 |
| :------- | :-------: | :-------: | :------- | :-------: | :-------: |
| Scene | <img src="img/flitersize1.png"> | <img src="img/flitersize2.png"> | <img src="img/flitersize3.png"> | <img src="img/flitersize4.png"> | <img src="img/flitersize8.png"> |

As we can see here, one of the limitations of this approach is loss of data when it comes to specular surfaces or refractive surfaces, although this can be potentially counteracted by making more bounces for specular materials until we reach a diffuse surface. Of course if the surface is imperfect specular there's no need to do that as the result won't vary that much.

Performance Testing
================
I ran a few tests to see the effect of some of the optimizations I've performed on this path tracer:

The effect of caching is very much evident and it increases as the size of the image increases:

<img align="center" src="img/cachingchart.png" width=50% height=50%>

This is because we're precomputing a potentially very large computation, sparing ourselves the trouble for upcoming iterations.

The effect of material sorting doesn't seem to be too encouraging. Initially I tested it on a scene with one material, and it wasn't an improvement (since we'd be sorting to avoid nonexistent warp divergence). I then switched to a scene with diffuse, reflective, and refractive materials, to no avail:

<img align="center" src="img/materialsortchart.png" width=50% height=50%>

Denoising the image predictably increases running time (assuming all else is constant) which makes sense since for every iteration, it has to apply the kernel and weights to the image. The results are as follows:

<img align="center" src="img/denoiserperformance.png" width=50% height=50%>

The results are more or less what you'd expect, approximately a linear factor slower with denoising. After that, I tested the effect of filter size on running time. My expectation was to see running time increasing linearly with filter size since you have to do that same amount of computation once more for every time you increment filter size. While I was testing filter size, I also tested the effect of using `#pragma unroll` on the `atrousDenoise()` function. The results were as follows:

<img align="center" src="img/flitersize.png" width=50% height=50%>

*DO NOT* leave the README to the last minute! It is a crucial part of the
project, and we will not be able to grade you without a good README.
Initially, as I was increasing filter size, the running time stayed relatively flat. While that could have been linear growth with a small factor, it could also just have been measurement error. So I started using logarithmic intervals, and for a few iterations it remained flat. When I hit filter size 32 I started to see a solid increase in running time, which at first looked like it could be linear (on logarithmic intervals); however, when I reached filter size 128, the exponential increase started to become clearer. Since an exponential increase over logarithmic intervals corresponds to linear growth in the filter size itself, it seems the relationship between filter size and running time is linear with a small constant factor.

As far as unrolling goes, it did introduce a consistent improvement in running time; however, it wasn't very substantial.
8 changes: 5 additions & 3 deletions cmake/CUDAComputesList.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ IF( CUDA_COMPUTE_20
OR CUDA_COMPUTE_70
OR CUDA_COMPUTE_72
OR CUDA_COMPUTE_75
OR CUDA_COMPUTE_80
OR CUDA_COMPUTE_86
)
SET(FALLBACK OFF)
ELSE()
Expand All @@ -70,8 +72,8 @@ LIST(LENGTH COMPUTES_DETECTED_LIST COMPUTES_LEN)
IF(${COMPUTES_LEN} EQUAL 0 AND ${FALLBACK})
MESSAGE(STATUS "You can use -DCOMPUTES_DETECTED_LIST=\"AB;XY\" (semicolon separated list of CUDA Compute versions to enable the specified computes")
MESSAGE(STATUS "Individual compute versions flags are also available under CMake Advance options")
LIST(APPEND COMPUTES_DETECTED_LIST "30" "50" "60" "70")
MESSAGE(STATUS "No computes detected. Fall back to 30, 50, 60 70")
LIST(APPEND COMPUTES_DETECTED_LIST "30" "50" "60" "70" "80")
MESSAGE(STATUS "No computes detected. Fall back to 30, 50, 60, 70, 80")
ENDIF()

LIST(LENGTH COMPUTES_DETECTED_LIST COMPUTES_LEN)
Expand All @@ -90,7 +92,7 @@ MACRO(SET_COMPUTE VERSION)
ENDMACRO(SET_COMPUTE)

# Iterate over compute versions. Create variables and enable computes if needed
FOREACH(VER 20 30 32 35 37 50 52 53 60 61 62 70 72 75)
FOREACH(VER 20 30 32 35 37 50 52 53 60 61 62 70 72 75 80 86)
OPTION(CUDA_COMPUTE_${VER} "CUDA Compute Capability ${VER}" OFF)
MARK_AS_ADVANCED(CUDA_COMPUTE_${VER})
IF(${CUDA_COMPUTE_${VER}})
Expand Down
96 changes: 48 additions & 48 deletions cmake/FindGLFW.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -20,66 +20,66 @@
include(FindPackageHandleStandardArgs)

if (WIN32)
# Find include files
find_path(
GLFW_INCLUDE_DIR
NAMES GLFW/glfw3.h
PATHS
$ENV{PROGRAMFILES}/include
${GLFW_ROOT_DIR}/include
DOC "The directory where GLFW/glfw.h resides")
# Find include files
find_path(
GLFW_INCLUDE_DIR
NAMES GLFW/glfw3.h
PATHS
$ENV{PROGRAMFILES}/include
${GLFW_ROOT_DIR}/include
DOC "The directory where GLFW/glfw.h resides")

# Use glfw3.lib for static library
if (GLFW_USE_STATIC_LIBS)
set(GLFW_LIBRARY_NAME glfw3)
else()
set(GLFW_LIBRARY_NAME glfw3dll)
endif()
# Use glfw3.lib for static library
if (GLFW_USE_STATIC_LIBS)
set(GLFW_LIBRARY_NAME glfw3)
else()
set(GLFW_LIBRARY_NAME glfw3dll)
endif()

# Find library files
find_library(
GLFW_LIBRARY
NAMES ${GLFW_LIBRARY_NAME}
PATHS
$ENV{PROGRAMFILES}/lib
${GLFW_ROOT_DIR}/lib)
# Find library files
find_library(
GLFW_LIBRARY
NAMES ${GLFW_LIBRARY_NAME}
PATHS
$ENV{PROGRAMFILES}/lib
${GLFW_ROOT_DIR}/lib)

unset(GLFW_LIBRARY_NAME)
unset(GLFW_LIBRARY_NAME)
else()
# Find include files
find_path(
GLFW_INCLUDE_DIR
NAMES GLFW/glfw.h
PATHS
/usr/include
/usr/local/include
/sw/include
/opt/local/include
DOC "The directory where GL/glfw.h resides")
# Find include files
find_path(
GLFW_INCLUDE_DIR
NAMES GLFW/glfw.h
PATHS
/usr/include
/usr/local/include
/sw/include
/opt/local/include
DOC "The directory where GL/glfw.h resides")

# Find library files
# Try to use static libraries
find_library(
GLFW_LIBRARY
NAMES glfw3
PATHS
/usr/lib64
/usr/lib
/usr/local/lib64
/usr/local/lib
/sw/lib
/opt/local/lib
${GLFW_ROOT_DIR}/lib
DOC "The GLFW library")
# Find library files
# Try to use static libraries
find_library(
GLFW_LIBRARY
NAMES glfw3
PATHS
/usr/lib64
/usr/lib
/usr/local/lib64
/usr/local/lib
/sw/lib
/opt/local/lib
${GLFW_ROOT_DIR}/lib
DOC "The GLFW library")
endif()

# Handle REQUIRED argument, define *_FOUND variable
find_package_handle_standard_args(GLFW DEFAULT_MSG GLFW_INCLUDE_DIR GLFW_LIBRARY)

# Define GLFW_LIBRARIES and GLFW_INCLUDE_DIRS
if (GLFW_FOUND)
set(GLFW_LIBRARIES ${OPENGL_LIBRARIES} ${GLFW_LIBRARY})
set(GLFW_INCLUDE_DIRS ${GLFW_INCLUDE_DIR})
set(GLFW_LIBRARIES ${OPENGL_LIBRARIES} ${GLFW_LIBRARY})
set(GLFW_INCLUDE_DIRS ${GLFW_INCLUDE_DIR})
endif()

# Hide some variables
Expand Down
44 changes: 22 additions & 22 deletions cmake/FindGLM.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@
# Find GLM
#
# Try to find GLM : OpenGL Mathematics.
# This module defines
# This module defines
# - GLM_INCLUDE_DIRS
# - GLM_FOUND
#
# The following variables can be set as arguments for the module.
# - GLM_ROOT_DIR : Root library directory of GLM
# - GLM_ROOT_DIR : Root library directory of GLM
#
# References:
# - https://github.com/Groovounet/glm/blob/master/util/FindGLM.cmake
Expand All @@ -18,34 +18,34 @@
include(FindPackageHandleStandardArgs)

if (WIN32)
# Find include files
find_path(
GLM_INCLUDE_DIR
NAMES glm/glm.hpp
PATHS
$ENV{PROGRAMFILES}/include
${GLM_ROOT_DIR}/include
DOC "The directory where glm/glm.hpp resides")
# Find include files
find_path(
GLM_INCLUDE_DIR
NAMES glm/glm.hpp
PATHS
$ENV{PROGRAMFILES}/include
${GLM_ROOT_DIR}/include
DOC "The directory where glm/glm.hpp resides")
else()
# Find include files
find_path(
GLM_INCLUDE_DIR
NAMES glm/glm.hpp
PATHS
/usr/include
/usr/local/include
/sw/include
/opt/local/include
${GLM_ROOT_DIR}/include
DOC "The directory where glm/glm.hpp resides")
# Find include files
find_path(
GLM_INCLUDE_DIR
NAMES glm/glm.hpp
PATHS
/usr/include
/usr/local/include
/sw/include
/opt/local/include
${GLM_ROOT_DIR}/include
DOC "The directory where glm/glm.hpp resides")
endif()

# Handle REQUIRED argument, define *_FOUND variable
find_package_handle_standard_args(GLM DEFAULT_MSG GLM_INCLUDE_DIR)

# Define GLM_INCLUDE_DIRS
if (GLM_FOUND)
set(GLM_INCLUDE_DIRS ${GLM_INCLUDE_DIR})
set(GLM_INCLUDE_DIRS ${GLM_INCLUDE_DIR})
endif()

# Hide some variables
Expand Down
Loading