diff --git a/CMakeLists.txt b/CMakeLists.txt index c473e2c..77e7c4f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -71,9 +71,11 @@ set(headers src/pathtrace.h src/scene.h src/sceneStructs.h + src/tiny_obj_loader.h src/preview.h src/utilities.h src/ImGui/imconfig.h + src/lbvh.h src/ImGui/imgui.h src/ImGui/imconfig.h @@ -95,6 +97,7 @@ set(sources src/scene.cpp src/preview.cpp src/utilities.cpp + src/lbvh.cu src/ImGui/imgui.cpp src/ImGui/imgui_demo.cpp @@ -111,6 +114,7 @@ list(SORT sources) source_group(Headers FILES ${headers}) source_group(Sources FILES ${sources}) +#add_subdirectory(src/libmorton) #add_subdirectory(src/ImGui) #add_subdirectory(stream_compaction) # TODO: uncomment if using your stream compaction diff --git a/README.md b/README.md index 110697c..10fdb2b 100644 --- a/README.md +++ b/README.md @@ -3,11 +3,396 @@ CUDA Path Tracer **University of Pennsylvania, CIS 565: GPU Programming and Architecture, Project 3** -* (TODO) YOUR NAME HERE -* Tested on: (TODO) Windows 22, i7-2222 @ 2.22GHz 22GB, GTX 222 222MB (Moore 2222 Lab) +* Megan Reddy + * [LinkedIn](https://www.linkedin.com/in/meganr25a949125/), [personal website](https://meganr28.github.io/) +* Tested on: Windows 10, AMD Ryzen 9 5900HS with Radeon Graphics @ 3301 MHz 16GB, NVIDIA GeForce RTX 3060 Laptop GPU 6GB (Personal Computer) +* Compute Capability: 8.6 -### (TODO: Your README) +### Overview -*DO NOT* leave the README to the last minute! It is a crucial part of the -project, and we will not be able to grade you without a good README. +**Path tracing** is an algorithm for synthesizing images by simulating the physical properties of light. Some effects +that you can obtain for free in a path tracer include global illumination, caustics, soft shadows, motion blur, and depth-of-field. +Generating a converged, noise-free image involves tracing millions of rays for each scene. 
Tracing ray paths is an "embarrassingly parallel" +problem, meaning that it can be easily converted into a parallel task on the GPU. + +This path tracer is implemented for the GPU using NVIDIA's CUDA API. It parallelizes several steps that would run serially on the CPU, +greatly reducing rendering time from hours to minutes. This includes generating camera rays, computing ray intersections for each pixel, +shading, and writing to the output image. + +

+ +

+

A custom-made model of a violin mesh with reflective and refractive spheres (183,024 triangles)

+ +#### Features Implemented + +- Core Features + * Shading kernel for ideal diffuse, perfectly specular, and imperfectly specular surfaces + * Path continuation/termination with stream compaction + * Material sorting + * Caching first bounce intersections +- Extra Features + * Performance + * Linear Bounding Volume Hierarchy (LBVH) + * Bounding Volume Hierarchy (with Midpoint and SAH split methods) + * Russian roulette ray termination + * Visual + * Refraction (with Fresnel) + * Depth-of-field + * Stochastic sampled anti-aliasing + * Arbitrary mesh loading (with bounding box culling) + * Reinhard operator and gamma correction (conversion to sRGB) + +### Usage + +#### Loading and Running a Scene + +In order to run a `.txt` file from the `scenes` folder, you must provide it as a command line argument. You can do this two ways: +* Call the program with the argument: `cis565_path_tracer scenes/sphere.txt` +* In Visual Studio, navigate to your project `Properties` and select `Configuration Properties -> Debugging -> Command Arguments` and provide the path to the scene file: + `../scenes/sphere.txt`. Note that you may need to provide the full path instead of the relative path. + +#### Macros +There are several macros that enable the user to test out different performance features and visual improvements. +They are located in the following files: + +##### pathtrace.cu + +* `ANTIALIASING` - jitter the camera ray direction to remove jagged edges from shapes. +* `MATERIAL_SORT_` - sort materials so that the same materials are contiguous in memory. +* `STREAM_COMPACTION_` - remove rays that have terminated early. +* `CACHE_FIRST_BOUNCE` - cache first bounce intersections to use in subsequent iterations. + +##### utilities.h + +I have added these two macros to easily enable/disable features in this file. + +* `ENABLE` 1 +* `DISABLE` 0 + +An example usage would be `RUSSIAN_ROULETTE ENABLE`. + +* `RUSSIAN_ROULETTE` - turn on to use russian roulette ray termination. 
+* `CONVERT_TO_SRGB` - convert final image from HDR to sRGB. +* `BB_CULLING` - first check if a ray intersects the bounding volume encompassing a mesh. If so, check all the triangles within the mesh. +* `USE_LBVH` - use a Linear Bounding Volume Hierarchy to accelerate intersection testing. +* `USE_BVH` - use a standard Bounding Volume Hierarchy to accelerate intersection testing. One of the split methods below must also be enabled. +* `USE_BVH_MIDPOINT` - split BVH based on midpoint of primitives' centroids. +* `USE_BVH_SAH` - split BVH based on Surface Area Heuristic. +* `DISPLAY_HEATMAP` - enable heatmap visualization of BVH to diagnose areas with most intersections. + +### Visual Features + +#### Materials + +In path tracing, we sample a Bidirectional Scattering Distribution Function (BSDF) to choose a direction for the ray to bounce. +This function describes the probability that light incoming along a ray `wi` will leave along a direction `wo`. In this implementation, +there are four material types available: diffuse, perfectly specular reflective, imperfectly specular reflective, and specular transmissive. + +##### Diffuse + +Perfectly diffuse surfaces scatter illumination equally in all directions. In order to choose a new ray direction, we choose a random +direction within the hemisphere centered about the normal (see `calculateRandomDirectionInHemisphere`). + +![](img/final/diffuse.PNG) + +##### Perfect Specular Reflective + +Perfectly specular reflective surfaces only have one outgoing direction that contributes light energy. This single direction +is computed using `glm::reflect` and is the reflection of `wo` about the surface normal. + +![](img/final/perfect_specular.PNG) + +##### Imperfect Specular Reflective + +Imperfectly specular surfaces commonly include microfacet materials; however, I opted to implement a more plastic-like material. To do +this, I generated a random variable `xi` between 0 and 1. 
This represents the probability of choosing the Diffuse BRDF or Specular BRDF. +We use this variable to choose one and then scatter the ray in that direction, multiplying the ray's throughput by the Fresnel term and diffuse or specular +color, depending on which branch was chosen. + +![](img/final/imperfect_specular.PNG) + +##### Refraction (with Fresnel) + +For specular transmission, we use Snell's Law to compute the direction of the transmitted ray. If the angle of incidence is greater than a critical angle, +we must reflect the ray instead. In the code, we check the return value of `glm::refract` to determine whether we should reflect or not. Additionally, +we compute the Fresnel term and factor this into the color. The Fresnel effect describes the relationship between reflectance and viewing angle, where surfaces +tend to be more reflective at grazing angles. + +![](img/final/refractive.PNG) + +#### Anti-Aliasing + +Anti-aliasing is a technique to remove jagged edges from images. The idea is to jitter the samples within a pixel, obtain the color +from casting rays in those directions, and then average the samples to get the final color. Since we are already taking many samples +per pixel (each sample is an "iteration"), we can get this for free by simply jittering the x and y pixel position by some random value between 0 and 1 +every time we generate a ray from the camera. This produces the "smoothed" appearance seen in the images below. + +| No Anti-Aliasing | Anti-Aliasing | +:-------------------------:|:-------------------------: +![](img/final/noantialiased-zoomed.PNG) | ![](img/final/antialiased-zoomed.PNG) + +#### Depth-of-field + +Depth-of-field is when objects that are within some focal distance appear in-focus whereas other objects appear out-of-focus. +To achieve this effect, we sample a point on a concentric disk with radius `r`. This disk represents the camera aperture. 
Once we have +this sample point, we add it to the ray origin and compute the focal distance as the distance between the camera `lookAt` point and +`eye`. To use depth-of-field, be sure to set `LENS_RADIUS` to a value greater than zero in the scene file. + +| No Depth-of-Field | Depth-of-Field (Lens Radius: 0.5) | +:-------------------------:|:-------------------------: +![](img/final/gamma.PNG) | ![](img/final/dof.PNG) + +#### Reinhard Tone Mapping and Gamma Correction + +Before writing to the output image, the Reinhard operator is applied to map the output color from HDR to sRGB. Additionally, +gamma correction is applied to control the brightness of the image. These can be toggled using the `CONVERT_TO_SRGB` macro in `utilities.h`. + +| No Tone Mapping or Gamma | Tone Mapping and Gamma | +:-------------------------:|:-------------------------: +![](img/final/nogamma.PNG) | ![](img/final/gamma.PNG) + +#### Mesh Loading + +This project relies on the `tinyobj` header file for loading meshes. In a scene file, the user must specify a mesh in the +following format: + +``` +OBJECT 0 +mesh dragon.obj +material 1 +TRANS 0 0 0 +ROTAT 0 0 0 +SCALE 1 1 1 +``` + +The `mesh` keyword must be followed by an `.obj` file to load. In this implementation, the transformations in the file are ignored +since the transformations are already baked into the mesh being imported. Mesh loading involves iterating through all the faces in the mesh, +constructing a `Triangle` struct, and pushing this back to a vector of triangles that lives in the `Scene` class. Each mesh is a `Geom` that stores +an offset into this array of triangles. + +

+ Two Dragons

+

Two Dragons

+ +### Performance Improvements + +The following features are intended to improve total render time by reducing the total number of ray intersections +and re-ordering materials so that they are contiguous in memory before shading. + +#### Testing Methodology + +To measure performance, I used a `PerformanceTimer` class with methods `startGpuTimer()`, `endGpuTimer()`, +and `getGpuElapsedTimeForPreviousOperation()`. These methods are wrappers around calls to the CUDA runtime library. +For measuring individual kernels, I placed a call to `startGpuTimer` and `endGpuTimer` around the kernel and called `getGpuElapsedTimeForPreviousOperation` after +the call to `endGpuTimer`. To measure the performance of one iteration, I summed the times for each kernel/thrust call within a single call to the +`pathtrace` function. I could have also placed a timer around the `pathtrace` call, but I wanted to obtain timing without memory I/O and setup. + +During performance testing, I noticed that the timing for the first iteration is usually an outlier within the data +(the value is either much higher or much lower than the rest of the iterations), +which might be caused by the application startup/warmup being factored into the elapsed time. This number is factored into the averages below, meaning +that the averages might be slightly higher/lower than normal, but still capture the performance of each feature well. + +#### Acceleration Structures + +For this project, I wanted to learn more about path tracer performance and acceleration structures, so I chose to implement a Linear Bounding Volume Hierarchy (LBVH), which is described further +in [this paper](https://research.nvidia.com/sites/default/files/pubs/2012-06_Maximizing-Parallelism-in/karras2012hpg_paper.pdf). +After implementing this feature, I was interested to see the comparison between an LBVH and a BVH that uses the Midpoint split method +and the SAH split method, so I added those too. 
The BVH with both Midpoint and SAH splits is currently functional, but also suffers from a performance bug that may be caused by +a bug in the splitting code. Since the LBVH was my main focus, I didn't quite polish this aspect of the BVH (therefore it runs slowly for larger triangle counts), +but have still included it here in the discussion. + +In the future, I would also like to explore spatial-partitioning schemes such as an octree or kd-tree. + +##### Linear Bounding Volume Hierarchy + +The Linear Bounding Volume Hierarchy (LBVH) was first described in [this paper](https://luebke.us/publications/eg09.pdf) by Lauterbach, et al. (2009). The basic idea +is to order the primitives (in this case triangles) in a way that will place primitives that are close to each other in 3D space +close to each other in the tree structure. To do this, we sort the triangles by the centroid of their bounding boxes expressed relative to the "scene" bounding box. In other words, +we compute a normalized centroid: + +```glm::vec3 norm_centroid = (centroid - sceneAABB.min) / (sceneAABB.max - sceneAABB.min);``` + +A Morton code is then computed for each triangle from its normalized centroid by quantizing the coordinates and interleaving their bits. Once we have the Morton codes, we sort them using `std::sort`. The paper mentions using parallel radix sort, which is a good option if we're doing construction on the GPU. Since the construction +is being done on the CPU, it suffices to use the C++ STL function. After sorting, we start building the tree. The method presented in Karras (2012) uses a numbering scheme to assign a range of Morton codes +to each tree node. This numbering scheme was developed so that construction could be parallelized, but I followed the same method in case I wish to do GPU construction later on. The most important part is the split +criterion - the nodes are split according to their highest differing bit in their Morton Codes. This diagram demonstrates this idea: + +

+ LBVH Split Based on Highest Differing Bit

+

Linear Bounding Volume Hierarchy Split Based on Highest Differing Bit (credit: NVIDIA)

+ +After splitting the nodes, we construct the bounding boxes in a bottom-to-top fashion by first assigning boxes to the leaf nodes and then constructing +the parents' bounding boxes based off of those. This is to ensure less overlap between bounding boxes, but it is not perfect. + +To traverse the LBVH, we use an iterative traversal with a per-thread stack. For each node, we check whether the current ray intersects the children, and depending +on the results, we choose whether to keep traversing the left subtree, right subtree, or both. If we choose to traverse both, we move to the left child, and push the right +child onto the stack. This essentially allows us to search the entire left subtree first before moving to the right-hand side. One downside of the LBVH is that it is optimized for +fast construction, which isn't really necessary for this path tracer since the construction is only done once at startup. If this were a dynamic scene with moving objects, then it would have been more important +to have optimized construction. As a result, this can lead to less-optimal or unbalanced tree structures. + +##### Bounding Volume Hierarchy (Midpoint and SAH) + +Initially, I was only planning to implement the LBVH, but after getting extremely slow runtimes for it, +I decided to try out a BVH with Midpoint split. Turns out, the LBVH was working fine, I just had a minor bug in my AABB intersection +test! (see the bloopers below). After I fixed that, the LBVH outperformed the BVH. + +The first method I implemented was splitting by the midpoint of the triangles' centroids. This gave some performance improvement, but led to very +unbalanced trees. For the teapot, it placed close to ~2000 triangles in one leaf node. I believe there is another bug that's causing this imbalance, but in +general it is possible for Midpoint split to result in very unbalanced trees. Since the Midpoint split was inefficient, I gave the Surface Area Heuristic (SAH) +a try. 
In this method, we compute the cost of splitting along a particular axis at a triangle's centroid. This means we perform `3 * num_triangles` cost checks +to determine a split. The cost function is dependent on the number of triangles that would be placed in each child of the current node as well as +the surface area of those boxes. We give a higher probability of being hit by a ray to larger boxes. After implementing this heuristic, the trees were more balanced, +but the construction time had increased immensely because of the cost checks. The construction time was too long for the Stanford Bunny and Stanford Dragon, so I have omitted these from the +graphs. + +##### Bounding Box Culling + +A basic optimization I implemented was bounding box culling for loaded meshes. This can be toggled with `BB_CULLING` in `utilities.h`. +I calculated a bounding box for the entire mesh once loaded, and if the ray hits this box during intersection testing, it will check all its triangles. +If it misses, it will skip the entire mesh. Bounding box culling works best if the mesh takes up a smaller portion of the screen. If the mesh is very large, +the probability of hitting the bounding box and checking all the triangles is higher. + +The following meshes were used to analyze the performance of each acceleration structure: + +| Teapot | Bunny | Dragon | +:-------------------------:|:-------------------------:|:-------------------------: +![](img/final/acceleration_teapot.PNG) | ![](img/final/acceleration_bunny.PNG) | ![](img/final/acceleration_dragon.PNG) + +Performance comparisons for each acceleration structure can be found in the table and chart below: + +

+ Acceleration Structures Performance Table

+

Table 1. Performance Comparison of Different Acceleration Structures

+ +

+ Acceleration Structures Performance Chart

+

Figure 1. Performance Comparison of Different Acceleration Structures

+ +The LBVH performed the best out of all optimizations. Even for meshes with high triangle counts, it still ran much faster and did not +suffer from a large performance drop like the rest of the implementations. One cause of this is its balanced tree structure and ordering of +the nodes (see heatmap below). Sibling nodes are always next to each other in memory. The performance bug in the BVH implementations is apparent from the table and chart, since the +runtimes are unusually long. + +

+ Stanford Dragon Heatmap

+

Stanford Dragon Heatmap

+ +The following performance tests were done using the scenes below. A closed version of the scene is provided for use during stream compaction analysis. + +| Pastel Room Open | Pastel Room Closed | +:-------------------------:|:-------------------------: +![](img/final/stream_compact_open_labels.PNG) | ![](img/final/stream_compact_closed.PNG) + +#### Path Termination with Stream Compaction + +Stream compaction helps remove terminated rays after each bounce. This ensures that we spend less time performing computations +for rays that will contribute no light energy to the final image. This graph shows the effect of performing stream compaction +within a single iteration for an open scene and a closed scene. Without stream compaction, the total number of ray paths would be +800,000 for this scene (resolution 1000x800). + +

+ Stream Compaction Chart +

+

Figure 2. Stream Compaction Performance, Unterminated Rays

+ +

+ Stream Compaction Chart +

+

Figure 3. Stream Compaction Performance, Time in Intersections and Shading

+ +From the charts, it is evident that stream compaction lowers the amount of time spent in the intersection and shading kernels +and reduces time wasted on terminated rays. The contrast between open and closed scenes is also apparent, where stream compaction removed +significantly more rays from the open scene than the closed scene. This is expected because the rays have an easier way to "escape" out of the scene, +whereas a closed room does not allow them to terminate as quickly since they can still bounce around for multiple depths. + +#### Russian Roulette Ray Termination + +Russian roulette is an optimization technique intended to terminate ray paths that contribute very little to the final result. This +involves a simple check of whether some random value `xi` is less than a value based on the `maxColorChannel` and terminating the path if so. +If it is not, we boost this path's contribution by dividing by `maxColorChannel` and continue iterating. This is to account for the contribution +of paths that would have contributed to the pixel color, but have terminated due to Russian roulette. + +

+ Russian Roulette Performance Chart

+

Figure 4. Russian Roulette Performance Impact

+ +Looking at this graph, it seems as if turning Russian Roulette off is faster. However, the numbers are very close and do not have +an extremely noticeable performance impact. One possible reason it could take slightly longer when enabled is that Russian Roulette adds +a branch condition in the shading kernel, which could lead to thread divergence. + +#### Caching First Bounce Intersections + +Since the result of the first intersection is deterministic, we can cache the first bounce intersections and save +them for future iterations. This will save us from performing one extra `computeIntersections` call per run. Note +that this feature cannot be used with anti-aliasing or depth-of-field since we jitter the camera rays, which may result +in different outcomes for the first bounce intersection. + +

+ Caching Elapsed Time Chart

+

Figure 5. Caching vs. No Caching Across Iterations (Total Elapsed Time)

+ +

+ Caching vs. Trace Depth Chart

+

Figure 6. Caching vs. No Caching For 10 Iterations With Varying Trace Depth

+ +Caching first bounce intersections had a very subtle effect, but still improved performance. Overall, it +decreased the amount of time taken during each iteration since we are reducing the number of `computeIntersections` +calls by 1. + +#### Material Sorting + +Shading computations can take different amounts of time depending on the type of material. To further improve performance, +we can sort materials by type after intersections are computed so that the same materials are contiguous in memory. This ensures +that materials with similar computational complexity will be executed together. + +

+ Material Sorting Chart

+

Figure 7. Shading Kernel Execution Time, Sorting vs. No Sorting

+ +Based on this graph, material sorting did improve performance for this scene. I think this is slightly surprising because +when I tested simpler scenes, it led to a major performance decrease. I suspected that it was because there are only a +handful of materials in the scene, making the benefits of sorting unnoticeable. My assumption was that sorting will only improve performance +when there is a large number of different materials in the scene, but it was nice to see that re-ordering the materials had a positive impact +for this particular scene. + +#### Bloopers + +**Broken AABB** - Buggy AABB Intersection Test + +![](img/bloopers/cornell.2022-10-06_04-26-16z.5000samp.png) + +**Chrome Bunny** - Flat Normals +![](img/bloopers/cornell.2022-10-06_05-12-34z.5000samp.png) + +**Crazy Reflections** +![](img/bloopers/cornell.2022-09-24_switch_diffuse_spec_probability.png) + +#### References + +* [Tero Karras' Blog - Linear Bounding Volume Hierarchy](https://developer.nvidia.com/blog/thinking-parallel-part-ii-tree-traversal-gpu/) +* [Paper - Maximizing Parallelism in the Construction of BVHs, Octrees, and k-d Trees](https://research.nvidia.com/sites/default/files/pubs/2012-06_Maximizing-Parallelism-in/karras2012hpg_paper.pdf) +* [Surface Area Heuristic - Jacco Bikker](https://jacco.ompf2.com/2022/04/18/how-to-build-a-bvh-part-2-faster-rays/) +* [Raycasting AABBs](https://gdbooks.gitbooks.io/3dcollisions/content/Chapter3/raycast_aabb.html) +* [Fast, Branchless Ray/Bounding Box Intersections](https://tavianator.com/2011/ray_box.html) +* Physically Based Rendering: From Theory to Implementation - Pharr, Jakob, Humphreys + +##### Third Party Libraries + +* tinyobj +* libmorton (included but not used) + +##### Models + +* Stanford Dragon - [The Stanford 3D Scanning Repository](http://graphics.stanford.edu/data/3Dscanrep/) +* Stanford Bunny and Teapot - [Alec Jacobson's Common 3D Test Models](https://github.com/alecjacobson/common-3d-test-models) diff --git 
a/img/bloopers/cornell.2022-09-22_diffuse_1_bounce.png b/img/bloopers/cornell.2022-09-22_diffuse_1_bounce.png new file mode 100644 index 0000000..42fff27 Binary files /dev/null and b/img/bloopers/cornell.2022-09-22_diffuse_1_bounce.png differ diff --git a/img/bloopers/cornell.2022-09-22_diffuse_2_bounces.png b/img/bloopers/cornell.2022-09-22_diffuse_2_bounces.png new file mode 100644 index 0000000..429e325 Binary files /dev/null and b/img/bloopers/cornell.2022-09-22_diffuse_2_bounces.png differ diff --git a/img/bloopers/cornell.2022-09-22_diffuse_3_bounces.png b/img/bloopers/cornell.2022-09-22_diffuse_3_bounces.png new file mode 100644 index 0000000..1889df0 Binary files /dev/null and b/img/bloopers/cornell.2022-09-22_diffuse_3_bounces.png differ diff --git a/img/bloopers/cornell.2022-09-22_init_program.png b/img/bloopers/cornell.2022-09-22_init_program.png new file mode 100644 index 0000000..6b5453d Binary files /dev/null and b/img/bloopers/cornell.2022-09-22_init_program.png differ diff --git a/img/bloopers/cornell.2022-09-23_diffuse_rngfix_2_bounces.png b/img/bloopers/cornell.2022-09-23_diffuse_rngfix_2_bounces.png new file mode 100644 index 0000000..629f93a Binary files /dev/null and b/img/bloopers/cornell.2022-09-23_diffuse_rngfix_2_bounces.png differ diff --git a/img/bloopers/cornell.2022-09-24_diffuse-5000samp-depth8.png b/img/bloopers/cornell.2022-09-24_diffuse-5000samp-depth8.png new file mode 100644 index 0000000..2a11cef Binary files /dev/null and b/img/bloopers/cornell.2022-09-24_diffuse-5000samp-depth8.png differ diff --git a/img/bloopers/cornell.2022-09-24_diffuse_rngfix_3_bounces.png b/img/bloopers/cornell.2022-09-24_diffuse_rngfix_3_bounces.png new file mode 100644 index 0000000..16aa68c Binary files /dev/null and b/img/bloopers/cornell.2022-09-24_diffuse_rngfix_3_bounces.png differ diff --git a/img/bloopers/cornell.2022-09-24_switch_diffuse_spec_probability.png b/img/bloopers/cornell.2022-09-24_switch_diffuse_spec_probability.png new file mode 
100644 index 0000000..4c0a781 Binary files /dev/null and b/img/bloopers/cornell.2022-09-24_switch_diffuse_spec_probability.png differ diff --git a/img/bloopers/cornell.2022-10-06_04-26-16z.5000samp.png b/img/bloopers/cornell.2022-10-06_04-26-16z.5000samp.png new file mode 100644 index 0000000..4a961e2 Binary files /dev/null and b/img/bloopers/cornell.2022-10-06_04-26-16z.5000samp.png differ diff --git a/img/bloopers/cornell.2022-10-06_05-12-34z.5000samp.png b/img/bloopers/cornell.2022-10-06_05-12-34z.5000samp.png new file mode 100644 index 0000000..913fedf Binary files /dev/null and b/img/bloopers/cornell.2022-10-06_05-12-34z.5000samp.png differ diff --git a/img/cornell.2022-09-24_depth8_5000samp_nocompact.png b/img/cornell.2022-09-24_depth8_5000samp_nocompact.png new file mode 100644 index 0000000..3ee2e6b Binary files /dev/null and b/img/cornell.2022-09-24_depth8_5000samp_nocompact.png differ diff --git a/img/cornell.2022-09-24_depth8_5000samp_spec.png b/img/cornell.2022-09-24_depth8_5000samp_spec.png new file mode 100644 index 0000000..bc02c5a Binary files /dev/null and b/img/cornell.2022-09-24_depth8_5000samp_spec.png differ diff --git a/img/cornell.2022-09-24_depth8_5000samp_withcompact.png b/img/cornell.2022-09-24_depth8_5000samp_withcompact.png new file mode 100644 index 0000000..7f69499 Binary files /dev/null and b/img/cornell.2022-09-24_depth8_5000samp_withcompact.png differ diff --git a/img/cornell.2022-09-25_depth8_5000samp_mat_nocache.png b/img/cornell.2022-09-25_depth8_5000samp_mat_nocache.png new file mode 100644 index 0000000..ffd52b7 Binary files /dev/null and b/img/cornell.2022-09-25_depth8_5000samp_mat_nocache.png differ diff --git a/img/cornell.2022-09-25_depth8_5000samples_nomat_nocache.png b/img/cornell.2022-09-25_depth8_5000samples_nomat_nocache.png new file mode 100644 index 0000000..bc02c5a Binary files /dev/null and b/img/cornell.2022-09-25_depth8_5000samples_nomat_nocache.png differ diff --git 
a/img/cornell.2022-09-27_depth8_5000samp_antialiasing.png b/img/cornell.2022-09-27_depth8_5000samp_antialiasing.png new file mode 100644 index 0000000..c62c93f Binary files /dev/null and b/img/cornell.2022-09-27_depth8_5000samp_antialiasing.png differ diff --git a/img/cornell.2022-09-27_depth8_5000samp_dof.png b/img/cornell.2022-09-27_depth8_5000samp_dof.png new file mode 100644 index 0000000..c20c505 Binary files /dev/null and b/img/cornell.2022-09-27_depth8_5000samp_dof.png differ diff --git a/img/cornell.2022-09-27_depth8_5000samp_noantialiasing.png b/img/cornell.2022-09-27_depth8_5000samp_noantialiasing.png new file mode 100644 index 0000000..01050b7 Binary files /dev/null and b/img/cornell.2022-09-27_depth8_5000samp_noantialiasing.png differ diff --git a/img/cornell.2022-09-28_21-01-03z.5000samp.png b/img/cornell.2022-09-28_21-01-03z.5000samp.png new file mode 100644 index 0000000..43b8c67 Binary files /dev/null and b/img/cornell.2022-09-28_21-01-03z.5000samp.png differ diff --git a/img/cornell.2022-10-09_13-47-52z.5000samp.png b/img/cornell.2022-10-09_13-47-52z.5000samp.png new file mode 100644 index 0000000..f05d851 Binary files /dev/null and b/img/cornell.2022-10-09_13-47-52z.5000samp.png differ diff --git a/img/cornell.2022-10-10_03-25-47z.5000samp.png b/img/cornell.2022-10-10_03-25-47z.5000samp.png new file mode 100644 index 0000000..5fbbfe3 Binary files /dev/null and b/img/cornell.2022-10-10_03-25-47z.5000samp.png differ diff --git a/img/dragon_5000_lbvh.png b/img/dragon_5000_lbvh.png new file mode 100644 index 0000000..3e4eacd Binary files /dev/null and b/img/dragon_5000_lbvh.png differ diff --git a/img/final/acceleration_bunny.PNG b/img/final/acceleration_bunny.PNG new file mode 100644 index 0000000..73a2549 Binary files /dev/null and b/img/final/acceleration_bunny.PNG differ diff --git a/img/final/acceleration_dragon.PNG b/img/final/acceleration_dragon.PNG new file mode 100644 index 0000000..032cd98 Binary files /dev/null and 
b/img/final/acceleration_dragon.PNG differ diff --git a/img/final/acceleration_teapot.PNG b/img/final/acceleration_teapot.PNG new file mode 100644 index 0000000..63c5870 Binary files /dev/null and b/img/final/acceleration_teapot.PNG differ diff --git a/img/final/anti-aliasing-zoomed.PNG b/img/final/anti-aliasing-zoomed.PNG new file mode 100644 index 0000000..15106fd Binary files /dev/null and b/img/final/anti-aliasing-zoomed.PNG differ diff --git a/img/final/anti-aliasing.PNG b/img/final/anti-aliasing.PNG new file mode 100644 index 0000000..a40aa38 Binary files /dev/null and b/img/final/anti-aliasing.PNG differ diff --git a/img/final/antialiased-zoomed.PNG b/img/final/antialiased-zoomed.PNG new file mode 100644 index 0000000..b7c9218 Binary files /dev/null and b/img/final/antialiased-zoomed.PNG differ diff --git a/img/final/diffuse.PNG b/img/final/diffuse.PNG new file mode 100644 index 0000000..a8a78a0 Binary files /dev/null and b/img/final/diffuse.PNG differ diff --git a/img/final/dof.PNG b/img/final/dof.PNG new file mode 100644 index 0000000..e0515c9 Binary files /dev/null and b/img/final/dof.PNG differ diff --git a/img/final/gamma.PNG b/img/final/gamma.PNG new file mode 100644 index 0000000..4be40cf Binary files /dev/null and b/img/final/gamma.PNG differ diff --git a/img/final/heatmap.png b/img/final/heatmap.png new file mode 100644 index 0000000..1b7373c Binary files /dev/null and b/img/final/heatmap.png differ diff --git a/img/final/imperfect_specular.PNG b/img/final/imperfect_specular.PNG new file mode 100644 index 0000000..1e85e27 Binary files /dev/null and b/img/final/imperfect_specular.PNG differ diff --git a/img/final/mesh_loading.PNG b/img/final/mesh_loading.PNG new file mode 100644 index 0000000..5894799 Binary files /dev/null and b/img/final/mesh_loading.PNG differ diff --git a/img/final/no-anti-aliasing-zoomed.png b/img/final/no-anti-aliasing-zoomed.png new file mode 100644 index 0000000..ffa8f3c Binary files /dev/null and 
b/img/final/no-anti-aliasing-zoomed.png differ diff --git a/img/final/no-anti-aliasing.PNG b/img/final/no-anti-aliasing.PNG new file mode 100644 index 0000000..4de1ed6 Binary files /dev/null and b/img/final/no-anti-aliasing.PNG differ diff --git a/img/final/noantialiased-zoomed.PNG b/img/final/noantialiased-zoomed.PNG new file mode 100644 index 0000000..beff402 Binary files /dev/null and b/img/final/noantialiased-zoomed.PNG differ diff --git a/img/final/nogamma.PNG b/img/final/nogamma.PNG new file mode 100644 index 0000000..3b21e2c Binary files /dev/null and b/img/final/nogamma.PNG differ diff --git a/img/final/perfect_specular.PNG b/img/final/perfect_specular.PNG new file mode 100644 index 0000000..e4b487a Binary files /dev/null and b/img/final/perfect_specular.PNG differ diff --git a/img/final/refractive.PNG b/img/final/refractive.PNG new file mode 100644 index 0000000..c0e81e1 Binary files /dev/null and b/img/final/refractive.PNG differ diff --git a/img/final/stream_compact_closed.PNG b/img/final/stream_compact_closed.PNG new file mode 100644 index 0000000..a0413be Binary files /dev/null and b/img/final/stream_compact_closed.PNG differ diff --git a/img/final/stream_compact_open - Copy.PNG b/img/final/stream_compact_open - Copy.PNG new file mode 100644 index 0000000..9324756 Binary files /dev/null and b/img/final/stream_compact_open - Copy.PNG differ diff --git a/img/final/stream_compact_open.PNG b/img/final/stream_compact_open.PNG new file mode 100644 index 0000000..9324756 Binary files /dev/null and b/img/final/stream_compact_open.PNG differ diff --git a/img/final/stream_compact_open_labels.PNG b/img/final/stream_compact_open_labels.PNG new file mode 100644 index 0000000..c2866a8 Binary files /dev/null and b/img/final/stream_compact_open_labels.PNG differ diff --git a/img/final/violin.PNG b/img/final/violin.PNG new file mode 100644 index 0000000..1ab1f6c Binary files /dev/null and b/img/final/violin.PNG differ diff --git 
a/img/glass_ball.2022-10-09_17-19-17z.5000samp.png b/img/glass_ball.2022-10-09_17-19-17z.5000samp.png new file mode 100644 index 0000000..4d8fb62 Binary files /dev/null and b/img/glass_ball.2022-10-09_17-19-17z.5000samp.png differ diff --git a/img/graphs/acceleration_structures.png b/img/graphs/acceleration_structures.png new file mode 100644 index 0000000..8bd1a7b Binary files /dev/null and b/img/graphs/acceleration_structures.png differ diff --git a/img/graphs/caching_bounces.png b/img/graphs/caching_bounces.png new file mode 100644 index 0000000..e3c8b57 Binary files /dev/null and b/img/graphs/caching_bounces.png differ diff --git a/img/graphs/caching_elapsed_time.png b/img/graphs/caching_elapsed_time.png new file mode 100644 index 0000000..2871e1e Binary files /dev/null and b/img/graphs/caching_elapsed_time.png differ diff --git a/img/graphs/lbvh.png b/img/graphs/lbvh.png new file mode 100644 index 0000000..58501aa Binary files /dev/null and b/img/graphs/lbvh.png differ diff --git a/img/graphs/material_sorting.png b/img/graphs/material_sorting.png new file mode 100644 index 0000000..617adbb Binary files /dev/null and b/img/graphs/material_sorting.png differ diff --git a/img/graphs/russian_roulette.png b/img/graphs/russian_roulette.png new file mode 100644 index 0000000..69327c7 Binary files /dev/null and b/img/graphs/russian_roulette.png differ diff --git a/img/graphs/stream_compact_kernels.png b/img/graphs/stream_compact_kernels.png new file mode 100644 index 0000000..89497ad Binary files /dev/null and b/img/graphs/stream_compact_kernels.png differ diff --git a/img/graphs/stream_compact_rays.png b/img/graphs/stream_compact_rays.png new file mode 100644 index 0000000..df3cf38 Binary files /dev/null and b/img/graphs/stream_compact_rays.png differ diff --git a/img/graphs/table.png b/img/graphs/table.png new file mode 100644 index 0000000..3d1871f Binary files /dev/null and b/img/graphs/table.png differ diff --git a/img/mesh.2022-10-10_00-39-44z.5000samp.png 
b/img/mesh.2022-10-10_00-39-44z.5000samp.png new file mode 100644 index 0000000..6781e4f Binary files /dev/null and b/img/mesh.2022-10-10_00-39-44z.5000samp.png differ diff --git a/img/performance_testing.2022-10-11_06-11-40z.10samp.png b/img/performance_testing.2022-10-11_06-11-40z.10samp.png new file mode 100644 index 0000000..cd60668 Binary files /dev/null and b/img/performance_testing.2022-10-11_06-11-40z.10samp.png differ diff --git a/scenes/bunny-tree.txt b/scenes/bunny-tree.txt new file mode 100644 index 0000000..fc80004 --- /dev/null +++ b/scenes/bunny-tree.txt @@ -0,0 +1,159 @@ +// Emissive material (light) +MATERIAL 0 +RGB 1 1 1 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 40 + +// Diffuse white +MATERIAL 1 +RGB .98 .98 .98 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Diffuse red +MATERIAL 2 +RGB .85 .35 .35 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Diffuse aqua +MATERIAL 3 +RGB 0.4 0.9 0.6 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Diffuse grey +MATERIAL 4 +RGB 0.6 0.6 0.6 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Diffuse blue +MATERIAL 5 +RGB 0.5 0.5 0.9 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Specular pink +MATERIAL 6 +RGB 0 0 0 +SPECEX 0 +SPECRGB 0.9 0.6 0.9 +REFL 1 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Transmissive orange +MATERIAL 7 +RGB 0 0 0 +SPECEX 0 +SPECRGB 0.98 0.98 0.98 +REFL 1 +REFR 1 +REFRIOR 1.55 +EMITTANCE 0 + +// Imperfect specular green +MATERIAL 8 +RGB 0.6 1.0 0.5 +SPECEX 0 +SPECRGB 0.98 0.98 0.98 +REFL 1 +REFR 0 +REFRIOR 1.55 +EMITTANCE 0 + +// Camera +CAMERA +RES 800 800 +FOVY 35 +ITERATIONS 3 +DEPTH 8 +FILE performance_testing +LENS_RADIUS 0.0 +FOCAL_DIST 29.5 +EYE 0.0 5 10.5 +LOOKAT 0 5 0 +UP 0 1 0 + + +// Ceiling light middle +OBJECT 0 +cube +material 0 +TRANS 0 10 0 +ROTAT 0 0 0 +SCALE 3 .3 3 + +// Floor +OBJECT 1 +cube +material 1 +TRANS 0 0 0 +ROTAT 0 0 0 +SCALE 15 .01 
10 + +// Ceiling +OBJECT 2 +cube +material 2 +TRANS 0 10 0 +ROTAT 0 0 90 +SCALE .01 20 10 + +// Back wall +OBJECT 3 +cube +material 4 +TRANS 0 5 -5 +ROTAT 0 90 0 +SCALE .01 10 15 + +// Left wall +OBJECT 4 +cube +material 3 +TRANS -7 5 0 +ROTAT 0 0 0 +SCALE .01 10 10 + +// Right wall +OBJECT 5 +cube +material 3 +TRANS 7 5 0 +ROTAT 0 0 0 +SCALE .01 10 10 + +// Mesh +OBJECT 6 +mesh dragon.obj +material 8 +TRANS 0 0 0 +ROTAT 0 0 0 +SCALE 1 1 1 diff --git a/scenes/cornell.txt b/scenes/cornell.txt index 83ff820..8f0954f 100644 --- a/scenes/cornell.txt +++ b/scenes/cornell.txt @@ -6,7 +6,7 @@ SPECRGB 0 0 0 REFL 0 REFR 0 REFRIOR 0 -EMITTANCE 5 +EMITTANCE 10 // Diffuse white MATERIAL 1 @@ -40,9 +40,9 @@ EMITTANCE 0 // Specular white MATERIAL 4 -RGB .98 .98 .98 +RGB 0 0 0 SPECEX 0 -SPECRGB .98 .98 .98 +SPECRGB 0 .98 .98 REFL 1 REFR 0 REFRIOR 0 @@ -51,11 +51,13 @@ EMITTANCE 0 // Camera CAMERA RES 800 800 -FOVY 45 +FOVY 19.5 ITERATIONS 5000 DEPTH 8 FILE cornell -EYE 0.0 5 10.5 +LENS_RADIUS 0.5 +FOCAL_DIST 29.5 +EYE 0.0 5 15.5 LOOKAT 0 5 0 UP 0 1 0 diff --git a/scenes/dragon.txt b/scenes/dragon.txt new file mode 100644 index 0000000..27d7911 --- /dev/null +++ b/scenes/dragon.txt @@ -0,0 +1,119 @@ +// Emissive material (light) +MATERIAL 0 +RGB 1 1 1 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 40 + +// Diffuse white +MATERIAL 1 +RGB .98 .98 .98 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Diffuse red +MATERIAL 2 +RGB .85 .35 .35 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Diffuse green +MATERIAL 3 +RGB .35 .85 .35 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Specular white +MATERIAL 4 +RGB 0.13 0.13 0.13 +SPECEX 0 +SPECRGB 0.98 0.98 0.98 +REFL 1 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Camera +CAMERA +RES 800 800 +FOVY 35 +ITERATIONS 5000 +DEPTH 5 +FILE mesh_cornell +LENS_RADIUS 0.0 +FOCAL_DIST 29.5 +EYE 0.0 5 10.5 +LOOKAT 0 5 0 +UP 0 1 0 + + +// Ceiling light +OBJECT 0 +cube +material 
0 +TRANS 0 10 0 +ROTAT 0 0 0 +SCALE 3 .3 3 + +// Floor +OBJECT 1 +cube +material 1 +TRANS 0 0 0 +ROTAT 0 0 0 +SCALE 20 .01 10 + +// Ceiling +OBJECT 2 +cube +material 2 +TRANS 0 10 0 +ROTAT 0 0 90 +SCALE .01 20 10 + +// Back wall +OBJECT 3 +cube +material 1 +TRANS 0 5 -5 +ROTAT 0 90 0 +SCALE .01 10 20 + +// Left wall +OBJECT 4 +cube +material 3 +TRANS -10 5 0 +ROTAT 0 0 0 +SCALE .01 10 10 + +// Right wall +OBJECT 5 +cube +material 3 +TRANS 10 5 0 +ROTAT 0 0 0 +SCALE .01 10 10 + +// Mesh +OBJECT 6 +mesh dragon.obj +material 4 +TRANS 0 0 0 +ROTAT 0 0 0 +SCALE 1 1 1 diff --git a/scenes/glass_ball.txt b/scenes/glass_ball.txt new file mode 100644 index 0000000..4ae9e96 --- /dev/null +++ b/scenes/glass_ball.txt @@ -0,0 +1,119 @@ +// Emissive material (light) +MATERIAL 0 +RGB 1 1 1 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 40 + +// Diffuse white +MATERIAL 1 +RGB .98 .98 .98 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Diffuse red +MATERIAL 2 +RGB .85 .35 .35 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Diffuse green +MATERIAL 3 +RGB .35 .85 .35 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Specular white +MATERIAL 4 +RGB 0 0 1 +SPECEX 0 +SPECRGB 0.98 0.98 0.98 +REFL 1 +REFR 1 +REFRIOR 1.55 +EMITTANCE 0 + +// Camera +CAMERA +RES 800 800 +FOVY 19.5 +ITERATIONS 5000 +DEPTH 5 +FILE glass_ball +LENS_RADIUS 0.0 +FOCAL_DIST 29.5 +EYE 0.0 5 15.5 +LOOKAT 0 5 0 +UP 0 1 0 + + +// Ceiling light +OBJECT 0 +cube +material 0 +TRANS 0 10 0 +ROTAT 0 0 0 +SCALE 3 .3 3 + +// Floor +OBJECT 1 +cube +material 1 +TRANS 0 0 0 +ROTAT 0 0 0 +SCALE 10 .01 10 + +// Ceiling +OBJECT 2 +cube +material 1 +TRANS 0 10 0 +ROTAT 0 0 90 +SCALE .01 10 10 + +// Back wall +OBJECT 3 +cube +material 1 +TRANS 0 5 -5 +ROTAT 0 90 0 +SCALE .01 10 10 + +// Left wall +OBJECT 4 +cube +material 2 +TRANS -5 5 0 +ROTAT 0 0 0 +SCALE .01 10 10 + +// Right wall +OBJECT 5 +cube +material 3 +TRANS 5 5 0 +ROTAT 0 0 0 +SCALE .01 10 10 
+ +// Sphere +OBJECT 6 +sphere +material 4 +TRANS 0 4 0 +ROTAT 0 0 0 +SCALE 6 6 6 diff --git a/scenes/materials.txt b/scenes/materials.txt new file mode 100644 index 0000000..fd58acc --- /dev/null +++ b/scenes/materials.txt @@ -0,0 +1,167 @@ +// Emissive material (light) +MATERIAL 0 +RGB 1 1 1 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 20 + +// Diffuse white +MATERIAL 1 +RGB .98 .98 .98 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Diffuse red +MATERIAL 2 +RGB .85 .35 .35 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Diffuse aqua +MATERIAL 3 +RGB 0.4 0.9 0.6 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Diffuse grey +MATERIAL 4 +RGB 0.4 0.4 0.4 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Diffuse purple +MATERIAL 5 +RGB 0 0 0 +SPECEX 0 +SPECRGB 0.7 0.3 0.9 +REFL 1 +REFR 1 +REFRIOR 1.55 +EMITTANCE 0 + +// Specular pink +MATERIAL 6 +RGB 0 0 0 +SPECEX 0 +SPECRGB 0.9 0.6 0.9 +REFL 1 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Transmissive orange +MATERIAL 7 +RGB 0 0 0 +SPECEX 0 +SPECRGB 0.98 0.98 0.98 +REFL 1 +REFR 1 +REFRIOR 1.55 +EMITTANCE 0 + +// Imperfect specular green +MATERIAL 8 +RGB 0.6 1.0 0.5 +SPECEX 0 +SPECRGB 0.98 0.98 0.98 +REFL 1 +REFR 0 +REFRIOR 1.55 +EMITTANCE 0 + +// Camera +CAMERA +RES 800 800 +FOVY 35 +ITERATIONS 5000 +DEPTH 5 +FILE performance_testing +LENS_RADIUS 0.0 +FOCAL_DIST 29.5 +EYE 0.0 5 10.5 +LOOKAT 0 5 0 +UP 0 1 0 + + +// Ceiling light middle +OBJECT 0 +cube +material 0 +TRANS 0 10 0 +ROTAT 0 0 0 +SCALE 3 .3 3 + +// Floor +OBJECT 1 +cube +material 1 +TRANS 0 0 0 +ROTAT 0 0 0 +SCALE 40 .01 40 + +// Ceiling +OBJECT 2 +cube +material 4 +TRANS 0 10 0 +ROTAT 0 0 90 +SCALE .01 40 40 + +// Back wall +OBJECT 3 +cube +material 4 +TRANS 0 5 -5 +ROTAT 0 90 0 +SCALE .01 20 40 + +// Left wall +OBJECT 4 +cube +material 3 +TRANS -20 5 0 +ROTAT 0 0 0 +SCALE .01 10 40 + +// Right wall +OBJECT 5 +cube +material 3 +TRANS 20 5 0 +ROTAT 0 0 0 
+SCALE .01 10 40 + +// Mesh +OBJECT 6 +mesh violin.obj +material 5 +TRANS 0 0 0 +ROTAT 0 0 0 +SCALE 1 1 1 + +// Sphere +OBJECT 7 +sphere +material 5 +TRANS 1 1.25 1.5 +ROTAT 0 0 0 +SCALE 2.5 2.5 2.5 diff --git a/scenes/mesh.txt b/scenes/mesh.txt new file mode 100644 index 0000000..9bdea1c --- /dev/null +++ b/scenes/mesh.txt @@ -0,0 +1,30 @@ +// Emissive material (light) +MATERIAL 0 +RGB 1 1 1 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 5 + +// Camera +CAMERA +RES 800 800 +FOVY 45 +ITERATIONS 5000 +DEPTH 8 +FILE mesh +LENS_RADIUS 0.0 +FOCAL_DIST 29.5 +EYE 0.0 5 10.5 +LOOKAT 0 5 0 +UP 0 1 0 + +// Sphere +OBJECT 0 +mesh dragons.obj +material 0 +TRANS 0 0 0 +ROTAT 0 0 0 +SCALE 1 1 1 diff --git a/scenes/mesh_cornell.txt b/scenes/mesh_cornell.txt new file mode 100644 index 0000000..c06a0c7 --- /dev/null +++ b/scenes/mesh_cornell.txt @@ -0,0 +1,119 @@ +// Emissive material (light) +MATERIAL 0 +RGB 1 1 1 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 20 + +// Diffuse white +MATERIAL 1 +RGB .98 .98 .98 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Diffuse red +MATERIAL 2 +RGB .85 .35 .35 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Diffuse green +MATERIAL 3 +RGB .35 .85 .35 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Specular white +MATERIAL 4 +RGB 0 0.13 0.84 +SPECEX 0 +SPECRGB 0.98 0.98 0.98 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Camera +CAMERA +RES 800 800 +FOVY 45 +ITERATIONS 5000 +DEPTH 5 +FILE mesh_cornell +LENS_RADIUS 0.0 +FOCAL_DIST 29.5 +EYE 0.0 5 10.5 +LOOKAT 0 5 0 +UP 0 1 0 + + +// Ceiling light +OBJECT 0 +cube +material 0 +TRANS 0 10 0 +ROTAT 0 0 0 +SCALE 3 .3 3 + +// Floor +OBJECT 1 +cube +material 1 +TRANS 0 0 0 +ROTAT 0 0 0 +SCALE 10 .01 10 + +// Ceiling +OBJECT 2 +cube +material 1 +TRANS 0 10 0 +ROTAT 0 0 90 +SCALE .01 10 10 + +// Back wall +OBJECT 3 +cube +material 1 +TRANS 0 5 -5 +ROTAT 0 90 0 +SCALE .01 10 10 + +// Left wall +OBJECT 4 
+cube +material 2 +TRANS -5 5 0 +ROTAT 0 0 0 +SCALE .01 10 10 + +// Right wall +OBJECT 5 +cube +material 3 +TRANS 5 5 0 +ROTAT 0 0 0 +SCALE .01 10 10 + +// Mesh +OBJECT 6 +mesh dragons.obj +material 4 +TRANS 0 0 0 +ROTAT 0 0 0 +SCALE 1 1 1 diff --git a/scenes/performance-testing.txt b/scenes/performance-testing.txt new file mode 100644 index 0000000..37adc1d --- /dev/null +++ b/scenes/performance-testing.txt @@ -0,0 +1,199 @@ +// Emissive material (light) +MATERIAL 0 +RGB 1 1 1 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 40 + +// Diffuse white +MATERIAL 1 +RGB .98 .98 .98 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Diffuse red +MATERIAL 2 +RGB .85 .35 .35 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Diffuse aqua +MATERIAL 3 +RGB 0.4 0.9 0.6 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Diffuse grey +MATERIAL 4 +RGB 0.6 0.6 0.6 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Diffuse blue +MATERIAL 5 +RGB 0.5 0.5 0.9 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Specular pink +MATERIAL 6 +RGB 0 0 0 +SPECEX 0 +SPECRGB 0.9 0.6 0.9 +REFL 1 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Transmissive orange +MATERIAL 7 +RGB 0 0 0 +SPECEX 0 +SPECRGB 0.98 0.98 0.98 +REFL 1 +REFR 1 +REFRIOR 1.55 +EMITTANCE 0 + +// Imperfect specular green +MATERIAL 8 +RGB 0.6 1.0 0.5 +SPECEX 0 +SPECRGB 0.98 0.98 0.98 +REFL 1 +REFR 0 +REFRIOR 1.55 +EMITTANCE 0 + +// Camera +CAMERA +RES 1000 800 +FOVY 35 +ITERATIONS 10 +DEPTH 8 +FILE performance_testing +LENS_RADIUS 0.0 +FOCAL_DIST 29.5 +EYE 0.0 5 10.5 +LOOKAT 0 5 0 +UP 0 1 0 + + +// Ceiling light middle +OBJECT 0 +cube +material 0 +TRANS 0 10 0 +ROTAT 0 0 0 +SCALE 3 .3 3 + +// Ceiling light left +OBJECT 1 +cube +material 0 +TRANS -5 10 0 +ROTAT 0 0 0 +SCALE 3 .3 3 + +// Ceiling light right +OBJECT 2 +cube +material 0 +TRANS 5 10 0 +ROTAT 0 0 0 +SCALE 3 .3 3 + +// Floor +OBJECT 3 +cube +material 1 +TRANS 0 0 0 
+ROTAT 0 0 0 +SCALE 20 .01 40 + +// Ceiling +OBJECT 4 +cube +material 2 +TRANS 0 10 0 +ROTAT 0 0 90 +SCALE .01 20 40 + +// Back wall +OBJECT 5 +cube +material 4 +TRANS 0 5 -5 +ROTAT 0 90 0 +SCALE .01 10 20 + +// Left wall +OBJECT 6 +cube +material 3 +TRANS -10 5 0 +ROTAT 0 0 0 +SCALE .01 10 40 + +// Right wall +OBJECT 7 +cube +material 3 +TRANS 10 5 0 +ROTAT 0 0 0 +SCALE .01 10 40 + +// Mesh +OBJECT 8 +mesh bunny-performance.obj +material 8 +TRANS 0 0 0 +ROTAT 0 0 0 +SCALE 1 1 1 + +// Transmissive sphere +OBJECT 9 +sphere +material 7 +TRANS 1 1.25 1 +ROTAT 0 0 0 +SCALE 2.5 2.5 2.5 + +// Reflective sphere +OBJECT 10 +sphere +material 6 +TRANS 4 3 -1 +ROTAT 0 0 0 +SCALE 4 4 4 + +// Pedestal +OBJECT 11 +cube +material 4 +TRANS 4 0 -1 +ROTAT 0 0 0 +SCALE 4 2 4 diff --git a/scenes/sphere.txt b/scenes/sphere.txt index a74b545..89a74b6 100644 --- a/scenes/sphere.txt +++ b/scenes/sphere.txt @@ -15,6 +15,8 @@ FOVY 45 ITERATIONS 5000 DEPTH 8 FILE sphere +LENS_RADIUS 0.0 +FOCAL_DIST 29.5 EYE 0.0 5 10.5 LOOKAT 0 5 0 UP 0 1 0 diff --git a/scenes/violin.txt b/scenes/violin.txt new file mode 100644 index 0000000..6dba462 --- /dev/null +++ b/scenes/violin.txt @@ -0,0 +1,173 @@ +// Emissive material (light) +MATERIAL 0 +RGB 0.8 0.6 0.3 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 100 + +// Diffuse white +MATERIAL 1 +RGB .98 .98 .98 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Diffuse red +MATERIAL 2 +RGB .85 .35 .35 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Diffuse rust +MATERIAL 3 +RGB 0.01 0 0 +SPECEX 0 +SPECRGB .98 .98 .98 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Diffuse grey +MATERIAL 4 +RGB 0.8 0.8 0.8 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Transmissive white +MATERIAL 5 +RGB 0 0 0 +SPECEX 0 +SPECRGB .98 .98 .98 +REFL 1 +REFR 1 +REFRIOR 1.55 +EMITTANCE 0 + +// Reflective white +MATERIAL 5 +RGB 0 0 0 +SPECEX 0 +SPECRGB .98 .98 .98 +REFL 1 +REFR 0 +REFRIOR 0 +EMITTANCE 0 
+ +// Diffuse purple +MATERIAL 6 +RGB 0 0 0.05 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Camera +CAMERA +RES 1000 800 +FOVY 35 +ITERATIONS 5000 +DEPTH 5 +FILE performance_testing +LENS_RADIUS 0.0 +FOCAL_DIST 29.5 +EYE 0.0 5 10.5 +LOOKAT 0 5 0 +UP 0 1 0 + + +// Ceiling light middle +OBJECT 0 +cube +material 0 +TRANS 0 10 0 +ROTAT 0 0 0 +SCALE 3 .3 3 + +// Floor +OBJECT 1 +cube +material 2 +TRANS 0 0 0 +ROTAT 0 0 0 +SCALE 40 .01 40 + +// Ceiling +OBJECT 2 +cube +material 1 +TRANS 0 10 0 +ROTAT 0 0 90 +SCALE .01 40 40 + +// Mesh +OBJECT 3 +mesh violin.obj +material 3 +TRANS 0 0 0 +ROTAT 0 0 0 +SCALE 1 1 1 + +// Paper 1 +OBJECT 4 +cube +material 4 +TRANS 0 0 4 +ROTAT 0 65 0 +SCALE 3 .07 5 + +// Paper 2 +OBJECT 5 +cube +material 1 +TRANS 2.3 0.01 2.1 +ROTAT 0 110 0 +SCALE 3 .07 5 + +// Pedestal Left +OBJECT 6 +cube +material 6 +TRANS -3 0.4 4.44 +ROTAT 0 45 0 +SCALE 3 .7 2.8 + +// Transmissive sphere +OBJECT 7 +sphere +material 5 +TRANS -3 2.1 4.5 +ROTAT 0 0 0 +SCALE 2 2 2 + +// Pedestal Right +OBJECT 8 +cube +material 6 +TRANS -3 0.4 -6 +ROTAT 0 118 0 +SCALE 4.6 1.2 4.7 + +// Reflective sphere +OBJECT 9 +sphere +material 5 +TRANS -2.7 3 -5.3 +ROTAT 0 0 0 +SCALE 4 4 4 \ No newline at end of file diff --git a/scenes/violin2.txt b/scenes/violin2.txt new file mode 100644 index 0000000..fd58acc --- /dev/null +++ b/scenes/violin2.txt @@ -0,0 +1,167 @@ +// Emissive material (light) +MATERIAL 0 +RGB 1 1 1 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 20 + +// Diffuse white +MATERIAL 1 +RGB .98 .98 .98 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Diffuse red +MATERIAL 2 +RGB .85 .35 .35 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Diffuse aqua +MATERIAL 3 +RGB 0.4 0.9 0.6 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Diffuse grey +MATERIAL 4 +RGB 0.4 0.4 0.4 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Diffuse purple +MATERIAL 5 
+RGB 0 0 0 +SPECEX 0 +SPECRGB 0.7 0.3 0.9 +REFL 1 +REFR 1 +REFRIOR 1.55 +EMITTANCE 0 + +// Specular pink +MATERIAL 6 +RGB 0 0 0 +SPECEX 0 +SPECRGB 0.9 0.6 0.9 +REFL 1 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Transmissive orange +MATERIAL 7 +RGB 0 0 0 +SPECEX 0 +SPECRGB 0.98 0.98 0.98 +REFL 1 +REFR 1 +REFRIOR 1.55 +EMITTANCE 0 + +// Imperfect specular green +MATERIAL 8 +RGB 0.6 1.0 0.5 +SPECEX 0 +SPECRGB 0.98 0.98 0.98 +REFL 1 +REFR 0 +REFRIOR 1.55 +EMITTANCE 0 + +// Camera +CAMERA +RES 800 800 +FOVY 35 +ITERATIONS 5000 +DEPTH 5 +FILE performance_testing +LENS_RADIUS 0.0 +FOCAL_DIST 29.5 +EYE 0.0 5 10.5 +LOOKAT 0 5 0 +UP 0 1 0 + + +// Ceiling light middle +OBJECT 0 +cube +material 0 +TRANS 0 10 0 +ROTAT 0 0 0 +SCALE 3 .3 3 + +// Floor +OBJECT 1 +cube +material 1 +TRANS 0 0 0 +ROTAT 0 0 0 +SCALE 40 .01 40 + +// Ceiling +OBJECT 2 +cube +material 4 +TRANS 0 10 0 +ROTAT 0 0 90 +SCALE .01 40 40 + +// Back wall +OBJECT 3 +cube +material 4 +TRANS 0 5 -5 +ROTAT 0 90 0 +SCALE .01 20 40 + +// Left wall +OBJECT 4 +cube +material 3 +TRANS -20 5 0 +ROTAT 0 0 0 +SCALE .01 10 40 + +// Right wall +OBJECT 5 +cube +material 3 +TRANS 20 5 0 +ROTAT 0 0 0 +SCALE .01 10 40 + +// Mesh +OBJECT 6 +mesh violin.obj +material 5 +TRANS 0 0 0 +ROTAT 0 0 0 +SCALE 1 1 1 + +// Sphere +OBJECT 7 +sphere +material 5 +TRANS 1 1.25 1.5 +ROTAT 0 0 0 +SCALE 2.5 2.5 2.5 diff --git a/src/interactions.h b/src/interactions.h index f969e45..556a77f 100644 --- a/src/interactions.h +++ b/src/interactions.h @@ -41,6 +41,115 @@ glm::vec3 calculateRandomDirectionInHemisphere( + sin(around) * over * perpendicularDirection2; } +__host__ __device__ +glm::vec3 calculateFresnel(const Material &m, float cosTheta) { + float etaI = 1.0; + float etaT = m.indexOfRefraction; + float cosThetaI = glm::clamp(cosThetaI, -1.f, 1.f); + + // Check if entering or leaving medium, and swap indices of refraction if necessary + bool leaving = (cosThetaI < 0.f); + if (leaving) { + float tmp = etaI; + etaI = etaT; + etaT = tmp; + cosThetaI = 
glm::abs(cosThetaI); + } + float eta = etaI / etaT; + + // Snell's Law + float sinThetaI = glm::sqrt(max(0.0, 1.0 - cosThetaI * cosThetaI)); + float sinThetaT = eta * sinThetaI; + + // Total internal reflection + if (sinThetaT >= 1.0) return glm::vec3(1.0, 1.0, 1.0); + + // Compute Fresnel reflectance (see equation in PBRT 8.2.1) + float cosThetaT = glm::sqrt(glm::max(0.0, 1.0 - sinThetaT * sinThetaT)); + float rParallel = ((etaT * cosThetaI) - (etaI * cosThetaT)) / + ((etaT * cosThetaI) + (etaI * cosThetaT)); + float rPerp = ((etaI * cosThetaI) - (etaT * cosThetaT)) / + ((etaI * cosThetaI) + (etaT * cosThetaT)); + + return glm::vec3((rParallel * rParallel + rPerp * rPerp) * 0.5f); +} + +__host__ __device__ +glm::vec3 sample_diffuse( + glm::vec3 &normal, const Material& m, thrust::default_random_engine& rng, glm::vec3 wo, glm::vec3& wi) +{ + wi = calculateRandomDirectionInHemisphere(normal, rng); + return m.color; +} + +__host__ __device__ +glm::vec3 sample_specular_refl( + glm::vec3 &normal, const Material& m, thrust::default_random_engine& rng, glm::vec3 wo, glm::vec3& wi) +{ + wi = glm::reflect(wo, normal); + return m.specular.color; +} + +__host__ __device__ +glm::vec3 sample_specular_trans( + glm::vec3 &normal, const Material& m, thrust::default_random_engine& rng, glm::vec3 wo, glm::vec3& wi) +{ + float entering = (glm::dot(wo, normal) < 0); + float eta = (entering) ? 1.f / m.indexOfRefraction : m.indexOfRefraction; + + // Flip normal to be in same hemisphere as wo + bool flip = (glm::dot(wo, normal) > 0.f); + normal = (flip) ? 
-normal : normal; + wi = glm::refract(wo, normal, eta); + + // Total internal reflection + if (glm::length(wi) < 0) { + wi = glm::reflect(wo, normal); + return glm::vec3(0.0, 0.0, 0.0); + } + return m.specular.color; +} + +__host__ __device__ +glm::vec3 sample_glass( + glm::vec3& normal, const Material& m, thrust::default_random_engine& rng, glm::vec3 wo, glm::vec3& wi) +{ + thrust::uniform_real_distribution u01(0, 1); + bool reflect = u01(rng) < 0.5; + + float cosTheta = glm::dot(wo, normal); + glm::vec3 Fr = calculateFresnel(m, cosTheta); + glm::vec3 f = glm::vec3(0.0, 0.0, 0.0); + if (reflect) { + f = sample_specular_refl(normal, m, rng, wo, wi); + return 2.f * Fr * f; + } + else { + f = sample_specular_trans(normal, m, rng, wo, wi); + return 2.f * (glm::vec3(1.f, 1.f, 1.f) - Fr) * f; + } +} + +__host__ __device__ +glm::vec3 sample_plastic( + glm::vec3& normal, const Material& m, thrust::default_random_engine& rng, glm::vec3 wo, glm::vec3& wi) +{ + thrust::uniform_real_distribution u01(0, 1); + bool reflect = u01(rng) < 0.5; + + float cosTheta = glm::dot(wo, normal); + glm::vec3 Fr = calculateFresnel(m, cosTheta); + glm::vec3 f = glm::vec3(0.0, 0.0, 0.0); + if (reflect) { + f = sample_specular_refl(normal, m, rng, wo, wi); + return 2.f * Fr * f; + } + else { + f = sample_diffuse(normal, m, rng, wo, wi); + return 2.f * (glm::vec3(1.f, 1.f, 1.f) - Fr) * f; + } +} + /** * Scatter a ray with some probabilities according to the material properties. * For example, a diffuse surface scatters in a cosine-weighted hemisphere. @@ -76,4 +185,32 @@ void scatterRay( // TODO: implement this. // A basic implementation of pure-diffuse shading will just call the // calculateRandomDirectionInHemisphere defined above. 
+ if (pathSegment.remainingBounces <= 0) { + return; + } + thrust::uniform_real_distribution u01(0, 1); + float xi = u01(rng); + + glm::vec3 wi = glm::vec3(0.0, 0.0, 0.0); + glm::vec3 f = glm::vec3(0.0, 0.0, 0.0); + if (m.hasReflective && m.hasRefractive) { + f = sample_glass(normal, m, rng, pathSegment.ray.direction, wi); + } + else if (m.hasReflective && glm::length(m.color) > 0) { + f = sample_plastic(normal, m, rng, pathSegment.ray.direction, wi); + } + else if (m.hasReflective) { + f = sample_specular_refl(normal, m, rng, pathSegment.ray.direction, wi); + } + else if (m.hasRefractive) { + f = sample_specular_trans(normal, m, rng, pathSegment.ray.direction, wi); + } + else { + f = sample_diffuse(normal, m, rng, pathSegment.ray.direction, wi); + } + pathSegment.throughput *= f; + pathSegment.ray.direction = wi; + pathSegment.ray.invDirection = glm::vec3(1.0, 1.0, 1.0) / pathSegment.ray.direction; + pathSegment.ray.origin = intersect + 0.01f * pathSegment.ray.direction; + pathSegment.remainingBounces--; } diff --git a/src/intersections.h b/src/intersections.h index b150407..20d10c0 100644 --- a/src/intersections.h +++ b/src/intersections.h @@ -89,6 +89,60 @@ __host__ __device__ float boxIntersectionTest(Geom box, Ray r, return -1; } +//__host__ __device__ bool aabbIntersectionTest(AABB aabb, Ray r) { +// glm::vec3 invR = glm::vec3(1.0, 1.0, 1.0) / r.direction; +// +// float x1 = (aabb.min.x - r.origin.x) * invR.x; +// float x2 = (aabb.max.x - r.origin.x) * invR.x; +// +// float tmin = glm::min(x1, x2); +// float tmax = glm::max(x1, x2); +// +// float y1 = (aabb.min.y - r.origin.y) * invR.y; +// float y2 = (aabb.max.y - r.origin.y) * invR.y; +// +// tmin = glm::min(tmin, glm::min(y1, y2)); +// tmax = glm::max(tmax, glm::max(y1, y2)); +// +// float z1 = (aabb.min.z - r.origin.z) * invR.z; +// float z2 = (aabb.max.z - r.origin.z) * invR.z; +// +// tmin = glm::min(tmin, glm::min(z1, z2)); +// tmax = glm::max(tmax, glm::max(z1, z2)); +// +// return tmin <= tmax && 
tmax >= 0.0; +//} + +// Based off of "Fast, Branchless Ray/Bounding Box Intersections" by Tavian Barnes +__host__ __device__ bool aabbIntersectionTest(AABB aabb, Ray &r, float& t) { + glm::vec3 invR = r.invDirection; + + float x1 = (aabb.min.x - r.origin.x) * invR.x; + float x2 = (aabb.max.x - r.origin.x) * invR.x; + + float tmin = glm::min(x1, x2); + float tmax = glm::max(x1, x2); + + float y1 = (aabb.min.y - r.origin.y) * invR.y; + float y2 = (aabb.max.y - r.origin.y) * invR.y; + + tmin = glm::max(tmin, glm::min(y1, y2)); + tmax = glm::min(tmax, glm::max(y1, y2)); + + float z1 = (aabb.min.z - r.origin.z) * invR.z; + float z2 = (aabb.max.z - r.origin.z) * invR.z; + + tmin = glm::max(tmin, glm::min(z1, z2)); + tmax = glm::min(tmax, glm::max(z1, z2)); + + bool intersect = tmin <= tmax && tmax >= 0; + t = (intersect) ? tmin : -1.0; + if (t < 0.f) t = tmax; + + r.intersectionCount++; + return intersect; +} + // CHECKITOUT /** * Test intersection between a ray and a transformed sphere. Untransformed, @@ -137,8 +191,242 @@ __host__ __device__ float sphereIntersectionTest(Geom sphere, Ray r, intersectionPoint = multiplyMV(sphere.transform, glm::vec4(objspaceIntersection, 1.f)); normal = glm::normalize(multiplyMV(sphere.invTranspose, glm::vec4(objspaceIntersection, 0.f))); if (!outside) { - normal = -normal; + //normal = -normal; --> commented out because this impacts refraction } - return glm::length(r.origin - intersectionPoint); } + +/** + * Test intersection between a ray and a transformed triangle. + * + * @param intersectionPoint Output parameter for point of intersection. + * @param normal Output parameter for surface normal. + * @param outside Output param for whether the ray came from outside. + * @return Ray parameter `t` value. -1 if no intersection. 
+ */ +__host__ __device__ float triangleIntersectionTest(Triangle tri, Ray &r, + glm::vec3& barycenter) { + + bool intersect = glm::intersectRayTriangle(r.origin, r.direction, + tri.verts[0], tri.verts[1], tri.verts[2], + barycenter); + r.intersectionCount++; + if (!intersect) return -1.f; + + return barycenter.z; +} + +/** + * Test intersection between a ray and a triangle mesh. + * + * @param intersectionPoint Output parameter for point of intersection. + * @param normal Output parameter for surface normal. + * @param outside Output param for whether the ray came from outside. + * @return Ray parameter `t` value. -1 if no intersection. + */ +__host__ __device__ float meshIntersectionTest(Geom mesh, Ray &r, + const Triangle* tris, glm::vec3& intersectionPoint, glm::vec3& normal, bool& outside) { + +#if BB_CULLING + // Test ray against mesh AABB + float t = -1.0; + bool intersectAABB = aabbIntersectionTest(mesh.aabb, r, t); + if (!intersectAABB) return -1.f; +#endif + + // If bounding box is intersected, then check for intersection with all triangles + Triangle min_tri; + glm::vec3 barycenter, min_barycenter; + float min_t = INFINITY; + for (int i = mesh.startIdx; i < mesh.startIdx + mesh.triangleCount; i++) + { + float t = triangleIntersectionTest(tris[i], r, barycenter); + if (t < min_t && t > 0.f) + { + min_t = t; + min_barycenter = barycenter; + min_tri = tris[i]; + } + } + + // Find intersection point and normal + float u = min_barycenter.x; + float v = min_barycenter.y; + float w = 1.f - u - v; + intersectionPoint = u * min_tri.verts[0] + v * min_tri.verts[1] + w * min_tri.verts[2]; + normal = u * min_tri.norms[0] + v * min_tri.norms[1] + w * min_tri.norms[2]; + + return min_t; +} + +__host__ __device__ bool devIsLeaf(const LBVHNode* node) { + return node->left == 0xFFFFFFFF && node->right == 0xFFFFFFFF; +} + +__host__ __device__ void lbvhIntersectTriangle(const Triangle* tris, Ray &r, int objectId, + Triangle& min_tri, glm::vec3& min_barycenter, float& 
min_t) { + + glm::vec3 barycenter; + float t = triangleIntersectionTest(tris[objectId], r, barycenter); + if (t < min_t && t > 0.f) + { + min_t = t; + min_barycenter = barycenter; + min_tri = tris[objectId]; + } +} + +/** + * Test intersection between a ray and an LBVH. + * + * @param intersectionPoint Output parameter for point of intersection. + * @param normal Output parameter for surface normal. + * @param outside Output param for whether the ray came from outside. + * @return Ray parameter `t` value. -1 if no intersection. + */ +__host__ __device__ float lbvhIntersectionTest(const LBVHNode* nodes, const Triangle* tris, Ray &r, int triangleCount, + glm::vec3& intersectionPoint, glm::vec3& normal, bool& outside) { + + float stack[16]; + int stackPtr = -1; + + Triangle min_tri; + glm::vec3 min_barycenter; + float min_t = INFINITY; + + // Push root node + stack[++stackPtr] = triangleCount; + int currNodeIdx = stack[stackPtr]; + while (stackPtr >= 0) + { + // Check intersection with left and right children + int leftChild = nodes[currNodeIdx].left; + int rightChild = nodes[currNodeIdx].right; + const LBVHNode* left = &nodes[leftChild]; + const LBVHNode* right = &nodes[rightChild]; + + float t; + bool intersectLeft = aabbIntersectionTest(left->aabb, r, t); + bool intersectRight = aabbIntersectionTest(right->aabb, r, t); + + // If intersection found, and they are leaf nodes, check for triangle intersections + if (intersectLeft && devIsLeaf(left)) { + lbvhIntersectTriangle(tris, r, leftChild, min_tri, min_barycenter, min_t); + } + if (intersectRight && devIsLeaf(right)) { + lbvhIntersectTriangle(tris, r, rightChild, min_tri, min_barycenter, min_t); + } + + // If internal nodes, keep traversing + bool traverseLeftSubtree = (intersectLeft && !devIsLeaf(left)); + bool traverseRightSubtree = (intersectRight && !devIsLeaf(right)); + + if (!traverseLeftSubtree && !traverseRightSubtree) { + // Pop node from stack + currNodeIdx = stack[stackPtr--]; + } + else { + currNodeIdx 
= (traverseLeftSubtree) ? leftChild : rightChild; + if (traverseLeftSubtree && traverseRightSubtree) { + // Push right child onto stack + stack[++stackPtr] = rightChild; + } + } + } + + // Find intersection point and normal + float u = min_barycenter.x; + float v = min_barycenter.y; + float w = 1.f - u - v; + intersectionPoint = u * min_tri.verts[0] + v * min_tri.verts[1] + w * min_tri.verts[2]; + normal = u * min_tri.norms[0] + v * min_tri.norms[1] + w * min_tri.norms[2]; + + return min_t; +} + +__host__ __device__ bool devBvhIsLeaf(const BVHNode* node) { + return (node->numTris > 0); +} + +__host__ __device__ void bvhIntersectTriangles(const Triangle* tris, Ray &r, int start, int numTris, + Triangle& min_tri, glm::vec3& min_barycenter, float& min_t) { + + for (int i = start; i < start + numTris; ++i) { + glm::vec3 barycenter; + float t = triangleIntersectionTest(tris[i], r, barycenter); + if (t < min_t && t > 0.f) + { + min_t = t; + min_barycenter = barycenter; + min_tri = tris[i]; + } + } +} + +/** + * Test intersection between a ray and a BVH. + * + * @param intersectionPoint Output parameter for point of intersection. + * @param normal Output parameter for surface normal. + * @param outside Output param for whether the ray came from outside. + * @return Ray parameter `t` value. -1 if no intersection. 
+ */ +__host__ __device__ float bvhIntersectionTest(const BVHNode* nodes, const Triangle* tris, Ray &r, int triangleCount, + glm::vec3& intersectionPoint, glm::vec3& normal, bool& outside) { + + float stack[20]; + int stackPtr = -1; + + Triangle min_tri; + glm::vec3 min_barycenter; + float min_t = INFINITY; + + // Push root node + stack[++stackPtr] = 0; + int currNodeIdx = stack[stackPtr]; + while (stackPtr >= 0) + { + // Check intersection with left and right children + int leftChild = nodes[currNodeIdx].left; + int rightChild = nodes[currNodeIdx].right; + const BVHNode* left = &nodes[leftChild]; + const BVHNode* right = &nodes[rightChild]; + + float t; + bool intersectLeft = aabbIntersectionTest(left->aabb, r, t); + bool intersectRight = aabbIntersectionTest(right->aabb, r, t); + + // If intersection found, and they are leaf nodes, check for triangle intersections + if (intersectLeft && devBvhIsLeaf(left)) { + bvhIntersectTriangles(tris, r, left->firstTri, left->numTris, min_tri, min_barycenter, min_t); + } + if (intersectRight && devBvhIsLeaf(right)) { + bvhIntersectTriangles(tris, r, right->firstTri, right->numTris, min_tri, min_barycenter, min_t); + } + + // If internal nodes, keep traversing + bool traverseLeftSubtree = (intersectLeft && !devBvhIsLeaf(left)); + bool traverseRightSubtree = (intersectRight && !devBvhIsLeaf(right)); + + if (!traverseLeftSubtree && !traverseRightSubtree) { + // Pop node from stack + currNodeIdx = stack[stackPtr--]; + } + else { + currNodeIdx = (traverseLeftSubtree) ? 
leftChild : rightChild; + if (traverseLeftSubtree && traverseRightSubtree) { + // Push right child onto stack + stack[++stackPtr] = rightChild; + } + } + } + + // Find intersection point and normal + float u = min_barycenter.x; + float v = min_barycenter.y; + float w = 1.f - u - v; + intersectionPoint = u * min_tri.verts[0] + v * min_tri.verts[1] + w * min_tri.verts[2]; + normal = u * min_tri.norms[0] + v * min_tri.norms[1] + w * min_tri.norms[2]; + + return min_t; +} diff --git a/src/lbvh.cu b/src/lbvh.cu new file mode 100644 index 0000000..59f2029 --- /dev/null +++ b/src/lbvh.cu @@ -0,0 +1,374 @@ +#include "lbvh.h" + +/// LBVH FUNCTIONS /// + +// This optimized LBVH is based on the paper "Maximizing Parallelism in the Construction of BVHs, +// Octrees, and k-d Trees" by Tero Karras of NVIDIA Research + +bool morton_sort(const MortonCode& a, const MortonCode& b) { + return a.code < b.code; +} + +bool isLeaf(const LBVHNode* node) { + return node->left == 0xFFFFFFFF && node->right == 0xFFFFFFFF; +} + +AABB Union(AABB left, AABB right) { + glm::vec3 umin = glm::min(left.min, right.min); + glm::vec3 umax = glm::max(left.max, right.max); + return AABB{ umin, umax }; +} + +// Expand 10-bit integer into 30-bit integer +unsigned int expand(unsigned int n) +{ + n = (n | (n << 16)) & 0b00000011000000000000000011111111; + n = (n | (n << 8)) & 0b00000011000000001111000000001111; + n = (n | (n << 4)) & 0b00000011000011000011000011000011; + n = (n | (n << 2)) & 0b00001001001001001001001001001001; + return n; +} + +// Based on PBRT 4.3.3. 
and Tero Karras version at https://developer.nvidia.com/blog/thinking-parallel-part-iii-tree-construction-gpu/ +unsigned int mortonCode3D(const glm::vec3& centroid) { + // Convert centroid coordinates to value between 0 and 1024 + float x = min(max(centroid.x * 1024.0f, 0.0f), 1023.0f); + float y = min(max(centroid.y * 1024.0f, 0.0f), 1023.0f); + float z = min(max(centroid.z * 1024.0f, 0.0f), 1023.0f); + + // Expand each 10 bit value so that ith value is at 3 * ith position + unsigned int xx = expand((unsigned int)x); + unsigned int yy = expand((unsigned int)y); + unsigned int zz = expand((unsigned int)z); + + // Interleave the bits + return (xx << 2) | (yy << 1) | zz; +} + +void computeMortonCodes(Scene* scene, const AABB& sceneAABB) { + for (int i = 0; i < scene->triangles.size(); i++) { + // Find centroid of triangle's bounding box + glm::vec3 centroid = 0.5f * scene->triangles[i].aabb.min + 0.5f * scene->triangles[i].aabb.max; + + // Normalize centroid w.r.t. scene bounding box + glm::vec3 norm_centroid = (centroid - sceneAABB.min) / (sceneAABB.max - sceneAABB.min); + + // Calculate Morton code and add to list + MortonCode mcode; + mcode.objectId = i; + mcode.code = mortonCode3D(norm_centroid); + scene->mcodes.push_back(mcode); + } +} + +void sortMortonCodes(Scene* scene) { + std::vector mcodes_copy = scene->mcodes; + std::sort(mcodes_copy.begin(), mcodes_copy.end(), morton_sort); + scene->mcodes = mcodes_copy; +} + +// Determines the number of common bits between two numbers +int delta(MortonCode* sortedMCodes, int N, int i, int j) { + // Range check + if (j < 0 || j >= N) { + return -1; + } + + if (sortedMCodes[i].code == sortedMCodes[j].code) + { + return 32 + __lzcnt(i ^ j); + } + + return __lzcnt(sortedMCodes[i].code ^ sortedMCodes[j].code); +} + +// Determines in which direction the node's range will grow +int sign(MortonCode* sortedMCodes, int N, int i) { + int diff = delta(sortedMCodes, N, i, i + 1) - delta(sortedMCodes, N, i, i - 1); + return (diff >= 
0) ? 1 : -1; +} + +NodeRange determineRange(MortonCode* sortedMCodes, int triangleCount, int i) { + // Determine direction of range (+1 or -1) + int d = sign(sortedMCodes, triangleCount, i); + + // Compute upper bound of range + int deltaMin = delta(sortedMCodes, triangleCount, i, i - d); + int lMax = 2; + while (delta(sortedMCodes, triangleCount, i, i + lMax * d) > deltaMin) { + lMax = lMax * 2; + } + + // Find the other end with binary search + int l = 0; + for (int t = lMax / 2; t >= 1; t /= 2) { + if (delta(sortedMCodes, triangleCount, i, i + (l + t) * d) > deltaMin) { + l = l + t; + } + } + int j = i + l * d; + + return NodeRange{ i, j, l, d }; +} + +int findSplit(MortonCode* sortedMCodes, int triangleCount, NodeRange range) { + int i = range.i; + int j = range.j; + int l = range.l; + int d = range.d; + + // Find split position with binary search + int deltaNode = delta(sortedMCodes, triangleCount, range.i, range.j); + int s = 0; + int t = l; + do { + t = ceil(t / 2.f); + if (delta(sortedMCodes, triangleCount, i, i + (s + t) * d) > deltaNode) { + s = s + t; + } + } while (t > 1); + + int gamma = i + s * d + min(d, 0); + + return gamma; +} + +// Recursively assigns bounding boxes to each node, start from the leaf nodes and recursing upwards +AABB assignBoundingBoxes(Scene* scene, LBVHNode* node) { + + if (!isLeaf(node)) { + AABB leftAABB = assignBoundingBoxes(scene, &scene->lbvh[node->left]); + AABB rightAABB = assignBoundingBoxes(scene, &scene->lbvh[node->right]); + node->aabb = Union(leftAABB, rightAABB); + } + + return node->aabb; +} + +// Tree-building functions +void buildLBVH(Scene* scene, int leafStart, int triangleCount, int meshNum) { + // Resize LBVH + int numLeaf = triangleCount; + int numInternal = triangleCount - 1; + int internalStart = leafStart + numLeaf; + scene->lbvh.resize(numLeaf + numInternal); + scene->sorted_triangles.resize(numLeaf); + + // Initialize leaf nodes + for (int i = leafStart; i < numLeaf; ++i) { + LBVHNode leafNode; + 
leafNode.objectId = scene->mcodes[i - leafStart].objectId; + leafNode.aabb = scene->triangles[leafNode.objectId].aabb; + leafNode.left = 0xFFFFFFFF; + leafNode.right = 0xFFFFFFFF; + scene->lbvh[i] = leafNode; + + scene->sorted_triangles[i] = scene->triangles[leafNode.objectId]; + } + scene->triangles = scene->sorted_triangles; + + // Initialize internal nodes + for (int j = internalStart; j < internalStart + numInternal; ++j) { + LBVHNode internalNode; + + // Determine range + NodeRange range = determineRange(scene->mcodes.data(), triangleCount, j - triangleCount); + + // Find split position + int split = findSplit(scene->mcodes.data(), triangleCount, range); + + int leftChild = -1; + int rightChild = -1; + if (min(range.i, range.j) == split) { + leftChild = split; + } + else { + leftChild = triangleCount + split; + } + + if (max(range.i, range.j) == split + 1) { + rightChild = split + 1; + } + else { + rightChild = triangleCount + split + 1; + } + + internalNode.objectId = -1; + internalNode.left = leftChild; + internalNode.right = rightChild; + scene->lbvh[j] = internalNode; + } + // Assign bounding boxes here + assignBoundingBoxes(scene, &scene->lbvh[triangleCount]); +} + +void generateLBVH(Scene* scene) +{ + for (int i = 0; i < scene->meshCount; i++) { + // Morton code computation + computeMortonCodes(scene, scene->mesh_aabbs[i]); + + // Sort Morton codes + sortMortonCodes(scene); + + // Build tree from sorted Morton codes + buildLBVH(scene, scene->lbvh.size(), scene->mcodes.size(), i); + + scene->mcodes.clear(); + } +} + +/// BASIC BVH FUNCTIONS /// + +// Counter to keep track of the current available node in the tree +int idx = 1; + +// Finds the new bounds of the aabb +void updateBounds(Scene* scene, const int idx) +{ + BVHNode& node = scene->bvh[idx]; + for (int i = node.firstTri; i < node.firstTri + node.numTris; ++i) + { + node.aabb = Union(node.aabb, scene->triangles[i].aabb); + } +} + +int maxExtent(glm::vec3 extent) { + if (extent.x > extent.y && 
extent.x > extent.z) { + return 0; + } + else if (extent.y > extent.z) { + return 1; + } + else { + return 2; + } +} + +AABB Union(AABB aabb, glm::vec3 p) { + glm::vec3 umin = glm::min(aabb.min, p); + glm::vec3 umax = glm::max(aabb.max, p); + return AABB{ umin, umax }; +} + +// SAH cost = num_triangles_left * left_box_area + num_triangles_right * right_box_area +// Determines bounding boxes that result from splitting at this position and how many +// triangles to place in each box. Once these are determined, we can calculate SAH cost +float evalSAH(Scene* scene, BVHNode* node, float queryPos, int axis) +{ + AABB leftChild = { glm::vec3{INFINITY, INFINITY, INFINITY}, glm::vec3{-INFINITY, -INFINITY, -INFINITY} }; + AABB rightChild = { glm::vec3{INFINITY, INFINITY, INFINITY}, glm::vec3{-INFINITY, -INFINITY, -INFINITY} }; + int leftCount = 0; + int rightCount = 0; + + for (int i = node->firstTri; i < node->firstTri + node->numTris; ++i) { + glm::vec3 centroid = scene->triangles[i].centroid; + if (centroid[axis] < queryPos) { + leftCount++; + leftChild = Union(leftChild, scene->triangles[i].aabb); + } + else { + rightCount++; + rightChild = Union(rightChild, scene->triangles[i].aabb); + } + } + // Calculate cost + float cost = leftCount * leftChild.surfaceArea() + rightCount * rightChild.surfaceArea(); + + return cost; +} + +void calculateSAHSplit(Scene* scene, BVHNode* node, float& split, int& axis) +{ + // To find the optimal cost, we must calculate the cost of splitting along each + // axis for each triangle contained within this node + float optimalCost = INFINITY; + for (int i = 0; i < 3; ++i) { + for (int j = node->firstTri; j < node->firstTri + node->numTris; ++j) { + float centroidPos = scene->triangles[j].centroid[i]; + float cost = evalSAH(scene, node, centroidPos, i); + if (cost < optimalCost) { + optimalCost = cost; + split = centroidPos; + axis = i; + } + } + } +} + +void chooseSplit(Scene* scene, BVHNode* node, float& split, int& axis) +{ + +#if 
USE_BVH_MIDPOINT + // Find bounding box of centroids + AABB centroidAABB = { glm::vec3{INFINITY, INFINITY, INFINITY}, glm::vec3{-INFINITY, -INFINITY, -INFINITY} }; + for (int i = node->firstTri; i < node->firstTri + node->numTris; ++i) + centroidAABB = Union(centroidAABB, scene->triangles[i].centroid); + axis = maxExtent(centroidAABB.max - centroidAABB.min); + split = (centroidAABB.min[axis] + centroidAABB.max[axis]) * 0.5f; + +#elif USE_BVH_SAH + calculateSAHSplit(scene, node, split, axis); +#endif + +} + +void addChildren(Scene* scene, BVHNode* node) +{ + if (node->numTris <= 2) + { + return; + } + + // Choose split axis and position + float split = 0.f; + int axis = 0; + chooseSplit(scene, node, split, axis); + + // Partition primitives (in-place sorting) + int start = node->firstTri; + int end = node->firstTri + node->numTris - 1; + while(start <= end) { + if (scene->triangles[start].centroid[axis] < split) { + start++; + } + else { + std::swap(scene->triangles[start], scene->triangles[end]); + end--; + } + } + + // Make sure there is no empty side on partition + int count = start - node->firstTri; + if (count == 0 || count == node->numTris) return; + + // Set children nodes + node->left = idx++; + node->right = idx++; + scene->bvh[node->left].firstTri = node->firstTri; + scene->bvh[node->left].numTris = start - node->firstTri; + scene->bvh[node->right].firstTri = start; + scene->bvh[node->right].numTris = node->numTris - scene->bvh[node->left].numTris; + node->numTris = 0; + + updateBounds(scene, node->left); + updateBounds(scene, node->right); + + addChildren(scene, &scene->bvh[node->left]); + addChildren(scene, &scene->bvh[node->right]); +} + +void generateBVH(Scene* scene) +{ + // Resize BVH + scene->bvh.resize(2 * scene->triangles.size() - 1); + + // Initialize root node + BVHNode* root = &scene->bvh[0]; + root->aabb = scene->mesh_aabbs[0]; + root->firstTri = 0; + root->numTris = scene->triangles.size(); + + // Construct hierarchy + addChildren(scene, 
root); +} \ No newline at end of file diff --git a/src/lbvh.h b/src/lbvh.h new file mode 100644 index 0000000..8642e16 --- /dev/null +++ b/src/lbvh.h @@ -0,0 +1,48 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include "glm/glm.hpp" +#include "utilities.h" +#include "scene.h" +#include "sceneStructs.h" + +class Scene; + +/// LBVH FUNCTIONS /// + +// Morton code generation and sorting +unsigned int expand(unsigned int n); +unsigned int mortonCode3D(const glm::vec3& centroid); +void computeMortonCodes(Scene* scene, const AABB& sceneAABB); +void sortMortonCodes(Scene* scene); + +// Tree building +bool isLeaf(const LBVHNode* node); +int delta(unsigned int* sortedMCodes, int N, int i, int j); +int sign(unsigned int* sortedMCodes, int N, int i); + +NodeRange determineRange(unsigned int* sortedMCodes, int triangleCount, int idx); +int findSplit(unsigned int* sortedMCodes, int triangleCount, NodeRange range); +void assignBoundingBoxes(Scene* scene); +void buildLBVH(Scene* scene, int leafStart, int triangleCount); + +// Construct LBVH +void generateLBVH(Scene* scene); + +/// BVH FUNCTIONS /// + +void updateBounds(Scene* scene, const int idx); +int maxExtent(glm::vec3 extent); +float evalSAH(Scene* scene, BVHNode* node, float queryPos, int axis); +void calculateCost(Scene* scene, BVHNode* node, float& split, int& axis); +void chooseSplit(Scene* scene, BVHNode* node, float& split, int& axis); +void addChildren(Scene* scene, BVHNode* node); + +void generateBVH(Scene* scene); diff --git a/src/libmorton/morton.h b/src/libmorton/morton.h new file mode 100644 index 0000000..6084b52 --- /dev/null +++ b/src/libmorton/morton.h @@ -0,0 +1,106 @@ +#pragma once + +// This file will always contain inline functions which point to the fastest Morton encoding/decoding implementation +// IF you just want to use the fastest method to encode/decode morton codes, include this header. 
+ +// If you want to experiment with alternative methods (which might be slower, all depending on hardware / your data set) +// check the individual headers below. + +#include "morton2D.h" +#include "morton3D.h" + +#if defined(__BMI2__) || defined(__AVX2__) +#include "morton_BMI.h" +#elif defined(__AVX512BITALG__) +#include "morton_AVX512BITALG.h" +#endif + +namespace libmorton { + // Functions under this are stubs which will always point to fastest implementation at the moment + //----------------------------------------------------------------------------------------------- + + // ENCODING +#if defined(__BMI2__) || defined(__AVX2__) + inline uint_fast32_t morton2D_32_encode(const uint_fast16_t x, const uint_fast16_t y) { + return m2D_e_BMI(x, y); + } + inline uint_fast64_t morton2D_64_encode(const uint_fast32_t x, const uint_fast32_t y) { + return m2D_e_BMI(x, y); + } + inline uint_fast32_t morton3D_32_encode(const uint_fast16_t x, const uint_fast16_t y, const uint_fast16_t z) { + return m3D_e_BMI(x, y, z); + } + inline uint_fast64_t morton3D_64_encode(const uint_fast32_t x, const uint_fast32_t y, const uint_fast32_t z) { + return m3D_e_BMI(x, y, z); + } +#elif defined(__AVX512BITALG__) + inline uint_fast32_t morton2D_32_encode(const uint_fast16_t x, const uint_fast16_t y) { + return m2D_e_BITALG(x, y); + } + inline uint_fast64_t morton2D_64_encode(const uint_fast32_t x, const uint_fast32_t y) { + return m2D_e_BITALG(x, y); + } + inline uint_fast32_t morton3D_32_encode(const uint_fast16_t x, const uint_fast16_t y, const uint_fast16_t z) { + return m3D_e_BITALG(x, y, z); + } + inline uint_fast64_t morton3D_64_encode(const uint_fast32_t x, const uint_fast32_t y, const uint_fast32_t z) { + return m3D_e_BITALG(x, y, z); + } +#else + inline uint_fast32_t morton2D_32_encode(const uint_fast16_t x, const uint_fast16_t y) { + return m2D_e_magicbits_combined(x, y); + } + inline uint_fast64_t morton2D_64_encode(const uint_fast32_t x, const uint_fast32_t y) { + return 
m2D_e_sLUT(x, y); + } + inline uint_fast32_t morton3D_32_encode(const uint_fast16_t x, const uint_fast16_t y, const uint_fast16_t z) { + return m3D_e_sLUT(x, y, z); + } + inline uint_fast64_t morton3D_64_encode(const uint_fast32_t x, const uint_fast32_t y, const uint_fast32_t z) { + return m3D_e_sLUT(x, y, z); + } +#endif + + // DECODING + +#if defined(__BMI2__) || defined(__AVX2__) + inline void morton2D_32_decode(const uint_fast32_t morton, uint_fast16_t& x, uint_fast16_t& y) { + m2D_d_BMI(morton, x, y); + } + inline void morton2D_64_decode(const uint_fast64_t morton, uint_fast32_t& x, uint_fast32_t& y) { + m2D_d_BMI(morton, x, y); + } + inline void morton3D_32_decode(const uint_fast32_t morton, uint_fast16_t& x, uint_fast16_t& y, uint_fast16_t& z) { + m3D_d_BMI(morton, x, y, z); + } + inline void morton3D_64_decode(const uint_fast64_t morton, uint_fast32_t& x, uint_fast32_t& y, uint_fast32_t& z) { + m3D_d_BMI(morton, x, y, z); + } +#elif defined(__AVX512BITALG__) + inline void morton2D_32_decode(const uint_fast32_t morton, uint_fast16_t& x, uint_fast16_t& y) { + m2D_d_BITALG(morton, x, y); + } + inline void morton2D_64_decode(const uint_fast64_t morton, uint_fast32_t& x, uint_fast32_t& y) { + m2D_d_BITALG(morton, x, y); + } + inline void morton3D_32_decode(const uint_fast32_t morton, uint_fast16_t& x, uint_fast16_t& y, uint_fast16_t& z) { + m3D_d_BITALG(morton, x, y, z); + } + inline void morton3D_64_decode(const uint_fast64_t morton, uint_fast32_t& x, uint_fast32_t& y, uint_fast32_t& z) { + m3D_d_BITALG(morton, x, y, z); +} +#else + inline void morton2D_32_decode(const uint_fast32_t morton, uint_fast16_t& x, uint_fast16_t& y) { + m2D_d_magicbits_combined(morton, x, y); + } + inline void morton2D_64_decode(const uint_fast64_t morton, uint_fast32_t& x, uint_fast32_t& y) { + m2D_d_sLUT(morton, x, y); + } + inline void morton3D_32_decode(const uint_fast32_t morton, uint_fast16_t& x, uint_fast16_t& y, uint_fast16_t& z) { + m3D_d_sLUT(morton, x, y, z); + } + inline 
void morton3D_64_decode(const uint_fast64_t morton, uint_fast32_t& x, uint_fast32_t& y, uint_fast32_t& z) { + m3D_d_sLUT(morton, x, y, z); + } +#endif +} \ No newline at end of file diff --git a/src/libmorton/morton2D.h b/src/libmorton/morton2D.h new file mode 100644 index 0000000..6631c1c --- /dev/null +++ b/src/libmorton/morton2D.h @@ -0,0 +1,283 @@ +#pragma once + +// Libmorton - Methods to encode/decode 64-bit morton codes from/to 32-bit (x,y) coordinates +// Warning: morton.h will always point to the functions that use the fastest available method. + +#include +#include +#include +#include "morton2D_LUTs.h" +#include "morton_common.h" + +#define EIGHTBITMASK (morton) 0x000000FF + +namespace libmorton { + + // Encode methods + template inline morton m2D_e_sLUT(const coord x, const coord y); + template inline morton m2D_e_sLUT_ET(const coord x, const coord y); + template inline morton m2D_e_LUT(const coord x, const coord y); + template inline morton m2D_e_LUT_ET(const coord x, const coord y); + template inline morton m2D_e_magicbits(const coord x, const coord y); + template inline morton m2D_e_for(const coord x, const coord y); + template inline morton m2D_e_for_ET(const coord x, const coord y); + + // Decode methods + template inline void m2D_d_sLUT(const morton m, coord& x, coord& y); + template inline void m2D_d_sLUT_ET(const morton m, coord& x, coord& y); + template inline void m2D_d_LUT(const morton m, coord& x, coord& y); + template inline void m2D_d_LUT_ET(const morton m, coord& x, coord& y); + template inline void m2D_d_magicbits(const morton m, coord& x, coord& y); + template inline void m2D_d_for(const morton m, coord& x, coord& y); + + // ENCODE 2D Morton code : Pre-shifted LookUpTable (sLUT) + template + inline morton m2D_e_sLUT(const coord x, const coord y) { + morton answer = 0; + for (unsigned int i = sizeof(coord); i > 0; --i) { + unsigned int shift = (i - 1) * 8; + answer = + answer << 16 | + Morton2D_encode_y_256[(y >> shift) & EIGHTBITMASK] | 
+ Morton2D_encode_x_256[(x >> shift) & EIGHTBITMASK]; + } + return answer; + } + + // ENCODE 2D Morton code : LookUpTable (LUT) + template + inline morton m2D_e_LUT(const coord x, const coord y) { + morton answer = 0; + for (unsigned int i = sizeof(coord); i > 0; --i) { + unsigned int shift = (i - 1) * 8; + answer = + answer << 16 | + (Morton2D_encode_x_256[(y >> shift) & EIGHTBITMASK] << morton(1)) | + (Morton2D_encode_x_256[(x >> shift) & EIGHTBITMASK]); + } + return answer; + } + + // HELPER METHOD for Early Termination LUT Encode + template + inline morton compute2D_ET_LUT_encode(const coord c, const coord *LUT) { + unsigned long maxbit = 0; + if (findFirstSetBit(c, &maxbit) == 0) { return 0; } + morton answer = 0; + unsigned int i = 0; + while (maxbit >= i) { + answer |= ((morton)LUT[(c >> i) & EIGHTBITMASK]) << i * 2; + i += 8; + } + return answer; + } + + // ENCODE 2D Morton code : Pre-shifted LUT (Early termination version) + // This version tries to terminate early when there are no more bits to process + // Figuring this out is probably too costly in most cases. 
+ template + inline morton m2D_e_sLUT_ET(const coord x, const coord y) { + morton answer_x = compute2D_ET_LUT_encode(x, Morton2D_encode_x_256); + morton answer_y = compute2D_ET_LUT_encode(y, Morton2D_encode_y_256); + return answer_y | answer_x; + } + + // ENCODE 2D Morton code : LUT (Early termination version) + template + inline morton m2D_e_LUT_ET(const coord x, const coord y) { + morton answer_x = compute2D_ET_LUT_encode(x, Morton2D_encode_x_256); + morton answer_y = compute2D_ET_LUT_encode(y, Morton2D_encode_x_256); + return (answer_y << 1) | answer_x; + } + + // Magicbits masks (2D encode) + static uint_fast32_t magicbit2D_masks32[6] = { 0xFFFFFFFF, 0x0000FFFF, 0x00FF00FF, 0x0F0F0F0F, 0x33333333, 0x55555555 }; + static uint_fast64_t magicbit2D_masks64[6] = { 0x00000000FFFFFFFF, 0x0000FFFF0000FFFF, 0x00FF00FF00FF00FF, 0x0F0F0F0F0F0F0F0F, 0x3333333333333333, 0x5555555555555555 }; + + // HELPER METHOD for Magic bits encoding - split by 2 + template + inline morton morton2D_SplitBy2Bits(const coord a) { + const morton* masks = (sizeof(morton) <= 4) ? reinterpret_cast(magicbit2D_masks32) : reinterpret_cast(magicbit2D_masks64); + morton x = a; + if (sizeof(morton) > 4) { x = (x | (uint_fast64_t)x << 32) & masks[0]; } + x = (x | x << 16) & masks[1]; + x = (x | x << 8) & masks[2]; + x = (x | x << 4) & masks[3]; + x = (x | x << 2) & masks[4]; + x = (x | x << 1) & masks[5]; + return x; + } + + // ENCODE 2D Morton code : Magic bits + template + inline morton m2D_e_magicbits(const coord x, const coord y) { + return morton2D_SplitBy2Bits(x) | (morton2D_SplitBy2Bits(y) << 1); + } + + // ENCODE 2D 32-bit morton code - alternative version by JarkkoPFC - https://gist.github.com/JarkkoPFC/0e4e599320b0cc7ea92df45fb416d79a + // This uses the same technique as the magicbits method, but uses the upper part of a 64-bit type to split the y coordinate, + // the lower part to split the x coordinate, then merges them back together. 
// ENCODE 2D Morton code : For Loop
// Interleaves the bits of x and y one at a time: bit i of x lands on bit 2*i
// of the result and bit i of y on bit 2*i + 1. Half as many coordinate bits
// as there are bits in `morton` are processed.
//
// morton: unsigned integer type of the resulting code
// coord : unsigned integer type of the input coordinates
template<typename morton, typename coord>
inline morton m2D_e_for(const coord x, const coord y) {
    morton answer = 0;
    const unsigned int checkbits = static_cast<unsigned int>(sizeof(morton) * 4);
    for (unsigned int i = 0; i < checkbits; ++i) {
        // The 1 must be widened to morton first, otherwise the shift is done
        // in int and breaks when the morton code is larger than 32 bits.
        const morton mshifted = static_cast<morton>(0x1) << i;
        const unsigned int shift = i; // bit i moves up by i (x) or i + 1 (y) to reach 2*i / 2*i + 1
        answer |=
            ((x & mshifted) << shift)
            | ((y & mshifted) << (shift + 1));
    }
    return answer;
}
morton2D_DecodeCoord_LUT256(const morton m, const uint_fast8_t *LUT, const unsigned int startshift) { + morton a = 0; + unsigned int loops = sizeof(morton); + for (unsigned int i = 0; i < loops; ++i) { + a |= ((morton)LUT[(m >> ((i * 8) + startshift)) & EIGHTBITMASK] << (4 * i)); + } + return static_cast(a); + } + + // DECODE 2D Morton code : Shifted LUT + template + inline void m2D_d_sLUT(const morton m, coord& x, coord& y) { + x = morton2D_DecodeCoord_LUT256(m, Morton2D_decode_x_256, 0); + y = morton2D_DecodeCoord_LUT256(m, Morton2D_decode_y_256, 0); + } + + // DECODE 2D 64-bit morton code : LUT + template + inline void m2D_d_LUT(const morton m, coord& x, coord& y) { + x = morton2D_DecodeCoord_LUT256(m, Morton2D_decode_x_256, 0); + y = morton2D_DecodeCoord_LUT256(m, Morton2D_decode_x_256, 1); + } + + // DECODE 2D Morton code : Shifted LUT (early termination) + template + inline void m2D_d_sLUT_ET(const morton m, coord& x, coord& y) { + x = 0; y = 0; + unsigned long firstbit_location = 0; + if (!findFirstSetBit(m, &firstbit_location)) { return; } + unsigned int i = 0; + unsigned int shiftback = 0; + while (firstbit_location > i) { + morton m_shifted = (m >> i) & EIGHTBITMASK; + x |= (coord)Morton2D_decode_x_256[m_shifted] << shiftback; + y |= (coord)Morton2D_decode_y_256[m_shifted] << shiftback; + shiftback += 4; + i += 8; + } + } + + // DECODE 2D Morton code : LUT (early termination) + template + inline void m2D_d_LUT_ET(const morton m, coord& x, coord& y) { + x = 0; y = 0; + unsigned long firstbit_location = 0; + if (!findFirstSetBit(m, &firstbit_location)) { return; } + unsigned int i = 0; + unsigned int shiftback = 0; + while (firstbit_location > i) { + x |= (coord)Morton2D_decode_x_256[(m >> i) & EIGHTBITMASK] << shiftback; + y |= (coord)Morton2D_decode_x_256[(m >> (i + 1)) & EIGHTBITMASK] << shiftback; + shiftback += 4; + i += 8; + } + } + + // HELPER method for Magicbits decoding + template + static inline coord morton2D_GetSecondBits(const morton m) { + 
// DECODE 2D morton code : For loop
// De-interleaves the code one bit pair at a time: bit 2*i of m becomes bit i
// of x and bit 2*i + 1 of m becomes bit i of y.
//
// morton: unsigned integer type of the code
// coord : unsigned integer type of the output coordinates
template<typename morton, typename coord>
inline void m2D_d_for(const morton m, coord& x, coord& y) {
    x = 0; y = 0;
    const unsigned int checkbits = sizeof(morton) * 4; // number of bit pairs in m
    // Strictly "<": at i == checkbits the selector would be shifted by the
    // full width of morton, which is undefined behaviour. The last valid
    // pair (i == checkbits - 1) already covers the top two bits of m.
    for (unsigned int i = 0; i < checkbits; ++i) {
        const morton selector = 1;
        const unsigned int shift_selector = 2 * i;
        x |= (m & (selector << shift_selector)) >> i;
        y |= (m & (selector << (shift_selector + 1))) >> (i + 1);
    }
}
static_cast(std::min(defaultbits, firstbit_location / 2.0f)); + for (unsigned int i = 0; i <= checkbits; ++i) { + morton selector = 1; + unsigned int shift_selector = 2 * i; + x |= (m & (selector << shift_selector)) >> i; + y |= (m & (selector << (shift_selector + 1))) >> (i + 1); + } + } +} diff --git a/src/libmorton/morton2D_LUTs.h b/src/libmorton/morton2D_LUTs.h new file mode 100644 index 0000000..730eb02 --- /dev/null +++ b/src/libmorton/morton2D_LUTs.h @@ -0,0 +1,120 @@ +#pragma once + +#include + +namespace libmorton { + + // LUT for Morton2D encode X + static const uint_fast16_t Morton2D_encode_x_256[256] = + { + 0, 1, 4, 5, 16, 17, 20, 21, + 64, 65, 68, 69, 80, 81, 84, 85, + 256, 257, 260, 261, 272, 273, 276, 277, + 320, 321, 324, 325, 336, 337, 340, 341, + 1024, 1025, 1028, 1029, 1040, 1041, 1044, 1045, + 1088, 1089, 1092, 1093, 1104, 1105, 1108, 1109, + 1280, 1281, 1284, 1285, 1296, 1297, 1300, 1301, + 1344, 1345, 1348, 1349, 1360, 1361, 1364, 1365, + 4096, 4097, 4100, 4101, 4112, 4113, 4116, 4117, + 4160, 4161, 4164, 4165, 4176, 4177, 4180, 4181, + 4352, 4353, 4356, 4357, 4368, 4369, 4372, 4373, + 4416, 4417, 4420, 4421, 4432, 4433, 4436, 4437, + 5120, 5121, 5124, 5125, 5136, 5137, 5140, 5141, + 5184, 5185, 5188, 5189, 5200, 5201, 5204, 5205, + 5376, 5377, 5380, 5381, 5392, 5393, 5396, 5397, + 5440, 5441, 5444, 5445, 5456, 5457, 5460, 5461, + 16384, 16385, 16388, 16389, 16400, 16401, 16404, 16405, + 16448, 16449, 16452, 16453, 16464, 16465, 16468, 16469, + 16640, 16641, 16644, 16645, 16656, 16657, 16660, 16661, + 16704, 16705, 16708, 16709, 16720, 16721, 16724, 16725, + 17408, 17409, 17412, 17413, 17424, 17425, 17428, 17429, + 17472, 17473, 17476, 17477, 17488, 17489, 17492, 17493, + 17664, 17665, 17668, 17669, 17680, 17681, 17684, 17685, + 17728, 17729, 17732, 17733, 17744, 17745, 17748, 17749, + 20480, 20481, 20484, 20485, 20496, 20497, 20500, 20501, + 20544, 20545, 20548, 20549, 20560, 20561, 20564, 20565, + 20736, 20737, 20740, 20741, 20752, 20753, 
20756, 20757, + 20800, 20801, 20804, 20805, 20816, 20817, 20820, 20821, + 21504, 21505, 21508, 21509, 21520, 21521, 21524, 21525, + 21568, 21569, 21572, 21573, 21584, 21585, 21588, 21589, + 21760, 21761, 21764, 21765, 21776, 21777, 21780, 21781, + 21824, 21825, 21828, 21829, 21840, 21841, 21844, 21845 + }; + + // LUT for Morton2D encode Y + static const uint_fast16_t Morton2D_encode_y_256[256] = + { + 0, 2, 8, 10, 32, 34, 40, 42, + 128, 130, 136, 138, 160, 162, 168, 170, + 512, 514, 520, 522, 544, 546, 552, 554, + 640, 642, 648, 650, 672, 674, 680, 682, + 2048, 2050, 2056, 2058, 2080, 2082, 2088, 2090, + 2176, 2178, 2184, 2186, 2208, 2210, 2216, 2218, + 2560, 2562, 2568, 2570, 2592, 2594, 2600, 2602, + 2688, 2690, 2696, 2698, 2720, 2722, 2728, 2730, + 8192, 8194, 8200, 8202, 8224, 8226, 8232, 8234, + 8320, 8322, 8328, 8330, 8352, 8354, 8360, 8362, + 8704, 8706, 8712, 8714, 8736, 8738, 8744, 8746, + 8832, 8834, 8840, 8842, 8864, 8866, 8872, 8874, + 10240, 10242, 10248, 10250, 10272, 10274, 10280, 10282, + 10368, 10370, 10376, 10378, 10400, 10402, 10408, 10410, + 10752, 10754, 10760, 10762, 10784, 10786, 10792, 10794, + 10880, 10882, 10888, 10890, 10912, 10914, 10920, 10922, + 32768, 32770, 32776, 32778, 32800, 32802, 32808, 32810, + 32896, 32898, 32904, 32906, 32928, 32930, 32936, 32938, + 33280, 33282, 33288, 33290, 33312, 33314, 33320, 33322, + 33408, 33410, 33416, 33418, 33440, 33442, 33448, 33450, + 34816, 34818, 34824, 34826, 34848, 34850, 34856, 34858, + 34944, 34946, 34952, 34954, 34976, 34978, 34984, 34986, + 35328, 35330, 35336, 35338, 35360, 35362, 35368, 35370, + 35456, 35458, 35464, 35466, 35488, 35490, 35496, 35498, + 40960, 40962, 40968, 40970, 40992, 40994, 41000, 41002, + 41088, 41090, 41096, 41098, 41120, 41122, 41128, 41130, + 41472, 41474, 41480, 41482, 41504, 41506, 41512, 41514, + 41600, 41602, 41608, 41610, 41632, 41634, 41640, 41642, + 43008, 43010, 43016, 43018, 43040, 43042, 43048, 43050, + 43136, 43138, 43144, 43146, 43168, 43170, 43176, 
43178, + 43520, 43522, 43528, 43530, 43552, 43554, 43560, 43562, + 43648, 43650, 43656, 43658, 43680, 43682, 43688, 43690 + }; + + // LUT for Morton2D decode X + static const uint_fast8_t Morton2D_decode_x_256[256] = { + 0,1,0,1,2,3,2,3,0,1,0,1,2,3,2,3, + 4,5,4,5,6,7,6,7,4,5,4,5,6,7,6,7, + 0,1,0,1,2,3,2,3,0,1,0,1,2,3,2,3, + 4,5,4,5,6,7,6,7,4,5,4,5,6,7,6,7, + 8,9,8,9,10,11,10,11,8,9,8,9,10,11,10,11, + 12,13,12,13,14,15,14,15,12,13,12,13,14,15,14,15, + 8,9,8,9,10,11,10,11,8,9,8,9,10,11,10,11, + 12,13,12,13,14,15,14,15,12,13,12,13,14,15,14,15, + 0,1,0,1,2,3,2,3,0,1,0,1,2,3,2,3, + 4,5,4,5,6,7,6,7,4,5,4,5,6,7,6,7, + 0,1,0,1,2,3,2,3,0,1,0,1,2,3,2,3, + 4,5,4,5,6,7,6,7,4,5,4,5,6,7,6,7, + 8,9,8,9,10,11,10,11,8,9,8,9,10,11,10,11, + 12,13,12,13,14,15,14,15,12,13,12,13,14,15,14,15, + 8,9,8,9,10,11,10,11,8,9,8,9,10,11,10,11, + 12,13,12,13,14,15,14,15,12,13,12,13,14,15,14,15 + }; + + // LUT for Morton2D decode Y + static const uint_fast8_t Morton2D_decode_y_256[256] = { + 0,0,1,1,0,0,1,1,2,2,3,3,2,2,3,3, + 0,0,1,1,0,0,1,1,2,2,3,3,2,2,3,3, + 4,4,5,5,4,4,5,5,6,6,7,7,6,6,7,7, + 4,4,5,5,4,4,5,5,6,6,7,7,6,6,7,7, + 0,0,1,1,0,0,1,1,2,2,3,3,2,2,3,3, + 0,0,1,1,0,0,1,1,2,2,3,3,2,2,3,3, + 4,4,5,5,4,4,5,5,6,6,7,7,6,6,7,7, + 4,4,5,5,4,4,5,5,6,6,7,7,6,6,7,7, + 8,8,9,9,8,8,9,9,10,10,11,11,10,10,11,11, + 8,8,9,9,8,8,9,9,10,10,11,11,10,10,11,11, + 12,12,13,13,12,12,13,13,14,14,15,15,14,14,15,15, + 12,12,13,13,12,12,13,13,14,14,15,15,14,14,15,15, + 8,8,9,9,8,8,9,9,10,10,11,11,10,10,11,11, + 8,8,9,9,8,8,9,9,10,10,11,11,10,10,11,11, + 12,12,13,13,12,12,13,13,14,14,15,15,14,14,15,15, + 12,12,13,13,12,12,13,13,14,14,15,15,14,14,15,15 + }; +} diff --git a/src/libmorton/morton3D.h b/src/libmorton/morton3D.h new file mode 100644 index 0000000..81e88f8 --- /dev/null +++ b/src/libmorton/morton3D.h @@ -0,0 +1,283 @@ +#pragma once + +// Libmorton - Methods to encode/decode 64-bit morton codes from/to 32-bit (x,y,z) coordinates +// Warning: morton.h will always point to the functions that use the fastest 
available method. + +#include +#include +#include +#include "morton3D_LUTs.h" +#include "morton_common.h" + +#define EIGHTBITMASK (morton) 0x000000FF +#define NINEBITMASK (morton) 0x000001FF + +namespace libmorton { + // AVAILABLE METHODS FOR ENCODING + template inline morton m3D_e_sLUT(const coord x, const coord y, const coord z); + template inline morton m3D_e_sLUT_ET(const coord x, const coord y, const coord z); + template inline morton m3D_e_LUT(const coord x, const coord y, const coord z); + template inline morton m3D_e_LUT_ET(const coord x, const coord y, const coord z); + template inline morton m3D_e_magicbits(const coord x, const coord y, const coord z); + template inline morton m3D_e_for(const coord x, const coord y, const coord z); + template inline morton m3D_e_for_ET(const coord x, const coord y, const coord z); + + // AVAILABLE METHODS FOR DECODING + template inline void m3D_d_sLUT(const morton m, coord& x, coord& y, coord& z); + template inline void m3D_d_sLUT_ET(const morton m, coord& x, coord& y, coord& z); + template inline void m3D_d_LUT(const morton m, coord& x, coord& y, coord& z); + template inline void m3D_d_LUT_ET(const morton m, coord& x, coord& y, coord& z); + template inline void m3D_d_magicbits(const morton m, coord& x, coord& y, coord& z); + template inline void m3D_d_for(const morton m, coord& x, coord& y, coord& z); + template inline void m3D_d_for_ET(const morton m, coord& x, coord& y, coord& z); + + // ENCODE 3D Morton code : Pre-Shifted LookUpTable (sLUT) + template + inline morton m3D_e_sLUT(const coord x, const coord y, const coord z) { + morton answer = 0; + for (unsigned int i = sizeof(coord); i > 0; --i) { + unsigned int shift = (i - 1) * 8; + answer = + answer << 24 | + (Morton3D_encode_z_256[(z >> shift) & EIGHTBITMASK] | + Morton3D_encode_y_256[(y >> shift) & EIGHTBITMASK] | + Morton3D_encode_x_256[(x >> shift) & EIGHTBITMASK]); + } + return answer; + } + + // ENCODE 3D Morton code : LookUpTable (LUT) + template + inline 
morton m3D_e_LUT(const coord x, const coord y, const coord z) { + morton answer = 0; + for (unsigned int i = sizeof(coord); i > 0; --i) { + unsigned int shift = (i - 1) * 8; + answer = + answer << 24 | + (Morton3D_encode_x_256[(z >> shift) & EIGHTBITMASK] << morton(2)) | + (Morton3D_encode_x_256[(y >> shift) & EIGHTBITMASK] << morton(1)) | + Morton3D_encode_x_256[(x >> shift) & EIGHTBITMASK]; + } + return answer; + } + + // HELPER METHOD for ET LUT encode + template + inline morton compute3D_ET_LUT_encode(const coord c, const coord *LUT) { + unsigned long maxbit = 0; + if (findFirstSetBit(c, &maxbit) == 0) { return 0; } + morton answer = 0; + for (int i = (int)ceil((maxbit + 1) / 8.0f); i >= 0; --i) { + unsigned int shift = i * 8; + answer = answer << 24 | (LUT[(c >> shift) & EIGHTBITMASK]); + } + return answer; + } + + // ENCODE 3D Morton code : Pre-shifted LookUpTable (LUT) (Early Termination version) + // This version tries to terminate early when there are no more bits to process + // Figuring this out is probably too costly in most cases. + template + inline morton m3D_e_sLUT_ET(const coord x, const coord y, const coord z) { + morton answer_x = compute3D_ET_LUT_encode(x, Morton3D_encode_x_256); + morton answer_y = compute3D_ET_LUT_encode(y, Morton3D_encode_y_256); + morton answer_z = compute3D_ET_LUT_encode(z, Morton3D_encode_z_256); + return answer_z | answer_y | answer_x; + } + + // ENCODE 3D Morton code : LookUpTable (LUT) (Early termination version) + // This version tries to terminate early when there are no more bits to process + // Figuring this out is probably too costly in most cases. 
// Magicbits masks (3D encode)
static uint_fast32_t magicbit3D_masks32_encode[6] = { 0x000003ff, 0, 0x30000ff, 0x0300f00f, 0x30c30c3, 0x9249249 }; // we add a 0 on position 1 in this array to use same code for 32-bit and 64-bit cases
static uint_fast64_t magicbit3D_masks64_encode[6] = { 0x1fffff, 0x1f00000000ffff, 0x1f0000ff0000ff, 0x100f00f00f00f00f, 0x10c30c30c30c30c3, 0x1249249249249249 };

// HELPER METHOD: returns encode mask `index` for the requested morton width.
// The mask is read by value and converted, instead of reinterpreting the table as an array of
// morton: uint_fast32_t is not guaranteed to be 32 bits wide, so indexing the 32-bit table
// through a `morton*` reads the wrong elements whenever the two sizes differ.
template <typename morton>
static inline morton morton3D_EncodeMask(const unsigned int index) {
    return (sizeof(morton) <= 4) ? static_cast<morton>(magicbit3D_masks32_encode[index])
                                 : static_cast<morton>(magicbit3D_masks64_encode[index]);
}

// HELPER METHOD: Magic bits encoding (helper method)
// Spreads the low bits of `a` so that two zero bits separate each of them.
template <typename morton, typename coord>
static inline morton morton3D_SplitBy3bits(const coord a) {
    morton x = static_cast<morton>(a) & morton3D_EncodeMask<morton>(0);
    if (sizeof(morton) == 8) { x = (x | (uint_fast64_t)x << 32) & morton3D_EncodeMask<morton>(1); } // for 64-bit case
    x = (x | x << 16) & morton3D_EncodeMask<morton>(2);
    x = (x | x << 8) & morton3D_EncodeMask<morton>(3);
    x = (x | x << 4) & morton3D_EncodeMask<morton>(4);
    x = (x | x << 2) & morton3D_EncodeMask<morton>(5);
    return x;
}

// ENCODE 3D Morton code : Magic bits method
// This method uses certain bit patterns (magic bits) to split bits in the coordinates
template <typename morton, typename coord>
inline morton m3D_e_magicbits(const coord x, const coord y, const coord z) {
    return morton3D_SplitBy3bits<morton, coord>(x)
        | (morton3D_SplitBy3bits<morton, coord>(y) << 1)
        | (morton3D_SplitBy3bits<morton, coord>(z) << 2);
}

// ENCODE 3D Morton code : For loop
// This is the most naive way of encoding coordinates into a morton code
template <typename morton, typename coord>
inline morton m3D_e_for(const coord x, const coord y, const coord z) {
    morton answer = 0;
    // Number of bits each coordinate contributes to the code.
    const unsigned int checkbits = (sizeof(morton) * 8) / 3;
    for (unsigned int i = 0; i < checkbits; ++i) {
        // The 1 is created as a morton so the selector does not overflow when the code is wider than 32 bits.
        const morton mshifted = static_cast<morton>(1) << i;
        // Bit i of a coordinate lands at bit 3*i (+0/+1/+2) of the code; it already sits at
        // position i, so only 2*i further positions are needed.
        const unsigned int shift = 2 * i;
        answer |= ((x & mshifted) << shift)
            | ((y & mshifted) << (shift + 1))
            | ((z & mshifted) << (shift + 2));
    }
    return answer;
}

// ENCODE 3D Morton code : For loop (Early termination version)
// In case of the for loop, figuring out when to stop early has huge benefits.
template <typename morton, typename coord>
inline morton m3D_e_for_ET(const coord x, const coord y, const coord z) {
    morton answer = 0;
    // These stay 0 when findFirstSetBit reports no set bit for a coordinate.
    unsigned long x_max = 0, y_max = 0, z_max = 0;
    unsigned int checkbits = (sizeof(morton) * 8) / 3;
    findFirstSetBit(x, &x_max);
    findFirstSetBit(y, &y_max);
    findFirstSetBit(z, &z_max);
    // Only walk up to the highest reported bit of any coordinate, never past the capacity of the code.
    checkbits = std::min((unsigned long)checkbits, std::max(z_max, std::max(x_max, y_max)) + (unsigned long)1);
    for (unsigned int i = 0; i < checkbits; ++i) {
        const morton m_shifted = static_cast<morton>(1) << i; // selector built as a morton, see m3D_e_for
        const unsigned int shift = 2 * i;
        answer |= ((x & m_shifted) << shift)
            | ((y & m_shifted) << (shift + 1))
            | ((z & m_shifted) << (shift + 2));
    }
    return answer;
}
4 : 7; + for (unsigned int i = 0; i < loops; ++i) { + a |= (morton)(LUT[(m >> ((i * 9) + startshift)) & NINEBITMASK] << morton(3 * i)); + } + return static_cast(a); + } + + // DECODE 3D Morton code : Shifted LUT + template + inline void m3D_d_sLUT(const morton m, coord& x, coord& y, coord& z) { + x = morton3D_DecodeCoord_LUT256(m, Morton3D_decode_x_512, 0); + y = morton3D_DecodeCoord_LUT256(m, Morton3D_decode_y_512, 0); + z = morton3D_DecodeCoord_LUT256(m, Morton3D_decode_z_512, 0); + } + + // DECODE 3D Morton code : LUT + template + inline void m3D_d_LUT(const morton m, coord& x, coord& y, coord& z) { + x = morton3D_DecodeCoord_LUT256(m, Morton3D_decode_x_512, 0); + y = morton3D_DecodeCoord_LUT256(m, Morton3D_decode_x_512, 1); + z = morton3D_DecodeCoord_LUT256(m, Morton3D_decode_x_512, 2); + } + + // DECODE 3D Morton code : Shifted LUT (Early termination version) + template + inline void m3D_d_sLUT_ET(const morton m, coord& x, coord& y, coord& z) { + x = 0; y = 0; z = 0; + unsigned long firstbit_location = 0; + if (!findFirstSetBit(m, &firstbit_location)) { return; } + unsigned int i = 0; + unsigned int shiftback = 0; + while (firstbit_location > i) { + morton m_shifted = (m >> i) & NINEBITMASK; + x |= (coord)Morton3D_decode_x_512[m_shifted] << shiftback; + y |= (coord)Morton3D_decode_y_512[m_shifted] << shiftback; + z |= (coord)Morton3D_decode_z_512[m_shifted] << shiftback; + shiftback += 3; + i += 9; + } + return; + } + + // DECODE 3D Morton code : LUT (Early termination version) + template + inline void m3D_d_LUT_ET(const morton m, coord& x, coord& y, coord& z) { + x = 0; y = 0; z = 0; + unsigned long firstbit_location = 0; + if (!findFirstSetBit(m, &firstbit_location)) { return; } + unsigned int i = 0; + unsigned int shiftback = 0; + while (i < firstbit_location) { + x = x | (coord)Morton3D_decode_x_512[(m >> i) & NINEBITMASK] << shiftback; + y = y | (coord)Morton3D_decode_x_512[(m >> (i + 1)) & NINEBITMASK] << shiftback; + z = z | 
// Magicbits masks (3D decode)
static uint_fast32_t magicbit3D_masks32_decode[6] = { 0, 0x000003ff, 0x30000ff, 0x0300f00f, 0x30c30c3, 0x9249249 }; // we add a 0 on position 0 in this array to use same code for 32-bit and 64-bit cases
static uint_fast64_t magicbit3D_masks64_decode[6] = { 0x1fffff, 0x1f00000000ffff, 0x1f0000ff0000ff, 0x100f00f00f00f00f, 0x10c30c30c30c30c3, 0x1249249249249249 };

// HELPER METHOD: returns decode mask `index` for the requested morton width.
// The mask is read by value and converted, instead of reinterpreting the table as an array of
// morton: uint_fast32_t is not guaranteed to be 32 bits wide, so indexing the 32-bit table
// through a `morton*` reads the wrong elements whenever the two sizes differ.
template <typename morton>
static inline morton morton3D_DecodeMask(const unsigned int index) {
    return (sizeof(morton) <= 4) ? static_cast<morton>(magicbit3D_masks32_decode[index])
                                 : static_cast<morton>(magicbit3D_masks64_decode[index]);
}

// HELPER METHOD for Magic bits decoding
// Collects every third bit of m (bits 0, 3, 6, ...) into the low bits of the result.
template <typename morton, typename coord>
static inline coord morton3D_GetThirdBits(const morton m) {
    morton x = m & morton3D_DecodeMask<morton>(5);
    x = (x ^ (x >> 2)) & morton3D_DecodeMask<morton>(4);
    x = (x ^ (x >> 4)) & morton3D_DecodeMask<morton>(3);
    x = (x ^ (x >> 8)) & morton3D_DecodeMask<morton>(2);
    x = (x ^ (x >> 16)) & morton3D_DecodeMask<morton>(1);
    if (sizeof(morton) > 4) { x = (x ^ ((uint_fast64_t)x >> 32)) & morton3D_DecodeMask<morton>(0); }
    return static_cast<coord>(x);
}

// DECODE 3D Morton code : Magic bits
// This method splits the morton codes bits by using certain patterns (magic bits)
template <typename morton, typename coord>
inline void m3D_d_magicbits(const morton m, coord& x, coord& y, coord& z) {
    x = morton3D_GetThirdBits<morton, coord>(m);
    y = morton3D_GetThirdBits<morton, coord>(m >> 1);
    z = morton3D_GetThirdBits<morton, coord>(m >> 2);
}

// DECODE 3D Morton code : For loop
// Naive decoder: bits 3*i, 3*i + 1 and 3*i + 2 of the code become bit i of x, y and z.
template <typename morton, typename coord>
inline void m3D_d_for(const morton m, coord& x, coord& y, coord& z) {
    x = 0; y = 0; z = 0;
    // Number of complete (x, y, z) bit triples in the code; the same count m3D_e_for writes.
    const unsigned int checkbits = (sizeof(morton) * 8) / 3;
    const morton selector = 1;
    // Stop before `checkbits`: triple index `checkbits` would shift the selector by the width
    // of morton or more, which is undefined behaviour.
    for (unsigned int i = 0; i < checkbits; ++i) {
        const unsigned int shift_selector = 3 * i;
        const unsigned int shiftback = 2 * i;
        x |= (m & (selector << shift_selector)) >> (shiftback);
        y |= (m & (selector << (shift_selector + 1))) >> (shiftback + 1);
        z |= (m & (selector << (shift_selector + 2))) >> (shiftback + 2);
    }
}

// DECODE 3D Morton code : For loop (Early termination version)
// Same bit walk as m3D_d_for, but stops at the triple that contains the highest set bit
// reported by findFirstSetBit.
template <typename morton, typename coord>
inline void m3D_d_for_ET(const morton m, coord& x, coord& y, coord& z) {
    x = 0; y = 0; z = 0;
    unsigned long firstbit_location = 0;
    if (!findFirstSetBit(m, &firstbit_location)) return;
    // Index of the last triple worth visiting. Integer division gives the same floor the old
    // float expression produced; the clamp keeps every selector shift inside morton.
    const unsigned long lasttriple = std::min<unsigned long>((sizeof(morton) * 8) / 3 - 1, firstbit_location / 3);
    const morton selector = 1;
    for (unsigned long i = 0; i <= lasttriple; ++i) {
        const unsigned long shift_selector = 3 * i;
        const unsigned long shiftback = 2 * i;
        x |= (m & (selector << shift_selector)) >> (shiftback);
        y |= (m & (selector << (shift_selector + 1))) >> (shiftback + 1);
        z |= (m & (selector << (shift_selector + 2))) >> (shiftback + 2);
    }
}
0x00040241, 0x00040248, 0x00040249, 0x00041000, + 0x00041001, 0x00041008, 0x00041009, 0x00041040, 0x00041041, 0x00041048, 0x00041049, 0x00041200, + 0x00041201, 0x00041208, 0x00041209, 0x00041240, 0x00041241, 0x00041248, 0x00041249, 0x00048000, + 0x00048001, 0x00048008, 0x00048009, 0x00048040, 0x00048041, 0x00048048, 0x00048049, 0x00048200, + 0x00048201, 0x00048208, 0x00048209, 0x00048240, 0x00048241, 0x00048248, 0x00048249, 0x00049000, + 0x00049001, 0x00049008, 0x00049009, 0x00049040, 0x00049041, 0x00049048, 0x00049049, 0x00049200, + 0x00049201, 0x00049208, 0x00049209, 0x00049240, 0x00049241, 0x00049248, 0x00049249, 0x00200000, + 0x00200001, 0x00200008, 0x00200009, 0x00200040, 0x00200041, 0x00200048, 0x00200049, 0x00200200, + 0x00200201, 0x00200208, 0x00200209, 0x00200240, 0x00200241, 0x00200248, 0x00200249, 0x00201000, + 0x00201001, 0x00201008, 0x00201009, 0x00201040, 0x00201041, 0x00201048, 0x00201049, 0x00201200, + 0x00201201, 0x00201208, 0x00201209, 0x00201240, 0x00201241, 0x00201248, 0x00201249, 0x00208000, + 0x00208001, 0x00208008, 0x00208009, 0x00208040, 0x00208041, 0x00208048, 0x00208049, 0x00208200, + 0x00208201, 0x00208208, 0x00208209, 0x00208240, 0x00208241, 0x00208248, 0x00208249, 0x00209000, + 0x00209001, 0x00209008, 0x00209009, 0x00209040, 0x00209041, 0x00209048, 0x00209049, 0x00209200, + 0x00209201, 0x00209208, 0x00209209, 0x00209240, 0x00209241, 0x00209248, 0x00209249, 0x00240000, + 0x00240001, 0x00240008, 0x00240009, 0x00240040, 0x00240041, 0x00240048, 0x00240049, 0x00240200, + 0x00240201, 0x00240208, 0x00240209, 0x00240240, 0x00240241, 0x00240248, 0x00240249, 0x00241000, + 0x00241001, 0x00241008, 0x00241009, 0x00241040, 0x00241041, 0x00241048, 0x00241049, 0x00241200, + 0x00241201, 0x00241208, 0x00241209, 0x00241240, 0x00241241, 0x00241248, 0x00241249, 0x00248000, + 0x00248001, 0x00248008, 0x00248009, 0x00248040, 0x00248041, 0x00248048, 0x00248049, 0x00248200, + 0x00248201, 0x00248208, 0x00248209, 0x00248240, 0x00248241, 0x00248248, 0x00248249, 
0x00249000, + 0x00249001, 0x00249008, 0x00249009, 0x00249040, 0x00249041, 0x00249048, 0x00249049, 0x00249200, + 0x00249201, 0x00249208, 0x00249209, 0x00249240, 0x00249241, 0x00249248, 0x00249249 + }; + + // LUT for Morton3D encode Y + static const uint_fast32_t Morton3D_encode_y_256[256] = { + 0x00000000, + 0x00000002, 0x00000010, 0x00000012, 0x00000080, 0x00000082, 0x00000090, 0x00000092, 0x00000400, + 0x00000402, 0x00000410, 0x00000412, 0x00000480, 0x00000482, 0x00000490, 0x00000492, 0x00002000, + 0x00002002, 0x00002010, 0x00002012, 0x00002080, 0x00002082, 0x00002090, 0x00002092, 0x00002400, + 0x00002402, 0x00002410, 0x00002412, 0x00002480, 0x00002482, 0x00002490, 0x00002492, 0x00010000, + 0x00010002, 0x00010010, 0x00010012, 0x00010080, 0x00010082, 0x00010090, 0x00010092, 0x00010400, + 0x00010402, 0x00010410, 0x00010412, 0x00010480, 0x00010482, 0x00010490, 0x00010492, 0x00012000, + 0x00012002, 0x00012010, 0x00012012, 0x00012080, 0x00012082, 0x00012090, 0x00012092, 0x00012400, + 0x00012402, 0x00012410, 0x00012412, 0x00012480, 0x00012482, 0x00012490, 0x00012492, 0x00080000, + 0x00080002, 0x00080010, 0x00080012, 0x00080080, 0x00080082, 0x00080090, 0x00080092, 0x00080400, + 0x00080402, 0x00080410, 0x00080412, 0x00080480, 0x00080482, 0x00080490, 0x00080492, 0x00082000, + 0x00082002, 0x00082010, 0x00082012, 0x00082080, 0x00082082, 0x00082090, 0x00082092, 0x00082400, + 0x00082402, 0x00082410, 0x00082412, 0x00082480, 0x00082482, 0x00082490, 0x00082492, 0x00090000, + 0x00090002, 0x00090010, 0x00090012, 0x00090080, 0x00090082, 0x00090090, 0x00090092, 0x00090400, + 0x00090402, 0x00090410, 0x00090412, 0x00090480, 0x00090482, 0x00090490, 0x00090492, 0x00092000, + 0x00092002, 0x00092010, 0x00092012, 0x00092080, 0x00092082, 0x00092090, 0x00092092, 0x00092400, + 0x00092402, 0x00092410, 0x00092412, 0x00092480, 0x00092482, 0x00092490, 0x00092492, 0x00400000, + 0x00400002, 0x00400010, 0x00400012, 0x00400080, 0x00400082, 0x00400090, 0x00400092, 0x00400400, + 0x00400402, 0x00400410, 
0x00400412, 0x00400480, 0x00400482, 0x00400490, 0x00400492, 0x00402000, + 0x00402002, 0x00402010, 0x00402012, 0x00402080, 0x00402082, 0x00402090, 0x00402092, 0x00402400, + 0x00402402, 0x00402410, 0x00402412, 0x00402480, 0x00402482, 0x00402490, 0x00402492, 0x00410000, + 0x00410002, 0x00410010, 0x00410012, 0x00410080, 0x00410082, 0x00410090, 0x00410092, 0x00410400, + 0x00410402, 0x00410410, 0x00410412, 0x00410480, 0x00410482, 0x00410490, 0x00410492, 0x00412000, + 0x00412002, 0x00412010, 0x00412012, 0x00412080, 0x00412082, 0x00412090, 0x00412092, 0x00412400, + 0x00412402, 0x00412410, 0x00412412, 0x00412480, 0x00412482, 0x00412490, 0x00412492, 0x00480000, + 0x00480002, 0x00480010, 0x00480012, 0x00480080, 0x00480082, 0x00480090, 0x00480092, 0x00480400, + 0x00480402, 0x00480410, 0x00480412, 0x00480480, 0x00480482, 0x00480490, 0x00480492, 0x00482000, + 0x00482002, 0x00482010, 0x00482012, 0x00482080, 0x00482082, 0x00482090, 0x00482092, 0x00482400, + 0x00482402, 0x00482410, 0x00482412, 0x00482480, 0x00482482, 0x00482490, 0x00482492, 0x00490000, + 0x00490002, 0x00490010, 0x00490012, 0x00490080, 0x00490082, 0x00490090, 0x00490092, 0x00490400, + 0x00490402, 0x00490410, 0x00490412, 0x00490480, 0x00490482, 0x00490490, 0x00490492, 0x00492000, + 0x00492002, 0x00492010, 0x00492012, 0x00492080, 0x00492082, 0x00492090, 0x00492092, 0x00492400, + 0x00492402, 0x00492410, 0x00492412, 0x00492480, 0x00492482, 0x00492490, 0x00492492 + }; + + // LUT for Morton3D encode Z + static const uint_fast32_t Morton3D_encode_z_256[256] = { + 0x00000000, + 0x00000004, 0x00000020, 0x00000024, 0x00000100, 0x00000104, 0x00000120, 0x00000124, 0x00000800, + 0x00000804, 0x00000820, 0x00000824, 0x00000900, 0x00000904, 0x00000920, 0x00000924, 0x00004000, + 0x00004004, 0x00004020, 0x00004024, 0x00004100, 0x00004104, 0x00004120, 0x00004124, 0x00004800, + 0x00004804, 0x00004820, 0x00004824, 0x00004900, 0x00004904, 0x00004920, 0x00004924, 0x00020000, + 0x00020004, 0x00020020, 0x00020024, 0x00020100, 0x00020104, 
0x00020120, 0x00020124, 0x00020800, + 0x00020804, 0x00020820, 0x00020824, 0x00020900, 0x00020904, 0x00020920, 0x00020924, 0x00024000, + 0x00024004, 0x00024020, 0x00024024, 0x00024100, 0x00024104, 0x00024120, 0x00024124, 0x00024800, + 0x00024804, 0x00024820, 0x00024824, 0x00024900, 0x00024904, 0x00024920, 0x00024924, 0x00100000, + 0x00100004, 0x00100020, 0x00100024, 0x00100100, 0x00100104, 0x00100120, 0x00100124, 0x00100800, + 0x00100804, 0x00100820, 0x00100824, 0x00100900, 0x00100904, 0x00100920, 0x00100924, 0x00104000, + 0x00104004, 0x00104020, 0x00104024, 0x00104100, 0x00104104, 0x00104120, 0x00104124, 0x00104800, + 0x00104804, 0x00104820, 0x00104824, 0x00104900, 0x00104904, 0x00104920, 0x00104924, 0x00120000, + 0x00120004, 0x00120020, 0x00120024, 0x00120100, 0x00120104, 0x00120120, 0x00120124, 0x00120800, + 0x00120804, 0x00120820, 0x00120824, 0x00120900, 0x00120904, 0x00120920, 0x00120924, 0x00124000, + 0x00124004, 0x00124020, 0x00124024, 0x00124100, 0x00124104, 0x00124120, 0x00124124, 0x00124800, + 0x00124804, 0x00124820, 0x00124824, 0x00124900, 0x00124904, 0x00124920, 0x00124924, 0x00800000, + 0x00800004, 0x00800020, 0x00800024, 0x00800100, 0x00800104, 0x00800120, 0x00800124, 0x00800800, + 0x00800804, 0x00800820, 0x00800824, 0x00800900, 0x00800904, 0x00800920, 0x00800924, 0x00804000, + 0x00804004, 0x00804020, 0x00804024, 0x00804100, 0x00804104, 0x00804120, 0x00804124, 0x00804800, + 0x00804804, 0x00804820, 0x00804824, 0x00804900, 0x00804904, 0x00804920, 0x00804924, 0x00820000, + 0x00820004, 0x00820020, 0x00820024, 0x00820100, 0x00820104, 0x00820120, 0x00820124, 0x00820800, + 0x00820804, 0x00820820, 0x00820824, 0x00820900, 0x00820904, 0x00820920, 0x00820924, 0x00824000, + 0x00824004, 0x00824020, 0x00824024, 0x00824100, 0x00824104, 0x00824120, 0x00824124, 0x00824800, + 0x00824804, 0x00824820, 0x00824824, 0x00824900, 0x00824904, 0x00824920, 0x00824924, 0x00900000, + 0x00900004, 0x00900020, 0x00900024, 0x00900100, 0x00900104, 0x00900120, 0x00900124, 0x00900800, + 
0x00900804, 0x00900820, 0x00900824, 0x00900900, 0x00900904, 0x00900920, 0x00900924, 0x00904000, + 0x00904004, 0x00904020, 0x00904024, 0x00904100, 0x00904104, 0x00904120, 0x00904124, 0x00904800, + 0x00904804, 0x00904820, 0x00904824, 0x00904900, 0x00904904, 0x00904920, 0x00904924, 0x00920000, + 0x00920004, 0x00920020, 0x00920024, 0x00920100, 0x00920104, 0x00920120, 0x00920124, 0x00920800, + 0x00920804, 0x00920820, 0x00920824, 0x00920900, 0x00920904, 0x00920920, 0x00920924, 0x00924000, + 0x00924004, 0x00924020, 0x00924024, 0x00924100, 0x00924104, 0x00924120, 0x00924124, 0x00924800, + 0x00924804, 0x00924820, 0x00924824, 0x00924900, 0x00924904, 0x00924920, 0x00924924 + }; + + // LUT for Morton3D decode X + static const uint_fast8_t Morton3D_decode_x_512[512] = { + 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3, + 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3, + 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3, + 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3, + 4, 5, 4, 5, 4, 5, 4, 5, 6, 7, 6, 7, 6, 7, 6, 7, + 4, 5, 4, 5, 4, 5, 4, 5, 6, 7, 6, 7, 6, 7, 6, 7, + 4, 5, 4, 5, 4, 5, 4, 5, 6, 7, 6, 7, 6, 7, 6, 7, + 4, 5, 4, 5, 4, 5, 4, 5, 6, 7, 6, 7, 6, 7, 6, 7, + 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3, + 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3, + 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3, + 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3, + 4, 5, 4, 5, 4, 5, 4, 5, 6, 7, 6, 7, 6, 7, 6, 7, + 4, 5, 4, 5, 4, 5, 4, 5, 6, 7, 6, 7, 6, 7, 6, 7, + 4, 5, 4, 5, 4, 5, 4, 5, 6, 7, 6, 7, 6, 7, 6, 7, + 4, 5, 4, 5, 4, 5, 4, 5, 6, 7, 6, 7, 6, 7, 6, 7, + 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3, + 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3, + 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3, + 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3, + 4, 5, 4, 5, 4, 5, 4, 5, 6, 7, 6, 7, 6, 7, 6, 7, + 4, 5, 4, 5, 4, 5, 4, 5, 6, 7, 6, 7, 6, 7, 6, 7, + 4, 5, 4, 5, 4, 5, 4, 5, 6, 7, 6, 7, 6, 7, 6, 7, + 4, 5, 4, 5, 4, 5, 4, 5, 6, 7, 6, 7, 6, 7, 6, 7, + 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 
2, 3, 2, 3, 2, 3, + 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3, + 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3, + 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3, + 4, 5, 4, 5, 4, 5, 4, 5, 6, 7, 6, 7, 6, 7, 6, 7, + 4, 5, 4, 5, 4, 5, 4, 5, 6, 7, 6, 7, 6, 7, 6, 7, + 4, 5, 4, 5, 4, 5, 4, 5, 6, 7, 6, 7, 6, 7, 6, 7, + 4, 5, 4, 5, 4, 5, 4, 5, 6, 7, 6, 7, 6, 7, 6, 7 + }; + + // LUT for Morton3D decode Y + static const uint_fast8_t Morton3D_decode_y_512[512] = { + 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, + 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, + 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, + 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, + 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, + 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, + 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, + 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, + 4, 4, 5, 5, 4, 4, 5, 5, 4, 4, 5, 5, 4, 4, 5, 5, + 6, 6, 7, 7, 6, 6, 7, 7, 6, 6, 7, 7, 6, 6, 7, 7, + 4, 4, 5, 5, 4, 4, 5, 5, 4, 4, 5, 5, 4, 4, 5, 5, + 6, 6, 7, 7, 6, 6, 7, 7, 6, 6, 7, 7, 6, 6, 7, 7, + 4, 4, 5, 5, 4, 4, 5, 5, 4, 4, 5, 5, 4, 4, 5, 5, + 6, 6, 7, 7, 6, 6, 7, 7, 6, 6, 7, 7, 6, 6, 7, 7, + 4, 4, 5, 5, 4, 4, 5, 5, 4, 4, 5, 5, 4, 4, 5, 5, + 6, 6, 7, 7, 6, 6, 7, 7, 6, 6, 7, 7, 6, 6, 7, 7, + 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, + 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, + 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, + 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, + 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, + 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, + 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, + 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, + 4, 4, 5, 5, 4, 4, 5, 5, 4, 4, 5, 5, 4, 4, 5, 5, + 6, 6, 7, 7, 6, 6, 7, 7, 6, 6, 7, 7, 6, 6, 7, 7, + 4, 4, 5, 5, 4, 4, 5, 5, 4, 4, 5, 5, 4, 4, 5, 5, + 6, 6, 7, 7, 6, 6, 7, 7, 6, 6, 7, 7, 6, 6, 7, 7, + 4, 4, 5, 5, 4, 4, 5, 5, 4, 4, 5, 5, 4, 4, 5, 5, + 6, 6, 7, 7, 6, 6, 7, 7, 6, 6, 7, 7, 6, 6, 7, 7, + 4, 4, 5, 5, 4, 4, 5, 5, 4, 4, 5, 
5, 4, 4, 5, 5, + 6, 6, 7, 7, 6, 6, 7, 7, 6, 6, 7, 7, 6, 6, 7, 7 + }; + + // LUT for Morton3D decode Z + static const uint_fast8_t Morton3D_decode_z_512[512] = { + 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, + 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, + 2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3, + 2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3, + 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, + 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, + 2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3, + 2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3, + 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, + 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, + 2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3, + 2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3, + 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, + 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, + 2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3, + 2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3, + 4, 4, 4, 4, 5, 5, 5, 5, 4, 4, 4, 4, 5, 5, 5, 5, + 4, 4, 4, 4, 5, 5, 5, 5, 4, 4, 4, 4, 5, 5, 5, 5, + 6, 6, 6, 6, 7, 7, 7, 7, 6, 6, 6, 6, 7, 7, 7, 7, + 6, 6, 6, 6, 7, 7, 7, 7, 6, 6, 6, 6, 7, 7, 7, 7, + 4, 4, 4, 4, 5, 5, 5, 5, 4, 4, 4, 4, 5, 5, 5, 5, + 4, 4, 4, 4, 5, 5, 5, 5, 4, 4, 4, 4, 5, 5, 5, 5, + 6, 6, 6, 6, 7, 7, 7, 7, 6, 6, 6, 6, 7, 7, 7, 7, + 6, 6, 6, 6, 7, 7, 7, 7, 6, 6, 6, 6, 7, 7, 7, 7, + 4, 4, 4, 4, 5, 5, 5, 5, 4, 4, 4, 4, 5, 5, 5, 5, + 4, 4, 4, 4, 5, 5, 5, 5, 4, 4, 4, 4, 5, 5, 5, 5, + 6, 6, 6, 6, 7, 7, 7, 7, 6, 6, 6, 6, 7, 7, 7, 7, + 6, 6, 6, 6, 7, 7, 7, 7, 6, 6, 6, 6, 7, 7, 7, 7, + 4, 4, 4, 4, 5, 5, 5, 5, 4, 4, 4, 4, 5, 5, 5, 5, + 4, 4, 4, 4, 5, 5, 5, 5, 4, 4, 4, 4, 5, 5, 5, 5, + 6, 6, 6, 6, 7, 7, 7, 7, 6, 6, 6, 6, 7, 7, 7, 7, + 6, 6, 6, 6, 7, 7, 7, 7, 6, 6, 6, 6, 7, 7, 7, 7 + }; +} \ No newline at end of file diff --git a/src/libmorton/morton_AVX512BITALG.h b/src/libmorton/morton_AVX512BITALG.h new file mode 100644 index 0000000..d33b7df --- /dev/null +++ b/src/libmorton/morton_AVX512BITALG.h @@ -0,0 +1,223 
@@ +#pragma once +#if defined(__AVX512BITALG__) +#include +#include + +namespace libmorton { + + namespace bitalg_detail { + // "Zip" and interleave an m-vector of n-bit integers into a + // new n*m-bit integer + // 2D MORTONS + inline void bitunzip2D(const uint32_t morton, uint32_t& x, uint32_t& y) noexcept { + // Unpack bits into upper and lower half of 32-bit integer in parallel + // into 16-bit components + const uint32_t Unzipped = _cvtmask32_u32( + _mm256_bitshuffle_epi64_mask( + _mm256_set1_epi32(morton), + _mm256_set_epi8( + // Every odd bit + 31, 29, 27, 25, 23, 21, 19, 17, + 15, 13, 11, 9, 7, 5, 3, 1, + // Every even bit + 30, 28, 26, 24, 22, 20, 18, 16, + 14, 12, 10, 8, 6, 4, 2, 0 + ) + ) + ); + x = static_cast(Unzipped >> 0); + y = static_cast(Unzipped >> 16); + } + inline void bitunzip2D(const uint64_t morton, uint64_t& x, uint64_t& y) noexcept { + // Unpack bits into upper and lower half of 64-bit integer in parallel + // into 32-bit components + const uint64_t Unzipped = _cvtmask64_u64( + _mm512_bitshuffle_epi64_mask( + _mm512_set1_epi64(morton), + _mm512_set_epi8( + // Every odd bit + 63, 61, 59, 57, 55, 53, 51, 49, + 47, 45, 43, 41, 39, 37, 35, 33, + 31, 29, 27, 25, 23, 21, 19, 17, + 15, 13, 11, 9, 7, 5, 3, 1, + // Every even bit + 62, 60, 58, 56, 54, 52, 50, 48, + 46, 44, 42, 40, 38, 36, 34, 32, + 30, 28, 26, 24, 22, 20, 18, 16, + 14, 12, 10, 8, 6, 4, 2, 0 + ) + ) + ); + x = static_cast(Unzipped >> 0); + y = static_cast(Unzipped >> 32); + } + inline uint32_t bitzip2D(uint32_t x, uint32_t y) noexcept { + // Put both 32-bit integer into each 64-bit lane + const __m256i CoordVec = _mm256_set1_epi64x( + (static_cast(y) << 32u) | x + ); + // Interleave bits from 32-bit X and Y coordinate + const __mmask32 Interleave = _mm256_bitshuffle_epi64_mask( + CoordVec, + _mm256_set_epi16( + 0x1000 + 0x0101 * 15, 0x1000 + 0x0101 * 14, + 0x1000 + 0x0101 * 13, 0x1000 + 0x0101 * 12, + 0x1000 + 0x0101 * 11, 0x1000 + 0x0101 * 10, + 0x1000 + 0x0101 * 9, 0x1000 + 0x0101 
* 8, + 0x1000 + 0x0101 * 7, 0x1000 + 0x0101 * 6, + 0x1000 + 0x0101 * 5, 0x1000 + 0x0101 * 4, + 0x1000 + 0x0101 * 3, 0x1000 + 0x0101 * 2, + 0x1000 + 0x0101 * 1, 0x1000 + 0x0101 * 0 + ) + ); + return _cvtmask32_u32(Interleave); + } + + inline uint64_t bitzip2D(uint64_t x, uint64_t y) noexcept { + const __m512i CoordVec = _mm512_set1_epi64( + (static_cast(y) << 32u) | x + ); + // Interleave bits from 32-bit X and Y coordinate + const __mmask64 Interleave = _mm512_bitshuffle_epi64_mask( + CoordVec, + _mm512_set_epi16( + 0x2000 + 0x0101 * 31, 0x2000 + 0x0101 * 30, + 0x2000 + 0x0101 * 29, 0x2000 + 0x0101 * 28, + 0x2000 + 0x0101 * 27, 0x2000 + 0x0101 * 26, + 0x2000 + 0x0101 * 25, 0x2000 + 0x0101 * 24, + 0x2000 + 0x0101 * 23, 0x2000 + 0x0101 * 22, + 0x2000 + 0x0101 * 21, 0x2000 + 0x0101 * 20, + 0x2000 + 0x0101 * 19, 0x2000 + 0x0101 * 18, + 0x2000 + 0x0101 * 17, 0x2000 + 0x0101 * 16, + 0x2000 + 0x0101 * 15, 0x2000 + 0x0101 * 14, + 0x2000 + 0x0101 * 13, 0x2000 + 0x0101 * 12, + 0x2000 + 0x0101 * 11, 0x2000 + 0x0101 * 10, + 0x2000 + 0x0101 * 9, 0x2000 + 0x0101 * 8, + 0x2000 + 0x0101 * 7, 0x2000 + 0x0101 * 6, + 0x2000 + 0x0101 * 5, 0x2000 + 0x0101 * 4, + 0x2000 + 0x0101 * 3, 0x2000 + 0x0101 * 2, + 0x2000 + 0x0101 * 1, 0x2000 + 0x0101 * 0 + ) + ); + return _cvtmask64_u64(Interleave); + } + // 3D MORTONS + inline void bitunzip3D(const uint32_t morton, uint32_t& x, uint32_t& y, uint32_t& z) noexcept { + // Unpack 32-bit integer in parallel into 10-bit components, within 16-bit lanes + const uint64_t Unzipped = _cvtmask64_u64( + _mm512_bitshuffle_epi64_mask( + _mm512_set1_epi64(morton), + _mm512_set_epi8( + ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, + ~0, ~0, ~0, ~0, ~0, ~0, 29, 26, 23, 20, 17, 14, 11, 8, 5, 2, + ~0, ~0, ~0, ~0, ~0, 31, 28, 25, 22, 19, 16, 13, 10, 7, 4, 1, + ~0, ~0, ~0, ~0, ~0, 30, 27, 24, 21, 18, 15, 12, 9, 6, 3, 0 + ) + ) + ); + x = static_cast(Unzipped >> 0); + y = static_cast(Unzipped >> 16); + z = static_cast(Unzipped >> 32); + } + inline 
void bitunzip3D(const uint64_t morton, uint64_t& x, uint64_t& y, uint64_t& z) noexcept { + // Unpack 64-bit integer in parallel into 21-bit components + const uint64_t Unzipped = _cvtmask64_u64( + _mm512_bitshuffle_epi64_mask( + _mm512_set1_epi64(morton), + _mm512_set_epi8( + ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, 61, 58, 55, 52, 49, 46, 43, 40, 37, 34, 31, 28, 25, 22, 19, 16, 13, 10, 7, 4, 1, + ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, 63, 60, 57, 54, 51, 48, 45, 42, 39, 36, 33, 30, 27, 24, 21, 18, 15, 12, 9, 6, 3, 0 + ) + ) + ); + x = static_cast(Unzipped >> 0); + y = static_cast(Unzipped >> 32); + z = bmi2_detail::pext(morton, 0x4924924924924924); + //z = static_cast(Unzipped >> 64); + } + inline uint32_t bitzip3D(uint32_t x, uint32_t y, uint32_t z) noexcept { + const __m256i CoordVec = _mm256_broadcastsi128_si256( + _mm_set_epi32(0, z, y, x) + ); + const __m256i ShuffleVec = _mm256_permutexvar_epi8( + _mm256_set_epi64x( + 0xFFFFFFFFFF100800ul + 0x010101 * 1, // Lane 3 | ...000 | z[1] | y[1] | x[1] + 0xFFFFFFFFFF100800ul + 0x010101 * 0, // Lane 2 | ...000 | z[0] | y[0] | x[0] + 0xFFFFFFFFFF100800ul + 0x010101 * 0, // Lane 1 | ...000 | z[0] | y[0] | x[0] + 0xFFFFFFFFFF100800ul + 0x010101 * 0 // Lane 0 | ...000 | z[0] | y[0] | x[0] + ), + CoordVec + ); + const __mmask32 Interleave = _mm256_bitshuffle_epi64_mask( + ShuffleVec, + _mm256_set_epi64x( + 0x0202010101000000 + 0x0100020100020100 * 8, + 0x0707070606060505 + 0x0201000201000201 * 8, + 0x0504040403030302 + 0x0002010002010002 * 8, + 0x0202010101000000 + 0x0100020100020100 * 8 + ) + ); + return _cvtmask32_u32(Interleave); + } + inline uint64_t bitzip3D(uint64_t x, uint64_t y, uint64_t z) noexcept { + // Put both 32-bit integers into each 64-bit lane + // Todo: _mm512_shuffle_epi8 version, 128-bit lane woes + const __m512i CoordVec = _mm512_set_epi64( + 0, 0, 0, 0, 0, z, y, x + ); + const __m512i ShuffleVec = _mm512_permutexvar_epi8( + _mm512_set_epi64( + 0xFFFFFFFFFF100800ul + 0x010101 * 2, // Lane 7 | 
...000 | z[2] | y[2] | x[2] + 0xFFFFFFFFFF100800ul + 0x010101 * 2, // Lane 6 | ...000 | z[2] | y[2] | x[2] + 0xFFFFFFFFFF100800ul + 0x010101 * 1, // Lane 5 | ...000 | z[1] | y[1] | x[1] + 0xFFFFFFFFFF100800ul + 0x010101 * 1, // Lane 4 | ...000 | z[1] | y[1] | x[1] + 0xFFFFFFFFFF100800ul + 0x010101 * 1, // Lane 3 | ...000 | z[1] | y[1] | x[1] + 0xFFFFFFFFFF100800ul + 0x010101 * 0, // Lane 2 | ...000 | z[0] | y[0] | x[0] + 0xFFFFFFFFFF100800ul + 0x010101 * 0, // Lane 1 | ...000 | z[0] | y[0] | x[0] + 0xFFFFFFFFFF100800ul + 0x010101 * 0 // Lane 0 | ...000 | z[0] | y[0] | x[0] + ), + CoordVec + ); + // Interleave bits from 32-bit X and Y and Z coordinate + const __mmask64 Interleave = _mm512_bitshuffle_epi64_mask( + ShuffleVec, + _mm512_set_epi64( + 0x0504040403030302 + 0x0002010002010002 * 8, + 0x0202010101000000 + 0x0100020100020100 * 8, + 0x0707070606060505 + 0x0201000201000201 * 8, + 0x0504040403030302 + 0x0002010002010002 * 8, + 0x0202010101000000 + 0x0100020100020100 * 8, + 0x0707070606060505 + 0x0201000201000201 * 8, + 0x0504040403030302 + 0x0002010002010002 * 8, + 0x0202010101000000 + 0x0100020100020100 * 8 + ) + ); + return _cvtmask64_u64(Interleave); + } + } // namespace bitalg_detail + + template + inline morton m2D_e_BITALG(const coord x, const coord y) { + return bitalg_detail::bitzip2D( + static_cast(x), static_cast(y) + ); + } + + template + inline void m2D_d_BITALG(const morton m, coord& x, coord& y) { + bitalg_detail::bitunzip2D(m, x, y); + } + + template + inline morton m3D_e_BITALG(const coord x, const coord y, const coord z) { + return bitalg_detail::bitzip3D( + static_cast(x), static_cast(y), static_cast(z) + ); + } + + template + inline void m3D_d_BITALG(const morton m, coord& x, coord& y, coord& z) { + bitalg_detail::bitunzip3D(m, x, y, z); + } +} +#endif \ No newline at end of file diff --git a/src/libmorton/morton_BMI.h b/src/libmorton/morton_BMI.h new file mode 100644 index 0000000..6b0222f --- /dev/null +++ b/src/libmorton/morton_BMI.h @@ 
-0,0 +1,60 @@ +#pragma once +#if defined(__BMI2__) || defined(__AVX2__) +#include +#include + +namespace libmorton { + + namespace bmi2_detail { + inline uint32_t pdep(uint32_t source, uint32_t mask) noexcept { + return _pdep_u32(source, mask); + } + inline uint64_t pdep(uint64_t source, uint64_t mask) noexcept { + return _pdep_u64(source, mask); + } + inline uint32_t pext(uint32_t source, uint32_t mask) noexcept { + return _pext_u32(source, mask); + } + inline uint64_t pext(uint64_t source, uint64_t mask) noexcept { + return _pext_u64(source, mask); + } + } // namespace bmi2_detail + +#define BMI_2D_X_MASK 0x5555555555555555 +#define BMI_2D_Y_MASK 0xAAAAAAAAAAAAAAAA + + template + inline morton m2D_e_BMI(const coord x, const coord y) { + morton m = 0; + m |= bmi2_detail::pdep(static_cast(x), static_cast(BMI_2D_X_MASK)) + | bmi2_detail::pdep(static_cast(y), static_cast(BMI_2D_Y_MASK)); + return m; + } + + template + inline void m2D_d_BMI(const morton m, coord& x, coord& y) { + x = static_cast(bmi2_detail::pext(m, static_cast(BMI_2D_X_MASK))); + y = static_cast(bmi2_detail::pext(m, static_cast(BMI_2D_Y_MASK))); + } + +#define BMI_3D_X_MASK 0x9249249249249249 +#define BMI_3D_Y_MASK 0x2492492492492492 +#define BMI_3D_Z_MASK 0x4924924924924924 + + template + inline morton m3D_e_BMI(const coord x, const coord y, const coord z) { + morton m = 0; + m |= bmi2_detail::pdep(static_cast(x), static_cast(BMI_3D_X_MASK)) + | bmi2_detail::pdep(static_cast(y), static_cast(BMI_3D_Y_MASK)) + | bmi2_detail::pdep(static_cast(z), static_cast(BMI_3D_Z_MASK)); + return m; + } + + template + inline void m3D_d_BMI(const morton m, coord& x, coord& y, coord& z) { + x = static_cast(bmi2_detail::pext(m, static_cast(BMI_3D_X_MASK))); + y = static_cast(bmi2_detail::pext(m, static_cast(BMI_3D_Y_MASK))); + z = static_cast(bmi2_detail::pext(m, static_cast(BMI_3D_Z_MASK))); + } +} +#endif diff --git a/src/libmorton/morton_common.h b/src/libmorton/morton_common.h new file mode 100644 index 
0000000..41078d5 --- /dev/null +++ b/src/libmorton/morton_common.h @@ -0,0 +1,49 @@ +#pragma once + +// Libmorton - Common helper methods needed in Morton encoding/decoding + +#include +#if defined(_MSC_VER) +#include +#endif + +namespace libmorton { + template + inline bool findFirstSetBitZeroIdx(const morton x, unsigned long* firstbit_location) { +#if defined(_MSC_VER) && !defined(_WIN64) + // 32 BIT on 32 BIT + if (sizeof(morton) <= 4) { + return _BitScanReverse(firstbit_location, x) != 0; + } + // 64 BIT on 32 BIT + else { + *firstbit_location = 0; + if (_BitScanReverse(firstbit_location, (x >> 32))) { // check first part + *firstbit_location += 32; + return true; + } + return _BitScanReverse(firstbit_location, (x & 0xFFFFFFFF)) != 0; + } +#elif defined(_MSC_VER) && defined(_WIN64) + // 32 or 64 BIT on 64 BIT + return _BitScanReverse64(firstbit_location, x) != 0; +#elif defined(__GNUC__) + if (x == 0) { + return false; + } + else { + *firstbit_location = static_cast((sizeof(morton) * 8) - __builtin_clzll(x) - 1); + return true; + } +#endif + } + + template + inline bool findFirstSetBit(const morton x, unsigned long* firstbit_location) { + if (findFirstSetBitZeroIdx(x, firstbit_location)) { + *firstbit_location += 1; + return true; + } + return false; + } +} \ No newline at end of file diff --git a/src/main.cpp b/src/main.cpp index 96127b6..4627f57 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -44,6 +44,16 @@ int main(int argc, char** argv) { // Load scene file scene = new Scene(sceneFile); + // Generate BVH + if (scene->triangles.size() > 0) + { +#if USE_LBVH + generateLBVH(scene); +#elif USE_BVH + generateBVH(scene); +#endif + } + //Create Instance for ImGUIData guiData = new GuiDataContainer(); @@ -196,7 +206,7 @@ void mousePositionCallback(GLFWwindow* window, double xpos, double ypos) { } else if (rightMousePressed) { zoom += (ypos - lastY) / height; - zoom = std::fmax(0.1f, zoom); + zoom = std::fmax(0.8f, zoom); camchanged = true; } else if 
(middleMousePressed) { diff --git a/src/main.h b/src/main.h index fdb7d5d..3e13623 100644 --- a/src/main.h +++ b/src/main.h @@ -16,6 +16,7 @@ #include "sceneStructs.h" #include "image.h" +#include "lbvh.h" #include "pathtrace.h" #include "utilities.h" #include "scene.h" diff --git a/src/pathtrace.cu b/src/pathtrace.cu index fd2a464..8eca4dc 100644 --- a/src/pathtrace.cu +++ b/src/pathtrace.cu @@ -1,7 +1,9 @@ #include #include #include +#include #include +#include #include #include @@ -14,6 +16,20 @@ #include "intersections.h" #include "interactions.h" +// Turn on anti-aliasing to removed jagged edges on shapes +#define ANTIALIASING + +// Turn on to sort by material (keeps same materials contiguous in memory) +//#define MATERIAL_SORT + +// Turn on to stream compact +#define STREAM_COMPACTION + +// Turn off cache first bouncing when anti-aliasing is enabled +#ifndef ANTIALIASING + #define CACHE_FIRST_BOUNCE +#endif + #define ERRORCHECK 1 #define FILENAME (strrchr(__FILE__, '/') ? strrchr(__FILE__, '/') + 1 : __FILE__) @@ -38,12 +54,53 @@ void checkCUDAErrorFn(const char* msg, const char* file, int line) { #endif } +// Returns true if a path still has bounces left +struct not_zero +{ + __host__ __device__ + bool operator()(const PathSegment &path) + { + return path.remainingBounces != 0; + } +}; + +// Compares the material ids of two materials to sort them in ascending order +struct mat_id +{ + __host__ __device__ + bool operator()(const ShadeableIntersection &i1, ShadeableIntersection & i2) + { + return i1.materialId < i2.materialId; + } +}; + __host__ __device__ thrust::default_random_engine makeSeededRandomEngine(int iter, int index, int depth) { int h = utilhash((1 << 31) | (depth << 22) | iter) ^ utilhash(index); return thrust::default_random_engine(h); } +// Color correction helper functions (to convert to sRGB) +__host__ __device__ glm::vec3 reinhardOp(glm::vec3 c) { + return c / (glm::vec3(1.f, 1.f, 1.f) + c); +} + +__host__ __device__ glm::vec3 
gammaCorrect(glm::vec3 c) { + glm::vec3 gamma = glm::vec3(1.0 / 2.2, 1.0 / 2.2, 1.0 / 2.2); + return pow(c, gamma); +} + +// Use a cosine-based color palette to map intersection count to color - from "Color Palettes" - Inigo Quilez +__host__ __device__ glm::vec3 palette(glm::vec3 a, glm::vec3 b, glm::vec3 c, glm::vec3 d, float t) { + return a + b * cos(6.28318f * (c * t + d)); +} + +__host__ __device__ glm::vec3 intToColor(float count) { + // Map value to [0, 1] range + float val = count * (1.f / 250.f); + return palette(glm::vec3(0.5f, 0.5f, 0.5f), glm::vec3(0.5f, 0.5f, 0.5f), glm::vec3(1.f, 0.7f, 0.4f), glm::vec3(0.f, 0.15f, 0.2f), val); +} + //Kernel that writes the image to the OpenGL PBO directly. __global__ void sendImageToPBO(uchar4* pbo, glm::ivec2 resolution, int iter, glm::vec3* image) { @@ -54,10 +111,22 @@ __global__ void sendImageToPBO(uchar4* pbo, glm::ivec2 resolution, int index = x + (y * resolution.x); glm::vec3 pix = image[index]; + // Average samples + glm::vec3 mod_color = pix / glm::vec3(iter, iter, iter); + +#if CONVERT_TO_SRGB + // Apply Reinhard operator + mod_color = reinhardOp(mod_color); + + // Apply gamma correction + mod_color = gammaCorrect(mod_color); +#endif + + // Convert to 0-255 scale glm::ivec3 color; - color.x = glm::clamp((int)(pix.x / iter * 255.0), 0, 255); - color.y = glm::clamp((int)(pix.y / iter * 255.0), 0, 255); - color.z = glm::clamp((int)(pix.z / iter * 255.0), 0, 255); + color.x = glm::clamp((int)(mod_color.x * 255.0), 0, 255); + color.y = glm::clamp((int)(mod_color.y * 255.0), 0, 255); + color.z = glm::clamp((int)(mod_color.z * 255.0), 0, 255); // Each thread writes one pixel location in the texture (textel) pbo[index].w = 0; @@ -71,11 +140,15 @@ static Scene* hst_scene = NULL; static GuiDataContainer* guiData = NULL; static glm::vec3* dev_image = NULL; static Geom* dev_geoms = NULL; +static LBVHNode* dev_lbvh = NULL; +static BVHNode* dev_bvh = NULL; +static Triangle* dev_tris = NULL; static Material* dev_materials = 
NULL; static PathSegment* dev_paths = NULL; static ShadeableIntersection* dev_intersections = NULL; -// TODO: static variables for device memory, any extra info you need, etc -// ... + +// For saving first-bounce intersections +static ShadeableIntersection* dev_first_bounce_intersections = NULL; void InitDataContainer(GuiDataContainer* imGuiData) { @@ -96,13 +169,23 @@ void pathtraceInit(Scene* scene) { cudaMalloc(&dev_geoms, scene->geoms.size() * sizeof(Geom)); cudaMemcpy(dev_geoms, scene->geoms.data(), scene->geoms.size() * sizeof(Geom), cudaMemcpyHostToDevice); + cudaMalloc(&dev_lbvh, scene->lbvh.size() * sizeof(LBVHNode)); + cudaMemcpy(dev_lbvh, scene->lbvh.data(), scene->lbvh.size() * sizeof(LBVHNode), cudaMemcpyHostToDevice); + + cudaMalloc(&dev_bvh, scene->bvh.size() * sizeof(BVHNode)); + cudaMemcpy(dev_bvh, scene->bvh.data(), scene->bvh.size() * sizeof(BVHNode), cudaMemcpyHostToDevice); + + cudaMalloc(&dev_tris, scene->triangles.size() * sizeof(Triangle)); + cudaMemcpy(dev_tris, scene->triangles.data(), scene->triangles.size() * sizeof(Triangle), cudaMemcpyHostToDevice); + cudaMalloc(&dev_materials, scene->materials.size() * sizeof(Material)); cudaMemcpy(dev_materials, scene->materials.data(), scene->materials.size() * sizeof(Material), cudaMemcpyHostToDevice); cudaMalloc(&dev_intersections, pixelcount * sizeof(ShadeableIntersection)); cudaMemset(dev_intersections, 0, pixelcount * sizeof(ShadeableIntersection)); - // TODO: initialize any extra device memeory you need + cudaMalloc(&dev_first_bounce_intersections, pixelcount * sizeof(ShadeableIntersection)); + cudaMemset(dev_first_bounce_intersections, 0, pixelcount * sizeof(ShadeableIntersection)); checkCUDAError("pathtraceInit"); } @@ -111,13 +194,45 @@ void pathtraceFree() { cudaFree(dev_image); // no-op if dev_image is null cudaFree(dev_paths); cudaFree(dev_geoms); + cudaFree(dev_lbvh); + cudaFree(dev_bvh); + cudaFree(dev_tris); cudaFree(dev_materials); cudaFree(dev_intersections); - // TODO: clean up any 
extra device memory you created + cudaFree(dev_first_bounce_intersections); checkCUDAError("pathtraceFree"); } +/** +* Concentric Disk Sampling from PBRT Chapter 13.6.2 +*/ +__host__ __device__ glm::vec3 concentricSampleDisk(glm::vec2 &sample) +{ + // Map sample point (uniform random numbers) to range [-1, 1] + glm::vec2 mappedSample = 2.f * sample - glm::vec2(1.f, 1.f); + + // Handle origin to avoid divide by zero + if (mappedSample.x == 0.f && mappedSample.y == 0.f) { + return glm::vec3(0.f); + } + + // Apply concentric mapping to the adjusted sample point + float r = 0.f; + float theta = 0.f; + // Find r and theta depending on x and y coords of mapped point + if (std::abs(mappedSample.x) > std::abs(mappedSample.y)) { + r = mappedSample.x; + theta = PI_OVER_FOUR * (mappedSample.y / mappedSample.x); + } + else { + r = mappedSample.y; + theta = PI_OVER_TWO - PI_OVER_FOUR * (mappedSample.x / mappedSample.y); + } + + return glm::vec3(r * cos(theta), r * sin(theta), 0); +} + /** * Generate PathSegments with rays from the camera through the screen into the * scene, which is the first bounce of rays. 
@@ -131,19 +246,45 @@ __global__ void generateRayFromCamera(Camera cam, int iter, int traceDepth, Path int x = (blockIdx.x * blockDim.x) + threadIdx.x; int y = (blockIdx.y * blockDim.y) + threadIdx.y; + // Add jitter to x and y + thrust::default_random_engine rng = makeSeededRandomEngine(iter, x + y * cam.resolution.x, 0); + thrust::uniform_real_distribution u01(0, 1); + float jitterX = 0.0; + float jitterY = 0.0; +#ifdef ANTIALIASING + jitterX = u01(rng); + jitterY = u01(rng); +#endif + if (x < cam.resolution.x && y < cam.resolution.y) { int index = x + (y * cam.resolution.x); PathSegment& segment = pathSegments[index]; segment.ray.origin = cam.position; - segment.color = glm::vec3(1.0f, 1.0f, 1.0f); + segment.color = glm::vec3(0.0f, 0.0f, 0.0f); + segment.throughput = glm::vec3(1.0f, 1.0f, 1.0f); - // TODO: implement antialiasing by jittering the ray + // Jitter the ray for anti-aliasing segment.ray.direction = glm::normalize(cam.view - - cam.right * cam.pixelLength.x * ((float)x - (float)cam.resolution.x * 0.5f) - - cam.up * cam.pixelLength.y * ((float)y - (float)cam.resolution.y * 0.5f) + - cam.right * cam.pixelLength.x * ((float)(x + jitterX) - (float)cam.resolution.x * 0.5f) + - cam.up * cam.pixelLength.y * ((float)(y + jitterY) - (float)cam.resolution.y * 0.5f) ); + // Depth-of-field (if specified in scene file) + if (cam.lens_radius > 0.0f) { + // Get sample on lens + glm::vec3 samplePoint = cam.lens_radius * concentricSampleDisk(glm::vec2(u01(rng), u01(rng))); + + // Focal point + float ft = glm::length(cam.lookAt - cam.position); + glm::vec3 focalPoint = getPointOnRay(segment.ray, ft); + + // Update ray + segment.ray.origin += samplePoint; + segment.ray.direction = glm::normalize(focalPoint - segment.ray.origin); + } + segment.ray.invDirection = glm::vec3(1.0, 1.0, 1.0) / segment.ray.direction; + segment.ray.intersectionCount = 0.f; segment.pixelIndex = index; segment.remainingBounces = traceDepth; } @@ -158,6 +299,9 @@ __global__ void 
computeIntersections( , int num_paths , PathSegment* pathSegments , Geom* geoms + , LBVHNode* dev_lbvh + , BVHNode* dev_bvh + , Triangle* dev_tris , int geoms_size , ShadeableIntersection* intersections ) @@ -166,7 +310,7 @@ __global__ void computeIntersections( if (path_index < num_paths) { - PathSegment pathSegment = pathSegments[path_index]; + PathSegment &pathSegment = pathSegments[path_index]; float t; glm::vec3 intersect_point; @@ -179,7 +323,6 @@ __global__ void computeIntersections( glm::vec3 tmp_normal; // naive parse through global geoms - for (int i = 0; i < geoms_size; i++) { Geom& geom = geoms[i]; @@ -193,7 +336,16 @@ __global__ void computeIntersections( t = sphereIntersectionTest(geom, pathSegment.ray, tmp_intersect, tmp_normal, outside); } // TODO: add more intersection tests here... triangle? metaball? CSG? - + else if (geom.type == MESH) + { +#if USE_LBVH + t = lbvhIntersectionTest(dev_lbvh, dev_tris, pathSegment.ray, geom.triangleCount, tmp_intersect, tmp_normal, outside); +#elif USE_BVH + t = bvhIntersectionTest(dev_bvh, dev_tris, pathSegment.ray, geom.triangleCount, tmp_intersect, tmp_normal, outside); +#else + t = meshIntersectionTest(geom, pathSegment.ray, dev_tris, tmp_intersect, tmp_normal, outside); +#endif + } // Compute the minimum t from the intersection tests to determine what // scene geometry object was hit first. if (t > 0.0f && t_min > t) @@ -273,6 +425,68 @@ __global__ void shadeFakeMaterial( } } +__global__ void shadeAllMaterials( + int iter + , int num_paths + , ShadeableIntersection* shadeableIntersections + , PathSegment* pathSegments + , Material* materials +) +{ + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < num_paths) + { + //if (pathSegments[idx].remainingBounces <= 0) + //{ + // return; + //} + ShadeableIntersection intersection = shadeableIntersections[idx]; + if (intersection.t > 0.0f) { // if the intersection exists... + // Set up the RNG + // LOOK: this is how you use thrust's RNG! 
Please look at + // makeSeededRandomEngine as well. + thrust::default_random_engine rng = makeSeededRandomEngine(iter, idx, pathSegments[idx].remainingBounces); + thrust::uniform_real_distribution u01(0, 1); + + Material material = materials[intersection.materialId]; + glm::vec3 materialColor = material.color; + + // If the material indicates that the object was a light, "light" the ray + if (material.emittance > 0.0f) { + pathSegments[idx].color += (materialColor * material.emittance) * pathSegments[idx].throughput; + pathSegments[idx].remainingBounces = 0; + } + // Otherwise, do some pseudo-lighting computation. This is actually more + // like what you would expect from shading in a rasterizer like OpenGL. + // TODO: replace this! you should be able to start with basically a one-liner + else { + scatterRay(pathSegments[idx], getPointOnRay(pathSegments[idx].ray, intersection.t), + intersection.surfaceNormal, material, rng); + } + // If there was no intersection, color the ray black. + // Lots of renderers use 4 channel color, RGBA, where A = alpha, often + // used for opacity, in which case they can indicate "no opacity". + // This can be useful for post-processing and image compositing. 
+ +#if RUSSIAN_ROULETTE + if (iter > 3) { + float maxColorChannel = glm::max(pathSegments[idx].throughput.r, glm::max(pathSegments[idx].throughput.g, pathSegments[idx].throughput.b)); + float xi = u01(rng); + if (xi < (1.f - maxColorChannel)) { + pathSegments[idx].remainingBounces = 0; + } + else { + pathSegments[idx].throughput /= maxColorChannel; + } + } +#endif + } + else { + pathSegments[idx].remainingBounces = 0; + } + } +} + // Add the current iteration's output to the overall image __global__ void finalGather(int nPaths, glm::vec3* image, PathSegment* iterationPaths) { @@ -281,7 +495,11 @@ __global__ void finalGather(int nPaths, glm::vec3* image, PathSegment* iteration if (index < nPaths) { PathSegment iterationPath = iterationPaths[index]; +#if DISPLAY_HEATMAP + image[iterationPath.pixelIndex] += intToColor(iterationPath.ray.intersectionCount); +#else image[iterationPath.pixelIndex] += iterationPath.color; +#endif } } @@ -340,6 +558,8 @@ void pathtrace(uchar4* pbo, int frame, int iter) { int depth = 0; PathSegment* dev_path_end = dev_paths + pixelcount; int num_paths = dev_path_end - dev_paths; + int compact_num_paths = num_paths; + thrust::device_ptr dev_thrust_paths = thrust::device_pointer_cast(dev_paths); // --- PathSegment Tracing Stage --- // Shoot ray into scene, bounce between objects, push shading chunks @@ -348,39 +568,101 @@ void pathtrace(uchar4* pbo, int frame, int iter) { while (!iterationComplete) { // clean shading chunks - cudaMemset(dev_intersections, 0, pixelcount * sizeof(ShadeableIntersection)); + cudaMemset(dev_intersections, 0, compact_num_paths * sizeof(ShadeableIntersection)); // tracing - dim3 numblocksPathSegmentTracing = (num_paths + blockSize1d - 1) / blockSize1d; + dim3 numblocksPathSegmentTracing = (compact_num_paths + blockSize1d - 1) / blockSize1d; + +#ifdef CACHE_FIRST_BOUNCE + // If first iteration, compute first bounce intersections + if (iter == 1) { + computeIntersections << > > ( + depth + , compact_num_paths + , 
dev_paths + , dev_geoms + , dev_lbvh + , dev_bvh + , dev_tris + , hst_scene->geoms.size() + , dev_intersections + ); + checkCUDAError("trace one bounce"); + cudaDeviceSynchronize(); + if (depth == 0) { + cudaMemcpy(dev_first_bounce_intersections, dev_intersections, pixelcount * sizeof(ShadeableIntersection), cudaMemcpyDeviceToDevice); + } + } + // For all subsequent iterations, read from cached first bounce intersections + else { + if (depth == 0) { + cudaMemcpy(dev_intersections, dev_first_bounce_intersections, pixelcount * sizeof(ShadeableIntersection), cudaMemcpyDeviceToDevice); + } + else { + computeIntersections << > > ( + depth + , compact_num_paths + , dev_paths + , dev_geoms + , dev_lbvh + , dev_bvh + , dev_tris + , hst_scene->geoms.size() + , dev_intersections + ); + checkCUDAError("trace one bounce"); + cudaDeviceSynchronize(); + } + } +#else computeIntersections << > > ( depth - , num_paths + , compact_num_paths , dev_paths , dev_geoms + , dev_lbvh + , dev_bvh + , dev_tris , hst_scene->geoms.size() , dev_intersections ); checkCUDAError("trace one bounce"); cudaDeviceSynchronize(); +#endif depth++; // TODO: // --- Shading Stage --- // Shade path segments based on intersections and generate new rays by - // evaluating the BSDF. - // Start off with just a big kernel that handles all the different - // materials you have in the scenefile. - // TODO: compare between directly shading the path segments and shading - // path segments that have been reshuffled to be contiguous in memory. + // evaluating the BSDF. + // Start off with just a big kernel that handles all the different + // materials you have in the scenefile. + // TODO: compare between directly shading the path segments and shading + // path segments that have been reshuffled to be contiguous in memory. 
+ +#ifdef MATERIAL_SORT + // Shuffle paths to be contiguous in memory + thrust::device_ptr dev_thrust_intersections = thrust::device_pointer_cast(dev_intersections); + thrust::sort_by_key(dev_thrust_intersections, dev_thrust_intersections + compact_num_paths, dev_thrust_paths, mat_id()); +#endif - shadeFakeMaterial << > > ( + shadeAllMaterials << > > ( iter, - num_paths, + compact_num_paths, dev_intersections, dev_paths, dev_materials ); - iterationComplete = true; // TODO: should be based off stream compaction results. + + // Stream compact +#ifdef STREAM_COMPACTION + thrust::device_ptr dev_thrust_path_end = thrust::stable_partition(dev_thrust_paths, dev_thrust_paths + compact_num_paths, not_zero()); + dev_path_end = dev_thrust_path_end.get(); + compact_num_paths = dev_path_end - dev_paths; +#endif + + // TODO: should be based off stream compaction results + if (depth == traceDepth || dev_paths == dev_path_end) { iterationComplete = true; } if (guiData != NULL) { diff --git a/src/scene.cpp b/src/scene.cpp index 3fb6239..6d7b036 100644 --- a/src/scene.cpp +++ b/src/scene.cpp @@ -4,6 +4,9 @@ #include #include +#define TINYOBJLOADER_IMPLEMENTATION +#include "tiny_obj_loader.h" + Scene::Scene(string filename) { cout << "Reading scene from " << filename << " ..." << endl; cout << " " << endl; @@ -45,12 +48,20 @@ int Scene::loadGeom(string objectid) { //load object type utilityCore::safeGetline(fp_in, line); if (!line.empty() && fp_in.good()) { - if (strcmp(line.c_str(), "sphere") == 0) { + vector tokens = utilityCore::tokenizeString(line); + if (strcmp(tokens[0].c_str(), "sphere") == 0) { cout << "Creating new sphere..." << endl; newGeom.type = SPHERE; - } else if (strcmp(line.c_str(), "cube") == 0) { + //newGeom.tri = glm::vec3(-1.0, -1.0, -1.0); + } else if (strcmp(tokens[0].c_str(), "cube") == 0) { cout << "Creating new cube..." 
<< endl; newGeom.type = CUBE; + //newGeom.tri = glm::vec3(-1.0, -1.0, -1.0); + } + else if (strcmp(tokens[0].c_str(), "mesh") == 0) { + cout << "Creating new mesh..." << endl; + loadOBJ(tokens[1], id); + return 1; } } @@ -96,7 +107,7 @@ int Scene::loadCamera() { float fovy; //load static properties - for (int i = 0; i < 5; i++) { + for (int i = 0; i < 7; i++) { string line; utilityCore::safeGetline(fp_in, line); vector tokens = utilityCore::tokenizeString(line); @@ -111,6 +122,10 @@ int Scene::loadCamera() { state.traceDepth = atoi(tokens[1].c_str()); } else if (strcmp(tokens[0].c_str(), "FILE") == 0) { state.imageName = tokens[1]; + } else if (strcmp(tokens[0].c_str(), "LENS_RADIUS") == 0) { + camera.lens_radius = atof(tokens[1].c_str()); + } else if (strcmp(tokens[0].c_str(), "FOCAL_DIST") == 0) { + camera.focal_dist = atof(tokens[1].c_str()); } } @@ -186,3 +201,118 @@ int Scene::loadMaterial(string materialid) { return 1; } } + +// Load obj using tinyobjloader (based off of example give by tinyobj library) +int Scene::loadOBJ(string filename, int objectid) +{ + int materialid = -1; + glm::vec3 translation = glm::vec3(0.0, 0.0, 0.0); + glm::vec3 rotation = glm::vec3(0.0, 0.0, 0.0); + glm::vec3 scale = glm::vec3(1.0, 1.0, 1.0); + string line; + + // Get material id (same for entire mesh) + utilityCore::safeGetline(fp_in, line); + if (!line.empty() && fp_in.good()) { + vector tokens = utilityCore::tokenizeString(line); + materialid = atoi(tokens[1].c_str()); + cout << "Connecting Geom " << objectid << " to Material " << materialid << "..." 
<< endl; + } + + // Get transformations (default for all triangles) + glm::mat4 transform = utilityCore::buildTransformationMatrix(translation, rotation, scale); + glm::mat4 inverseTransform = glm::inverse(transform); + glm::mat4 invTranspose = glm::inverseTranspose(transform); + + // Load obj using tinyobjloader + std::string inputfile = "../obj/" + filename; + tinyobj::ObjReaderConfig reader_config; + tinyobj::ObjReader reader; + + if (!reader.ParseFromFile(inputfile, reader_config)) { + if (!reader.Error().empty()) { + std::cerr << "TinyObjReader: " << reader.Error(); + } + exit(1); + } + + if (!reader.Warning().empty()) { + std::cout << "TinyObjReader: " << reader.Warning(); + } + + auto& attrib = reader.GetAttrib(); + auto& shapes = reader.GetShapes(); + auto& materials = reader.GetMaterials(); + + meshCount = 0; + for (size_t s = 0; s < shapes.size(); s++) { + std::vector mesh_triangles; + + // Track aabb + mesh_aabbs.resize(shapes.size()); + glm::vec3 min = glm::vec3(INFINITY, INFINITY, INFINITY); + glm::vec3 max = glm::vec3(-INFINITY, -INFINITY, -INFINITY); + + // Loop over faces(polygon) + size_t index_offset = 0; + for (size_t f = 0; f < shapes[s].mesh.num_face_vertices.size(); f++) { + size_t fv = size_t(shapes[s].mesh.num_face_vertices[f]); + + // Loop over vertices in the face. 
+ Triangle tri; + + int i = 0; + for (size_t v = 0; v < fv; v++) { + // access to vertex + tinyobj::index_t idx = shapes[s].mesh.indices[index_offset + v]; + + tinyobj::real_t vx = attrib.vertices[3 * size_t(idx.vertex_index) + 0]; + tinyobj::real_t vy = attrib.vertices[3 * size_t(idx.vertex_index) + 1]; + tinyobj::real_t vz = attrib.vertices[3 * size_t(idx.vertex_index) + 2]; + tri.verts[i] = glm::vec3((float)vx, (float)vy, (float)vz); + + if (idx.normal_index >= 0) { + tinyobj::real_t nx = attrib.normals[3 * size_t(idx.normal_index) + 0]; + tinyobj::real_t ny = attrib.normals[3 * size_t(idx.normal_index) + 1]; + tinyobj::real_t nz = attrib.normals[3 * size_t(idx.normal_index) + 2]; + tri.norms[i] = glm::vec3((float)nx, (float)ny, (float)nz); + } + + // Determine AABB min and max + min = glm::min(min, tri.verts[i]); + max = glm::max(max, tri.verts[i]); + + i++; + } + tri.computeAABB(); + tri.computeCentroid(); + tri.objectId = f; + mesh_triangles.push_back(tri); + + index_offset += fv; + } + + // Set AABB + mesh_aabbs[s].min = min; + mesh_aabbs[s].max = max; + + // Initialize new mesh + Geom newGeom; + newGeom.type = MESH; + newGeom.aabb = mesh_aabbs[s]; + newGeom.startIdx = triangles.size(); + newGeom.triangleCount = mesh_triangles.size(); + newGeom.materialid = materialid; + newGeom.translation = translation; + newGeom.rotation = rotation; + newGeom.scale = scale; + newGeom.transform = transform; + newGeom.inverseTransform = inverseTransform; + newGeom.invTranspose = invTranspose; + triangles.insert(triangles.end(), mesh_triangles.begin(), mesh_triangles.end()); + geoms.push_back(newGeom); + meshCount++; + } + + return 1; +} diff --git a/src/scene.h b/src/scene.h index f29a917..7034431 100644 --- a/src/scene.h +++ b/src/scene.h @@ -5,6 +5,7 @@ #include #include #include "glm/glm.hpp" +#include "lbvh.h" #include "utilities.h" #include "sceneStructs.h" @@ -16,11 +17,19 @@ class Scene { int loadMaterial(string materialid); int loadGeom(string objectid); int 
loadCamera(); + int loadOBJ(string filename, int objectid); public: Scene(string filename); ~Scene(); std::vector geoms; std::vector materials; + std::vector triangles; + std::vector sorted_triangles; + std::vector mcodes; + std::vector bvh; + std::vector lbvh; + std::vector mesh_aabbs; + int meshCount; RenderState state; }; diff --git a/src/sceneStructs.h b/src/sceneStructs.h index da4dbf3..e4132fd 100644 --- a/src/sceneStructs.h +++ b/src/sceneStructs.h @@ -10,15 +10,73 @@ enum GeomType { SPHERE, CUBE, + MESH }; struct Ray { glm::vec3 origin; glm::vec3 direction; + glm::vec3 invDirection; + float intersectionCount; +}; + +struct AABB { + glm::vec3 min; + glm::vec3 max; + + float surfaceArea() { + glm::vec3 e = min - max; + return 2.f * (e.x * e.y + e.x * e.z + e.y * e.z); + }; +}; + +struct MortonCode { + int objectId; + unsigned int code; +}; + +struct Triangle { + AABB aabb; + glm::vec3 centroid; + glm::vec3 verts[3]; + glm::vec3 norms[3]; + int objectId; + + void computeAABB() { + aabb.min = glm::min(verts[0], glm::min(verts[1], verts[2])); + aabb.max = glm::max(verts[0], glm::max(verts[1], verts[2])); + } + + void computeCentroid() { + centroid = (verts[0] + verts[1] + verts[2]) / glm::vec3(3.f, 3.f, 3.f); + } +}; + +struct NodeRange { + int i; + int j; + int l; + int d; +}; + +struct BVHNode { + AABB aabb; + unsigned int left, right; + int firstTri, numTris; +}; + +struct LBVHNode { + AABB aabb; + int objectId; + unsigned int left; + unsigned int right; }; struct Geom { enum GeomType type; + AABB aabb; + int startIdx; + int triangleCount; int materialid; glm::vec3 translation; glm::vec3 rotation; @@ -49,6 +107,8 @@ struct Camera { glm::vec3 right; glm::vec2 fov; glm::vec2 pixelLength; + float lens_radius; + float focal_dist; }; struct RenderState { @@ -61,7 +121,8 @@ struct RenderState { struct PathSegment { Ray ray; - glm::vec3 color; + glm::vec3 color; // accumulated light + glm::vec3 throughput; int pixelIndex; int remainingBounces; }; diff --git 
a/src/tiny_obj_loader.h b/src/tiny_obj_loader.h new file mode 100644 index 0000000..7d0c384 --- /dev/null +++ b/src/tiny_obj_loader.h @@ -0,0 +1,3333 @@ +/* +The MIT License (MIT) + +Copyright (c) 2012-Present, Syoyo Fujita and many contributors. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +// +// version 2.0.0 : Add new object oriented API. 1.x API is still provided. +// * Support line primitive. +// * Support points primitive. +// * Support multiple search path for .mtl(v1 API). +// * Support vertex weight `vw`(as an tinyobj extension) +// * Support escaped whitespece in mtllib +// * Add robust triangulation using Mapbox earcut(TINYOBJLOADER_USE_MAPBOX_EARCUT). +// version 1.4.0 : Modifed ParseTextureNameAndOption API +// version 1.3.1 : Make ParseTextureNameAndOption API public +// version 1.3.0 : Separate warning and error message(breaking API of LoadObj) +// version 1.2.3 : Added color space extension('-colorspace') to tex opts. +// version 1.2.2 : Parse multiple group names. 
+// version 1.2.1 : Added initial support for line('l') primitive(PR #178) +// version 1.2.0 : Hardened implementation(#175) +// version 1.1.1 : Support smoothing groups(#162) +// version 1.1.0 : Support parsing vertex color(#144) +// version 1.0.8 : Fix parsing `g` tag just after `usemtl`(#138) +// version 1.0.7 : Support multiple tex options(#126) +// version 1.0.6 : Add TINYOBJLOADER_USE_DOUBLE option(#124) +// version 1.0.5 : Ignore `Tr` when `d` exists in MTL(#43) +// version 1.0.4 : Support multiple filenames for 'mtllib'(#112) +// version 1.0.3 : Support parsing texture options(#85) +// version 1.0.2 : Improve parsing speed by about a factor of 2 for large +// files(#105) +// version 1.0.1 : Fixes a shape is lost if obj ends with a 'usemtl'(#104) +// version 1.0.0 : Change data structure. Change license from BSD to MIT. +// + +// +// Use this in *one* .cc +// #define TINYOBJLOADER_IMPLEMENTATION +// #include "tiny_obj_loader.h" +// + +#ifndef TINY_OBJ_LOADER_H_ +#define TINY_OBJ_LOADER_H_ + +#include +#include +#include + +namespace tinyobj { + +// TODO(syoyo): Better C++11 detection for older compiler +#if __cplusplus > 199711L +#define TINYOBJ_OVERRIDE override +#else +#define TINYOBJ_OVERRIDE +#endif + +#ifdef __clang__ +#pragma clang diagnostic push +#if __has_warning("-Wzero-as-null-pointer-constant") +#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" +#endif + +#pragma clang diagnostic ignored "-Wpadded" + +#endif + +// https://en.wikipedia.org/wiki/Wavefront_.obj_file says ... 
+// +// -blendu on | off # set horizontal texture blending +// (default on) +// -blendv on | off # set vertical texture blending +// (default on) +// -boost real_value # boost mip-map sharpness +// -mm base_value gain_value # modify texture map values (default +// 0 1) +// # base_value = brightness, +// gain_value = contrast +// -o u [v [w]] # Origin offset (default +// 0 0 0) +// -s u [v [w]] # Scale (default +// 1 1 1) +// -t u [v [w]] # Turbulence (default +// 0 0 0) +// -texres resolution # texture resolution to create +// -clamp on | off # only render texels in the clamped +// 0-1 range (default off) +// # When unclamped, textures are +// repeated across a surface, +// # when clamped, only texels which +// fall within the 0-1 +// # range are rendered. +// -bm mult_value # bump multiplier (for bump maps +// only) +// +// -imfchan r | g | b | m | l | z # specifies which channel of the file +// is used to +// # create a scalar or bump texture. +// r:red, g:green, +// # b:blue, m:matte, l:luminance, +// z:z-depth.. +// # (the default for bump is 'l' and +// for decal is 'm') +// bump -imfchan r bumpmap.tga # says to use the red channel of +// bumpmap.tga as the bumpmap +// +// For reflection maps... +// +// -type sphere # specifies a sphere for a "refl" +// reflection map +// -type cube_top | cube_bottom | # when using a cube map, the texture +// file for each +// cube_front | cube_back | # side of the cube is specified +// separately +// cube_left | cube_right +// +// TinyObjLoader extension. +// +// -colorspace SPACE # Color space of the texture. e.g. 
+// 'sRGB` or 'linear' +// + +#ifdef TINYOBJLOADER_USE_DOUBLE +//#pragma message "using double" +typedef double real_t; +#else +//#pragma message "using float" +typedef float real_t; +#endif + +typedef enum { + TEXTURE_TYPE_NONE, // default + TEXTURE_TYPE_SPHERE, + TEXTURE_TYPE_CUBE_TOP, + TEXTURE_TYPE_CUBE_BOTTOM, + TEXTURE_TYPE_CUBE_FRONT, + TEXTURE_TYPE_CUBE_BACK, + TEXTURE_TYPE_CUBE_LEFT, + TEXTURE_TYPE_CUBE_RIGHT +} texture_type_t; + +struct texture_option_t { + texture_type_t type; // -type (default TEXTURE_TYPE_NONE) + real_t sharpness; // -boost (default 1.0?) + real_t brightness; // base_value in -mm option (default 0) + real_t contrast; // gain_value in -mm option (default 1) + real_t origin_offset[3]; // -o u [v [w]] (default 0 0 0) + real_t scale[3]; // -s u [v [w]] (default 1 1 1) + real_t turbulence[3]; // -t u [v [w]] (default 0 0 0) + int texture_resolution; // -texres resolution (No default value in the spec. + // We'll use -1) + bool clamp; // -clamp (default false) + char imfchan; // -imfchan (the default for bump is 'l' and for decal is 'm') + bool blendu; // -blendu (default on) + bool blendv; // -blendv (default on) + real_t bump_multiplier; // -bm (for bump maps only, default 1.0) + + // extension + std::string colorspace; // Explicitly specify color space of stored texel + // value. Usually `sRGB` or `linear` (default empty). +}; + +struct material_t { + std::string name; + + real_t ambient[3]; + real_t diffuse[3]; + real_t specular[3]; + real_t transmittance[3]; + real_t emission[3]; + real_t shininess; + real_t ior; // index of refraction + real_t dissolve; // 1 == opaque; 0 == fully transparent + // illumination model (see http://www.fileformat.info/format/material/) + int illum; + + int dummy; // Suppress padding warning. 
+ + std::string ambient_texname; // map_Ka + std::string diffuse_texname; // map_Kd + std::string specular_texname; // map_Ks + std::string specular_highlight_texname; // map_Ns + std::string bump_texname; // map_bump, map_Bump, bump + std::string displacement_texname; // disp + std::string alpha_texname; // map_d + std::string reflection_texname; // refl + + texture_option_t ambient_texopt; + texture_option_t diffuse_texopt; + texture_option_t specular_texopt; + texture_option_t specular_highlight_texopt; + texture_option_t bump_texopt; + texture_option_t displacement_texopt; + texture_option_t alpha_texopt; + texture_option_t reflection_texopt; + + // PBR extension + // http://exocortex.com/blog/extending_wavefront_mtl_to_support_pbr + real_t roughness; // [0, 1] default 0 + real_t metallic; // [0, 1] default 0 + real_t sheen; // [0, 1] default 0 + real_t clearcoat_thickness; // [0, 1] default 0 + real_t clearcoat_roughness; // [0, 1] default 0 + real_t anisotropy; // aniso. [0, 1] default 0 + real_t anisotropy_rotation; // anisor. [0, 1] default 0 + real_t pad0; + std::string roughness_texname; // map_Pr + std::string metallic_texname; // map_Pm + std::string sheen_texname; // map_Ps + std::string emissive_texname; // map_Ke + std::string normal_texname; // norm. For normal mapping. 
+ + texture_option_t roughness_texopt; + texture_option_t metallic_texopt; + texture_option_t sheen_texopt; + texture_option_t emissive_texopt; + texture_option_t normal_texopt; + + int pad2; + + std::map unknown_parameter; + +#ifdef TINY_OBJ_LOADER_PYTHON_BINDING + // For pybind11 + std::array GetDiffuse() { + std::array values; + values[0] = double(diffuse[0]); + values[1] = double(diffuse[1]); + values[2] = double(diffuse[2]); + + return values; + } + + std::array GetSpecular() { + std::array values; + values[0] = double(specular[0]); + values[1] = double(specular[1]); + values[2] = double(specular[2]); + + return values; + } + + std::array GetTransmittance() { + std::array values; + values[0] = double(transmittance[0]); + values[1] = double(transmittance[1]); + values[2] = double(transmittance[2]); + + return values; + } + + std::array GetEmission() { + std::array values; + values[0] = double(emission[0]); + values[1] = double(emission[1]); + values[2] = double(emission[2]); + + return values; + } + + std::array GetAmbient() { + std::array values; + values[0] = double(ambient[0]); + values[1] = double(ambient[1]); + values[2] = double(ambient[2]); + + return values; + } + + void SetDiffuse(std::array &a) { + diffuse[0] = real_t(a[0]); + diffuse[1] = real_t(a[1]); + diffuse[2] = real_t(a[2]); + } + + void SetAmbient(std::array &a) { + ambient[0] = real_t(a[0]); + ambient[1] = real_t(a[1]); + ambient[2] = real_t(a[2]); + } + + void SetSpecular(std::array &a) { + specular[0] = real_t(a[0]); + specular[1] = real_t(a[1]); + specular[2] = real_t(a[2]); + } + + void SetTransmittance(std::array &a) { + transmittance[0] = real_t(a[0]); + transmittance[1] = real_t(a[1]); + transmittance[2] = real_t(a[2]); + } + + std::string GetCustomParameter(const std::string &key) { + std::map::const_iterator it = + unknown_parameter.find(key); + + if (it != unknown_parameter.end()) { + return it->second; + } + return std::string(); + } + +#endif +}; + +struct tag_t { + std::string 
name; + + std::vector intValues; + std::vector floatValues; + std::vector stringValues; +}; + +struct joint_and_weight_t { + int joint_id; + real_t weight; +}; + +struct skin_weight_t { + int vertex_id; // Corresponding vertex index in `attrib_t::vertices`. + // Compared to `index_t`, this index must be positive and + // start with 0(does not allow relative indexing) + std::vector weightValues; +}; + +// Index struct to support different indices for vtx/normal/texcoord. +// -1 means not used. +struct index_t { + int vertex_index; + int normal_index; + int texcoord_index; +}; + +struct mesh_t { + std::vector indices; + std::vector + num_face_vertices; // The number of vertices per + // face. 3 = triangle, 4 = quad, + // ... Up to 255 vertices per face. + std::vector material_ids; // per-face material ID + std::vector smoothing_group_ids; // per-face smoothing group + // ID(0 = off. positive value + // = group id) + std::vector tags; // SubD tag +}; + +// struct path_t { +// std::vector indices; // pairs of indices for lines +//}; + +struct lines_t { + // Linear flattened indices. + std::vector indices; // indices for vertices(poly lines) + std::vector num_line_vertices; // The number of vertices per line. +}; + +struct points_t { + std::vector indices; // indices for points +}; + +struct shape_t { + std::string name; + mesh_t mesh; + lines_t lines; + points_t points; +}; + +// Vertex attributes +struct attrib_t { + std::vector vertices; // 'v'(xyz) + + // For backward compatibility, we store vertex weight in separate array. + std::vector vertex_weights; // 'v'(w) + std::vector normals; // 'vn' + std::vector texcoords; // 'vt'(uv) + + // For backward compatibility, we store texture coordinate 'w' in separate + // array. + std::vector texcoord_ws; // 'vt'(w) + std::vector colors; // extension: vertex colors + + // + // TinyObj extension. + // + + // NOTE(syoyo): array index is based on the appearance order. 
+ // To get a corresponding skin weight for a specific vertex id `vid`, + // Need to reconstruct a look up table: `skin_weight_t::vertex_id` == `vid` + // (e.g. using std::map, std::unordered_map) + std::vector skin_weights; + + attrib_t() {} + + // + // For pybind11 + // + const std::vector &GetVertices() const { return vertices; } + + const std::vector &GetVertexWeights() const { return vertex_weights; } +}; + +struct callback_t { + // W is optional and set to 1 if there is no `w` item in `v` line + void (*vertex_cb)(void *user_data, real_t x, real_t y, real_t z, real_t w); + void (*normal_cb)(void *user_data, real_t x, real_t y, real_t z); + + // y and z are optional and set to 0 if there is no `y` and/or `z` item(s) in + // `vt` line. + void (*texcoord_cb)(void *user_data, real_t x, real_t y, real_t z); + + // called per 'f' line. num_indices is the number of face indices(e.g. 3 for + // triangle, 4 for quad) + // 0 will be passed for undefined index in index_t members. + void (*index_cb)(void *user_data, index_t *indices, int num_indices); + // `name` material name, `material_id` = the array index of material_t[]. -1 + // if + // a material not found in .mtl + void (*usemtl_cb)(void *user_data, const char *name, int material_id); + // `materials` = parsed material data. + void (*mtllib_cb)(void *user_data, const material_t *materials, + int num_materials); + // There may be multiple group names + void (*group_cb)(void *user_data, const char **names, int num_names); + void (*object_cb)(void *user_data, const char *name); + + callback_t() + : vertex_cb(NULL), + normal_cb(NULL), + texcoord_cb(NULL), + index_cb(NULL), + usemtl_cb(NULL), + mtllib_cb(NULL), + group_cb(NULL), + object_cb(NULL) {} +}; + +class MaterialReader { + public: + MaterialReader() {} + virtual ~MaterialReader(); + + virtual bool operator()(const std::string &matId, + std::vector *materials, + std::map *matMap, std::string *warn, + std::string *err) = 0; +}; + +/// +/// Read .mtl from a file. 
// v2 API: options consumed by ObjReader when parsing an .obj file or string.
struct ObjReaderConfig {
  // Whether polygons are triangulated while loading.
  bool triangulate;

  // Currently not used.
  //   "simple" or empty: create a triangle fan
  //   "earcut":          use the ear-clipping based algorithm
  std::string triangulation_method;

  // Parse vertex colors. When the file carries none they are filled with a
  // default value; false disables vertex colors. Enabling this increases the
  // memory used by the parsed .obj.
  bool vertex_color;

  // Search path for the .mtl file. Empty (the default) means: look in the
  // same directory as the .obj file. Only meaningful when loading from a file.
  std::string mtl_search_path;

  ObjReaderConfig()
      : triangulate(true),
        triangulation_method("simple"),
        vertex_color(true),
        mtl_search_path() {}
};
+ /// + /// @param[in] filename wavefront .obj filename + /// @param[in] config Reader configuration + /// + bool ParseFromFile(const std::string &filename, + const ObjReaderConfig &config = ObjReaderConfig()); + + /// + /// Parse .obj from a text string. + /// Need to supply .mtl text string by `mtl_text`. + /// This function ignores `mtllib` line in .obj text. + /// + /// @param[in] obj_text wavefront .obj filename + /// @param[in] mtl_text wavefront .mtl filename + /// @param[in] config Reader configuration + /// + bool ParseFromString(const std::string &obj_text, const std::string &mtl_text, + const ObjReaderConfig &config = ObjReaderConfig()); + + /// + /// .obj was loaded or parsed correctly. + /// + bool Valid() const { return valid_; } + + const attrib_t &GetAttrib() const { return attrib_; } + + const std::vector &GetShapes() const { return shapes_; } + + const std::vector &GetMaterials() const { return materials_; } + + /// + /// Warning message(may be filled after `Load` or `Parse`) + /// + const std::string &Warning() const { return warning_; } + + /// + /// Error message(filled when `Load` or `Parse` failed) + /// + const std::string &Error() const { return error_; } + + private: + bool valid_; + + attrib_t attrib_; + std::vector shapes_; + std::vector materials_; + + std::string warning_; + std::string error_; +}; + +/// ==>>========= Legacy v1 API ============================================= + +/// Loads .obj from a file. +/// 'attrib', 'shapes' and 'materials' will be filled with parsed shape data +/// 'shapes' will be filled with parsed shape data +/// Returns true when loading .obj become success. +/// Returns warning message into `warn`, and error message into `err` +/// 'mtl_basedir' is optional, and used for base directory for .mtl file. +/// In default(`NULL'), .mtl file is searched from an application's working +/// directory. +/// 'triangulate' is optional, and used whether triangulate polygon face in .obj +/// or not. 
+/// Option 'default_vcols_fallback' specifies whether vertex colors should +/// always be defined, even if no colors are given (fallback to white). +bool LoadObj(attrib_t *attrib, std::vector *shapes, + std::vector *materials, std::string *warn, + std::string *err, const char *filename, + const char *mtl_basedir = NULL, bool triangulate = true, + bool default_vcols_fallback = true); + +/// Loads .obj from a file with custom user callback. +/// .mtl is loaded as usual and parsed material_t data will be passed to +/// `callback.mtllib_cb`. +/// Returns true when loading .obj/.mtl become success. +/// Returns warning message into `warn`, and error message into `err` +/// See `examples/callback_api/` for how to use this function. +bool LoadObjWithCallback(std::istream &inStream, const callback_t &callback, + void *user_data = NULL, + MaterialReader *readMatFn = NULL, + std::string *warn = NULL, std::string *err = NULL); + +/// Loads object from a std::istream, uses `readMatFn` to retrieve +/// std::istream for materials. +/// Returns true when loading .obj become success. 
// Raw index triple for one face corner: position, texture coordinate and
// normal. A default-constructed value has all three set to -1.
struct vertex_index_t {
  int v_idx;
  int vt_idx;
  int vn_idx;

  vertex_index_t()
      : v_idx(-1), vt_idx(-1), vn_idx(-1) {}

  // Uses the same index for all three components.
  explicit vertex_index_t(int shared)
      : v_idx(shared), vt_idx(shared), vn_idx(shared) {}

  vertex_index_t(int position, int texcoord, int normal)
      : v_idx(position), vt_idx(texcoord), vn_idx(normal) {}
};
+struct face_t { + unsigned int + smoothing_group_id; // smoothing group id. 0 = smoothing groupd is off. + int pad_; + std::vector vertex_indices; // face vertex indices. + + face_t() : smoothing_group_id(0), pad_(0) {} +}; + +// Internal data structure for line representation +struct __line_t { + // l v1/vt1 v2/vt2 ... + // In the specification, line primitrive does not have normal index, but + // TinyObjLoader allow it + std::vector vertex_indices; +}; + +// Internal data structure for points representation +struct __points_t { + // p v1 v2 ... + // In the specification, point primitrive does not have normal index and + // texture coord index, but TinyObjLoader allow it. + std::vector vertex_indices; +}; + +struct tag_sizes { + tag_sizes() : num_ints(0), num_reals(0), num_strings(0) {} + int num_ints; + int num_reals; + int num_strings; +}; + +struct obj_shape { + std::vector v; + std::vector vn; + std::vector vt; +}; + +// +// Manages group of primitives(face, line, points, ...) +struct PrimGroup { + std::vector faceGroup; + std::vector<__line_t> lineGroup; + std::vector<__points_t> pointsGroup; + + void clear() { + faceGroup.clear(); + lineGroup.clear(); + pointsGroup.clear(); + } + + bool IsEmpty() const { + return faceGroup.empty() && lineGroup.empty() && pointsGroup.empty(); + } + + // TODO(syoyo): bspline, surface, ... +}; + +// See +// http://stackoverflow.com/questions/6089231/getting-std-ifstream-to-handle-lf-cr-and-crlf +static std::istream &safeGetline(std::istream &is, std::string &t) { + t.clear(); + + // The characters in the stream are read one-by-one using a std::streambuf. + // That is faster than reading them one-by-one using the std::istream. + // Code that uses streambuf this way must be guarded by a sentry object. + // The sentry object performs various tasks, + // such as thread synchronization and updating the stream state. 
// Make an .obj index zero-based, resolving relative (negative) indices.
//
// idx  index as written in the file: > 0 is absolute and 1-based, < 0 is
//      relative to the end of the n elements read so far, 0 is not allowed
//      by the spec
// n    number of elements parsed so far
// ret  receives the zero-based index on success; untouched on failure
//
// Returns false when ret is null, idx is 0, or a relative index points before
// the first element (n + idx < 0), so callers never receive a negative index.
static inline bool fixIndex(int idx, int n, int *ret) {
  if (!ret || idx == 0) {
    return false;
  }

  if (idx > 0) {
    (*ret) = idx - 1;
    return true;
  }

  // negative value = relative
  const int resolved = n + idx;
  if (resolved < 0) {
    return false;
  }
  (*ret) = resolved;
  return true;
}
// True for the ASCII characters '0'..'9'.
static inline bool isDecimalDigit(char c) {
  return static_cast<unsigned int>(c - '0') < static_cast<unsigned int>(10);
}

// Tries to parse a floating point number located at s.
//
// s_end is the location where reading must absolutely stop (for example the
// end of the token); no character at or beyond s_end is ever read.
//
// Accepted grammar (EBNF), plus a leading-dot form such as `.7e+2` / `-.5234`:
//   sign    = "+" | "-" ;
//   digit   = "0" | ... | "9" ;
//   integer = [sign] , digit , {digit} ;
//   decimal = integer , ["." , integer] ;
//   float   = ( decimal , END ) | ( decimal , ("E" | "e") , integer , END ) ;
//
// Valid strings are for example:
//   -0  +3.1417e+2  -0.0E-3  1.0324  -1.41  11e2
//
// The parse is greedy: it stops at the first non-conforming character or at
// s_end, whichever comes first.
//
// Returns true and stores the value in *result on success. Returns false and
// leaves *result untouched when s >= s_end, when no digits are found, when an
// exponent marker is not followed by digits, or when the exponent does not
// fit in an int.
static bool tryParseDouble(const char *s, const char *s_end, double *result) {
  if (s >= s_end) {
    return false;
  }

  // Negative powers of ten for the first fractional digits.
  // NOTE: Don't use powf for these, it would murder precision.
  static const double pow_lut[] = {
      1.0, 0.1, 0.01, 0.001, 0.0001, 0.00001, 0.000001, 0.0000001,
  };
  const int lut_entries = sizeof pow_lut / sizeof pow_lut[0];

  // Hardcoded instead of std::numeric_limits<int>::max() to stay clear of
  // MSVC's min/max macros.
  const int kIntMax = 2147483647;

  double mantissa = 0.0;
  // The exponent is parsed in base ten; it is converted when the final value
  // is assembled with ldexp (which works in base two).
  int exponent = 0;
  char sign = '+';
  char exp_sign = '+';
  const char *curr = s;
  int read = 0;  // characters consumed by the current digit loop
  bool leading_decimal_dots = false;

  // Sign, or the first character of the number.
  if (*curr == '+' || *curr == '-') {
    sign = *curr;
    curr++;
    if ((curr != s_end) && (*curr == '.')) {
      leading_decimal_dots = true;  // something like `-.5234`
    }
  } else if (isDecimalDigit(*curr)) {
    // Pass through.
  } else if (*curr == '.') {
    leading_decimal_dots = true;  // something like `.7e+2`
  } else {
    return false;
  }

  // Integer part (absent when the number starts with a dot).
  if (!leading_decimal_dots) {
    while (curr != s_end && isDecimalDigit(*curr)) {
      mantissa *= 10;
      mantissa += static_cast<int>(*curr - '0');
      curr++;
      read++;
    }
    // We must make sure we actually got something.
    if (read == 0) {
      return false;
    }
  }

  // Decimal part. Numbers of the form "#", "###" simply skip this.
  if (curr != s_end && *curr == '.') {
    curr++;
    read = 1;
    while (curr != s_end && isDecimalDigit(*curr)) {
      mantissa += static_cast<int>(*curr - '0') *
                  (read < lut_entries ? pow_lut[read] : std::pow(10.0, -read));
      read++;
      curr++;
    }
  }

  // Exponent part.
  if (curr != s_end && (*curr == 'e' || *curr == 'E')) {
    curr++;
    if (curr == s_end) {
      // Empty E is not allowed; checked first so *s_end is never read.
      return false;
    }
    if (*curr == '+' || *curr == '-') {
      exp_sign = *curr;
      curr++;
    } else if (!isDecimalDigit(*curr)) {
      return false;
    }

    read = 0;
    while (curr != s_end && isDecimalDigit(*curr)) {
      const int digit = static_cast<int>(*curr - '0');
      // Reject before exponent * 10 + digit can overflow.
      if (exponent > (kIntMax - digit) / 10) {
        return false;
      }
      exponent = exponent * 10 + digit;
      curr++;
      read++;
    }
    if (read == 0) {
      return false;
    }
    if (exp_sign != '+') {
      exponent = -exponent;
    }
  }

  // mantissa * 10^e == (mantissa * 5^e) * 2^e
  *result = (sign == '+' ? 1 : -1) *
            (exponent ? std::ldexp(mantissa * std::pow(5.0, exponent), exponent)
                      : mantissa);
  return true;
}
(*y) = parseReal(token, default_y); + (*z) = parseReal(token, default_z); + + const bool found_color = + parseReal(token, r) && parseReal(token, g) && parseReal(token, b); + + if (!found_color) { + (*r) = (*g) = (*b) = 1.0; + } + + return found_color; +} + +static inline bool parseOnOff(const char **token, bool default_value = true) { + (*token) += strspn((*token), " \t"); + const char *end = (*token) + strcspn((*token), " \t\r"); + + bool ret = default_value; + if ((0 == strncmp((*token), "on", 2))) { + ret = true; + } else if ((0 == strncmp((*token), "off", 3))) { + ret = false; + } + + (*token) = end; + return ret; +} + +static inline texture_type_t parseTextureType( + const char **token, texture_type_t default_value = TEXTURE_TYPE_NONE) { + (*token) += strspn((*token), " \t"); + const char *end = (*token) + strcspn((*token), " \t\r"); + texture_type_t ty = default_value; + + if ((0 == strncmp((*token), "cube_top", strlen("cube_top")))) { + ty = TEXTURE_TYPE_CUBE_TOP; + } else if ((0 == strncmp((*token), "cube_bottom", strlen("cube_bottom")))) { + ty = TEXTURE_TYPE_CUBE_BOTTOM; + } else if ((0 == strncmp((*token), "cube_left", strlen("cube_left")))) { + ty = TEXTURE_TYPE_CUBE_LEFT; + } else if ((0 == strncmp((*token), "cube_right", strlen("cube_right")))) { + ty = TEXTURE_TYPE_CUBE_RIGHT; + } else if ((0 == strncmp((*token), "cube_front", strlen("cube_front")))) { + ty = TEXTURE_TYPE_CUBE_FRONT; + } else if ((0 == strncmp((*token), "cube_back", strlen("cube_back")))) { + ty = TEXTURE_TYPE_CUBE_BACK; + } else if ((0 == strncmp((*token), "sphere", strlen("sphere")))) { + ty = TEXTURE_TYPE_SPHERE; + } + + (*token) = end; + return ty; +} + +static tag_sizes parseTagTriple(const char **token) { + tag_sizes ts; + + (*token) += strspn((*token), " \t"); + ts.num_ints = atoi((*token)); + (*token) += strcspn((*token), "/ \t\r"); + if ((*token)[0] != '/') { + return ts; + } + + (*token)++; // Skip '/' + + (*token) += strspn((*token), " \t"); + ts.num_reals = 
atoi((*token)); + (*token) += strcspn((*token), "/ \t\r"); + if ((*token)[0] != '/') { + return ts; + } + (*token)++; // Skip '/' + + ts.num_strings = parseInt(token); + + return ts; +} + +// Parse triples with index offsets: i, i/j/k, i//k, i/j +static bool parseTriple(const char **token, int vsize, int vnsize, int vtsize, + vertex_index_t *ret) { + if (!ret) { + return false; + } + + vertex_index_t vi(-1); + + if (!fixIndex(atoi((*token)), vsize, &(vi.v_idx))) { + return false; + } + + (*token) += strcspn((*token), "/ \t\r"); + if ((*token)[0] != '/') { + (*ret) = vi; + return true; + } + (*token)++; + + // i//k + if ((*token)[0] == '/') { + (*token)++; + if (!fixIndex(atoi((*token)), vnsize, &(vi.vn_idx))) { + return false; + } + (*token) += strcspn((*token), "/ \t\r"); + (*ret) = vi; + return true; + } + + // i/j/k or i/j + if (!fixIndex(atoi((*token)), vtsize, &(vi.vt_idx))) { + return false; + } + + (*token) += strcspn((*token), "/ \t\r"); + if ((*token)[0] != '/') { + (*ret) = vi; + return true; + } + + // i/j/k + (*token)++; // skip '/' + if (!fixIndex(atoi((*token)), vnsize, &(vi.vn_idx))) { + return false; + } + (*token) += strcspn((*token), "/ \t\r"); + + (*ret) = vi; + + return true; +} + +// Parse raw triples: i, i/j/k, i//k, i/j +static vertex_index_t parseRawTriple(const char **token) { + vertex_index_t vi(static_cast(0)); // 0 is an invalid index in OBJ + + vi.v_idx = atoi((*token)); + (*token) += strcspn((*token), "/ \t\r"); + if ((*token)[0] != '/') { + return vi; + } + (*token)++; + + // i//k + if ((*token)[0] == '/') { + (*token)++; + vi.vn_idx = atoi((*token)); + (*token) += strcspn((*token), "/ \t\r"); + return vi; + } + + // i/j/k or i/j + vi.vt_idx = atoi((*token)); + (*token) += strcspn((*token), "/ \t\r"); + if ((*token)[0] != '/') { + return vi; + } + + // i/j/k + (*token)++; // skip '/' + vi.vn_idx = atoi((*token)); + (*token) += strcspn((*token), "/ \t\r"); + return vi; +} + +bool ParseTextureNameAndOption(std::string *texname, 
texture_option_t *texopt, + const char *linebuf) { + // @todo { write more robust lexer and parser. } + bool found_texname = false; + std::string texture_name; + + const char *token = linebuf; // Assume line ends with NULL + + while (!IS_NEW_LINE((*token))) { + token += strspn(token, " \t"); // skip space + if ((0 == strncmp(token, "-blendu", 7)) && IS_SPACE((token[7]))) { + token += 8; + texopt->blendu = parseOnOff(&token, /* default */ true); + } else if ((0 == strncmp(token, "-blendv", 7)) && IS_SPACE((token[7]))) { + token += 8; + texopt->blendv = parseOnOff(&token, /* default */ true); + } else if ((0 == strncmp(token, "-clamp", 6)) && IS_SPACE((token[6]))) { + token += 7; + texopt->clamp = parseOnOff(&token, /* default */ true); + } else if ((0 == strncmp(token, "-boost", 6)) && IS_SPACE((token[6]))) { + token += 7; + texopt->sharpness = parseReal(&token, 1.0); + } else if ((0 == strncmp(token, "-bm", 3)) && IS_SPACE((token[3]))) { + token += 4; + texopt->bump_multiplier = parseReal(&token, 1.0); + } else if ((0 == strncmp(token, "-o", 2)) && IS_SPACE((token[2]))) { + token += 3; + parseReal3(&(texopt->origin_offset[0]), &(texopt->origin_offset[1]), + &(texopt->origin_offset[2]), &token); + } else if ((0 == strncmp(token, "-s", 2)) && IS_SPACE((token[2]))) { + token += 3; + parseReal3(&(texopt->scale[0]), &(texopt->scale[1]), &(texopt->scale[2]), + &token, 1.0, 1.0, 1.0); + } else if ((0 == strncmp(token, "-t", 2)) && IS_SPACE((token[2]))) { + token += 3; + parseReal3(&(texopt->turbulence[0]), &(texopt->turbulence[1]), + &(texopt->turbulence[2]), &token); + } else if ((0 == strncmp(token, "-type", 5)) && IS_SPACE((token[5]))) { + token += 5; + texopt->type = parseTextureType((&token), TEXTURE_TYPE_NONE); + } else if ((0 == strncmp(token, "-texres", 7)) && IS_SPACE((token[7]))) { + token += 7; + // TODO(syoyo): Check if arg is int type. 
+ texopt->texture_resolution = parseInt(&token); + } else if ((0 == strncmp(token, "-imfchan", 8)) && IS_SPACE((token[8]))) { + token += 9; + token += strspn(token, " \t"); + const char *end = token + strcspn(token, " \t\r"); + if ((end - token) == 1) { // Assume one char for -imfchan + texopt->imfchan = (*token); + } + token = end; + } else if ((0 == strncmp(token, "-mm", 3)) && IS_SPACE((token[3]))) { + token += 4; + parseReal2(&(texopt->brightness), &(texopt->contrast), &token, 0.0, 1.0); + } else if ((0 == strncmp(token, "-colorspace", 11)) && + IS_SPACE((token[11]))) { + token += 12; + texopt->colorspace = parseString(&token); + } else { +// Assume texture filename +#if 0 + size_t len = strcspn(token, " \t\r"); // untile next space + texture_name = std::string(token, token + len); + token += len; + + token += strspn(token, " \t"); // skip space +#else + // Read filename until line end to parse filename containing whitespace + // TODO(syoyo): Support parsing texture option flag after the filename. 
+ texture_name = std::string(token); + token += texture_name.length(); +#endif + + found_texname = true; + } + } + + if (found_texname) { + (*texname) = texture_name; + return true; + } else { + return false; + } +} + +static void InitTexOpt(texture_option_t *texopt, const bool is_bump) { + if (is_bump) { + texopt->imfchan = 'l'; + } else { + texopt->imfchan = 'm'; + } + texopt->bump_multiplier = static_cast(1.0); + texopt->clamp = false; + texopt->blendu = true; + texopt->blendv = true; + texopt->sharpness = static_cast(1.0); + texopt->brightness = static_cast(0.0); + texopt->contrast = static_cast(1.0); + texopt->origin_offset[0] = static_cast(0.0); + texopt->origin_offset[1] = static_cast(0.0); + texopt->origin_offset[2] = static_cast(0.0); + texopt->scale[0] = static_cast(1.0); + texopt->scale[1] = static_cast(1.0); + texopt->scale[2] = static_cast(1.0); + texopt->turbulence[0] = static_cast(0.0); + texopt->turbulence[1] = static_cast(0.0); + texopt->turbulence[2] = static_cast(0.0); + texopt->texture_resolution = -1; + texopt->type = TEXTURE_TYPE_NONE; +} + +static void InitMaterial(material_t *material) { + InitTexOpt(&material->ambient_texopt, /* is_bump */ false); + InitTexOpt(&material->diffuse_texopt, /* is_bump */ false); + InitTexOpt(&material->specular_texopt, /* is_bump */ false); + InitTexOpt(&material->specular_highlight_texopt, /* is_bump */ false); + InitTexOpt(&material->bump_texopt, /* is_bump */ true); + InitTexOpt(&material->displacement_texopt, /* is_bump */ false); + InitTexOpt(&material->alpha_texopt, /* is_bump */ false); + InitTexOpt(&material->reflection_texopt, /* is_bump */ false); + InitTexOpt(&material->roughness_texopt, /* is_bump */ false); + InitTexOpt(&material->metallic_texopt, /* is_bump */ false); + InitTexOpt(&material->sheen_texopt, /* is_bump */ false); + InitTexOpt(&material->emissive_texopt, /* is_bump */ false); + InitTexOpt(&material->normal_texopt, + /* is_bump */ false); // @fixme { is_bump will be true? 
} + material->name = ""; + material->ambient_texname = ""; + material->diffuse_texname = ""; + material->specular_texname = ""; + material->specular_highlight_texname = ""; + material->bump_texname = ""; + material->displacement_texname = ""; + material->reflection_texname = ""; + material->alpha_texname = ""; + for (int i = 0; i < 3; i++) { + material->ambient[i] = static_cast(0.0); + material->diffuse[i] = static_cast(0.0); + material->specular[i] = static_cast(0.0); + material->transmittance[i] = static_cast(0.0); + material->emission[i] = static_cast(0.0); + } + material->illum = 0; + material->dissolve = static_cast(1.0); + material->shininess = static_cast(1.0); + material->ior = static_cast(1.0); + + material->roughness = static_cast(0.0); + material->metallic = static_cast(0.0); + material->sheen = static_cast(0.0); + material->clearcoat_thickness = static_cast(0.0); + material->clearcoat_roughness = static_cast(0.0); + material->anisotropy_rotation = static_cast(0.0); + material->anisotropy = static_cast(0.0); + material->roughness_texname = ""; + material->metallic_texname = ""; + material->sheen_texname = ""; + material->emissive_texname = ""; + material->normal_texname = ""; + + material->unknown_parameter.clear(); +} + +// code from https://wrf.ecse.rpi.edu//Research/Short_Notes/pnpoly.html +template +static int pnpoly(int nvert, T *vertx, T *verty, T testx, T testy) { + int i, j, c = 0; + for (i = 0, j = nvert - 1; i < nvert; j = i++) { + if (((verty[i] > testy) != (verty[j] > testy)) && + (testx < + (vertx[j] - vertx[i]) * (testy - verty[i]) / (verty[j] - verty[i]) + + vertx[i])) + c = !c; + } + return c; +} + +// TODO(syoyo): refactor function. 
+static bool exportGroupsToShape(shape_t *shape, const PrimGroup &prim_group, + const std::vector &tags, + const int material_id, const std::string &name, + bool triangulate, const std::vector &v, + std::string *warn) { + if (prim_group.IsEmpty()) { + return false; + } + + shape->name = name; + + // polygon + if (!prim_group.faceGroup.empty()) { + // Flatten vertices and indices + for (size_t i = 0; i < prim_group.faceGroup.size(); i++) { + const face_t &face = prim_group.faceGroup[i]; + + size_t npolys = face.vertex_indices.size(); + + if (npolys < 3) { + // Face must have 3+ vertices. + if (warn) { + (*warn) += "Degenerated face found\n."; + } + continue; + } + + if (triangulate) { + if (npolys == 4) { + vertex_index_t i0 = face.vertex_indices[0]; + vertex_index_t i1 = face.vertex_indices[1]; + vertex_index_t i2 = face.vertex_indices[2]; + vertex_index_t i3 = face.vertex_indices[3]; + + size_t vi0 = size_t(i0.v_idx); + size_t vi1 = size_t(i1.v_idx); + size_t vi2 = size_t(i2.v_idx); + size_t vi3 = size_t(i3.v_idx); + + if (((3 * vi0 + 2) >= v.size()) || ((3 * vi1 + 2) >= v.size()) || + ((3 * vi2 + 2) >= v.size()) || ((3 * vi3 + 2) >= v.size())) { + // Invalid triangle. + // FIXME(syoyo): Is it ok to simply skip this invalid triangle? + if (warn) { + (*warn) += "Face with invalid vertex index found.\n"; + } + continue; + } + + real_t v0x = v[vi0 * 3 + 0]; + real_t v0y = v[vi0 * 3 + 1]; + real_t v0z = v[vi0 * 3 + 2]; + real_t v1x = v[vi1 * 3 + 0]; + real_t v1y = v[vi1 * 3 + 1]; + real_t v1z = v[vi1 * 3 + 2]; + real_t v2x = v[vi2 * 3 + 0]; + real_t v2y = v[vi2 * 3 + 1]; + real_t v2z = v[vi2 * 3 + 2]; + real_t v3x = v[vi3 * 3 + 0]; + real_t v3y = v[vi3 * 3 + 1]; + real_t v3z = v[vi3 * 3 + 2]; + + // There are two candidates to split the quad into two triangles. + // + // Choose the shortest edge. + // TODO: Is it better to determine the edge to split by calculating + // the area of each triangle? 
+ // + // +---+ + // |\ | + // | \ | + // | \| + // +---+ + // + // +---+ + // | /| + // | / | + // |/ | + // +---+ + + real_t e02x = v2x - v0x; + real_t e02y = v2y - v0y; + real_t e02z = v2z - v0z; + real_t e13x = v3x - v1x; + real_t e13y = v3y - v1y; + real_t e13z = v3z - v1z; + + real_t sqr02 = e02x * e02x + e02y * e02y + e02z * e02z; + real_t sqr13 = e13x * e13x + e13y * e13y + e13z * e13z; + + index_t idx0, idx1, idx2, idx3; + + idx0.vertex_index = i0.v_idx; + idx0.normal_index = i0.vn_idx; + idx0.texcoord_index = i0.vt_idx; + idx1.vertex_index = i1.v_idx; + idx1.normal_index = i1.vn_idx; + idx1.texcoord_index = i1.vt_idx; + idx2.vertex_index = i2.v_idx; + idx2.normal_index = i2.vn_idx; + idx2.texcoord_index = i2.vt_idx; + idx3.vertex_index = i3.v_idx; + idx3.normal_index = i3.vn_idx; + idx3.texcoord_index = i3.vt_idx; + + if (sqr02 < sqr13) { + // [0, 1, 2], [0, 2, 3] + shape->mesh.indices.push_back(idx0); + shape->mesh.indices.push_back(idx1); + shape->mesh.indices.push_back(idx2); + + shape->mesh.indices.push_back(idx0); + shape->mesh.indices.push_back(idx2); + shape->mesh.indices.push_back(idx3); + } else { + // [0, 1, 3], [1, 2, 3] + shape->mesh.indices.push_back(idx0); + shape->mesh.indices.push_back(idx1); + shape->mesh.indices.push_back(idx3); + + shape->mesh.indices.push_back(idx1); + shape->mesh.indices.push_back(idx2); + shape->mesh.indices.push_back(idx3); + } + + // Two triangle faces + shape->mesh.num_face_vertices.push_back(3); + shape->mesh.num_face_vertices.push_back(3); + + shape->mesh.material_ids.push_back(material_id); + shape->mesh.material_ids.push_back(material_id); + + shape->mesh.smoothing_group_ids.push_back(face.smoothing_group_id); + shape->mesh.smoothing_group_ids.push_back(face.smoothing_group_id); + + } else { + vertex_index_t i0 = face.vertex_indices[0]; + vertex_index_t i1(-1); + vertex_index_t i2 = face.vertex_indices[1]; + + // find the two axes to work in + size_t axes[2] = {1, 2}; + for (size_t k = 0; k < npolys; ++k) { + 
i0 = face.vertex_indices[(k + 0) % npolys]; + i1 = face.vertex_indices[(k + 1) % npolys]; + i2 = face.vertex_indices[(k + 2) % npolys]; + size_t vi0 = size_t(i0.v_idx); + size_t vi1 = size_t(i1.v_idx); + size_t vi2 = size_t(i2.v_idx); + + if (((3 * vi0 + 2) >= v.size()) || ((3 * vi1 + 2) >= v.size()) || + ((3 * vi2 + 2) >= v.size())) { + // Invalid triangle. + // FIXME(syoyo): Is it ok to simply skip this invalid triangle? + continue; + } + real_t v0x = v[vi0 * 3 + 0]; + real_t v0y = v[vi0 * 3 + 1]; + real_t v0z = v[vi0 * 3 + 2]; + real_t v1x = v[vi1 * 3 + 0]; + real_t v1y = v[vi1 * 3 + 1]; + real_t v1z = v[vi1 * 3 + 2]; + real_t v2x = v[vi2 * 3 + 0]; + real_t v2y = v[vi2 * 3 + 1]; + real_t v2z = v[vi2 * 3 + 2]; + real_t e0x = v1x - v0x; + real_t e0y = v1y - v0y; + real_t e0z = v1z - v0z; + real_t e1x = v2x - v1x; + real_t e1y = v2y - v1y; + real_t e1z = v2z - v1z; + real_t cx = std::fabs(e0y * e1z - e0z * e1y); + real_t cy = std::fabs(e0z * e1x - e0x * e1z); + real_t cz = std::fabs(e0x * e1y - e0y * e1x); + const real_t epsilon = std::numeric_limits::epsilon(); + // std::cout << "cx " << cx << ", cy " << cy << ", cz " << cz << + // "\n"; + if (cx > epsilon || cy > epsilon || cz > epsilon) { + // std::cout << "corner\n"; + // found a corner + if (cx > cy && cx > cz) { + // std::cout << "pattern0\n"; + } else { + // std::cout << "axes[0] = 0\n"; + axes[0] = 0; + if (cz > cx && cz > cy) { + // std::cout << "axes[1] = 1\n"; + axes[1] = 1; + } + } + break; + } + } + +#ifdef TINYOBJLOADER_USE_MAPBOX_EARCUT + using Point = std::array; + + // first polyline define the main polygon. + // following polylines define holes(not used in tinyobj). + std::vector > polygon; + + std::vector polyline; + + // Fill polygon data(facevarying vertices). 
+ for (size_t k = 0; k < npolys; k++) { + i0 = face.vertex_indices[k]; + size_t vi0 = size_t(i0.v_idx); + + assert(((3 * vi0 + 2) < v.size())); + + real_t v0x = v[vi0 * 3 + axes[0]]; + real_t v0y = v[vi0 * 3 + axes[1]]; + + polyline.push_back({v0x, v0y}); + } + + polygon.push_back(polyline); + std::vector indices = mapbox::earcut(polygon); + // => result = 3 * faces, clockwise + + assert(indices.size() % 3 == 0); + + // Reconstruct vertex_index_t + for (size_t k = 0; k < indices.size() / 3; k++) { + { + index_t idx0, idx1, idx2; + idx0.vertex_index = face.vertex_indices[indices[3 * k + 0]].v_idx; + idx0.normal_index = + face.vertex_indices[indices[3 * k + 0]].vn_idx; + idx0.texcoord_index = + face.vertex_indices[indices[3 * k + 0]].vt_idx; + idx1.vertex_index = face.vertex_indices[indices[3 * k + 1]].v_idx; + idx1.normal_index = + face.vertex_indices[indices[3 * k + 1]].vn_idx; + idx1.texcoord_index = + face.vertex_indices[indices[3 * k + 1]].vt_idx; + idx2.vertex_index = face.vertex_indices[indices[3 * k + 2]].v_idx; + idx2.normal_index = + face.vertex_indices[indices[3 * k + 2]].vn_idx; + idx2.texcoord_index = + face.vertex_indices[indices[3 * k + 2]].vt_idx; + + shape->mesh.indices.push_back(idx0); + shape->mesh.indices.push_back(idx1); + shape->mesh.indices.push_back(idx2); + + shape->mesh.num_face_vertices.push_back(3); + shape->mesh.material_ids.push_back(material_id); + shape->mesh.smoothing_group_ids.push_back( + face.smoothing_group_id); + } + } + +#else // Built-in ear clipping triangulation + + + face_t remainingFace = face; // copy + size_t guess_vert = 0; + vertex_index_t ind[3]; + real_t vx[3]; + real_t vy[3]; + + // How many iterations can we do without decreasing the remaining + // vertices. 
+ size_t remainingIterations = face.vertex_indices.size(); + size_t previousRemainingVertices = + remainingFace.vertex_indices.size(); + + while (remainingFace.vertex_indices.size() > 3 && + remainingIterations > 0) { + // std::cout << "remainingIterations " << remainingIterations << + // "\n"; + + npolys = remainingFace.vertex_indices.size(); + if (guess_vert >= npolys) { + guess_vert -= npolys; + } + + if (previousRemainingVertices != npolys) { + // The number of remaining vertices decreased. Reset counters. + previousRemainingVertices = npolys; + remainingIterations = npolys; + } else { + // We didn't consume a vertex on previous iteration, reduce the + // available iterations. + remainingIterations--; + } + + for (size_t k = 0; k < 3; k++) { + ind[k] = remainingFace.vertex_indices[(guess_vert + k) % npolys]; + size_t vi = size_t(ind[k].v_idx); + if (((vi * 3 + axes[0]) >= v.size()) || + ((vi * 3 + axes[1]) >= v.size())) { + // ??? + vx[k] = static_cast(0.0); + vy[k] = static_cast(0.0); + } else { + vx[k] = v[vi * 3 + axes[0]]; + vy[k] = v[vi * 3 + axes[1]]; + } + } + + // + // area is calculated per face + // + real_t e0x = vx[1] - vx[0]; + real_t e0y = vy[1] - vy[0]; + real_t e1x = vx[2] - vx[1]; + real_t e1y = vy[2] - vy[1]; + real_t cross = e0x * e1y - e0y * e1x; + // std::cout << "axes = " << axes[0] << ", " << axes[1] << "\n"; + // std::cout << "e0x, e0y, e1x, e1y " << e0x << ", " << e0y << ", " + // << e1x << ", " << e1y << "\n"; + + real_t area = (vx[0] * vy[1] - vy[0] * vx[1]) * static_cast(0.5); + // std::cout << "cross " << cross << ", area " << area << "\n"; + // if an internal angle + if (cross * area < static_cast(0.0)) { + // std::cout << "internal \n"; + guess_vert += 1; + // std::cout << "guess vert : " << guess_vert << "\n"; + continue; + } + + // check all other verts in case they are inside this triangle + bool overlap = false; + for (size_t otherVert = 3; otherVert < npolys; ++otherVert) { + size_t idx = (guess_vert + otherVert) % npolys; + 
+ if (idx >= remainingFace.vertex_indices.size()) { + // std::cout << "???0\n"; + // ??? + continue; + } + + size_t ovi = size_t(remainingFace.vertex_indices[idx].v_idx); + + if (((ovi * 3 + axes[0]) >= v.size()) || + ((ovi * 3 + axes[1]) >= v.size())) { + // std::cout << "???1\n"; + // ??? + continue; + } + real_t tx = v[ovi * 3 + axes[0]]; + real_t ty = v[ovi * 3 + axes[1]]; + if (pnpoly(3, vx, vy, tx, ty)) { + // std::cout << "overlap\n"; + overlap = true; + break; + } + } + + if (overlap) { + // std::cout << "overlap2\n"; + guess_vert += 1; + continue; + } + + // this triangle is an ear + { + index_t idx0, idx1, idx2; + idx0.vertex_index = ind[0].v_idx; + idx0.normal_index = ind[0].vn_idx; + idx0.texcoord_index = ind[0].vt_idx; + idx1.vertex_index = ind[1].v_idx; + idx1.normal_index = ind[1].vn_idx; + idx1.texcoord_index = ind[1].vt_idx; + idx2.vertex_index = ind[2].v_idx; + idx2.normal_index = ind[2].vn_idx; + idx2.texcoord_index = ind[2].vt_idx; + + shape->mesh.indices.push_back(idx0); + shape->mesh.indices.push_back(idx1); + shape->mesh.indices.push_back(idx2); + + shape->mesh.num_face_vertices.push_back(3); + shape->mesh.material_ids.push_back(material_id); + shape->mesh.smoothing_group_ids.push_back( + face.smoothing_group_id); + } + + // remove v1 from the list + size_t removed_vert_index = (guess_vert + 1) % npolys; + while (removed_vert_index + 1 < npolys) { + remainingFace.vertex_indices[removed_vert_index] = + remainingFace.vertex_indices[removed_vert_index + 1]; + removed_vert_index += 1; + } + remainingFace.vertex_indices.pop_back(); + } + + // std::cout << "remainingFace.vi.size = " << + // remainingFace.vertex_indices.size() << "\n"; + if (remainingFace.vertex_indices.size() == 3) { + i0 = remainingFace.vertex_indices[0]; + i1 = remainingFace.vertex_indices[1]; + i2 = remainingFace.vertex_indices[2]; + { + index_t idx0, idx1, idx2; + idx0.vertex_index = i0.v_idx; + idx0.normal_index = i0.vn_idx; + idx0.texcoord_index = i0.vt_idx; + 
idx1.vertex_index = i1.v_idx; + idx1.normal_index = i1.vn_idx; + idx1.texcoord_index = i1.vt_idx; + idx2.vertex_index = i2.v_idx; + idx2.normal_index = i2.vn_idx; + idx2.texcoord_index = i2.vt_idx; + + shape->mesh.indices.push_back(idx0); + shape->mesh.indices.push_back(idx1); + shape->mesh.indices.push_back(idx2); + + shape->mesh.num_face_vertices.push_back(3); + shape->mesh.material_ids.push_back(material_id); + shape->mesh.smoothing_group_ids.push_back( + face.smoothing_group_id); + } + } +#endif + } // npolys + } else { + for (size_t k = 0; k < npolys; k++) { + index_t idx; + idx.vertex_index = face.vertex_indices[k].v_idx; + idx.normal_index = face.vertex_indices[k].vn_idx; + idx.texcoord_index = face.vertex_indices[k].vt_idx; + shape->mesh.indices.push_back(idx); + } + + shape->mesh.num_face_vertices.push_back( + static_cast(npolys)); + shape->mesh.material_ids.push_back(material_id); // per face + shape->mesh.smoothing_group_ids.push_back( + face.smoothing_group_id); // per face + } + } + + shape->mesh.tags = tags; + } + + // line + if (!prim_group.lineGroup.empty()) { + // Flatten indices + for (size_t i = 0; i < prim_group.lineGroup.size(); i++) { + for (size_t j = 0; j < prim_group.lineGroup[i].vertex_indices.size(); + j++) { + const vertex_index_t &vi = prim_group.lineGroup[i].vertex_indices[j]; + + index_t idx; + idx.vertex_index = vi.v_idx; + idx.normal_index = vi.vn_idx; + idx.texcoord_index = vi.vt_idx; + + shape->lines.indices.push_back(idx); + } + + shape->lines.num_line_vertices.push_back( + int(prim_group.lineGroup[i].vertex_indices.size())); + } + } + + // points + if (!prim_group.pointsGroup.empty()) { + // Flatten & convert indices + for (size_t i = 0; i < prim_group.pointsGroup.size(); i++) { + for (size_t j = 0; j < prim_group.pointsGroup[i].vertex_indices.size(); + j++) { + const vertex_index_t &vi = prim_group.pointsGroup[i].vertex_indices[j]; + + index_t idx; + idx.vertex_index = vi.v_idx; + idx.normal_index = vi.vn_idx; + 
idx.texcoord_index = vi.vt_idx; + + shape->points.indices.push_back(idx); + } + } + } + + return true; +} + +// Split a string with specified delimiter character and escape character. +// https://rosettacode.org/wiki/Tokenize_a_string_with_escaping#C.2B.2B +static void SplitString(const std::string &s, char delim, char escape, + std::vector &elems) { + std::string token; + + bool escaping = false; + for (size_t i = 0; i < s.size(); ++i) { + char ch = s[i]; + if (escaping) { + escaping = false; + } else if (ch == escape) { + escaping = true; + continue; + } else if (ch == delim) { + if (!token.empty()) { + elems.push_back(token); + } + token.clear(); + continue; + } + token += ch; + } + + elems.push_back(token); +} + +static std::string JoinPath(const std::string &dir, + const std::string &filename) { + if (dir.empty()) { + return filename; + } else { + // check '/' + char lastChar = *dir.rbegin(); + if (lastChar != '/') { + return dir + std::string("/") + filename; + } else { + return dir + filename; + } + } +} + +void LoadMtl(std::map *material_map, + std::vector *materials, std::istream *inStream, + std::string *warning, std::string *err) { + (void)err; + + // Create a default material anyway. + material_t material; + InitMaterial(&material); + + // Issue 43. `d` wins against `Tr` since `Tr` is not in the MTL specification. + bool has_d = false; + bool has_tr = false; + + // has_kd is used to set a default diffuse value when map_Kd is present + // and Kd is not. + bool has_kd = false; + + std::stringstream warn_ss; + + size_t line_no = 0; + std::string linebuf; + while (inStream->peek() != -1) { + safeGetline(*inStream, linebuf); + line_no++; + + // Trim trailing whitespace. 
+ if (linebuf.size() > 0) { + linebuf = linebuf.substr(0, linebuf.find_last_not_of(" \t") + 1); + } + + // Trim newline '\r\n' or '\n' + if (linebuf.size() > 0) { + if (linebuf[linebuf.size() - 1] == '\n') + linebuf.erase(linebuf.size() - 1); + } + if (linebuf.size() > 0) { + if (linebuf[linebuf.size() - 1] == '\r') + linebuf.erase(linebuf.size() - 1); + } + + // Skip if empty line. + if (linebuf.empty()) { + continue; + } + + // Skip leading space. + const char *token = linebuf.c_str(); + token += strspn(token, " \t"); + + assert(token); + if (token[0] == '\0') continue; // empty line + + if (token[0] == '#') continue; // comment line + + // new mtl + if ((0 == strncmp(token, "newmtl", 6)) && IS_SPACE((token[6]))) { + // flush previous material. + if (!material.name.empty()) { + material_map->insert(std::pair( + material.name, static_cast(materials->size()))); + materials->push_back(material); + } + + // initial temporary material + InitMaterial(&material); + + has_d = false; + has_tr = false; + + // set new mtl name + token += 7; + { + std::stringstream sstr; + sstr << token; + material.name = sstr.str(); + } + continue; + } + + // ambient + if (token[0] == 'K' && token[1] == 'a' && IS_SPACE((token[2]))) { + token += 2; + real_t r, g, b; + parseReal3(&r, &g, &b, &token); + material.ambient[0] = r; + material.ambient[1] = g; + material.ambient[2] = b; + continue; + } + + // diffuse + if (token[0] == 'K' && token[1] == 'd' && IS_SPACE((token[2]))) { + token += 2; + real_t r, g, b; + parseReal3(&r, &g, &b, &token); + material.diffuse[0] = r; + material.diffuse[1] = g; + material.diffuse[2] = b; + has_kd = true; + continue; + } + + // specular + if (token[0] == 'K' && token[1] == 's' && IS_SPACE((token[2]))) { + token += 2; + real_t r, g, b; + parseReal3(&r, &g, &b, &token); + material.specular[0] = r; + material.specular[1] = g; + material.specular[2] = b; + continue; + } + + // transmittance + if ((token[0] == 'K' && token[1] == 't' && IS_SPACE((token[2]))) || + 
(token[0] == 'T' && token[1] == 'f' && IS_SPACE((token[2])))) { + token += 2; + real_t r, g, b; + parseReal3(&r, &g, &b, &token); + material.transmittance[0] = r; + material.transmittance[1] = g; + material.transmittance[2] = b; + continue; + } + + // ior(index of refraction) + if (token[0] == 'N' && token[1] == 'i' && IS_SPACE((token[2]))) { + token += 2; + material.ior = parseReal(&token); + continue; + } + + // emission + if (token[0] == 'K' && token[1] == 'e' && IS_SPACE(token[2])) { + token += 2; + real_t r, g, b; + parseReal3(&r, &g, &b, &token); + material.emission[0] = r; + material.emission[1] = g; + material.emission[2] = b; + continue; + } + + // shininess + if (token[0] == 'N' && token[1] == 's' && IS_SPACE(token[2])) { + token += 2; + material.shininess = parseReal(&token); + continue; + } + + // illum model + if (0 == strncmp(token, "illum", 5) && IS_SPACE(token[5])) { + token += 6; + material.illum = parseInt(&token); + continue; + } + + // dissolve + if ((token[0] == 'd' && IS_SPACE(token[1]))) { + token += 1; + material.dissolve = parseReal(&token); + + if (has_tr) { + warn_ss << "Both `d` and `Tr` parameters defined for \"" + << material.name + << "\". Use the value of `d` for dissolve (line " << line_no + << " in .mtl.)\n"; + } + has_d = true; + continue; + } + if (token[0] == 'T' && token[1] == 'r' && IS_SPACE(token[2])) { + token += 2; + if (has_d) { + // `d` wins. Ignore `Tr` value. + warn_ss << "Both `d` and `Tr` parameters defined for \"" + << material.name + << "\". Use the value of `d` for dissolve (line " << line_no + << " in .mtl.)\n"; + } else { + // We invert value of Tr(assume Tr is in range [0, 1]) + // NOTE: Interpretation of Tr is application(exporter) dependent. For + // some application(e.g. 
3ds max obj exporter), Tr = d(Issue 43) + material.dissolve = static_cast(1.0) - parseReal(&token); + } + has_tr = true; + continue; + } + + // PBR: roughness + if (token[0] == 'P' && token[1] == 'r' && IS_SPACE(token[2])) { + token += 2; + material.roughness = parseReal(&token); + continue; + } + + // PBR: metallic + if (token[0] == 'P' && token[1] == 'm' && IS_SPACE(token[2])) { + token += 2; + material.metallic = parseReal(&token); + continue; + } + + // PBR: sheen + if (token[0] == 'P' && token[1] == 's' && IS_SPACE(token[2])) { + token += 2; + material.sheen = parseReal(&token); + continue; + } + + // PBR: clearcoat thickness + if (token[0] == 'P' && token[1] == 'c' && IS_SPACE(token[2])) { + token += 2; + material.clearcoat_thickness = parseReal(&token); + continue; + } + + // PBR: clearcoat roughness + if ((0 == strncmp(token, "Pcr", 3)) && IS_SPACE(token[3])) { + token += 4; + material.clearcoat_roughness = parseReal(&token); + continue; + } + + // PBR: anisotropy + if ((0 == strncmp(token, "aniso", 5)) && IS_SPACE(token[5])) { + token += 6; + material.anisotropy = parseReal(&token); + continue; + } + + // PBR: anisotropy rotation + if ((0 == strncmp(token, "anisor", 6)) && IS_SPACE(token[6])) { + token += 7; + material.anisotropy_rotation = parseReal(&token); + continue; + } + + // ambient texture + if ((0 == strncmp(token, "map_Ka", 6)) && IS_SPACE(token[6])) { + token += 7; + ParseTextureNameAndOption(&(material.ambient_texname), + &(material.ambient_texopt), token); + continue; + } + + // diffuse texture + if ((0 == strncmp(token, "map_Kd", 6)) && IS_SPACE(token[6])) { + token += 7; + ParseTextureNameAndOption(&(material.diffuse_texname), + &(material.diffuse_texopt), token); + + // Set a decent diffuse default value if a diffuse texture is specified + // without a matching Kd value. 
+ if (!has_kd) { + material.diffuse[0] = static_cast(0.6); + material.diffuse[1] = static_cast(0.6); + material.diffuse[2] = static_cast(0.6); + } + + continue; + } + + // specular texture + if ((0 == strncmp(token, "map_Ks", 6)) && IS_SPACE(token[6])) { + token += 7; + ParseTextureNameAndOption(&(material.specular_texname), + &(material.specular_texopt), token); + continue; + } + + // specular highlight texture + if ((0 == strncmp(token, "map_Ns", 6)) && IS_SPACE(token[6])) { + token += 7; + ParseTextureNameAndOption(&(material.specular_highlight_texname), + &(material.specular_highlight_texopt), token); + continue; + } + + // bump texture + if ((0 == strncmp(token, "map_bump", 8)) && IS_SPACE(token[8])) { + token += 9; + ParseTextureNameAndOption(&(material.bump_texname), + &(material.bump_texopt), token); + continue; + } + + // bump texture + if ((0 == strncmp(token, "map_Bump", 8)) && IS_SPACE(token[8])) { + token += 9; + ParseTextureNameAndOption(&(material.bump_texname), + &(material.bump_texopt), token); + continue; + } + + // bump texture + if ((0 == strncmp(token, "bump", 4)) && IS_SPACE(token[4])) { + token += 5; + ParseTextureNameAndOption(&(material.bump_texname), + &(material.bump_texopt), token); + continue; + } + + // alpha texture + if ((0 == strncmp(token, "map_d", 5)) && IS_SPACE(token[5])) { + token += 6; + material.alpha_texname = token; + ParseTextureNameAndOption(&(material.alpha_texname), + &(material.alpha_texopt), token); + continue; + } + + // displacement texture + if ((0 == strncmp(token, "disp", 4)) && IS_SPACE(token[4])) { + token += 5; + ParseTextureNameAndOption(&(material.displacement_texname), + &(material.displacement_texopt), token); + continue; + } + + // reflection map + if ((0 == strncmp(token, "refl", 4)) && IS_SPACE(token[4])) { + token += 5; + ParseTextureNameAndOption(&(material.reflection_texname), + &(material.reflection_texopt), token); + continue; + } + + // PBR: roughness texture + if ((0 == strncmp(token, "map_Pr", 
6)) && IS_SPACE(token[6])) { + token += 7; + ParseTextureNameAndOption(&(material.roughness_texname), + &(material.roughness_texopt), token); + continue; + } + + // PBR: metallic texture + if ((0 == strncmp(token, "map_Pm", 6)) && IS_SPACE(token[6])) { + token += 7; + ParseTextureNameAndOption(&(material.metallic_texname), + &(material.metallic_texopt), token); + continue; + } + + // PBR: sheen texture + if ((0 == strncmp(token, "map_Ps", 6)) && IS_SPACE(token[6])) { + token += 7; + ParseTextureNameAndOption(&(material.sheen_texname), + &(material.sheen_texopt), token); + continue; + } + + // PBR: emissive texture + if ((0 == strncmp(token, "map_Ke", 6)) && IS_SPACE(token[6])) { + token += 7; + ParseTextureNameAndOption(&(material.emissive_texname), + &(material.emissive_texopt), token); + continue; + } + + // PBR: normal map texture + if ((0 == strncmp(token, "norm", 4)) && IS_SPACE(token[4])) { + token += 5; + ParseTextureNameAndOption(&(material.normal_texname), + &(material.normal_texopt), token); + continue; + } + + // unknown parameter + const char *_space = strchr(token, ' '); + if (!_space) { + _space = strchr(token, '\t'); + } + if (_space) { + std::ptrdiff_t len = _space - token; + std::string key(token, static_cast(len)); + std::string value = _space + 1; + material.unknown_parameter.insert( + std::pair(key, value)); + } + } + // flush last material. 
+ material_map->insert(std::pair( + material.name, static_cast(materials->size()))); + materials->push_back(material); + + if (warning) { + (*warning) = warn_ss.str(); + } +} + +bool MaterialFileReader::operator()(const std::string &matId, + std::vector *materials, + std::map *matMap, + std::string *warn, std::string *err) { + if (!m_mtlBaseDir.empty()) { +#ifdef _WIN32 + char sep = ';'; +#else + char sep = ':'; +#endif + + // https://stackoverflow.com/questions/5167625/splitting-a-c-stdstring-using-tokens-e-g + std::vector paths; + std::istringstream f(m_mtlBaseDir); + + std::string s; + while (getline(f, s, sep)) { + paths.push_back(s); + } + + for (size_t i = 0; i < paths.size(); i++) { + std::string filepath = JoinPath(paths[i], matId); + + std::ifstream matIStream(filepath.c_str()); + if (matIStream) { + LoadMtl(matMap, materials, &matIStream, warn, err); + + return true; + } + } + + std::stringstream ss; + ss << "Material file [ " << matId + << " ] not found in a path : " << m_mtlBaseDir << "\n"; + if (warn) { + (*warn) += ss.str(); + } + return false; + + } else { + std::string filepath = matId; + std::ifstream matIStream(filepath.c_str()); + if (matIStream) { + LoadMtl(matMap, materials, &matIStream, warn, err); + + return true; + } + + std::stringstream ss; + ss << "Material file [ " << filepath + << " ] not found in a path : " << m_mtlBaseDir << "\n"; + if (warn) { + (*warn) += ss.str(); + } + + return false; + } +} + +bool MaterialStreamReader::operator()(const std::string &matId, + std::vector *materials, + std::map *matMap, + std::string *warn, std::string *err) { + (void)err; + (void)matId; + if (!m_inStream) { + std::stringstream ss; + ss << "Material stream in error state. 
\n"; + if (warn) { + (*warn) += ss.str(); + } + return false; + } + + LoadMtl(matMap, materials, &m_inStream, warn, err); + + return true; +} + +bool LoadObj(attrib_t *attrib, std::vector *shapes, + std::vector *materials, std::string *warn, + std::string *err, const char *filename, const char *mtl_basedir, + bool triangulate, bool default_vcols_fallback) { + attrib->vertices.clear(); + attrib->normals.clear(); + attrib->texcoords.clear(); + attrib->colors.clear(); + shapes->clear(); + + std::stringstream errss; + + std::ifstream ifs(filename); + if (!ifs) { + errss << "Cannot open file [" << filename << "]\n"; + if (err) { + (*err) = errss.str(); + } + return false; + } + + std::string baseDir = mtl_basedir ? mtl_basedir : ""; + if (!baseDir.empty()) { +#ifndef _WIN32 + const char dirsep = '/'; +#else + const char dirsep = '\\'; +#endif + if (baseDir[baseDir.length() - 1] != dirsep) baseDir += dirsep; + } + MaterialFileReader matFileReader(baseDir); + + return LoadObj(attrib, shapes, materials, warn, err, &ifs, &matFileReader, + triangulate, default_vcols_fallback); +} + +bool LoadObj(attrib_t *attrib, std::vector *shapes, + std::vector *materials, std::string *warn, + std::string *err, std::istream *inStream, + MaterialReader *readMatFn /*= NULL*/, bool triangulate, + bool default_vcols_fallback) { + std::stringstream errss; + + std::vector v; + std::vector vn; + std::vector vt; + std::vector vc; + std::vector vw; + std::vector tags; + PrimGroup prim_group; + std::string name; + + // material + std::set material_filenames; + std::map material_map; + int material = -1; + + // smoothing group id + unsigned int current_smoothing_id = + 0; // Initial value. 0 means no smoothing. 
+ + int greatest_v_idx = -1; + int greatest_vn_idx = -1; + int greatest_vt_idx = -1; + + shape_t shape; + + bool found_all_colors = true; + + size_t line_num = 0; + std::string linebuf; + while (inStream->peek() != -1) { + safeGetline(*inStream, linebuf); + + line_num++; + + // Trim newline '\r\n' or '\n' + if (linebuf.size() > 0) { + if (linebuf[linebuf.size() - 1] == '\n') + linebuf.erase(linebuf.size() - 1); + } + if (linebuf.size() > 0) { + if (linebuf[linebuf.size() - 1] == '\r') + linebuf.erase(linebuf.size() - 1); + } + + // Skip if empty line. + if (linebuf.empty()) { + continue; + } + + // Skip leading space. + const char *token = linebuf.c_str(); + token += strspn(token, " \t"); + + assert(token); + if (token[0] == '\0') continue; // empty line + + if (token[0] == '#') continue; // comment line + + // vertex + if (token[0] == 'v' && IS_SPACE((token[1]))) { + token += 2; + real_t x, y, z; + real_t r, g, b; + + found_all_colors &= parseVertexWithColor(&x, &y, &z, &r, &g, &b, &token); + + v.push_back(x); + v.push_back(y); + v.push_back(z); + + if (found_all_colors || default_vcols_fallback) { + vc.push_back(r); + vc.push_back(g); + vc.push_back(b); + } + + continue; + } + + // normal + if (token[0] == 'v' && token[1] == 'n' && IS_SPACE((token[2]))) { + token += 3; + real_t x, y, z; + parseReal3(&x, &y, &z, &token); + vn.push_back(x); + vn.push_back(y); + vn.push_back(z); + continue; + } + + // texcoord + if (token[0] == 'v' && token[1] == 't' && IS_SPACE((token[2]))) { + token += 3; + real_t x, y; + parseReal2(&x, &y, &token); + vt.push_back(x); + vt.push_back(y); + continue; + } + + // skin weight. tinyobj extension + if (token[0] == 'v' && token[1] == 'w' && IS_SPACE((token[2]))) { + token += 3; + + // vw ... 
+ // example: + // vw 0 0 0.25 1 0.25 2 0.5 + + // TODO(syoyo): Add syntax check + int vid = 0; + vid = parseInt(&token); + + skin_weight_t sw; + + sw.vertex_id = vid; + + while (!IS_NEW_LINE(token[0])) { + real_t j, w; + // joint_id should not be negative, weight may be negative + // TODO(syoyo): # of elements check + parseReal2(&j, &w, &token, -1.0); + + if (j < static_cast(0)) { + if (err) { + std::stringstream ss; + ss << "Failed parse `vw' line. joint_id is negative. " + "line " + << line_num << ".)\n"; + (*err) += ss.str(); + } + return false; + } + + joint_and_weight_t jw; + + jw.joint_id = int(j); + jw.weight = w; + + sw.weightValues.push_back(jw); + + size_t n = strspn(token, " \t\r"); + token += n; + } + + vw.push_back(sw); + } + + // line + if (token[0] == 'l' && IS_SPACE((token[1]))) { + token += 2; + + __line_t line; + + while (!IS_NEW_LINE(token[0])) { + vertex_index_t vi; + if (!parseTriple(&token, static_cast(v.size() / 3), + static_cast(vn.size() / 3), + static_cast(vt.size() / 2), &vi)) { + if (err) { + std::stringstream ss; + ss << "Failed parse `l' line(e.g. zero value for vertex index. " + "line " + << line_num << ".)\n"; + (*err) += ss.str(); + } + return false; + } + + line.vertex_indices.push_back(vi); + + size_t n = strspn(token, " \t\r"); + token += n; + } + + prim_group.lineGroup.push_back(line); + + continue; + } + + // points + if (token[0] == 'p' && IS_SPACE((token[1]))) { + token += 2; + + __points_t pts; + + while (!IS_NEW_LINE(token[0])) { + vertex_index_t vi; + if (!parseTriple(&token, static_cast(v.size() / 3), + static_cast(vn.size() / 3), + static_cast(vt.size() / 2), &vi)) { + if (err) { + std::stringstream ss; + ss << "Failed parse `p' line(e.g. zero value for vertex index. 
" + "line " + << line_num << ".)\n"; + (*err) += ss.str(); + } + return false; + } + + pts.vertex_indices.push_back(vi); + + size_t n = strspn(token, " \t\r"); + token += n; + } + + prim_group.pointsGroup.push_back(pts); + + continue; + } + + // face + if (token[0] == 'f' && IS_SPACE((token[1]))) { + token += 2; + token += strspn(token, " \t"); + + face_t face; + + face.smoothing_group_id = current_smoothing_id; + face.vertex_indices.reserve(3); + + while (!IS_NEW_LINE(token[0])) { + vertex_index_t vi; + if (!parseTriple(&token, static_cast(v.size() / 3), + static_cast(vn.size() / 3), + static_cast(vt.size() / 2), &vi)) { + if (err) { + std::stringstream ss; + ss << "Failed parse `f' line(e.g. zero value for face index. line " + << line_num << ".)\n"; + (*err) += ss.str(); + } + return false; + } + + greatest_v_idx = greatest_v_idx > vi.v_idx ? greatest_v_idx : vi.v_idx; + greatest_vn_idx = + greatest_vn_idx > vi.vn_idx ? greatest_vn_idx : vi.vn_idx; + greatest_vt_idx = + greatest_vt_idx > vi.vt_idx ? greatest_vt_idx : vi.vt_idx; + + face.vertex_indices.push_back(vi); + size_t n = strspn(token, " \t\r"); + token += n; + } + + // replace with emplace_back + std::move on C++11 + prim_group.faceGroup.push_back(face); + + continue; + } + + // use mtl + if ((0 == strncmp(token, "usemtl", 6))) { + token += 6; + std::string namebuf = parseString(&token); + + int newMaterialId = -1; + std::map::const_iterator it = + material_map.find(namebuf); + if (it != material_map.end()) { + newMaterialId = it->second; + } else { + // { error!! material not found } + if (warn) { + (*warn) += "material [ '" + namebuf + "' ] not found in .mtl\n"; + } + } + + if (newMaterialId != material) { + // Create per-face material. Thus we don't add `shape` to `shapes` at + // this time. + // just clear `faceGroup` after `exportGroupsToShape()` call. 
+ exportGroupsToShape(&shape, prim_group, tags, material, name, + triangulate, v, warn); + prim_group.faceGroup.clear(); + material = newMaterialId; + } + + continue; + } + + // load mtl + if ((0 == strncmp(token, "mtllib", 6)) && IS_SPACE((token[6]))) { + if (readMatFn) { + token += 7; + + std::vector filenames; + SplitString(std::string(token), ' ', '\\', filenames); + + if (filenames.empty()) { + if (warn) { + std::stringstream ss; + ss << "Looks like empty filename for mtllib. Use default " + "material (line " + << line_num << ".)\n"; + + (*warn) += ss.str(); + } + } else { + bool found = false; + for (size_t s = 0; s < filenames.size(); s++) { + if (material_filenames.count(filenames[s]) > 0) { + found = true; + continue; + } + + std::string warn_mtl; + std::string err_mtl; + bool ok = (*readMatFn)(filenames[s].c_str(), materials, + &material_map, &warn_mtl, &err_mtl); + if (warn && (!warn_mtl.empty())) { + (*warn) += warn_mtl; + } + + if (err && (!err_mtl.empty())) { + (*err) += err_mtl; + } + + if (ok) { + found = true; + material_filenames.insert(filenames[s]); + break; + } + } + + if (!found) { + if (warn) { + (*warn) += + "Failed to load material file(s). Use default " + "material.\n"; + } + } + } + } + + continue; + } + + // group name + if (token[0] == 'g' && IS_SPACE((token[1]))) { + // flush previous face group. + bool ret = exportGroupsToShape(&shape, prim_group, tags, material, name, + triangulate, v, warn); + (void)ret; // return value not used. + + if (shape.mesh.indices.size() > 0) { + shapes->push_back(shape); + } + + shape = shape_t(); + + // material = -1; + prim_group.clear(); + + std::vector names; + + while (!IS_NEW_LINE(token[0])) { + std::string str = parseString(&token); + names.push_back(str); + token += strspn(token, " \t\r"); // skip tag + } + + // names[0] must be 'g' + + if (names.size() < 2) { + // 'g' with empty names + if (warn) { + std::stringstream ss; + ss << "Empty group name. 
line: " << line_num << "\n"; + (*warn) += ss.str(); + name = ""; + } + } else { + std::stringstream ss; + ss << names[1]; + + // tinyobjloader does not support multiple groups for a primitive. + // Currently we concatinate multiple group names with a space to get + // single group name. + + for (size_t i = 2; i < names.size(); i++) { + ss << " " << names[i]; + } + + name = ss.str(); + } + + continue; + } + + // object name + if (token[0] == 'o' && IS_SPACE((token[1]))) { + // flush previous face group. + bool ret = exportGroupsToShape(&shape, prim_group, tags, material, name, + triangulate, v, warn); + (void)ret; // return value not used. + + if (shape.mesh.indices.size() > 0 || shape.lines.indices.size() > 0 || + shape.points.indices.size() > 0) { + shapes->push_back(shape); + } + + // material = -1; + prim_group.clear(); + shape = shape_t(); + + // @todo { multiple object name? } + token += 2; + std::stringstream ss; + ss << token; + name = ss.str(); + + continue; + } + + if (token[0] == 't' && IS_SPACE(token[1])) { + const int max_tag_nums = 8192; // FIXME(syoyo): Parameterize. 
+ tag_t tag; + + token += 2; + + tag.name = parseString(&token); + + tag_sizes ts = parseTagTriple(&token); + + if (ts.num_ints < 0) { + ts.num_ints = 0; + } + if (ts.num_ints > max_tag_nums) { + ts.num_ints = max_tag_nums; + } + + if (ts.num_reals < 0) { + ts.num_reals = 0; + } + if (ts.num_reals > max_tag_nums) { + ts.num_reals = max_tag_nums; + } + + if (ts.num_strings < 0) { + ts.num_strings = 0; + } + if (ts.num_strings > max_tag_nums) { + ts.num_strings = max_tag_nums; + } + + tag.intValues.resize(static_cast(ts.num_ints)); + + for (size_t i = 0; i < static_cast(ts.num_ints); ++i) { + tag.intValues[i] = parseInt(&token); + } + + tag.floatValues.resize(static_cast(ts.num_reals)); + for (size_t i = 0; i < static_cast(ts.num_reals); ++i) { + tag.floatValues[i] = parseReal(&token); + } + + tag.stringValues.resize(static_cast(ts.num_strings)); + for (size_t i = 0; i < static_cast(ts.num_strings); ++i) { + tag.stringValues[i] = parseString(&token); + } + + tags.push_back(tag); + + continue; + } + + if (token[0] == 's' && IS_SPACE(token[1])) { + // smoothing group id + token += 2; + + // skip space. + token += strspn(token, " \t"); // skip space + + if (token[0] == '\0') { + continue; + } + + if (token[0] == '\r' || token[1] == '\n') { + continue; + } + + if (strlen(token) >= 3 && token[0] == 'o' && token[1] == 'f' && + token[2] == 'f') { + current_smoothing_id = 0; + } else { + // assume number + int smGroupId = parseInt(&token); + if (smGroupId < 0) { + // parse error. force set to 0. + // FIXME(syoyo): Report warning. + current_smoothing_id = 0; + } else { + current_smoothing_id = static_cast(smGroupId); + } + } + + continue; + } // smoothing group id + + // Ignore unknown command. + } + + // not all vertices have colors, no default colors desired? 
-> clear colors + if (!found_all_colors && !default_vcols_fallback) { + vc.clear(); + } + + if (greatest_v_idx >= static_cast(v.size() / 3)) { + if (warn) { + std::stringstream ss; + ss << "Vertex indices out of bounds (line " << line_num << ".)\n\n"; + (*warn) += ss.str(); + } + } + if (greatest_vn_idx >= static_cast(vn.size() / 3)) { + if (warn) { + std::stringstream ss; + ss << "Vertex normal indices out of bounds (line " << line_num << ".)\n\n"; + (*warn) += ss.str(); + } + } + if (greatest_vt_idx >= static_cast(vt.size() / 2)) { + if (warn) { + std::stringstream ss; + ss << "Vertex texcoord indices out of bounds (line " << line_num << ".)\n\n"; + (*warn) += ss.str(); + } + } + + bool ret = exportGroupsToShape(&shape, prim_group, tags, material, name, + triangulate, v, warn); + // exportGroupsToShape return false when `usemtl` is called in the last + // line. + // we also add `shape` to `shapes` when `shape.mesh` has already some + // faces(indices) + if (ret || shape.mesh.indices + .size()) { // FIXME(syoyo): Support other prims(e.g. 
lines) + shapes->push_back(shape); + } + prim_group.clear(); // for safety + + if (err) { + (*err) += errss.str(); + } + + attrib->vertices.swap(v); + attrib->vertex_weights.swap(v); + attrib->normals.swap(vn); + attrib->texcoords.swap(vt); + attrib->texcoord_ws.swap(vt); + attrib->colors.swap(vc); + attrib->skin_weights.swap(vw); + + return true; +} + +bool LoadObjWithCallback(std::istream &inStream, const callback_t &callback, + void *user_data /*= NULL*/, + MaterialReader *readMatFn /*= NULL*/, + std::string *warn, /* = NULL*/ + std::string *err /*= NULL*/) { + std::stringstream errss; + + // material + std::set material_filenames; + std::map material_map; + int material_id = -1; // -1 = invalid + + std::vector indices; + std::vector materials; + std::vector names; + names.reserve(2); + std::vector names_out; + + std::string linebuf; + while (inStream.peek() != -1) { + safeGetline(inStream, linebuf); + + // Trim newline '\r\n' or '\n' + if (linebuf.size() > 0) { + if (linebuf[linebuf.size() - 1] == '\n') + linebuf.erase(linebuf.size() - 1); + } + if (linebuf.size() > 0) { + if (linebuf[linebuf.size() - 1] == '\r') + linebuf.erase(linebuf.size() - 1); + } + + // Skip if empty line. + if (linebuf.empty()) { + continue; + } + + // Skip leading space. + const char *token = linebuf.c_str(); + token += strspn(token, " \t"); + + assert(token); + if (token[0] == '\0') continue; // empty line + + if (token[0] == '#') continue; // comment line + + // vertex + if (token[0] == 'v' && IS_SPACE((token[1]))) { + token += 2; + // TODO(syoyo): Support parsing vertex color extension. + real_t x, y, z, w; // w is optional. 
default = 1.0 + parseV(&x, &y, &z, &w, &token); + if (callback.vertex_cb) { + callback.vertex_cb(user_data, x, y, z, w); + } + continue; + } + + // normal + if (token[0] == 'v' && token[1] == 'n' && IS_SPACE((token[2]))) { + token += 3; + real_t x, y, z; + parseReal3(&x, &y, &z, &token); + if (callback.normal_cb) { + callback.normal_cb(user_data, x, y, z); + } + continue; + } + + // texcoord + if (token[0] == 'v' && token[1] == 't' && IS_SPACE((token[2]))) { + token += 3; + real_t x, y, z; // y and z are optional. default = 0.0 + parseReal3(&x, &y, &z, &token); + if (callback.texcoord_cb) { + callback.texcoord_cb(user_data, x, y, z); + } + continue; + } + + // face + if (token[0] == 'f' && IS_SPACE((token[1]))) { + token += 2; + token += strspn(token, " \t"); + + indices.clear(); + while (!IS_NEW_LINE(token[0])) { + vertex_index_t vi = parseRawTriple(&token); + + index_t idx; + idx.vertex_index = vi.v_idx; + idx.normal_index = vi.vn_idx; + idx.texcoord_index = vi.vt_idx; + + indices.push_back(idx); + size_t n = strspn(token, " \t\r"); + token += n; + } + + if (callback.index_cb && indices.size() > 0) { + callback.index_cb(user_data, &indices.at(0), + static_cast(indices.size())); + } + + continue; + } + + // use mtl + if ((0 == strncmp(token, "usemtl", 6)) && IS_SPACE((token[6]))) { + token += 7; + std::stringstream ss; + ss << token; + std::string namebuf = ss.str(); + + int newMaterialId = -1; + std::map::const_iterator it = + material_map.find(namebuf); + if (it != material_map.end()) { + newMaterialId = it->second; + } else { + // { warn!! 
material not found } + if (warn && (!callback.usemtl_cb)) { + (*warn) += "material [ " + namebuf + " ] not found in .mtl\n"; + } + } + + if (newMaterialId != material_id) { + material_id = newMaterialId; + } + + if (callback.usemtl_cb) { + callback.usemtl_cb(user_data, namebuf.c_str(), material_id); + } + + continue; + } + + // load mtl + if ((0 == strncmp(token, "mtllib", 6)) && IS_SPACE((token[6]))) { + if (readMatFn) { + token += 7; + + std::vector filenames; + SplitString(std::string(token), ' ', '\\', filenames); + + if (filenames.empty()) { + if (warn) { + (*warn) += + "Looks like empty filename for mtllib. Use default " + "material. \n"; + } + } else { + bool found = false; + for (size_t s = 0; s < filenames.size(); s++) { + if (material_filenames.count(filenames[s]) > 0) { + found = true; + continue; + } + + std::string warn_mtl; + std::string err_mtl; + bool ok = (*readMatFn)(filenames[s].c_str(), &materials, + &material_map, &warn_mtl, &err_mtl); + + if (warn && (!warn_mtl.empty())) { + (*warn) += warn_mtl; // This should be warn message. + } + + if (err && (!err_mtl.empty())) { + (*err) += err_mtl; + } + + if (ok) { + found = true; + material_filenames.insert(filenames[s]); + break; + } + } + + if (!found) { + if (warn) { + (*warn) += + "Failed to load material file(s). Use default " + "material.\n"; + } + } else { + if (callback.mtllib_cb) { + callback.mtllib_cb(user_data, &materials.at(0), + static_cast(materials.size())); + } + } + } + } + + continue; + } + + // group name + if (token[0] == 'g' && IS_SPACE((token[1]))) { + names.clear(); + + while (!IS_NEW_LINE(token[0])) { + std::string str = parseString(&token); + names.push_back(str); + token += strspn(token, " \t\r"); // skip tag + } + + assert(names.size() > 0); + + if (callback.group_cb) { + if (names.size() > 1) { + // create const char* array. 
+ names_out.resize(names.size() - 1); + for (size_t j = 0; j < names_out.size(); j++) { + names_out[j] = names[j + 1].c_str(); + } + callback.group_cb(user_data, &names_out.at(0), + static_cast(names_out.size())); + + } else { + callback.group_cb(user_data, NULL, 0); + } + } + + continue; + } + + // object name + if (token[0] == 'o' && IS_SPACE((token[1]))) { + // @todo { multiple object name? } + token += 2; + + std::stringstream ss; + ss << token; + std::string object_name = ss.str(); + + if (callback.object_cb) { + callback.object_cb(user_data, object_name.c_str()); + } + + continue; + } + +#if 0 // @todo + if (token[0] == 't' && IS_SPACE(token[1])) { + tag_t tag; + + token += 2; + std::stringstream ss; + ss << token; + tag.name = ss.str(); + + token += tag.name.size() + 1; + + tag_sizes ts = parseTagTriple(&token); + + tag.intValues.resize(static_cast(ts.num_ints)); + + for (size_t i = 0; i < static_cast(ts.num_ints); ++i) { + tag.intValues[i] = atoi(token); + token += strcspn(token, "/ \t\r") + 1; + } + + tag.floatValues.resize(static_cast(ts.num_reals)); + for (size_t i = 0; i < static_cast(ts.num_reals); ++i) { + tag.floatValues[i] = parseReal(&token); + token += strcspn(token, "/ \t\r") + 1; + } + + tag.stringValues.resize(static_cast(ts.num_strings)); + for (size_t i = 0; i < static_cast(ts.num_strings); ++i) { + std::stringstream ss; + ss << token; + tag.stringValues[i] = ss.str(); + token += tag.stringValues[i].size() + 1; + } + + tags.push_back(tag); + } +#endif + + // Ignore unknown command. 
+ } + + if (err) { + (*err) += errss.str(); + } + + return true; +} + +bool ObjReader::ParseFromFile(const std::string &filename, + const ObjReaderConfig &config) { + std::string mtl_search_path; + + if (config.mtl_search_path.empty()) { + // + // split at last '/'(for unixish system) or '\\'(for windows) to get + // the base directory of .obj file + // + size_t pos = filename.find_last_of("/\\"); + if (pos != std::string::npos) { + mtl_search_path = filename.substr(0, pos); + } + } else { + mtl_search_path = config.mtl_search_path; + } + + valid_ = LoadObj(&attrib_, &shapes_, &materials_, &warning_, &error_, + filename.c_str(), mtl_search_path.c_str(), + config.triangulate, config.vertex_color); + + return valid_; +} + +bool ObjReader::ParseFromString(const std::string &obj_text, + const std::string &mtl_text, + const ObjReaderConfig &config) { + std::stringbuf obj_buf(obj_text); + std::stringbuf mtl_buf(mtl_text); + + std::istream obj_ifs(&obj_buf); + std::istream mtl_ifs(&mtl_buf); + + MaterialStreamReader mtl_ss(mtl_ifs); + + valid_ = LoadObj(&attrib_, &shapes_, &materials_, &warning_, &error_, + &obj_ifs, &mtl_ss, config.triangulate, config.vertex_color); + + return valid_; +} + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif +} // namespace tinyobj + +#endif diff --git a/src/utilities.h b/src/utilities.h index d459e33..ab0a4ab 100644 --- a/src/utilities.h +++ b/src/utilities.h @@ -11,9 +11,24 @@ #define PI 3.1415926535897932384626422832795028841971f #define TWO_PI 6.2831853071795864769252867665590057683943f +#define PI_OVER_TWO 1.5707963267948966192313216916397514420985f +#define PI_OVER_FOUR 0.7853981633974483096156608458198757210492f #define SQRT_OF_ONE_THIRD 0.5773502691896257645091487805019574556476f #define EPSILON 0.00001f +#define ENABLE 1 +#define DISABLE 0 + +#define RUSSIAN_ROULETTE ENABLE +#define CONVERT_TO_SRGB ENABLE + +#define BB_CULLING DISABLE +#define USE_LBVH ENABLE +#define USE_BVH DISABLE +#define USE_BVH_MIDPOINT ENABLE +#define 
USE_BVH_SAH DISABLE +#define DISPLAY_HEATMAP DISABLE + class GuiDataContainer { public: