diff --git a/README.md b/README.md index 41b91f0..320ef65 100644 --- a/README.md +++ b/README.md @@ -5,15 +5,40 @@ CUDA Rasterizer **University of Pennsylvania, CIS 565: GPU Programming and Architecture, Project 4** -* (TODO) YOUR NAME HERE - * (TODO) [LinkedIn](), [personal website](), [twitter](), etc. -* Tested on: (TODO) Windows 22, i7-2222 @ 2.22GHz 22GB, GTX 222 222MB (Moore 2222 Lab) +* Name: Bowen Yang + * [LinkedIn](https://www.linkedin.com/in/%E5%8D%9A%E6%96%87-%E6%9D%A8-83bba6148) + * [GitHub](https://github.com/Grillnov) + * [Facebook](https://www.facebook.com/yang.bowen.7399) + * [Steam](https://steamcommunity.com/id/grillnov) +* Tested on: Windows 10 x64, i7-6800K @ 3.40GHz 32GB, GTX 1080 8GB (Personal computer at home) -### (TODO: Your README) +# Description -*DO NOT* leave the README to the last minute! It is a crucial part of the -project, and we will not be able to grade you without a good README. +Implement a CUDA rasterizer. +# Part 1: The final produce + +![](img/Plain.png) + +I love this little milk truck, especially with bilinear texture filtering. + +# Part 2: Rendering only normal + +![](img/Normal.png) + +The normal vectors are parsed as colors and shown directly. + +# Part 3: Only lines + +![](img/Lineframe.png) + +Line primitives are rasterized. + +# Part 4: Point cloud + +![](img/Pointcloud.png) + +Points are rasterized as point clouds. ### Credits diff --git a/img/Lineframe.png b/img/Lineframe.png new file mode 100644 index 0000000..3b2e310 Binary files /dev/null and b/img/Lineframe.png differ diff --git a/img/Normal.png b/img/Normal.png new file mode 100644 index 0000000..16be119 Binary files /dev/null and b/img/Normal.png differ diff --git a/img/Plain.png b/img/Plain.png new file mode 100644 index 0000000..dc38c45 Binary files /dev/null and b/img/Plain.png differ diff --git a/img/Pointcloud.png b/img/Pointcloud.png new file mode 100644 index 0000000..5f205b6 Binary files /dev/null and b/img/Pointcloud.png differ diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index a57f69f..d9247c3 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -6,5 +6,5 @@ set(SOURCE_FILES cuda_add_library(src ${SOURCE_FILES} - OPTIONS -arch=sm_20 + OPTIONS -arch=sm_60 ) diff --git a/src/rasterize.cu b/src/rasterize.cu index 1262a09..51d4af4 100644 --- a/src/rasterize.cu +++ b/src/rasterize.cu @@ -18,6 +18,14 @@ #include #include +#define DISPLAY_TRIANGLE true +#define DISPLAY_LINE false +#define DISPLAY_POINT false + +#define CULL_FACE true +#define DISPLAY_NORMAL false +#define MUTEX_ON false + namespace { typedef unsigned short VertexIndex; @@ -28,7 +36,7 @@ namespace { typedef unsigned char BufferByte; - enum PrimitiveType{ + enum PrimitiveType { Point = 1, Line = 2, Triangle = 3 @@ -41,13 +49,13 @@ namespace { // The attributes listed below might be useful, // but always feel free to modify on your own - glm::vec3 eyePos; // eye space position used for shading - glm::vec3 eyeNor; // eye space normal used for shading, cuz normal will go wrong after perspective transformation - // glm::vec3 col; - glm::vec2 texcoord0; - TextureData* dev_diffuseTex = NULL; - // int texWidth, texHeight; - // ... + glm::vec3 eyePos; + glm::vec3 eyeNor; + + glm::vec2 texcoord0; + TextureData* dev_diffuseTex = NULL; + int texWidth; + int texHeight; }; struct Primitive { @@ -62,11 +70,12 @@ namespace { // The attributes listed below might be useful, // but always feel free to modify on your own - // glm::vec3 eyePos; // eye space position used for shading - // glm::vec3 eyeNor; - // VertexAttributeTexcoord texcoord0; - // TextureData* dev_diffuseTex; - // ... + glm::vec3 eyePos; + glm::vec3 eyeNor; + VertexAttributeTexcoord texcoord0; + TextureData* dev_diffuseTex; + int texWidth; + int texHeight; }; struct PrimitiveDevBufPointers { @@ -95,12 +104,10 @@ namespace { // TODO: add more attributes when needed }; - } static std::map> mesh2PrimitivesMap; - static int width = 0; static int height = 0; @@ -108,69 +115,145 @@ static int totalNumPrimitives = 0; static Primitive *dev_primitives = NULL; static Fragment *dev_fragmentBuffer = NULL; static glm::vec3 *dev_framebuffer = NULL; - -static int * dev_depth = NULL; // you might need this buffer when doing depth test +static unsigned int *dev_mutex = NULL; +static float *dev_depth = NULL; /** - * Kernel that writes the image to the OpenGL PBO directly. - */ -__global__ +* Kernel that writes the image to the OpenGL PBO directly. +*/ +__global__ void sendImageToPBO(uchar4 *pbo, int w, int h, glm::vec3 *image) { - int x = (blockIdx.x * blockDim.x) + threadIdx.x; - int y = (blockIdx.y * blockDim.y) + threadIdx.y; - int index = x + (y * w); - - if (x < w && y < h) { - glm::vec3 color; - color.x = glm::clamp(image[index].x, 0.0f, 1.0f) * 255.0; - color.y = glm::clamp(image[index].y, 0.0f, 1.0f) * 255.0; - color.z = glm::clamp(image[index].z, 0.0f, 1.0f) * 255.0; - // Each thread writes one pixel location in the texture (textel) - pbo[index].w = 0; - pbo[index].x = color.x; - pbo[index].y = color.y; - pbo[index].z = color.z; - } + int x = (blockIdx.x * blockDim.x) + threadIdx.x; + int y = (blockIdx.y * blockDim.y) + threadIdx.y; + int index = x + (y * w); + + if (x < w && y < h) { + glm::vec3 color; + color.x = glm::clamp(image[index].x, 0.0f, 1.0f) * 255.0; + color.y = glm::clamp(image[index].y, 0.0f, 1.0f) * 255.0; + color.z = glm::clamp(image[index].z, 0.0f, 1.0f) * 255.0; + // Each thread writes one pixel location in the texture (textel) + pbo[index].w = 0; + pbo[index].x = color.x; + pbo[index].y = color.y; + pbo[index].z = color.z; + } } -/** +/** * Writes fragment colors to the framebuffer */ + +static __host__ __device__ glm::vec3 lerp(const glm::vec3 &x, const glm::vec3 &y, const float &alpha) +{ + return (1.0f - alpha) * x + alpha * y; +} + __global__ void render(int w, int h, Fragment *fragmentBuffer, glm::vec3 *framebuffer) { - int x = (blockIdx.x * blockDim.x) + threadIdx.x; - int y = (blockIdx.y * blockDim.y) + threadIdx.y; - int index = x + (y * w); + int x = (blockIdx.x * blockDim.x) + threadIdx.x; + int y = (blockIdx.y * blockDim.y) + threadIdx.y; + int index = x + (y * w); - if (x < w && y < h) { - framebuffer[index] = fragmentBuffer[index].color; + if (x < w && y < h) { + framebuffer[index] = fragmentBuffer[index].color; // TODO: add your fragment shader code here + int index = x + (y * w); + glm::vec3 &buffer = framebuffer[index]; + const Fragment &frag = fragmentBuffer[index]; + const glm::vec3 lightDir(50.f, 50.f, 100.f); - } + // clear frame buffer + buffer = glm::vec3(0.f); + + const glm::vec3 L = glm::normalize(lightDir - frag.eyePos); + const glm::vec3 &N = frag.eyeNor; + const glm::vec3 V = glm::normalize(-frag.eyePos); + const glm::vec3 H = glm::normalize(V + L); + + glm::vec3 texelFetched = glm::vec3(255.0f, 255.0f, 255.0f); + + if (frag.dev_diffuseTex != NULL) + { + int texW = frag.texWidth; + int texH = frag.texHeight; + + float x = (texW - 1.0f) * fragmentBuffer[index].texcoord0[0]; + float y = (texH - 1.0f) * fragmentBuffer[index].texcoord0[1]; + + int xLower = std::floor(x); + int yLower = std::floor(y); + int xUpper = (xLower + 1) % texW; + int yUpper = (yLower + 1) % texH; + int idxLowerLeft = xLower + yLower * texW; + int idxLowerRight = xUpper + yLower * texW; + int idxUpperLeft = xLower + yUpper * texW; + int idxUpperRight = xUpper + yUpper * texW; + + glm::vec3 lowerLeftTexel = glm::vec3(frag.dev_diffuseTex[idxLowerLeft * 3], + frag.dev_diffuseTex[idxLowerLeft * 3 + 1], frag.dev_diffuseTex[idxLowerLeft * 3 + 2]); + glm::vec3 lowerRightTexel = glm::vec3(frag.dev_diffuseTex[idxLowerRight * 3], + frag.dev_diffuseTex[idxLowerRight * 3 + 1], frag.dev_diffuseTex[idxLowerRight * 3 + 2]); + glm::vec3 upperLeftTexel = glm::vec3(frag.dev_diffuseTex[idxUpperLeft * 3], + frag.dev_diffuseTex[idxUpperLeft * 3 + 1], frag.dev_diffuseTex[idxUpperLeft * 3 + 2]); + glm::vec3 upperRightTexel = glm::vec3(frag.dev_diffuseTex[idxUpperRight * 3], + frag.dev_diffuseTex[idxUpperRight * 3 + 1], frag.dev_diffuseTex[idxUpperRight * 3 + 2]); + //Coefs for bilinear filtering + float alphaX = x - static_cast(xLower); + float alphaY = y - static_cast(yLower); + + glm::vec3 lowerBelt = lerp(lowerLeftTexel, lowerRightTexel, alphaX); + glm::vec3 upperBelt = lerp(upperLeftTexel, upperRightTexel, alphaX); + + texelFetched = lerp(lowerBelt, upperBelt, alphaY); + + } + + const float factor = 1.0f / 255.0f; + glm::vec3 colorNormalized = factor * texelFetched; + if (DISPLAY_TRIANGLE) + { + buffer = glm::max(0.f, glm::dot(L, N)) * colorNormalized; + } + if (DISPLAY_NORMAL) + { + glm::vec3 norm = frag.eyeNor; + buffer = glm::normalize(norm); + } + if (DISPLAY_POINT || DISPLAY_LINE) + { + buffer = frag.color; + } + + buffer += glm::vec3(pow(glm::max(0.f, glm::dot(N, H)), 128.0f)); + //buffer = glm::normalize(buffer); + } } /** - * Called once at the beginning of the program to allocate memory. - */ +* Called once at the beginning of the program to allocate memory. +*/ void rasterizeInit(int w, int h) { - width = w; - height = h; + width = w; + height = h; cudaFree(dev_fragmentBuffer); cudaMalloc(&dev_fragmentBuffer, width * height * sizeof(Fragment)); cudaMemset(dev_fragmentBuffer, 0, width * height * sizeof(Fragment)); - cudaFree(dev_framebuffer); - cudaMalloc(&dev_framebuffer, width * height * sizeof(glm::vec3)); - cudaMemset(dev_framebuffer, 0, width * height * sizeof(glm::vec3)); - - cudaFree(dev_depth); - cudaMalloc(&dev_depth, width * height * sizeof(int)); + cudaFree(dev_framebuffer); + cudaMalloc(&dev_framebuffer, width * height * sizeof(glm::vec3)); + cudaMemset(dev_framebuffer, 0, width * height * sizeof(glm::vec3)); + cudaFree(dev_depth); + cudaMalloc(&dev_depth, width * height * sizeof(float)); + //==================================== + cudaMalloc(&dev_mutex, width * height * sizeof(unsigned int)); + cudaMemset(dev_mutex, 0, width * height * sizeof(unsigned int)); checkCUDAError("rasterizeInit"); } __global__ -void initDepth(int w, int h, int * depth) +void initDepth(int w, int h, float * depth) { int x = (blockIdx.x * blockDim.x) + threadIdx.x; int y = (blockIdx.y * blockDim.y) + threadIdx.y; @@ -187,9 +270,9 @@ void initDepth(int w, int h, int * depth) * kern function with support for stride to sometimes replace cudaMemcpy * One thread is responsible for copying one component */ -__global__ +__global__ void _deviceBufferCopy(int N, BufferByte* dev_dst, const BufferByte* dev_src, int n, int byteStride, int byteOffset, int componentTypeByteSize) { - + // Attribute (vec3 position) // component (3 * float) // byte (4 * byte) @@ -202,20 +285,20 @@ void _deviceBufferCopy(int N, BufferByte* dev_dst, const BufferByte* dev_src, in int offset = i - count * n; // which component of the attribute for (int j = 0; j < componentTypeByteSize; j++) { - - dev_dst[count * componentTypeByteSize * n - + offset * componentTypeByteSize + + dev_dst[count * componentTypeByteSize * n + + offset * componentTypeByteSize + j] - = + = - dev_src[byteOffset - + count * (byteStride == 0 ? componentTypeByteSize * n : byteStride) - + offset * componentTypeByteSize + dev_src[byteOffset + + count * (byteStride == 0 ? componentTypeByteSize * n : byteStride) + + offset * componentTypeByteSize + j]; } } - + } @@ -235,7 +318,7 @@ void _nodeMatrixTransform( } glm::mat4 getMatrixFromNodeMatrixVector(const tinygltf::Node & n) { - + glm::mat4 curMatrix(1.0); const std::vector &m = n.matrix; @@ -247,7 +330,8 @@ glm::mat4 getMatrixFromNodeMatrixVector(const tinygltf::Node & n) { curMatrix[i][j] = (float)m.at(4 * i + j); } } - } else { + } + else { // no matrix, use rotation, scale, translation if (n.translation.size() > 0) { @@ -275,12 +359,12 @@ glm::mat4 getMatrixFromNodeMatrixVector(const tinygltf::Node & n) { return curMatrix; } -void traverseNode ( +void traverseNode( std::map & n2m, const tinygltf::Scene & scene, const std::string & nodeString, const glm::mat4 & parentMatrix - ) +) { const tinygltf::Node & n = scene.nodes.at(nodeString); glm::mat4 M = parentMatrix * getMatrixFromNodeMatrixVector(n); @@ -537,7 +621,7 @@ void rasterizeSetBuffers(const tinygltf::Scene & scene) { size_t s = image.image.size() * sizeof(TextureData); cudaMalloc(&dev_diffuseTex, s); cudaMemcpy(dev_diffuseTex, &image.image.at(0), s, cudaMemcpyHostToDevice); - + diffuseTexWidth = image.width; diffuseTexHeight = image.height; @@ -554,7 +638,7 @@ void rasterizeSetBuffers(const tinygltf::Scene & scene) { // ---------Node hierarchy transform-------- cudaDeviceSynchronize(); - + dim3 numBlocksNodeTransform((numVertices + numThreadsPerBlock.x - 1) / numThreadsPerBlock.x); _nodeMatrixTransform << > > ( numVertices, @@ -595,21 +679,21 @@ void rasterizeSetBuffers(const tinygltf::Scene & scene) { } // for each node } - + // 3. Malloc for dev_primitives { cudaMalloc(&dev_primitives, totalNumPrimitives * sizeof(Primitive)); } - + // Finally, cudaFree raw dev_bufferViews { std::map::const_iterator it(bufferViewDevPointers.begin()); std::map::const_iterator itEnd(bufferViewDevPointers.end()); - - //bufferViewDevPointers + + //bufferViewDevPointers for (; it != itEnd; it++) { cudaFree(it->second); @@ -623,25 +707,39 @@ void rasterizeSetBuffers(const tinygltf::Scene & scene) { -__global__ +__global__ void _vertexTransformAndAssembly( - int numVertices, - PrimitiveDevBufPointers primitive, - glm::mat4 MVP, glm::mat4 MV, glm::mat3 MV_normal, + int numVertices, + PrimitiveDevBufPointers primitive, + glm::mat4 MVP, glm::mat4 MV, glm::mat3 MV_normal, int width, int height) { // vertex id int vid = (blockIdx.x * blockDim.x) + threadIdx.x; - if (vid < numVertices) { - + VertexOut &out = primitive.dev_verticesOut[vid]; + const glm::vec3 &pos = primitive.dev_position[vid]; + const glm::vec3 &nor = primitive.dev_normal[vid]; + if (vid < numVertices) + { // TODO: Apply vertex transformation here // Multiply the MVP matrix for each vertex position, this will transform everything into clipping space // Then divide the pos by its w element to transform into NDC space // Finally transform x and y to viewport space - + out.pos = MVP * glm::vec4(pos, 1.f); + out.pos /= out.pos.w; + out.pos.x = (1.f - out.pos.x) * .5f * width; + out.pos.y = (1.f - out.pos.y) * .5f * height; // TODO: Apply vertex assembly here // Assemble all attribute arraies into the primitive array - + out.eyePos = multiplyMV(MV, glm::vec4(pos, 1.f)); + out.eyeNor = glm::normalize(MV_normal * nor); + if (primitive.dev_texcoord0 != NULL) + { + out.texcoord0 = primitive.dev_texcoord0[vid]; + } + out.dev_diffuseTex = primitive.dev_diffuseTex; + out.texWidth = primitive.diffuseTexWidth; + out.texHeight = primitive.diffuseTexHeight; } } @@ -649,7 +747,7 @@ void _vertexTransformAndAssembly( static int curPrimitiveBeginId = 0; -__global__ +__global__ void _primitiveAssembly(int numIndices, int curPrimitiveBeginId, Primitive* dev_primitives, PrimitiveDevBufPointers primitive) { // index id @@ -660,86 +758,368 @@ void _primitiveAssembly(int numIndices, int curPrimitiveBeginId, Primitive* dev_ // TODO: uncomment the following code for a start // This is primitive assembly for triangles - //int pid; // id for cur primitives vector - //if (primitive.primitiveMode == TINYGLTF_MODE_TRIANGLES) { - // pid = iid / (int)primitive.primitiveType; - // dev_primitives[pid + curPrimitiveBeginId].v[iid % (int)primitive.primitiveType] - // = primitive.dev_verticesOut[primitive.dev_indices[iid]]; - //} + int pid; + + if (primitive.primitiveMode == TINYGLTF_MODE_TRIANGLES) + { + pid = iid / (int)primitive.primitiveType; + dev_primitives[pid + curPrimitiveBeginId].primitiveType = Triangle; + dev_primitives[pid + curPrimitiveBeginId].v[iid % (int)primitive.primitiveType] + = primitive.dev_verticesOut[primitive.dev_indices[iid]]; + } + else if (primitive.primitiveMode == TINYGLTF_MODE_LINE) + { + pid = iid / (int)primitive.primitiveType; + dev_primitives[pid + curPrimitiveBeginId].primitiveType = Line; + dev_primitives[pid + curPrimitiveBeginId].v[iid % (int)primitive.primitiveType] + = primitive.dev_verticesOut[primitive.dev_indices[iid]]; + } + else if (primitive.primitiveMode == TINYGLTF_MODE_POINTS) + { + pid = iid / (int)primitive.primitiveType; + dev_primitives[pid + curPrimitiveBeginId].primitiveType = Point; + dev_primitives[pid + curPrimitiveBeginId].v[iid % (int)primitive.primitiveType] + = primitive.dev_verticesOut[primitive.dev_indices[iid]]; + } + } +} + +__host__ __device__ glm::vec2 getVec2AtCoord(const glm::vec3 &baryCoord, const glm::vec2 input[3]) +{ + return baryCoord.x * input[0] + baryCoord.y * input[1] + baryCoord.z * input[2]; +} +__host__ __device__ float getFloatAtCoord(const glm::vec3 barycentricCoord, const float input[3]) +{ + return barycentricCoord.x * input[0] + barycentricCoord.y * input[1] + barycentricCoord.z * input[2]; +} +__host__ __device__ glm::vec3 getVec3AtCoord(const glm::vec3 barycentricCoord, const glm::vec3 input[3]) +{ + return barycentricCoord.x * input[0] + barycentricCoord.y * input[1] + barycentricCoord.z * input[2]; +} +__host__ __device__ glm::vec2 getTexcoordAtCoord(const glm::vec3 &baryCoord, + const glm::vec2 _texcoord[3], const float triDepth_1[3]) +{ + const glm::vec2 texcoord[3] = + { + _texcoord[0] * triDepth_1[0], + _texcoord[1] * triDepth_1[1], + _texcoord[2] * triDepth_1[2] + }; + const glm::vec2 numerator = getVec2AtCoord(baryCoord, texcoord); + const float denomenator = getFloatAtCoord(baryCoord, triDepth_1); + + return numerator / denomenator; +} +__global__ +void rasterizePrimitive(int totalNumPrimitives, Primitive *dev_primitives, + Fragment *dev_fragmentBuffer, float *dev_depth, int width, int height, + unsigned int *dev_mutex) +{ + int idx = blockDim.x * blockIdx.x + threadIdx.x; - // TODO: other primitive types (point, line) + if (idx >= totalNumPrimitives) + { + return; + } + + const Primitive &primitive = dev_primitives[idx]; + const glm::vec3 tri[3] = + { + glm::vec3(primitive.v[0].pos), + glm::vec3(primitive.v[1].pos), + glm::vec3(primitive.v[2].pos), + }; + const glm::vec3 eyePos[3] = + { + primitive.v[0].eyePos, + primitive.v[1].eyePos, + primitive.v[2].eyePos + }; + const glm::vec3 eyeNor[3] = + { + primitive.v[0].eyeNor, + primitive.v[1].eyeNor, + primitive.v[2].eyeNor + }; + const glm::vec2 texcoord0[3] = + { + primitive.v[0].texcoord0, + primitive.v[1].texcoord0, + primitive.v[2].texcoord0 + }; + const float triDepth_1[3] = + { + 1.f / tri[0].z, + 1.f / tri[1].z, + 1.f / tri[2].z + }; + + if (primitive.primitiveType == Triangle) + { + TextureData *pDiffuseTexData = primitive.v[0].dev_diffuseTex; + int diffuseTexWidth = 0; + int diffuseTexHeight = 0; + + if (CULL_FACE) + { + if (calculateSignedArea(tri) >= 0.f) + { + return; + } + } + + if (pDiffuseTexData != NULL) + { + diffuseTexWidth = primitive.v[0].texWidth; + diffuseTexHeight = primitive.v[0].texHeight; + } + + const AABB aabb = getAABBForTriangle(tri); + + const int left = glm::max(0, (int)aabb.min.x - 1); + const int right = glm::min(width, (int)aabb.max.x + 1); + const int bottom = glm::max(0, (int)aabb.min.y - 1); + const int top = glm::min(height, (int)aabb.max.y + 1); + + if (left >= right || bottom >= top) + { + return; + } + + for (int i = left; i < right; ++i) + { + for (int j = bottom; j < top; ++j) + { + const int pixelId = i + j * width; + const glm::vec2 p(i + .5f, j + .5f); + const glm::vec3 baryCoord = calculateBarycentricCoordinate(tri, p); + + // outsides triangle + if (!isBarycentricCoordInBounds(baryCoord)) continue; + + const float z = getZAtCoordinate(baryCoord, eyePos); + // too far or too near + //if (z < 0.f || z > 1.f) continue; + + // depth test, account for race condition when accessing depth buffer + const float depth = z; + bool isOccluded = true; + + if (MUTEX_ON) + { + bool isSet = false; + while (!isSet) + { + isSet = atomicCAS(&dev_mutex[pixelId], 0, 1) == 0; + if (isSet) + { + if (dev_depth[pixelId] > depth) + { + dev_depth[pixelId] = depth; + isOccluded = false; + } + dev_mutex[pixelId] = 0; + } + } + } + else + { + if (dev_depth[pixelId] > depth) + { + dev_depth[pixelId] = depth; + isOccluded = false; + } + } + + if (isOccluded) + { + continue; + } + + Fragment &fragment = dev_fragmentBuffer[pixelId]; + fragment.eyePos = getVec3AtCoord(baryCoord, eyePos); + fragment.eyeNor = glm::normalize(getVec3AtCoord(baryCoord, eyeNor)); + fragment.texcoord0 = getTexcoordAtCoord(baryCoord, texcoord0, triDepth_1); + if (pDiffuseTexData != NULL) + { + fragment.dev_diffuseTex = pDiffuseTexData; + fragment.texWidth = diffuseTexWidth; + fragment.texHeight = diffuseTexHeight; + } + } + } + } +} + + +__host__ __device__ +void rasterizeLineHelper(const glm::vec3 &a, const glm::vec3 &b, + Fragment *dev_fragmentBuffer, int w, int h) +{ + const int left = glm::max(0, (int)glm::min(a.x, b.x)); + const int right = glm::min(w, (int)glm::max(a.x, b.x) + 1); + const int bottom = glm::max(0, (int)glm::min(a.y, b.y)); + const int top = glm::min(h, (int)glm::max(a.y, b.y) + 1); + + // outsides window + if (left >= right && bottom >= top) + { + return; + } + + int begin = right - left >= top - bottom ? left : bottom; + int end = right - left >= top - bottom ? right : top; + + for (int i = 0; i <= end - begin; ++i) + { + const glm::vec3 p = lerp(a, b, (i + 0.0f) / (end - begin)); + const int index = (int)p.x + (int)p.y * w; + + dev_fragmentBuffer[index].color = glm::vec3(1.0f, 1.0f, 1.0f); } - } +__global__ +void rasterizeLine(int totalNumPrimitives, Primitive *dev_primitives, + Fragment *dev_fragmentBuffer, int w, int h) +{ + int idx = blockDim.x * blockIdx.x + threadIdx.x; + + if (idx >= totalNumPrimitives) + { + return; + } + + const Primitive &primitive = dev_primitives[idx]; + const glm::vec3 &v0 = glm::vec3(primitive.v[0].pos); + const glm::vec3 &v1 = glm::vec3(primitive.v[1].pos); + const glm::vec3 &v2 = glm::vec3(primitive.v[2].pos); + + rasterizeLineHelper(v0, v1, dev_fragmentBuffer, w, h); + rasterizeLineHelper(v1, v2, dev_fragmentBuffer, w, h); + rasterizeLineHelper(v2, v0, dev_fragmentBuffer, w, h); +} + +__global__ +void rasterizePoint(int totalNumPrimitives, Primitive *dev_primitives, + Fragment *dev_fragmentBuffer, int w, int h) +{ + int idx = blockDim.x * blockIdx.x + threadIdx.x; + + if (idx >= totalNumPrimitives) + { + return; + } + + const Primitive &primitive = dev_primitives[idx]; + const glm::vec3 tri[3] = + { + glm::vec3(primitive.v[0].pos), + glm::vec3(primitive.v[1].pos), + glm::vec3(primitive.v[2].pos) + }; + for (int i = 0; i < 3; ++i) + { + const int x = (int)tri[i].x; + const int y = (int)tri[i].y; + const int index = x + y * w; + if (x < 0 || x >= w || y < 0 || y >= h) + { + continue; + } + dev_fragmentBuffer[index].color = glm::vec3(1.0f, 1.0f, 1.0f); + } +} /** - * Perform rasterization. - */ +* Perform rasterization. +*/ void rasterize(uchar4 *pbo, const glm::mat4 & MVP, const glm::mat4 & MV, const glm::mat3 MV_normal) { - int sideLength2d = 8; - dim3 blockSize2d(sideLength2d, sideLength2d); - dim3 blockCount2d((width - 1) / blockSize2d.x + 1, + int sideLength2d = 8; + dim3 blockSize2d(sideLength2d, sideLength2d); + dim3 blockCount2d((width - 1) / blockSize2d.x + 1, (height - 1) / blockSize2d.y + 1); - // Execute your rasterization pipeline here // (See README for rasterization pipeline outline.) // Vertex Process & primitive assembly + curPrimitiveBeginId = 0; + dim3 numThreadsPerBlock(128); + + auto it = mesh2PrimitivesMap.begin(); + auto itEnd = mesh2PrimitivesMap.end(); + + float time_vtfaa = 0; + float time_primitive = 0; + + for (; it != itEnd; ++it) { - curPrimitiveBeginId = 0; - dim3 numThreadsPerBlock(128); - - auto it = mesh2PrimitivesMap.begin(); - auto itEnd = mesh2PrimitivesMap.end(); - - for (; it != itEnd; ++it) { - auto p = (it->second).begin(); // each primitive - auto pEnd = (it->second).end(); - for (; p != pEnd; ++p) { - dim3 numBlocksForVertices((p->numVertices + numThreadsPerBlock.x - 1) / numThreadsPerBlock.x); - dim3 numBlocksForIndices((p->numIndices + numThreadsPerBlock.x - 1) / numThreadsPerBlock.x); - - _vertexTransformAndAssembly << < numBlocksForVertices, numThreadsPerBlock >> >(p->numVertices, *p, MVP, MV, MV_normal, width, height); - checkCUDAError("Vertex Processing"); - cudaDeviceSynchronize(); - _primitiveAssembly << < numBlocksForIndices, numThreadsPerBlock >> > - (p->numIndices, - curPrimitiveBeginId, - dev_primitives, + auto p = (it->second).begin(); // each primitive + auto pEnd = (it->second).end(); + for (; p != pEnd; ++p) + { + dim3 numBlocksForVertices((p->numVertices + numThreadsPerBlock.x - 1) / numThreadsPerBlock.x); + dim3 numBlocksForIndices((p->numIndices + numThreadsPerBlock.x - 1) / numThreadsPerBlock.x); + + _vertexTransformAndAssembly<<>>(p->numVertices, *p, MVP, MV, MV_normal, width, height); + checkCUDAError("Vertex Processing"); + cudaDeviceSynchronize(); + + _primitiveAssembly<<>> + (p->numIndices, + curPrimitiveBeginId, + dev_primitives, *p); - checkCUDAError("Primitive Assembly"); - - curPrimitiveBeginId += p->numPrimitives; - } + checkCUDAError("Primitive Assembly"); + curPrimitiveBeginId += p->numPrimitives; } - - checkCUDAError("Vertex Processing and Primitive Assembly"); } + checkCUDAError("Vertex Processing and Primitive Assembly"); + cudaMemset(dev_fragmentBuffer, 0, width * height * sizeof(Fragment)); - initDepth << > >(width, height, dev_depth); - + initDepth<<>>(width, height, dev_depth); + // TODO: rasterize + dim3 primitiveBlocks((totalNumPrimitives + numThreadsPerBlock.x - 1) / numThreadsPerBlock.x); + if (DISPLAY_TRIANGLE) + { + rasterizePrimitive<<>>( + totalNumPrimitives, dev_primitives, dev_fragmentBuffer, dev_depth, + width, height, dev_mutex); + } + if (DISPLAY_LINE) + { + rasterizeLine<<>>( + totalNumPrimitives, dev_primitives, dev_fragmentBuffer, + width, height); + } + if (DISPLAY_POINT) + { + rasterizePoint<<>>( + totalNumPrimitives, dev_primitives, dev_fragmentBuffer, + width, height); + } + checkCUDAError("rasterize primitive"); + + render<<>>(width, height, dev_fragmentBuffer, dev_framebuffer); - // Copy depthbuffer colors into framebuffer - render << > >(width, height, dev_fragmentBuffer, dev_framebuffer); checkCUDAError("fragment shader"); - // Copy framebuffer into OpenGL buffer for OpenGL previewing - sendImageToPBO<<>>(pbo, width, height, dev_framebuffer); - checkCUDAError("copy render result to pbo"); + // Copy framebuffer into OpenGL buffer for OpenGL previewing + sendImageToPBO<<>>(pbo, width, height, dev_framebuffer); + checkCUDAError("copy render result to pbo"); } /** - * Called once at the end of the program to free CUDA memory. - */ +* Called once at the end of the program to free CUDA memory. +*/ void rasterizeFree() { - // deconstruct primitives attribute/indices device buffer + // deconstruct primitives attribute/indices device buffer auto it(mesh2PrimitivesMap.begin()); auto itEnd(mesh2PrimitivesMap.end()); @@ -753,24 +1133,25 @@ void rasterizeFree() { cudaFree(p->dev_verticesOut); - + //TODO: release other attributes and materials } } //////////// - cudaFree(dev_primitives); - dev_primitives = NULL; + cudaFree(dev_primitives); + dev_primitives = NULL; cudaFree(dev_fragmentBuffer); dev_fragmentBuffer = NULL; - cudaFree(dev_framebuffer); - dev_framebuffer = NULL; + cudaFree(dev_framebuffer); + dev_framebuffer = NULL; cudaFree(dev_depth); dev_depth = NULL; - checkCUDAError("rasterize Free"); -} + cudaFree(dev_mutex); + checkCUDAError("rasterize Free"); +} \ No newline at end of file