|
| 1 | +#include <string> |
| 2 | +#include "core/compiler.h" |
| 3 | +#include "core/lowering/passes/passes.h" |
| 4 | +#include "gtest/gtest.h" |
| 5 | +#include "tests/util/util.h" |
| 6 | +#include "torch/csrc/jit/ir/irparser.h" |
| 7 | + |
| 8 | +TEST(Converters, ATenScaledDotProductAttentionConvertsCorrectly) { |
| 9 | + const auto graph = R"IR( |
| 10 | + graph(%query : Tensor, %key : Tensor, %value : Tensor): |
| 11 | + %none : NoneType = prim::Constant() |
| 12 | + %0 : float = prim::Constant[value=0.]() |
| 13 | + %false : bool = prim::Constant[value=0]() |
| 14 | + %3 : Tensor = aten::scaled_dot_product_attention(%query, %key, %value, %none, %0, %false) |
| 15 | + return (%3))IR"; |
| 16 | + |
| 17 | + auto g = std::make_shared<torch::jit::Graph>(); |
| 18 | + torch::jit::parseIR(graph, &*g); |
| 19 | + |
| 20 | + auto query = at::rand({32, 8, 128, 64}, {at::kCUDA}); |
| 21 | + auto key = at::rand({32, 8, 128, 64}, {at::kCUDA}); |
| 22 | + auto value = at::rand({32, 8, 128, 64}, {at::kCUDA}); |
| 23 | + auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); |
| 24 | + auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {query, key, value}); |
| 25 | + |
| 26 | + torch_tensorrt::core::lowering::passes::UnpackScaledDotProductAttention(g); |
| 27 | + |
| 28 | + params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); |
| 29 | + auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {query, key, value}); |
| 30 | + |
| 31 | + ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0], 1e-5)); |
| 32 | +} |
| 33 | + |
| 34 | +TEST(Converters, ATenScaledDotProductAttnMaskFloatConvertsCorrectly) { |
| 35 | + const auto graph = R"IR( |
| 36 | + graph(%query : Tensor, %key : Tensor, %value : Tensor, %attn_mask : Tensor): |
| 37 | + %0 : float = prim::Constant[value=0.]() |
| 38 | + %false : bool = prim::Constant[value=0]() |
| 39 | + %3 : Tensor = aten::scaled_dot_product_attention(%query, %key, %value, %attn_mask, %0, %false) |
| 40 | + return (%3))IR"; |
| 41 | + |
| 42 | + auto g = std::make_shared<torch::jit::Graph>(); |
| 43 | + torch::jit::parseIR(graph, &*g); |
| 44 | + |
| 45 | + auto query = at::rand({32, 8, 128, 64}, {at::kCUDA}); |
| 46 | + auto key = at::rand({32, 8, 128, 64}, {at::kCUDA}); |
| 47 | + auto value = at::rand({32, 8, 128, 64}, {at::kCUDA}); |
| 48 | + auto attn_mask = at::rand({32, 8, 128, 128}, {at::kCUDA}); |
| 49 | + auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); |
| 50 | + auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {query, key, value, attn_mask}); |
| 51 | + |
| 52 | + torch_tensorrt::core::lowering::passes::UnpackScaledDotProductAttention(g); |
| 53 | + |
| 54 | + params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); |
| 55 | + auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {query, key, value, attn_mask}); |
| 56 | + |
| 57 | + ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0], 1e-5)); |
| 58 | +} |
| 59 | + |
| 60 | +TEST(Converters, ATenScaledDotProductAttnMaskBoolConvertsCorrectly) { |
| 61 | + const auto graph = R"IR( |
| 62 | + graph(%query : Tensor, %key : Tensor, %value : Tensor, %attn_mask : Tensor): |
| 63 | + %0 : float = prim::Constant[value=0.]() |
| 64 | + %false : bool = prim::Constant[value=0]() |
| 65 | + %3 : Tensor = aten::scaled_dot_product_attention(%query, %key, %value, %attn_mask, %0, %false) |
| 66 | + return (%3))IR"; |
| 67 | + |
| 68 | + auto g = std::make_shared<torch::jit::Graph>(); |
| 69 | + torch::jit::parseIR(graph, &*g); |
| 70 | + |
| 71 | + auto query = at::rand({32, 8, 128, 64}, {at::kCUDA}); |
| 72 | + auto key = at::rand({32, 8, 128, 64}, {at::kCUDA}); |
| 73 | + auto value = at::rand({32, 8, 128, 64}, {at::kCUDA}); |
| 74 | + auto attn_mask = at::randint(0, 2, {32, 8, 128, 128}, at::kCUDA).to(at::kBool); |
| 75 | + auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); |
| 76 | + auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {query, key, value, attn_mask}); |
| 77 | + |
| 78 | + torch_tensorrt::core::lowering::passes::UnpackScaledDotProductAttention(g); |
| 79 | + |
| 80 | + params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); |
| 81 | + auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {query, key, value, attn_mask}); |
| 82 | + |
| 83 | + ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0], 1e-5)); |
| 84 | +} |
0 commit comments