pytorch
diff --git a/‎.github/workflows/assigner.yml‎
Lines changed: 3 additions & 0 deletions b/‎.github/workflows/assigner.yml‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎.github/workflows/build-test.yml‎
Lines changed: 3 additions & 3 deletions b/‎.github/workflows/build-test.yml‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎.github/workflows/label.yml‎
Lines changed: 4 additions & 3 deletions b/‎.github/workflows/label.yml‎
Lines changed: 4 additions & 3 deletions
diff --git a/‎.github/workflows/linux-test.yml‎
Lines changed: 4 additions & 1 deletion b/‎.github/workflows/linux-test.yml‎
Lines changed: 4 additions & 1 deletion
diff --git a/‎README.md‎
Lines changed: 1 addition & 1 deletion b/‎README.md‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎core/conversion/converters/BUILD‎
100755 → 100644
Lines changed: 1 addition & 0 deletions b/‎core/conversion/converters/BUILD‎
100755 → 100644
Lines changed: 1 addition & 0 deletions
diff --git a/‎core/conversion/converters/impl/internal_ops.cpp‎
Lines changed: 46 additions & 0 deletions b/‎core/conversion/converters/impl/internal_ops.cpp‎
Lines changed: 46 additions & 0 deletions
diff --git a/‎core/conversion/converters/impl/unary.cpp‎
Lines changed: 16 additions & 1 deletion b/‎core/conversion/converters/impl/unary.cpp‎
Lines changed: 16 additions & 1 deletion
diff --git a/‎core/lowering/lowering.cpp‎
Lines changed: 1 addition & 0 deletions b/‎core/lowering/lowering.cpp‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎core/lowering/passes/BUILD‎
Lines changed: 1 addition & 0 deletions b/‎core/lowering/passes/BUILD‎
Lines changed: 1 addition & 0 deletions
@@ -16,6 +16,9 @@ on:
 
 jobs:
   assign:
+    permissions:
+      contents: read
+      pull-requests: write
     runs-on: ubuntu-latest
     steps:
     - name: Checkout
 
@@ -26,6 +26,9 @@ jobs:
 
   build:
     needs: generate-matrix
+    permissions:
+      id-token: write
+      contents: read
     strategy:
       fail-fast: false
       matrix:
@@ -50,9 +53,6 @@ jobs:
       package-name: ${{ matrix.package-name }}
       smoke-test-script: ${{ matrix.smoke-test-script }}
       trigger-event: ${{ github.event_name }}
-    secrets:
-      AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }}
-      AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }}
 
   tests-py-torchscript-fe:
     name: Test torchscript frontend [Python]
 
@@ -10,11 +10,12 @@ on: [pull_request_target]
 
 jobs:
   label:
-
+    permissions:
+      contents: read
+      pull-requests: write
     runs-on: ubuntu-latest
-
     steps:
-    - uses: actions/labeler@v2
+    - uses: actions/labeler@v4
       with:
         repo-token: "${{ secrets.GITHUB_TOKEN }}"
         configuration-path: .github/pr-labels.yml
@@ -67,6 +67,7 @@ jobs:
       CU_VERSION: ${{ matrix.desired_cuda }}
       SCRIPT: ${{ inputs.script }}
       RUNNER_TEST_RESULTS_DIR: /tmp/test_results
+      ARCH: ${{ inputs.architecture }}
     name: ${{ inputs.job-name }}-${{ matrix.desired_cuda }}
     runs-on: ${{ matrix.validation_runner }}
     container:
@@ -100,6 +101,8 @@ jobs:
           ref: ${{ inputs.ref }}
           setup-miniconda: ${{ inputs.setup-miniconda }}
           python-version: ${{ env.PYTHON_VERSION }}
+          cuda-version: ${{ env.CU_VERSION }}
+          arch: ${{ env.ARCH }}
       - name: Run Pre-Script with Caching
         if: ${{ inputs.pre-script != '' }}
         uses: ./test-infra/.github/actions/run-script-with-cache
@@ -191,4 +194,4 @@ jobs:
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }}-${{ inputs.job-name }}
-  cancel-in-progress: true
+  cancel-in-progress: true
@@ -116,7 +116,7 @@ torch.jit.save(trt_ts_module, "trt_torchscript_module.ts") # save the TRT embedd
 These are the following dependencies used to verify the testcases. Torch-TensorRT can work with other versions, but the tests are not guaranteed to pass.
 
 - Bazel 5.2.0
-- Libtorch 2.2.0.dev (latest nightly) (built with CUDA 12.1)
+- Libtorch 2.3.0.dev (latest nightly) (built with CUDA 12.1)
 - CUDA 12.1
 - cuDNN 8.9.5
 - TensorRT 8.6.1
 
@@ -66,6 +66,7 @@ cc_library(
         "impl/einsum.cpp",
         "impl/element_wise.cpp",
         "impl/expand.cpp",
+        "impl/internal_ops.cpp",
         "impl/interpolate.cpp",
         "impl/layer_norm.cpp",
         "impl/linear.cpp",
 
@@ -0,0 +1,46 @@
+#include "core/conversion/converters/converters.h"
+#include "core/util/prelude.h"
+#include "torch/torch.h"
+
+namespace torch_tensorrt {
+namespace core {
+namespace conversion {
+namespace converters {
+namespace impl {
+namespace {
+
+auto linear_registrations TORCHTRT_UNUSED = RegisterNodeConversionPatterns().pattern( // NOTE(review): named "linear" but registers trt::attn_bias_from_attn_mask
+    {"trt::attn_bias_from_attn_mask(Tensor attn_mask) -> Tensor",
+     [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
+       // Converter for the internal op used in unpack_scaled_dot_product_attention.
+       // Tensor types are not visible during lowering and conditionals cannot be introduced there, so the
+       // type-specific specialization (bool mask vs. any other mask type) is done here at conversion time.
+       auto in = args[0].ITensorOrFreeze(ctx);
+       auto out = in; // non-bool masks are forwarded to the output unchanged
+       if (in->getType() == nvinfer1::DataType::kBOOL) {
+         auto not_layer = ctx->net->addUnary(*in, nvinfer1::UnaryOperation::kNOT); // logical NOT of the bool mask
+         TORCHTRT_CHECK(not_layer, "Unable to create not layer for attn_bias_from_attn_mask");
+         not_layer->setName((util::node_info(n) + "_not").c_str());
+         auto neg_inf = torch::tensor(-std::numeric_limits<float>::infinity()); // scalar -inf; NOTE(review): <limits> is not included directly by this file
+         auto neg_inf_itensor = tensor_to_const(ctx, neg_inf);
+         auto prod_layer = add_elementwise( // elementwise product NOT(mask) * -inf
+             ctx,
+             nvinfer1::ElementWiseOperation::kPROD, // NOTE(review): if false is promoted to 0.0, 0 * -inf is NaN under IEEE-754 -- confirm
+             not_layer->getOutput(0),
+             neg_inf_itensor,
+             util::node_info(n) + "_mul"); // NOTE(review): prod_layer is dereferenced below without a null check
+         auto add_layer = add_elementwise( // elementwise sum of the product and the original mask
+             ctx, nvinfer1::ElementWiseOperation::kSUM, prod_layer->getOutput(0), in, util::node_info(n) + "_add");
+         out = add_layer->getOutput(0); // NOTE(review): add_layer is not null-checked either
+       }
+       auto out_tensor = ctx->AssociateValueAndTensor(n->outputs()[0], out); // bind the result to the node's first output
+       LOG_DEBUG("Output tensor shape: " << out_tensor->getDimensions());
+       LOG_DEBUG("Output tensor type: " << out_tensor->getType());
+       return true;
+     }});
+} // namespace
+} // namespace impl
+} // namespace converters
+} // namespace conversion
+} // namespace core
+} // namespace torch_tensorrt
@@ -79,6 +79,22 @@ auto logical_not_registration TORCHTRT_UNUSED = RegisterNodeConversionPatterns()
        return true;
      }});
 
+auto sqrt_registration TORCHTRT_UNUSED = RegisterNodeConversionPatterns().pattern( // replaces the convert(sqrt, kSQRT) line removed further down in this diff
+    {"aten::sqrt(Tensor self) -> Tensor", [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
+       auto in = args[0].ITensorOrFreeze(ctx);
+       if (in->getType() == nvinfer1::DataType::kINT32) {
+         // unary sqrt layer only supports float inputs, so an INT32 input is cast to FLOAT before the layer is added
+         in = castITensor(ctx, in, nvinfer1::DataType::kFLOAT, util::node_info(n).c_str());
+       }
+       auto unary_layer = ctx->net->addUnary(*in, nvinfer1::UnaryOperation::kSQRT);
+       TORCHTRT_CHECK(unary_layer, "Unable to create sqrt layer from node: " << *n);
+       unary_layer->setName(util::node_info(n).c_str());
+       unary_layer->setOutputType(0, in->getType()); // output type follows the (possibly cast) input type
+       auto out_tensor = ctx->AssociateValueAndTensor(n->outputs()[0], unary_layer->getOutput(0)); // bind the sqrt result to the node's first output
+       LOG_DEBUG("Output tensor shape: " << out_tensor->getDimensions());
+       return true;
+     }});
+
 auto isfinite_registration TORCHTRT_UNUSED = RegisterNodeConversionPatterns().pattern(
     {"aten::isfinite(Tensor self) -> Tensor", [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
        auto in = args[0].ITensorOrFreeze(ctx);
@@ -126,7 +142,6 @@ convert(atan, kATAN);
 convert(floor, kFLOOR);
 convert(log, kLOG);
 convert(ceil, kCEIL);
-convert(sqrt, kSQRT);
 convert(exp, kEXP);
 convert(neg, kNEG);
 convert(erf, kERF);
 
@@ -146,6 +146,7 @@ void LowerGraph(std::shared_ptr<torch::jit::Graph>& g, std::vector<torch::jit::I
   if (lower_info.converting_to_trt_engine) {
     passes::RemoveCollectionCast(g);
   }
+  passes::UnpackScaledDotProductAttention(g);
   passes::UnpackAndCastMaskedFill(g, lower_info.getGPUDeviceString());
   passes::UnpackAndCastNumToTensor(g, lower_info.getGPUDeviceString());
   passes::UnpackAndCastFull(g, lower_info.getGPUDeviceString());
 
@@ -38,6 +38,7 @@ cc_library(
         "unpack_hardswish.cpp",
         "unpack_log_softmax.cpp",
         "unpack_rsqrt.cpp",
+        "unpack_scaled_dot_product_attention.cpp",
         "unpack_std.cpp",
         "unpack_var.cpp",
         "view_to_reshape.cpp",
Original file line number	Diff line number	Diff line change
`@@ -146,6 +146,7 @@ void LowerGraph(std::shared_ptr<torch::jit::Graph>& g, std::vector<torch::jit::I`
`146`	`146`	`if (lower_info.converting_to_trt_engine) {`
`147`	`147`	`passes::RemoveCollectionCast(g);`
`148`	`148`	`}`
	`149`	`+ passes::UnpackScaledDotProductAttention(g);`
`149`	`150`	`passes::UnpackAndCastMaskedFill(g, lower_info.getGPUDeviceString());`
`150`	`151`	`passes::UnpackAndCastNumToTensor(g, lower_info.getGPUDeviceString());`
`151`	`152`	`passes::UnpackAndCastFull(g, lower_info.getGPUDeviceString());`