@@ -7,6 +7,7 @@
 import numpy as np
 import torch
 from torch.fx.node import Argument, Node, Target
+from torch_tensorrt._utils import is_tensorrt_version_supported
 from torch_tensorrt.dynamo._settings import CompilationSettings
 from torch_tensorrt.dynamo._SourceIR import SourceIR
 from torch_tensorrt.dynamo.conversion import impl
@@ -619,40 +620,41 @@ def aten_ops_quantize_op(
     )
 
 
-try:
-    import modelopt.torch.quantization as mtq  # noqa: F401
+if is_tensorrt_version_supported("10.8.0"):
+    try:
+        import modelopt.torch.quantization as mtq  # noqa: F401
 
-    assert torch.ops.tensorrt.dynamic_block_quantize_op.default
-except Exception as e:
-    _LOGGER.warning(
-        "Unable to import quantize op. Please install modelopt library (https://github.com/NVIDIA/TensorRT-Model-Optimizer?tab=readme-ov-file#installation) to add support for compiling quantized models"
-    )
-else:
+        assert torch.ops.tensorrt.dynamic_block_quantize_op.default
+    except Exception as e:
+        _LOGGER.warning(
+            "Unable to import quantize op. Please install modelopt library (https://github.com/NVIDIA/TensorRT-Model-Optimizer?tab=readme-ov-file#installation) to add support for compiling quantized models"
+        )
+    else:
 
-    @dynamo_tensorrt_converter(
-        torch.ops.tensorrt.dynamic_block_quantize_op.default,
-        supports_dynamic_shapes=True,
-    )
-    def aten_ops_dynamic_block_quantize_op(
-        ctx: ConversionContext,
-        target: Target,
-        args: Tuple[Argument, ...],
-        kwargs: Dict[str, Argument],
-        name: str,
-    ) -> Union[TRTTensor, Sequence[TRTTensor]]:
-        return impl.dynamic_block_quantize.quantize(
-            ctx,
-            target,
-            SourceIR.ATEN,
-            name,
-            args[0],
-            args[1],
-            args[2],
-            args[3],
-            args[4],
-            args[5],
-            args[6],
+        @dynamo_tensorrt_converter(
+            torch.ops.tensorrt.dynamic_block_quantize_op.default,
+            supports_dynamic_shapes=True,
         )
+        def aten_ops_dynamic_block_quantize_op(
+            ctx: ConversionContext,
+            target: Target,
+            args: Tuple[Argument, ...],
+            kwargs: Dict[str, Argument],
+            name: str,
+        ) -> Union[TRTTensor, Sequence[TRTTensor]]:
+            return impl.dynamic_block_quantize.quantize(
+                ctx,
+                target,
+                SourceIR.ATEN,
+                name,
+                args[0],
+                args[1],
+                args[2],
+                args[3],
+                args[4],
+                args[5],
+                args[6],
+            )
 
 
 @dynamo_tensorrt_converter(torch.ops.aten.squeeze.dim, supports_dynamic_shapes=True)
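For readers unfamiliar with the new helper, below is a minimal sketch of what a version gate like `is_tensorrt_version_supported` might look like. The function name and the `min_version` parameter mirror the import above, but the actual implementation in `torch_tensorrt._utils` may differ, so treat this as an illustration only.

```python
# Illustrative sketch only: the real helper in torch_tensorrt._utils may be implemented differently.
from packaging.version import Version


def is_tensorrt_version_supported(min_version: str) -> bool:
    """Return True if the installed TensorRT version is at least `min_version`."""
    try:
        import tensorrt as trt  # imported lazily so the check degrades gracefully

        return Version(trt.__version__) >= Version(min_version)
    except Exception:
        # TensorRT missing or version unparsable: treat the feature as unsupported.
        return False
```

With a guard like this, the `dynamic_block_quantize_op` converter above is only registered when TensorRT 10.8.0 or newer is installed, so older TensorRT builds simply skip the dynamic block quantize path instead of failing at registration time.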