@@ -678,17 +678,24 @@ def compile(
     )
 
     gm = exported_program.module()
-    # Move the weights in the state_dict to CPU
     logger.debug("Input graph: " + str(gm.graph))
 
     # Apply lowering on the graph module
     gm = post_lowering(gm, settings)
     logger.debug("Lowered Input graph: " + str(gm.graph))
+
+    # Move the weights in the state_dict to CPU
     if offload_module_to_cpu:
         exported_program.module().to(CPU_DEVICE)
         logger.info(
-            "The model is offloaded to CPU during compilation. If you want to keep the model on GPU, set offload_module_to_cpu=False. "
+            "The PyTorch model was moved to the CPU to allocate all GPU memory to TensorRT. To retain the model on the GPU, set offload_module_to_cpu=False."
         )
+    else:
+        remaining_memory, total_memory = torch.cuda.mem_get_info()
+        if remaining_memory < total_memory // 2:
+            logger.warning(
+                "The remaining GPU memory may not be enough to compile the model, which can lead to an OOM error. Consider setting offload_module_to_cpu=True."
+            )
     trt_gm = compile_module(
         gm, trt_arg_inputs, trt_kwarg_inputs, settings, engine_cache
     )
@@ -833,7 +840,7 @@ def contains_metadata(gm: torch.fx.GraphModule) -> bool:
                 str(name),
                 str(submodule.graph),
             )
-            submodule.to(torch.cuda.current_device())
+            submodule.to(to_torch_device(settings.device))
             continue
 
         if name not in submodule_node_dict:
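The second hunk stops hard-coding the fallback device: `torch.cuda.current_device()` returns a bare device index for whichever device happens to be current, while `to_torch_device(settings.device)` honors the device the user compiled for. A small sketch of that normalization; the import path is taken from torch_tensorrt's dynamo utilities and should be treated as an assumption:

```python
import torch
from torch_tensorrt.dynamo.utils import to_torch_device  # assumed import path

# to_torch_device normalizes the accepted device specs (str, torch.device,
# torch_tensorrt.Device) into a torch.device, so the non-accelerated
# submodule lands on the device the user selected, not the "current" one.
device = to_torch_device("cuda:0")
print(device)  # a torch.device, e.g. cuda:0
```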