
Commit 2f3a421

Fix other PyTorch models
1 parent: d531979

File tree

templates/adding_a_new_model/modeling_xxx.py
transformers/modeling_distilbert.py

2 files changed: 7 additions, 3 deletions

templates/adding_a_new_model/modeling_xxx.py
4 additions, 2 deletions

@@ -309,10 +309,12 @@ def forward(self, input_ids=None, attention_mask=None, token_type_ids=None, posi
         else:
             raise ValueError("You have to specify either input_ids or inputs_embeds")

+        device = input_ids.device if input_ids is not None else inputs_embeds.device
+
         if attention_mask is None:
-            attention_mask = torch.ones(input_shape)
+            attention_mask = torch.ones(input_shape, device=device)
         if token_type_ids is None:
-            token_type_ids = torch.zeros(input_shape, dtype=torch.long)
+            token_type_ids = torch.zeros(input_shape, dtype=torch.long, device=device)

         # We create a 3D attention mask from a 2D tensor mask.
         # Sizes are [batch_size, 1, 1, to_seq_length]
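Both hunks in this commit apply the same fix: when the caller omits attention_mask or token_type_ids, the default tensors must be created on the same device as the model inputs, otherwise a CPU-allocated mask collides with CUDA inputs later in the forward pass. A minimal standalone sketch of the pattern (the helper name make_default_masks is hypothetical, not part of transformers):

import torch

def make_default_masks(input_ids=None, inputs_embeds=None):
    # Derive the sequence shape from whichever input was provided,
    # mirroring the surrounding code in the model's forward().
    if input_ids is not None:
        input_shape = input_ids.size()
    elif inputs_embeds is not None:
        input_shape = inputs_embeds.size()[:-1]
    else:
        raise ValueError("You have to specify either input_ids or inputs_embeds")

    # The fix: read the device off the inputs so the defaults are
    # allocated on the same device (CPU or CUDA) as the rest of the batch.
    device = input_ids.device if input_ids is not None else inputs_embeds.device
    attention_mask = torch.ones(input_shape, device=device)
    token_type_ids = torch.zeros(input_shape, dtype=torch.long, device=device)
    return attention_mask, token_type_ids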

transformers/modeling_distilbert.py
3 additions, 1 deletion

@@ -450,8 +450,10 @@ def forward(self,
         else:
             raise ValueError("You have to specify either input_ids or inputs_embeds")

+        device = input_ids.device if input_ids is not None else inputs_embeds.device
+
         if attention_mask is None:
-            attention_mask = torch.ones(input_shape)  # (bs, seq_length)
+            attention_mask = torch.ones(input_shape, device=device)  # (bs, seq_length)

         # Prepare head mask if needed
         # 1.0 in head_mask indicate we keep the head
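For context, a minimal reproduction of the failure mode the DistilBERT hunk fixes (hypothetical tensors; assumes a CUDA device is available):

import torch

# Inputs living on the GPU, as in a typical forward pass of a model
# that was moved there with .to("cuda").
inputs_embeds = torch.randn(2, 8, 768, device="cuda")
input_shape = inputs_embeds.size()[:-1]  # (bs, seq_length)

mask_cpu = torch.ones(input_shape)                               # old behaviour: CPU tensor
mask_gpu = torch.ones(input_shape, device=inputs_embeds.device)  # behaviour after this commit

# Mixing mask_cpu with the CUDA embeddings raises a device-mismatch
# RuntimeError; mask_gpu broadcasts cleanly.
out = inputs_embeds * mask_gpu.unsqueeze(-1)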
