@@ -8,8 +8,7 @@
 #
 # http://www.apache.org/licenses/LICENSE-2.0
 #
-# Unless required by applicable law or agreed to in writing, softwareFuchs.Mader.Luchs
-
+# Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
@@ -304,17 +303,19 @@ def main():
          'weight_decay': args.weight_decay},
         {'params': [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
     ]
+    t_total = len(train_dataloader) // args.gradient_accumulation_steps * args.num_train_epochs
     optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate, eps=args.adam_epsilon)
-    scheduler = get_linear_schedule_with_warmup(optimizer,
-                                                num_warmup_steps=int(len(train_dataloader)*args.num_train_epochs*0.1),
-                                                num_training_steps=len(train_dataloader)*args.num_train_epochs)
+    scheduler = get_linear_schedule_with_warmup(optimizer,
+                                                num_warmup_steps=int(t_total*0.1),
+                                                num_training_steps=t_total)
 
     #Start training
     logger.info("***** Running training *****")
     logger.info("  Num examples = %d", len(train_examples))
     logger.info("  Batch size = %d", args.train_batch_size)
     logger.info("  Num epoch = %d", args.num_train_epochs)
 
+
     model.train()
     dev_dataset = {}
     nb_tr_examples, nb_tr_steps, tr_loss, global_step, best_bleu, best_loss = 0, 0, 0, 0, 0, 1e6
@@ -515,6 +516,4 @@ def main():
 
 
 if __name__ == "__main__":
-    main()
-
-
+    main()
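
The substantive change is in the `@@ -304` hunk: the schedule length was previously `len(train_dataloader) * args.num_train_epochs`, which counts batches, but with gradient accumulation the optimizer (and hence `scheduler.step()`) only fires once every `args.gradient_accumulation_steps` batches, so warmup stretched out by that factor and the linear decay never completed within training. The new `t_total` counts actual optimizer steps. Below is a minimal, self-contained sketch of the corrected wiring; the toy model, dataloader, and hyperparameter values are placeholders chosen for illustration, and `torch.optim.AdamW` stands in for the `AdamW` import this script uses.

```python
import torch
from torch.utils.data import DataLoader, TensorDataset
from transformers import get_linear_schedule_with_warmup

# Placeholder data and model standing in for the script's train_dataloader
# and seq2seq model; the values are illustrative only.
dataset = TensorDataset(torch.randn(64, 10))
train_dataloader = DataLoader(dataset, batch_size=8)  # 8 batches per epoch
model = torch.nn.Linear(10, 2)

gradient_accumulation_steps = 4
num_train_epochs = 3

# Count optimizer steps, not batches: each optimizer.step() consumes
# gradient_accumulation_steps batches, so the schedule must be shortened
# by that factor or warmup/decay finish far too late.
t_total = len(train_dataloader) // gradient_accumulation_steps * num_train_epochs

optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5, eps=1e-8)
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=int(t_total * 0.1),  # warm up over the first 10% of steps
    num_training_steps=t_total,
)

# In the training loop, scheduler.step() is then called once per optimizer
# step, i.e. once every gradient_accumulation_steps batches.
```

With `t_total` defined this way, the 10% warmup fraction and the decay horizon stay correct for any accumulation setting, since both are expressed in the same units the scheduler actually advances in.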