Cleaned Entrypoint, updated composes

lapplislazuli · lapplislazuli · commit 8c034ce79fbb · 2022-03-17T12:11:00.000+01:00
diff --git a/docker-compose-minimal.yml b/docker-compose-minimal.yml
@@ -3,6 +3,8 @@ version: '3.8'
 services:
   experiment:
     image: ciselab/codebert-code2text
+    build:
+      context: .
     volumes:
       - ./dataset/java/:/dataset:ro
       - ./compose_output:/experiment/output
diff --git a/docker-compose-pretrained-minimal.yml b/docker-compose-pretrained-minimal.yml
@@ -6,17 +6,17 @@ services:
     volumes:
       - ./dataset/java/:/dataset:ro
       - ./compose_output:/experiment/output
-      - ./models/java:/models
+      - ./compose_output/checkpoint-best-bleu:/models
     environment:
-      do_train: "false"
-      do_val: "true"
-      do_test: "true"
+      DO_TRAIN: "false"
+      DO_VALID: "true"
+      DO_TEST: "true"
       valid_file: /dataset/valid_minimal.jsonl
       test_file: /dataset/test_minimal.jsonl
       no_cuda: "true"
       pretrained_model: microsoft/codebert-base
       load_existing_model: "true"
-      load_model_path: /models/best_pytorch_model.bin
+      load_model_path: /models/pytorch_model.bin
     deploy:
       resources:
         reservations:
diff --git a/entrypoint.sh b/entrypoint.sh
@@ -1,196 +1,46 @@
 #!/bin/bash
 
 # Entrypoint for CodeBert Code-To-Text Experiment
-
 # This file invokes the original python code of the codebert text with the environment variables set in the docker container. 
-# Additionally, it does a switch-case which flags for training, validation and testing have been set 
-
-# The use of exit without a number returns the exit code of the fore-going statement - that is in this case the anaconda command. 
-# The Exit codes are necessary, as otherwise all cases are run (atleast, all cases with flags set). 
-# That would not only take longer, but also overwrite valid artifacts. Do not remove exits!
+# Calling exit without a number returns the exit code of the preceding statement - in this case, the python command.
 
+echo "Starting  CodeBert-Code2Text-Reproduction - Building the command"
 
-# ============================================
-#        Case 1: Pretrained Model 
-# ============================================
+# As the command can get complex,
+# it is stitched together from the flags set in the docker environment.
 
-if [ "$load_existing_model" = true ]; then 
-    echo "Found flag to load a model under $load_model_path"
+commandCollector="python ./run.py"
 
-    if [ "$DO_TRAIN" = true -a "$DO_TEST" = true -a "$DO_VALID" = true ]; then
-        echo "performing full run with training, validation and test"
-        python ./run.py \
-            --do_train --do_test --do_eval \
-            --model_type roberta --model_name_or_path $pretrained_model \
-            --train_filename $train_file --test_filename $test_file --dev_filename $valid_file \
-            --output_dir $output_dir \
-            --max_source_length $source_length \
-            --max_target_length $target_length \
-            --beam_size $beam_size \
-            --train_batch_size $batch_size --eval_batch_size $batch_size \
-            --learning_rate $lr \
-            --num_train_epochs $epochs \
-            --load_model_path $load_model_path \
-            --seed $seed
-        exit
-    fi
-    if [ "$DO_TRAIN" = true -a "$DO_VALID" = true ]; then
-        echo "performing run with training and validation"
-        python ./run.py \
-            --do_train --do_eval \
-            --model_type roberta --model_name_or_path $pretrained_model \
-            --train_filename $train_file --dev_filename $valid_file \
-            --output_dir $output_dir \
-            --max_source_length $source_length \
-            --max_target_length $target_length \
-            --beam_size $beam_size \
-            --train_batch_size $batch_size --eval_batch_size $batch_size \
-            --learning_rate $lr \
-            --load_model_path $load_model_path \
-            --num_train_epochs $epochs \
-            --seed $seed
-        exit
-    fi
-    if [ "$DO_TRAIN" = true -a "$DO_TEST" = true ]; then
-        echo "performing run with training and test"
-        python ./run.py \
-            --do_train --do_test \
-            --model_type roberta --model_name_or_path $pretrained_model \
-            --train_filename $train_file --test_filename $test_file \
-            --output_dir $output_dir \
-            --max_source_length $source_length \
-            --max_target_length $target_length \
-            --beam_size $beam_size \
-            --train_batch_size $batch_size --eval_batch_size $batch_size \
-            --learning_rate $lr \
-            --num_train_epochs $epochs \
-            --load_model_path $load_model_path \
-            --seed $seed
-        exit
-    fi
-    if [ "$DO_TRAIN" = true ]; then
-        echo "performing run with (only) training"
-        python ./run.py \
-            --do_train \
-            --model_type roberta --model_name_or_path $pretrained_model \
-            --train_filename $train_file \
-            --output_dir $output_dir \
-            --max_source_length $source_length \
-            --max_target_length $target_length \
-            --beam_size $beam_size \
-            --train_batch_size $batch_size \
-            --eval_batch_size $batch_size \
-            --learning_rate $lr \
-            --num_train_epochs $epochs \
-            --load_model_path $load_model_path \
-            --seed $seed
-        exit 0
-    fi
-    if [ "$DO_TEST" = true ]; then
-        echo "performing run with (only) testing"
-        python ./run.py \
-            --do_test \
-            --model_type roberta --model_name_or_path $pretrained_model \
-            --test_filename $test_file \
-            --output_dir $output_dir \
-            --max_source_length $source_length \
-            --max_target_length $target_length \
-            --train_batch_size $batch_size \
-            --eval_batch_size $batch_size \
-            --load_model_path $load_model_path \
-            --seed $seed
-        exit
-    fi
+if [ "$load_existing_model" = true ]; 
+then commandCollector="$commandCollector --load_model_path $load_model_path "; 
+else echo "Creating a new model - not loading an existing one"
 fi
 
-# ============================================
-#        Case 2: No Pretrained Model 
-# ============================================
-
-if [ "$DO_TRAIN" = true -a "$DO_TEST" = true -a "$DO_VALID" = true ]; then
-    echo "performing full run with training, validation and test"
-    python ./run.py \
-        --do_train --do_test --do_eval \
-        --model_type roberta --model_name_or_path $pretrained_model \
-        --train_filename $train_file --test_filename $test_file --dev_filename $valid_file \
-        --output_dir $output_dir \
-        --max_source_length $source_length \
-        --max_target_length $target_length \
-        --beam_size $beam_size \
-        --train_batch_size $batch_size --eval_batch_size $batch_size \
-        --learning_rate $lr \
-        --num_train_epochs $epochs \
-        --seed $seed
-    exit
-fi
-if [ "$DO_TRAIN" = true -a "$DO_VALID" = true ]; then
-    echo "performing run with training and validation"
-    python ./run.py \
-        --do_train --do_eval \
-        --model_type roberta --model_name_or_path $pretrained_model \
-        --train_filename $train_file --dev_filename $valid_file \
-        --output_dir $output_dir \
-        --max_source_length $source_length \
-        --max_target_length $target_length \
-        --beam_size $beam_size \
-        --train_batch_size $batch_size --eval_batch_size $batch_size \
-        --learning_rate $lr \
-        --num_train_epochs $epochs \
-        --seed $seed
-    exit
+if [ "$DO_TRAIN" = true ]; 
+then commandCollector="$commandCollector --do_train --train_filename $train_file --train_batch_size $batch_size --num_train_epochs $epochs --seed $seed"; 
+else echo "Do not do Training"
 fi
 
-if [ "$DO_TRAIN" = true -a "$DO_TEST" = true ]; then
-    echo "performing run with training and test"
-    python ./run.py \
-        --do_train --do_test \
-        --model_type roberta --model_name_or_path $pretrained_model \
-        --train_filename $train_file --test_filename $test_file \
-        --output_dir $output_dir \
-        --max_source_length $source_length \
-        --max_target_length $target_length \
-        --beam_size $beam_size \
-        --train_batch_size $batch_size --eval_batch_size $batch_size \
-        --learning_rate $lr \
-        --num_train_epochs $epochs \
-        --seed $seed
-    exit
+if [ "$DO_TEST" = true ]; 
+then commandCollector="$commandCollector --do_test --test_filename $test_file"; 
+else echo "Do not do Testing"
 fi
-if [ "$DO_TRAIN" = true ]; then
-    echo "performing run with (only) training"
-    python ./run.py \
-        --do_train \
-        --model_type roberta --model_name_or_path $pretrained_model \
-        --train_filename $train_file \
-        --output_dir $output_dir \
-        --max_source_length $source_length \
-        --max_target_length $target_length \
-        --beam_size $beam_size \
-        --train_batch_size $batch_size \
-        --eval_batch_size $batch_size \
-        --learning_rate $lr \
-        --num_train_epochs $epochs \
-        --seed $seed
-    exit 0
-fi
-if [ "$DO_TEST" = true ]; then
-    echo "performing run with (only) testing"
-    python ./run.py \
-        --do_test \
-        --model_type roberta --model_name_or_path $pretrained_model \
-        --test_filename $test_file \
-        --output_dir $output_dir \
-        --max_source_length $source_length \
-        --max_target_length $target_length \
-        --train_batch_size $batch_size \
-        --eval_batch_size $batch_size \
-        --seed $seed
-    exit
+
+if [ "$DO_VALID" = true ]; 
+then commandCollector="$commandCollector  --do_eval --eval_batch_size $batch_size --dev_filename $valid_file "; 
+else echo "Do not do Validation"
 fi
+# Add standard variables that are always used
+# Note: "$pretrained_model" points to one of the standard huggingface models and is necessary to find the right imports and methods. It does not collide with "$load_model_path".
+commandCollector="$commandCollector --model_type roberta --model_name_or_path $pretrained_model --output_dir $output_dir --max_source_length $source_length --max_target_length $target_length --beam_size $beam_size "
+
+echo "Final command is:"
+echo $commandCollector
+
+/bin/bash -c "$commandCollector"
 
-# ===================================
-#     Case 3: Error / Unknown 
-# ===================================
+exit
 
-echo "no flags set - please inspect your compose"
-exit 1
+# To keep the container open for inspection
+# echo "Program finished - Keeping Container open for inspection"
+# tail -f /dev/null