Skip to content

Commit 8c034ce

Browse files
committed
Cleaned Entrypoint, updated composes
1 parent dee9242 commit 8c034ce

File tree

3 files changed

+37
-185
lines changed

3 files changed

+37
-185
lines changed

docker-compose-minimal.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@ version: '3.8'
33
services:
44
experiment:
55
image: ciselab/codebert-code2text
6+
build:
7+
context: .
68
volumes:
79
- ./dataset/java/:/dataset:ro
810
- ./compose_output:/experiment/output

docker-compose-pretrained-minimal.yml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,17 +6,17 @@ services:
66
volumes:
77
- ./dataset/java/:/dataset:ro
88
- ./compose_output:/experiment/output
9-
- ./models/java:/models
9+
- ./compose_output/checkpoint-best-bleu:/models
1010
environment:
11-
do_train: "false"
12-
do_val: "true"
13-
do_test: "true"
11+
DO_TRAIN: "false"
12+
DO_VALID: "true"
13+
DO_TEST: "true"
1414
valid_file: /dataset/valid_minimal.jsonl
1515
test_file: /dataset/test_minimal.jsonl
1616
no_cuda: "true"
1717
pretrained_model: microsoft/codebert-base
1818
load_existing_model: "true"
19-
load_model_path: /models/best_pytorch_model.bin
19+
load_model_path: /models/pytorch_model.bin
2020
deploy:
2121
resources:
2222
reservations:

entrypoint.sh

Lines changed: 30 additions & 180 deletions
Original file line numberDiff line numberDiff line change
@@ -1,196 +1,46 @@
11
#!/bin/bash
22

33
# Entrypoint for CodeBert Code-To-Text Experiment
4-
54
# This file invokes the original Python code of the CodeBERT code-to-text experiment with the environment variables set in the Docker container.
6-
# Additionally, it does a switch-case which flags for training, validation and testing have been set
7-
8-
# The use of exit without a number returns the exit code of the fore-going statement - that is in this case the anaconda command.
9-
# The Exit codes are necessary, as otherwise all cases are run (atleast, all cases with flags set).
10-
# That would not only take longer, but also overwrite valid artifacts. Do not remove exits!
5+
# Using exit without a number returns the exit code of the preceding statement - in this case, the python command.
116

7+
echo "Starting CodeBert-Code2Text-Reproduction - Building the command"
128

13-
# ============================================
14-
# Case 1: Pretrained Model
15-
# ============================================
9+
# As the command can get complex,
10+
# it is stitched together from whichever flags are set in the Docker environment.
1611

17-
if [ "$load_existing_model" = true ]; then
18-
echo "Found flag to load a model under $load_model_path"
12+
commandCollector="python ./run.py"
1913

20-
if [ "$DO_TRAIN" = true -a "$DO_TEST" = true -a "$DO_VALID" = true ]; then
21-
echo "performing full run with training, validation and test"
22-
python ./run.py \
23-
--do_train --do_test --do_eval \
24-
--model_type roberta --model_name_or_path $pretrained_model \
25-
--train_filename $train_file --test_filename $test_file --dev_filename $valid_file \
26-
--output_dir $output_dir \
27-
--max_source_length $source_length \
28-
--max_target_length $target_length \
29-
--beam_size $beam_size \
30-
--train_batch_size $batch_size --eval_batch_size $batch_size \
31-
--learning_rate $lr \
32-
--num_train_epochs $epochs \
33-
--load_model_path $load_model_path \
34-
--seed $seed
35-
exit
36-
fi
37-
if [ "$DO_TRAIN" = true -a "$DO_VALID" = true ]; then
38-
echo "performing run with training and validation"
39-
python ./run.py \
40-
--do_train --do_eval \
41-
--model_type roberta --model_name_or_path $pretrained_model \
42-
--train_filename $train_file --dev_filename $valid_file \
43-
--output_dir $output_dir \
44-
--max_source_length $source_length \
45-
--max_target_length $target_length \
46-
--beam_size $beam_size \
47-
--train_batch_size $batch_size --eval_batch_size $batch_size \
48-
--learning_rate $lr \
49-
--load_model_path $load_model_path \
50-
--num_train_epochs $epochs \
51-
--seed $seed
52-
exit
53-
fi
54-
if [ "$DO_TRAIN" = true -a "$DO_TEST" = true ]; then
55-
echo "performing run with training and test"
56-
python ./run.py \
57-
--do_train --do_test \
58-
--model_type roberta --model_name_or_path $pretrained_model \
59-
--train_filename $train_file --test_filename $test_file \
60-
--output_dir $output_dir \
61-
--max_source_length $source_length \
62-
--max_target_length $target_length \
63-
--beam_size $beam_size \
64-
--train_batch_size $batch_size --eval_batch_size $batch_size \
65-
--learning_rate $lr \
66-
--num_train_epochs $epochs \
67-
--load_model_path $load_model_path \
68-
--seed $seed
69-
exit
70-
fi
71-
if [ "$DO_TRAIN" = true ]; then
72-
echo "performing run with (only) training"
73-
python ./run.py \
74-
--do_train \
75-
--model_type roberta --model_name_or_path $pretrained_model \
76-
--train_filename $train_file \
77-
--output_dir $output_dir \
78-
--max_source_length $source_length \
79-
--max_target_length $target_length \
80-
--beam_size $beam_size \
81-
--train_batch_size $batch_size \
82-
--eval_batch_size $batch_size \
83-
--learning_rate $lr \
84-
--num_train_epochs $epochs \
85-
--load_model_path $load_model_path \
86-
--seed $seed
87-
exit 0
88-
fi
89-
if [ "$DO_TEST" = true ]; then
90-
echo "performing run with (only) testing"
91-
python ./run.py \
92-
--do_test \
93-
--model_type roberta --model_name_or_path $pretrained_model \
94-
--test_filename $test_file \
95-
--output_dir $output_dir \
96-
--max_source_length $source_length \
97-
--max_target_length $target_length \
98-
--train_batch_size $batch_size \
99-
--eval_batch_size $batch_size \
100-
--load_model_path $load_model_path \
101-
--seed $seed
102-
exit
103-
fi
14+
if [ "$load_existing_model" = true ];
15+
then commandCollector="$commandCollector --load_model_path $load_model_path ";
16+
else echo "Creating a new model - not loading an existing one"
10417
fi
10518

106-
# ============================================
107-
# Case 2: No Pretrained Model
108-
# ============================================
109-
110-
if [ "$DO_TRAIN" = true -a "$DO_TEST" = true -a "$DO_VALID" = true ]; then
111-
echo "performing full run with training, validation and test"
112-
python ./run.py \
113-
--do_train --do_test --do_eval \
114-
--model_type roberta --model_name_or_path $pretrained_model \
115-
--train_filename $train_file --test_filename $test_file --dev_filename $valid_file \
116-
--output_dir $output_dir \
117-
--max_source_length $source_length \
118-
--max_target_length $target_length \
119-
--beam_size $beam_size \
120-
--train_batch_size $batch_size --eval_batch_size $batch_size \
121-
--learning_rate $lr \
122-
--num_train_epochs $epochs \
123-
--seed $seed
124-
exit
125-
fi
126-
if [ "$DO_TRAIN" = true -a "$DO_VALID" = true ]; then
127-
echo "performing run with training and validation"
128-
python ./run.py \
129-
--do_train --do_eval \
130-
--model_type roberta --model_name_or_path $pretrained_model \
131-
--train_filename $train_file --dev_filename $valid_file \
132-
--output_dir $output_dir \
133-
--max_source_length $source_length \
134-
--max_target_length $target_length \
135-
--beam_size $beam_size \
136-
--train_batch_size $batch_size --eval_batch_size $batch_size \
137-
--learning_rate $lr \
138-
--num_train_epochs $epochs \
139-
--seed $seed
140-
exit
19+
if [ "$DO_TRAIN" = true ];
20+
then commandCollector="$commandCollector --do_train --train_filename $train_file --train_batch_size $batch_size --num_train_epochs $epochs --seed $seed";
21+
else echo "Do not do Training"
14122
fi
14223

143-
if [ "$DO_TRAIN" = true -a "$DO_TEST" = true ]; then
144-
echo "performing run with training and test"
145-
python ./run.py \
146-
--do_train --do_test \
147-
--model_type roberta --model_name_or_path $pretrained_model \
148-
--train_filename $train_file --test_filename $test_file \
149-
--output_dir $output_dir \
150-
--max_source_length $source_length \
151-
--max_target_length $target_length \
152-
--beam_size $beam_size \
153-
--train_batch_size $batch_size --eval_batch_size $batch_size \
154-
--learning_rate $lr \
155-
--num_train_epochs $epochs \
156-
--seed $seed
157-
exit
24+
if [ "$DO_TEST" = true ];
25+
then commandCollector="$commandCollector --do_test --test_filename $test_file";
26+
else echo "Do not do Testing"
15827
fi
159-
if [ "$DO_TRAIN" = true ]; then
160-
echo "performing run with (only) training"
161-
python ./run.py \
162-
--do_train \
163-
--model_type roberta --model_name_or_path $pretrained_model \
164-
--train_filename $train_file \
165-
--output_dir $output_dir \
166-
--max_source_length $source_length \
167-
--max_target_length $target_length \
168-
--beam_size $beam_size \
169-
--train_batch_size $batch_size \
170-
--eval_batch_size $batch_size \
171-
--learning_rate $lr \
172-
--num_train_epochs $epochs \
173-
--seed $seed
174-
exit 0
175-
fi
176-
if [ "$DO_TEST" = true ]; then
177-
echo "performing run with (only) testing"
178-
python ./run.py \
179-
--do_test \
180-
--model_type roberta --model_name_or_path $pretrained_model \
181-
--test_filename $test_file \
182-
--output_dir $output_dir \
183-
--max_source_length $source_length \
184-
--max_target_length $target_length \
185-
--train_batch_size $batch_size \
186-
--eval_batch_size $batch_size \
187-
--seed $seed
188-
exit
28+
29+
if [ "$DO_VALID" = true ];
30+
then commandCollector="$commandCollector --do_eval --eval_batch_size $batch_size --dev_filename $valid_file ";
31+
else echo "Do not do Validation"
18932
fi
33+
# Add standard variables that are always used
34+
# Note: "$pretrained_model" points to one of the standard models on Hugging Face and is necessary to find the right imports and methods. It does not collide with "$load_model_path".
35+
commandCollector="$commandCollector --model_type roberta --model_name_or_path $pretrained_model --output_dir $output_dir --max_source_length $source_length --max_target_length $target_length --beam_size $beam_size "
36+
37+
echo "Final command is:"
38+
echo $commandCollector
39+
40+
/bin/bash -c "$commandCollector"
19041

191-
# ===================================
192-
# Case 3: Error / Unknown
193-
# ===================================
42+
exit
19443

195-
echo "no flags set - please inspect your compose"
196-
exit 1
44+
# To keep the container open for inspection
45+
# echo "Program finished - Keeping Container open for inspection"
46+
# tail -f /dev/null

0 commit comments

Comments
 (0)