
Commit f065ea6

Borda and chaton authored
populate some more legacy checkpoints (#5457)
* populate some more legacy checkpoints
* .
* pt freeze
* .
* skip

Co-authored-by: chaton <[email protected]>
1 parent 8748293 commit f065ea6

File tree

5 files changed (+41, -5 lines)


legacy/README.md

Lines changed: 17 additions & 0 deletions
@@ -0,0 +1,17 @@
# Maintaining backward compatibility with some legacy versions

The aim of this section is to set some baselines and workflows/guidelines for maintaining backward compatibility with some legacy versions of PL.

At the moment we focus on the ability to run old checkpoints, so the flow here is to create a checkpoint with every release and store it in our public AWS storage; each CI run then pulls this archive and tests loading and resuming training with the model.

If you want to pull all saved version-checkpoints for local testing/development, call

```bash
wget https://pl-public-data.s3.amazonaws.com/legacy/checkpoints.zip
unzip -o checkpoints.zip
```

To back-populate the collection with past versions you can use the following bash commands:

```bash
bash generate_checkpoints.sh 1.0.2 1.0.3 1.0.4
zip -r checkpoints.zip checkpoints/
```
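The CI flow described above (pull the archive, then load and resume each stored checkpoint) relies on a per-version directory layout. A minimal stdlib-only sketch of locating the checkpoints for one release, assuming the `<legacy_root>/<pl_version>/*.ckpt` layout used by the test in this commit (the helper name is illustrative):

```python
import glob
import os

CHECKPOINT_EXTENSION = ".ckpt"


def find_legacy_checkpoints(legacy_root: str, pl_version: str) -> list:
    """Return all checkpoint files stored for a given PL release.

    Assumes the layout <legacy_root>/<pl_version>/*.ckpt; returns an
    empty list when no checkpoints exist for that version.
    """
    path_dir = os.path.join(legacy_root, pl_version)
    return sorted(glob.glob(os.path.join(path_dir, f"*{CHECKPOINT_EXTENSION}")))
```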

legacy/generate_checkpoints.sh

Lines changed: 2 additions & 1 deletion
@@ -21,7 +21,8 @@ do
     virtualenv $ENV_PATH --system-site-packages
     # activate and install PL version
     source "$ENV_PATH/bin/activate"
-    pip install "pytorch_lightning==$ver" --quiet -U --no-cache-dir
+    # older versions have problems loading checkpoints that were saved by newer versions
+    pip install "pytorch_lightning==$ver" "torch==1.3" --quiet --no-cache-dir
 
     python --version
     pip --version
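The install step above pins both packages per legacy environment. A small sketch of building that command in Python; the pin `torch==1.3` comes from the diff, while the helper name is illustrative:

```python
def legacy_install_cmd(pl_version: str) -> str:
    """Build the pip command used for one legacy environment.

    Mirrors the script above: pin both pytorch_lightning and torch,
    because checkpoints saved by newer torch releases may not load
    under older ones.
    """
    return (
        f'pip install "pytorch_lightning=={pl_version}" "torch==1.3" '
        "--quiet --no-cache-dir"
    )
```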

legacy/zero_training.py

Lines changed: 2 additions & 1 deletion
@@ -49,7 +49,8 @@ def _loss(self, batch, prediction):
     def _step(self, batch, batch_idx):
         output = self.layer(batch)
         loss = self._loss(batch, output)
-        return loss
+        # return {'loss': loss}  # used for PL < 1.0
+        return loss  # used for PL >= 1.0
 
     def training_step(self, batch, batch_idx):
         return self._step(batch, batch_idx)
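The diff above keeps both return conventions around as comments: PL < 1.0 expected a dict like `{'loss': loss}`, while PL >= 1.0 also accepts the bare loss. A hedged sketch of selecting the format by version string (the helper and its version parsing are illustrative, not PL API):

```python
def step_output(loss, pl_version: str):
    """Return the training_step output in the format a PL release expects.

    PL < 1.0 expected a dict like {'loss': loss}; PL >= 1.0 also accepts
    returning the loss directly (as in the diff above).
    """
    major, minor, *_ = (int(p) for p in pl_version.split("."))
    if (major, minor) < (1, 0):
        return {"loss": loss}
    return loss
```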

pytorch_lightning/core/lightning.py

Lines changed: 0 additions & 1 deletion
@@ -170,7 +170,6 @@ def automatic_optimization(self) -> bool:
     def automatic_optimization(self, automatic_optimization: bool) -> None:
         self._automatic_optimization = automatic_optimization
 
-
     def print(self, *args, **kwargs) -> None:
         r"""
         Prints only from process 0. Use this in any distributed mode to log only once.

tests/checkpointing/test_legacy_checkpoints.py

Lines changed: 20 additions & 2 deletions
@@ -24,9 +24,27 @@
 CHECKPOINT_EXTENSION = ".ckpt"
 
 
-# todo: add more legacy checkpoints :]
+# todo: add more legacy checkpoints - for < v0.8
 @pytest.mark.parametrize("pl_version", [
-    "0.10.0", "1.0.0", "1.0.1", "1.0.2", "1.0.3", "1.0.4", "1.0.5", "1.0.6", "1.0.7", "1.0.8"
+    # "0.8.1",
+    "0.8.3",
+    "0.8.4",
+    # "0.8.5",  # this version has problems loading on PT <= 1.4 as it seems to be an archive
+    # "0.9.0",  # this version has problems loading on PT <= 1.4 as it seems to be an archive
+    "0.10.0",
+    "1.0.0",
+    "1.0.1",
+    "1.0.2",
+    "1.0.3",
+    "1.0.4",
+    "1.0.5",
+    "1.0.6",
+    "1.0.7",
+    "1.0.8",
+    "1.1.0",
+    "1.1.1",
+    "1.1.2",
+    "1.1.3",
 ])
 def test_resume_legacy_checkpoints(tmpdir, pl_version):
     path_dir = os.path.join(LEGACY_CHECKPOINTS_PATH, pl_version)
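Two versions above are skipped because their checkpoints "seem to be an archive": newer torch releases serialize checkpoints as zip archives, a format older torch cannot read. A stdlib-only sketch of detecting that format (illustrative, not part of the test in this commit):

```python
import zipfile


def is_zip_based_checkpoint(path: str) -> bool:
    """Return True if the checkpoint file uses zip-archive serialization.

    Newer torch releases save checkpoints as zip archives; older
    releases (e.g. PT <= 1.4) use a plain pickle-based format and
    cannot load the zip variant.
    """
    return zipfile.is_zipfile(path)
```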
