4 changes: 2 additions & 2 deletions animatediff/models/attention.py
@@ -8,10 +8,10 @@
from torch import nn

from diffusers.configuration_utils import ConfigMixin, register_to_config
-from diffusers.modeling_utils import ModelMixin
+from diffusers import ModelMixin
from diffusers.utils import BaseOutput
from diffusers.utils.import_utils import is_xformers_available
-from diffusers.models.attention import CrossAttention, FeedForward, AdaLayerNorm
+from diffusers.models.attention import Attention as CrossAttention, FeedForward, AdaLayerNorm

from einops import rearrange, repeat
import pdb
2 changes: 1 addition & 1 deletion animatediff/models/motion_module.py
@@ -11,7 +11,7 @@
from diffusers import ModelMixin
from diffusers.utils import BaseOutput
from diffusers.utils.import_utils import is_xformers_available
-from diffusers.models.attention import CrossAttention, FeedForward
+from diffusers.models.attention import Attention as CrossAttention, FeedForward

from einops import rearrange, repeat
import math
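Both files above pin the new diffusers layout: `ModelMixin` is imported from the package root instead of the removed `diffusers.modeling_utils`, and the renamed `Attention` class is aliased back to `CrossAttention` so the rest of the code is untouched. If the fork also needs to keep working on older diffusers releases, a guarded variant is possible (a sketch under that assumption, not what this PR commits):

```python
# Version-tolerant imports (sketch): prefer the post-refactor names, fall back
# to the pre-refactor module paths on older diffusers releases.
try:
    from diffusers.models.attention import Attention as CrossAttention  # post-rename
except ImportError:
    from diffusers.models.attention import CrossAttention  # pre-rename

try:
    from diffusers import ModelMixin  # re-exported at the package root
except ImportError:
    from diffusers.modeling_utils import ModelMixin  # old module location
```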
5 changes: 2 additions & 3 deletions animatediff/models/sparse_controlnet.py
@@ -23,8 +23,7 @@
from diffusers.configuration_utils import ConfigMixin, register_to_config
from diffusers.utils import BaseOutput, logging
from diffusers.models.embeddings import TimestepEmbedding, Timesteps
-from diffusers.modeling_utils import ModelMixin
-
+from diffusers import ModelMixin

from .unet_blocks import (
CrossAttnDownBlock3D,
@@ -35,7 +34,7 @@
from einops import repeat, rearrange
from .resnet import InflatedConv3d

-from diffusers.models.unet_2d_condition import UNet2DConditionModel
+from diffusers.models.unets.unet_2d_condition import UNet2DConditionModel

logger = logging.get_logger(__name__) # pylint: disable=invalid-name

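The `UNet2DConditionModel` import is the one change here that hard-pins a newer diffusers, since the `diffusers.models.unets` subpackage does not exist in older releases. If compatibility in both directions matters, the same try/except pattern this PR uses in the pipeline would work here too (an assumption, not part of this PR):

```python
try:
    # newer diffusers: UNet variants grouped under the `unets` subpackage
    from diffusers.models.unets.unet_2d_condition import UNet2DConditionModel
except ImportError:
    # older layout, as on the removed line
    from diffusers.models.unet_2d_condition import UNet2DConditionModel
```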
2 changes: 1 addition & 1 deletion animatediff/models/unet.py
@@ -12,7 +12,7 @@
import torch.utils.checkpoint

from diffusers.configuration_utils import ConfigMixin, register_to_config
-from diffusers.modeling_utils import ModelMixin
+from diffusers import ModelMixin
from diffusers.utils import BaseOutput, logging
from diffusers.models.embeddings import TimestepEmbedding, Timesteps
from .unet_blocks import (
6 changes: 5 additions & 1 deletion animatediff/pipelines/pipeline_animation.py
@@ -14,7 +14,11 @@

from diffusers.configuration_utils import FrozenDict
from diffusers.models import AutoencoderKL
-from diffusers.pipeline_utils import DiffusionPipeline
+try:
+    from diffusers.pipeline_utils import DiffusionPipeline
+except ImportError:
+    from diffusers.pipelines.pipeline_utils import DiffusionPipeline
+
from diffusers.schedulers import (
DDIMScheduler,
DPMSolverMultistepScheduler,
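This guarded import is the most portable pattern in the PR, since it resolves `DiffusionPipeline` on either side of the diffusers reorganization (note the fallback should catch `ImportError` specifically rather than use a bare `except`). If more of these version forks accumulate, one option is to centralize them in a single module (a sketch; `animatediff/compat.py` is hypothetical, not something this PR adds):

```python
# animatediff/compat.py (hypothetical): one home for version-sensitive imports,
# so model and pipeline files can share a single fallback chain.
try:
    from diffusers.pipeline_utils import DiffusionPipeline  # older diffusers layout
except ImportError:
    from diffusers.pipelines.pipeline_utils import DiffusionPipeline  # newer layout

__all__ = ["DiffusionPipeline"]
```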
55 changes: 55 additions & 0 deletions configs/training/motion_director/my_video (copy).yaml
@@ -0,0 +1,55 @@
# Model From Huggingface Diffusers
pretrained_model_path: "diffusers/stable-diffusion-v1-5"

# Model in CKPT format. This is the CKPT that you download from CivitAI or use in A1111, ComfyUI, etc.
# In most cases, leave this blank.
unet_checkpoint_path: ""

# Must be CKPT from https://huggingface.co/guoyww/animatediff/tree/main
motion_module_path: "v3_sd15_mm.ckpt"

# Must be CKPT from https://huggingface.co/guoyww/animatediff/tree/main
# Optional for training, but highly recommended as a starting point.
domain_adapter_path: "" #"v3_sd15_adapter.ckpt"

# ["single_video", "folder"]
# single_video = path/my_video.mp4
# folder = path/my_videos

# You can have a .txt file containing the prompt in the same folder.
# E.g. path/my_videos/1.mp4 with path/my_videos/1.txt
# Otherwise, every video will have the same training prompt.
mode_type: "single_video"

video:
  # Your local video path
  path: "examples/pexels-cottonbro-5319934 (2160p).mp4" # Or just path/to/folder_of_videos_with_.txt_files/ (set mode_type to "folder")

  # Optional custom start frame (idx). Leave this at 0 if your video is already trimmed.
  # This is only recommended for single videos.
  start_time: 0

  # If your video is longer than 16 frames, it will be chunked into multiple parts.
  # This is advanced usage, so leave this at 1.
  max_chunks: 1

# Describe your action with a simple prompt.
training_prompt: "a man is running on a bridge"

# A custom prompt used to generate samples during training so you can see your training progress.
validation_prompt: "a highly realistic video of batman running in a mystic forest, depth of field, epic lights, high quality, trending on artstation"

# The name of the LoRA file (will save to ./results/{save_name}...)
save_name: "man_running"

# Quality
# Quality of training: ["low", "preferred", "best"]
# low = Save the most memory, preferred = Most optimal, best = Memory intensive
quality: "preferred"

# Advanced users only. Only modify this if you're familiar with training models.
# You can call / modify this config directly if you know what you're doing.
training_config: "configs/training/motion_director/training.yaml"

# Do not change this. Refer to the above for advanced training.
simple_mode: True
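Since this file is the template that per-run configs are copied from, a quick way to check an edited copy before launching a run is to load and validate it directly (a minimal sketch assuming PyYAML is installed; `load_simple_config` is a hypothetical helper, not part of this repo):

```python
import yaml  # assumes PyYAML

def load_simple_config(path: str) -> dict:
    """Load the simple-mode config and sanity-check its enumerated fields."""
    with open(path) as f:
        cfg = yaml.safe_load(f)
    assert cfg["mode_type"] in ("single_video", "folder"), cfg["mode_type"]
    assert cfg["quality"] in ("low", "preferred", "best"), cfg["quality"]
    return cfg

cfg = load_simple_config("configs/training/motion_director/my_video (copy).yaml")
print(cfg["save_name"], "->", cfg["training_prompt"])
```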
18 changes: 9 additions & 9 deletions configs/training/motion_director/my_video.yaml
@@ -1,29 +1,29 @@
# Model From Huggingface Diffusers
-pretrained_model_path: "diffusers/stable-diffusion-v1-5"
+pretrained_model_path: "/home/rednax/SSD2TB/Github_repos/ComfyUI/models/checkpoints/juggernaut_reborn.safetensors"

# Model in CKPT format. This is the CKPT that you download from CivitAI or use in A1111, ComfyUI, etc.
# In most cases, leave this blank.
unet_checkpoint_path: ""

# Must be CKPT from https://huggingface.co/guoyww/animatediff/tree/main
-motion_module_path: "v3_sd15_mm.ckpt"
+motion_module_path: "/home/rednax/SSD2TB/Github_repos/ComfyUI/models/animatediff_models/v3_sd15_mm.ckpt"

# Must be CKPT from https://huggingface.co/guoyww/animatediff/tree/main
# Optional for training, but highly recommended as a starting point.
-domain_adapter_path: "" #"v3_sd15_adapter.ckpt"
+domain_adapter_path: "/home/rednax/SSD2TB/Github_repos/ComfyUI/models/loras/v3_sd15_adapter.ckpt"

# ["single_video", "folder"]
# single_video = path/my_video.mp4
# folder = path/my_videos
+# folder: "/home/rednax/SSD2TB/stored_CLIP_results/AAA_init_imgs/00_video_data/sunlight_foliage"

# You can have a .txt file containing the prompt in the same folder.
# E.g. path/my_videos/1.mp4 with path/my_videos/1.txt
# Otherwise, every video will have the same training prompt.
-mode_type: "single_video"
+mode_type: "folder"

video:
  # Your local video path
-  path: "examples/pexels-cottonbro-5319934 (2160p).mp4" # Or just path/to/folder_of_videos_with_.txt_files/ (set mode_type to "folder")
+  path: "/home/rednax/SSD2TB/stored_CLIP_results/AAA_init_imgs/00_video_data/sunlight_foliage" # Or just path/to/folder_of_videos_with_.txt_files/ (set mode_type to "folder")

  # Optional custom start frame (idx). Leave this at 0 if your video is already trimmed.
  # This is only recommended for single videos.
@@ -34,13 +34,13 @@ video:
  max_chunks: 1

# Describe your action with a simple prompt.
-training_prompt: "a man is running on a bridge"
+training_prompt: "foliage blowing in the wind, rays of sunlight, backlit"

# A custom prompt used to generate samples during training so you can see your training progress.
-validation_prompt: "a highly realistic video of batman running in a mystic forest, depth of field, epic lights, high quality, trending on artstation"
+validation_prompt: "foliage blowing in the wind, rays of sunlight, backlit"

# The name of the LoRA file (will save to ./results/{save_name}...)
-save_name: "man_running"
+save_name: "sunlight_01"

# Quality
# Quality of training: ["low", "preferred", "best"]
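With `mode_type: "folder"`, the comments above say each video can sit next to a same-named `.txt` prompt file, with `training_prompt` as the fallback for videos that lack one. A small sketch of that pairing convention (hypothetical helper, not this repo's actual loader):

```python
from pathlib import Path

def collect_videos(folder: str, fallback_prompt: str):
    """Pair each .mp4 with its same-named .txt prompt, else the fallback."""
    pairs = []
    for mp4 in sorted(Path(folder).glob("*.mp4")):
        txt = mp4.with_suffix(".txt")
        prompt = txt.read_text().strip() if txt.exists() else fallback_prompt
        pairs.append((mp4, prompt))
    return pairs

for video, prompt in collect_videos(
    "/home/rednax/SSD2TB/stored_CLIP_results/AAA_init_imgs/00_video_data/sunlight_foliage",
    fallback_prompt="foliage blowing in the wind, rays of sunlight, backlit",
):
    print(video.name, "->", prompt)
```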
4 changes: 2 additions & 2 deletions configs/training/motion_director/training.yaml
@@ -48,7 +48,7 @@ train_data:
  sample_start_idx: 0

  # Used for 'folder'. The rate at which your frames are sampled.
-  fps: 0
+  fps: 8

  # For 'single_video' and 'json'. The stride between sampled frames: frame_step=2 turns frames (1,2,3,4,...) into (1,3,5,7,...).
  frame_step: 3
@@ -79,7 +79,7 @@ validation_data:

lora_name: ""
use_motion_lora_format: True
-lora_rank: 32
+lora_rank: 24
lora_unet_dropout: 0.1
single_spatial_lora: True
train_sample_validation: False
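As a quick illustration of the `frame_step` semantics described in the comment above (a toy sketch; `step_frames` is hypothetical and not this repo's sampling code):

```python
# Toy illustration of frame_step: a stride over already-decoded frame indices.
def step_frames(frames: list[int], frame_step: int) -> list[int]:
    return frames[::frame_step]

print(step_frames(list(range(1, 9)), frame_step=2))   # [1, 3, 5, 7]
print(step_frames(list(range(1, 17)), frame_step=3))  # [1, 4, 7, 10, 13, 16]
```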